本記事ではPythonで簡単なx86エミュレータを作成します。
前回はIOポートの読み書きに使用されるin/out命令について学びました。
今回はBIOSの機能を使った文字出力について学んでいきます。
BIOSとは
前回はIOポートの読み書きに使用されるin/out命令について学びました。
in/out命令によってIOポートに接続された周辺機器などのデバイスにアクセスし、読み書きすることが可能になりますが、機器ごとの仕様に差異があると、読み書きをするタイミングが異なり調整が必要になってきます。
そこでこれらの差異を吸収するために用意されたインターフェースがBIOS(Basic Input Output System:基本入出力システム)になります。
BIOSはIntel 8086の時代に使われたリアルモードで動作するプログラムなので、プロテクトモードで動作する現代的なOSでは殆ど直接的に触れることはありません。
しかし、PCの起動直後にハードウェアに異常がないかをチェックし、HDDに記録されたOSをメモリにロードして実行(ブートストラップローダ)する大切な役割はBIOSが担っています。
アセンブリ言語プログラム
今回は以下の参考書籍と同じアセンブリ言語プログラムを使用します。
;subroutine32.asm
; Prints a NUL-terminated string one character at a time through the
; BIOS video interrupt (int 0x10), then jumps to address 0.
BITS 32
    org 0x7c00
start:
    mov esi, msg        ; ESI = address of the string to print
    call puts
    jmp 0               ; jump to address 0

; puts: print the NUL-terminated string pointed to by ESI.
puts:
    mov al, [esi]       ; AL = next character
    inc esi
    cmp al, 0           ; a 0 byte terminates the string
    je puts_end
    mov ah, 0x0e        ; BIOS video function 0x0e: teletype (one-character) output
    mov ebx, 10         ; character color
    int 0x10            ; call the BIOS video service
    jmp puts
puts_end:
    ret

msg:
    db "Hello, World!", 0x0d, 0x0a, 0   ; text, CR, LF, terminating 0
上記ではputs
内の14行目~16行目がBIOSの処理になっていて、mov ah, 0x0e
で1文字を表示させるテレタイプ出力を指定し、mov ebx, 10
で出力する文字色(ここでは緑)を指定し、int 0x10
でBIOSの機能を呼び出します。
int命令はCPUに対してソフトウェア割り込み(Interrupt)をする命令になります。
Pythonによるスクリプトの作成
それでは、PythonでBIOSのテレタイプ出力機能を実装していきます。
# emulator.py
"""A minimal 32-bit x86 emulator with a BIOS teletype-output service.

The script loads ``subroutine32.bin`` at address 0x7c00, executes it until
EIP becomes 0 (or an unimplemented opcode is hit) and dumps the registers.
Pass ``-q`` on the command line to suppress the per-instruction trace.
"""
import sys

# ANSI foreground color codes, indexed by the low three bits of a BIOS color.
bios_to_terminal = [30, 34, 32, 36, 31, 35, 33, 37]

# Bit masks of the EFLAGS bits maintained by the emulator.
CARRY_FLAG = 1
ZERO_FLAG = 1 << 6
SIGN_FLAG = 1 << 7
OVERFLOW_FLAG = 1 << 11

_MASK32 = 0xffffffff


def _to_signed(value, bits):
    """Interpret the low *bits* bits of *value* as a two's-complement int."""
    value &= (1 << bits) - 1
    if value >> (bits - 1):
        value -= 1 << bits
    return value


class ModRM:
    """Decoded ModR/M byte plus optional SIB byte and displacement.

    All fields live in the ``modrm`` dict. ``opecode`` and ``reg_index`` are
    two views of the same three bits. ``disp8``/``disp32`` hold signed
    displacements.
    """

    def __init__(self):
        self.modrm = {
            "mod": 0x00,
            "opecode": 0x00,
            "reg_index": 0x00,
            "rm": 0x00,
            "sib": 0x00,
            "disp8": 0x00,
            "disp32": 0x00,
        }


class Emulator:
    """CPU state (registers, EFLAGS, EIP), memory and instruction handlers.

    Call ``create_emu`` to allocate memory and ``init_instructions`` to fill
    the opcode dispatch table before executing anything.
    """

    def __init__(self):
        self.register_name_extended = [
            "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI"]
        self.register_name = [
            "AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH"]
        self.registers = {name: 0x00 for name in self.register_name_extended}
        self.eflags = 0x00
        self.memory = None
        self.eip = 0x00
        # One handler slot per possible first opcode byte.
        self.instructions = [None for _ in range(256)]

    def init_instructions(self):
        """Populate the opcode -> handler dispatch table."""
        table = self.instructions
        table[0x01] = self.add_rm32_r32
        table[0x3b] = self.cmp_r32_rm32
        table[0x3c] = self.cmp_al_imm8
        # Opcodes that encode a register number in their low three bits.
        for i in range(8):
            table[0x40 + i] = self.inc_r32
            table[0x50 + i] = self.push_r32
            table[0x58 + i] = self.pop_r32
            table[0xb0 + i] = self.mov_r8_imm8
            table[0xb8 + i] = self.mov_r32_imm32
        table[0x68] = self.push_imm32
        table[0x6a] = self.push_imm8
        table[0x70] = self.jo
        table[0x71] = self.jno
        table[0x72] = self.jc
        table[0x73] = self.jnc
        table[0x74] = self.jz
        table[0x75] = self.jnz
        table[0x78] = self.js
        table[0x79] = self.jns
        table[0x7c] = self.jl
        table[0x7e] = self.jle
        table[0x83] = self.code_83
        table[0x89] = self.mov_rm32_r32
        table[0x8a] = self.mov_r8_rm8
        table[0x8b] = self.mov_r32_rm32
        table[0xc3] = self.ret
        table[0xc7] = self.mov_rm32_imm32
        table[0xc9] = self.leave
        table[0xcd] = self.swi
        table[0xe8] = self.call_rel32
        table[0xe9] = self.near_jump
        table[0xeb] = self.short_jump
        table[0xec] = self.in_al_dx
        table[0xee] = self.out_dx_al
        table[0xff] = self.code_ff

    def create_emu(self, size, eip, esp):
        """Allocate *size* bytes of zeroed memory and set initial EIP/ESP."""
        self.eip = eip
        self.registers["ESP"] = esp
        self.memory = bytearray(size)

    def dump_registers(self):
        """Print every general-purpose register and EIP as 8 hex digits."""
        for name in self.register_name_extended:
            print("{} = 0x{:08x}".format(name, self.registers[name]))
        print("EIP = 0x{:08x}".format(self.eip))

    # ---- instruction stream access -------------------------------------

    def get_code8(self, index):
        """Return the unsigned byte at EIP + *index*."""
        return self.get_memory8(self.eip + index)

    def get_sign_code8(self, index):
        """Return the byte at EIP + *index* as a signed value (-128..127)."""
        return _to_signed(self.get_code8(index), 8)

    def get_code32(self, index):
        """Return the unsigned little-endian dword at EIP + *index*."""
        ret = 0x00
        for i in range(4):
            ret |= self.get_code8(index + i) << (i * 8)
        return ret

    def get_sign_code32(self, index):
        """Return the little-endian dword at EIP + *index* as a signed value."""
        return _to_signed(self.get_code32(index), 32)

    # ---- ModR/M decoding ------------------------------------------------

    def parse_modrm(self):
        """Decode the ModR/M byte at EIP (plus SIB/displacement) and skip it.

        Returns a ``ModRM`` whose displacements are signed. EIP is advanced
        past everything that was consumed.
        """
        m = ModRM()
        code = self.get_code8(0)
        m.modrm["mod"] = (code & 0xc0) >> 6
        m.modrm["opecode"] = m.modrm["reg_index"] = (code & 0x38) >> 3
        m.modrm["rm"] = code & 0x07
        self.eip += 1

        mod = m.modrm["mod"]
        rm = m.modrm["rm"]
        if mod != 3 and rm == 4:
            m.modrm["sib"] = self.get_code8(0)
            self.eip += 1

        if (mod == 0 and rm == 5) or mod == 2:
            m.modrm["disp32"] = self.get_sign_code32(0)
            m.modrm["disp8"] = m.modrm["disp32"] & 0xff
            self.eip += 4
        elif mod == 1:
            m.modrm["disp8"] = m.modrm["disp32"] = self.get_sign_code8(0)
            self.eip += 1
        return m

    def calc_memory_address(self, m):
        """Return the 32-bit effective address described by *m*.

        SIB addressing (rm == 4) and register operands (mod == 3) are not
        supported here: a message is printed and the process exits with
        status 1.
        """
        mod = m.modrm["mod"]
        rm = m.modrm["rm"]
        if mod == 3:
            print("not implemented ModRM mod = 3")
            sys.exit(1)
        if rm == 4:
            print("not implemented ModRM mod = {}, rm = 4".format(mod))
            sys.exit(1)

        if mod == 0:
            if rm == 5:
                # Absolute address encoded directly as disp32.
                return m.modrm["disp32"] & _MASK32
            return self.get_register32(rm)
        if mod == 1:
            return (self.get_register32(rm) + m.modrm["disp8"]) & _MASK32
        return (self.get_register32(rm) + m.modrm["disp32"]) & _MASK32

    # ---- memory access --------------------------------------------------

    def get_memory8(self, address):
        """Return the byte stored at *address* as an int."""
        value = self.memory[address]
        if not isinstance(value, int):
            # Tolerate memory populated with length-1 bytes objects.
            value = int.from_bytes(value, 'little')
        return value

    def get_memory32(self, address):
        """Return the little-endian dword stored at *address*."""
        ret = 0
        for i in range(4):
            ret |= self.get_memory8(address + i) << (8 * i)
        return ret

    def set_memory8(self, address, value):
        """Store the low byte of *value* at *address*."""
        self.memory[address] = value & 0xff

    def set_memory32(self, address, value):
        """Store the low 32 bits of *value* at *address*, little-endian."""
        for i in range(4):
            self.set_memory8(address + i, value >> (i * 8))

    # ---- register access ------------------------------------------------

    def get_register32(self, index):
        """Return the 32-bit register with number *index* (0 = EAX ...)."""
        return self.registers[self.register_name_extended[index]]

    def set_register32(self, index, value):
        """Store *value*, truncated to 32 bits, in register number *index*."""
        reg = self.register_name_extended[index]
        self.registers[reg] = value & _MASK32

    def get_register8(self, index):
        """Return an 8-bit register: 0-3 = AL/CL/DL/BL, 4-7 = AH/CH/DH/BH."""
        if index < 4:
            reg_name = self.register_name_extended[index]
            return self.registers[reg_name] & 0xff
        reg_name = self.register_name_extended[index - 4]
        return (self.registers[reg_name] >> 8) & 0xff

    def set_register8(self, index, value):
        """Store the low byte of *value* in 8-bit register number *index*.

        The other 24 bits of the containing 32-bit register are preserved.
        """
        value &= 0xff
        if index < 4:
            reg_name = self.register_name_extended[index]
            r = self.registers[reg_name] & 0xffffff00
            self.registers[reg_name] = r | value
        else:
            reg_name = self.register_name_extended[index - 4]
            r = self.registers[reg_name] & 0xffff00ff
            self.registers[reg_name] = r | (value << 8)

    # ---- ModR/M operand access -----------------------------------------

    def get_rm32(self, m):
        """Read the 32-bit r/m operand (register or memory) selected by *m*."""
        if m.modrm["mod"] == 3:
            return self.get_register32(m.modrm["rm"])
        return self.get_memory32(self.calc_memory_address(m))

    def set_rm32(self, m, value):
        """Write *value* to the 32-bit r/m operand selected by *m*."""
        if m.modrm["mod"] == 3:
            self.set_register32(m.modrm["rm"], value)
        else:
            self.set_memory32(self.calc_memory_address(m), value)

    def get_rm8(self, m):
        """Read the 8-bit r/m operand (register or memory) selected by *m*."""
        if m.modrm["mod"] == 3:
            return self.get_register8(m.modrm["rm"])
        return self.get_memory8(self.calc_memory_address(m))

    def get_r32(self, m):
        """Read the 32-bit register named by the reg field of *m*."""
        return self.get_register32(m.modrm["reg_index"])

    def set_r32(self, m, value):
        """Write the 32-bit register named by the reg field of *m*."""
        self.set_register32(m.modrm["reg_index"], value)

    def set_r8(self, m, value):
        """Write the 8-bit register named by the reg field of *m*."""
        self.set_register8(m.modrm["reg_index"], value)

    # ---- stack ----------------------------------------------------------

    def push32(self, value):
        """Decrement ESP by 4 and store *value* at the new top of stack."""
        esp = self.register_name_extended.index("ESP")
        address = (self.get_register32(esp) - 4) & _MASK32
        self.set_register32(esp, address)
        self.set_memory32(address, value)

    def pop32(self):
        """Return the dword at the top of stack and increment ESP by 4."""
        esp = self.register_name_extended.index("ESP")
        address = self.get_register32(esp)
        ret = self.get_memory32(address)
        self.set_register32(esp, address + 4)
        return ret

    # ---- data movement --------------------------------------------------

    def mov_r32_imm32(self):
        """B8+r: mov r32, imm32."""
        reg = self.get_code8(0) - 0xb8
        self.set_register32(reg, self.get_code32(1))
        self.eip = (self.eip + 5) & _MASK32

    def mov_r8_imm8(self):
        """B0+r: mov r8, imm8."""
        reg = self.get_code8(0) - 0xB0
        self.set_register8(reg, self.get_code8(1))
        self.eip += 2

    def mov_rm32_imm32(self):
        """C7 /0: mov r/m32, imm32."""
        self.eip += 1
        m = self.parse_modrm()
        value = self.get_code32(0)
        self.eip += 4
        self.set_rm32(m, value)

    def mov_rm32_r32(self):
        """89 /r: mov r/m32, r32."""
        self.eip += 1
        m = self.parse_modrm()
        self.set_rm32(m, self.get_r32(m))

    def mov_r32_rm32(self):
        """8B /r: mov r32, r/m32."""
        self.eip += 1
        m = self.parse_modrm()
        self.set_r32(m, self.get_rm32(m))

    def mov_r8_rm8(self):
        """8A /r: mov r8, r/m8."""
        self.eip += 1
        m = self.parse_modrm()
        self.set_r8(m, self.get_rm8(m))

    def push_r32(self):
        """50+r: push r32."""
        reg = self.get_code8(0) - 0x50
        self.push32(self.get_register32(reg))
        self.eip += 1

    def pop_r32(self):
        """58+r: pop r32."""
        reg = self.get_code8(0) - 0x58
        self.set_register32(reg, self.pop32())
        self.eip += 1

    def push_imm8(self):
        """6A: push imm8 (the immediate is sign-extended to 32 bits)."""
        self.push32(self.get_sign_code8(1))
        self.eip += 2

    def push_imm32(self):
        """68: push imm32."""
        self.push32(self.get_code32(1))
        self.eip += 5

    def leave(self):
        """C9: leave (ESP = EBP, then pop EBP)."""
        esp = self.register_name_extended.index("ESP")
        ebp = self.register_name_extended.index("EBP")
        self.set_register32(esp, self.get_register32(ebp))
        self.set_register32(ebp, self.pop32())
        self.eip += 1

    # ---- arithmetic -----------------------------------------------------

    def add_rm32_r32(self):
        """01 /r: add r/m32, r32 (EFLAGS are not updated)."""
        self.eip += 1
        m = self.parse_modrm()
        r32 = self.get_r32(m)
        rm32 = self.get_rm32(m)
        self.set_rm32(m, rm32 + r32)

    def add_rm32_imm8(self, m):
        """83 /0: add r/m32, imm8 (sign-extended; EFLAGS are not updated)."""
        rm32 = self.get_rm32(m)
        imm8 = self.get_sign_code8(0)
        self.eip += 1
        self.set_rm32(m, rm32 + imm8)

    def sub_rm32_imm8(self, m):
        """83 /5: sub r/m32, imm8 (sign-extended); updates EFLAGS."""
        rm32 = self.get_rm32(m)
        imm8 = self.get_sign_code8(0)
        self.eip += 1
        result = rm32 - imm8
        self.set_rm32(m, result)
        self.update_eflags_sub(rm32, imm8, result)

    def cmp_rm32_imm8(self, m):
        """83 /7: cmp r/m32, imm8 (sign-extended); updates EFLAGS only."""
        rm32 = self.get_rm32(m)
        imm8 = self.get_sign_code8(0)
        self.eip += 1
        self.update_eflags_sub(rm32, imm8, rm32 - imm8)

    def cmp_r32_rm32(self):
        """3B /r: cmp r32, r/m32; updates EFLAGS only."""
        self.eip += 1
        m = self.parse_modrm()
        r32 = self.get_r32(m)
        rm32 = self.get_rm32(m)
        self.update_eflags_sub(r32, rm32, r32 - rm32)

    def cmp_al_imm8(self):
        """3C: cmp al, imm8; updates EFLAGS only.

        The operands are compared as zero-extended 32-bit values, so the
        sign/overflow flags reflect bit 31 rather than bit 7.
        """
        value = self.get_code8(1)
        al = self.get_register8(self.register_name.index("AL"))
        self.update_eflags_sub(al, value, al - value)
        self.eip += 2

    def inc_rm32(self, m):
        """FF /0: inc r/m32 (EFLAGS are not updated)."""
        self.set_rm32(m, self.get_rm32(m) + 1)

    def inc_r32(self):
        """40+r: inc r32 (EFLAGS are not updated)."""
        reg = self.get_code8(0) - 0x40
        self.set_register32(reg, self.get_register32(reg) + 1)
        self.eip += 1

    def code_83(self):
        """Dispatch opcode 0x83 on the ModR/M opcode extension."""
        self.eip += 1
        m = self.parse_modrm()
        opecode = m.modrm["opecode"]
        if opecode == 0:
            self.add_rm32_imm8(m)
        elif opecode == 5:
            self.sub_rm32_imm8(m)
        elif opecode == 7:
            self.cmp_rm32_imm8(m)
        else:
            print("not implemented: 83 /{}".format(opecode))
            sys.exit(1)

    def code_ff(self):
        """Dispatch opcode 0xFF on the ModR/M opcode extension."""
        self.eip += 1
        m = self.parse_modrm()
        if m.modrm["opecode"] == 0:
            self.inc_rm32(m)
        else:
            print("not implemented: FF /{}".format(m.modrm["opecode"]))
            sys.exit(1)

    # ---- EFLAGS ---------------------------------------------------------

    def update_eflags_sub(self, v1, v2, result):
        """Set CF/ZF/SF/OF for the 32-bit subtraction ``v1 - v2``.

        *v1* and *v2* may be signed or unsigned Python ints; only their low
        32 bits are significant. *result* is ``v1 - v2`` (unmasked).
        """
        v1 &= _MASK32
        v2 &= _MASK32
        sign1 = v1 >> 31
        sign2 = v2 >> 31
        signr = (result >> 31) & 1
        self.set_carry(v1 < v2)  # unsigned borrow
        self.set_zero((result & _MASK32) == 0)
        self.set_sign(signr)
        self.set_overflow(sign1 != sign2 and sign1 != signr)

    def set_carry(self, is_carry):
        """Set or clear the carry flag."""
        if is_carry:
            self.eflags |= CARRY_FLAG
        else:
            self.eflags &= ~CARRY_FLAG

    def set_zero(self, is_zero):
        """Set or clear the zero flag."""
        if is_zero:
            self.eflags |= ZERO_FLAG
        else:
            self.eflags &= ~ZERO_FLAG

    def set_sign(self, is_sign):
        """Set or clear the sign flag."""
        if is_sign:
            self.eflags |= SIGN_FLAG
        else:
            self.eflags &= ~SIGN_FLAG

    def set_overflow(self, is_overflow):
        """Set or clear the overflow flag."""
        if is_overflow:
            self.eflags |= OVERFLOW_FLAG
        else:
            self.eflags &= ~OVERFLOW_FLAG

    def is_carry(self):
        """Return True when the carry flag is set."""
        return (self.eflags & CARRY_FLAG) != 0

    def is_zero(self):
        """Return True when the zero flag is set."""
        return (self.eflags & ZERO_FLAG) != 0

    def is_sign(self):
        """Return True when the sign flag is set."""
        return (self.eflags & SIGN_FLAG) != 0

    def is_overflow(self):
        """Return True when the overflow flag is set."""
        return (self.eflags & OVERFLOW_FLAG) != 0

    # ---- control flow ---------------------------------------------------

    def short_jump(self):
        """EB: jmp rel8."""
        diff = self.get_sign_code8(1)
        self.eip = (self.eip + diff + 2) & _MASK32

    def near_jump(self):
        """E9: jmp rel32."""
        diff = self.get_sign_code32(1)
        self.eip = (self.eip + diff + 5) & _MASK32

    def call_rel32(self):
        """E8: call rel32 (pushes the address of the next instruction)."""
        diff = self.get_sign_code32(1)
        self.push32(self.eip + 5)
        self.eip = (self.eip + diff + 5) & _MASK32

    def ret(self):
        """C3: ret."""
        self.eip = self.pop32()

    def j(func):
        """Build a 2-byte rel8 jump taken when *func* returns true."""
        def wrapper(self, *args, **kwargs):
            if func(self, *args, **kwargs):
                diff = self.get_sign_code8(1)
            else:
                diff = 0
            self.eip = (self.eip + diff + 2) & _MASK32
        return wrapper

    def jn(func):
        """Build a 2-byte rel8 jump taken when *func* returns false."""
        def wrapper(self, *args, **kwargs):
            if func(self, *args, **kwargs):
                diff = 0
            else:
                diff = self.get_sign_code8(1)
            self.eip = (self.eip + diff + 2) & _MASK32
        return wrapper

    @j
    def jc(self):
        return self.is_carry()

    @jn
    def jnc(self):
        return self.is_carry()

    @j
    def js(self):
        return self.is_sign()

    @jn
    def jns(self):
        return self.is_sign()

    @j
    def jz(self):
        return self.is_zero()

    @jn
    def jnz(self):
        return self.is_zero()

    @j
    def jo(self):
        return self.is_overflow()

    @jn
    def jno(self):
        return self.is_overflow()

    @j
    def jl(self):
        return self.is_sign() != self.is_overflow()

    @j
    def jle(self):
        return self.is_zero() or self.is_sign() != self.is_overflow()

    # ---- port I/O -------------------------------------------------------

    def in_al_dx(self):
        """EC: in al, dx. AL becomes 0 when the port yields no character."""
        edx = self.register_name_extended.index("EDX")
        address = self.get_register32(edx) & 0xffff
        ch = self.io_in8(address)
        value = ord(ch) if ch else 0  # EOF or unmapped port
        self.set_register8(self.register_name.index("AL"), value)
        self.eip += 1

    def out_dx_al(self):
        """EE: out dx, al."""
        edx = self.register_name_extended.index("EDX")
        address = self.get_register32(edx) & 0xffff
        value = self.get_register8(self.register_name.index("AL"))
        self.io_out8(address, value)
        self.eip += 1

    def io_in8(self, address):
        """Read one character from stdin for port 0x03f8; else return None."""
        if address == 0x03f8:
            return sys.stdin.read(1)

    def io_out8(self, address, value):
        """Write ``chr(value)`` to stdout for port 0x03f8; ignore others."""
        if address == 0x03f8:
            sys.stdout.write(chr(value))
            sys.stdout.flush()

    # ---- software interrupts / BIOS -------------------------------------

    def swi(self):
        """CD: int imm8. Only interrupt 0x10 (BIOS video) is handled."""
        int_index = self.get_code8(1)
        self.eip += 2
        if int_index == 0x10:
            self.bios_video()
        else:
            print("unknown interrupt: 0x{:02x}".format(int_index))

    def put_string(self, string, size):
        """Send the first *size* characters of *string* to port 0x03f8."""
        for i in range(size):
            self.io_out8(0x03f8, ord(string[i]))

    def bios_video_teletype(self):
        """Print the character in AL using the color in the low 4 bits of BL.

        The color is rendered with an ANSI escape sequence: bit 3 selects
        the bright attribute, bits 0-2 select the color.
        """
        color = self.get_register8(self.register_name.index("BL")) & 0x0f
        ch = self.get_register8(self.register_name.index("AL"))
        terminal_color = bios_to_terminal[color & 0x07]
        bright = 1 if color & 0x08 else 0
        buf = "\x1b[{};{}m{}\x1b[0m".format(bright, terminal_color, chr(ch))
        self.put_string(buf, len(buf))

    def bios_video(self):
        """Dispatch an int 0x10 call on the function number in AH."""
        func = self.get_register8(self.register_name.index("AH"))
        if func == 0x0e:
            self.bios_video_teletype()
        else:
            print("not implemented BIOS video function: 0x{:02x}".format(func))


def main():
    """Load subroutine32.bin at 0x7c00, run it and dump the registers."""
    mem_size = 1024 * 1024
    load_address = 0x7c00

    emu = Emulator()
    emu.create_emu(mem_size, load_address, load_address)

    with open('subroutine32.bin', 'rb') as binary:
        # Never read more than fits between the load address and end of memory.
        program = binary.read(mem_size - load_address)
    emu.memory[load_address:load_address + len(program)] = program

    quiet = '-q' in sys.argv

    emu.init_instructions()
    while emu.eip < mem_size:
        code = emu.get_code8(0)
        if not quiet:
            print("EIP = 0x{:02x}, Code = 0x{:02x}".format(emu.eip, code))
        handler = emu.instructions[code]
        if handler is None:
            print("\n\nNot Implemented: 0x{:02x}".format(code))
            break
        handler()
        if emu.eip == 0x00:
            print("\n\nend of program.\n\n")
            break
    emu.dump_registers()


if __name__ == "__main__":
    main()
なお、上記では指定された文字色をターミナルに出力するためにANSIエスケープシーケンスと呼ばれるものを利用しています。
"\x1b[<輝度>;<色番号>m<文字列>\x1b[0m"
をターミナルに出力することで指定した輝度と色で文字列が出力されます。
動作確認
それでは、上記で作成したスクリプトを実行してみます。
なお、事前にアセンブリ言語のプログラムはbinファイルとしてビルドしておきます。
また、WindowsのコマンドプロンプトやPowerShellではANSIエスケープシーケンスに対応していないため、WSL上のUbuntuで実行しました。
> python3 emulator.py -q Hello, World! # 緑色で出力される end of program. EAX = 0x7f7f0e00 ECX = 0x00000000 EDX = 0x00000000 EBX = 0x0000000a ESP = 0x00007c00 EBP = 0x00000000 ESI = 0x00007c32 EDI = 0x00000000 EIP = 0x00000000
問題なくBIOSの機能を使って"Hello, World!"の文字列を出力することが確認できました。