diff --git a/README.md b/README.md index 5c99f2f..1a46f6e 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A Game Boy ROM disassembler. - Generates assembly code compatible with RGBDS (v0.3.8+ recommended, see [Notes](#notes)) - Supports ROMs with multiple banks -- Supports .sym files to define labels, code, data, text and image blocks +- Supports .sym files to define labels, code, data, text, image blocks, and comments - Outputs a makefile to rebuild the ROM - Uses defines from hardware.inc v2.7 for hardware registers ([source](https://github.com/tobiasvl/hardware.inc)) - Slow on large ROMs @@ -41,6 +41,8 @@ All values (except for image widths) should be in hexadecimal. Entries start wi Block types can be defined by using the ```.code```, ```.data```, ```.text```, and ```.image``` magic labels, followed by the length of the block in bytes. +Comments to add to the assembly code should start with `;;`, followed by the bank and address. + ### Code Adding a label for some code: @@ -102,6 +104,24 @@ Resulting image: ![Imgur](https://i.imgur.com/iX5FCXL.png) +### Comments + +You may like to add comments to the resulting assembly files, for instance, if you are only able to share the symbol file for licensing reasons: + +``` +01:ad43 Check_Health +;; 01:a4d3 Updates player health if they're being hit +``` + +Resulting assembly file: + +``` +Check_Health: +; Updates player health if they're being hit + push BC + ... +``` + ## Notes - For constant expressions, RGBDS will by default optimise instructions like ```LD [$FF40],a``` to ```LDH [$FF00+40],a```, so these are encoded as data bytes using a macro to ensure exact reproduction of the original ROM (thanks to ISSOtm). RGBDS >= v0.3.7 has an option to disable this optimisation. Use ```--disable-auto-ldh``` with mgbdis to disable the macro. diff --git a/mgbdis.py b/mgbdis.py index 529126e..1452e21 100755 --- a/mgbdis.py +++ b/mgbdis.py @@ -231,6 +231,7 @@ def add_target_address(self, instruction_name, address): def resolve_blocks(self): blocks = self.symbols.get_blocks(self.bank_number) + comments = self.symbols.get_comments(self.bank_number) block_start_addresses = sorted(blocks.keys()) resolved_blocks = dict() @@ -253,6 +254,7 @@ def resolve_blocks(self): 'type': block['type'], 'length': end_address - start_address, 'arguments': block['arguments'], + 'comments': comments, } if next_start_address is None and (end_address != self.memory_base_address + 0x4000): @@ -260,7 +262,8 @@ def resolve_blocks(self): resolved_blocks[end_address] = { 'type': 'code', 'length': (self.memory_base_address + 0x4000) - end_address, - 'arguments': None + 'arguments': None, + 'comments': comments, } if next_start_address is not None and end_address < next_start_address: @@ -268,7 +271,8 @@ def resolve_blocks(self): resolved_blocks[end_address] = { 'type': 'code', 'length': next_start_address - end_address, - 'arguments': None + 'arguments': None, + 'comments': comments, } self.blocks = resolved_blocks @@ -367,6 +371,10 @@ def format_data(self, data): return self.format_instruction(self.style['db'], data) + def format_comments(self, comments): + return '\n'.join(comments) + + def append_output(self, text): self.output.append(text) @@ -401,27 +409,26 @@ def disassemble(self, rom, first_pass = False): start_address = block_start_addresses[index] block = self.blocks[start_address] end_address = start_address + block['length'] - self.disassemble_block_range[block['type']](rom, self.rom_base_address + start_address, self.rom_base_address + end_address, block['arguments']) + self.disassemble_block_range[block['type']](rom, self.rom_base_address + start_address, self.rom_base_address + end_address, block['arguments'], block['comments']) self.append_empty_line_if_none_already() return '\n'.join(self.output) - def process_code_in_range(self, rom, start_address, end_address, arguments = None): + def process_code_in_range(self, rom, start_address, end_address, arguments = None, comments = None): if not self.first_pass and debug: print('Disassembling code in range: {} - {}'.format(hex_word(start_address), hex_word(end_address))) self.pc = start_address while self.pc < end_address: - instruction = self.disassemble_at_pc(rom, end_address) + instruction = self.disassemble_at_pc(rom, end_address, comments) - def disassemble_at_pc(self, rom, end_address): + def disassemble_at_pc(self, rom, end_address, comments): pc = self.pc pc_mem_address = rom_address_to_mem_address(pc) length = 1 opcode = rom.data[pc] - comment = None operands = None operand_values = list() @@ -600,8 +607,10 @@ def disassemble_at_pc(self, rom, end_address): if len(labels): self.append_labels_to_output(labels) - if comment is not None: - self.append_output(comment) + for offset in range(length): + address = pc_mem_address + offset + if address in comments: + self.append_output(self.format_comments(comments[address])) instruction_bytes = rom.data[pc:pc + length] self.append_output(self.format_instruction(instruction_name, operand_values, pc_mem_address, instruction_bytes)) @@ -621,7 +630,7 @@ def disassemble_at_pc(self, rom, end_address): self.append_output('') - def process_data_in_range(self, rom, start_address, end_address, arguments = None): + def process_data_in_range(self, rom, start_address, end_address, arguments = None, comments = None): if not self.first_pass and debug: print('Outputting data in range: {} - {}'.format(hex_word(start_address), hex_word(end_address))) @@ -639,6 +648,9 @@ def process_data_in_range(self, rom, start_address, end_address, arguments = Non self.append_labels_to_output(labels) + if comments and address in comments: + self.append_output(self.format_comments(comments[address])) + values.append(hex_byte(rom.data[address])) # output max of 16 bytes per line, and ensure any remaining values are output @@ -647,7 +659,7 @@ def process_data_in_range(self, rom, start_address, end_address, arguments = Non values = list() - def process_text_in_range(self, rom, start_address, end_address, arguments = None): + def process_text_in_range(self, rom, start_address, end_address, arguments = None, comments = None): if not self.first_pass and debug: print('Outputting text in range: {} - {}'.format(hex_word(start_address), hex_word(end_address))) @@ -670,6 +682,9 @@ def process_text_in_range(self, rom, start_address, end_address, arguments = Non self.append_labels_to_output(labels) + if comments and address in comments: + self.append_output(self.format_comments(comments[address])) + byte = rom.data[address] if byte >= 0x20 and byte < 0x7F: text += chr(byte) @@ -685,13 +700,16 @@ def process_text_in_range(self, rom, start_address, end_address, arguments = Non if len(values): self.append_output(self.format_data(values)) - def process_image_in_range(self, rom, start_address, end_address, arguments = None): + def process_image_in_range(self, rom, start_address, end_address, arguments = None, comments = None): if not self.first_pass and debug: print('Outputting image in range: {} - {}'.format(hex_word(start_address), hex_word(end_address))) if self.first_pass: return + if comments and start_address in comments: + self.append_output(self.format_comments(comments[start_address])) + mem_address = rom_address_to_mem_address(start_address) labels = self.get_labels_for_non_code_address(mem_address) if len(labels): @@ -710,17 +728,49 @@ class Symbols: def __init__(self): self.symbols = dict() self.blocks = dict() + self.comments = dict() def load_sym_file(self, symbols_path): f = open(symbols_path, 'r') for line in f: - # ignore comments and empty lines - if line[0] != ';' and len(line.strip()): + if line[:3] == ';; ': + self.add_comment(line) + # ignore normal comments and empty lines + elif line[0] != ';' and len(line.strip()): self.add_symbol_definition(line) f.close() + def add_comment(self, comment): + split_comment = comment.split(maxsplit=2)[1:] + if len(split_comment) != 2: + return + + location, body = split_comment + try: + bank, address = location.split(':') + bank = int(bank, 16) + address = int(address, 16) + except: + # Ignore comments that don't have a location + return + + if body[-1] == '\n': + body = body[:-1] + + if not len(body): + # Assume this is a blank comment for formatting + # Don't include whitespace at the end of the line + body = ';' + else: + body = '; {}'.format(body) + + bank_comments = self.get_comments(bank) + if address not in bank_comments: + bank_comments[address] = [] + + bank_comments[address].append(body) def add_symbol_definition(self, symbol_def): try: @@ -808,6 +858,11 @@ def get_blocks(self, bank): return self.blocks[bank] + def get_comments(self, bank): + if bank not in self.comments: + self.comments[bank] = dict() + return self.comments[bank] + class ROM: def __init__(self, rom_path, style):