Skip to content

Commit

Permalink
Update output folder structure (#13)
Browse files Browse the repository at this point in the history
* Update output folder structure
* update gitignore with new output folders
* fix 10.1.5 test
  • Loading branch information
clearbluejar authored Jan 10, 2024
1 parent c7adcdd commit 0b5946c
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 22 deletions.
Empty file removed .ghidra_projects/.placeholder
Empty file.
4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,4 @@ dmypy.json
.pyre/

# project specific ignores
.ghidra_projects*/
decompilations*/
.symbols*/
ghidrecomps*/
32 changes: 23 additions & 9 deletions ghidrecomp/decompile.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,25 @@ def decompile(args: Namespace):
print(f'Starting decompliations: {args}')

bin_path = Path(args.bin)
project_location = Path(args.project_path)
thread_count = args.thread_count

output_path = Path(args.output_path) / bin_path.name
output_path = Path(args.output_path)
bin_output_path = output_path / bin_path.name
decomp_path = bin_output_path / 'decomps'
output_path.mkdir(exist_ok=True, parents=True)
bin_output_path.mkdir(exist_ok=True, parents=True)
decomp_path.mkdir(exist_ok=True, parents=True)


if args.project_path == 'ghidra_projects':
project_location = output_path / args.project_path
else:
project_location = Path(args.project_path)

if args.symbols_path == 'symbols':
symbols_path = output_path / args.symbols_path
else:
symbols_path = Path(args.symbols_path)

# turn on verbose
launcher = HeadlessPyhidraLauncher(True)
Expand All @@ -159,7 +173,7 @@ def decompile(args: Namespace):
if args.sym_file_path:
set_pdb(program, args.sym_file_path)
else:
setup_symbol_server(args.symbols_path)
setup_symbol_server(symbols_path)

set_remote_pdbs(program, True)

Expand Down Expand Up @@ -206,7 +220,7 @@ def decompile(args: Namespace):

if args.cppexport:
print(f"Decompiling {len(all_funcs)} functions using Ghidra's CppExporter")
c_file = Path(args.output_path) / Path(bin_path.name + '.c')
c_file = decomp_path / Path(bin_path.name + '.c')
start = time()
decompile_to_single_file(c_file, program)
print(f'Decompiled {len(all_funcs)} functions for {program.name} in {time() - start}')
Expand All @@ -221,7 +235,7 @@ def decompile(args: Namespace):
start = time()
with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor:
futures = (executor.submit(decompile_func, func, decompilers, thread_id % thread_count, monitor=monitor)
for thread_id, func in enumerate(all_funcs) if args.skip_cache or not (output_path / (get_filename(func) + '.c')).exists())
for thread_id, func in enumerate(all_funcs) if args.skip_cache or not (decomp_path / (get_filename(func) + '.c')).exists())

for future in concurrent.futures.as_completed(futures):
decompilations.append(future.result())
Expand All @@ -235,20 +249,20 @@ def decompile(args: Namespace):
# Save all decomps
start = time()
with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor:
futures = (executor.submit((output_path / (name + '.c')).write_text, decomp)
futures = (executor.submit((decomp_path / (name + '.c')).write_text, decomp)
for name, decomp, sig in decompilations)

for future in concurrent.futures.as_completed(futures):
pass

print(f'Wrote {completed} decompilations for {program.name} to {output_path} in {time() - start}')
print(f'Wrote {completed} decompilations for {program.name} to {decomp_path} in {time() - start}')

# Generate callgraphs for functions
if args.callgraphs:

start = time()
completed = 0
callgraph_path = output_path / 'callgraphs'
callgraph_path = bin_output_path / 'callgraphs'
callgraphs_completed_path = callgraph_path / 'completed_callgraphs.json'
if callgraphs_completed_path.exists():
callgraphs_completed = json.loads(callgraphs_completed_path.read_text())
Expand Down Expand Up @@ -289,4 +303,4 @@ def decompile(args: Namespace):
print(f'Wrote {completed} callgraphs for {program.name} to {callgraph_path} in {time() - start}')
print(f'{len(all_funcs) - completed} callgraphs already existed.')

return (all_funcs, decompilations, output_path, str(program.compiler), str(program.languageID), callgraphs)
return (all_funcs, decompilations, bin_output_path, str(program.compiler), str(program.languageID), callgraphs)
6 changes: 3 additions & 3 deletions ghidrecomp/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ def get_parser() -> argparse.ArgumentParser:
parser.add_argument('bin', help='Path to binary used for analysis')
parser.add_argument('--cppexport', action='store_true', help='Use Ghidras CppExporter to decompile to single file')
parser.add_argument('--filter', dest='filters', action='append', help='Regex match for function name')
parser.add_argument('--project-path', help='Path to base ghidra projects ', default='.ghidra_projects')
parser.add_argument('--project-path', help='Path to base ghidra projects ', default='ghidra_projects')
parser.add_argument('--gdt', help='Additional GDT to apply', nargs='?', action='append')
parser.add_argument('-o', '--output-path', help='Location for all decompilations', default='decompilations')
parser.add_argument('-o', '--output-path', help='Location for all decompilations', default='ghidrecomps')
parser.add_argument("-v", "--version", action="version", version=__version__)
parser.add_argument("--skip-cache", action='store_true',
help='Skip cached and genearate new decomp and callgraphs.')

group = parser.add_mutually_exclusive_group()
group.add_argument('--sym-file-path', help='Specify single pdb symbol file for bin')
group.add_argument('-s', '--symbols-path', help='Path for local symbols directory', default='.symbols')
group.add_argument('-s', '--symbols-path', help='Path for local symbols directory', default='symbols')
group.add_argument('--skip-symbols', help='Do not apply symbols', action='store_true')

parser.add_argument('-t', '--thread-count', type=int,
Expand Down
18 changes: 13 additions & 5 deletions tests/test_gdt.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,18 @@ def test_apply_gdt(shared_datadir: Path):

args = parser.parse_args([f"{bin_path.absolute()}"])

project_location = Path(args.project_path)
output_path = Path(args.output_path) / bin_path.name
output_path.mkdir(exist_ok=True, parents=True)

output_path = Path(args.output_path)

if args.project_path == 'ghidra_projects':
project_location = output_path / args.project_path
else:
project_location = Path(args.project_path)

if args.symbols_path == 'symbols':
symbols_path = output_path / args.symbols_path
else:
symbols_path = Path(args.symbols_path)

# turn on verbose
pyhidra.start(True)

Expand All @@ -98,7 +106,7 @@ def test_apply_gdt(shared_datadir: Path):

program: "Program" = flat_api.getCurrentProgram()

setup_symbol_server(args.symbols_path)
setup_symbol_server(symbols_path)

set_remote_pdbs(program, True)

Expand Down
4 changes: 2 additions & 2 deletions tests/test_ghidrecomp.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_decomplie_afd(shared_datadir: Path):

all_funcs, decompilations, output_path, compiler, lang_id, callgraphs = decompile(args)

assert (len(all_funcs) == 1275 or len(all_funcs) == 1273)
assert (len(all_funcs) == 1275 or len(all_funcs) == 1273 or len(all_funcs) == 1172)
assert (len(decompilations) == 1275 or len(decompilations) == 1273)
assert output_path == expected_output_path
assert compiler == 'visualstudio:unknown'
Expand All @@ -91,7 +91,7 @@ def test_decomplie_afd_cached(shared_datadir: Path):

all_funcs, decompilations, output_path, compiler, lang_id, callgraphs = decompile(args)

assert (len(all_funcs) == 1275 or len(all_funcs) == 1273)
assert (len(all_funcs) == 1275 or len(all_funcs) == 1273 or len(all_funcs) == 1172)
assert len(decompilations) == 0
assert output_path == expected_output_path
assert compiler == 'visualstudio:unknown'
Expand Down

0 comments on commit 0b5946c

Please sign in to comment.