Skip to content

Commit

Permalink
refactor parser and callgraph_gen
Browse files Browse the repository at this point in the history
  • Loading branch information
clearbluejar committed Jan 11, 2024
1 parent cd3b09b commit 2ac0669
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 79 deletions.
2 changes: 1 addition & 1 deletion ghidrecomp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@

# Expose API
from .decompile import decompile
from .utility import get_parser
from .parser import get_parser

__all__ = ["get_parser", "decompile"]
2 changes: 1 addition & 1 deletion ghidrecomp/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .decompile import decompile
from .utility import get_parser
from .parser import get_parser


def main():
Expand Down
44 changes: 43 additions & 1 deletion ghidrecomp/callgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,34 @@
import json
import sys
import re
import argparse

from typing import TYPE_CHECKING
from functools import lru_cache

# don't really limit the graph: the cap sits one below the interpreter's
# recursion limit
MAX_DEPTH = sys.getrecursionlimit() - 1

# number of characters of the function name kept when gen_callgraph builds
# its default graph name
MAX_PATH_LEN = 50

# needed for ghidra python vscode autocomplete
if TYPE_CHECKING:
import ghidra
from ghidra_builtins import *


def add_cg_args_to_parser(parser: argparse.ArgumentParser):
    """Register the callgraph command-line options on *parser*.

    The options are added to their own 'Callgraph Options' argument group so
    they are listed together in the ``--help`` output.

    Args:
        parser: The parser to extend; it is modified in place.
    """

    group = parser.add_argument_group('Callgraph Options')
    group.add_argument('--callgraphs', help='Generate callgraph markdown', action='store_true')
    group.add_argument('--callgraph-filter',
                       help='Only generate callgraphs for functions matching filter', default='.')
    # Numeric options are converted at parse time so that a non-numeric value
    # is rejected by argparse with a usage error instead of reaching the
    # callgraph code as a raw string.
    group.add_argument('--mdd', '--max-display-depth', help='Max Depth for graph generation',
                       dest='max_display_depth', type=int)
    group.add_argument('--max-time-cg-gen', help='Max time in seconds to wait for callgraph gen.',
                       default=5, type=float)
    group.add_argument('--cg-direction', help='Direction for callgraph.',
                       choices=['calling', 'called', 'both'], default='calling')



class CallGraph:

def __init__(self, root=None):
Expand Down Expand Up @@ -525,3 +539,31 @@ def gen_callgraph_md(f: "ghidra.program.model.listing.Function", called: str, ca
'''

return md_template


def gen_callgraph(func: 'ghidra.program.model.listing.Function', max_display_depth=None, direction='calling', max_run_time=None, name=None):
    """Build the callgraph for *func* and render it as mermaid charts.

    Args:
        func: Ghidra function the graph is built for.
        max_display_depth: Passed to ``gen_mermaid_flow_graph`` for the full
            flow chart.
        direction: ``'calling'`` selects ``get_calling``, ``'called'`` selects
            ``get_called``.
        max_run_time: Passed through to the selected graph builder.
        name: Label returned with the result. When omitted it is the function
            name truncated to ``MAX_PATH_LEN`` characters, a dash, and the
            function's entry point.

    Returns:
        ``[name, direction, callgraph, charts]`` where ``charts`` is
        ``[['flow', ...], ['flow_ends', ...], ['mind', ...]]``. Every chart is
        an empty string when the builder returned ``None``.

    Raises:
        ValueError: If *direction* is neither ``'calling'`` nor ``'called'``.
    """

    if name is None:
        name = f'{func.getName()[:MAX_PATH_LEN]}-{func.entryPoint}'

    # print(f'Generating {direction} callgraph for : {name}')

    # Give every chart a default so the return statement below is valid even
    # when no callgraph object was produced.
    flow = ''
    flow_ends = ''
    mind = ''
    callgraph = None

    if direction == 'calling':
        callgraph = get_calling(func, max_run_time=max_run_time)
    elif direction == 'called':
        callgraph = get_called(func, max_run_time=max_run_time)
    else:
        raise ValueError(f'Unsupported callgraph direction {direction}')

    if callgraph is not None:
        flow = callgraph.gen_mermaid_flow_graph(
            shaded_nodes=callgraph.get_endpoints(),
            max_display_depth=max_display_depth,
            wrap_mermaid=True)
        flow_ends = callgraph.gen_mermaid_flow_graph(
            shaded_nodes=callgraph.get_endpoints(), endpoint_only=True, wrap_mermaid=True)
        mind = callgraph.gen_mermaid_mind_map(max_display_depth=3, wrap_mermaid=True)

    return [name, direction, callgraph, [['flow', flow], ['flow_ends', flow_ends], ['mind', mind]]]
29 changes: 2 additions & 27 deletions ghidrecomp/decompile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pyhidra import HeadlessPyhidraLauncher, open_program

from .utility import set_pdb, setup_symbol_server, set_remote_pdbs, analyze_program, get_pdb, apply_gdt
from .callgraph import get_called, get_calling, CallGraph
from .callgraph import get_called, get_calling, CallGraph, gen_callgraph

# needed for ghidra python vscode autocomplete
if TYPE_CHECKING:
Expand Down Expand Up @@ -99,31 +99,6 @@ def decompile_to_single_file(path: Path,
decompiler.export(c_file, prog, prog.getMemory(), monitor)


def gen_callgraph(func: 'ghidra.program.model.listing.Function', max_display_depth=None, direction='calling', max_run_time=None):
    """Build the callgraph for *func* and render it as mermaid charts.

    Args:
        func: Ghidra function the graph is built for.
        max_display_depth: Passed to ``gen_mermaid_flow_graph`` for the full
            flow chart.
        direction: ``'calling'`` selects ``get_calling``, ``'called'`` selects
            ``get_called``.
        max_run_time: Passed through to the selected graph builder.

    Returns:
        ``[name, direction, callgraph, charts]`` where ``name`` comes from
        ``get_filename(func)`` and ``charts`` is
        ``[['flow', ...], ['flow_ends', ...], ['mind', ...]]``. Every chart is
        an empty string when the builder returned ``None``.

    Raises:
        ValueError: If *direction* is neither ``'calling'`` nor ``'called'``.
    """

    name = get_filename(func)
    # print(f'Generating {direction} callgraph for : {name}')

    # Give every chart a default so the return statement below is valid even
    # when no callgraph object was produced.
    flow = ''
    flow_ends = ''
    mind = ''
    callgraph = None

    if direction == 'calling':
        callgraph = get_calling(func, max_run_time=max_run_time)
    elif direction == 'called':
        callgraph = get_called(func, max_run_time=max_run_time)
    else:
        raise ValueError(f'Unsupported callgraph direction {direction}')

    if callgraph is not None:
        flow = callgraph.gen_mermaid_flow_graph(
            shaded_nodes=callgraph.get_endpoints(),
            max_display_depth=max_display_depth,
            wrap_mermaid=True)
        flow_ends = callgraph.gen_mermaid_flow_graph(
            shaded_nodes=callgraph.get_endpoints(), endpoint_only=True, wrap_mermaid=True)
        mind = callgraph.gen_mermaid_mind_map(max_display_depth=3, wrap_mermaid=True)

    return [name, direction, callgraph, [['flow', flow], ['flow_ends', flow_ends], ['mind', mind]]]


def decompile(args: Namespace):

Expand Down Expand Up @@ -281,7 +256,7 @@ def decompile(args: Namespace):
max_display_depth = int(args.max_display_depth)

with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor:
futures = (executor.submit(gen_callgraph, func, max_display_depth, direction, args.max_time_cg_gen)
futures = (executor.submit(gen_callgraph, func, max_display_depth, direction, args.max_time_cg_gen, get_filename(func))
for direction in directions for func in all_funcs if args.skip_cache or get_filename(func) not in callgraphs_completed and re.search(args.callgraph_filter, func.name) is not None)

for future in concurrent.futures.as_completed(futures):
Expand Down
45 changes: 45 additions & 0 deletions ghidrecomp/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import argparse
import multiprocessing

from ghidrecomp import __version__

from .callgraph import add_cg_args_to_parser
from .bsim import add_bsim_args_to_parser

# CPU count of the host; used as the default for the -t/--thread-count option
THREAD_COUNT = multiprocessing.cpu_count()

def get_parser() -> argparse.ArgumentParser:
    """Build the ghidrecomp command-line argument parser.

    The general, symbol, threading/analysis and JVM options are defined here.
    The callgraph and BSim options are contributed by
    ``add_cg_args_to_parser`` and ``add_bsim_args_to_parser``.

    Returns:
        The configured parser. Defaults are shown in ``--help`` because the
        parser uses ``ArgumentDefaultsHelpFormatter``.
    """

    parser = argparse.ArgumentParser(description='ghidrecomp - A Command Line Ghidra Decompiler',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('bin', help='Path to binary used for analysis')
    parser.add_argument('--cppexport', action='store_true', help='Use Ghidras CppExporter to decompile to single file')
    parser.add_argument('--filter', dest='filters', action='append', help='Regex match for function name')
    parser.add_argument('--project-path', help='Path to base ghidra projects ', default='ghidra_projects')
    # NOTE(review): with nargs='?' a bare `--gdt` appends None to the list;
    # confirm the consumer of args.gdt tolerates that.
    parser.add_argument('--gdt', help='Additional GDT to apply', nargs='?', action='append')
    parser.add_argument('-o', '--output-path', help='Location for all decompilations', default='ghidrecomps')
    parser.add_argument("-v", "--version", action="version", version=__version__)
    parser.add_argument("--skip-cache", action='store_true',
                        help='Skip cached and generate new decomp and callgraphs.')

    # The three symbol sources are alternatives, so argparse rejects any
    # combination of them.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--sym-file-path', help='Specify single pdb symbol file for bin')
    group.add_argument('-s', '--symbols-path', help='Path for local symbols directory', default='symbols')
    group.add_argument('--skip-symbols', help='Do not apply symbols', action='store_true')

    parser.add_argument('-t', '--thread-count', type=int,
                        help='Threads to use for processing. Defaults to cpu count', default=THREAD_COUNT)
    parser.add_argument('--va', help='Enable verbose analysis', action='store_true')
    parser.add_argument('--fa', help='Force new analysis (even if already analyzed)', action='store_true')

    group = parser.add_argument_group('JVM Options')
    # type=float keeps a command-line value consistent with the float default
    # and makes argparse reject non-numeric input.
    group.add_argument('--max-ram-percent', help='Set JVM Max Ram %% of host RAM', default=50.0, type=float)
    group.add_argument('--print-flags', help='Print JVM flags at start', action='store_true')

    add_cg_args_to_parser(parser)

    add_bsim_args_to_parser(parser)

    return parser
49 changes: 1 addition & 48 deletions ghidrecomp/utility.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,13 @@
import argparse
import multiprocessing

from pathlib import Path
from typing import Union, TYPE_CHECKING
from pyhidra import launcher

from ghidrecomp import __version__

# CPU count of the host; used as the default for the -t/--thread-count option
THREAD_COUNT = multiprocessing.cpu_count()

# needed for ghidra python vscode autocomplete
if TYPE_CHECKING:
import ghidra
from ghidra_builtins import * # noqa: F403


def get_parser() -> argparse.ArgumentParser:
    """Build the ghidrecomp command-line argument parser.

    Defines the general, symbol, threading/analysis, JVM and callgraph
    options.

    Returns:
        The configured parser. Defaults are shown in ``--help`` because the
        parser uses ``ArgumentDefaultsHelpFormatter``.
    """

    parser = argparse.ArgumentParser(description='ghidrecomp - A Command Line Ghidra Decompiler',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('bin', help='Path to binary used for analysis')
    parser.add_argument('--cppexport', action='store_true', help='Use Ghidras CppExporter to decompile to single file')
    parser.add_argument('--filter', dest='filters', action='append', help='Regex match for function name')
    parser.add_argument('--project-path', help='Path to base ghidra projects ', default='ghidra_projects')
    # NOTE(review): with nargs='?' a bare `--gdt` appends None to the list;
    # confirm the consumer of args.gdt tolerates that.
    parser.add_argument('--gdt', help='Additional GDT to apply', nargs='?', action='append')
    parser.add_argument('-o', '--output-path', help='Location for all decompilations', default='ghidrecomps')
    parser.add_argument("-v", "--version", action="version", version=__version__)
    parser.add_argument("--skip-cache", action='store_true',
                        help='Skip cached and generate new decomp and callgraphs.')

    # The three symbol sources are alternatives, so argparse rejects any
    # combination of them.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--sym-file-path', help='Specify single pdb symbol file for bin')
    group.add_argument('-s', '--symbols-path', help='Path for local symbols directory', default='symbols')
    group.add_argument('--skip-symbols', help='Do not apply symbols', action='store_true')

    parser.add_argument('-t', '--thread-count', type=int,
                        help='Threads to use for processing. Defaults to cpu count', default=THREAD_COUNT)
    parser.add_argument('--va', help='Enable verbose analysis', action='store_true')
    parser.add_argument('--fa', help='Force new analysis (even if already analyzed)', action='store_true')

    group = parser.add_argument_group('JVM Options')
    # type=float keeps a command-line value consistent with the float default
    # and makes argparse reject non-numeric input.
    group.add_argument('--max-ram-percent', help='Set JVM Max Ram %% of host RAM', default=50.0, type=float)
    group.add_argument('--print-flags', help='Print JVM flags at start', action='store_true')

    group = parser.add_argument_group('Callgraph Options')
    group.add_argument('--callgraphs', help='Generate callgraph markdown', action='store_true')
    group.add_argument('--callgraph-filter',
                       help='Only generate callgraphs for functions matching filter', default='.')
    # Numeric callgraph options are converted at parse time so that a
    # non-numeric value is rejected with a usage error.
    group.add_argument('--mdd', '--max-display-depth', help='Max Depth for graph generation',
                       dest='max_display_depth', type=int)
    group.add_argument('--max-time-cg-gen', help='Max time in seconds to wait for callgraph gen.',
                       default=5, type=float)
    group.add_argument('--cg-direction', help='Direction for callgraph.',
                       choices=['calling', 'called', 'both'], default='calling')

    return parser


def analyze_program(program, verbose: bool = False, force_analysis: bool = False, save: bool = False):
"""
Modified pyhidra.core._analyze_program
Expand Down
3 changes: 2 additions & 1 deletion tests/test_gdt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

import pyhidra
from pyhidra.core import _setup_project, _analyze_program
from ghidrecomp.utility import apply_gdt, get_parser, setup_symbol_server, set_remote_pdbs
from ghidrecomp.utility import apply_gdt, setup_symbol_server, set_remote_pdbs
from ghidrecomp.decompile import analyze_program
from ghidrecomp.parser import get_parser

if TYPE_CHECKING:
from ghidra.program.flatapi import FlatProgramAPI
Expand Down

0 comments on commit 2ac0669

Please sign in to comment.