From 89780e2bcee22d39a5895afb56d5794f90fdd630 Mon Sep 17 00:00:00 2001 From: clearbluejar <3752074+clearbluejar@users.noreply.github.com> Date: Mon, 8 Aug 2022 13:56:04 -0400 Subject: [PATCH] adding pyhidra support --- .devcontainer/post-create.sh | 7 ++- README.md | 115 +++++++++++++++++++++++++++++++---- sample-pyhidra.py | 34 +++++++++++ 3 files changed, 142 insertions(+), 14 deletions(-) mode change 100644 => 100755 .devcontainer/post-create.sh create mode 100644 sample-pyhidra.py diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh old mode 100644 new mode 100755 index b9c7e8c..8b88e36 --- a/.devcontainer/post-create.sh +++ b/.devcontainer/post-create.sh @@ -17,8 +17,11 @@ echo $PYI_WHL_RELEASE_URLS echo $PYI_WHL_DOWNLOAD_URL pip install "${PYI_WHL_DOWNLOAD_URL}" -# Download latest Ghidra Bridge +# Install ghidra-bridge pip install ghidra_bridge -# Install bridge scripts +# Install bridge scripts to local dir python -m ghidra_bridge.install_server .ghidra_bridge + +# Install pyhidra +pip install pyhidra \ No newline at end of file diff --git a/README.md b/README.md index 96923f0..75fe71e 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,9 @@ A skeleton repo to provide a Ghidra Headless (non-GUI) Python scripting environm - Provisions specified versions Ghidra based on `GHIDRA_VERSION` in [devcontainer.json](.devcontainer/devcontainer.json#L15-L16) - Auto complete for Ghidra Python script setup and configured - via pyi typings from [VDOO-Connected-Trust/ghidra-pyi-generator](https://github.com/VDOO-Connected-Trust/ghidra-pyi-generator) -- IDE debugging over RPC - - via [justfoxing/ghidra_bridge](https://github.com/justfoxing/ghidra_bridge) +- IDE debugging (available from either) + - [justfoxing/ghidra_bridge](https://github.com/justfoxing/ghidra_bridge) over RPC + - [pyhidra](https://github.com/dod-cyber-crime-center/pyhidra) leveraging native CPython interpreter using [jpype](https://jpype.readthedocs.io/en/latest/) - Demonstrates 
running python scripts in [various ways](#different-ways-to-run-a-ghidra-headless-script). ## About @@ -60,18 +61,21 @@ The manual setup essentially has to mimic the following scripts:
Expand for Manual Setup Steps - - 1. [Install Ghidra](https://github.com/NationalSecurityAgency/ghidra/releases) yourself. 2. Update `GHIDRA_INSTALL_DIR` and other variables in [settings.json](.vscode/settings.json) with your install paths. 3. Set environment variable with `GHIDRA_VERSION` - `export GHIDRA_VERSION=10.1.4` -4. Install `ghidra-stubs` that match your `GHIDRA_VERSION` +4. Setup `venv` + - `python3 -m venv .env` +5. Install pip packages - autocomplete - - `pip install https://github.com/clearbluejar/ghidra-pyi-generator/releases/download/v1.0.3-10.1.4/ghidra_stubs-10.1.4.refs_heads_master-py2.py3-none-any.whl` or `pip install ghidra-stubs` from pypi (this is an outdated version) + - `ghidra-stubs` that match your `GHIDRA_VERSION` + - `pip install https://github.com/clearbluejar/ghidra-pyi-generator/releases/download/v1.0.3-10.1.4/ghidra_stubs-10.1.4.refs_heads_master-py2.py3-none-any.whl` or `pip install ghidra-stubs` from pypi (this is an outdated version) - ghidra bridge - `pip install ghidra-bridge` - `python -m ghidra_bridge.install_server .ghidra_bridge` + - pyhidra + - `pip install pyhidra`
@@ -158,15 +162,15 @@ Step 4 runs the script on the imported binary after analysis (*-postscript*) on There are several ways to run a Ghidra Python script. 1. Run via launch on [run_headless.py](run_headless.py). - - The most straightforward means to run the script. It simply uses subprocess module with the correct arguments to run the sample.py. + - The most straightforward means to run the script. It simply uses subprocess module to call `analyzeHeadless` with the correct arguments to run the [sample.py](sample.py). - It also creates a properties file needed to pass arguments to some Ghidra API calls. 2. Run the task `Run Current Python Script in Ghidra Jython` within [tasks.json](.vscode/tasks.json). - To use this task make sure you have open and focused the [sample.py](sample.py). 3. Run via launch on [sample-bridge.py](sample-bridge.py) leveraging `ghidra-bridge`. - 1. Requires the ghidra-bridge to [start prior to connecting](sample-bridge.py#L43-L49) via bridge. - 2. Instead of properties file, [passes](sample-bridge.py#L37) `ls` argument to ghidra-bridge server. -4. Run [sample.py](sample.py) directly in Ghidra via the GUI after copying it to the `ghidra_scripts` directory. If you are doing that, you likely don't need this repo. - + - Requires the ghidra-bridge to [start prior to connecting](sample-bridge.py#L43-L49) via bridge. + - Instead of properties file, [passes](sample-bridge.py#L37) `ls` argument to ghidra-bridge server. +4. Run [sample-pyhidra.py](sample-pyhidra.py) leveraging `pyhidra` (best one! It really just works with the help of `jpype`) +5. Run [sample.py](sample.py) directly in Ghidra via the GUI after copying it to the `ghidra_scripts` directory. If you are doing that, you likely don't need this repo. ### Sample Outputs @@ -424,6 +428,92 @@ Shutting down ghidra_bridge_server : 43841 ``` +
4. Run via launch on sample-pyhidra.py + +```terminal +(.env) vscode ➜ /workspaces/ghidra-python-vscode-devcontainer-skeleton (main ✗) $ cd /workspaces/ghidra-python-vscode-devcontainer-skeleton ; /usr/bin/env /workspaces/ghidra-python-vscode-devcontainer-skeleton/.env/bin/python /home/vscode/.vscode-server/extensions/ms-python.python-2022.12.0/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher 40875 -- /workspaces/ghidra-python-vscode-devcontainer-skeleton/sample-pyhidra.py +/ghidra/Ghidra/Framework/Utility/lib/Utility.jar +INFO Using log config file: jar:file:/ghidra/Ghidra/Framework/Generic/lib/Generic.jar!/generic.log4j.xml (LoggingInitialization) +INFO Using log file: /home/vscode/.ghidra/.ghidra_10.1.4_PUBLIC/application.log (LoggingInitialization) +INFO Loading user preferences: /home/vscode/.ghidra/.ghidra_10.1.4_PUBLIC/preferences (Preferences) +INFO Class search complete (813 ms) (ClassSearcher) +INFO Initializing SSL Context (SSLContextInitializer) +INFO Initializing Random Number Generator... 
(SecureRandomFactory) +INFO Random Number Generator initialization complete: NativePRNGNonBlocking (SecureRandomFactory) +INFO Trust manager disabled, cacerts have not been set (ApplicationTrustManagerFactory) +INFO Opening project: /workspaces/ghidra-python-vscode-devcontainer-skeleton/.ghidra_projects/sample_project/sample_project/sample_project (DefaultProject) +INFO DWARF external debug information found: ExternalDebugInfo [filename=1a4999161b8b2da681b80d8bf351e40afc40ad.debug, crc=1816f651, hash=9c1a4999161b8b2da681b80d8bf351e40afc40ad] (ExternalDebugFilesService) +INFO Unable to find DWARF information, skipping DWARF analysis (DWARFAnalyzer) +ERROR os/linux_arm_64/decompile does not exist (DecompileProcessFactory) +INFO Packed database cache: /tmp/vscode-Ghidra/packed-db-cache (PackedDatabaseCache) +INFO ----------------------------------------------------- + AARCH64 ELF PLT Thunks 0.017 secs + ASCII Strings 0.249 secs + Apply Data Archives 0.230 secs + Basic Constant Reference Analyzer 1.394 secs + Call Convention ID 0.008 secs + Call-Fixup Installer 0.004 secs + Create Address Tables 0.024 secs + Create Function 0.000 secs + DWARF 0.017 secs + Data Reference 0.037 secs + Decompiler Switch Analysis 0.164 secs + Demangler GNU 0.214 secs + Disassemble Entry Points 0.013 secs + Embedded Media 0.013 secs + External Entry References 0.000 secs + Function Start Search 0.106 secs + Function Start Search After Code 0.012 secs + Function Start Search After Data 0.031 secs + GCC Exception Handlers 0.471 secs + Non-Returning Functions - Discovered 0.026 secs + Non-Returning Functions - Known 0.019 secs + Reference 0.093 secs + Shared Return Calls 0.026 secs + Stack 0.069 secs + Subroutine References 0.036 secs +----------------------------------------------------- + Total Time 3 secs +----------------------------------------------------- + (AutoAnalysisManager) +Program Info: +Program: ls: AARCH64:LE:64:v8A_default (Sat Aug 06 02:18:37 UTC 2022) + +Memory layout: 
+Imagebase: 0x100000 +segment_2.1 [start: 0x1048576, end: 0x1049143] +.interp [start: 0x1049144, end: 0x1049170] +.note.gnu.build-id [start: 0x1049172, end: 0x1049207] +.note.ABI-tag [start: 0x1049208, end: 0x1049239] +.gnu.hash [start: 0x1049240, end: 0x1049303] +.dynsym [start: 0x1049304, end: 0x1052423] +.dynstr [start: 0x1052424, end: 0x1053877] +.gnu.version [start: 0x1053878, end: 0x1054137] +.gnu.version_r [start: 0x1054144, end: 0x1054255] +.rela.dyn [start: 0x1054256, end: 0x1060087] +.rela.plt [start: 0x1060088, end: 0x1062703] +.init [start: 0x1062704, end: 0x1062723] +.plt [start: 0x1062736, end: 0x1064511] +.text [start: 0x1064512, end: 0x1149231] +.fini [start: 0x1149232, end: 0x1149247] +.rodata [start: 0x1149248, end: 0x1168549] +.eh_frame_hdr [start: 0x1168552, end: 0x1170795] +.eh_frame [start: 0x1170800, end: 0x1182903] +.init_array [start: 0x1250024, end: 0x1250031] +.fini_array [start: 0x1250032, end: 0x1250039] +.data.rel.ro [start: 0x1250040, end: 0x1252607] +.dynamic [start: 0x1252608, end: 0x1253119] +.got [start: 0x1253120, end: 0x1253351] +.got.plt [start: 0x1253352, end: 0x1254247] +.data [start: 0x1254248, end: 0x1254935] +.bss [start: 0x1254936, end: 0x1259735] +EXTERNAL [start: 0x1261568, end: 0x1262527] +.gnu_debugaltlink [start: 0x0, end: 0x73] +.gnu_debuglink [start: 0x0, end: 0x51] +.shstrtab [start: 0x0, end: 0x279] +_elfSectionHeaders [start: 0x0, end: 0x1855] +``` +
## Ghidra Python Headless Scripting Hangups @@ -431,4 +521,5 @@ Shutting down ghidra_bridge_server : 43841 2. In order to pass arguments to api calls like [askProgram](https://ghidra.re/ghidra_docs/api/ghidra/app/script/GhidraScript.html#askProgram(java.lang.String)) (which sets the current program being analyzed) either: - a `.properties` file needs to exist with the same name and location as the script being run. In this case a [sample.properties](sample.properties) sets the arguments for [sample.py](sample.py). - the args have to be passed on the command line when running `analyzeHeadless`. For [sample-bridge.py](sample-bridge.py), the args are awkwardly passed when ghidra_bridge_server [starts](sample-bridge.py#L37), as that server running within the Ghidra context is the only time analyzeHeadless is called. More details [here](https://github.com/justfoxing/ghidra_bridge#headless-analysis-context). -3. `ghidra-bridge` has to be started and running before you [connect](sample-bridge.py#L53) to it. The bridge can be started outside of sample-bridge.py, but you won't be able to pass arguments to it if neeed. Also, `ghidra-bridge` is slow for large analysis. Its best feature is the ability to step through and inspect the sample-bridge.py script within the IDE. \ No newline at end of file +3. `ghidra-bridge` has to be started and running before you [connect](sample-bridge.py#L53) to it. The bridge can be started outside of sample-bridge.py, but you won't be able to pass arguments to it if needed. Also, `ghidra-bridge` is slow for large analysis. Its best feature is the ability to step through and inspect the sample-bridge.py script within the IDE. +4. `pyhidra` - Need to be wary of conflicting module names. As python stdlib and Ghidra have some conflicting module names (such as `pdb`), there are sometimes issues getting access to the full Ghidra Script API with `pyhidra`. Python prefers local modules and stdlib over the Java imports. 
This is due to [this issue](https://jpype.readthedocs.io/en/latest/userguide.html#importing-java-classes) in `jpype`.
\ No newline at end of file
diff --git a/sample-pyhidra.py b/sample-pyhidra.py
new file mode 100644
index 0000000..367b973
--- /dev/null
+++ b/sample-pyhidra.py
@@ -0,0 +1,44 @@
+"""Sample pyhidra script: print program info and the memory layout of /bin/ls.
+
+Reads PROJECT_NAME and GHIDRA_PROJECTS_PATH from the environment, starts
+pyhidra, opens /bin/ls in that project and prints each memory block's range.
+"""
+import os
+
+import pyhidra
+
+#### Section to make autocomplete work
+try:
+    import ghidra
+    from ghidra_builtins import *
+except ImportError:
+    # These imports only feed IDE autocomplete; the script runs without them.
+    pass
+####
+
+# os.environ[...] raises KeyError naming the missing variable, rather than the
+# TypeError os.path.join() raises when handed None.
+PROJECT_NAME = os.environ['PROJECT_NAME']
+PROJECT_LOCATION = os.path.join(os.environ['GHIDRA_PROJECTS_PATH'], PROJECT_NAME)
+
+pyhidra.start(True)  # setting Verbose output
+
+with pyhidra.open_program("/bin/ls", project_name=PROJECT_NAME, project_location=PROJECT_LOCATION) as flat_api:
+
+    prog = flat_api.getCurrentProgram()
+
+    print("Program Info:")
+    program_name = prog.getName()
+    creation_date = prog.getCreationDate()
+    language_id = prog.getLanguageID()
+    compiler_spec_id = prog.getCompilerSpec().getCompilerSpecID()
+    print("Program: {}: {}_{} ({})\n".format(program_name, language_id, compiler_spec_id, creation_date))
+
+    # Get info about the current program's memory layout
+    print("Memory layout:")
+    print("Imagebase: " + hex(prog.getImageBase().getOffset()))
+    for block in prog.getMemory().getBlocks():
+        start = block.getStart().getOffset()
+        end = block.getEnd().getOffset()
+        # hex() already emits the "0x" prefix, so the digits are really hex.
+        print("{} [start: {}, end: {}]".format(block.getName(), hex(start), hex(end)))
\ No newline at end of file