Documentation Index
Fetch the complete documentation index at: https://mintlify.com/NationalSecurityAgency/ghidra/llms.txt
Use this file to discover all available pages before exploring further.
Introduction
Ghidra supports Python scripting through Jython, a Java implementation of Python 2.7. Python scripts provide a more concise syntax compared to Java while still offering full access to the Ghidra API.
For modern Python 3 support, see PyGhidra which uses native CPython.
Python Script Structure
Basic Template
## ###
# Script description
# @category Examples
# @runtime Jython
from ghidra.app.script import GhidraScript
from ghidra.program.model.listing import CodeUnit
# Global state variables available:
# - currentProgram
# - currentAddress
# - currentLocation
# - currentSelection
# - currentHighlight
# - monitor
def main():
    """Entry point of the script; replace the body with your own logic."""
    # Your script code here
    println("Hello from Python!")


if __name__ == "__main__":
    main()
Use comment directives for metadata:
## Description of what this script does
# @category Analysis
# @author Your Name
# @keybinding F6
# @menupath Tools.Python Scripts.My Script
# @runtime Jython
Accessing the Ghidra API
State Variables
Python scripts automatically have access to these variables:
# Program access
program = currentProgram
if program is None:
    println("No program is open")
    exit()

# Current location
addr = currentAddress
if addr is not None:
    println("Current address: " + addr.toString())

# Selection
if currentSelection is not None and not currentSelection.isEmpty():
    println("Selection exists")

# Task monitor
if monitor.isCancelled():
    println("User cancelled")
    exit()
Importing Ghidra Classes
# Address operations
from ghidra.program.model.address import Address, AddressSet
# Listing operations
from ghidra.program.model.listing import (
CodeUnit, Function, Instruction, Data, Listing
)
# Symbol operations
from ghidra.program.model.symbol import (
Symbol, SymbolTable, SourceType, RefType
)
# Memory operations
from ghidra.program.model.mem import Memory, MemoryBlock
# Data types
from ghidra.program.model.data import (
DataType, StringDataType, PointerDataType
)
Common Operations
Address Manipulation
# Create address from offset (toAddr is called here with a number and with a hex string)
addr = toAddr(0x401000)
addr = toAddr("0x401000")
# Address arithmetic: add()/subtract() produce new address values; addr itself is not reassigned
next_addr = addr.add(4)
prev_addr = addr.subtract(4)
offset = addr.getOffset()
# Get program bounds
min_addr = currentProgram.getMinAddress()
max_addr = currentProgram.getMaxAddress()
Memory Access
mem = currentProgram.getMemory()

# Read bytes
bytes_array = getBytes(addr, 16)
for b in bytes_array:
    # Mask with 0xff so negative (signed) byte values print as 00-ff
    print("%02x" % (b & 0xff))

# Read integers
value = getInt(addr)
qword = getLong(addr)
word = getShort(addr)
byte_val = getByte(addr)

# Write data (requires transaction)
tx_id = currentProgram.startTransaction("Write Data")
committed = False
try:
    setByte(addr, 0x90)
    setInt(addr, 0x12345678)
    committed = True
finally:
    # Ends the transaction exactly once: commit on success, roll back if
    # any write raised (the exception still propagates)
    currentProgram.endTransaction(tx_id, committed)
Listing Operations
listing = currentProgram.getListing()

# Get code unit
cu = listing.getCodeUnitAt(addr)
if cu is not None:
    println("Code unit: " + str(cu))

# Iterate instructions
instruction_iter = listing.getInstructions(addr, True)
while instruction_iter.hasNext():
    # Stop promptly when the user cancels the script
    if monitor.isCancelled():
        break
    instr = instruction_iter.next()
    println("%s: %s" % (instr.getAddress(), instr.getMnemonicString()))

# Get data
data = listing.getDataAt(addr)
if data is not None:
    println("Data type: " + data.getDataType().getName())
Function Operations
# Get function: two alternative lookups are shown; the second assignment
# replaces the result of the first
func = getFunctionAt(addr)
func = getFunctionContaining(addr)
if func is not None:
    # Function properties
    name = func.getName()
    entry = func.getEntryPoint()
    body = func.getBody()
    println("Function: " + name)
    println("Entry: " + entry.toString())

    # Parameters
    params = func.getParameters()
    for param in params:
        println("%s: %s" % (param.getName(), param.getDataType()))

    # Return type
    ret_type = func.getReturnType()
    println("Returns: " + ret_type.getName())

# Iterate all functions
func_iter = listing.getFunctions(True)
while func_iter.hasNext():
    # Stop promptly when the user cancels the script
    if monitor.isCancelled():
        break
    f = func_iter.next()
    println("%s @ %s" % (f.getName(), f.getEntryPoint()))
Symbol Operations
sym_table = currentProgram.getSymbolTable()

# Get symbols at address
symbols = sym_table.getSymbols(addr)
for sym in symbols:
    println("%s - %s" % (sym.getName(), sym.getSymbolType()))

# Get primary symbol
primary = sym_table.getPrimarySymbol(addr)
if primary is not None:
    println("Primary: " + primary.getName())

# Create label (requires transaction)
tx_id = currentProgram.startTransaction("Create Label")
committed = False
try:
    createLabel(addr, "my_label", True)
    committed = True
finally:
    # Ends the transaction exactly once: commit on success, roll back if
    # createLabel raised (the exception still propagates)
    currentProgram.endTransaction(tx_id, committed)

# Find symbols by name
sym_iter = sym_table.getSymbolIterator("main", True)
while sym_iter.hasNext():
    sym = sym_iter.next()
    println("Found: %s @ %s" % (sym.getName(), sym.getAddress()))
cu = listing.getCodeUnitAt(addr)

# Set comments (requires transaction)
tx_id = currentProgram.startTransaction("Add Comments")
committed = False
try:
    cu.setComment(CodeUnit.EOL_COMMENT, "End of line comment")
    cu.setComment(CodeUnit.PRE_COMMENT, "Pre comment")
    cu.setComment(CodeUnit.POST_COMMENT, "Post comment")
    cu.setComment(CodeUnit.PLATE_COMMENT, "Plate comment")
    committed = True
finally:
    # Ends the transaction exactly once: commit on success, roll back if
    # any setComment call raised (the exception still propagates)
    currentProgram.endTransaction(tx_id, committed)

# Read comments
eol_comment = cu.getComment(CodeUnit.EOL_COMMENT)
pre_comment = cu.getComment(CodeUnit.PRE_COMMENT)
Real Script Examples
Example 1: Locate Memory Address for File Offset
Source: LocateMemoryAddressesForFileOffset.py
## ###
# Locate memory address for a file offset
# @category Examples
# @runtime Jython
import sys
from ghidra.program.model.address import Address
from ghidra.program.model.listing import CodeUnit
def getFileOffset():
    """Prompt for a hexadecimal file offset and return it as a number.

    Raises ValueError when the input is not valid hexadecimal or when the
    parsed offset is negative.
    """
    userFileOffset = askString('File offset',
                               'Please provide a hexadecimal file offset')
    # Parse once; the conversion itself validates the input
    try:
        myFileOffset = long(userFileOffset, 16)
    except ValueError:
        raise ValueError('Please provide a hexadecimal file offset.')
    if myFileOffset < 0:
        raise ValueError('Offset cannot be a negative value.')
    return myFileOffset
def processAddress(addr, memBlockName, fileOffset):
    """Report the address found for a file offset and record it as a comment.

    Prints the memory block and address associated with fileOffset, then
    writes the same information into the end-of-line comment of the code
    unit containing addr. An existing comment is kept and the new text is
    appended after a space.
    """
    location = memBlockName + ':' + addr.toString()
    println('File offset ' + hex(fileOffset) +
            ' is associated with memory block:address ' + location)
    myCodeUnit = currentProgram.getListing().getCodeUnitContaining(addr)
    # Named constant instead of the literal 0, matching the CodeUnit import
    comment = myCodeUnit.getComment(CodeUnit.EOL_COMMENT)
    new_comment = (getScriptName() + ': File offset: ' + hex(fileOffset) +
                   ', Memory block:address ' + location)
    if comment:
        new_comment = comment + ' ' + new_comment
    myCodeUnit.setComment(CodeUnit.EOL_COMMENT, new_comment)
# Script body: map the requested file offset to memory addresses
myFileOffset = getFileOffset()
mem = currentProgram.getMemory()
addressList = mem.locateAddressesForFileOffset(myFileOffset)
if addressList.isEmpty():
    println('No memory address found for: ' + hex(myFileOffset))
elif addressList.size() == 1:
    # Exactly one match: report it and annotate the listing
    address = addressList.get(0)
    processAddress(address, mem.getBlock(address).getName(), myFileOffset)
else:
    # Several matches: list the candidates without annotating any of them
    println('Possible memory block:address are:')
    for addr in addressList:
        println(mem.getBlock(addr).getName() + ":" + addr.toString())
Example 2: Find Strings in Called Functions
Source: RecursiveStringFinder.py (simplified)
## ###
# Find all strings used within a function and its callees
# @category: Functions
# @runtime Jython
import ghidra.app.script.GhidraScript
import ghidra.program.model.data.StringDataType as StringDataType
def getStringAtAddr(addr):
    """Get string at an address, if present.

    Returns str(data) when the data defined at addr has a StringDataType,
    otherwise None (including when no data is defined there).
    """
    data = getDataAt(addr)
    if data is not None:
        dt = data.getDataType()
        if isinstance(dt, StringDataType):
            return str(data)
    return None
def getStringReferences(insn):
    """Get strings referenced in instruction operands.

    Returns a list of (instruction address, referenced address, string)
    tuples, one for every data reference whose target holds a string.
    """
    found = []
    for i in range(insn.getNumOperands()):
        for ref in insn.getOperandReferences(i):
            if not ref.getReferenceType().isData():
                continue
            string = getStringAtAddr(ref.getToAddress())
            if string is not None:
                found.append((insn.getMinAddress(),
                              ref.getToAddress(), string))
    return found
def getFunctionReferences(insn):
    """Return functions referenced in the instruction.

    Returns a list of (instruction address, call target address) tuples,
    one for every operand reference whose reference type is a call.
    """
    lst = []
    for i in range(insn.getNumOperands()):
        for ref in insn.getOperandReferences(i):
            if ref.getReferenceType().isCall():
                lst.append((insn.getMinAddress(), ref.getToAddress()))
    return lst
# Main logic
# This simplified version scans only the function under the cursor;
# getFunctionReferences() is not used here, so callees are not followed.
bigfunc = getFunctionContaining(currentAddress)
if bigfunc is None:
    println("Please place the cursor within a function!")
else:
    body = bigfunc.getBody()
    addresses = body.getAddresses(True)
    strings_found = []
    while addresses.hasNext():
        addr = addresses.next()
        insn = getInstructionAt(addr)
        # Only addresses where an instruction starts yield a result
        if insn is not None:
            strings_found.extend(getStringReferences(insn))
    println("Strings found:")
    for from_addr, to_addr, string in strings_found:
        println(" %s -> %s: %s" % (from_addr, to_addr, string))
    println("Done!")
User Interaction
Ask Methods
from ghidra.util.exception import CancelledException

# The ask* dialogs below signal a user cancel by raising
# CancelledException rather than by returning None.
try:
    # Ask for string
    name = askString("Input", "Enter name:")
    # Ask for integer
    count = askInt("Count", "Enter count:")
    # Ask for address
    addr = askAddress("Address", "Enter address:")
    # Ask for file
    file_obj = askFile("Output File", "Choose:")
    # Ask for directory
    dir_obj = askDirectory("Directory", "Choose:")
    # Ask choice (a Python list is accepted for the choices)
    choice = askChoice("Selection", "Choose option:",
                       ["Option1", "Option2", "Option3"], "Option1")
except CancelledException:
    println("User cancelled")
    exit()

# Ask yes/no (returns True or False)
proceed = askYesNo("Confirm", "Continue processing?")
Output Methods
import sys

# Print to console
println("Message")
# In Jython, print is the Python 2 statement and appends a newline;
# write to sys.stdout directly to omit it
sys.stdout.write("No newline")
# Print errors
printerr("Error occurred!")
# Format output
println("Address: 0x%x, Value: %d" % (addr.getOffset(), value))
Python-Specific Features
List Comprehensions
# Get all function names (the comprehension consumes func_iter)
func_iter = listing.getFunctions(True)
func_names = [f.getName() for f in func_iter]
# Filter functions by prefix (a new iterator is requested for this second pass)
my_funcs = [f for f in listing.getFunctions(True)
if f.getName().startswith("my_")]
Dictionary Usage
# Build function address map
# Keys are function names, so a later function with the same name
# replaces the earlier entry
func_map = {}
for func in listing.getFunctions(True):
    func_map[func.getName()] = func.getEntryPoint()

# Lookup
if "main" in func_map:
    println("main @ " + func_map["main"].toString())
Exception Handling
import traceback

try:
    # Risky operation
    value = getInt(addr)
except Exception as e:
    printerr("Error: " + str(e))
    # Also print the full stack trace for debugging
    traceback.print_exc()
finally:
    # Runs whether or not the read succeeded
    println("Cleanup")
Limitations
Jython 2.7
Jython is based on Python 2.7, which has several limitations:
- No Python 3 syntax (print is a statement, not a function)
- No f-strings
- Limited standard library
- No recent Python packages
Java Integration
Some Python idioms don’t work with Java objects:
# Index-based loops work with range() or xrange(); in Python 2.7 xrange()
# avoids building a temporary list
for i in xrange(instruction.getNumOperands()):
    op = instruction.getDefaultOperandRepresentation(i)
    # Java arrays can be looped over directly
    for ref in instruction.getOperandReferences(i):
        println("%s -> %s" % (op, ref.getToAddress()))

# This doesn't work: a Java Iterator has no length
# count = len(listing.getFunctions(True))
# Use this instead: count while consuming the iterator
count = sum(1 for _ in listing.getFunctions(True))
Jython scripts are generally slower than Java scripts due to interpretation overhead.
Migrating to PyGhidra
For modern Python 3 support, consider PyGhidra:
# PyGhidra example (Python 3)
import pyghidra

pyghidra.start()
# Everything that uses the program stays inside the with block
with pyghidra.open_program("binary.exe") as flat_api:
    program = flat_api.getCurrentProgram()
    listing = program.getListing()
    for func in listing.getFunctions(True):
        print(f"{func.getName()} @ {func.getEntryPoint()}")
Best Practices
- Check for None - Always validate objects before use
- Use transactions - Wrap modifications in transactions
- Monitor cancellation - Check monitor.isCancelled() in loops
- Handle exceptions - Use try/except for robustness
- Import at top - Import all Ghidra classes at script start