unicorn speedtest initial commit

This commit is contained in:
Dominik Maier
2021-01-23 00:02:59 +01:00
parent ac21e4dd73
commit c4118e869d
15 changed files with 1297 additions and 1 deletions

View File

@ -0,0 +1,6 @@
output
harness
harness-debug
target
target.o
target.offsets.*

View File

@ -0,0 +1,17 @@
# Build rules for the speedtest fuzz target and its offset files.
# The target is compiled with strict warnings, debug info (-g), -Og and -fPIE.
CFLAGS += -Wall -Werror -Wextra -Wpedantic -Og -g -fPIE
.PHONY: all clean
# Default goal: build the target binary and generate its offset files.
all: target target.offsets.main
# Remove object files, the target binary and every generated target.offsets.* file.
clean:
rm -rf *.o target target.offsets.*
# Compile target.c into an object file.
target.o: target.c
${CC} ${CFLAGS} -c target.c -o $@
# Link the object file into the `target` binary that the harnesses load.
target: target.o
${CC} ${CFLAGS} target.o -o $@
# Run get_offsets.py against the freshly built target; the script writes the
# target.offsets.* files (target.offsets.main is used as the stamp for all of them).
target.offsets.main: target
./get_offsets.py

View File

@ -0,0 +1,65 @@
# Speedtest
This is a simple sample harness for a non-crashing file,
to show the raw speed of C, Rust, and Python harnesses.
## Compiling...
Make sure you have built unicornafl first (`../../build_unicorn_support.sh`).
Then, follow these individual steps:
### Rust
```bash
cd rust
cargo build --release
../../../../afl-fuzz -i ../sample_inputs -o out -- ./target/release/harness @@
```
### C
```bash
cd c
make
../../../../afl-fuzz -i ../sample_inputs -o out -- ./harness @@
```
### python
```bash
cd python
../../../../afl-fuzz -i ../sample_inputs -o out -U -- python3 ./harness.py @@
```
## Results
TODO: add results here.
## Compiling target.c
The target binary is not shipped pre-built: run `make` in this folder before
using any of the harnesses. This compiles `target.c` into the x86_64 binary
`target` (using `-Og -g -fPIE`, see the Makefile) and then runs
`./get_offsets.py` to generate the `target.offsets.*` files.
The harnesses then load the binary and execute the main function directly.
## Addresses for the harness:
To find the address (in hex) of main, run:
```bash
objdump -M intel -D target | grep '<main>:' | cut -d" " -f1
```
To find all call sites to magicfn, run:
```bash
objdump -M intel -D target | grep '<magicfn>$' | cut -d":" -f1
```
For malloc callsites:
```bash
objdump -M intel -D target | grep '<malloc@plt>$' | cut -d":" -f1
```
And free callsites:
```bash
objdump -M intel -D target | grep '<free@plt>$' | cut -d":" -f1
```

View File

@ -0,0 +1,53 @@
# UnicornAFL Usage
# Original Unicorn Example Makefile by Nguyen Anh Quynh <aquynh@gmail.com>, 2015
# Adapted for AFL++ by domenukk <domenukk@gmail.com>, 2020
.POSIX:
# Detect the operating system name; the first form is evaluated by GNU make,
# the second by BSD make.
UNAME_S =$(shell uname -s)# GNU make
UNAME_S:sh=uname -s # BSD make
# Marker string used by the suffix-substitution tricks below.
_UNIQ=_QINU_
# Location of the built unicornafl library, relative to this directory.
LIBDIR = ../../../unicornafl
BIN_EXT =
AR_EXT = a
# Verbose output?
V ?= 0
CFLAGS += -Wall -Werror -Wextra -Wno-unused-parameter -I../../../unicornafl/include
LDFLAGS += -L$(LIBDIR) -lpthread -lm
# Portable conditional built from suffix substitutions: appears to add -lrt
# only when uname reports Linux -- TODO confirm the result on other platforms.
_LRT = $(_UNIQ)$(UNAME_S:Linux=)
__LRT = $(_LRT:$(_UNIQ)=-lrt)
LRT = $(__LRT:$(_UNIQ)=)
LDFLAGS += $(LRT)
# Same trick for the compiler: presumably $(CC) when CROSS is empty and
# $(CROSS)gcc otherwise -- TODO confirm.
_CC = $(_UNIQ)$(CROSS)
__CC = $(_CC:$(_UNIQ)=$(CC))
MYCC = $(__CC:$(_UNIQ)$(CROSS)=$(CROSS)gcc)
.PHONY: all clean
# Default goal: note that this builds the harness AND starts afl-fuzz.
all: fuzz
clean:
rm -rf *.o harness harness-debug
# Optimized harness object; rebuilt whenever a unicorn header changes.
harness.o: harness.c ../../../unicornafl/include/unicorn/*.h
${MYCC} ${CFLAGS} -O3 -c harness.c -o $@
# Debug harness object: AddressSanitizer, debug info and -Og.
harness-debug.o: harness.c ../../../unicornafl/include/unicorn/*.h
${MYCC} ${CFLAGS} -fsanitize=address -g -Og -c harness.c -o $@
# Link against the static unicornafl library.
harness: harness.o
${MYCC} -L${LIBDIR} harness.o ../../../unicornafl/libunicornafl.a $(LDFLAGS) -o $@
harness-debug: harness-debug.o
${MYCC} -fsanitize=address -g -Og -L${LIBDIR} harness-debug.o ../../../unicornafl/libunicornafl.a $(LDFLAGS) -o harness-debug
# Build the fuzz target (and its offset files) via the parent Makefile.
../target:
$(MAKE) -C ..
# Build everything and run afl-fuzz on the harness with SKIP_BINCHECK=1 set.
fuzz: ../target harness
SKIP_BINCHECK=1 ../../../../afl-fuzz -i ../sample_inputs -o ./output -- ./harness @@

View File

@ -0,0 +1,390 @@
/*
Simple test harness for AFL++'s unicornafl c mode.
This loads the speedtest target binary (../target) into
Unicorn's memory map for emulation, places the fuzz input at INPUT_ADDRESS,
sets up argc and argv, and executes 'main()'.
If run inside AFL, afl_fuzz automatically does the "right thing"
Run under AFL as follows:
$ cd <afl_path>/unicorn_mode/samples/speedtest/c
$ make harness
$ SKIP_BINCHECK=1 ../../../../afl-fuzz -i ../sample_inputs -o ./output -- ./harness @@
(a plain `make` builds the harness and starts afl-fuzz via the `fuzz` target)
*/
// This is not your everyday Unicorn.
#define UNICORN_AFL
#include <string.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unicorn/unicorn.h>
// Path to the file containing the binary to emulate
#define BINARY_FILE ("../target")
// Memory map for the code to be tested
// Arbitrary address where code to test will be loaded
static const int64_t BASE_ADDRESS = 0x0;
// Max size for the code (64kb)
static const int64_t CODE_SIZE_MAX = 0x00010000;
// Location where the input will be placed (make sure the emulated program knows this somehow, too ;) )
static const int64_t INPUT_ADDRESS = 0x00100000;
// Maximum size for our input
static const int64_t INPUT_MAX = 0x00100000;
// Where our pseudo-heap is at
static const int64_t HEAP_ADDRESS = 0x00200000;
// Maximum allowable size for the heap
static const int64_t HEAP_SIZE_MAX = 0x000F0000;
// Address of the stack (Some random address again)
static const int64_t STACK_ADDRESS = 0x00400000;
// Size of the stack (arbitrarily chosen, just make it big enough)
static const int64_t STACK_SIZE = 0x000F0000;
// Alignment for unicorn mappings (seems to be needed)
static const int64_t ALIGNMENT = 0x1000;
// Tracing callback (registered for UC_HOOK_BLOCK when -t is given):
// logs the start address and size of every basic block that gets executed.
static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
  (void)uc;
  (void)user_data;

  fprintf(stdout, ">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size);
}
// Tracing callback (registered for UC_HOOK_CODE when -t is given):
// logs the address and size of every instruction that gets executed.
static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
  (void)uc;
  (void)user_data;

  fprintf(stdout, ">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size);
}
/* Rounds `size` up to the next multiple of ALIGNMENT (sizes that are already
   aligned are returned unchanged). Unicorn mappings need this, apparently. */
static uint64_t pad(uint64_t size) {
  const uint64_t remainder = size % ALIGNMENT;
  if (remainder != 0) {
    return ((size / ALIGNMENT) + 1) * ALIGNMENT;
  }
  return size;
}
/* Maps the file at `filename` into memory as a private, writable copy.

   On success the mapping is stored in *buf_ptr and the file size in bytes is
   returned; the caller releases the mapping with munmap().
   An empty file cannot be mapped: in that case 0 is returned and *buf_ptr is
   set to NULL, so callers can tell "empty" apart from "error".
   Returns -1 on error (errno is left as set by the failing open() when the
   file could not be opened); *buf_ptr is not modified in that case. */
static off_t afl_mmap_file(char *filename, char **buf_ptr) {
  off_t ret = -1;

  int fd = open(filename, O_RDONLY);
  if (fd < 0) {
    /* Nothing to clean up; do not call fstat()/close() on an invalid fd. */
    return ret;
  }

  struct stat st = {0};
  if (fstat(fd, &st)) goto exit;

  off_t in_len = st.st_size;
  if (in_len == -1) {
    /* This can only ever happen on 32 bit if the file is exactly 4gb. */
    fprintf(stderr, "Filesize of %s too large\n", filename);
    goto exit;
  }

  if (in_len == 0) {
    /* mmap() rejects zero-length mappings, report the empty file instead. */
    *buf_ptr = NULL;
    ret = 0;
    goto exit;
  }

  char *mapping = mmap(0, in_len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
  if (mapping != MAP_FAILED) {
    *buf_ptr = mapping;
    ret = in_len;
  }

exit:
  /* The mapping (if any) stays valid after the descriptor is closed. */
  close(fd);
  return ret;
}
/* Place the input at the right spot inside unicorn.
   This code path is *HOT*, do as little work as possible!

   The last byte of the input is overwritten with '\0' so the emulated target
   always sees a valid C string, then the input is copied to INPUT_ADDRESS.
   Returns false (ignore this testcase) when the input is empty or does not
   fit into the input region, true otherwise. */
static bool place_input_callback(
  uc_engine *uc,
  char *input,
  size_t input_len,
  uint32_t persistent_round,
  void *data
){
  // Test input too short or too long, ignore this testcase.
  // The empty check also keeps `input_len - 1` below from wrapping around
  // and writing out of bounds.
  if (input_len < 1 || input_len >= (size_t)INPUT_MAX) {
    return false;
  }

  // We need a valid c string, make sure it never goes out of bounds.
  input[input_len - 1] = '\0';

  // Write the testcase to unicorn.
  uc_mem_write(uc, INPUT_ADDRESS, input, input_len);

  return true;
}
// Maps `size` bytes (rounded up by pad()) at `addr` with protection `mode`.
// Prints the unicorn error and exits the process with status 1 in case the
// unicorn-internal mmap fails.
static void mem_map_checked(uc_engine *uc, uint64_t addr, size_t size, uint32_t mode) {
  size = pad(size);

  uc_err err = uc_mem_map(uc, addr, size, mode);
  if (err != UC_ERR_OK) {
    // Use the specifiers matching size_t / uint64_t / uint32_t instead of
    // assuming that `long` is 64 bits wide.
    printf("Error mapping %zu bytes at 0x%" PRIx64 ": %s (mode: %" PRIu32 ")\n",
           size, addr, uc_strerror(err), mode);
    exit(1);
  }
}
// Reads whitespace/newline separated hex addresses (an optional "0x" prefix is
// accepted) from the file at `path` into the caller-provided array `addrs`.
// At most `max_count` addresses are stored; reading stops at the end of the
// file or at the first token that is not a hex number.
// Returns the number of addresses stored. Exits the process if the file
// cannot be opened.
ssize_t read_all_addrs(char *path, uint64_t *addrs, size_t max_count) {
  FILE *f = fopen(path, "r");
  if (!f) {
    perror("fopen");
    fprintf(stderr, "Could not read %s, make sure you ran ./get_offsets.py\n", path);
    exit(-1);
  }

  size_t count = 0;
  while (count < max_count) {
    // Only count entries that were really converted; a matching failure
    // (return value 0) would otherwise leave addrs[count] uninitialized.
    if (fscanf(f, "%" SCNx64, &addrs[count]) != 1) { break; }
    count++;

    // guzzle up the separator (usually a newline)
    if (fgetc(f) == EOF) { break; }
  }

  // Close the file on every path, also when max_count was reached.
  fclose(f);

  printf("Set %zu addrs for %s\n", count, path);
  return (ssize_t)count;
}
// Reads all hex addresses from `hook_file` (one per line, as written by
// get_offsets.py) and installs `hook_fn` as a UC_HOOK_CODE hook on exactly
// that instruction (begin == end), after adding BASE_ADDRESS.
// Exits the process if the file cannot be opened, if memory runs out, or if
// unicorn refuses to install a hook.
void set_all_hooks(uc_engine *uc, char *hook_file, void *hook_fn) {
  FILE *f = fopen(hook_file, "r");
  if (!f) {
    fprintf(stderr, "Could not read %s, make sure you ran ./get_offsets.py\n", hook_file);
    exit(-1);
  }

  int      hook_count = 0;
  uint64_t hook_addr;

  // Stop at EOF as well as on anything that is not a hex number, so that a
  // malformed line can never install a hook at a stale/uninitialized address.
  while (fscanf(f, "%" SCNx64, &hook_addr) == 1) {
    printf("got new hook addr %" PRIx64 " (count: %d) ohbytw: sizeof %zx\n",
           hook_addr, hook_count, sizeof(uc_hook));
    hook_addr += BASE_ADDRESS;

    // We'll leak these hooks like a good citizen.
    uc_hook *hook = calloc(1, sizeof(*hook));
    if (!hook) {
      perror("calloc");
      exit(-1);
    }

    uc_err err = uc_hook_add(uc, hook, UC_HOOK_CODE, hook_fn, NULL, hook_addr, hook_addr);
    if (err != UC_ERR_OK) {
      fprintf(stderr, "Could not set hook at 0x%" PRIx64 " for %s: %s\n",
              hook_addr, hook_file, uc_strerror(err));
      exit(-1);
    }
    hook_count++;

    // guzzle up newline
    if (fgetc(f) == EOF) { break; }
  }

  // hook_count now is the number of hooks actually installed on every path.
  printf("Set %d hooks for %s\n", hook_count, hook_file);
  fclose(f);
}
// magicfn is a fancy print function; for fuzzing we skip the hooked
// instruction entirely by pointing RIP at the instruction that follows it.
static void hook_magicfn(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
  const uint64_t next_instruction = address + size;
  uc_reg_write(uc, UC_X86_REG_RIP, &next_instruction);
}
static bool already_allocated = false;
// Very simple malloc stub that only supports exactly one live allocation:
// it hands out HEAP_ADDRESS in RAX, skips the hooked instruction, and aborts
// on a second allocation or on a request larger than HEAP_SIZE_MAX.
static void hook_malloc(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
  if (already_allocated) {
    printf("Double malloc, not supported right now!\n");
    abort();
  }

  // The requested size is the first parameter (RDI).
  uint64_t requested_size;
  uc_reg_read(uc, UC_X86_REG_RDI, &requested_size);
  if (requested_size > HEAP_SIZE_MAX) {
    printf("Tried to allocated %ld bytes, but we only support up to %ld\n", requested_size, HEAP_SIZE_MAX);
    abort();
  }

  // "Return" the heap address ...
  uc_reg_write(uc, UC_X86_REG_RAX, &HEAP_ADDRESS);

  // ... and continue right behind the hooked instruction.
  const uint64_t next_instruction = address + size;
  uc_reg_write(uc, UC_X86_REG_RIP, &next_instruction);

  already_allocated = true;
}
// Counterpart of hook_malloc: nothing is really freed, the "used"-flag is
// simply cleared and the hooked instruction is skipped. Aborts if nothing is
// allocated or if the pointer in RDI is not HEAP_ADDRESS.
static void hook_free(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
  if (!already_allocated) {
    printf("Double free detected. Real bug?\n");
    abort();
  }

  // The pointer to free is the first parameter (RDI).
  uint64_t freed_ptr;
  uc_reg_read(uc, UC_X86_REG_RDI, &freed_ptr);
  if (freed_ptr != HEAP_ADDRESS) {
    printf("Tried to free wrong mem region: 0x%lx at code loc 0x%lx\n", freed_ptr, address);
    abort();
  }

  // Continue right behind the hooked instruction.
  const uint64_t next_instruction = address + size;
  uc_reg_write(uc, UC_X86_REG_RIP, &next_instruction);

  already_allocated = false;
}
// Entry point of the harness.
// Usage: harness [-t] <inputfile>   (-t enables block/instruction tracing)
//
// Sets up an x86_64 unicorn instance, loads ../target at BASE_ADDRESS, maps
// stack and input regions, prepares argc/argv for the emulated main(),
// installs the malloc/free/magicfn hooks from the ../target.offsets.* files
// and finally hands control to uc_afl_fuzz().
// Returns 0 on success, a negative value if setup or fuzzing fails.
int main(int argc, char **argv, char **envp) {
  if (argc == 1) {
    printf("Test harness to measure speed against Rust and python. Usage: harness [-t] <inputfile>\n");
    exit(1);
  }

  bool  tracing = false;
  char *filename = argv[1];
  if (argc > 2 && !strcmp(argv[1], "-t")) {
    tracing = true;
    filename = argv[2];
  }

  uc_engine *uc;
  uc_err     err;
  uc_hook    hooks[2];
  char      *file_contents;

  // Initialize emulator in X86_64 mode
  err = uc_open(UC_ARCH_X86, UC_MODE_64, &uc);
  if (err) {
    printf("Failed on uc_open() with error returned: %u (%s)\n",
           err, uc_strerror(err));
    return -1;
  }

  // If we want tracing output, set the callbacks here
  if (tracing) {
    // tracing all basic blocks and instructions with customized callbacks
    uc_hook_add(uc, &hooks[0], UC_HOOK_BLOCK, hook_block, NULL, 1, 0);
    uc_hook_add(uc, &hooks[1], UC_HOOK_CODE, hook_code, NULL, 1, 0);
  }

  printf("The input testcase is set to %s\n", filename);
  printf("Loading target from %s\n", BINARY_FILE);

  off_t len = afl_mmap_file(BINARY_FILE, &file_contents);
  if (len < 0) {
    // Report right away, before another library call can clobber errno.
    perror("Could not read binary to emulate");
    return -2;
  }
  printf("Binary file size: %llx\n", (unsigned long long)len);
  if (len == 0) {
    fprintf(stderr, "File at '%s' is empty\n", BINARY_FILE);
    return -3;
  }
  if (len > CODE_SIZE_MAX) {
    fprintf(stderr, "Binary too large, increase CODE_SIZE_MAX\n");
    return -4;
  }

  // Map memory.
  mem_map_checked(uc, BASE_ADDRESS, len, UC_PROT_ALL);
  fflush(stdout);

  // write machine code to be emulated to memory
  if (uc_mem_write(uc, BASE_ADDRESS, file_contents, len) != UC_ERR_OK) {
    puts("Error writing to CODE");
    exit(-1);
  }

  // Release copied contents
  munmap(file_contents, len);

  // Read the offset of main() that get_offsets.py extracted
  FILE *f = fopen("../target.offsets.main", "r");
  if (!f) {
    perror("fopen");
    puts("Could not read offset to main function, make sure you ran ./get_offsets.py");
    exit(-1);
  }
  uint64_t start_address;
  // Anything but exactly one converted value leaves start_address unset.
  if (fscanf(f, "%" SCNx64, &start_address) != 1) {
    puts("Start address not found in target.offsets.main");
    exit(-1);
  }
  fclose(f);
  start_address += BASE_ADDRESS;
  printf("Execution will start at 0x%" PRIx64 "\n", start_address);

  // Set the program counter to the start of the code
  uc_reg_write(uc, UC_X86_REG_RIP, &start_address);  // address of entry point of main()

  // Setup the Stack
  mem_map_checked(uc, STACK_ADDRESS, STACK_SIZE, UC_PROT_READ | UC_PROT_WRITE);
  // Setup the stack pointer, but allocate two pointers for the pointers to input
  uint64_t val = STACK_ADDRESS + STACK_SIZE - 16;
  uc_reg_write(uc, UC_X86_REG_RSP, &val);

  // reserve some space for our input data
  mem_map_checked(uc, INPUT_ADDRESS, INPUT_MAX, UC_PROT_READ);

  // argc = 2
  val = 2;
  uc_reg_write(uc, UC_X86_REG_RDI, &val);
  // RSI points to our little 2 QWORD space at the beginning of the stack...
  val = STACK_ADDRESS + STACK_SIZE - 16;
  uc_reg_write(uc, UC_X86_REG_RSI, &val);

  // ... which points to the Input. Serialize the pointer byte by byte so the
  // guest always sees a little endian QWORD, whatever the host byte order is.
  // NOTE(review): like the Python harness, this fills the first argv slot,
  // while target.c reads argv[1] (8 bytes further up) -- confirm which slot
  // is intended.
  uint8_t input_ptr_le[8];
  for (size_t i = 0; i < sizeof(input_ptr_le); i++) {
    input_ptr_le[i] = (uint8_t)(((uint64_t)INPUT_ADDRESS >> (8 * i)) & 0xff);
  }
  uc_mem_write(uc, STACK_ADDRESS + STACK_SIZE - 16, input_ptr_le, sizeof(input_ptr_le));

  set_all_hooks(uc, "../target.offsets.malloc", hook_malloc);
  set_all_hooks(uc, "../target.offsets.magicfn", hook_magicfn);
  set_all_hooks(uc, "../target.offsets.free", hook_free);

  // we don't need more exits for now.
  enum { EXIT_COUNT_MAX = 100 };
  uint64_t exits[EXIT_COUNT_MAX];

  ssize_t exit_count = read_all_addrs("../target.offsets.main_ends", exits, EXIT_COUNT_MAX);
  if (exit_count < 1) {
    printf("Could not find exits! aborting.\n");
    abort();
  }

  printf("Starting to fuzz. Running from addr %" PRIu64 " to one of these %zd exits:\n",
         start_address, exit_count);
  for (ssize_t i = 0; i < exit_count; i++) {
    // Exits are offsets into the binary, just like the start address.
    exits[i] += BASE_ADDRESS;
    printf(" exit %zd: %" PRIu64 "\n", i, exits[i]);
  }
  fflush(stdout);

  // let's gooo
  uc_afl_ret afl_ret = uc_afl_fuzz(
      uc,                    // The unicorn instance we prepared
      filename,              // Filename of the input to process. In AFL this is usually the '@@' placeholder, outside it's any input file.
      place_input_callback,  // Callback that places the input (automatically loaded from the file at filename) in the unicorn instance
      exits,                 // Where to exit (this is an array)
      exit_count,            // Count of end addresses
      NULL,                  // Optional callback to run after each exec
      false,                 // true, if the optional callback should be run also for non-crashes
      1000,                  // For persistent mode: How many rounds to run
      NULL                   // additional data pointer
  );

  switch (afl_ret) {
    case UC_AFL_RET_ERROR:
      printf("Error starting to fuzz");
      return -3;
    case UC_AFL_RET_NO_AFL:
      printf("No AFL attached - We are done with a single run.");
      break;
    default:
      break;
  }
  return 0;
}

View File

@ -0,0 +1,77 @@
#!/usr/bin/env python3
"""This simple script uses objdump to parse important addresses from the target"""
import shlex
import subprocess
objdump_output = subprocess.check_output(
shlex.split("objdump -M intel -D target")
).decode()
main_loc = None
main_ends = []
main_ended = False
magicfn_calls = []
malloc_calls = []
free_calls = []
strlen_calls = []
def line2addr(line):
return "0x" + line.split(":", 1)[0].strip()
last_line = None
for line in objdump_output.split("\n"):
line = line.strip()
def read_addr_if_endswith(findme, list_to):
"""
Look, for example, for the addr like:
12a9: e8 f2 fd ff ff call 10a0 <free@plt>
"""
if line.endswith(findme):
list_to.append(line2addr(line))
if main_loc is not None and main_ended is False:
# We want to know where main ends. An empty line in objdump.
if len(line) == 0:
main_ends.append(line2addr(last_line))
main_ended = True
elif "ret" in line:
main_ends.append(line2addr(line))
if "<main>:" in line:
if main_loc is not None:
raise Exception("Found multiple main functions, odd target!")
# main_loc is the label, so it's parsed differntly (i.e. `0000000000001220 <main>:`)
main_loc = "0x" + line.strip().split(" ", 1)[0].strip()
else:
[
read_addr_if_endswith(*x)
for x in [
("<free@plt>", free_calls),
("<malloc@plt>", malloc_calls),
("<strlen@plt>", strlen_calls),
("<magicfn>", magicfn_calls),
]
]
last_line = line
if main_loc is None:
raise (
"Could not find main in ./target! Make sure objdump is installed and the target is compiled."
)
with open("target.offsets.main", "w") as f:
f.write(main_loc)
with open("target.offsets.main_ends", "w") as f:
f.write("\n".join(main_ends))
with open("target.offsets.magicfn", "w") as f:
f.write("\n".join(magicfn_calls))
with open("target.offsets.malloc", "w") as f:
f.write("\n".join(malloc_calls))
with open("target.offsets.free", "w") as f:
f.write("\n".join(free_calls))
with open("target.offsets.strlen", "w") as f:
f.write("\n".join(strlen_calls))

View File

@ -0,0 +1,8 @@
# Builds the fuzz target (via the parent Makefile) and runs the Python
# harness under afl-fuzz in unicorn mode (-U) with a fixed seed (-s 1).
.PHONY: all fuzz

all: fuzz

# Build the target binary and its offset files in the parent folder.
../target:
	$(MAKE) -C ..

# Start from a clean slate: remove the results of a previous run (the same
# ./output directory that is passed to afl-fuzz via -o) before fuzzing.
fuzz: ../target
	rm -rf ./output
	../../../../afl-fuzz -s 1 -U -i ../sample_inputs -o ./output -- python3 harness.py @@

View File

@ -0,0 +1,277 @@
#!/usr/bin/env python3
"""
Simple test harness for AFL's Unicorn Mode.
This loads the speedtest target binary (precompiled X64 code) into
Unicorn's memory map for emulation, places the specified input into
Argv, and executes main.
There should not be any crashes - it's a speedtest against Rust and c.
Before running this harness, call make in the parent folder.
Run under AFL as follows:
$ cd <afl_path>/unicorn_mode/samples/speedtest/python
$ ../../../../afl-fuzz -U -i ../sample_inputs -o ./output -- python3 harness.py @@
"""
import argparse
import os
import struct
from unicornafl import *
from unicornafl.unicorn_const import UC_ARCH_X86, UC_HOOK_CODE, UC_MODE_64
from unicornafl.x86_const import (
UC_X86_REG_RAX,
UC_X86_REG_RDI,
UC_X86_REG_RIP,
UC_X86_REG_RSI,
UC_X86_REG_RSP,
)
# Memory map for the code to be tested
BASE_ADDRESS = 0x0 # Arbitrary address where the (PIE) target binary will be loaded to
CODE_SIZE_MAX = 0x00010000 # Max size for the code (64kb)
INPUT_ADDRESS = 0x00100000 # where we put our stuff
INPUT_MAX = 0x00100000 # max size for our input
HEAP_ADDRESS = 0x00200000 # Heap addr
HEAP_SIZE_MAX = 0x000F0000 # Maximum allowable size for the heap
STACK_ADDRESS = 0x00400000 # Address of the stack (arbitrarily chosen)
STACK_SIZE = 0x000F0000 # Size of the stack (arbitrarily chosen)
target_path = os.path.abspath(
os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
)
target_bin = os.path.join(target_path, "target")
def get_offsets_for(name):
    """Read the addresses stored in ``target.offsets.<name>``.

    The file is expected next to the target binary and to contain one hex
    number per line. Blank lines are skipped, so an empty file (or a stray
    empty line) yields no entry instead of a ValueError.

    Returns a list of ints, each with BASE_ADDRESS already added.
    """
    full_path = os.path.join(target_path, f"target.offsets.{name}")
    with open(full_path) as f:
        return [int(line, 16) + BASE_ADDRESS for line in f if line.strip()]
# Read all offsets from our objdump file
main_offset = get_offsets_for("main")[0]
main_ends = get_offsets_for("main_ends")
malloc_callsites = get_offsets_for("malloc")
free_callsites = get_offsets_for("free")
magicfn_callsites = get_offsets_for("magicfn")
# Joke's on me: strlen got inlined by my compiler
strlen_callsites = get_offsets_for("strlen")
try:
    # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
    from capstone import CS_ARCH_X86, CS_MODE_64, Cs

    # The emulated target is x86_64 code, so the disassembler has to match.
    cs = Cs(CS_ARCH_X86, CS_MODE_64)

    def unicorn_debug_instruction(uc, address, size, user_data):
        """Code hook for tracing: print the disassembly of the current instruction."""
        mem = uc.mem_read(address, size)
        # The second argument of disasm_lite is the address of the first
        # instruction (not the size), so the reported addresses are correct.
        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(
            bytes(mem), address
        ):
            print(" Instr: {:#016x}:\t{}\t{}".format(cs_address, cs_mnemonic, cs_opstr))


except ImportError:

    def unicorn_debug_instruction(uc, address, size, user_data):
        """Code hook for tracing: print address and size of the current instruction."""
        print(" Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
def unicorn_debug_block(uc, address, size, user_data):
    """Block hook for tracing: print start address and size of the basic block."""
    print(f"Basic Block: addr=0x{address:016x}, size=0x{size:016x}")
def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
    """Memory hook for tracing: print each write (with its data) or read."""
    if access != UC_MEM_WRITE:
        print(" >>> Read: addr=0x{0:016x} size={1}".format(address, size))
        return

    message = " >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
        address, size, value
    )
    print(message)
def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
    """Memory hook for tracing: print each invalid write (with its data) or read."""
    if access != UC_MEM_WRITE_UNMAPPED:
        print(" >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))
        return

    message = " >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
        address, size, value
    )
    print(message)
already_allocated = False
def malloc_hook(uc, address, size, user_data):
    """
    Minimal malloc stub that supports exactly one live allocation.

    Hands out HEAP_ADDRESS in RAX and skips the hooked instruction. Aborts the
    process on a second allocation or on a request above HEAP_SIZE_MAX.
    """
    global already_allocated

    if already_allocated:
        print("Double malloc, not supported right now!")
        os.abort()

    # The requested size is the first parameter (RDI).
    malloc_size = uc.reg_read(UC_X86_REG_RDI)
    if malloc_size > HEAP_SIZE_MAX:
        print(
            f"Tried to allocate {malloc_size} bytes, aint't nobody got space for that! (We may only allocate up to {HEAP_SIZE_MAX})"
        )
        os.abort()

    # "Return" the heap address and continue right behind the hooked instruction.
    next_instruction = address + size
    uc.reg_write(UC_X86_REG_RAX, HEAP_ADDRESS)
    uc.reg_write(UC_X86_REG_RIP, next_instruction)
    already_allocated = True
def free_hook(uc, address, size, user_data):
    """
    Counterpart of malloc_hook: nothing is really freed, the "used"-flag is
    cleared and the hooked instruction is skipped. Aborts the process if
    nothing is allocated or if the pointer in RDI is not HEAP_ADDRESS.
    """
    global already_allocated

    if not already_allocated:
        print("Double free detected. Real bug?")
        os.abort()

    # The pointer to free is the first parameter (RDI).
    free_ptr = uc.reg_read(UC_X86_REG_RDI)
    if free_ptr != HEAP_ADDRESS:
        print(
            f"Tried to free wrong mem region: {hex(free_ptr)} at code loc {hex(address)}"
        )
        os.abort()

    # Continue right behind the hooked instruction.
    next_instruction = address + size
    uc.reg_write(UC_X86_REG_RIP, next_instruction)
    already_allocated = False
# def strlen_hook(uc, address, size, user_data):
# """
# No real strlen, we know the len is == our input.
# This completely ignores '\0', but for this target, do we really care?
# """
# global input_len
# print(f"Returning len {input_len}")
# uc.reg_write(UC_X86_REG_RAX, input_len)
# uc.reg_write(UC_X86_REG_RIP, address + size)
def magicfn_hook(uc, address, size, user_data):
    """
    magicfn is a fancy print function; for fuzzing the hooked instruction is
    skipped by pointing RIP at the instruction that follows it.
    """
    next_instruction = address + size
    uc.reg_write(UC_X86_REG_RIP, next_instruction)
def main():
    """Parse the command line, set up the emulator and start fuzzing.

    Loads the target binary at BASE_ADDRESS, maps stack and input regions,
    prepares argc/argv for the emulated main(), installs the malloc/free/
    magicfn hooks at the call sites read from the offsets files and hands
    control to ``uc.afl_fuzz``.
    """
    parser = argparse.ArgumentParser(description="Test harness for the speedtest target")
    parser.add_argument(
        "input_file",
        type=str,
        help="Path to the file containing the mutated input to load",
    )
    parser.add_argument(
        "-t",
        "--trace",
        default=False,
        action="store_true",
        help="Enables debug tracing",
    )
    args = parser.parse_args()

    # Instantiate an x86_64 Unicorn Engine instance
    uc = Uc(UC_ARCH_X86, UC_MODE_64)

    if args.trace:
        uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
        uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
        uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
        uc.hook_add(
            UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID,
            unicorn_debug_mem_invalid_access,
        )

    print("The input testcase is set to {}".format(args.input_file))

    # ---------------------------------------------------
    # Load the binary to emulate and map it into memory
    with open(target_bin, "rb") as f:
        binary_code = f.read()

    # Make sure the binary fits into the region reserved for code
    if len(binary_code) > CODE_SIZE_MAX:
        print("Binary code is too large (> {} bytes)".format(CODE_SIZE_MAX))
        return

    # Write the binary to its place in mem
    uc.mem_map(BASE_ADDRESS, CODE_SIZE_MAX)
    uc.mem_write(BASE_ADDRESS, binary_code)

    # Set the program counter to the start of the code
    uc.reg_write(UC_X86_REG_RIP, main_offset)

    # Setup the stack.
    uc.mem_map(STACK_ADDRESS, STACK_SIZE)
    # Setup the stack pointer, but allocate two pointers for the pointers to input.
    uc.reg_write(UC_X86_REG_RSP, STACK_ADDRESS + STACK_SIZE - 16)

    # Setup our input space, and push the pointer to it in the function params
    uc.mem_map(INPUT_ADDRESS, INPUT_MAX)
    # We have argc = 2
    uc.reg_write(UC_X86_REG_RDI, 2)
    # RSI points to our little 2 QWORD space at the beginning of the stack...
    uc.reg_write(UC_X86_REG_RSI, STACK_ADDRESS + STACK_SIZE - 16)
    # ... which points to the Input. Write the ptr to mem in little endian.
    uc.mem_write(STACK_ADDRESS + STACK_SIZE - 16, struct.pack("<Q", INPUT_ADDRESS))

    # Every hook covers exactly the one instruction at the call site.
    for addr in malloc_callsites:
        uc.hook_add(UC_HOOK_CODE, malloc_hook, begin=addr, end=addr)

    for addr in free_callsites:
        uc.hook_add(UC_HOOK_CODE, free_hook, begin=addr, end=addr)

    if len(strlen_callsites):
        # strlen got inlined for my compiler.
        print(
            "Oops, your compiler emitted strlen as function. You may have to change the harness."
        )
    # for addr in strlen_callsites:
    #     uc.hook_add(UC_HOOK_CODE, strlen_hook, begin=addr, end=addr)

    for addr in magicfn_callsites:
        uc.hook_add(UC_HOOK_CODE, magicfn_hook, begin=addr, end=addr)

    # -----------------------------------------------------
    # Set up a callback to place input data (do little work here, it's called for every single iteration! This code is *HOT*)
    # We did not pass in any data, so the `data` param can be ignored.
    # Be sure to check out the docstrings for the uc.afl_* functions.
    def place_input_callback(uc, input, persistent_round, data):
        # Apply constraints to the mutated input
        input_len = len(input)
        if input_len > INPUT_MAX:
            # Test input is too long, skip it
            return False

        # Make sure the string is always 0-terminated (as it would be "in the wild")
        input[-1] = b'\0'

        # Write the mutated command into the data buffer
        uc.mem_write(INPUT_ADDRESS, input)

    print(f"Starting to fuzz. Running from addr {main_offset} to one of {main_ends}")
    # Start the fuzzer (persistent mode, 1000 iterations per round).
    uc.afl_fuzz(args.input_file, place_input_callback, main_ends, persistent_iters=1000)
if __name__ == "__main__":
main()

View File

@ -0,0 +1 @@
target

View File

@ -0,0 +1,80 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "build-helper"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdce191bf3fa4995ce948c8c83b4640a1745457a149e73c6db75b4ffe36aad5f"
dependencies = [
"semver",
]
[[package]]
name = "capstone"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "031ba51c39151a1d6336ec859646153187204b0147c7b3f6fe2de636f1b8dbb3"
dependencies = [
"capstone-sys",
]
[[package]]
name = "capstone-sys"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fae25eddcb80e24f98c35952c37a91ff7f8d0f60dbbdafb9763e8d5cc566b8d7"
dependencies = [
"cc",
]
[[package]]
name = "cc"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
[[package]]
name = "libc"
version = "0.2.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89203f3fba0a3795506acaad8ebce3c80c0af93f994d5a1d7a0b1eeb23271929"
[[package]]
name = "semver"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a3186ec9e65071a2095434b1f5bb24838d4e8e130f584c790f6033c79943537"
dependencies = [
"semver-parser",
]
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "unicornafl"
version = "1.0.0"
dependencies = [
"bitflags",
"build-helper",
"capstone",
"libc",
]
[[package]]
name = "unicornafl_harness"
version = "0.1.0"
dependencies = [
"capstone",
"libc",
"unicornafl",
]

View File

@ -0,0 +1,13 @@
[package]
name = "unicornafl_harness"
version = "0.1.0"
authors = ["Dominik Maier <domenukk@gmail.com>"]
edition = "2018"
[dependencies]
unicornafl = { path = "../../../unicornafl/bindings/rust/", version="1.0.0" }
capstone="0.6.0"
libc="0.2.66"
[profile.release]
panic = "abort"

View File

@ -0,0 +1,231 @@
extern crate capstone;
extern crate libc;
use core::cell::{Cell, RefCell};
use libc::{c_void, munmap};
use std::{
env,
fs::File,
io::{self, Read},
process::abort,
};
use unicornafl::{
unicorn_const::{uc_error, Arch, Mode, Permission},
utils::*,
RegisterX86::*,
};
const BINARY: &str = &"../target";
// Memory map for the code to be tested
// Arbitrary address where code to test will be loaded
const BASE_ADDRESS: u64 = 0x0;
// Max size for the code (64kb)
const CODE_SIZE_MAX: u64 = 0x00010000;
// Location where the input will be placed (make sure the emulated program knows this somehow, too ;) )
const INPUT_ADDRESS: u64 = 0x00100000;
// Maximum size for our input
const INPUT_MAX: u64 = 0x00100000;
// Where our pseudo-heap is at
const HEAP_ADDRESS: u64 = 0x00200000;
// Maximum allowable size for the heap
const HEAP_SIZE_MAX: u64 = 0x000F0000;
// Address of the stack (Some random address again)
const STACK_ADDRESS: u64 = 0x00400000;
// Size of the stack (arbitrarily chosen, just make it big enough)
const STACK_SIZE: u64 = 0x000F0000;
// Registers `$func` (boxed) as a code hook for exactly the instruction at
// `$addr` (begin == end) and panics with a descriptive message if the hook
// cannot be set. The three-argument form takes the name to use in that
// message; the two-argument form uses the stringified `$func` expression.
// NOTE(review): the body names `uc` directly. macro_rules! hygiene resolves
// identifiers written in a macro body at the definition site, so this
// presumably cannot see a caller's local `uc` -- confirm, and consider taking
// the emulator handle as an explicit macro argument.
macro_rules! hook {
($addr:expr, $func:expr) => {
uc.add_code_hook($addr, $addr, Box::new($func))
.expect(&format!("failed to set {} hook", stringify!($func)));
};
($addr:expr, $func:expr, $opt_name:expr) => {
uc.add_code_hook($addr, $addr, Box::new($func))
.expect(&format!("failed to set {} hook", $opt_name));
};
}
/// Reads the complete contents of the file at `filename` into a byte vector.
/// Any error while opening or reading the file is passed on to the caller.
fn read_file(filename: &str) -> Result<Vec<u8>, io::Error> {
    let mut contents = Vec::new();
    let mut file = File::open(filename)?;
    file.read_to_end(&mut contents)?;
    Ok(contents)
}
/// Our location parser
fn parse_locs(loc_name: &str) -> Result<Vec<u64>, io::Error> {
let contents = &read_file(&format!("../target.offsets.{}", loc_name))?;
str_from_u8_unchecked(&contents)
.split("\n")
.filter_map(|x| u64::from_str_radix(x, 16))
.collect()
}
// Interprets `utf8_src` as a string that ends at the first NUL byte (or at
// the end of the slice when there is none). The bytes are NOT validated,
// which is why the caller has to guarantee they are valid UTF-8.
pub unsafe fn str_from_u8_unchecked(utf8_src: &[u8]) -> &str {
    let text_len = match utf8_src.iter().position(|&byte| byte == b'\0') {
        Some(nul_index) => nul_index,
        None => utf8_src.len(),
    };
    ::std::str::from_utf8_unchecked(&utf8_src[..text_len])
}
/// Rounds `size` up to the next multiple of 0x1000; sizes that are already a
/// multiple are returned unchanged.
fn align(size: u64) -> u64 {
    const ALIGNMENT: u64 = 0x1000;
    match size % ALIGNMENT {
        0 => size,
        _ => ((size / ALIGNMENT) + 1) * ALIGNMENT,
    }
}
/// Entry point: expects the path of the input file (`@@` under AFL) as the
/// first argument and runs the emulation with it. Panics if emulation fails.
fn main() {
    let args: Vec<String> = env::args().collect();
    if args.len() == 1 {
        println!("Missing parameter <uclation_input> (@@ for AFL)");
        return;
    }

    let input_file = args[1].as_str();
    println!("The input testcase is set to {}", input_file);

    uclate(input_file).unwrap();
}
fn uclate(input_file: &str) -> Result<(), io::Error> {
let mut uc = Unicorn::new(Arch::X86, Mode::MODE_64, 0)?;
let binary = read_file(BINARY).expect(&format!("Could not read modem image: {}", BINARY));
let aligned_binary_size = align(binary.len() as u64);
// Apply constraints to the mutated input
if binary.len() as u64 > CODE_SIZE_MAX {
println!("Binary code is too large (> {} bytes)", CODE_SIZE_MAX);
Ok(())
}
// Write the binary to its place in mem
uc.mem_map(
BASE_ADDRESS,
CODE_SIZE_MAX,
Permission::READ | Permission::WRITE,
)?;
uc.mem_write(BASE_ADDR, binary);
// Set the program counter to the start of the code
let main_locs = parse_locs("main")?;
uc.reg_write(RIP, main_locs[0])?;
// Setup the stack.
uc.mem_map(
STACK_ADDRESS,
STACK_SIZE as usize,
Permission::READ | Permission::WRITE,
)?;
// Setup the stack pointer, but allocate two pointers for the pointers to input.
uc.reg_write(RSP, STACK_ADDRESS + STACK_SIZE - 16)?;
// Setup our input space, and push the pointer to it in the function params
uc.mem_map(INPUT_ADDRESS, INPUT_MAX as usize, Permission::READ)?;
// We have argc = 2
uc.reg_write(RDI, 2)?;
// RSI points to our little 2 QWORD space at the beginning of the stack...
uc.reg_write(RSI, STACK_ADDRESS + STACK_SIZE - 16)?;
// ... which points to the Input. Write the ptr to mem in little endian.
uc.mem_write(
STACK_ADDRESS + STACK_SIZE - 16,
(INPUT_ADDRESS as u32).to_le_bytes(),
)?;
let already_allocated = Cell::new(false);
let already_allocated_malloc = already_allocated.clone();
let hook_malloc = move |mut uc: Unicorn, addr: u64, size: u32| {
if already_allocated_malloc.get() {
println!("Double malloc, not supported right now!");
abort();
}
// read the first param
let malloc_size = uc.reg_read(RDI).unwrap();
if malloc_size > HEAP_SIZE_MAX {
println!(
"Tried to allocate {} bytes, but we may only allocate up to {}",
malloc_size, HEAP_SIZE_MAX
);
abort();
}
uc.reg_write(RAX, HEAP_ADDRESS).unwrap();
uc.reg_write(RIP, addr + size as u64).unwrap();
already_allocated_malloc.set(true);
};
let already_allocated_free = already_allocated.clone();
let hook_free = move |mut uc: Unicorn, addr: u64, size: u32| {
if already_allocated_free.get() {
println!("Double free detected. Real bug?");
abort();
}
// read the first param
let free_ptr = uc.reg_read(RDI).unwrap();
if free_ptr != HEAP_ADDRESS {
println!(
"Tried to free wrong mem region {:x} at code loc {:x}",
free_ptr, addr
);
abort();
}
uc.reg_write(RIP, addr + size as u64);
already_allocated_free.set(false);
};
/*
BEGIN FUNCTION HOOKS
*/
let hook_magicfn =
move |mut uc: Unicorn, addr: u64, size: u32| uc.reg_write(RIP, address + size as u64);
for addr in parse_locs("malloc")? {
hook!(addr, hook_malloc, "malloc");
}
for addr in parse_locs("free")? {
hook!(addr, hook_free, "free");
}
for addr in parse_locs("magicfn")? {
hook!(addr, hook_magicfn, "magicfn");
}
let place_input_callback = |mut uc: Unicorn, afl_input: &[u8], _persistent_round: i32| {
// apply constraints to the mutated input
if afl_input.len() > INPUT_MAX as usize {
//println!("Skipping testcase with leng {}", afl_input.len());
return false;
}
// TODO: afl_input[-1] = b'\0'
uc.mem_write(INPUT_ADDRESS, afl_input).unwrap();
true
};
let crash_validation_callback =
|uc: Unicorn, result: uc_error, _input: &[u8], _: i32| result != uc_error::OK;
end_addrs = parse_locs("main_ends")?;
let ret = uc.afl_fuzz(
input_file,
Box::new(place_input_callback),
&end_addrs,
Box::new(crash_validation_callback),
false,
1,
);
match ret {
Ok(_) => {}
Err(e) => panic!(format!("found non-ok unicorn exit: {:?}", e)),
}
Ok(())
}

View File

@ -0,0 +1 @@
a

View File

@ -0,0 +1,77 @@
/*
* Sample target file to test afl-unicorn fuzzing capabilities.
* This is a very trivial example that will, however, never crash.
* Crashing would change the execution speed.
*
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
// Random print function we can hook in our harness to test hook speeds.
// Prints a fixed message followed by `to_print` on a line of its own and
// hands `to_print` back to the caller.
char magicfn(char to_print) {
  puts("Printing a char, just minding my own business: ");
  fputc(to_print, stdout);
  fputc('\n', stdout);
  return to_print;
}
// Fuzz target: exercises malloc/free/magicfn call sites (which the harnesses
// hook) in a loop driven by the length of argv[1].
// Returns -1 without an argument, -2 if the input is shorter than 20 bytes,
// and 0 otherwise.
int main(int argc, char** argv) {
if (argc < 2) {
printf("Gimme input pl0x!\n");
return -1;
}
// Make sure the hooks work...
char *test = malloc(1024);
if (!test) {
printf("Uh-Oh, malloc doesn't work!");
abort();
}
free(test);
char *data_buf = argv[1];
// We can start the unicorn hooking here.
uint64_t data_len = strlen(data_buf);
if (data_len < 20) return -2;
// Counts data_len down to 0; inside the body data_len is already decremented,
// so the last iteration runs with data_len == 0.
for (; data_len --> 0 ;) {
// Copy the first data_len bytes of the input into a fresh allocation
// (no allocation for data_len == 0, buf_cpy stays NULL).
char *buf_cpy = NULL;
if (data_len) {
buf_cpy = malloc(data_len);
if (!buf_cpy) {
puts("Oof, malloc failed! :/");
abort();
}
memcpy(buf_cpy, data_buf, data_len);
}
// Long copies are only allocated and freed again.
if (data_len >= 18) {
free(buf_cpy);
continue;
}
if (data_len > 2 && data_len < 18) {
// Overwrite the last byte of the copy.
buf_cpy[data_len - 1] = (char) 0x90;
} else if (data_buf[9] == (char) 0x90 && data_buf[10] != 0x00 && buf_cpy[11] == (char) 0x90) {
// Only reachable for data_len <= 2.
// NOTE(review): buf_cpy holds at most 2 bytes here (it is NULL for
// data_len == 0), yet index 11 is read above and index 10 below -- confirm
// this is intended for a target that is documented to never crash.
unsigned char valid_read = buf_cpy[10];
// magicfn is expected to hand back its argument.
if (magicfn(valid_read) != valid_read) {
puts("Oof, the hook for data_buf[10] is broken?");
abort();
}
}
free(buf_cpy);
}
if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) {
// No crash here either: if (0x10 < data[0] < 0x20) and data[1] > data[2],
// call magicfn and check that it hands back its argument.
unsigned char valid_read = data_buf[0];
if (magicfn(valid_read) != valid_read) {
puts("Oof, the hook for data_buf[0] is broken?");
abort();
}
}
// One unconditional magicfn call site, so there is always a hook to hit.
magicfn('q');
return 0;
}