Added AFL-Unicorn mode

Dominik Maier
2019-07-25 02:26:51 +02:00
parent 9246f21f2a
commit 00dc8a0ad5
36 changed files with 2681 additions and 46 deletions

.gitignore

@@ -19,3 +19,5 @@ afl-tmin
 as
 qemu_mode/qemu-3.1.0
 qemu_mode/qemu-3.1.0.tar.xz
+unicorn_mode/unicorn
+unicorn_mode/unicorn-*

afl-analyze.c

@@ -77,7 +77,6 @@ static volatile u8
   child_timed_out; /* Child timed out? */

 /* Constants used for describing byte behavior. */

 #define RESP_NONE 0x00 /* Changing byte is a no-op. */

@@ -741,7 +740,8 @@ static void usage(u8* argv0) {
       " -f file - input file read by the tested program (stdin)\n"
       " -t msec - timeout for each run (%u ms)\n"
       " -m megs - memory limit for child process (%u MB)\n"
-      " -Q - use binary-only instrumentation (QEMU mode)\n\n"
+      " -Q - use binary-only instrumentation (QEMU mode)\n"
+      " -U - use unicorn-based instrumentation (Unicorn mode)\n\n"

       "Analysis settings:\n\n"

@@ -867,20 +867,19 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 }

 /* Main entry point */

 int main(int argc, char** argv) {

   s32 opt;
-  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
   char** use_argv;

   doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;

   SAYF(cCYA "afl-analyze" VERSION cRST " by <lcamtuf@google.com>\n");

-  while ((opt = getopt(argc,argv,"+i:f:m:t:eQ")) > 0)
+  while ((opt = getopt(argc,argv,"+i:f:m:t:eQU")) > 0)

     switch (opt) {

@@ -960,6 +959,14 @@ int main(int argc, char** argv) {
         qemu_mode = 1;
         break;

+      case 'U':
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+        unicorn_mode = 1;
+        break;
+
       default:
         usage(argv[0]);

afl-cmin

@@ -49,9 +49,9 @@ MEM_LIMIT=100
 TIMEOUT=none

 unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
-  AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE
+  AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE

-while getopts "+i:o:f:m:t:eQC" opt; do
+while getopts "+i:o:f:m:t:eQUC" opt; do

   case "$opt" in

@@ -83,6 +83,11 @@ while getopts "+i:o:f:m:t:eQC" opt; do
         test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
         QEMU_MODE=1
         ;;
+    "U")
+        EXTRA_PAR="$EXTRA_PAR -U"
+        test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
+        UNICORN_MODE=1
+        ;;
     "?")
         exit 1
         ;;

@@ -111,6 +116,7 @@ Execution control settings:
  -m megs - memory limit for child process ($MEM_LIMIT MB)
  -t msec - run time limit for child process (none)
  -Q - use binary-only instrumentation (QEMU mode)
+ -U - use unicorn-based instrumentation (Unicorn mode)

 Minimization settings:

@@ -196,7 +202,7 @@ if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then
 fi

-if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" ]; then
+if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then

   if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
     echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2

afl-fuzz.c

@@ -208,6 +208,7 @@ EXP_ST u8 skip_deterministic, /* Skip deterministic stages? */
   shuffle_queue, /* Shuffle input queue? */
   bitmap_changed = 1, /* Time to update bitmap? */
   qemu_mode, /* Running in QEMU mode? */
+  unicorn_mode, /* Running in Unicorn mode? */
   skip_requested, /* Skip request, via SIGUSR1 */
   run_over10m, /* Run time over 10 minutes? */
   persistent_mode, /* Running in persistent mode? */

@@ -1547,6 +1548,7 @@ static void minimize_bits(u8* dst, u8* src) {
 }

 /* Find first power of two greater or equal to val (assuming val under
    2^63). */

@@ -1569,6 +1571,7 @@ static u64 next_p2(u64 val) {
    for every byte in the bitmap. We win that slot if there is no previous
    contender, or if the contender has a more favorable speed x size factor. */

 static void update_bitmap_score(struct queue_entry* q) {

   u32 i;

@@ -1584,6 +1587,7 @@ static void update_bitmap_score(struct queue_entry* q) {
     if (top_rated[i]) {

+      /* Faster-executing or smaller test cases are favored. */
       u64 top_rated_fuzz_p2 = next_p2 (top_rated[i]->n_fuzz);
       u64 top_rated_fav_factor = top_rated[i]->exec_us * top_rated[i]->len;

@@ -1682,7 +1686,6 @@ static void cull_queue(void) {
 }

 /* Load postprocessor, if available. */

 static void setup_post(void) {

@@ -2301,6 +2304,8 @@ EXP_ST void init_forkserver(char** argv) {
   if (!forksrv_pid) {

+    /* CHILD PROCESS */
+
     struct rlimit r;

     /* Umpf. On OpenBSD, the default fd limit for root users is set to

@@ -2408,6 +2413,8 @@ EXP_ST void init_forkserver(char** argv) {
   }

+  /* PARENT PROCESS */
+
   /* Close the unneeded endpoints. */

   close(ctl_pipe[0]);

@@ -3755,7 +3762,7 @@ static void write_stats_file(double bitmap_cvg, double stability, double eps) {
              "exec_timeout : %u\n"
              "afl_banner : %s\n"
              "afl_version : " VERSION "\n"
-             "target_mode : %s%s%s%s%s%s%s\n"
+             "target_mode : %s%s%s%s%s%s%s%s\n"
              "command_line : %s\n",
              start_time / 1000, get_cur_time() / 1000, getpid(),
              queue_cycle ? (queue_cycle - 1) : 0, total_execs, eps,

@@ -3765,10 +3772,10 @@ static void write_stats_file(double bitmap_cvg, double stability, double eps) {
              unique_hangs, last_path_time / 1000, last_crash_time / 1000,
              last_hang_time / 1000, total_execs - last_crash_execs,
              exec_tmout, use_banner,
-             qemu_mode ? "qemu " : "", dumb_mode ? " dumb " : "",
+             unicorn_mode ? "unicorn" : "", qemu_mode ? "qemu " : "", dumb_mode ? " dumb " : "",
              no_forkserver ? "no_forksrv " : "", crash_mode ? "crash " : "",
              persistent_mode ? "persistent " : "", deferred_mode ? "deferred " : "",
-             (qemu_mode || dumb_mode || no_forkserver || crash_mode ||
+             (unicorn_mode || qemu_mode || dumb_mode || no_forkserver || crash_mode ||
               persistent_mode || deferred_mode) ? "" : "default",
              orig_cmdline);

   /* ignore errors */

@@ -4702,7 +4709,7 @@ static void show_init_stats(void) {
   SAYF("\n");

-  if (avg_us > (qemu_mode ? 50000 : 10000))
+  if (avg_us > ((qemu_mode || unicorn_mode) ? 50000 : 10000))
     WARNF(cLRD "The target binary is pretty slow! See %s/perf_tips.txt.",
           doc_path);

@@ -4779,6 +4786,7 @@ static void show_init_stats(void) {
 }

 #ifdef USE_PYTHON
 static u8 trim_case_python(char** argv, struct queue_entry* q, u8* in_buf) {

@@ -11090,7 +11098,7 @@ EXP_ST void check_binary(u8* fname) {
 #endif /* ^!__APPLE__ */

-  if (!qemu_mode && !dumb_mode &&
+  if (!qemu_mode && !unicorn_mode && !dumb_mode &&
       !memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) {

     SAYF("\n" cLRD "[-] " cRST

@@ -11110,15 +11118,15 @@ EXP_ST void check_binary(u8* fname) {
   }

-  if (qemu_mode &&
+  if ((qemu_mode || unicorn_mode) &&
       memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) {

     SAYF("\n" cLRD "[-] " cRST
          "This program appears to be instrumented with afl-gcc, but is being run in\n"
-         " QEMU mode (-Q). This is probably not what you want - this setup will be\n"
-         " slow and offer no practical benefits.\n");
+         " QEMU or Unicorn mode (-Q or -U). This is probably not what you want -\n"
+         " this setup will be slow and offer no practical benefits.\n");

-    FATAL("Instrumentation found in -Q mode");
+    FATAL("Instrumentation found in -Q or -U mode");
   }

@@ -11245,6 +11253,7 @@ static void usage(u8* argv0) {
       " -t msec - timeout for each run (auto-scaled, 50-%u ms)\n"
       " -m megs - memory limit for child process (%u MB)\n"
       " -Q - use binary-only instrumentation (QEMU mode)\n"
+      " -U - use Unicorn-based instrumentation (Unicorn mode)\n\n"
       " -L minutes - use MOpt(imize) mode and set the limit time for entering the\n"
       " pacemaker mode (minutes of no new paths, 0 = immediately).\n"
       " a recommended value is 10-60. see docs/README.MOpt\n\n"

@@ -11863,7 +11872,6 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 }

 /* Make a copy of the current command line. */

 static void save_cmdline(u32 argc, char** argv) {

@@ -11925,7 +11933,7 @@ int main(int argc, char** argv) {
   gettimeofday(&tv, &tz);
   init_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();

-  while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:Qe:p:s:V:E:L:")) > 0)
+  while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:QUe:p:s:V:E:L:")) > 0)

     switch (opt) {

@@ -12126,6 +12134,15 @@ int main(int argc, char** argv) {
         break;

+      case 'U': /* Unicorn mode */
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        unicorn_mode = 1;
+
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+        break;
+
       case 'V': {
         most_time_key = 1;
         if (sscanf(optarg, "%llu", &most_time) < 1 || optarg[0] == '-')

@@ -12259,6 +12276,7 @@ int main(int argc, char** argv) {
     if (crash_mode) FATAL("-C and -n are mutually exclusive");
     if (qemu_mode) FATAL("-Q and -n are mutually exclusive");
+    if (unicorn_mode) FATAL("-U and -n are mutually exclusive");

   }

afl-showmap.c

@@ -72,7 +72,6 @@ static volatile u8
   child_timed_out, /* Child timed out? */
   child_crashed; /* Child crashed? */

 /* Classify tuple counts. Instead of mapping to individual bits, as in
    afl-fuzz.c, we map to more user-friendly numbers between 1 and 8. */

@@ -405,7 +404,9 @@ static void usage(u8* argv0) {
       " -t msec - timeout for each run (none)\n"
       " -m megs - memory limit for child process (%u MB)\n"
-      " -Q - use binary-only instrumentation (QEMU mode)\n\n"
+      " -Q - use binary-only instrumentation (QEMU mode)\n"
+      " -U - use Unicorn-based instrumentation (Unicorn mode)\n"
+      " (Not necessary, here for consistency with other afl-* tools)\n\n"

       "Other settings:\n\n"

@@ -534,19 +535,18 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 }

 /* Main entry point */

 int main(int argc, char** argv) {

   s32 opt;
-  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
   u32 tcnt;
   char** use_argv;

   doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;

-  while ((opt = getopt(argc,argv,"+o:m:t:A:eqZQbc")) > 0)
+  while ((opt = getopt(argc,argv,"+o:m:t:A:eqZQUbc")) > 0)

     switch (opt) {

@@ -643,6 +643,14 @@ int main(int argc, char** argv) {
         qemu_mode = 1;
         break;

+      case 'U':
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+        unicorn_mode = 1;
+        break;
+
       case 'b':

         /* Secret undocumented mode. Writes output in raw binary format

afl-tmin.c

@@ -898,7 +898,9 @@ static void usage(u8* argv0) {
       " -f file - input file read by the tested program (stdin)\n"
       " -t msec - timeout for each run (%u ms)\n"
       " -m megs - memory limit for child process (%u MB)\n"
-      " -Q - use binary-only instrumentation (QEMU mode)\n\n"
+      " -Q - use binary-only instrumentation (QEMU mode)\n"
+      " -U - use Unicorn-based instrumentation (Unicorn mode)\n\n"
+      " (Not necessary, here for consistency with other afl-* tools)\n\n"

       "Minimization settings:\n\n"

@@ -1025,7 +1027,6 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 }

 /* Read mask bitmap from file. This is for the -B option. */

 static void read_bitmap(u8* fname) {

@@ -1047,14 +1048,14 @@ static void read_bitmap(u8* fname) {
 int main(int argc, char** argv) {

   s32 opt;
-  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
   char** use_argv;

   doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;

   SAYF(cCYA "afl-tmin" VERSION cRST " by <lcamtuf@google.com>\n");

-  while ((opt = getopt(argc,argv,"+i:o:f:m:t:B:xeQ")) > 0)
+  while ((opt = getopt(argc,argv,"+i:o:f:m:t:B:xeQU")) > 0)

     switch (opt) {
@@ -1146,6 +1147,14 @@ int main(int argc, char** argv) {
         qemu_mode = 1;
         break;

+      case 'U':
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+        unicorn_mode = 1;
+        break;
+
       case 'B': /* load bitmap */

         /* This is a secret undocumented option! It is speculated to be useful

config.h

@@ -59,6 +59,10 @@
 #define MEM_LIMIT_QEMU 200

+/* Default memory limit when running in Unicorn mode (MB): */
+
+#define MEM_LIMIT_UNICORN 200
+
 /* Number of calibration cycles per every new test case (and for test
    cases that show variable behavior): */

docs/binaryonly_fuzzing.txt

@@ -12,7 +12,7 @@ The following is a description of how these can be fuzzed with afl++
 !!!!!
 TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then
-       use afl -Q qemu_mode, or better: use both in parallel
+       use afl -Q qemu_mode.
 !!!!!
@@ -27,6 +27,16 @@ It is the easiest to use alternative and even works for cross-platform binaries.
 As it is included in afl++ this needs no URL.

+UNICORN
+-------
+Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
+In contrast to QEMU, Unicorn does not offer full-system or even userland emulation.
+Runtime environments and/or loaders have to be written from scratch, if needed.
+On top, block chaining has been removed. This means the speed boost introduced into
+the patched QEMU mode of afl++ cannot simply be ported over to Unicorn.
+For further information, check out ./unicorn_mode.txt.
+
 DYNINST
 -------
 Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio
@@ -111,21 +121,6 @@ Pintool solutions:
 https://github.com/spinpx/afl_pin_mode <= only old Pintool version supported

-Non-AFL solutions
------------------
-There are many binary-only fuzzing frameworks. Some are great for CTFs but don't
-work with large binaries, other are very slow but have good path discovery,
-some are very hard to set-up ...
-QSYM: https://github.com/sslab-gatech/qsym
-Manticore: https://github.com/trailofbits/manticore
-S2E: https://github.com/S2E
-<please send me any missing that are good>

 That's it!
 News, corrections, updates?
 Email vh@thc.org

docs/unicorn_mode.txt

@@ -0,0 +1,107 @@
=========================================================
Unicorn-based binary-only instrumentation for afl-fuzz
=========================================================
1) Introduction
---------------
The code in ./unicorn_mode allows you to build a standalone feature that
leverages the Unicorn Engine and allows callers to obtain instrumentation
output for black-box, closed-source binary code snippets. This mechanism
can then be used by afl-fuzz to stress-test targets that couldn't be built
with afl-gcc or used in QEMU mode, or with other extensions such as
TriforceAFL.
There is a significant performance penalty compared to native AFL,
but at least we're able to use AFL on these binaries, right?
The idea and much of the implementation comes from Nathan Voss <njvoss299@gmail.com>.
2) How to use
-------------
*** Building AFL's Unicorn Mode ***
First, make afl as usual.
Once that completes successfully you need to build and add in the Unicorn Mode
features:
$ cd unicorn_mode
$ ./build_unicorn_support.sh
NOTE: This script downloads a recent Unicorn Engine commit that has been tested
and is stable-ish from the Unicorn github page. If you are offline, you'll need
to hack up this script a little bit and supply your own copy of Unicorn's latest
stable release. It's not very hard, just check out the beginning of the
build_unicorn_support.sh script and adjust as necessary.
Building Unicorn will take a little bit (~5-10 minutes). Once it completes
it automatically compiles a sample application and verifies that it works.
*** Fuzzing with Unicorn Mode ***
To really use unicorn-mode effectively you need to prepare the following
(a minimal harness sketch follows this list):
* Relevant binary code to be fuzzed
* Knowledge of the memory map and good starting state
* Folder containing sample inputs to start fuzzing with
- Same ideas as any other AFL inputs
- Quality/speed of results will depend greatly on quality of starting
samples
- See AFL's guidance on how to create a sample corpus
* Unicorn-based test harness which:
- Adds memory map regions
- Loads binary code into memory
- Emulates at least one instruction*
- Yeah, this is lame. See 'Gotchas' section below for more info
- Loads and verifies data to fuzz from a command-line specified file
- AFL will provide mutated inputs by changing the file passed to
the test harness
- Presumably the data to be fuzzed is at a fixed buffer address
- If input constraints (size, invalid bytes, etc.) are known they
should be checked after the file is loaded. If a constraint
fails, just exit the test harness. AFL will treat the input as
'uninteresting' and move on.
- Sets up registers and memory state for beginning of test
- Emulates the code of interest from beginning to end
- If a crash is detected, the test harness must 'crash' by
  throwing a signal (SIGSEGV, SIGKILL, SIGABRT, etc.)
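To make this more concrete, here is a minimal sketch of such a harness in
Python. It assumes a small x86-64 code snippet that reads its input from a
fixed buffer; all addresses, sizes and the 'target_code.bin' file name are
hypothetical placeholders, not part of this commit. The real templates in
unicorn_mode/helper_scripts and unicorn_mode/samples are the authoritative
starting points.

  import os
  import signal
  import sys

  from unicorn import Uc, UcError, UC_ARCH_X86, UC_MODE_64
  from unicorn.x86_const import UC_X86_REG_RSP

  # Hypothetical, fixed memory layout of the snippet under test.
  CODE_ADDR  = 0x00100000        # where the dumped code bytes are mapped
  STACK_ADDR = 0x00200000
  INPUT_ADDR = 0x00300000        # fixed buffer the snippet reads from
  PAGE       = 0x1000
  END_ADDR   = CODE_ADDR + 0x42  # first address after the code under test

  def main():
      code = open("target_code.bin", "rb").read()

      uc = Uc(UC_ARCH_X86, UC_MODE_64)

      # Add memory map regions and load the binary code.
      uc.mem_map(CODE_ADDR, (len(code) + PAGE - 1) & ~(PAGE - 1))
      uc.mem_map(STACK_ADDR, PAGE)
      uc.mem_map(INPUT_ADDR, PAGE)
      uc.mem_write(CODE_ADDR, code)
      uc.reg_write(UC_X86_REG_RSP, STACK_ADDR + PAGE - 8)

      # Emulate a single instruction first, so the AFL fork server is
      # started before the input file is read (see 'Gotchas' below).
      uc.emu_start(CODE_ADDR, 0, 0, count=1)

      # Only now load the mutated input that AFL passes as a file argument.
      data = open(sys.argv[1], "rb").read()
      if len(data) > PAGE:
          return      # constraint violated: exit, AFL skips the input
      uc.mem_write(INPUT_ADDR, data)

      # Emulate the code of interest from beginning to end.
      try:
          uc.emu_start(CODE_ADDR, END_ADDR)
      except UcError:
          # 'Crash' in a way that afl-fuzz can detect.
          os.kill(os.getpid(), signal.SIGSEGV)

  if __name__ == "__main__":
      main()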
Once you have all those things ready to go you just need to run afl-fuzz in
'unicorn-mode' by passing in the '-U' flag:
$ afl-fuzz -U -m none -i /path/to/inputs -o /path/to/results -- ./test_harness @@
The normal afl-fuzz command line format applies to everything here. Refer to
AFL's main documentation for more info about how to use afl-fuzz effectively.
For a much clearer vision of what all of this looks like, please refer to the
sample provided in the 'unicorn_mode/samples' directory. There is also a blog
post that goes over the basics at:
https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf
The 'helper_scripts' directory also contains several helper scripts that allow you
to dump context from a running process, load it, and hook heap allocations. For details
on how to use these, check out the follow-up blog post to the one linked above.
An example use of AFL-Unicorn mode is discussed in the paper Unicorefuzz:
https://www.usenix.org/conference/woot19/presentation/maier
3) Gotchas, feedback, bugs
--------------------------
To make sure that AFL's fork server starts up correctly the Unicorn test
harness script must emulate at least one instruction before loading the
data that will be fuzzed from the input file. It doesn't matter what the
instruction is, nor if it is valid. This is an artifact of how the fork-server
is started and could likely be fixed with some clever re-arranging of the
patches applied to Unicorn.
Running the build script builds Unicorn and its python bindings and installs
them on your system. This installation will supersede any existing Unicorn
installation with the patched afl-unicorn version.
Refer to the unicorn_mode/samples/arm_example/arm_tester.c for an example
of how to do this properly! If you don't get this right, AFL will not
load any mutated inputs and your fuzzing will be useless!

unicorn_mode/README.md

@@ -0,0 +1,16 @@
```
__ _ _
__ _ / _| | _ _ _ __ (_) ___ ___ _ __ _ __
/ _` | |_| |___| | | | '_ \| |/ __/ _ \| '__| '_ \
| (_| | _| |___| |_| | | | | | (_| (_) | | | | | |
\__,_|_| |_| \__,_|_| |_|_|\___\___/|_| |_| |_|
```
afl-unicorn lets you fuzz any piece of binary code that can be emulated by [Unicorn Engine](http://www.unicorn-engine.org/).
For an in-depth description of what this is, how to install it, and how to use it check out this [blog post](https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf).
For general help with AFL, please refer to both the official [AFL website](http://lcamtuf.coredump.cx/afl/) and the documents in the /doc/ directory.
Created by Nathan Voss, originally funded by [Battelle](https://www.battelle.org/cyber).
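
A minimal quick-start, combining the build and fuzz commands from docs/unicorn_mode.txt (paths are relative to the afl++ checkout; the harness script name is a placeholder for your own Unicorn-based test harness):

```
cd unicorn_mode
./build_unicorn_support.sh    # builds the patched Unicorn plus python bindings
cd ..
afl-fuzz -U -m none -i inputs/ -o results/ -- python your_test_harness.py @@
```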

unicorn_mode/build_unicorn_support.sh

@@ -0,0 +1,186 @@
#!/bin/sh
#
# american fuzzy lop - Unicorn-Mode build script
# --------------------------------------
#
# Written by Nathan Voss <njvoss99@gmail.com>
#
# Adapted from code by Andrew Griffiths <agriffiths@google.com> and
# Michal Zalewski <lcamtuf@google.com>
#
# Adapted for Afl++ by Dominik Maier <mail@dmnk.co>
#
# Copyright 2017 Battelle Memorial Institute. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This script downloads, patches, and builds a version of Unicorn with
# minor tweaks to allow Unicorn-emulated binaries to be run under
# afl-fuzz.
#
# The modifications reside in patches/*. The standalone Unicorn library
# will be written to /usr/lib/libunicornafl.so, and the Python bindings
# will be installed system-wide.
#
# You must make sure that Unicorn Engine is not already installed before
# running this script. If it is, please uninstall it first.
UNICORN_URL="https://github.com/unicorn-engine/unicorn/archive/24f55a7973278f20f0de21b904851d99d4716263.tar.gz"
UNICORN_SHA384="7180d47ca52c99b4c073a343a2ead91da1a829fdc3809f3ceada5d872e162962eab98873a8bc7971449d5f34f41fdb93"
echo "================================================="
echo "Unicorn-AFL build script"
echo "================================================="
echo
echo "[*] Performing basic sanity checks..."
if [ ! "`uname -s`" = "Linux" ]; then
echo "[-] Error: Unicorn instrumentation is supported only on Linux."
exit 1
fi
if [ ! -f "patches/afl-unicorn-cpu-inl.h" -o ! -f "../config.h" ]; then
echo "[-] Error: key files not found - wrong working directory?"
exit 1
fi
if [ ! -f "../afl-showmap" ]; then
echo "[-] Error: ../afl-showmap not found - compile AFL first!"
exit 1
fi
for i in wget python automake autoconf sha384sum; do
T=`which "$i" 2>/dev/null`
if [ "$T" = "" ]; then
echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i'."
exit 1
fi
done
if ! which easy_install > /dev/null; then
echo "[-] Error: Python setup-tools not found. Run 'sudo apt-get install python-setuptools'."
exit 1
fi
if echo "$CC" | grep -qF /afl-; then
echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool."
exit 1
fi
echo "[+] All checks passed!"
ARCHIVE="`basename -- "$UNICORN_URL"`"
CKSUM=`sha384sum -- "$ARCHIVE" 2>/dev/null | cut -d' ' -f1`
if [ ! "$CKSUM" = "$UNICORN_SHA384" ]; then
echo "[*] Downloading Unicorn from the web..."
rm -f "$ARCHIVE"
wget -O "$ARCHIVE" -- "$UNICORN_URL" || exit 1
CKSUM=`sha384sum -- "$ARCHIVE" 2>/dev/null | cut -d' ' -f1`
fi
if [ "$CKSUM" = "$UNICORN_SHA384" ]; then
echo "[+] Cryptographic signature on $ARCHIVE checks out."
else
echo "[-] Error: signature mismatch on $ARCHIVE (perhaps download error?)."
exit 1
fi
echo "[*] Uncompressing archive (this will take a while)..."
rm -rf "unicorn" || exit 1
mkdir "unicorn" || exit 1
tar xzf "$ARCHIVE" -C ./unicorn --strip-components=1 || exit 1
echo "[+] Unpacking successful."
rm -rf "$ARCHIVE" || exit 1
echo "[*] Applying patches..."
cp patches/afl-unicorn-cpu-inl.h unicorn || exit 1
patch -p1 --directory unicorn <patches/patches.diff || exit 1
echo "[+] Patching done."
echo "[*] Configuring Unicorn build..."
cd "unicorn" || exit 1
echo "[+] Configuration complete."
echo "[*] Attempting to build Unicorn (fingers crossed!)..."
UNICORN_QEMU_FLAGS='--python=python2' make || exit 1
echo "[+] Build process successful!"
echo "[*] Installing Unicorn python bindings..."
cd bindings/python || exit 1
if [ -z "$VIRTUAL_ENV" ]; then
echo "[*] Info: Installing python unicorn using --user"
python setup.py install --user || exit 1
else
echo "[*] Info: Installing python unicorn to virtualenv: $VIRTUAL_ENV"
python setup.py install || exit 1
fi
export LIBUNICORN_PATH="$(pwd)" # in theory, this allows switching between the afl-unicorn and vanilla unicorn .so files.
cd ../../ || exit 1
echo "[+] Unicorn bindings installed successfully."
# Compile the sample, run it, verify that it works!
echo "[*] Testing unicorn-mode functionality by running a sample test harness under afl-unicorn"
cd ../samples/simple || exit 1
# Run afl-showmap on the sample application. If anything comes out then it must have worked!
unset AFL_INST_RATIO
echo 0 | ../../../afl-showmap -U -m none -q -o .test-instr0 -- python simple_test_harness.py ./sample_inputs/sample1.bin || exit 1
if [ -s .test-instr0 ]
then
echo "[+] Instrumentation tests passed. "
echo "[+] All set, you can now use Unicorn mode (-U) in afl-fuzz!"
RETVAL=0
else
echo "[-] Error: Unicorn mode doesn't seem to work!"
RETVAL=1
fi
rm -f .test-instr0
exit $RETVAL

unicorn_mode/helper_scripts/template_test_harness.py

@@ -0,0 +1,104 @@
"""
template_test_harness.py
Template which loads the context of a process into a Unicorn Engine
instance, loads a custom (mutated) input, and executes the
desired code. Designed to be used in conjunction with one of the
Unicorn Context Dumper scripts.
Author:
Nathan Voss <njvoss299@gmail.com>
"""
import argparse
from unicorn import *
from unicorn.x86_const import * # TODO: Set correct architecture here as necessary
import unicorn_loader
# Simple stand-in heap to prevent OS/kernel issues
unicorn_heap = None
# Start and end address of emulation
START_ADDRESS = None # TODO: Set start address here
END_ADDRESS   = None # TODO: Set end address here
"""
Implement target-specific hooks in here.
Stub out, skip past, and re-implement necessary functionality as appropriate
"""
def unicorn_hook_instruction(uc, address, size, user_data):
# TODO: Setup hooks and handle anything you need to here
# - For example, hook malloc/free/etc. and handle it internally
pass
#------------------------
#---- Main test function
def main():
parser = argparse.ArgumentParser()
parser.add_argument('context_dir', type=str, help="Directory containing process context")
parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input content")
parser.add_argument('-d', '--debug', default=False, action="store_true", help="Dump trace info")
args = parser.parse_args()
print("Loading context from {}".format(args.context_dir))
uc = unicorn_loader.AflUnicornEngine(args.context_dir, enable_trace=args.debug, debug_print=False)
# Instantiate the hook function to avoid emulation errors
global unicorn_heap
unicorn_heap = unicorn_loader.UnicornSimpleHeap(uc, debug_print=True)
uc.hook_add(UC_HOOK_CODE, unicorn_hook_instruction)
# Execute 1 instruction just to start up the forkserver
# NOTE: This instruction will be executed again later, so be sure that
# there are no negative consequences to the overall execution state.
# If there are, change the later call to emu_start to not re-execute
# the first instruction.
print("Starting the forkserver by executing 1 instruction")
try:
uc.emu_start(START_ADDRESS, 0, 0, count=1)
except UcError as e:
print("ERROR: Failed to execute a single instruction (error: {})!".format(e))
return
# Allocate a buffer and load a mutated input and put it into the right spot
if args.input_file:
print("Loading input content from {}".format(args.input_file))
input_file = open(args.input_file, 'rb')
input_content = input_file.read()
input_file.close()
# TODO: Apply constraints to mutated input here
raise NotImplementedError('No constraints on the mutated inputs have been set!')
# Allocate a new buffer and put the input into it
buf_addr = unicorn_heap.malloc(len(input_content))
uc.mem_write(buf_addr, input_content)
print("Allocated mutated input buffer @ 0x{0:016x}".format(buf_addr))
# TODO: Set the input into the state so it will be handled
raise NotImplementedError('The mutated input was not loaded into the Unicorn state!')
# Run the test
print("Executing from 0x{0:016x} to 0x{1:016x}".format(START_ADDRESS, END_ADDRESS))
try:
result = uc.emu_start(START_ADDRESS, END_ADDRESS, timeout=0, count=0)
except UcError as e:
# If something went wrong during emulation a signal is raised to force this
# script to crash in a way that AFL can detect ('uc.force_crash()' should be
# called for any condition that you want AFL to treat as a crash).
print("Execution failed with error: {}".format(e))
uc.dump_regs()
uc.force_crash(e)
print("Final register state:")
uc.dump_regs()
print("Done.")
if __name__ == "__main__":
main()

unicorn_mode/helper_scripts/unicorn_dumper_gdb.py

@@ -0,0 +1,190 @@
"""
unicorn_dumper_gdb.py
When run with GDB sitting at a debug breakpoint, this
dumps the current state (registers/memory/etc) of
the process to a directory consisting of an index
file with register and segment information and
sub-files containing all actual process memory.
The output of this script is expected to be used
to initialize context for Unicorn emulation.
-----------
In order to run this script, GEF needs to be running in the GDB session (gef.py)
# HELPERS from: https://github.com/hugsy/gef/blob/master/gef.py
It can be loaded with:
source <path_to_gef>/gef.py
Call this function when at a breakpoint in your process with:
source unicorn_dumper_gdb.py
-----------
"""
import datetime
import hashlib
import json
import os
import sys
import time
import zlib
# GDB Python SDK
import gdb
# Maximum segment size that we'll store
# Yep, this could break stuff pretty quickly if we
# omit something that's used during emulation.
MAX_SEG_SIZE = 128 * 1024 * 1024
# Name of the index file
INDEX_FILE_NAME = "_index.json"
#----------------------
#---- Helper Functions
def map_arch():
arch = get_arch() # from GEF
if 'x86_64' in arch or 'x86-64' in arch:
return "x64"
elif 'x86' in arch or 'i386' in arch:
return "x86"
elif 'aarch64' in arch or 'arm64' in arch:
return "arm64le"
elif 'aarch64_be' in arch:
return "arm64be"
elif 'armeb' in arch:
# check for THUMB mode
cpsr = get_register('cpsr')
if (cpsr & (1 << 5)):
return "armbethumb"
else:
return "armbe"
elif 'arm' in arch:
# check for THUMB mode
cpsr = get_register('cpsr')
if (cpsr & (1 << 5)):
return "armlethumb"
else:
return "armle"
else:
return ""
#-----------------------
#---- Dumping functions
def dump_arch_info():
arch_info = {}
arch_info["arch"] = map_arch()
return arch_info
def dump_regs():
reg_state = {}
for reg in current_arch.all_registers:
reg_val = get_register(reg)
# current dumper script looks for register values to be hex strings
# reg_str = "0x{:08x}".format(reg_val)
# if "64" in get_arch():
# reg_str = "0x{:016x}".format(reg_val)
# reg_state[reg.strip().strip('$')] = reg_str
reg_state[reg.strip().strip('$')] = reg_val
return reg_state
def dump_process_memory(output_dir):
# Segment information dictionary
final_segment_list = []
# GEF:
vmmap = get_process_maps()
if not vmmap:
print("No address mapping information found")
return final_segment_list
for entry in vmmap:
if entry.page_start == entry.page_end:
continue
seg_info = {'start': entry.page_start, 'end': entry.page_end, 'name': entry.path, 'permissions': {
"r": entry.is_readable() > 0,
"w": entry.is_writable() > 0,
"x": entry.is_executable() > 0
}, 'content_file': ''}
# "(deleted)" may or may not be valid, but don't push it.
if entry.is_readable() and not '(deleted)' in entry.path:
try:
# Compress and dump the content to a file
seg_content = read_memory(entry.page_start, entry.size)
if(seg_content == None):
print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.page_start, entry.path))
else:
print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.page_start, len(seg_content), entry.path, repr(seg_info['permissions'])))
compressed_seg_content = zlib.compress(seg_content)
md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
seg_info["content_file"] = md5_sum
# Write the compressed contents to disk
out_file = open(os.path.join(output_dir, md5_sum), 'wb')
out_file.write(compressed_seg_content)
out_file.close()
except:
print("Exception reading segment ({}): {}".format(entry.path, sys.exc_info()[0]))
else:
print("Skipping segment {0}@0x{1:016x}".format(entry.path, entry.page_start))
# Add the segment to the list
final_segment_list.append(seg_info)
return final_segment_list
#----------
#---- Main
def main():
print("----- Unicorn Context Dumper -----")
print("You must be actively debugging before running this!")
print("If it fails, double check that you are actively debugging before running.")
try:
GEF_TEST = set_arch()
except Exception as e:
print("!!! GEF not running in GDB. Please run gef.py by executing:")
print('\tpython execfile ("<path_to_gef>/gef.py")')
return
try:
# Create the output directory
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
output_path = "UnicornContext_" + timestamp
if not os.path.exists(output_path):
os.makedirs(output_path)
print("Process context will be output to {}".format(output_path))
# Get the context
context = {
"arch": dump_arch_info(),
"regs": dump_regs(),
"segments": dump_process_memory(output_path),
}
# Write the index file
index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
index_file.write(json.dumps(context, indent=4))
index_file.close()
print("Done.")
except Exception as e:
print("!!! ERROR:\n\t{}".format(repr(e)))
if __name__ == "__main__":
main()

unicorn_mode/helper_scripts/unicorn_dumper_ida.py

@@ -0,0 +1,209 @@
"""
unicorn_dumper_ida.py
When run with IDA (<v7) sitting at a debug breakpoint,
dumps the current state (registers/memory/etc) of
the process to a directory consisting of an index
file with register and segment information and
sub-files containing all actual process memory.
The output of this script is expected to be used
to initialize context for Unicorn emulation.
"""
import datetime
import hashlib
import json
import os
import sys
import time
import zlib
# IDA Python SDK
from idaapi import *
from idc import *
# Maximum segment size that we'll store
# Yep, this could break stuff pretty quickly if we
# omit something that's used during emulation.
MAX_SEG_SIZE = 128 * 1024 * 1024
# Name of the index file
INDEX_FILE_NAME = "_index.json"
#----------------------
#---- Helper Functions
def get_arch():
if ph.id == PLFM_386 and ph.flag & PR_USE64:
return "x64"
elif ph.id == PLFM_386 and ph.flag & PR_USE32:
return "x86"
elif ph.id == PLFM_ARM and ph.flag & PR_USE64:
if cvar.inf.is_be():
return "arm64be"
else:
return "arm64le"
elif ph.id == PLFM_ARM and ph.flag & PR_USE32:
if cvar.inf.is_be():
return "armbe"
else:
return "armle"
else:
return ""
def get_register_list(arch):
if arch == "arm64le" or arch == "arm64be":
arch = "arm64"
elif arch == "armle" or arch == "armbe":
arch = "arm"
registers = {
"x64" : [
"rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"rip", "rsp", "efl",
"cs", "ds", "es", "fs", "gs", "ss",
],
"x86" : [
"eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
"eip", "esp", "efl",
"cs", "ds", "es", "fs", "gs", "ss",
],
"arm" : [
"R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
"R8", "R9", "R10", "R11", "R12", "PC", "SP", "LR",
"PSR",
],
"arm64" : [
"X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",
"X8", "X9", "X10", "X11", "X12", "X13", "X14",
"X15", "X16", "X17", "X18", "X19", "X20", "X21",
"X22", "X23", "X24", "X25", "X26", "X27", "X28",
"PC", "SP", "FP", "LR", "CPSR"
# "NZCV",
]
}
return registers[arch]
#-----------------------
#---- Dumping functions
def dump_arch_info():
arch_info = {}
arch_info["arch"] = get_arch()
return arch_info
def dump_regs():
reg_state = {}
for reg in get_register_list(get_arch()):
reg_state[reg] = GetRegValue(reg)
return reg_state
def dump_process_memory(output_dir):
# Segment information dictionary
segment_list = []
# Loop over the segments, fill in the info dictionary
for seg_ea in Segments():
seg_start = SegStart(seg_ea)
seg_end = SegEnd(seg_ea)
seg_size = seg_end - seg_start
seg_info = {}
seg_info["name"] = SegName(seg_ea)
seg_info["start"] = seg_start
seg_info["end"] = seg_end
perms = getseg(seg_ea).perm
seg_info["permissions"] = {
"r": False if (perms & SEGPERM_READ) == 0 else True,
"w": False if (perms & SEGPERM_WRITE) == 0 else True,
"x": False if (perms & SEGPERM_EXEC) == 0 else True,
}
if (perms & SEGPERM_READ) and seg_size <= MAX_SEG_SIZE and isLoaded(seg_start):
try:
# Compress and dump the content to a file
seg_content = get_many_bytes(seg_start, seg_end - seg_start)
if(seg_content == None):
print("Segment empty: {0}@0x{1:016x} (size:UNKNOWN)".format(SegName(seg_ea), seg_ea))
seg_info["content_file"] = ""
else:
print("Dumping segment {0}@0x{1:016x} (size:{2})".format(SegName(seg_ea), seg_ea, len(seg_content)))
compressed_seg_content = zlib.compress(seg_content)
md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
seg_info["content_file"] = md5_sum
# Write the compressed contents to disk
out_file = open(os.path.join(output_dir, md5_sum), 'wb')
out_file.write(compressed_seg_content)
out_file.close()
except:
print("Exception reading segment: {}".format(sys.exc_info()[0]))
seg_info["content_file"] = ""
else:
print("Skipping segment {0}@0x{1:016x}".format(SegName(seg_ea), seg_ea))
seg_info["content_file"] = ""
# Add the segment to the list
segment_list.append(seg_info)
return segment_list
"""
TODO: FINISH IMPORT DUMPING
def import_callback(ea, name, ord):
if not name:
else:
# True -> Continue enumeration
# False -> End enumeration
return True
def dump_imports():
import_dict = {}
for i in xrange(0, number_of_import_modules):
enum_import_names(i, import_callback)
return import_dict
"""
#----------
#---- Main
def main():
try:
print("----- Unicorn Context Dumper -----")
print("You must be actively debugging before running this!")
print("If it fails, double check that you are actively debugging before running.")
# Create the output directory
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
output_path = os.path.dirname(os.path.abspath(GetIdbPath()))
output_path = os.path.join(output_path, "UnicornContext_" + timestamp)
if not os.path.exists(output_path):
os.makedirs(output_path)
print("Process context will be output to {}".format(output_path))
# Get the context
context = {
"arch": dump_arch_info(),
"regs": dump_regs(),
"segments": dump_process_memory(output_path),
#"imports": dump_imports(),
}
# Write the index file
index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
index_file.write(json.dumps(context, indent=4))
index_file.close()
print("Done.")
except Exception, e:
print("!!! ERROR:\n\t{}".format(str(e)))
if __name__ == "__main__":
main()

unicorn_mode/helper_scripts/unicorn_dumper_lldb.py

@@ -0,0 +1,299 @@
"""
unicorn_dumper_lldb.py
When run with LLDB sitting at a debug breakpoint, this
dumps the current state (registers/memory/etc) of
the process to a directory consisting of an index
file with register and segment information and
sub-files containing all actual process memory.
The output of this script is expected to be used
to initialize context for Unicorn emulation.
-----------
Call this function when at a breakpoint in your process with:
command script import -r unicorn_dumper_lldb
If there is trouble with "split on a NoneType", issue the following command:
script lldb.target.triple
and try to import the script again.
-----------
"""
from copy import deepcopy
import datetime
import hashlib
import json
import os
import sys
import time
import zlib
# LLDB Python SDK
import lldb
# Maximum segment size that we'll store
# Yep, this could break stuff pretty quickly if we
# omit something that's used during emulation.
MAX_SEG_SIZE = 128 * 1024 * 1024
# Name of the index file
INDEX_FILE_NAME = "_index.json"
DEBUG_MEM_FILE_NAME = "_memory.json"
# Page size required by Unicorn
UNICORN_PAGE_SIZE = 0x1000
# Alignment functions to align all memory segments to Unicorn page boundaries (4KB pages only)
ALIGN_PAGE_DOWN = lambda x: x & ~(UNICORN_PAGE_SIZE - 1)
ALIGN_PAGE_UP = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE-1)
#----------------------
#---- Helper Functions
def overlap_alignments(segments, memory):
final_list = []
curr_seg_idx = 0
curr_end_addr = 0
curr_node = None
current_segment = None
sorted_segments = sorted(segments, key=lambda k: (k['start'], k['end']))
if curr_seg_idx < len(sorted_segments):
current_segment = sorted_segments[curr_seg_idx]
for mem in sorted(memory, key=lambda k: (k['start'], -k['end'])):
if curr_node is None:
if current_segment is not None and current_segment['start'] == mem['start']:
curr_node = deepcopy(current_segment)
curr_node['permissions'] = mem['permissions']
else:
curr_node = deepcopy(mem)
curr_end_addr = curr_node['end']
while curr_end_addr <= mem['end']:
if curr_node['end'] == mem['end']:
if current_segment is not None and current_segment['start'] > curr_node['start'] and current_segment['start'] < curr_node['end']:
curr_node['end'] = current_segment['start']
if(curr_node['end'] > curr_node['start']):
final_list.append(curr_node)
curr_node = deepcopy(current_segment)
curr_node['permissions'] = mem['permissions']
curr_end_addr = curr_node['end']
else:
if(curr_node['end'] > curr_node['start']):
final_list.append(curr_node)
# if curr_node is a segment
if current_segment is not None and current_segment['end'] == mem['end']:
curr_seg_idx += 1
if curr_seg_idx < len(sorted_segments):
current_segment = sorted_segments[curr_seg_idx]
else:
current_segment = None
curr_node = None
break
# could only be a segment
else:
if curr_node['end'] < mem['end']:
# check for remaining segments and valid segments
if(curr_node['end'] > curr_node['start']):
final_list.append(curr_node)
curr_seg_idx += 1
if curr_seg_idx < len(sorted_segments):
current_segment = sorted_segments[curr_seg_idx]
else:
current_segment = None
if current_segment is not None and current_segment['start'] <= curr_end_addr and current_segment['start'] < mem['end']:
curr_node = deepcopy(current_segment)
curr_node['permissions'] = mem['permissions']
else:
# no more segments
curr_node = deepcopy(mem)
curr_node['start'] = curr_end_addr
curr_end_addr = curr_node['end']
return final_list
# https://github.com/llvm-mirror/llvm/blob/master/include/llvm/ADT/Triple.h
def get_arch():
arch, arch_vendor, arch_os = lldb.target.GetTriple().split('-')
if arch == 'x86_64':
return "x64"
elif arch == 'x86' or arch == 'i386':
return "x86"
elif arch == 'aarch64' or arch == 'arm64':
return "arm64le"
elif arch == 'aarch64_be':
return "arm64be"
elif arch == 'armeb':
return "armbe"
elif arch == 'arm':
return "armle"
else:
return ""
#-----------------------
#---- Dumping functions
def dump_arch_info():
arch_info = {}
arch_info["arch"] = get_arch()
return arch_info
def dump_regs():
reg_state = {}
for reg_list in lldb.frame.GetRegisters():
if 'general purpose registers' in reg_list.GetName().lower():
for reg in reg_list:
reg_state[reg.GetName()] = int(reg.GetValue(), 16)
return reg_state
def get_section_info(sec):
name = sec.name if sec.name is not None else ''
if sec.GetParent().name is not None:
name = sec.GetParent().name + '.' + sec.name
module_name = sec.addr.module.file.GetFilename()
module_name = module_name if module_name is not None else ''
long_name = module_name + '.' + name
return sec.addr.load_addr, (sec.addr.load_addr + sec.size), sec.size, long_name
def dump_process_memory(output_dir):
# Segment information dictionary
raw_segment_list = []
raw_memory_list = []
# 1st pass:
# Loop over the segments, fill in the segment info dictionary
for module in lldb.target.module_iter():
for seg_ea in module.section_iter():
seg_info = {'module': module.file.GetFilename() }
seg_info['start'], seg_info['end'], seg_size, seg_info['name'] = get_section_info(seg_ea)
# TODO: Ugly hack for -1 LONG address on 32-bit
if seg_info['start'] >= sys.maxint or seg_size <= 0:
print("Throwing away page: {}".format(seg_info['name']))
continue
# Page-align segment
seg_info['start'] = ALIGN_PAGE_DOWN(seg_info['start'])
seg_info['end'] = ALIGN_PAGE_UP(seg_info['end'])
print("Appending: {}".format(seg_info['name']))
raw_segment_list.append(seg_info)
# Add the stack memory region (just hardcode 0x1000 around the current SP)
sp = lldb.frame.GetSP()
start_sp = ALIGN_PAGE_DOWN(sp)
raw_segment_list.append({'start': start_sp, 'end': start_sp + 0x1000, 'name': 'STACK'})
# Write the original memory to file for debugging
index_file = open(os.path.join(output_dir, DEBUG_MEM_FILE_NAME), 'w')
index_file.write(json.dumps(raw_segment_list, indent=4))
index_file.close()
# Loop over raw memory regions
mem_info = lldb.SBMemoryRegionInfo()
start_addr = -1
next_region_addr = 0
while next_region_addr > start_addr:
err = lldb.process.GetMemoryRegionInfo(next_region_addr, mem_info)
# TODO: Should check err.success. If False, what do we do?
if not err.success:
break
next_region_addr = mem_info.GetRegionEnd()
if next_region_addr >= sys.maxsize:
break
start_addr = mem_info.GetRegionBase()
end_addr = mem_info.GetRegionEnd()
# Unknown region name
region_name = 'UNKNOWN'
# Ignore regions that aren't even mapped
if mem_info.IsMapped() and mem_info.IsReadable():
mem_info_obj = {'start': start_addr, 'end': end_addr, 'name': region_name, 'permissions': {
"r": mem_info.IsReadable(),
"w": mem_info.IsWritable(),
"x": mem_info.IsExecutable()
}}
raw_memory_list.append(mem_info_obj)
final_segment_list = overlap_alignments(raw_segment_list, raw_memory_list)
for seg_info in final_segment_list:
try:
seg_info['content_file'] = ''
start_addr = seg_info['start']
end_addr = seg_info['end']
region_name = seg_info['name']
# Compress and dump the content to a file
err = lldb.SBError()
seg_content = lldb.process.ReadMemory(start_addr, end_addr - start_addr, err)
if(seg_content == None):
print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(start_addr, region_name))
seg_info['content_file'] = ''
else:
print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(start_addr, len(seg_content), region_name, repr(seg_info['permissions'])))
compressed_seg_content = zlib.compress(seg_content)
md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
seg_info['content_file'] = md5_sum
# Write the compressed contents to disk
out_file = open(os.path.join(output_dir, md5_sum), 'wb')
out_file.write(compressed_seg_content)
out_file.close()
except:
print("Exception reading segment ({}): {}".format(region_name, sys.exc_info()[0]))
return final_segment_list
#----------
#---- Main
def main():
try:
print("----- Unicorn Context Dumper -----")
print("You must be actively debugging before running this!")
print("If it fails, double check that you are actively debugging before running.")
# Create the output directory
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
output_path = "UnicornContext_" + timestamp
if not os.path.exists(output_path):
os.makedirs(output_path)
print("Process context will be output to {}".format(output_path))
# Get the context
context = {
"arch": dump_arch_info(),
"regs": dump_regs(),
"segments": dump_process_memory(output_path),
}
# Write the index file
index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
index_file.write(json.dumps(context, indent=4))
index_file.close()
print("Done.")
except Exception, e:
print("!!! ERROR:\n\t{}".format(repr(e)))
if __name__ == "__main__":
main()
elif lldb.debugger:
main()

unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py

@@ -0,0 +1,224 @@
"""
unicorn_dumper_pwndbg.py
When run with GDB sitting at a debug breakpoint, this
dumps the current state (registers/memory/etc) of
the process to a directory consisting of an index
file with register and segment information and
sub-files containing all actual process memory.
The output of this script is expected to be used
to initialize context for Unicorn emulation.
-----------
In order to run this script, PWNDBG needs to be running in the GDB session (gdbinit.py)
# HELPERS from: https://github.com/pwndbg/pwndbg
It can be loaded with:
source <path_to_pwndbg>/gdbinit.py
Call this function when at a breakpoint in your process with:
source unicorn_dumper_pwndbg.py
-----------
"""
import datetime
import hashlib
import json
import os
import sys
import time
import zlib
# GDB Python SDK
import gdb
pwndbg_loaded = False
try:
import pwndbg.arch
import pwndbg.regs
import pwndbg.vmmap
import pwndbg.memory
pwndbg_loaded = True
except ImportError:
print("!!! PWNDBG not running in GDB. Please run gdbinit.py by executing:")
print('\tpython execfile ("<path_to_pwndbg>/gdbinit.py")')
# Maximum segment size that we'll store
# Yep, this could break stuff pretty quickly if we
# omit something that's used during emulation.
MAX_SEG_SIZE = 128 * 1024 * 1024
# Name of the index file
INDEX_FILE_NAME = "_index.json"
#----------------------
#---- Helper Functions
def map_arch():
arch = pwndbg.arch.current # from PWNDBG
if 'x86_64' in arch or 'x86-64' in arch:
return "x64"
elif 'x86' in arch or 'i386' in arch:
return "x86"
elif 'aarch64' in arch or 'arm64' in arch:
return "arm64le"
elif 'aarch64_be' in arch:
return "arm64be"
elif 'arm' in arch:
cpsr = pwndbg.regs['cpsr']
# check endianess
if pwndbg.arch.endian == 'big':
# check for THUMB mode
if (cpsr & (1 << 5)):
return "armbethumb"
else:
return "armbe"
else:
# check for THUMB mode
if (cpsr & (1 << 5)):
return "armlethumb"
else:
return "armle"
elif 'mips' in arch:
if pwndbg.arch.endian == 'little':
return 'mipsel'
else:
return 'mips'
else:
return ""
#-----------------------
#---- Dumping functions
def dump_arch_info():
arch_info = {}
arch_info["arch"] = map_arch()
return arch_info
def dump_regs():
reg_state = {}
for reg in pwndbg.regs.all:
reg_val = pwndbg.regs[reg]
# current dumper script looks for register values to be hex strings
# reg_str = "0x{:08x}".format(reg_val)
# if "64" in get_arch():
# reg_str = "0x{:016x}".format(reg_val)
# reg_state[reg.strip().strip('$')] = reg_str
reg_state[reg.strip().strip('$')] = reg_val
return reg_state
def dump_process_memory(output_dir):
# Segment information dictionary
final_segment_list = []
# PWNDBG:
vmmap = pwndbg.vmmap.get()
# Pointer to end of last dumped memory segment
segment_last_addr = 0x0
start = None
end = None
if not vmmap:
print("No address mapping information found")
return final_segment_list
# Assume segment entries are sorted by start address
for entry in vmmap:
if entry.start == entry.end:
continue
start = entry.start
end = entry.end
if (segment_last_addr > entry.start): # indicates overlap
if (segment_last_addr > entry.end): # indicates complete overlap, so we skip the segment entirely
continue
else:
start = segment_last_addr
seg_info = {'start': start, 'end': end, 'name': entry.objfile, 'permissions': {
"r": entry.read,
"w": entry.write,
"x": entry.execute
}, 'content_file': ''}
# "(deleted)" may or may not be valid, but don't push it.
if entry.read and not '(deleted)' in entry.objfile:
try:
# Compress and dump the content to a file
seg_content = pwndbg.memory.read(start, end - start)
if(seg_content == None):
print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.start, entry.objfile))
else:
print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.start, len(seg_content), entry.objfile, repr(seg_info['permissions'])))
compressed_seg_content = zlib.compress(seg_content)
md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
seg_info["content_file"] = md5_sum
# Write the compressed contents to disk
out_file = open(os.path.join(output_dir, md5_sum), 'wb')
out_file.write(compressed_seg_content)
out_file.close()
except:
print("Exception reading segment ({}): {}".format(entry.objfile, sys.exc_info()[0]))
else:
print("Skipping segment {0}@0x{1:016x}".format(entry.objfile, entry.start))
segment_last_addr = end
# Add the segment to the list
final_segment_list.append(seg_info)
return final_segment_list
#----------
#---- Main
def main():
print("----- Unicorn Context Dumper -----")
print("You must be actively debugging before running this!")
print("If it fails, double check that you are actively debugging before running.")
try:
# Create the output directory
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
output_path = "UnicornContext_" + timestamp
if not os.path.exists(output_path):
os.makedirs(output_path)
print("Process context will be output to {}".format(output_path))
# Get the context
context = {
"arch": dump_arch_info(),
"regs": dump_regs(),
"segments": dump_process_memory(output_path),
}
# Write the index file
index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
index_file.write(json.dumps(context, indent=4))
index_file.close()
print("Done.")
except Exception as e:
print("!!! ERROR:\n\t{}".format(repr(e)))
if __name__ == "__main__" and pwndbg_loaded:
main()

unicorn_mode/helper_scripts/unicorn_loader.py

@@ -0,0 +1,560 @@
"""
unicorn_loader.py
Loads a process context dump created using a
Unicorn Context Dumper script into a Unicorn Engine
instance. Once this is performed, emulation can be
started.
"""
import argparse
import binascii
from collections import namedtuple
import datetime
import hashlib
import json
import os
import signal
import struct
import time
import zlib
# Unicorn imports
from unicorn import *
from unicorn.arm_const import *
from unicorn.arm64_const import *
from unicorn.x86_const import *
from unicorn.mips_const import *
# Name of the index file
INDEX_FILE_NAME = "_index.json"
# Page size required by Unicorn
UNICORN_PAGE_SIZE = 0x1000
# Max allowable segment size (1G)
MAX_ALLOWABLE_SEG_SIZE = 1024 * 1024 * 1024
# Alignment functions to align all memory segments to Unicorn page boundaries (4KB pages only)
ALIGN_PAGE_DOWN = lambda x: x & ~(UNICORN_PAGE_SIZE - 1)
ALIGN_PAGE_UP = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE-1)
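# Example: ALIGN_PAGE_DOWN(0x1234) == 0x1000, ALIGN_PAGE_UP(0x1234) == 0x2000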
#---------------------------------------
#---- Unicorn-based heap implementation
class UnicornSimpleHeap(object):
""" Use this class to provide a simple heap implementation. This should
be used if malloc/free calls break things during emulation. This heap also
implements basic guard-page capabilities which enable immediate notice of
heap overflow and underflows.
"""
# Helper data-container used to track chunks
class HeapChunk(object):
def __init__(self, actual_addr, total_size, data_size):
self.total_size = total_size # Total size of the chunk (including padding and guard page)
self.actual_addr = actual_addr # Actual start address of the chunk
self.data_size = data_size # Size requested by the caller of actual malloc call
self.data_addr = actual_addr + UNICORN_PAGE_SIZE # Address where data actually starts
# Returns true if the specified buffer is completely within the chunk, else false
def is_buffer_in_chunk(self, addr, size):
if addr >= self.data_addr and ((addr + size) <= (self.data_addr + self.data_size)):
return True
else:
return False
# Skip the zero-page to avoid weird potential issues with segment registers
HEAP_MIN_ADDR = 0x00002000
HEAP_MAX_ADDR = 0xFFFFFFFF
_uc = None # Unicorn engine instance to interact with
_chunks = [] # List of all known chunks
_debug_print = False # True to print debug information
def __init__(self, uc, debug_print=False):
self._uc = uc
self._debug_print = debug_print
# Add the watchpoint hook that will be used to implement pseudo-guard-page support
self._uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, self.__check_mem_access)
def malloc(self, size):
# Figure out the overall size to be allocated/mapped
# - Allocate at least 1 4k page of memory to make Unicorn happy
# - Add guard pages at the start and end of the region
total_chunk_size = UNICORN_PAGE_SIZE + ALIGN_PAGE_UP(size) + UNICORN_PAGE_SIZE
# Crude but effective way to find space for the chunk:
chunk = None
for addr in xrange(self.HEAP_MIN_ADDR, self.HEAP_MAX_ADDR, UNICORN_PAGE_SIZE):
try:
self._uc.mem_map(addr, total_chunk_size, UC_PROT_READ | UC_PROT_WRITE)
chunk = self.HeapChunk(addr, total_chunk_size, size)
if self._debug_print:
print("Allocating 0x{0:x}-byte chunk @ 0x{1:016x}".format(chunk.data_size, chunk.data_addr))
break
except UcError as e:
continue
# Something went very wrong
if chunk is None:
return 0
self._chunks.append(chunk)
return chunk.data_addr
def calloc(self, size, count):
# Simple wrapper around malloc with calloc() args
return self.malloc(size*count)
def realloc(self, ptr, new_size):
# Wrapper around malloc(new_size) / memcpy(new, old, old_size) / free(old)
if self._debug_print:
print("Reallocating chunk @ 0x{0:016x} to be 0x{1:x} bytes".format(ptr, new_size))
old_chunk = None
for chunk in self._chunks:
if chunk.data_addr == ptr:
old_chunk = chunk
new_chunk_addr = self.malloc(new_size)
if old_chunk is not None:
self._uc.mem_write(new_chunk_addr, str(self._uc.mem_read(old_chunk.data_addr, old_chunk.data_size)))
self.free(old_chunk.data_addr)
return new_chunk_addr
def free(self, addr):
for chunk in self._chunks:
if chunk.is_buffer_in_chunk(addr, 1):
if self._debug_print:
print("Freeing 0x{0:x}-byte chunk @ 0x{0:016x}".format(chunk.req_size, chunk.data_addr))
self._uc.mem_unmap(chunk.actual_addr, chunk.total_size)
self._chunks.remove(chunk)
return True
return False
# Implements basic guard-page functionality
def __check_mem_access(self, uc, access, address, size, value, user_data):
for chunk in self._chunks:
if address >= chunk.actual_addr and ((address + size) <= (chunk.actual_addr + chunk.total_size)):
if not chunk.is_buffer_in_chunk(address, size):
if self._debug_print:
print("Heap over/underflow attempting to {0} 0x{1:x} bytes @ {2:016x}".format( \
"write" if access == UC_MEM_WRITE else "read", size, address))
# Force a memory-based crash
uc.force_crash(UcError(UC_ERR_READ_PROT))
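# Example usage (sketch): route the target's allocator to this heap during
# emulation, e.g.
#   heap = UnicornSimpleHeap(uc, debug_print=True)
#   buf = heap.malloc(0x100)   # returns 0 if no free region was found
#   ...make the emulated malloc() return `buf`...
#   heap.free(buf)
# Any *emulated* access that leaves the requested 0x100 bytes but stays inside
# the chunk's padding/guard pages triggers force_crash() via the hook above.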
#---------------------------
#---- Loading function
class AflUnicornEngine(Uc):
def __init__(self, context_directory, enable_trace=False, debug_print=False):
"""
Initializes an AflUnicornEngine instance, which extends the standard Unicorn engine
with a number of helper routines that are useful for creating afl-unicorn test harnesses.
Parameters:
- context_directory: Path to the directory generated by one of the context dumper scripts
- enable_trace: If True, trace information will be printed to STDOUT
- debug_print: If True, debugging information will be printed while loading the context
"""
# Make sure the index file exists and load it
index_file_path = os.path.join(context_directory, INDEX_FILE_NAME)
if not os.path.isfile(index_file_path):
raise Exception("Index file not found. Expected it to be at {}".format(index_file_path))
# Load the process context from the index file
if debug_print:
print("Loading process context index from {}".format(index_file_path))
index_file = open(index_file_path, 'r')
context = json.load(index_file)
index_file.close()
# Check the context to make sure we have the basic essential components
if 'arch' not in context:
raise Exception("Couldn't find architecture information in index file")
if 'regs' not in context:
raise Exception("Couldn't find register information in index file")
if 'segments' not in context:
raise Exception("Couldn't find segment/memory information in index file")
# Set the UnicornEngine instance's architecture and mode
self._arch_str = context['arch']['arch']
arch, mode = self.__get_arch_and_mode(self._arch_str)
Uc.__init__(self, arch, mode)
# Load the registers
regs = context['regs']
reg_map = self.__get_register_map(self._arch_str)
for register, value in regs.iteritems():
if debug_print:
print("Reg {0} = {1}".format(register, value))
if not reg_map.has_key(register.lower()):
if debug_print:
print("Skipping Reg: {}".format(register))
else:
reg_write_retry = True
try:
self.reg_write(reg_map[register.lower()], value)
reg_write_retry = False
except Exception as e:
if debug_print:
print("ERROR writing register: {}, value: {} -- {}".format(register, value, repr(e)))
if reg_write_retry:
if debug_print:
print("Trying to parse value ({}) as hex string".format(value))
try:
self.reg_write(reg_map[register.lower()], int(value, 16))
except Exception as e:
if debug_print:
print("ERROR writing hex string register: {}, value: {} -- {}".format(register, value, repr(e)))
# Setup the memory map and load memory content
self.__map_segments(context['segments'], context_directory, debug_print)
if enable_trace:
self.hook_add(UC_HOOK_BLOCK, self.__trace_block)
self.hook_add(UC_HOOK_CODE, self.__trace_instruction)
self.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, self.__trace_mem_access)
self.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, self.__trace_mem_invalid_access)
if debug_print:
print("Done loading context.")
def get_arch(self):
return self._arch
def get_mode(self):
return self._mode
def get_arch_str(self):
return self._arch_str
def force_crash(self, uc_error):
""" This function should be called to indicate to AFL that a crash occurred during emulation.
You can pass the exception received from Uc.emu_start
"""
mem_errors = [
UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED,
UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED,
UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED,
]
if uc_error.errno in mem_errors:
# Memory error - throw SIGSEGV
os.kill(os.getpid(), signal.SIGSEGV)
elif uc_error.errno == UC_ERR_INSN_INVALID:
# Invalid instruction - throw SIGILL
os.kill(os.getpid(), signal.SIGILL)
else:
# Not sure what happened - throw SIGABRT
os.kill(os.getpid(), signal.SIGABRT)
def dump_regs(self):
""" Dumps the contents of all the registers to STDOUT """
for reg in sorted(self.__get_register_map(self._arch_str).items(), key=lambda reg: reg[0]):
print(">>> {0:>4}: 0x{1:016x}".format(reg[0], self.reg_read(reg[1])))
# TODO: Make this dynamically get the stack pointer register and pointer width for the current architecture
"""
def dump_stack(self, window=10):
print(">>> Stack:")
stack_ptr_addr = self.reg_read(UC_X86_REG_RSP)
for i in xrange(-window, window + 1):
addr = stack_ptr_addr + (i*8)
print("{0}0x{1:016x}: 0x{2:016x}".format( \
'SP->' if i == 0 else ' ', addr, \
struct.unpack('<Q', self.mem_read(addr, 8))[0]))
"""
#-----------------------------
#---- Loader Helper Functions
def __map_segment(self, name, address, size, perms, debug_print=False):
# - size is unsigned and must be != 0
# - starting address must be aligned to 4KB
# - map size must be multiple of the page size (4KB)
mem_start = address
mem_end = address + size
mem_start_aligned = ALIGN_PAGE_DOWN(mem_start)
mem_end_aligned = ALIGN_PAGE_UP(mem_end)
if debug_print:
if mem_start_aligned != mem_start or mem_end_aligned != mem_end:
print("Aligning segment to page boundary:")
print(" name: {}".format(name))
print(" start: {0:016x} -> {1:016x}".format(mem_start, mem_start_aligned))
print(" end: {0:016x} -> {1:016x}".format(mem_end, mem_end_aligned))
print("Mapping segment from {0:016x} - {1:016x} with perm={2}: {3}".format(mem_start_aligned, mem_end_aligned, perms, name))
if mem_start_aligned < mem_end_aligned:
self.mem_map(mem_start_aligned, mem_end_aligned - mem_start_aligned, perms)
def __map_segments(self, segment_list, context_directory, debug_print=False):
for segment in segment_list:
# Get the segment information from the index
name = segment['name']
seg_start = segment['start']
seg_end = segment['end']
perms = \
(UC_PROT_READ if segment['permissions']['r'] else 0) | \
(UC_PROT_WRITE if segment['permissions']['w'] else 0) | \
(UC_PROT_EXEC if segment['permissions']['x'] else 0)
if debug_print:
print("Handling segment {}".format(name))
# Check for any overlap with existing segments. If there is, it must
# be consolidated and merged together before mapping since Unicorn
# doesn't allow overlapping segments.
found = False
overlap_start = False
overlap_end = False
tmp = 0
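# tmp records the clipping boundary: the end of the existing mapping for a
# start overlap, or its start for an end overlap.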
for (mem_start, mem_end, mem_perm) in self.mem_regions():
mem_end = mem_end + 1
if seg_start >= mem_start and seg_end < mem_end:
found = True
break
if seg_start >= mem_start and seg_start < mem_end:
overlap_start = True
tmp = mem_end
break
if seg_end >= mem_start and seg_end < mem_end:
overlap_end = True
tmp = mem_start
break
# Map memory into the address space if it is of an acceptable size.
if (seg_end - seg_start) > MAX_ALLOWABLE_SEG_SIZE:
if debug_print:
print("Skipping segment (LARGER THAN {0}) from {1:016x} - {2:016x} with perm={3}: {4}".format(MAX_ALLOWABLE_SEG_SIZE, seg_start, seg_end, perms, name))
continue
elif not found: # Make sure it's not already mapped
if overlap_start: # Partial overlap (start)
self.__map_segment(name, tmp, seg_end - tmp, perms, debug_print)
elif overlap_end: # Partial overlap (end)
self.__map_segment(name, seg_start, tmp - seg_start, perms, debug_print)
else: # Not found
self.__map_segment(name, seg_start, seg_end - seg_start, perms, debug_print)
else:
if debug_print:
print("Segment {} already mapped. Moving on.".format(name))
# Load the content (if available)
if 'content_file' in segment and len(segment['content_file']) > 0:
content_file_path = os.path.join(context_directory, segment['content_file'])
if not os.path.isfile(content_file_path):
raise Exception("Unable to find segment content file. Expected it to be at {}".format(content_file_path))
#if debug_print:
# print("Loading content for segment {} from {}".format(name, segment['content_file']))
content_file = open(content_file_path, 'rb')
compressed_content = content_file.read()
content_file.close()
self.mem_write(seg_start, zlib.decompress(compressed_content))
else:
if debug_print:
print("No content found for segment {0} @ {1:016x}".format(name, seg_start))
self.mem_write(seg_start, '\x00' * (seg_end - seg_start))
def __get_arch_and_mode(self, arch_str):
arch_map = {
"x64" : [ UC_X86_REG_RIP, UC_ARCH_X86, UC_MODE_64 ],
"x86" : [ UC_X86_REG_EIP, UC_ARCH_X86, UC_MODE_32 ],
"arm64be" : [ UC_ARM64_REG_PC, UC_ARCH_ARM64, UC_MODE_ARM | UC_MODE_BIG_ENDIAN ],
"arm64le" : [ UC_ARM64_REG_PC, UC_ARCH_ARM64, UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN ],
"armbe" : [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_ARM | UC_MODE_BIG_ENDIAN ],
"armle" : [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN ],
"armbethumb": [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_THUMB | UC_MODE_BIG_ENDIAN ],
"armlethumb": [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_THUMB | UC_MODE_LITTLE_ENDIAN ],
"mips" : [ UC_MIPS_REG_PC, UC_ARCH_MIPS, UC_MODE_MIPS32 | UC_MODE_BIG_ENDIAN ],
"mipsel" : [ UC_MIPS_REG_PC, UC_ARCH_MIPS, UC_MODE_MIPS32 | UC_MODE_LITTLE_ENDIAN ],
}
return (arch_map[arch_str][1], arch_map[arch_str][2])
def __get_register_map(self, arch):
if arch == "arm64le" or arch == "arm64be":
arch = "arm64"
elif arch == "armle" or arch == "armbe" or "thumb" in arch:
arch = "arm"
elif arch == "mipsel":
arch = "mips"
registers = {
"x64" : {
"rax": UC_X86_REG_RAX,
"rbx": UC_X86_REG_RBX,
"rcx": UC_X86_REG_RCX,
"rdx": UC_X86_REG_RDX,
"rsi": UC_X86_REG_RSI,
"rdi": UC_X86_REG_RDI,
"rbp": UC_X86_REG_RBP,
"rsp": UC_X86_REG_RSP,
"r8": UC_X86_REG_R8,
"r9": UC_X86_REG_R9,
"r10": UC_X86_REG_R10,
"r11": UC_X86_REG_R11,
"r12": UC_X86_REG_R12,
"r13": UC_X86_REG_R13,
"r14": UC_X86_REG_R14,
"r15": UC_X86_REG_R15,
"rip": UC_X86_REG_RIP,
"rsp": UC_X86_REG_RSP,
"efl": UC_X86_REG_EFLAGS,
"cs": UC_X86_REG_CS,
"ds": UC_X86_REG_DS,
"es": UC_X86_REG_ES,
"fs": UC_X86_REG_FS,
"gs": UC_X86_REG_GS,
"ss": UC_X86_REG_SS,
},
"x86" : {
"eax": UC_X86_REG_EAX,
"ebx": UC_X86_REG_EBX,
"ecx": UC_X86_REG_ECX,
"edx": UC_X86_REG_EDX,
"esi": UC_X86_REG_ESI,
"edi": UC_X86_REG_EDI,
"ebp": UC_X86_REG_EBP,
"esp": UC_X86_REG_ESP,
"eip": UC_X86_REG_EIP,
"esp": UC_X86_REG_ESP,
"efl": UC_X86_REG_EFLAGS,
# Segment registers removed...
# They caused segfaults (from unicorn?) when they were here
},
"arm" : {
"r0": UC_ARM_REG_R0,
"r1": UC_ARM_REG_R1,
"r2": UC_ARM_REG_R2,
"r3": UC_ARM_REG_R3,
"r4": UC_ARM_REG_R4,
"r5": UC_ARM_REG_R5,
"r6": UC_ARM_REG_R6,
"r7": UC_ARM_REG_R7,
"r8": UC_ARM_REG_R8,
"r9": UC_ARM_REG_R9,
"r10": UC_ARM_REG_R10,
"r11": UC_ARM_REG_R11,
"r12": UC_ARM_REG_R12,
"pc": UC_ARM_REG_PC,
"sp": UC_ARM_REG_SP,
"lr": UC_ARM_REG_LR,
"cpsr": UC_ARM_REG_CPSR
},
"arm64" : {
"x0": UC_ARM64_REG_X0,
"x1": UC_ARM64_REG_X1,
"x2": UC_ARM64_REG_X2,
"x3": UC_ARM64_REG_X3,
"x4": UC_ARM64_REG_X4,
"x5": UC_ARM64_REG_X5,
"x6": UC_ARM64_REG_X6,
"x7": UC_ARM64_REG_X7,
"x8": UC_ARM64_REG_X8,
"x9": UC_ARM64_REG_X9,
"x10": UC_ARM64_REG_X10,
"x11": UC_ARM64_REG_X11,
"x12": UC_ARM64_REG_X12,
"x13": UC_ARM64_REG_X13,
"x14": UC_ARM64_REG_X14,
"x15": UC_ARM64_REG_X15,
"x16": UC_ARM64_REG_X16,
"x17": UC_ARM64_REG_X17,
"x18": UC_ARM64_REG_X18,
"x19": UC_ARM64_REG_X19,
"x20": UC_ARM64_REG_X20,
"x21": UC_ARM64_REG_X21,
"x22": UC_ARM64_REG_X22,
"x23": UC_ARM64_REG_X23,
"x24": UC_ARM64_REG_X24,
"x25": UC_ARM64_REG_X25,
"x26": UC_ARM64_REG_X26,
"x27": UC_ARM64_REG_X27,
"x28": UC_ARM64_REG_X28,
"pc": UC_ARM64_REG_PC,
"sp": UC_ARM64_REG_SP,
"fp": UC_ARM64_REG_FP,
"lr": UC_ARM64_REG_LR,
"nzcv": UC_ARM64_REG_NZCV,
"cpsr": UC_ARM_REG_CPSR,
},
"mips" : {
"0" : UC_MIPS_REG_ZERO,
"at": UC_MIPS_REG_AT,
"v0": UC_MIPS_REG_V0,
"v1": UC_MIPS_REG_V1,
"a0": UC_MIPS_REG_A0,
"a1": UC_MIPS_REG_A1,
"a2": UC_MIPS_REG_A2,
"a3": UC_MIPS_REG_A3,
"t0": UC_MIPS_REG_T0,
"t1": UC_MIPS_REG_T1,
"t2": UC_MIPS_REG_T2,
"t3": UC_MIPS_REG_T3,
"t4": UC_MIPS_REG_T4,
"t5": UC_MIPS_REG_T5,
"t6": UC_MIPS_REG_T6,
"t7": UC_MIPS_REG_T7,
"t8": UC_MIPS_REG_T8,
"t9": UC_MIPS_REG_T9,
"s0": UC_MIPS_REG_S0,
"s1": UC_MIPS_REG_S1,
"s2": UC_MIPS_REG_S2,
"s3": UC_MIPS_REG_S3,
"s4": UC_MIPS_REG_S4,
"s5": UC_MIPS_REG_S5,
"s6": UC_MIPS_REG_S6,
"s7": UC_MIPS_REG_S7,
"s8": UC_MIPS_REG_S8,
"k0": UC_MIPS_REG_K0,
"k1": UC_MIPS_REG_K1,
"gp": UC_MIPS_REG_GP,
"pc": UC_MIPS_REG_PC,
"sp": UC_MIPS_REG_SP,
"fp": UC_MIPS_REG_FP,
"ra": UC_MIPS_REG_RA,
"hi": UC_MIPS_REG_HI,
"lo": UC_MIPS_REG_LO
}
}
return registers[arch]
#---------------------------
# Callbacks for tracing
# TODO: Make integer-printing fixed widths dependent on bitness of architecture
# (i.e. only show 4 bytes for 32-bit, 8 bytes for 64-bit)
# TODO: Figure out how best to determine the capstone mode and architecture here
"""
try:
# If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
from capstone import *
cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
def __trace_instruction(self, uc, address, size, user_data):
mem = uc.mem_read(address, size)
for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
print(" Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
except ImportError:
def __trace_instruction(self, uc, address, size, user_data):
print(" Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
"""
def __trace_instruction(self, uc, address, size, user_data):
print(" Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
def __trace_block(self, uc, address, size, user_data):
print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
def __trace_mem_access(self, uc, access, address, size, value, user_data):
if access == UC_MEM_WRITE:
print(" >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
else:
print(" >>> Read: addr=0x{0:016x} size={1}".format(address, size))
def __trace_mem_invalid_access(self, uc, access, address, size, value, user_data):
if access == UC_MEM_WRITE_UNMAPPED:
print(" >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
else:
print(" >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))

View File

@ -0,0 +1,290 @@
/*
american fuzzy lop - high-performance binary-only instrumentation
-----------------------------------------------------------------
Written by Andrew Griffiths <agriffiths@google.com> and
Michal Zalewski <lcamtuf@google.com>
TCG instrumentation and block chaining support by Andrea Biondo
<andrea.biondo965@gmail.com>
Adapted for afl-unicorn by Dominik Maier <mail@dmnk.co>
Idea & design very much by Andrew Griffiths.
Copyright 2015, 2016 Google Inc. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
This code is a shim patched into the separately-distributed source
code of Unicorn 1.0.1. It leverages the built-in QEMU tracing functionality
to implement AFL-style instrumentation and to take care of the remaining
parts of the AFL fork server logic.
The resulting QEMU binary is essentially a standalone instrumentation
tool; for an example of how to leverage it for other purposes, you can
have a look at afl-showmap.c.
*/
#include <sys/shm.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "../../config.h"
/***************************
* VARIOUS AUXILIARY STUFF *
***************************/
/* A snippet patched into tb_find_slow to inform the parent process that
we have hit a new block that hasn't been translated yet, and to tell
it to translate within its own context, too (this avoids translation
overhead in the next forked-off copy). */
#define AFL_UNICORN_CPU_SNIPPET1 do { \
afl_request_tsl(pc, cs_base, flags); \
} while (0)
/* This snippet kicks in when the instruction pointer is positioned at
_start and does the usual forkserver stuff, not very different from
regular instrumentation injected via afl-as.h. */
#define AFL_UNICORN_CPU_SNIPPET2 do { \
if(unlikely(afl_first_instr == 0)) { \
afl_setup(); \
afl_forkserver(env); \
afl_first_instr = 1; \
} \
afl_maybe_log(tb->pc); \
} while (0)
/* We use one additional file descriptor to relay "needs translation"
messages between the child and the fork server. */
#define TSL_FD (FORKSRV_FD - 1)
/* This is equivalent to afl-as.h: */
static unsigned char *afl_area_ptr;
/* Set in the child process in forkserver mode: */
static unsigned char afl_fork_child;
static unsigned int afl_forksrv_pid;
/* Instrumentation ratio: */
static unsigned int afl_inst_rms = MAP_SIZE;
/* Function declarations. */
static void afl_setup(void);
static void afl_forkserver(CPUArchState*);
static inline void afl_maybe_log(unsigned long);
static void afl_wait_tsl(CPUArchState*, int);
static void afl_request_tsl(target_ulong, target_ulong, uint64_t);
static TranslationBlock *tb_find_slow(CPUArchState*, target_ulong,
target_ulong, uint64_t);
/* Data structure passed around by the translate handlers: */
struct afl_tsl {
target_ulong pc;
target_ulong cs_base;
uint64_t flags;
};
/*************************
* ACTUAL IMPLEMENTATION *
*************************/
/* Set up SHM region and initialize other stuff. */
static void afl_setup(void) {
char *id_str = getenv(SHM_ENV_VAR),
*inst_r = getenv("AFL_INST_RATIO");
int shm_id;
if (inst_r) {
unsigned int r;
r = atoi(inst_r);
if (r > 100) r = 100;
if (!r) r = 1;
afl_inst_rms = MAP_SIZE * r / 100;
}
if (id_str) {
shm_id = atoi(id_str);
afl_area_ptr = shmat(shm_id, NULL, 0);
if (afl_area_ptr == (void*)-1) exit(1);
/* With AFL_INST_RATIO set to a low value, we want to touch the bitmap
so that the parent doesn't give up on us. */
if (inst_r) afl_area_ptr[0] = 1;
}
}
/* Fork server logic, invoked once we hit first emulated instruction. */
static void afl_forkserver(CPUArchState *env) {
static unsigned char tmp[4];
if (!afl_area_ptr) return;
/* Tell the parent that we're alive. If the parent doesn't want
to talk, assume that we're not running in forkserver mode. */
if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
afl_forksrv_pid = getpid();
/* All right, let's await orders... */
while (1) {
pid_t child_pid;
int status, t_fd[2];
/* Whoops, parent dead? */
if (read(FORKSRV_FD, tmp, 4) != 4) exit(2);
/* Establish a channel with child to grab translation commands. We'll
read from t_fd[0], child will write to TSL_FD. */
if (pipe(t_fd) || dup2(t_fd[1], TSL_FD) < 0) exit(3);
close(t_fd[1]);
child_pid = fork();
if (child_pid < 0) exit(4);
if (!child_pid) {
/* Child process. Close descriptors and run free. */
afl_fork_child = 1;
close(FORKSRV_FD);
close(FORKSRV_FD + 1);
close(t_fd[0]);
return;
}
/* Parent. */
close(TSL_FD);
if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) exit(5);
/* Collect translation requests until child dies and closes the pipe. */
afl_wait_tsl(env, t_fd[0]);
/* Get and relay exit status to parent. */
if (waitpid(child_pid, &status, 0) < 0) exit(6);
if (write(FORKSRV_FD + 1, &status, 4) != 4) exit(7);
}
}
/* The equivalent of the tuple logging routine from afl-as.h. */
static inline void afl_maybe_log(unsigned long cur_loc) {
static __thread unsigned long prev_loc;
// DEBUG
//printf("IN AFL_MAYBE_LOG 0x%lx\n", cur_loc);
// MODIFIED FOR UNICORN MODE -> We want to log all addresses,
// so the checks for 'start < addr < end' are removed
if(!afl_area_ptr)
return;
// DEBUG
//printf("afl_area_ptr = %p\n", afl_area_ptr);
/* Looks like QEMU always maps to fixed locations, so ASAN is not a
concern. Phew. But instruction addresses may be aligned. Let's mangle
the value to get something quasi-uniform. */
cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
cur_loc &= MAP_SIZE - 1;
/* Implement probabilistic instrumentation by looking at scrambled block
address. This keeps the instrumented locations stable across runs. */
// DEBUG
//printf("afl_inst_rms = 0x%lx\n", afl_inst_rms);
if (cur_loc >= afl_inst_rms) return;
// DEBUG
//printf("cur_loc = 0x%lx\n", cur_loc);
afl_area_ptr[cur_loc ^ prev_loc]++;
prev_loc = cur_loc >> 1;
}
/* This code is invoked whenever QEMU decides that it doesn't have a
translation of a particular block and needs to compute it. When this happens,
we tell the parent to mirror the operation, so that the next fork() has a
cached copy. */
static void afl_request_tsl(target_ulong pc, target_ulong cb, uint64_t flags) {
struct afl_tsl t;
if (!afl_fork_child) return;
t.pc = pc;
t.cs_base = cb;
t.flags = flags;
if (write(TSL_FD, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
return;
}
/* This is the other side of the same channel. Since timeouts are handled by
afl-fuzz simply killing the child, we can just wait until the pipe breaks. */
static void afl_wait_tsl(CPUArchState *env, int fd) {
struct afl_tsl t;
while (1) {
/* Broken pipe means it's time to return to the fork server routine. */
if (read(fd, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
break;
tb_find_slow(env, t.pc, t.cs_base, t.flags);
}
close(fd);
}

View File

@ -0,0 +1,107 @@
diff --git a/Makefile b/Makefile
index 7d73782..fb3ccfd 100644
--- a/Makefile
+++ b/Makefile
@@ -88,6 +88,10 @@ AR = llvm-ar
LDFLAGS := -fsanitize=address ${LDFLAGS}
endif
+ifeq ($(UNICORN_AFL),yes)
+UNICORN_CFLAGS += -DUNICORN_AFL
+endif
+
ifeq ($(CROSS),)
CC ?= cc
AR ?= ar
diff --git a/config.mk b/config.mk
index c3621fb..c7b4f7e 100644
--- a/config.mk
+++ b/config.mk
@@ -8,7 +8,7 @@
# Compile with debug info when you want to debug code.
# Change this to 'no' for release edition.
-UNICORN_DEBUG ?= yes
+UNICORN_DEBUG ?= no
################################################################################
# Specify which archs you want to compile in. By default, we build all archs.
@@ -28,3 +28,9 @@ UNICORN_STATIC ?= yes
# a shared library.
UNICORN_SHARED ?= yes
+
+
+################################################################################
+# Changing 'UNICORN_AFL = yes' to 'UNICORN_AFL = no' disables AFL instrumentation
+
+UNICORN_AFL ?= yes
diff --git a/qemu/cpu-exec.c b/qemu/cpu-exec.c
index 7755adf..8114b70 100644
--- a/qemu/cpu-exec.c
+++ b/qemu/cpu-exec.c
@@ -24,6 +24,11 @@
#include "uc_priv.h"
+#if defined(UNICORN_AFL)
+#include "../afl-unicorn-cpu-inl.h"
+static int afl_first_instr = 0;
+#endif
+
static tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr);
static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
target_ulong cs_base, uint64_t flags);
@@ -231,6 +236,10 @@ int cpu_exec(struct uc_struct *uc, CPUArchState *env) // qq
next_tb & TB_EXIT_MASK, tb);
}
+#if defined(UNICORN_AFL)
+ AFL_UNICORN_CPU_SNIPPET2;
+#endif
+
/* cpu_interrupt might be called while translating the
TB, but before it is linked into a potentially
infinite loop and becomes env->current_tb. Avoid
@@ -369,6 +378,11 @@ static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
not_found:
/* if no translated code available, then translate it now */
tb = tb_gen_code(cpu, pc, cs_base, (int)flags, 0); // qq
+
+#if defined(UNICORN_AFL)
+ /* There seems to be no chaining in unicorn ever? :( */
+ AFL_UNICORN_CPU_SNIPPET1;
+#endif
found:
/* Move the last found TB to the head of the list */
diff --git a/qemu/translate-all.c b/qemu/translate-all.c
index 1a96c34..7ef4878 100644
--- a/qemu/translate-all.c
+++ b/qemu/translate-all.c
@@ -403,11 +403,25 @@ static PageDesc *page_find_alloc(struct uc_struct *uc, tb_page_addr_t index, int
#if defined(CONFIG_USER_ONLY)
/* We can't use g_malloc because it may recurse into a locked mutex. */
+#if defined(UNICORN_AFL)
+ /* This was added by unicorn-afl to bail out semi-gracefully if out of memory. */
+# define ALLOC(P, SIZE) \
+ do { \
+ void* _tmp = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
+ if (_tmp == (void*)-1) { \
+ qemu_log(">>> Out of memory for stack, bailing out. <<<\n"); \
+ exit(1); \
+ } \
+ (P) = _tmp; \
+ } while (0)
+#else /* !UNICORN_AFL */
# define ALLOC(P, SIZE) \
do { \
P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
} while (0)
+#endif /* UNICORN_AFL */
#else
# define ALLOC(P, SIZE) \
do { P = g_malloc0(SIZE); } while (0)

View File

@ -0,0 +1,41 @@
Compiling simple_target.c
==========================
You shouldn't need to compile simple_target.c since a MIPS binary version is
pre-built and shipped with afl-unicorn. This file documents how the binary
was built in case you want to rebuild it or recompile it for any reason.
The pre-built binary (simple_target.bin) was built by cross-compiling
simple_target.c for MIPS using the mips-linux-gnu-gcc package on an Ubuntu
16.04 LTS system. This cross compiler (and associated binutils) was installed
from apt-get packages:
```
sudo apt-get install gcc-mips-linux-gnu
```
simple_target.c was compiled without optimization, as position-independent code,
and without the standard library using the following command line:
```
mips-linux-gnu-gcc -o simple_target.elf simple_target.c -fPIC -O0 -nostdlib
```
The .text section from the resulting ELF binary was then extracted to create
the raw binary blob that is loaded and emulated by simple_test_harness.py:
```
mips-linux-gnu-objcopy -O binary --only-section=.text simple_target.elf simple_target.bin
```
In summary, to recreate simple_target.bin execute the following:
```
mips-linux-gnu-gcc -o simple_target.elf simple_target.c -fPIC -O0 -nostdlib && \
mips-linux-gnu-objcopy -O binary --only-section=.text simple_target.elf simple_target.bin && \
rm simple_target.elf
```
Note that the resulting binary is padded with null bytes to a 16-byte
alignment. This matters when emulating it: on MIPS the null padding decodes
as NOPs, which is what executes after main() returns.
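As a quick sanity check (a sketch, assuming the file names used above), the padding can be verified from Python:

```
import os

size = os.path.getsize("simple_target.bin")
assert size % 16 == 0, "expected null padding to a 16-byte boundary"
print("simple_target.bin: {} bytes, 16-byte aligned".format(size))
```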

View File

@ -0,0 +1 @@
a

View File

@ -0,0 +1 @@
a

View File

@ -0,0 +1,3 @@
python
../samples/simple/simple_test_harness.py
../samples/simple/sample_inputs

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,28 @@
start_time : 1563137991
last_update : 1563155870
fuzzer_pid : 16972
cycles_done : 86
execs_done : 4807
execs_per_sec : 0.00
paths_total : 1
paths_favored : 1
paths_found : 0
paths_imported : 0
max_depth : 1
cur_path : 0
pending_favs : 0
pending_total : 0
variable_paths : 0
stability : 100.00%
bitmap_cvg : 0.00%
unique_crashes : 0
unique_hangs : 0
last_path : 0
last_crash : 0
last_hang : 0
execs_since_crash : 4807
exec_timeout : 9999999
afl_banner : python
afl_version : 2.52b
target_mode : unicorn
command_line : /mnt/c/Users/DMaier/tmp/afl-unicorn/afl-fuzz -U -m none -t 9999999 -i ../samples/simple/in -o ../samples/simple/out -- python ../samples/simple/simple_test_harness.py ../samples/simple/sample_inputs

View File

@ -0,0 +1,7 @@
# unix_time, cycles_done, cur_path, paths_total, pending_total, pending_favs, map_size, unique_crashes, unique_hangs, max_depth, execs_per_sec
1563154187, 0, 0, 1, 1, 1, 0.00%, 0, 0, 1, inf
1563154197, 2, 0, 1, 0, 0, 0.00%, 0, 0, 1, 36.23
1563154202, 9, 0, 1, 0, 0, 0.00%, 0, 0, 1, 32.83
1563154207, 15, 0, 1, 0, 0, 0.00%, 0, 0, 1, 31.33
1563154212, 22, 0, 1, 0, 0, 0.00%, 0, 0, 1, 31.05
1563154217, 29, 0, 1, 0, 0, 0.00%, 0, 0, 1, 33.90

View File

@ -0,0 +1 @@
a

View File

@ -0,0 +1 @@
abcd

Binary file not shown.

View File

@ -0,0 +1 @@


View File

@ -0,0 +1 @@


View File

@ -0,0 +1 @@


Binary file not shown.

View File

@ -0,0 +1,37 @@
/*
* Sample target file to test afl-unicorn fuzzing capabilities.
* This is a very trivial example that will crash pretty easily
* in several different exciting ways.
*
* Input is assumed to come from a buffer located at DATA_ADDRESS
* (0x00300000), so make sure that your Unicorn emulation of this
* puts user data there.
*
* Written by Nathan Voss <njvoss99@gmail.com>
*/
// Magic address where mutated data will be placed
#define DATA_ADDRESS 0x00300000
int main(void)
{
unsigned char* data_buf = (unsigned char*)DATA_ADDRESS;
if(data_buf[20] != 0)
{
// Cause an 'invalid read' crash if data[20] is non-zero
unsigned char invalid_read = *(unsigned char*)0x00000000;
}
else if(data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2])
{
// Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2]
unsigned char invalid_read = *(unsigned char*)0x00000000;
}
else if(data_buf[9] == 0x00 && data_buf[10] != 0x00 && data_buf[11] == 0x00)
{
// Cause a crash if data[10] is not zero, but [9] and [11] are zero
unsigned char invalid_read = *(unsigned char*)0x00000000;
}
return 0;
}
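Given these branches, crashing inputs are easy to construct by hand. A short sketch (output file names are illustrative) that writes one input per crash condition:

```
def write_case(name, buf):
    with open(name, "wb") as f:
        f.write(bytes(buf))

case = bytearray(32)
case[20] = 0x41                         # data_buf[20] != 0 -> first invalid read
write_case("crash_byte20", case)

case = bytearray(32)
case[0], case[1], case[2] = 0x15, 2, 1  # 0x10 < data[0] < 0x20 and data[1] > data[2]
write_case("crash_compare", case)

case = bytearray(32)
case[10] = 1                            # data[9] == 0, data[10] != 0, data[11] == 0
write_case("crash_sandwich", case)
```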

View File

@ -0,0 +1,170 @@
"""
Simple test harness for AFL's Unicorn Mode.
This loads the simple_target.bin binary (precompiled as MIPS code) into
Unicorn's memory map for emulation, places the specified input into
simple_target's buffer (hardcoded to be at 0x300000), and executes 'main()'.
If any crashes occur during emulation, this script throws a matching signal
to tell AFL that a crash occurred.
Run under AFL as follows:
$ cd <afl_path>/unicorn_mode/samples/simple/
$ ../../../afl-fuzz -U -m none -i ./sample_inputs -o ./output -- python simple_test_harness.py @@
"""
import argparse
import os
import signal
from unicorn import *
from unicorn.mips_const import *
# Path to the file containing the binary to emulate
BINARY_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'simple_target.bin')
# Memory map for the code to be tested
CODE_ADDRESS = 0x00100000 # Arbitrary address where code to test will be loaded
CODE_SIZE_MAX = 0x00010000 # Max size for the code (64kb)
STACK_ADDRESS = 0x00200000 # Address of the stack (arbitrarily chosen)
STACK_SIZE = 0x00010000 # Size of the stack (arbitrarily chosen)
DATA_ADDRESS = 0x00300000 # Address where mutated data will be placed
DATA_SIZE_MAX = 0x00010000 # Maximum allowable size of mutated data
try:
# If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
from capstone import *
cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
def unicorn_debug_instruction(uc, address, size, user_data):
mem = uc.mem_read(address, size)
for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
print(" Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
except ImportError:
def unicorn_debug_instruction(uc, address, size, user_data):
print(" Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
def unicorn_debug_block(uc, address, size, user_data):
print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
if access == UC_MEM_WRITE:
print(" >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
else:
print(" >>> Read: addr=0x{0:016x} size={1}".format(address, size))
def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
if access == UC_MEM_WRITE_UNMAPPED:
print(" >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
else:
print(" >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))
def force_crash(uc_error):
# This function should be called to indicate to AFL that a crash occurred during emulation.
# Pass in the exception received from Uc.emu_start()
mem_errors = [
UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED,
UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED,
UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED,
]
if uc_error.errno in mem_errors:
# Memory error - throw SIGSEGV
os.kill(os.getpid(), signal.SIGSEGV)
elif uc_error.errno == UC_ERR_INSN_INVALID:
# Invalid instruction - throw SIGILL
os.kill(os.getpid(), signal.SIGILL)
else:
# Not sure what happened - throw SIGABRT
os.kill(os.getpid(), signal.SIGABRT)
def main():
parser = argparse.ArgumentParser(description="Test harness for simple_target.bin")
parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input to load")
parser.add_argument('-d', '--debug', default=False, action="store_true", help="Enables debug tracing")
args = parser.parse_args()
# Instantiate a MIPS32 big endian Unicorn Engine instance
uc = Uc(UC_ARCH_MIPS, UC_MODE_MIPS32 + UC_MODE_BIG_ENDIAN)
if args.debug:
uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
uc.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, unicorn_debug_mem_invalid_access)
#---------------------------------------------------
# Load the binary to emulate and map it into memory
print("Loading data input from {}".format(args.input_file))
binary_file = open(BINARY_FILE, 'rb')
binary_code = binary_file.read()
binary_file.close()
# Make sure the binary fits within the allocated code region
if len(binary_code) > CODE_SIZE_MAX:
print("Binary code is too large (> {} bytes)".format(CODE_SIZE_MAX))
return
# Map the code region and write the binary into it
uc.mem_map(CODE_ADDRESS, CODE_SIZE_MAX)
uc.mem_write(CODE_ADDRESS, binary_code)
# Set the program counter to the start of the code
start_address = CODE_ADDRESS # Address of entry point of main()
end_address = CODE_ADDRESS + 0xf4 # Address of last instruction in main()
uc.reg_write(UC_MIPS_REG_PC, start_address)
#-----------------
# Setup the stack
uc.mem_map(STACK_ADDRESS, STACK_SIZE)
uc.reg_write(UC_MIPS_REG_SP, STACK_ADDRESS + STACK_SIZE)
#-----------------------------------------------------
# Emulate 1 instruction to kick off AFL's fork server
# THIS MUST BE DONE BEFORE LOADING USER DATA!
# If this isn't done every single run, the AFL fork server
# will not be started appropriately and you'll get erratic results!
# It doesn't matter what this returns with, it just has to execute at
# least one instruction in order to get the fork server started.
print("Starting the AFL forkserver by executing 1 instruction")
try:
uc.emu_start(uc.reg_read(UC_MIPS_REG_PC), 0, 0, count=1)
except UcError as e:
print("ERROR: Failed to execute a single instruction (error: {})!".format(e))
return
#-----------------------------------------------
# Load the mutated input and map it into memory
# Load the mutated input from disk
print("Loading data input from {}".format(args.input_file))
input_file = open(args.input_file, 'rb')
input_data = input_file.read()
input_file.close()
# Apply constraints to the mutated input
if len(input_data) > DATA_SIZE_MAX:
print("Test input is too long (> {} bytes)".format(DATA_SIZE_MAX))
return
# Write the mutated input into the data buffer
uc.mem_map(DATA_ADDRESS, DATA_SIZE_MAX)
uc.mem_write(DATA_ADDRESS, input_data)
#------------------------------------------------------------
# Emulate the code, allowing it to process the mutated input
print("Executing until a crash or execution reaches 0x{0:016x}".format(end_address))
try:
result = uc.emu_start(uc.reg_read(UC_MIPS_REG_PC), end_address, timeout=0, count=0)
except UcError as e:
print("Execution failed with error: {}".format(e))
force_crash(e)
print("Done.")
if __name__ == "__main__":
main()