mirror of https://github.com/AFLplusplus/AFLplusplus.git (synced 2025-06-18 04:38:08 +00:00)
Added AFL-Unicorn mode
2 .gitignore vendored

@@ -19,3 +19,5 @@ afl-tmin
as
qemu_mode/qemu-3.1.0
qemu_mode/qemu-3.1.0.tar.xz
+unicorn_mode/unicorn
+unicorn_mode/unicorn-*
afl-analyze.c

@@ -77,7 +77,6 @@ static volatile u8
child_timed_out; /* Child timed out? */


/* Constants used for describing byte behavior. */

#define RESP_NONE 0x00 /* Changing byte is a no-op. */

@@ -741,7 +740,8 @@ static void usage(u8* argv0) {
" -f file - input file read by the tested program (stdin)\n"
" -t msec - timeout for each run (%u ms)\n"
" -m megs - memory limit for child process (%u MB)\n"
-" -Q - use binary-only instrumentation (QEMU mode)\n\n"
+" -Q - use binary-only instrumentation (QEMU mode)\n"
+" -U - use unicorn-based instrumentation (Unicorn mode)\n\n"

"Analysis settings:\n\n"

@@ -867,20 +867,19 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {

}


/* Main entry point */

int main(int argc, char** argv) {

s32 opt;
-u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
char** use_argv;

doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;

SAYF(cCYA "afl-analyze" VERSION cRST " by <lcamtuf@google.com>\n");

-while ((opt = getopt(argc,argv,"+i:f:m:t:eQ")) > 0)
+while ((opt = getopt(argc,argv,"+i:f:m:t:eQU")) > 0)

switch (opt) {

@@ -960,6 +959,14 @@ int main(int argc, char** argv) {
qemu_mode = 1;
break;

+case 'U':
+
+if (unicorn_mode) FATAL("Multiple -U options not supported");
+if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+
+unicorn_mode = 1;
+break;
+
default:

usage(argv[0]);
14 afl-cmin

@@ -49,9 +49,9 @@ MEM_LIMIT=100
TIMEOUT=none

unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
-AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE
+AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE

-while getopts "+i:o:f:m:t:eQC" opt; do
+while getopts "+i:o:f:m:t:eQUC" opt; do

case "$opt" in

@@ -83,6 +83,11 @@ while getopts "+i:o:f:m:t:eQC" opt; do
test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
QEMU_MODE=1
;;
+"U")
+EXTRA_PAR="$EXTRA_PAR -U"
+test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
+UNICORN_MODE=1
+;;
"?")
exit 1
;;

@@ -111,7 +116,8 @@ Execution control settings:
-m megs - memory limit for child process ($MEM_LIMIT MB)
-t msec - run time limit for child process (none)
-Q - use binary-only instrumentation (QEMU mode)
+-U - use unicorn-based instrumentation (Unicorn mode)

Minimization settings:

-C - keep crashing inputs, reject everything else

@@ -196,7 +202,7 @@ if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then

fi

-if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" ]; then
+if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then

if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2
42 afl-fuzz.c

@@ -208,6 +208,7 @@ EXP_ST u8 skip_deterministic, /* Skip deterministic stages? */
shuffle_queue, /* Shuffle input queue? */
bitmap_changed = 1, /* Time to update bitmap? */
qemu_mode, /* Running in QEMU mode? */
+unicorn_mode, /* Running in Unicorn mode? */
skip_requested, /* Skip request, via SIGUSR1 */
run_over10m, /* Run time over 10 minutes? */
persistent_mode, /* Running in persistent mode? */

@@ -1547,6 +1548,7 @@ static void minimize_bits(u8* dst, u8* src) {
}



/* Find first power of two greater or equal to val (assuming val under
2^63). */

@@ -1569,6 +1571,7 @@ static u64 next_p2(u64 val) {
for every byte in the bitmap. We win that slot if there is no previous
contender, or if the contender has a more favorable speed x size factor. */


static void update_bitmap_score(struct queue_entry* q) {

u32 i;

@@ -1584,6 +1587,7 @@ static void update_bitmap_score(struct queue_entry* q) {

if (top_rated[i]) {

/* Faster-executing or smaller test cases are favored. */
u64 top_rated_fuzz_p2 = next_p2 (top_rated[i]->n_fuzz);
u64 top_rated_fav_factor = top_rated[i]->exec_us * top_rated[i]->len;

@@ -1682,7 +1686,6 @@ static void cull_queue(void) {
}


/* Load postprocessor, if available. */

static void setup_post(void) {

@@ -2301,6 +2304,8 @@ EXP_ST void init_forkserver(char** argv) {

if (!forksrv_pid) {

+/* CHILD PROCESS */
+
struct rlimit r;

/* Umpf. On OpenBSD, the default fd limit for root users is set to

@@ -2408,6 +2413,8 @@ EXP_ST void init_forkserver(char** argv) {

}

+/* PARENT PROCESS */
+
/* Close the unneeded endpoints. */

close(ctl_pipe[0]);

@@ -3755,7 +3762,7 @@ static void write_stats_file(double bitmap_cvg, double stability, double eps) {
"exec_timeout : %u\n"
"afl_banner : %s\n"
"afl_version : " VERSION "\n"
-"target_mode : %s%s%s%s%s%s%s\n"
+"target_mode : %s%s%s%s%s%s%s%s\n"
"command_line : %s\n",
start_time / 1000, get_cur_time() / 1000, getpid(),
queue_cycle ? (queue_cycle - 1) : 0, total_execs, eps,

@@ -3765,10 +3772,10 @@ static void write_stats_file(double bitmap_cvg, double stability, double eps) {
unique_hangs, last_path_time / 1000, last_crash_time / 1000,
last_hang_time / 1000, total_execs - last_crash_execs,
exec_tmout, use_banner,
-qemu_mode ? "qemu " : "", dumb_mode ? " dumb " : "",
+unicorn_mode ? "unicorn" : "", qemu_mode ? "qemu " : "", dumb_mode ? " dumb " : "",
no_forkserver ? "no_forksrv " : "", crash_mode ? "crash " : "",
persistent_mode ? "persistent " : "", deferred_mode ? "deferred " : "",
-(qemu_mode || dumb_mode || no_forkserver || crash_mode ||
+(unicorn_mode || qemu_mode || dumb_mode || no_forkserver || crash_mode ||
persistent_mode || deferred_mode) ? "" : "default",
orig_cmdline);
/* ignore errors */

@@ -4702,7 +4709,7 @@ static void show_init_stats(void) {

SAYF("\n");

-if (avg_us > (qemu_mode ? 50000 : 10000))
+if (avg_us > ((qemu_mode || unicorn_mode) ? 50000 : 10000))
WARNF(cLRD "The target binary is pretty slow! See %s/perf_tips.txt.",
doc_path);

@@ -4779,6 +4786,7 @@ static void show_init_stats(void) {

}


#ifdef USE_PYTHON
static u8 trim_case_python(char** argv, struct queue_entry* q, u8* in_buf) {

@@ -11090,7 +11098,7 @@ EXP_ST void check_binary(u8* fname) {

#endif /* ^!__APPLE__ */

-if (!qemu_mode && !dumb_mode &&
+if (!qemu_mode && !unicorn_mode && !dumb_mode &&
!memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) {

SAYF("\n" cLRD "[-] " cRST

@@ -11110,15 +11118,15 @@ EXP_ST void check_binary(u8* fname) {

}

-if (qemu_mode &&
+if ((qemu_mode || unicorn_mode) &&
memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) {

SAYF("\n" cLRD "[-] " cRST
"This program appears to be instrumented with afl-gcc, but is being run in\n"
-" QEMU mode (-Q). This is probably not what you want - this setup will be\n"
-" slow and offer no practical benefits.\n");
+" QEMU or Unicorn mode (-Q or -U). This is probably not what you want -\n"
+" this setup will be slow and offer no practical benefits.\n");

-FATAL("Instrumentation found in -Q mode");
+FATAL("Instrumentation found in -Q or -U mode");

}

@@ -11245,6 +11253,7 @@ static void usage(u8* argv0) {
" -t msec - timeout for each run (auto-scaled, 50-%u ms)\n"
" -m megs - memory limit for child process (%u MB)\n"
" -Q - use binary-only instrumentation (QEMU mode)\n"
+" -U - use Unicorn-based instrumentation (Unicorn mode)\n\n"
" -L minutes - use MOpt(imize) mode and set the limit time for entering the\n"
" pacemaker mode (minutes of no new paths, 0 = immediately).\n"
" a recommended value is 10-60. see docs/README.MOpt\n\n"

@@ -11863,7 +11872,6 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {

}


/* Make a copy of the current command line. */

static void save_cmdline(u32 argc, char** argv) {

@@ -11925,7 +11933,7 @@ int main(int argc, char** argv) {
gettimeofday(&tv, &tz);
init_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();

-while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:Qe:p:s:V:E:L:")) > 0)
+while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:QUe:p:s:V:E:L:")) > 0)

switch (opt) {

@@ -12126,6 +12134,15 @@ int main(int argc, char** argv) {

break;

+case 'U': /* Unicorn mode */
+
+if (unicorn_mode) FATAL("Multiple -U options not supported");
+unicorn_mode = 1;
+
+if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+
+break;
+
case 'V': {
most_time_key = 1;
if (sscanf(optarg, "%llu", &most_time) < 1 || optarg[0] == '-')

@@ -12259,6 +12276,7 @@ int main(int argc, char** argv) {

if (crash_mode) FATAL("-C and -n are mutually exclusive");
if (qemu_mode) FATAL("-Q and -n are mutually exclusive");
+if (unicorn_mode) FATAL("-U and -n are mutually exclusive");

}
afl-showmap.c

@@ -72,7 +72,6 @@ static volatile u8
child_timed_out, /* Child timed out? */
child_crashed; /* Child crashed? */


/* Classify tuple counts. Instead of mapping to individual bits, as in
afl-fuzz.c, we map to more user-friendly numbers between 1 and 8. */

@@ -405,7 +404,9 @@ static void usage(u8* argv0) {

" -t msec - timeout for each run (none)\n"
" -m megs - memory limit for child process (%u MB)\n"
-" -Q - use binary-only instrumentation (QEMU mode)\n\n"
+" -Q - use binary-only instrumentation (QEMU mode)\n"
+" -U - use Unicorn-based instrumentation (Unicorn mode)\n"
+" (Not necessary, here for consistency with other afl-* tools)\n\n"

"Other settings:\n\n"

@@ -534,19 +535,18 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {

}

/* Main entry point */

int main(int argc, char** argv) {

s32 opt;
-u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
u32 tcnt;
char** use_argv;

doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;

-while ((opt = getopt(argc,argv,"+o:m:t:A:eqZQbc")) > 0)
+while ((opt = getopt(argc,argv,"+o:m:t:A:eqZQUbc")) > 0)

switch (opt) {

@@ -643,6 +643,14 @@ int main(int argc, char** argv) {
qemu_mode = 1;
break;

+case 'U':
+
+if (unicorn_mode) FATAL("Multiple -U options not supported");
+if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+
+unicorn_mode = 1;
+break;
+
case 'b':

/* Secret undocumented mode. Writes output in raw binary format
17 afl-tmin.c

@@ -898,7 +898,9 @@ static void usage(u8* argv0) {
" -f file - input file read by the tested program (stdin)\n"
" -t msec - timeout for each run (%u ms)\n"
" -m megs - memory limit for child process (%u MB)\n"
-" -Q - use binary-only instrumentation (QEMU mode)\n\n"
+" -Q - use binary-only instrumentation (QEMU mode)\n"
+" -U - use Unicorn-based instrumentation (Unicorn mode)\n"
+" (Not necessary, here for consistency with other afl-* tools)\n\n"

"Minimization settings:\n\n"

@@ -1025,7 +1027,6 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {

}


/* Read mask bitmap from file. This is for the -B option. */

static void read_bitmap(u8* fname) {

@@ -1047,14 +1048,14 @@ static void read_bitmap(u8* fname) {
int main(int argc, char** argv) {

s32 opt;
-u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
char** use_argv;

doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;

SAYF(cCYA "afl-tmin" VERSION cRST " by <lcamtuf@google.com>\n");

-while ((opt = getopt(argc,argv,"+i:o:f:m:t:B:xeQ")) > 0)
+while ((opt = getopt(argc,argv,"+i:o:f:m:t:B:xeQU")) > 0)

switch (opt) {

@@ -1146,6 +1147,14 @@ int main(int argc, char** argv) {
qemu_mode = 1;
break;

+case 'U':
+
+if (unicorn_mode) FATAL("Multiple -U options not supported");
+if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+
+unicorn_mode = 1;
+break;
+
case 'B': /* load bitmap */

/* This is a secret undocumented option! It is speculated to be useful
4 config.h

@@ -59,6 +59,10 @@

#define MEM_LIMIT_QEMU 200

+/* Default memory limit when running in Unicorn mode (MB): */
+
+#define MEM_LIMIT_UNICORN 200
+
/* Number of calibration cycles per every new test case (and for test
cases that show variable behavior): */
docs/binaryonly_fuzzing.txt

@@ -12,7 +12,7 @@ The following is a description of how these can be fuzzed with afl++

!!!!!
TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then
-use afl -Q qemu_mode, or better: use both in parallel
+use afl -Q qemu_mode.
!!!!!


@@ -27,6 +27,16 @@ It is the easiest to use alternative and even works for cross-platform binaries.
As it is included in afl++ this needs no URL.

+UNICORN
+-------
+Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
+In contrast to QEMU, Unicorn does not offer a full system or even userland emulation.
+Runtime environment and/or loaders have to be written from scratch, if needed.
+On top, block chaining has been removed. This means the speed boost introduced
+into the patched QEMU mode of afl++ cannot simply be ported over to Unicorn.
+For further information, check out ./unicorn_mode.txt.
+
+
DYNINST
-------
Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio

@@ -111,21 +121,6 @@ Pintool solutions:
https://github.com/spinpx/afl_pin_mode <= only old Pintool version supported


-Non-AFL solutions
------------------
-
-There are many binary-only fuzzing frameworks. Some are great for CTFs but don't
-work with large binaries, other are very slow but have good path discovery,
-some are very hard to set-up ...
-
-QSYM: https://github.com/sslab-gatech/qsym
-Manticore: https://github.com/trailofbits/manticore
-S2E: https://github.com/S2E
-<please send me any missing that are good>
-

That's it!
News, corrections, updates?
Email vh@thc.org
107 docs/unicorn_mode.txt Normal file

@@ -0,0 +1,107 @@
=========================================================
Unicorn-based binary-only instrumentation for afl-fuzz
=========================================================

1) Introduction
---------------

The code in ./unicorn_mode allows you to build a standalone feature that
leverages the Unicorn Engine and allows callers to obtain instrumentation
output for black-box, closed-source binary code snippets. This mechanism
can then be used by afl-fuzz to stress-test targets that couldn't be built
with afl-gcc or used in QEMU mode, or with other extensions such as
TriforceAFL.

There is a significant performance penalty compared to native AFL,
but at least we're able to use AFL on these binaries, right?

The idea and much of the implementation comes from Nathan Voss <njvoss299@gmail.com>.

2) How to use
-------------

*** Building AFL's Unicorn Mode ***

First, make afl as usual.
Once that completes successfully, you need to build and add in the Unicorn Mode
features:

  $ cd unicorn_mode
  $ ./build_unicorn_support.sh

NOTE: This script downloads a recent Unicorn Engine commit that has been tested
and is stable-ish from the Unicorn github page. If you are offline, you'll need
to hack up this script a little bit and supply your own copy of Unicorn's latest
stable release. It's not very hard; just check out the beginning of the
build_unicorn_support.sh script and adjust as necessary.

Building Unicorn will take a little while (~5-10 minutes). Once it completes,
it automatically compiles a sample application and verifies that it works.

*** Fuzzing with Unicorn Mode ***

To really use unicorn-mode effectively you need to prepare the following:

* Relevant binary code to be fuzzed
* Knowledge of the memory map and good starting state
* Folder containing sample inputs to start fuzzing with
  - Same ideas as any other AFL inputs
  - Quality/speed of results will depend greatly on quality of starting
    samples
  - See AFL's guidance on how to create a sample corpus
* Unicorn-based test harness (a minimal sketch follows this list) which:
  - Adds memory map regions
  - Loads binary code into memory
  - Emulates at least one instruction*
    - Yeah, this is lame. See 'Gotchas' section below for more info
  - Loads and verifies data to fuzz from a command-line specified file
    - AFL will provide mutated inputs by changing the file passed to
      the test harness
    - Presumably the data to be fuzzed is at a fixed buffer address
    - If input constraints (size, invalid bytes, etc.) are known they
      should be checked after the file is loaded. If a constraint
      fails, just exit the test harness. AFL will treat the input as
      'uninteresting' and move on.
  - Sets up registers and memory state for beginning of test
  - Emulates the code of interest from beginning to end
  - If a crash is detected, the test harness must 'crash' by
    throwing a signal (SIGSEGV, SIGKILL, SIGABRT, etc.)
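Below is a minimal sketch of such a harness, condensing the requirements above.
It is only an illustration: the addresses, the placeholder NOP bytes, and the
input-size constraint are made-up values, and a real harness would load actual
target code and starting state (see unicorn_mode/samples and
helper_scripts/template_test_harness.py for a complete version):

  import os
  import signal
  import sys

  from unicorn import *
  from unicorn.x86_const import *

  CODE_ADDRESS  = 0x100000    # hypothetical load address for the target code
  STACK_ADDRESS = 0x200000    # hypothetical stack placement
  INPUT_ADDRESS = 0x300000    # hypothetical fixed input buffer address
  CODE = "\x90\x90\x90"       # placeholder NOPs; load your real binary code here

  def main():
      uc = Uc(UC_ARCH_X86, UC_MODE_64)

      # Add memory map regions and load the binary code into memory.
      uc.mem_map(CODE_ADDRESS, 0x1000)
      uc.mem_map(STACK_ADDRESS, 0x1000)
      uc.mem_map(INPUT_ADDRESS, 0x1000)
      uc.mem_write(CODE_ADDRESS, CODE)

      # Emulate at least one instruction *before* touching the input file,
      # so the fork server starts up correctly (see 'Gotchas' below).
      uc.emu_start(CODE_ADDRESS, 0, 0, count=1)

      # Load the mutated input that AFL passes on the command line.
      data = open(sys.argv[1], 'rb').read()
      if len(data) > 0x1000:
          return    # constraint violated: just exit, AFL treats it as uninteresting

      # Set up registers and memory state for the beginning of the test.
      uc.mem_write(INPUT_ADDRESS, data)
      uc.reg_write(UC_X86_REG_RSP, STACK_ADDRESS + 0x800)
      uc.reg_write(UC_X86_REG_RDI, INPUT_ADDRESS)

      # Emulate the code of interest; on an emulation error, 'crash' with a
      # real signal so AFL registers the input as a crash.
      try:
          uc.emu_start(CODE_ADDRESS, CODE_ADDRESS + len(CODE))
      except UcError:
          os.kill(os.getpid(), signal.SIGSEGV)

  if __name__ == '__main__':
      main()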
Once you have all those things ready to go you just need to run afl-fuzz in
'unicorn-mode' by passing in the '-U' flag:

  $ afl-fuzz -U -m none -i /path/to/inputs -o /path/to/results -- ./test_harness @@

The normal afl-fuzz command line format applies to everything here. Refer to
AFL's main documentation for more info about how to use afl-fuzz effectively.

For a much clearer vision of what all of this looks like, please refer to the
sample provided in the 'unicorn_mode/samples' directory. There is also a blog
post that goes over the basics at:

  https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf

The 'helper_scripts' directory also contains several helper scripts that allow you
to dump context from a running process, load it, and hook heap allocations. For details
on how to use these, check out the follow-up blog post to the one linked above.
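As a rough sketch of that workflow (the context directory name below is a
placeholder; AflUnicornEngine and UnicornSimpleHeap come from the unicorn_loader
helper that the template harness imports):

  import unicorn_loader

  # Load a context directory produced by one of the unicorn_dumper_* scripts.
  uc = unicorn_loader.AflUnicornEngine('UnicornContext_20190101_120000',
                                       enable_trace=False, debug_print=False)

  # Optionally route the target's heap operations through the simple
  # stand-in heap to avoid OS/kernel issues during emulation.
  heap = unicorn_loader.UnicornSimpleHeap(uc, debug_print=False)
  buf_addr = heap.malloc(0x100)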
An example use of AFL-Unicorn mode is discussed in the paper Unicorefuzz:
https://www.usenix.org/conference/woot19/presentation/maier

3) Gotchas, feedback, bugs
--------------------------

To make sure that AFL's fork server starts up correctly the Unicorn test
harness script must emulate at least one instruction before loading the
data that will be fuzzed from the input file. It doesn't matter what the
instruction is, nor if it is valid. This is an artifact of how the fork server
is started and could likely be fixed with some clever re-arranging of the
patches applied to Unicorn.
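In harness terms the required ordering is simply (mirroring the template
harness; START_ADDRESS is whatever entry point your harness emulates from):

  # Kick off the fork server on a single throwaway instruction...
  uc.emu_start(START_ADDRESS, 0, 0, count=1)

  # ...and only afterwards read the mutated input file supplied by AFL.
  data = open(sys.argv[1], 'rb').read()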
Running the build script builds Unicorn and its python bindings and installs
them on your system. This installation will supersede any existing Unicorn
installation with the patched afl-unicorn version.

Refer to the unicorn_mode/samples/arm_example/arm_tester.c for an example
of how to do this properly! If you don't get this right, AFL will not
load any mutated inputs and your fuzzing will be useless!
16 unicorn_mode/README.md Normal file

@@ -0,0 +1,16 @@
```
        __ _                  _
  __ _ / _| |    _   _ _ __  (_) ___ ___  _ __ _ __
 / _` | |_| |___| | | | '_ \| |/ __/ _ \| '__| '_ \
| (_| |  _| |___| |_| | | | | | (_| (_) | |  | | | |
 \__,_|_|  |_|   \__,_|_| |_|_|\___\___/|_|  |_| |_|

```

afl-unicorn lets you fuzz any piece of binary that can be emulated by [Unicorn Engine](http://www.unicorn-engine.org/).

For an in-depth description of what this is, how to install it, and how to use it check out this [blog post](https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf).

For general help with AFL, please refer to both the official [AFL website](http://lcamtuf.coredump.cx/afl/) and the documents in the /doc/ directory.

Created by Nathan Voss, originally funded by [Battelle](https://www.battelle.org/cyber).
186 unicorn_mode/build_unicorn_support.sh Normal file

@@ -0,0 +1,186 @@
#!/bin/sh
#
# american fuzzy lop - Unicorn-Mode build script
# ----------------------------------------------
#
# Written by Nathan Voss <njvoss99@gmail.com>
#
# Adapted from code by Andrew Griffiths <agriffiths@google.com> and
# Michal Zalewski <lcamtuf@google.com>
#
# Adapted for Afl++ by Dominik Maier <mail@dmnk.co>
#
# Copyright 2017 Battelle Memorial Institute. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# This script downloads, patches, and builds a version of Unicorn with
# minor tweaks to allow Unicorn-emulated binaries to be run under
# afl-fuzz.
#
# The modifications reside in patches/*. The standalone Unicorn library
# will be written to /usr/lib/libunicornafl.so, and the Python bindings
# will be installed system-wide.
#
# You must make sure that Unicorn Engine is not already installed before
# running this script. If it is, please uninstall it first.

UNICORN_URL="https://github.com/unicorn-engine/unicorn/archive/24f55a7973278f20f0de21b904851d99d4716263.tar.gz"
UNICORN_SHA384="7180d47ca52c99b4c073a343a2ead91da1a829fdc3809f3ceada5d872e162962eab98873a8bc7971449d5f34f41fdb93"

echo "================================================="
echo "Unicorn-AFL build script"
echo "================================================="
echo

echo "[*] Performing basic sanity checks..."

if [ ! "`uname -s`" = "Linux" ]; then

  echo "[-] Error: Unicorn instrumentation is supported only on Linux."
  exit 1

fi

if [ ! -f "patches/afl-unicorn-cpu-inl.h" -o ! -f "../config.h" ]; then

  echo "[-] Error: key files not found - wrong working directory?"
  exit 1

fi

if [ ! -f "../afl-showmap" ]; then

  echo "[-] Error: ../afl-showmap not found - compile AFL first!"
  exit 1

fi

for i in wget python automake autoconf sha384sum; do

  T=`which "$i" 2>/dev/null`

  if [ "$T" = "" ]; then

    echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i'."
    exit 1

  fi

done

if ! which easy_install > /dev/null; then

  echo "[-] Error: Python setup-tools not found. Run 'sudo apt-get install python-setuptools'."
  exit 1

fi

if echo "$CC" | grep -qF /afl-; then

  echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool."
  exit 1

fi

echo "[+] All checks passed!"

ARCHIVE="`basename -- "$UNICORN_URL"`"

CKSUM=`sha384sum -- "$ARCHIVE" 2>/dev/null | cut -d' ' -f1`

if [ ! "$CKSUM" = "$UNICORN_SHA384" ]; then

  echo "[*] Downloading Unicorn from the web..."
  rm -f "$ARCHIVE"
  wget -O "$ARCHIVE" -- "$UNICORN_URL" || exit 1

  CKSUM=`sha384sum -- "$ARCHIVE" 2>/dev/null | cut -d' ' -f1`

fi

if [ "$CKSUM" = "$UNICORN_SHA384" ]; then

  echo "[+] Cryptographic signature on $ARCHIVE checks out."

else

  echo "[-] Error: signature mismatch on $ARCHIVE (perhaps download error?)."
  exit 1

fi

echo "[*] Uncompressing archive (this will take a while)..."

rm -rf "unicorn" || exit 1
mkdir "unicorn" || exit 1
tar xzf "$ARCHIVE" -C ./unicorn --strip-components=1 || exit 1

echo "[+] Unpacking successful."

rm -rf "$ARCHIVE" || exit 1

echo "[*] Applying patches..."

cp patches/afl-unicorn-cpu-inl.h unicorn || exit 1
patch -p1 --directory unicorn <patches/patches.diff || exit 1

echo "[+] Patching done."

echo "[*] Configuring Unicorn build..."

cd "unicorn" || exit 1

echo "[+] Configuration complete."

echo "[*] Attempting to build Unicorn (fingers crossed!)..."

UNICORN_QEMU_FLAGS='--python=python2' make || exit 1

echo "[+] Build process successful!"

echo "[*] Installing Unicorn python bindings..."
cd bindings/python || exit 1
if [ -z "$VIRTUAL_ENV" ]; then
  echo "[*] Info: Installing python unicorn using --user"
  python setup.py install --user || exit 1
else
  echo "[*] Info: Installing python unicorn to virtualenv: $VIRTUAL_ENV"
  python setup.py install || exit 1
fi
export LIBUNICORN_PATH="$(pwd)" # in theory, this allows switching between the afl-unicorn and stock unicorn .so files

cd ../../ || exit 1

echo "[+] Unicorn bindings installed successfully."

# Compile the sample, run it, verify that it works!
echo "[*] Testing unicorn-mode functionality by running a sample test harness under afl-unicorn"

cd ../samples/simple || exit 1

# Run afl-showmap on the sample application. If anything comes out then it must have worked!
unset AFL_INST_RATIO
echo 0 | ../../../afl-showmap -U -m none -q -o .test-instr0 -- python simple_test_harness.py ./sample_inputs/sample1.bin || exit 1

if [ -s .test-instr0 ]
then

  echo "[+] Instrumentation tests passed."
  echo "[+] All set, you can now use Unicorn mode (-U) in afl-fuzz!"
  RETVAL=0

else

  echo "[-] Error: Unicorn mode doesn't seem to work!"
  RETVAL=1

fi

rm -f .test-instr0

exit $RETVAL
104 unicorn_mode/helper_scripts/template_test_harness.py Normal file

@@ -0,0 +1,104 @@
"""
template_test_harness.py

Template which loads the context of a process into a Unicorn Engine
instance, loads a custom (mutated) input, and executes the
desired code. Designed to be used in conjunction with one of the
Unicorn Context Dumper scripts.

Author:
    Nathan Voss <njvoss299@gmail.com>
"""

import argparse

from unicorn import *
from unicorn.x86_const import *  # TODO: Set correct architecture here as necessary

import unicorn_loader

# Simple stand-in heap to prevent OS/kernel issues
unicorn_heap = None

# Start and end address of emulation
START_ADDRESS = # TODO: Set start address here
END_ADDRESS   = # TODO: Set end address here

"""
Implement target-specific hooks in here.
Stub out, skip past, and re-implement necessary functionality as appropriate.
"""
def unicorn_hook_instruction(uc, address, size, user_data):

    # TODO: Setup hooks and handle anything you need to here
    #    - For example, hook malloc/free/etc. and handle it internally
    pass

#------------------------
#---- Main test function

def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('context_dir', type=str, help="Directory containing process context")
    parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input content")
    parser.add_argument('-d', '--debug', default=False, action="store_true", help="Dump trace info")
    args = parser.parse_args()

    print("Loading context from {}".format(args.context_dir))
    uc = unicorn_loader.AflUnicornEngine(args.context_dir, enable_trace=args.debug, debug_print=False)

    # Instantiate the hook function to avoid emulation errors
    global unicorn_heap
    unicorn_heap = unicorn_loader.UnicornSimpleHeap(uc, debug_print=True)
    uc.hook_add(UC_HOOK_CODE, unicorn_hook_instruction)

    # Execute 1 instruction just to startup the forkserver
    # NOTE: This instruction will be executed again later, so be sure that
    #       there are no negative consequences to the overall execution state.
    #       If there are, change the later call to emu_start to not re-execute
    #       the first instruction.
    print("Starting the forkserver by executing 1 instruction")
    try:
        uc.emu_start(START_ADDRESS, 0, 0, count=1)
    except UcError as e:
        print("ERROR: Failed to execute a single instruction (error: {})!".format(e))
        return

    # Allocate a buffer and load a mutated input and put it into the right spot
    if args.input_file:
        print("Loading input content from {}".format(args.input_file))
        input_file = open(args.input_file, 'rb')
        input_content = input_file.read()
        input_file.close()

        # TODO: Apply constraints to mutated input here
        raise exceptions.NotImplementedError('No constraints on the mutated inputs have been set!')

        # Allocate a new buffer and put the input into it
        buf_addr = unicorn_heap.malloc(len(input_content))
        uc.mem_write(buf_addr, input_content)
        print("Allocated mutated input buffer @ 0x{0:016x}".format(buf_addr))

        # TODO: Set the input into the state so it will be handled
        raise exceptions.NotImplementedError('The mutated input was not loaded into the Unicorn state!')

    # Run the test
    print("Executing from 0x{0:016x} to 0x{1:016x}".format(START_ADDRESS, END_ADDRESS))
    try:
        result = uc.emu_start(START_ADDRESS, END_ADDRESS, timeout=0, count=0)
    except UcError as e:
        # If something went wrong during emulation a signal is raised to force this
        # script to crash in a way that AFL can detect ('uc.force_crash()' should be
        # called for any condition that you want AFL to treat as a crash).
        print("Execution failed with error: {}".format(e))
        uc.dump_regs()
        uc.force_crash(e)

    print("Final register state:")
    uc.dump_regs()

    print("Done.")

if __name__ == "__main__":
    main()
190 unicorn_mode/helper_scripts/unicorn_dumper_gdb.py Normal file

@@ -0,0 +1,190 @@
"""
unicorn_dumper_gdb.py

When run with GDB sitting at a debug breakpoint, this
dumps the current state (registers/memory/etc) of
the process to a directory consisting of an index
file with register and segment information and
sub-files containing all actual process memory.

The output of this script is expected to be used
to initialize context for Unicorn emulation.

-----------

In order to run this script, GEF needs to be running in the GDB session (gef.py)
# HELPERS from: https://github.com/hugsy/gef/blob/master/gef.py
It can be loaded with:
  source <path_to_gef>/gef.py

Call this function when at a breakpoint in your process with:
  source unicorn_dumper_gdb.py

-----------
"""

import datetime
import hashlib
import json
import os
import sys
import time
import zlib

# GDB Python SDK
import gdb

# Maximum segment size that we'll store
# Yep, this could break stuff pretty quickly if we
# omit something that's used during emulation.
MAX_SEG_SIZE = 128 * 1024 * 1024

# Name of the index file
INDEX_FILE_NAME = "_index.json"

#----------------------
#---- Helper Functions

def map_arch():
    arch = get_arch()  # from GEF
    if 'x86_64' in arch or 'x86-64' in arch:
        return "x64"
    elif 'x86' in arch or 'i386' in arch:
        return "x86"
    elif 'aarch64' in arch or 'arm64' in arch:
        return "arm64le"
    elif 'aarch64_be' in arch:
        return "arm64be"
    elif 'armeb' in arch:
        # check for THUMB mode
        cpsr = get_register('cpsr')
        if (cpsr & (1 << 5)):
            return "armbethumb"
        else:
            return "armbe"
    elif 'arm' in arch:
        # check for THUMB mode
        cpsr = get_register('cpsr')
        if (cpsr & (1 << 5)):
            return "armlethumb"
        else:
            return "armle"
    else:
        return ""


#-----------------------
#---- Dumping functions

def dump_arch_info():
    arch_info = {}
    arch_info["arch"] = map_arch()
    return arch_info


def dump_regs():
    reg_state = {}
    for reg in current_arch.all_registers:
        reg_val = get_register(reg)
        # current dumper script looks for register values to be hex strings
        # reg_str = "0x{:08x}".format(reg_val)
        # if "64" in get_arch():
        #     reg_str = "0x{:016x}".format(reg_val)
        # reg_state[reg.strip().strip('$')] = reg_str
        reg_state[reg.strip().strip('$')] = reg_val
    return reg_state


def dump_process_memory(output_dir):
    # Segment information dictionary
    final_segment_list = []

    # GEF:
    vmmap = get_process_maps()
    if not vmmap:
        print("No address mapping information found")
        return final_segment_list

    for entry in vmmap:
        if entry.page_start == entry.page_end:
            continue

        seg_info = {'start': entry.page_start, 'end': entry.page_end, 'name': entry.path, 'permissions': {
            "r": entry.is_readable() > 0,
            "w": entry.is_writable() > 0,
            "x": entry.is_executable() > 0
        }, 'content_file': ''}

        # "(deleted)" may or may not be valid, but don't push it.
        if entry.is_readable() and not '(deleted)' in entry.path:
            try:
                # Compress and dump the content to a file
                seg_content = read_memory(entry.page_start, entry.size)
                if(seg_content == None):
                    print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.page_start, entry.path))
                else:
                    print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.page_start, len(seg_content), entry.path, repr(seg_info['permissions'])))
                    compressed_seg_content = zlib.compress(seg_content)
                    md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
                    seg_info["content_file"] = md5_sum

                    # Write the compressed contents to disk
                    out_file = open(os.path.join(output_dir, md5_sum), 'wb')
                    out_file.write(compressed_seg_content)
                    out_file.close()

            except:
                print("Exception reading segment ({}): {}".format(entry.path, sys.exc_info()[0]))
        else:
            print("Skipping segment {0}@0x{1:016x}".format(entry.path, entry.page_start))

        # Add the segment to the list
        final_segment_list.append(seg_info)


    return final_segment_list

#----------
#---- Main

def main():
    print("----- Unicorn Context Dumper -----")
    print("You must be actively debugging before running this!")
    print("If it fails, double check that you are actively debugging before running.")
    try:
        GEF_TEST = set_arch()
    except Exception as e:
        print("!!! GEF not running in GDB. Please run gef.py by executing:")
        print('\tpython execfile ("<path_to_gef>/gef.py")')
        return

    try:

        # Create the output directory
        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
        output_path = "UnicornContext_" + timestamp
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        print("Process context will be output to {}".format(output_path))

        # Get the context
        context = {
            "arch": dump_arch_info(),
            "regs": dump_regs(),
            "segments": dump_process_memory(output_path),
        }

        # Write the index file
        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
        index_file.write(json.dumps(context, indent=4))
        index_file.close()
        print("Done.")

    except Exception as e:
        print("!!! ERROR:\n\t{}".format(repr(e)))

if __name__ == "__main__":
    main()
209 unicorn_mode/helper_scripts/unicorn_dumper_ida.py Normal file

@@ -0,0 +1,209 @@
"""
unicorn_dumper_ida.py

When run with IDA (<v7) sitting at a debug breakpoint,
dumps the current state (registers/memory/etc) of
the process to a directory consisting of an index
file with register and segment information and
sub-files containing all actual process memory.

The output of this script is expected to be used
to initialize context for Unicorn emulation.
"""

import datetime
import hashlib
import json
import os
import sys
import time
import zlib

# IDA Python SDK
from idaapi import *
from idc import *

# Maximum segment size that we'll store
# Yep, this could break stuff pretty quickly if we
# omit something that's used during emulation.
MAX_SEG_SIZE = 128 * 1024 * 1024

# Name of the index file
INDEX_FILE_NAME = "_index.json"

#----------------------
#---- Helper Functions

def get_arch():
    if ph.id == PLFM_386 and ph.flag & PR_USE64:
        return "x64"
    elif ph.id == PLFM_386 and ph.flag & PR_USE32:
        return "x86"
    elif ph.id == PLFM_ARM and ph.flag & PR_USE64:
        if cvar.inf.is_be():
            return "arm64be"
        else:
            return "arm64le"
    elif ph.id == PLFM_ARM and ph.flag & PR_USE32:
        if cvar.inf.is_be():
            return "armbe"
        else:
            return "armle"
    else:
        return ""

def get_register_list(arch):
    if arch == "arm64le" or arch == "arm64be":
        arch = "arm64"
    elif arch == "armle" or arch == "armbe":
        arch = "arm"

    registers = {
        "x64" : [
            "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp",
            "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
            "rip", "rsp", "efl",
            "cs", "ds", "es", "fs", "gs", "ss",
        ],
        "x86" : [
            "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
            "eip", "esp", "efl",
            "cs", "ds", "es", "fs", "gs", "ss",
        ],
        "arm" : [
            "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
            "R8", "R9", "R10", "R11", "R12", "PC", "SP", "LR",
            "PSR",
        ],
        "arm64" : [
            "X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",
            "X8", "X9", "X10", "X11", "X12", "X13", "X14",
            "X15", "X16", "X17", "X18", "X19", "X20", "X21",
            "X22", "X23", "X24", "X25", "X26", "X27", "X28",
            "PC", "SP", "FP", "LR", "CPSR"
            # "NZCV",
        ]
    }
    return registers[arch]

#-----------------------
#---- Dumping functions

def dump_arch_info():
    arch_info = {}
    arch_info["arch"] = get_arch()
    return arch_info

def dump_regs():
    reg_state = {}
    for reg in get_register_list(get_arch()):
        reg_state[reg] = GetRegValue(reg)
    return reg_state

def dump_process_memory(output_dir):
    # Segment information dictionary
    segment_list = []

    # Loop over the segments, fill in the info dictionary
    for seg_ea in Segments():
        seg_start = SegStart(seg_ea)
        seg_end = SegEnd(seg_ea)
        seg_size = seg_end - seg_start

        seg_info = {}
        seg_info["name"]  = SegName(seg_ea)
        seg_info["start"] = seg_start
        seg_info["end"]   = seg_end

        perms = getseg(seg_ea).perm
        seg_info["permissions"] = {
            "r": False if (perms & SEGPERM_READ)  == 0 else True,
            "w": False if (perms & SEGPERM_WRITE) == 0 else True,
            "x": False if (perms & SEGPERM_EXEC)  == 0 else True,
        }

        if (perms & SEGPERM_READ) and seg_size <= MAX_SEG_SIZE and isLoaded(seg_start):
            try:
                # Compress and dump the content to a file
                seg_content = get_many_bytes(seg_start, seg_end - seg_start)
                if(seg_content == None):
                    print("Segment empty: {0}@0x{1:016x} (size:UNKNOWN)".format(SegName(seg_ea), seg_ea))
                    seg_info["content_file"] = ""
                else:
                    print("Dumping segment {0}@0x{1:016x} (size:{2})".format(SegName(seg_ea), seg_ea, len(seg_content)))
                    compressed_seg_content = zlib.compress(seg_content)
                    md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
                    seg_info["content_file"] = md5_sum

                    # Write the compressed contents to disk
                    out_file = open(os.path.join(output_dir, md5_sum), 'wb')
                    out_file.write(compressed_seg_content)
                    out_file.close()
            except:
                print("Exception reading segment: {}".format(sys.exc_info()[0]))
                seg_info["content_file"] = ""
        else:
            print("Skipping segment {0}@0x{1:016x}".format(SegName(seg_ea), seg_ea))
            seg_info["content_file"] = ""

        # Add the segment to the list
        segment_list.append(seg_info)

    return segment_list

"""
TODO: FINISH IMPORT DUMPING
def import_callback(ea, name, ord):
    if not name:
    else:

    # True -> Continue enumeration
    # False -> End enumeration
    return True

def dump_imports():
    import_dict = {}

    for i in xrange(0, number_of_import_modules):
        enum_import_names(i, import_callback)

    return import_dict
"""

#----------
#---- Main

def main():

    try:
        print("----- Unicorn Context Dumper -----")
        print("You must be actively debugging before running this!")
        print("If it fails, double check that you are actively debugging before running.")

        # Create the output directory
        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
        output_path = os.path.dirname(os.path.abspath(GetIdbPath()))
        output_path = os.path.join(output_path, "UnicornContext_" + timestamp)
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        print("Process context will be output to {}".format(output_path))

        # Get the context
        context = {
            "arch": dump_arch_info(),
            "regs": dump_regs(),
            "segments": dump_process_memory(output_path),
            #"imports": dump_imports(),
        }

        # Write the index file
        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
        index_file.write(json.dumps(context, indent=4))
        index_file.close()
        print("Done.")

    except Exception, e:
        print("!!! ERROR:\n\t{}".format(str(e)))

if __name__ == "__main__":
    main()
299 unicorn_mode/helper_scripts/unicorn_dumper_lldb.py Normal file

@@ -0,0 +1,299 @@
"""
unicorn_dumper_lldb.py

When run with LLDB sitting at a debug breakpoint, this
dumps the current state (registers/memory/etc) of
the process to a directory consisting of an index
file with register and segment information and
sub-files containing all actual process memory.

The output of this script is expected to be used
to initialize context for Unicorn emulation.

-----------

Call this function when at a breakpoint in your process with:
  command script import -r unicorn_dumper_lldb

If there is trouble with "split on a NoneType", issue the following command:
  script lldb.target.triple

and try to import the script again.

-----------
"""

from copy import deepcopy
import datetime
import hashlib
import json
import os
import sys
import time
import zlib

# LLDB Python SDK
import lldb

# Maximum segment size that we'll store
# Yep, this could break stuff pretty quickly if we
# omit something that's used during emulation.
MAX_SEG_SIZE = 128 * 1024 * 1024

# Name of the index file
INDEX_FILE_NAME = "_index.json"
DEBUG_MEM_FILE_NAME = "_memory.json"

# Page size required by Unicorn
UNICORN_PAGE_SIZE = 0x1000

# Alignment functions to align all memory segments to Unicorn page boundaries (4KB pages only)
ALIGN_PAGE_DOWN = lambda x: x & ~(UNICORN_PAGE_SIZE - 1)
ALIGN_PAGE_UP   = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE-1)

#----------------------
#---- Helper Functions

def overlap_alignments(segments, memory):
    final_list = []
    curr_seg_idx = 0
    curr_end_addr = 0
    curr_node = None
    current_segment = None
    sorted_segments = sorted(segments, key=lambda k: (k['start'], k['end']))
    if curr_seg_idx < len(sorted_segments):
        current_segment = sorted_segments[curr_seg_idx]
    for mem in sorted(memory, key=lambda k: (k['start'], -k['end'])):
        if curr_node is None:
            if current_segment is not None and current_segment['start'] == mem['start']:
                curr_node = deepcopy(current_segment)
                curr_node['permissions'] = mem['permissions']
            else:
                curr_node = deepcopy(mem)

            curr_end_addr = curr_node['end']

        while curr_end_addr <= mem['end']:
            if curr_node['end'] == mem['end']:
                if current_segment is not None and current_segment['start'] > curr_node['start'] and current_segment['start'] < curr_node['end']:
                    curr_node['end'] = current_segment['start']
                    if(curr_node['end'] > curr_node['start']):
                        final_list.append(curr_node)
                    curr_node = deepcopy(current_segment)
                    curr_node['permissions'] = mem['permissions']
                    curr_end_addr = curr_node['end']
                else:
                    if(curr_node['end'] > curr_node['start']):
                        final_list.append(curr_node)
                    # if curr_node is a segment
                    if current_segment is not None and current_segment['end'] == mem['end']:
                        curr_seg_idx += 1
                        if curr_seg_idx < len(sorted_segments):
                            current_segment = sorted_segments[curr_seg_idx]
                        else:
                            current_segment = None

                    curr_node = None
                    break
            # could only be a segment
            else:
                if curr_node['end'] < mem['end']:
                    # check for remaining segments and valid segments
                    if(curr_node['end'] > curr_node['start']):
                        final_list.append(curr_node)

                    curr_seg_idx += 1
                    if curr_seg_idx < len(sorted_segments):
                        current_segment = sorted_segments[curr_seg_idx]
                    else:
                        current_segment = None

                    if current_segment is not None and current_segment['start'] <= curr_end_addr and current_segment['start'] < mem['end']:
                        curr_node = deepcopy(current_segment)
                        curr_node['permissions'] = mem['permissions']
                    else:
                        # no more segments
                        curr_node = deepcopy(mem)

                    curr_node['start'] = curr_end_addr
                    curr_end_addr = curr_node['end']

    return final_list

# https://github.com/llvm-mirror/llvm/blob/master/include/llvm/ADT/Triple.h
def get_arch():
    arch, arch_vendor, arch_os = lldb.target.GetTriple().split('-')
    if arch == 'x86_64':
        return "x64"
    elif arch == 'x86' or arch == 'i386':
        return "x86"
    elif arch == 'aarch64' or arch == 'arm64':
        return "arm64le"
    elif arch == 'aarch64_be':
        return "arm64be"
    elif arch == 'armeb':
        return "armbe"
    elif arch == 'arm':
        return "armle"
    else:
        return ""


#-----------------------
#---- Dumping functions

def dump_arch_info():
    arch_info = {}
    arch_info["arch"] = get_arch()
    return arch_info


def dump_regs():
    reg_state = {}
    for reg_list in lldb.frame.GetRegisters():
        if 'general purpose registers' in reg_list.GetName().lower():
            for reg in reg_list:
                reg_state[reg.GetName()] = int(reg.GetValue(), 16)
    return reg_state

def get_section_info(sec):
    name = sec.name if sec.name is not None else ''
    if sec.GetParent().name is not None:
        name = sec.GetParent().name + '.' + sec.name

    module_name = sec.addr.module.file.GetFilename()
    module_name = module_name if module_name is not None else ''
    long_name = module_name + '.' + name

    return sec.addr.load_addr, (sec.addr.load_addr + sec.size), sec.size, long_name


def dump_process_memory(output_dir):
    # Segment information dictionary
    raw_segment_list = []
    raw_memory_list = []

    # 1st pass:
    # Loop over the segments, fill in the segment info dictionary
    for module in lldb.target.module_iter():
        for seg_ea in module.section_iter():
            seg_info = {'module': module.file.GetFilename() }
            seg_info['start'], seg_info['end'], seg_size, seg_info['name'] = get_section_info(seg_ea)
            # TODO: Ugly hack for -1 LONG address on 32-bit
            if seg_info['start'] >= sys.maxint or seg_size <= 0:
                print "Throwing away page: {}".format(seg_info['name'])
                continue

            # Page-align segment
            seg_info['start'] = ALIGN_PAGE_DOWN(seg_info['start'])
            seg_info['end'] = ALIGN_PAGE_UP(seg_info['end'])
            print("Appending: {}".format(seg_info['name']))
            raw_segment_list.append(seg_info)

    # Add the stack memory region (just hardcode 0x1000 around the current SP)
    sp = lldb.frame.GetSP()
    start_sp = ALIGN_PAGE_DOWN(sp)
    raw_segment_list.append({'start': start_sp, 'end': start_sp + 0x1000, 'name': 'STACK'})

    # Write the original memory to file for debugging
    index_file = open(os.path.join(output_dir, DEBUG_MEM_FILE_NAME), 'w')
    index_file.write(json.dumps(raw_segment_list, indent=4))
    index_file.close()

    # Loop over raw memory regions
    mem_info = lldb.SBMemoryRegionInfo()
    start_addr = -1
    next_region_addr = 0
    while next_region_addr > start_addr:
        err = lldb.process.GetMemoryRegionInfo(next_region_addr, mem_info)
        # TODO: Should check err.success. If False, what do we do?
        if not err.success:
            break
        next_region_addr = mem_info.GetRegionEnd()
        if next_region_addr >= sys.maxsize:
            break

        start_addr = mem_info.GetRegionBase()
        end_addr = mem_info.GetRegionEnd()

        # Unknown region name
        region_name = 'UNKNOWN'

        # Ignore regions that aren't even mapped
        if mem_info.IsMapped() and mem_info.IsReadable():
            mem_info_obj = {'start': start_addr, 'end': end_addr, 'name': region_name, 'permissions': {
                "r": mem_info.IsReadable(),
                "w": mem_info.IsWritable(),
                "x": mem_info.IsExecutable()
            }}

            raw_memory_list.append(mem_info_obj)

    final_segment_list = overlap_alignments(raw_segment_list, raw_memory_list)

    for seg_info in final_segment_list:
        try:
            seg_info['content_file'] = ''
            start_addr = seg_info['start']
            end_addr = seg_info['end']
            region_name = seg_info['name']
            # Compress and dump the content to a file
            err = lldb.SBError()
            seg_content = lldb.process.ReadMemory(start_addr, end_addr - start_addr, err)
            if(seg_content == None):
                print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(start_addr, region_name))
                seg_info['content_file'] = ''
            else:
                print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(start_addr, len(seg_content), region_name, repr(seg_info['permissions'])))
                compressed_seg_content = zlib.compress(seg_content)
                md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
                seg_info['content_file'] = md5_sum

                # Write the compressed contents to disk
                out_file = open(os.path.join(output_dir, md5_sum), 'wb')
                out_file.write(compressed_seg_content)
                out_file.close()

        except:
            print("Exception reading segment ({}): {}".format(region_name, sys.exc_info()[0]))

    return final_segment_list

#----------
#---- Main

def main():

    try:
        print("----- Unicorn Context Dumper -----")
        print("You must be actively debugging before running this!")
        print("If it fails, double check that you are actively debugging before running.")

        # Create the output directory
        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
        output_path = "UnicornContext_" + timestamp
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        print("Process context will be output to {}".format(output_path))

        # Get the context
        context = {
            "arch": dump_arch_info(),
            "regs": dump_regs(),
            "segments": dump_process_memory(output_path),
        }

        # Write the index file
        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
        index_file.write(json.dumps(context, indent=4))
        index_file.close()
        print("Done.")

    except Exception, e:
        print("!!! ERROR:\n\t{}".format(repr(e)))

if __name__ == "__main__":
    main()
elif lldb.debugger:
    main()
224
unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py
Normal file
@ -0,0 +1,224 @@
"""
|
||||
unicorn_dumper_pwndbg.py
|
||||
|
||||
When run with GDB sitting at a debug breakpoint, this
|
||||
dumps the current state (registers/memory/etc) of
|
||||
the process to a directory consisting of an index
|
||||
file with register and segment information and
|
||||
sub-files containing all actual process memory.
|
||||
|
||||
The output of this script is expected to be used
|
||||
to initialize context for Unicorn emulation.
|
||||
|
||||
-----------
|
||||
|
||||
In order to run this script, PWNDBG needs to be running in the GDB session (gdbinit.py)
|
||||
# HELPERS from: https://github.com/pwndbg/pwndbg
|
||||
It can be loaded with:
|
||||
source <path_to_pwndbg>/gdbinit.py
|
||||
|
||||
Call this function when at a breakpoint in your process with:
|
||||
source unicorn_dumper_pwndbg.py
|
||||
|
||||
-----------
|
||||
|
||||
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import zlib
|
||||
|
||||
# GDB Python SDK
|
||||
import gdb
|
||||
|
||||
pwndbg_loaded = False
|
||||
|
||||
try:
|
||||
import pwndbg.arch
|
||||
import pwndbg.regs
|
||||
import pwndbg.vmmap
|
||||
import pwndbg.memory
|
||||
|
||||
pwndbg_loaded = True
|
||||
|
||||
except ImportError:
|
||||
print("!!! PWNGDB not running in GDB. Please run gdbinit.py by executing:")
|
||||
print('\tpython execfile ("<path_to_pwndbg>/gdbinit.py")')
|
||||
|
||||
# Maximum segment size that we'll store
|
||||
# Yep, this could break stuff pretty quickly if we
|
||||
# omit something that's used during emulation.
|
||||
MAX_SEG_SIZE = 128 * 1024 * 1024
|
||||
|
||||
# Name of the index file
|
||||
INDEX_FILE_NAME = "_index.json"
|
||||
|
||||
#----------------------
|
||||
#---- Helper Functions
|
||||
|
||||
def map_arch():
|
||||
arch = pwndbg.arch.current # from PWNDBG
|
||||
if 'x86_64' in arch or 'x86-64' in arch:
|
||||
return "x64"
|
||||
elif 'x86' in arch or 'i386' in arch:
|
||||
return "x86"
|
||||
elif 'aarch64' in arch or 'arm64' in arch:
|
||||
return "arm64le"
|
||||
elif 'aarch64_be' in arch:
|
||||
return "arm64be"
|
||||
elif 'arm' in arch:
|
||||
cpsr = pwndbg.regs['cpsr']
|
||||
# check endianess
|
||||
if pwndbg.arch.endian == 'big':
|
||||
# check for THUMB mode
|
||||
if (cpsr & (1 << 5)):
|
||||
return "armbethumb"
|
||||
else:
|
||||
return "armbe"
|
||||
else:
|
||||
# check for THUMB mode
|
||||
if (cpsr & (1 << 5)):
|
||||
return "armlethumb"
|
||||
else:
|
||||
return "armle"
|
||||
elif 'mips' in arch:
|
||||
if pwndbg.arch.endian == 'little':
|
||||
return 'mipsel'
|
||||
else:
|
||||
return 'mips'
|
||||
else:
|
||||
return ""
|
||||
|
||||
|
||||
#-----------------------
|
||||
#---- Dumping functions
|
||||
|
||||
def dump_arch_info():
|
||||
arch_info = {}
|
||||
arch_info["arch"] = map_arch()
|
||||
return arch_info
|
||||
|
||||
|
||||
def dump_regs():
|
||||
reg_state = {}
|
||||
for reg in pwndbg.regs.all:
|
||||
reg_val = pwndbg.regs[reg]
|
||||
# current dumper script looks for register values to be hex strings
|
||||
# reg_str = "0x{:08x}".format(reg_val)
|
||||
# if "64" in get_arch():
|
||||
# reg_str = "0x{:016x}".format(reg_val)
|
||||
# reg_state[reg.strip().strip('$')] = reg_str
|
||||
reg_state[reg.strip().strip('$')] = reg_val
|
||||
return reg_state
|
||||
|
||||
|
||||
def dump_process_memory(output_dir):
|
||||
# Segment information dictionary
|
||||
final_segment_list = []
|
||||
|
||||
# PWNDBG:
|
||||
vmmap = pwndbg.vmmap.get()
|
||||
|
||||
# Pointer to end of last dumped memory segment
|
||||
segment_last_addr = 0x0;
|
||||
|
||||
start = None
|
||||
end = None
|
||||
|
||||
if not vmmap:
|
||||
print("No address mapping information found")
|
||||
return final_segment_list
|
||||
|
||||
# Assume segment entries are sorted by start address
|
||||
for entry in vmmap:
|
||||
if entry.start == entry.end:
|
||||
continue
|
||||
|
||||
start = entry.start
|
||||
end = entry.end
|
||||
|
||||
if (segment_last_addr > entry.start): # indicates overlap
|
||||
if (segment_last_addr > entry.end): # indicates complete overlap, so we skip the segment entirely
|
||||
continue
|
||||
else:
|
||||
start = segment_last_addr
|
||||
|
||||
|
||||
seg_info = {'start': start, 'end': end, 'name': entry.objfile, 'permissions': {
|
||||
"r": entry.read,
|
||||
"w": entry.write,
|
||||
"x": entry.execute
|
||||
}, 'content_file': ''}
|
||||
|
||||
# "(deleted)" may or may not be valid, but don't push it.
|
||||
if entry.read and not '(deleted)' in entry.objfile:
|
||||
try:
|
||||
# Compress and dump the content to a file
|
||||
seg_content = pwndbg.memory.read(start, end - start)
|
||||
if(seg_content == None):
|
||||
print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.start, entry.objfile))
|
||||
else:
|
||||
print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.start, len(seg_content), entry.objfile, repr(seg_info['permissions'])))
|
||||
compressed_seg_content = zlib.compress(seg_content)
|
||||
md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
|
||||
seg_info["content_file"] = md5_sum
|
||||
|
||||
# Write the compressed contents to disk
|
||||
out_file = open(os.path.join(output_dir, md5_sum), 'wb')
|
||||
out_file.write(compressed_seg_content)
|
||||
out_file.close()
|
||||
|
||||
except:
|
||||
print("Exception reading segment ({}): {}".format(entry.objfile, sys.exc_info()[0]))
|
||||
else:
|
||||
print("Skipping segment {0}@0x{1:016x}".format(entry.objfile, entry.start))
|
||||
|
||||
segment_last_addr = end
|
||||
|
||||
# Add the segment to the list
|
||||
final_segment_list.append(seg_info)
|
||||
|
||||
|
||||
return final_segment_list
|
||||
|
||||
#----------
|
||||
#---- Main
|
||||
|
||||
def main():
|
||||
print("----- Unicorn Context Dumper -----")
|
||||
print("You must be actively debugging before running this!")
|
||||
print("If it fails, double check that you are actively debugging before running.")
|
||||
|
||||
try:
|
||||
|
||||
# Create the output directory
|
||||
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
|
||||
output_path = "UnicornContext_" + timestamp
|
||||
if not os.path.exists(output_path):
|
||||
os.makedirs(output_path)
|
||||
print("Process context will be output to {}".format(output_path))
|
||||
|
||||
# Get the context
|
||||
context = {
|
||||
"arch": dump_arch_info(),
|
||||
"regs": dump_regs(),
|
||||
"segments": dump_process_memory(output_path),
|
||||
}
|
||||
|
||||
# Write the index file
|
||||
index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
|
||||
index_file.write(json.dumps(context, indent=4))
|
||||
index_file.close()
|
||||
print("Done.")
|
||||
|
||||
except Exception as e:
|
||||
print("!!! ERROR:\n\t{}".format(repr(e)))
|
||||
|
||||
if __name__ == "__main__" and pwndbg_loaded:
|
||||
main()
|
||||
|
560
unicorn_mode/helper_scripts/unicorn_loader.py
Normal file
@ -0,0 +1,560 @@
"""
|
||||
unicorn_loader.py
|
||||
|
||||
Loads a process context dumped created using a
|
||||
Unicorn Context Dumper script into a Unicorn Engine
|
||||
instance. Once this is performed emulation can be
|
||||
started.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import binascii
|
||||
from collections import namedtuple
|
||||
import datetime
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import struct
|
||||
import time
|
||||
import zlib
|
||||
|
||||
# Unicorn imports
|
||||
from unicorn import *
|
||||
from unicorn.arm_const import *
|
||||
from unicorn.arm64_const import *
|
||||
from unicorn.x86_const import *
|
||||
from unicorn.mips_const import *
|
||||
|
||||
# Name of the index file
|
||||
INDEX_FILE_NAME = "_index.json"
|
||||
|
||||
# Page size required by Unicorn
|
||||
UNICORN_PAGE_SIZE = 0x1000
|
||||
|
||||
# Max allowable segment size (1G)
|
||||
MAX_ALLOWABLE_SEG_SIZE = 1024 * 1024 * 1024
|
||||
|
||||
# Alignment functions to align all memory segments to Unicorn page boundaries (4KB pages only)
|
||||
ALIGN_PAGE_DOWN = lambda x: x & ~(UNICORN_PAGE_SIZE - 1)
|
||||
ALIGN_PAGE_UP = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE-1)
|
||||
|
||||
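# For illustration (an added example, not part of the original script):
# with UNICORN_PAGE_SIZE = 0x1000,
#   ALIGN_PAGE_DOWN(0x1234) -> 0x1000
#   ALIGN_PAGE_UP(0x1234)   -> 0x2000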
#---------------------------------------
#---- Unicorn-based heap implementation

class UnicornSimpleHeap(object):
    """ Use this class to provide a simple heap implementation. This should
        be used if malloc/free calls break things during emulation. This heap also
        implements basic guard-page capabilities which enable immediate notice of
        heap overflows and underflows.
    """

    # Helper data-container used to track chunks
    class HeapChunk(object):
        def __init__(self, actual_addr, total_size, data_size):
            self.total_size = total_size                        # Total size of the chunk (including padding and guard page)
            self.actual_addr = actual_addr                      # Actual start address of the chunk
            self.data_size = data_size                          # Size requested by the caller of actual malloc call
            self.data_addr = actual_addr + UNICORN_PAGE_SIZE    # Address where data actually starts

        # Returns true if the specified buffer is completely within the chunk, else false
        def is_buffer_in_chunk(self, addr, size):
            if addr >= self.data_addr and ((addr + size) <= (self.data_addr + self.data_size)):
                return True
            else:
                return False

    # Skip the zero-page to avoid weird potential issues with segment registers
    HEAP_MIN_ADDR = 0x00002000
    HEAP_MAX_ADDR = 0xFFFFFFFF

    _uc = None              # Unicorn engine instance to interact with
    _chunks = []            # List of all known chunks
    _debug_print = False    # True to print debug information

    def __init__(self, uc, debug_print=False):
        self._uc = uc
        self._debug_print = debug_print

        # Add the watchpoint hook that will be used to implement pseudo-guard page support
        self._uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, self.__check_mem_access)

    def malloc(self, size):
        # Figure out the overall size to be allocated/mapped
        #    - Allocate at least 1 4k page of memory to make Unicorn happy
        #    - Add guard pages at the start and end of the region
        total_chunk_size = UNICORN_PAGE_SIZE + ALIGN_PAGE_UP(size) + UNICORN_PAGE_SIZE
        # Gross but efficient way to find space for the chunk:
        chunk = None
        for addr in xrange(self.HEAP_MIN_ADDR, self.HEAP_MAX_ADDR, UNICORN_PAGE_SIZE):
            try:
                self._uc.mem_map(addr, total_chunk_size, UC_PROT_READ | UC_PROT_WRITE)
                chunk = self.HeapChunk(addr, total_chunk_size, size)
                if self._debug_print:
                    print("Allocating 0x{0:x}-byte chunk @ 0x{1:016x}".format(chunk.data_size, chunk.data_addr))
                break
            except UcError as e:
                continue
        # Something went very wrong
        if chunk == None:
            return 0
        self._chunks.append(chunk)
        return chunk.data_addr

    def calloc(self, size, count):
        # Simple wrapper around malloc with calloc() args
        return self.malloc(size*count)

    def realloc(self, ptr, new_size):
        # Wrapper around malloc(new_size) / memcpy(new, old, old_size) / free(old)
        if self._debug_print:
            print("Reallocating chunk @ 0x{0:016x} to be 0x{1:x} bytes".format(ptr, new_size))
        old_chunk = None
        for chunk in self._chunks:
            if chunk.data_addr == ptr:
                old_chunk = chunk
        new_chunk_addr = self.malloc(new_size)
        if old_chunk != None:
            self._uc.mem_write(new_chunk_addr, str(self._uc.mem_read(old_chunk.data_addr, old_chunk.data_size)))
            self.free(old_chunk.data_addr)
        return new_chunk_addr

    def free(self, addr):
        for chunk in self._chunks:
            if chunk.is_buffer_in_chunk(addr, 1):
                if self._debug_print:
                    print("Freeing 0x{0:x}-byte chunk @ 0x{1:016x}".format(chunk.data_size, chunk.data_addr))
                self._uc.mem_unmap(chunk.actual_addr, chunk.total_size)
                self._chunks.remove(chunk)
                return True
        return False

    # Implements basic guard-page functionality
    def __check_mem_access(self, uc, access, address, size, value, user_data):
        for chunk in self._chunks:
            if address >= chunk.actual_addr and ((address + size) <= (chunk.actual_addr + chunk.total_size)):
                if chunk.is_buffer_in_chunk(address, size) == False:
                    if self._debug_print:
                        print("Heap over/underflow attempting to {0} 0x{1:x} bytes @ {2:016x}".format( \
                            "write" if access == UC_MEM_WRITE else "read", size, address))
                    # Force a memory-based crash
                    uc.force_crash(UcError(UC_ERR_READ_PROT))
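# A minimal usage sketch for UnicornSimpleHeap (an added example; the context
# directory name and sizes below are assumptions, not part of this commit):
#
#   uc = AflUnicornEngine("UnicornContext_20190714_120000", debug_print=True)
#   heap = UnicornSimpleHeap(uc, debug_print=True)
#   buf_addr = heap.malloc(0x100)        # chunk padded to a page, guard pages on both sides
#   uc.mem_write(buf_addr, "A" * 0x100)  # host-side setup of the buffer contents
#   # ... point an emulated pointer at buf_addr and uc.emu_start(...); any
#   # emulated access past buf_addr + 0x100 lands in the guard page and the
#   # hook above calls force_crash(), surfacing the overflow to AFL.
#   heap.free(buf_addr)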
#---------------------------
#---- Loading function

class AflUnicornEngine(Uc):

    def __init__(self, context_directory, enable_trace=False, debug_print=False):
        """
        Initializes an AflUnicornEngine instance, which extends the standard Unicorn Engine
        with a bunch of helper routines that are useful for creating afl-unicorn test harnesses.

        Parameters:
          - context_directory: Path to the directory generated by one of the context dumper scripts
          - enable_trace: If True, trace information will be printed to STDOUT
          - debug_print: If True, debugging information will be printed while loading the context
        """

        # Make sure the index file exists and load it
        index_file_path = os.path.join(context_directory, INDEX_FILE_NAME)
        if not os.path.isfile(index_file_path):
            raise Exception("Index file not found. Expected it to be at {}".format(index_file_path))

        # Load the process context from the index file
        if debug_print:
            print("Loading process context index from {}".format(index_file_path))
        index_file = open(index_file_path, 'r')
        context = json.load(index_file)
        index_file.close()

        # Check the context to make sure we have the basic essential components
        if 'arch' not in context:
            raise Exception("Couldn't find architecture information in index file")
        if 'regs' not in context:
            raise Exception("Couldn't find register information in index file")
        if 'segments' not in context:
            raise Exception("Couldn't find segment/memory information in index file")

        # Set the UnicornEngine instance's architecture and mode
        self._arch_str = context['arch']['arch']
        arch, mode = self.__get_arch_and_mode(self._arch_str)
        Uc.__init__(self, arch, mode)

        # Load the registers
        regs = context['regs']
        reg_map = self.__get_register_map(self._arch_str)
        for register, value in regs.iteritems():
            if debug_print:
                print("Reg {0} = {1}".format(register, value))
            if not reg_map.has_key(register.lower()):
                if debug_print:
                    print("Skipping Reg: {}".format(register))
            else:
                reg_write_retry = True
                try:
                    self.reg_write(reg_map[register.lower()], value)
                    reg_write_retry = False
                except Exception as e:
                    if debug_print:
                        print("ERROR writing register: {}, value: {} -- {}".format(register, value, repr(e)))

                if reg_write_retry:
                    if debug_print:
                        print("Trying to parse value ({}) as hex string".format(value))
                    try:
                        self.reg_write(reg_map[register.lower()], int(value, 16))
                    except Exception as e:
                        if debug_print:
                            print("ERROR writing hex string register: {}, value: {} -- {}".format(register, value, repr(e)))

        # Setup the memory map and load memory content
        self.__map_segments(context['segments'], context_directory, debug_print)

        if enable_trace:
            self.hook_add(UC_HOOK_BLOCK, self.__trace_block)
            self.hook_add(UC_HOOK_CODE, self.__trace_instruction)
            self.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, self.__trace_mem_access)
            self.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, self.__trace_mem_invalid_access)

        if debug_print:
            print("Done loading context.")

    def get_arch(self):
        return self._arch

    def get_mode(self):
        return self._mode

    def get_arch_str(self):
        return self._arch_str

    def force_crash(self, uc_error):
        """ This function should be called to indicate to AFL that a crash occurred during emulation.
            You can pass the exception received from Uc.emu_start
        """
        mem_errors = [
            UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED,
            UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED,
            UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED,
        ]
        if uc_error.errno in mem_errors:
            # Memory error - throw SIGSEGV
            os.kill(os.getpid(), signal.SIGSEGV)
        elif uc_error.errno == UC_ERR_INSN_INVALID:
            # Invalid instruction - throw SIGILL
            os.kill(os.getpid(), signal.SIGILL)
        else:
            # Not sure what happened - throw SIGABRT
            os.kill(os.getpid(), signal.SIGABRT)

    def dump_regs(self):
        """ Dumps the contents of all the registers to STDOUT """
        for reg in sorted(self.__get_register_map(self._arch_str).items(), key=lambda reg: reg[0]):
            print(">>> {0:>4}: 0x{1:016x}".format(reg[0], self.reg_read(reg[1])))

    # TODO: Make this dynamically get the stack pointer register and pointer width for the current architecture
    """
    def dump_stack(self, window=10):
        print(">>> Stack:")
        stack_ptr_addr = self.reg_read(UC_X86_REG_RSP)
        for i in xrange(-window, window + 1):
            addr = stack_ptr_addr + (i*8)
            print("{0}0x{1:016x}: 0x{2:016x}".format( \
                'SP->' if i == 0 else '    ', addr, \
                struct.unpack('<Q', self.mem_read(addr, 8))[0]))
    """

    #-----------------------------
    #---- Loader Helper Functions

    def __map_segment(self, name, address, size, perms, debug_print=False):
        # - size is unsigned and must be != 0
        # - starting address must be aligned to 4KB
        # - map size must be multiple of the page size (4KB)
        mem_start = address
        mem_end = address + size
        mem_start_aligned = ALIGN_PAGE_DOWN(mem_start)
        mem_end_aligned = ALIGN_PAGE_UP(mem_end)
        if debug_print:
            if mem_start_aligned != mem_start or mem_end_aligned != mem_end:
                print("Aligning segment to page boundary:")
                print("  name:  {}".format(name))
                print("  start: {0:016x} -> {1:016x}".format(mem_start, mem_start_aligned))
                print("  end:   {0:016x} -> {1:016x}".format(mem_end, mem_end_aligned))
            print("Mapping segment from {0:016x} - {1:016x} with perm={2}: {3}".format(mem_start_aligned, mem_end_aligned, perms, name))
        if(mem_start_aligned < mem_end_aligned):
            self.mem_map(mem_start_aligned, mem_end_aligned - mem_start_aligned, perms)


    def __map_segments(self, segment_list, context_directory, debug_print=False):
        for segment in segment_list:

            # Get the segment information from the index
            name = segment['name']
            seg_start = segment['start']
            seg_end = segment['end']
            perms = \
                (UC_PROT_READ  if segment['permissions']['r'] == True else 0) | \
                (UC_PROT_WRITE if segment['permissions']['w'] == True else 0) | \
                (UC_PROT_EXEC  if segment['permissions']['x'] == True else 0)

            if debug_print:
                print("Handling segment {}".format(name))

            # Check for any overlap with existing segments. If there is, it must
            # be consolidated and merged together before mapping since Unicorn
            # doesn't allow overlapping segments.
            found = False
            overlap_start = False
            overlap_end = False
            tmp = 0
            for (mem_start, mem_end, mem_perm) in self.mem_regions():
                mem_end = mem_end + 1
                if seg_start >= mem_start and seg_end < mem_end:
                    found = True
                    break
                if seg_start >= mem_start and seg_start < mem_end:
                    overlap_start = True
                    tmp = mem_end
                    break
                if seg_end >= mem_start and seg_end < mem_end:
                    overlap_end = True
                    tmp = mem_start
                    break

            # Map memory into the address space if it is of an acceptable size.
            if (seg_end - seg_start) > MAX_ALLOWABLE_SEG_SIZE:
                if debug_print:
                    print("Skipping segment (LARGER THAN {0}) from {1:016x} - {2:016x} with perm={3}: {4}".format(MAX_ALLOWABLE_SEG_SIZE, seg_start, seg_end, perms, name))
                continue
            elif not found:           # Make sure it's not already mapped
                if overlap_start:     # Partial overlap (start)
                    self.__map_segment(name, tmp, seg_end - tmp, perms, debug_print)
                elif overlap_end:     # Partial overlap (end)
                    self.__map_segment(name, seg_start, tmp - seg_start, perms, debug_print)
                else:                 # Not found
                    self.__map_segment(name, seg_start, seg_end - seg_start, perms, debug_print)
            else:
                if debug_print:
                    print("Segment {} already mapped. Moving on.".format(name))

            # Load the content (if available)
            if 'content_file' in segment and len(segment['content_file']) > 0:
                content_file_path = os.path.join(context_directory, segment['content_file'])
                if not os.path.isfile(content_file_path):
                    raise Exception("Unable to find segment content file. Expected it to be at {}".format(content_file_path))
                #if debug_print:
                #    print("Loading content for segment {} from {}".format(name, segment['content_file']))
                content_file = open(content_file_path, 'rb')
                compressed_content = content_file.read()
                content_file.close()
                self.mem_write(seg_start, zlib.decompress(compressed_content))

            else:
                if debug_print:
                    print("No content found for segment {0} @ {1:016x}".format(name, seg_start))
                self.mem_write(seg_start, '\x00' * (seg_end - seg_start))

    def __get_arch_and_mode(self, arch_str):
        arch_map = {
            "x64"       : [ UC_X86_REG_RIP,    UC_ARCH_X86,    UC_MODE_64 ],
            "x86"       : [ UC_X86_REG_EIP,    UC_ARCH_X86,    UC_MODE_32 ],
            "arm64be"   : [ UC_ARM64_REG_PC,   UC_ARCH_ARM64,  UC_MODE_ARM | UC_MODE_BIG_ENDIAN ],
            "arm64le"   : [ UC_ARM64_REG_PC,   UC_ARCH_ARM64,  UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN ],
            "armbe"     : [ UC_ARM_REG_PC,     UC_ARCH_ARM,    UC_MODE_ARM | UC_MODE_BIG_ENDIAN ],
            "armle"     : [ UC_ARM_REG_PC,     UC_ARCH_ARM,    UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN ],
            "armbethumb": [ UC_ARM_REG_PC,     UC_ARCH_ARM,    UC_MODE_THUMB | UC_MODE_BIG_ENDIAN ],
            "armlethumb": [ UC_ARM_REG_PC,     UC_ARCH_ARM,    UC_MODE_THUMB | UC_MODE_LITTLE_ENDIAN ],
            "mips"      : [ UC_MIPS_REG_PC,    UC_ARCH_MIPS,   UC_MODE_MIPS32 | UC_MODE_BIG_ENDIAN ],
            "mipsel"    : [ UC_MIPS_REG_PC,    UC_ARCH_MIPS,   UC_MODE_MIPS32 | UC_MODE_LITTLE_ENDIAN ],
        }
        return (arch_map[arch_str][1], arch_map[arch_str][2])

    def __get_register_map(self, arch):
        if arch == "arm64le" or arch == "arm64be":
            arch = "arm64"
        elif arch == "armle" or arch == "armbe" or "thumb" in arch:
            arch = "arm"
        elif arch == "mipsel":
            arch = "mips"

        registers = {
            "x64" : {
                "rax": UC_X86_REG_RAX,
                "rbx": UC_X86_REG_RBX,
                "rcx": UC_X86_REG_RCX,
                "rdx": UC_X86_REG_RDX,
                "rsi": UC_X86_REG_RSI,
                "rdi": UC_X86_REG_RDI,
                "rbp": UC_X86_REG_RBP,
                "rsp": UC_X86_REG_RSP,
                "r8": UC_X86_REG_R8,
                "r9": UC_X86_REG_R9,
                "r10": UC_X86_REG_R10,
                "r11": UC_X86_REG_R11,
                "r12": UC_X86_REG_R12,
                "r13": UC_X86_REG_R13,
                "r14": UC_X86_REG_R14,
                "r15": UC_X86_REG_R15,
                "rip": UC_X86_REG_RIP,
                "efl": UC_X86_REG_EFLAGS,
                "cs": UC_X86_REG_CS,
                "ds": UC_X86_REG_DS,
                "es": UC_X86_REG_ES,
                "fs": UC_X86_REG_FS,
                "gs": UC_X86_REG_GS,
                "ss": UC_X86_REG_SS,
            },
            "x86" : {
                "eax": UC_X86_REG_EAX,
                "ebx": UC_X86_REG_EBX,
                "ecx": UC_X86_REG_ECX,
                "edx": UC_X86_REG_EDX,
                "esi": UC_X86_REG_ESI,
                "edi": UC_X86_REG_EDI,
                "ebp": UC_X86_REG_EBP,
                "esp": UC_X86_REG_ESP,
                "eip": UC_X86_REG_EIP,
                "efl": UC_X86_REG_EFLAGS,
                # Segment registers removed...
                # They caused segfaults (from unicorn?) when they were here
            },
            "arm" : {
                "r0": UC_ARM_REG_R0,
                "r1": UC_ARM_REG_R1,
                "r2": UC_ARM_REG_R2,
                "r3": UC_ARM_REG_R3,
                "r4": UC_ARM_REG_R4,
                "r5": UC_ARM_REG_R5,
                "r6": UC_ARM_REG_R6,
                "r7": UC_ARM_REG_R7,
                "r8": UC_ARM_REG_R8,
                "r9": UC_ARM_REG_R9,
                "r10": UC_ARM_REG_R10,
                "r11": UC_ARM_REG_R11,
                "r12": UC_ARM_REG_R12,
                "pc": UC_ARM_REG_PC,
                "sp": UC_ARM_REG_SP,
                "lr": UC_ARM_REG_LR,
                "cpsr": UC_ARM_REG_CPSR
            },
            "arm64" : {
                "x0": UC_ARM64_REG_X0,
                "x1": UC_ARM64_REG_X1,
                "x2": UC_ARM64_REG_X2,
                "x3": UC_ARM64_REG_X3,
                "x4": UC_ARM64_REG_X4,
                "x5": UC_ARM64_REG_X5,
                "x6": UC_ARM64_REG_X6,
                "x7": UC_ARM64_REG_X7,
                "x8": UC_ARM64_REG_X8,
                "x9": UC_ARM64_REG_X9,
                "x10": UC_ARM64_REG_X10,
                "x11": UC_ARM64_REG_X11,
                "x12": UC_ARM64_REG_X12,
                "x13": UC_ARM64_REG_X13,
                "x14": UC_ARM64_REG_X14,
                "x15": UC_ARM64_REG_X15,
                "x16": UC_ARM64_REG_X16,
                "x17": UC_ARM64_REG_X17,
                "x18": UC_ARM64_REG_X18,
                "x19": UC_ARM64_REG_X19,
                "x20": UC_ARM64_REG_X20,
                "x21": UC_ARM64_REG_X21,
                "x22": UC_ARM64_REG_X22,
                "x23": UC_ARM64_REG_X23,
                "x24": UC_ARM64_REG_X24,
                "x25": UC_ARM64_REG_X25,
                "x26": UC_ARM64_REG_X26,
                "x27": UC_ARM64_REG_X27,
                "x28": UC_ARM64_REG_X28,
                "pc": UC_ARM64_REG_PC,
                "sp": UC_ARM64_REG_SP,
                "fp": UC_ARM64_REG_FP,
                "lr": UC_ARM64_REG_LR,
                "nzcv": UC_ARM64_REG_NZCV,
                "cpsr": UC_ARM_REG_CPSR,
            },
            "mips" : {
                "0" : UC_MIPS_REG_ZERO,
                "at": UC_MIPS_REG_AT,
                "v0": UC_MIPS_REG_V0,
                "v1": UC_MIPS_REG_V1,
                "a0": UC_MIPS_REG_A0,
                "a1": UC_MIPS_REG_A1,
                "a2": UC_MIPS_REG_A2,
                "a3": UC_MIPS_REG_A3,
                "t0": UC_MIPS_REG_T0,
                "t1": UC_MIPS_REG_T1,
                "t2": UC_MIPS_REG_T2,
                "t3": UC_MIPS_REG_T3,
                "t4": UC_MIPS_REG_T4,
                "t5": UC_MIPS_REG_T5,
                "t6": UC_MIPS_REG_T6,
                "t7": UC_MIPS_REG_T7,
                "t8": UC_MIPS_REG_T8,
                "t9": UC_MIPS_REG_T9,
                "s0": UC_MIPS_REG_S0,
                "s1": UC_MIPS_REG_S1,
                "s2": UC_MIPS_REG_S2,
                "s3": UC_MIPS_REG_S3,
                "s4": UC_MIPS_REG_S4,
                "s5": UC_MIPS_REG_S5,
                "s6": UC_MIPS_REG_S6,
                "s7": UC_MIPS_REG_S7,
                "s8": UC_MIPS_REG_S8,
                "k0": UC_MIPS_REG_K0,
                "k1": UC_MIPS_REG_K1,
                "gp": UC_MIPS_REG_GP,
                "pc": UC_MIPS_REG_PC,
                "sp": UC_MIPS_REG_SP,
                "fp": UC_MIPS_REG_FP,
                "ra": UC_MIPS_REG_RA,
                "hi": UC_MIPS_REG_HI,
                "lo": UC_MIPS_REG_LO
            }
        }
        return registers[arch]

    #---------------------------
    # Callbacks for tracing

    # TODO: Make integer-printing fixed widths dependent on bitness of architecture
    #       (i.e. only show 4 bytes for 32-bit, 8 bytes for 64-bit)

    # TODO: Figure out how best to determine the capstone mode and architecture here
    """
    try:
        # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
        from capstone import *
        cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
        def __trace_instruction(self, uc, address, size, user_data):
            mem = uc.mem_read(address, size)
            for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
                print("    Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
    except ImportError:
        def __trace_instruction(self, uc, address, size, user_data):
            print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
    """

    def __trace_instruction(self, uc, address, size, user_data):
        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))

    def __trace_block(self, uc, address, size, user_data):
        print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))

    def __trace_mem_access(self, uc, access, address, size, value, user_data):
        if access == UC_MEM_WRITE:
            print("        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
        else:
            print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))

    def __trace_mem_invalid_access(self, uc, access, address, size, value, user_data):
        if access == UC_MEM_WRITE_UNMAPPED:
            print("        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
        else:
            print("        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))
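Taken together, a minimal harness built on this loader looks roughly like the sketch below (the context directory and start/end addresses are placeholder assumptions for illustration, not values from this commit):

    from unicorn import UcError
    from unicorn_loader import AflUnicornEngine

    CONTEXT_DIR = "UnicornContext_20190714_120000"  # produced by a dumper script (placeholder)
    START_ADDR  = 0x00100000                        # first instruction to emulate (placeholder)
    END_ADDR    = 0x001000f4                        # stop address (placeholder)

    uc = AflUnicornEngine(CONTEXT_DIR, enable_trace=False, debug_print=False)
    try:
        uc.emu_start(START_ADDR, END_ADDR, timeout=0, count=0)
    except UcError as e:
        # Convert the Unicorn error into a signal that AFL recognizes as a crash
        uc.force_crash(e)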
290
unicorn_mode/patches/afl-unicorn-cpu-inl.h
Normal file
@ -0,0 +1,290 @@
/*
   american fuzzy lop - high-performance binary-only instrumentation
   -----------------------------------------------------------------

   Written by Andrew Griffiths <agriffiths@google.com> and
              Michal Zalewski <lcamtuf@google.com>

   TCG instrumentation and block chaining support by Andrea Biondo
                                      <andrea.biondo965@gmail.com>
   Adapted for afl-unicorn by Dominik Maier <mail@dmnk.co>

   Idea & design very much by Andrew Griffiths.

   Copyright 2015, 2016 Google Inc. All rights reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at:

     http://www.apache.org/licenses/LICENSE-2.0

   This code is a shim patched into the separately-distributed source
   code of Unicorn 1.0.1. It leverages the built-in QEMU tracing functionality
   to implement AFL-style instrumentation and to take care of the remaining
   parts of the AFL fork server logic.

   The resulting QEMU binary is essentially a standalone instrumentation
   tool; for an example of how to leverage it for other purposes, you can
   have a look at afl-showmap.c.

 */

#include <sys/shm.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "../../config.h"

/***************************
 * VARIOUS AUXILIARY STUFF *
 ***************************/

/* A snippet patched into tb_find_slow to inform the parent process that
   we have hit a new block that hasn't been translated yet, and to tell
   it to translate within its own context, too (this avoids translation
   overhead in the next forked-off copy). */

#define AFL_UNICORN_CPU_SNIPPET1 do { \
    afl_request_tsl(pc, cs_base, flags); \
  } while (0)

/* This snippet kicks in when the instruction pointer is positioned at
   _start and does the usual forkserver stuff, not very different from
   regular instrumentation injected via afl-as.h. */

#define AFL_UNICORN_CPU_SNIPPET2 do { \
    if(unlikely(afl_first_instr == 0)) { \
      afl_setup(); \
      afl_forkserver(env); \
      afl_first_instr = 1; \
    } \
    afl_maybe_log(tb->pc); \
  } while (0)

/* We use one additional file descriptor to relay "needs translation"
   messages between the child and the fork server. */

#define TSL_FD (FORKSRV_FD - 1)

/* This is equivalent to afl-as.h: */

static unsigned char *afl_area_ptr;

/* Set in the child process in forkserver mode: */

static unsigned char afl_fork_child;
static unsigned int afl_forksrv_pid;

/* Instrumentation ratio: */

static unsigned int afl_inst_rms = MAP_SIZE;

/* Function declarations. */

static void afl_setup(void);
static void afl_forkserver(CPUArchState*);
static inline void afl_maybe_log(unsigned long);

static void afl_wait_tsl(CPUArchState*, int);
static void afl_request_tsl(target_ulong, target_ulong, uint64_t);

static TranslationBlock *tb_find_slow(CPUArchState*, target_ulong,
                                      target_ulong, uint64_t);

/* Data structure passed around by the translate handlers: */

struct afl_tsl {
  target_ulong pc;
  target_ulong cs_base;
  uint64_t flags;
};

/*************************
 * ACTUAL IMPLEMENTATION *
 *************************/

/* Set up SHM region and initialize other stuff. */

static void afl_setup(void) {

  char *id_str = getenv(SHM_ENV_VAR),
       *inst_r = getenv("AFL_INST_RATIO");

  int shm_id;

  if (inst_r) {

    unsigned int r;

    r = atoi(inst_r);

    if (r > 100) r = 100;
    if (!r) r = 1;

    afl_inst_rms = MAP_SIZE * r / 100;

  }

  if (id_str) {

    shm_id = atoi(id_str);
    afl_area_ptr = shmat(shm_id, NULL, 0);

    if (afl_area_ptr == (void*)-1) exit(1);

    /* With AFL_INST_RATIO set to a low value, we want to touch the bitmap
       so that the parent doesn't give up on us. */

    if (inst_r) afl_area_ptr[0] = 1;
  }
}

/* Fork server logic, invoked once we hit first emulated instruction. */

static void afl_forkserver(CPUArchState *env) {

  static unsigned char tmp[4];

  if (!afl_area_ptr) return;

  /* Tell the parent that we're alive. If the parent doesn't want
     to talk, assume that we're not running in forkserver mode. */

  if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;

  afl_forksrv_pid = getpid();

  /* All right, let's await orders... */

  while (1) {

    pid_t child_pid;
    int status, t_fd[2];

    /* Whoops, parent dead? */

    if (read(FORKSRV_FD, tmp, 4) != 4) exit(2);

    /* Establish a channel with child to grab translation commands. We'll
       read from t_fd[0], child will write to TSL_FD. */

    if (pipe(t_fd) || dup2(t_fd[1], TSL_FD) < 0) exit(3);
    close(t_fd[1]);

    child_pid = fork();
    if (child_pid < 0) exit(4);

    if (!child_pid) {

      /* Child process. Close descriptors and run free. */

      afl_fork_child = 1;
      close(FORKSRV_FD);
      close(FORKSRV_FD + 1);
      close(t_fd[0]);
      return;

    }

    /* Parent. */

    close(TSL_FD);

    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) exit(5);

    /* Collect translation requests until child dies and closes the pipe. */

    afl_wait_tsl(env, t_fd[0]);

    /* Get and relay exit status to parent. */

    if (waitpid(child_pid, &status, 0) < 0) exit(6);
    if (write(FORKSRV_FD + 1, &status, 4) != 4) exit(7);

  }

}


/* The equivalent of the tuple logging routine from afl-as.h. */

static inline void afl_maybe_log(unsigned long cur_loc) {

  static __thread unsigned long prev_loc;

  // DEBUG
  //printf("IN AFL_MAYBE_LOG 0x%lx\n", cur_loc);

  // MODIFIED FOR UNICORN MODE -> We want to log all addresses,
  // so the checks for 'start < addr < end' are removed
  if(!afl_area_ptr)
    return;

  // DEBUG
  //printf("afl_area_ptr = %p\n", afl_area_ptr);

  /* Looks like QEMU always maps to fixed locations, so ASAN is not a
     concern. Phew. But instruction addresses may be aligned. Let's mangle
     the value to get something quasi-uniform. */

  cur_loc  = (cur_loc >> 4) ^ (cur_loc << 8);
  cur_loc &= MAP_SIZE - 1;

  /* Implement probabilistic instrumentation by looking at scrambled block
     address. This keeps the instrumented locations stable across runs. */

  // DEBUG
  //printf("afl_inst_rms = 0x%lx\n", afl_inst_rms);

  if (cur_loc >= afl_inst_rms) return;

  // DEBUG
  //printf("cur_loc = 0x%lx\n", cur_loc);

  afl_area_ptr[cur_loc ^ prev_loc]++;
  prev_loc = cur_loc >> 1;

}


/* This code is invoked whenever QEMU decides that it doesn't have a
   translation of a particular block and needs to compute it. When this happens,
   we tell the parent to mirror the operation, so that the next fork() has a
   cached copy. */

static void afl_request_tsl(target_ulong pc, target_ulong cb, uint64_t flags) {

  struct afl_tsl t;

  if (!afl_fork_child) return;

  t.pc      = pc;
  t.cs_base = cb;
  t.flags   = flags;

  if (write(TSL_FD, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
    return;

}


/* This is the other side of the same channel. Since timeouts are handled by
   afl-fuzz simply killing the child, we can just wait until the pipe breaks. */

static void afl_wait_tsl(CPUArchState *env, int fd) {

  struct afl_tsl t;

  while (1) {

    /* Broken pipe means it's time to return to the fork server routine. */

    if (read(fd, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
      break;

    tb_find_slow(env, t.pc, t.cs_base, t.flags);
  }

  close(fd);
}
107
unicorn_mode/patches/patches.diff
Normal file
@ -0,0 +1,107 @@
diff --git a/Makefile b/Makefile
index 7d73782..fb3ccfd 100644
--- a/Makefile
+++ b/Makefile
@@ -88,6 +88,10 @@ AR = llvm-ar
 LDFLAGS := -fsanitize=address ${LDFLAGS}
 endif
 
+ifeq ($(UNICORN_AFL),yes)
+UNICORN_CFLAGS += -DUNICORN_AFL
+endif
+
 ifeq ($(CROSS),)
 CC ?= cc
 AR ?= ar
diff --git a/config.mk b/config.mk
index c3621fb..c7b4f7e 100644
--- a/config.mk
+++ b/config.mk
@@ -8,7 +8,7 @@
 # Compile with debug info when you want to debug code.
 # Change this to 'no' for release edition.
 
-UNICORN_DEBUG ?= yes
+UNICORN_DEBUG ?= no
 
 ################################################################################
 # Specify which archs you want to compile in. By default, we build all archs.
@@ -28,3 +28,9 @@ UNICORN_STATIC ?= yes
 # a shared library.
 
 UNICORN_SHARED ?= yes
+
+
+################################################################################
+# Changing 'UNICORN_AFL = yes' to 'UNICORN_AFL = no' disables AFL instrumentation
+
+UNICORN_AFL ?= yes
diff --git a/qemu/cpu-exec.c b/qemu/cpu-exec.c
index 7755adf..8114b70 100644
--- a/qemu/cpu-exec.c
+++ b/qemu/cpu-exec.c
@@ -24,6 +24,11 @@
 
 #include "uc_priv.h"
 
+#if defined(UNICORN_AFL)
+#include "../afl-unicorn-cpu-inl.h"
+static int afl_first_instr = 0;
+#endif
+
 static tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr);
 static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
                                       target_ulong cs_base, uint64_t flags);
@@ -231,6 +236,10 @@ int cpu_exec(struct uc_struct *uc, CPUArchState *env)   // qq
                             next_tb & TB_EXIT_MASK, tb);
             }
 
+#if defined(UNICORN_AFL)
+            AFL_UNICORN_CPU_SNIPPET2;
+#endif
+
             /* cpu_interrupt might be called while translating the
                TB, but before it is linked into a potentially
                infinite loop and becomes env->current_tb. Avoid
@@ -369,6 +378,11 @@ static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
 not_found:
     /* if no translated code available, then translate it now */
     tb = tb_gen_code(cpu, pc, cs_base, (int)flags, 0);   // qq
+
+#if defined(UNICORN_AFL)
+    /* There seems to be no chaining in unicorn ever? :( */
+    AFL_UNICORN_CPU_SNIPPET1;
+#endif
 
 found:
     /* Move the last found TB to the head of the list */
diff --git a/qemu/translate-all.c b/qemu/translate-all.c
index 1a96c34..7ef4878 100644
--- a/qemu/translate-all.c
+++ b/qemu/translate-all.c
@@ -403,11 +403,25 @@ static PageDesc *page_find_alloc(struct uc_struct *uc, tb_page_addr_t index, int
 
 #if defined(CONFIG_USER_ONLY)
     /* We can't use g_malloc because it may recurse into a locked mutex. */
+#if defined(UNICORN_AFL)
+    /* This was added by unicorn-afl to bail out semi-gracefully if out of memory. */
+# define ALLOC(P, SIZE)                                 \
+    do {                                                \
+        void* _tmp = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
+                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
+        if (_tmp == (void*)-1) {                        \
+            qemu_log(">>> Out of memory for stack, bailing out. <<<\n"); \
+            exit(1);                                    \
+        }                                               \
+        (P) = _tmp;                                     \
+    } while (0)
+#else /* !UNICORN_AFL */
 # define ALLOC(P, SIZE)                                 \
     do {                                                \
         P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
     } while (0)
+#endif /* UNICORN_AFL */
 #else
 # define ALLOC(P, SIZE) \
     do { P = g_malloc0(SIZE); } while (0)
41
unicorn_mode/samples/simple/COMPILE.md
Normal file
@ -0,0 +1,41 @@
Compiling simple_target.c
==========================

You shouldn't need to compile simple_target.c since a MIPS binary version is
pre-built and shipped with afl-unicorn. This file documents how the binary
was built in case you want to rebuild it or recompile it for any reason.

The pre-built binary (simple_target.bin) was built by cross-compiling
simple_target.c for MIPS using the mips-linux-gnu-gcc package on an Ubuntu
16.04 LTS system. This cross compiler (and associated binutils) was installed
from apt-get packages:

```
sudo apt-get install gcc-mips-linux-gnu
```

simple_target.c was compiled without optimization, position-independent,
and without standard libraries using the following command line:

```
mips-linux-gnu-gcc -o simple_target.elf simple_target.c -fPIC -O0 -nostdlib
```

The .text section from the resulting ELF binary was then extracted to create
the raw binary blob that is loaded and emulated by simple_test_harness.py:

```
mips-linux-gnu-objcopy -O binary --only-section=.text simple_target.elf simple_target.bin
```

In summary, to recreate simple_target.bin execute the following:

```
mips-linux-gnu-gcc -o simple_target.elf simple_target.c -fPIC -O0 -nostdlib \
  && mips-linux-gnu-objcopy -O binary --only-section=.text simple_target.elf simple_target.bin \
  && rm simple_target.elf
```

Note that the output of this is padded with nulls for 16-byte alignment. This is
important when emulating it, as NOPs will be added after the return of main()
as necessary.
1
unicorn_mode/samples/simple/in/a
Normal file
@ -0,0 +1 @@
a
1
unicorn_mode/samples/simple/out/.cur_input
Normal file
@ -0,0 +1 @@
a
3
unicorn_mode/samples/simple/out/cmdline
Normal file
@ -0,0 +1,3 @@
python
../samples/simple/simple_test_harness.py
../samples/simple/sample_inputs
1
unicorn_mode/samples/simple/out/fuzz_bitmap
Normal file
File diff suppressed because one or more lines are too long
28
unicorn_mode/samples/simple/out/fuzzer_stats
Normal file
@ -0,0 +1,28 @@
start_time        : 1563137991
last_update       : 1563155870
fuzzer_pid        : 16972
cycles_done       : 86
execs_done        : 4807
execs_per_sec     : 0.00
paths_total       : 1
paths_favored     : 1
paths_found       : 0
paths_imported    : 0
max_depth         : 1
cur_path          : 0
pending_favs      : 0
pending_total     : 0
variable_paths    : 0
stability         : 100.00%
bitmap_cvg        : 0.00%
unique_crashes    : 0
unique_hangs      : 0
last_path         : 0
last_crash        : 0
last_hang         : 0
execs_since_crash : 4807
exec_timeout      : 9999999
afl_banner        : python
afl_version       : 2.52b
target_mode       : unicorn
command_line      : /mnt/c/Users/DMaier/tmp/afl-unicorn/afl-fuzz -U -m none -t 9999999 -i ../samples/simple/in -o ../samples/simple/out -- python ../samples/simple/simple_test_harness.py ../samples/simple/sample_inputs
7
unicorn_mode/samples/simple/out/plot_data
Normal file
@ -0,0 +1,7 @@
# unix_time, cycles_done, cur_path, paths_total, pending_total, pending_favs, map_size, unique_crashes, unique_hangs, max_depth, execs_per_sec
1563154187, 0, 0, 1, 1, 1, 0.00%, 0, 0, 1, inf
1563154197, 2, 0, 1, 0, 0, 0.00%, 0, 0, 1, 36.23
1563154202, 9, 0, 1, 0, 0, 0.00%, 0, 0, 1, 32.83
1563154207, 15, 0, 1, 0, 0, 0.00%, 0, 0, 1, 31.33
1563154212, 22, 0, 1, 0, 0, 0.00%, 0, 0, 1, 31.05
1563154217, 29, 0, 1, 0, 0, 0.00%, 0, 0, 1, 33.90
1
unicorn_mode/samples/simple/out/queue/id:000000,orig:a
Normal file
@ -0,0 +1 @@
a
1
unicorn_mode/samples/simple/sample_inputs/sample1.bin
Normal file
@ -0,0 +1 @@
abcd
BIN
unicorn_mode/samples/simple/sample_inputs/sample2.bin
Normal file
Binary file not shown.
1
unicorn_mode/samples/simple/sample_inputs/sample3.bin
Normal file
@ -0,0 +1 @@

1
unicorn_mode/samples/simple/sample_inputs/sample4.bin
Normal file
@ -0,0 +1 @@

1
unicorn_mode/samples/simple/sample_inputs/sample5.bin
Normal file
@ -0,0 +1 @@

BIN
unicorn_mode/samples/simple/simple_target.bin
Normal file
Binary file not shown.
37
unicorn_mode/samples/simple/simple_target.c
Normal file
@ -0,0 +1,37 @@
/*
 * Sample target file to test afl-unicorn fuzzing capabilities.
 * This is a very trivial example that will crash pretty easily
 * in several different exciting ways.
 *
 * Input is assumed to come from a buffer located at DATA_ADDRESS
 * (0x00300000), so make sure that your Unicorn emulation of this
 * puts user data there.
 *
 * Written by Nathan Voss <njvoss99@gmail.com>
 */

// Magic address where mutated data will be placed
#define DATA_ADDRESS 0x00300000

int main(void)
{
    unsigned char* data_buf = (unsigned char*)DATA_ADDRESS;

    if(data_buf[20] != 0)
    {
        // Cause an 'invalid read' crash if data[20] is non-zero
        unsigned char invalid_read = *(unsigned char*)0x00000000;
    }
    else if(data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2])
    {
        // Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2]
        unsigned char invalid_read = *(unsigned char*)0x00000000;
    }
    else if(data_buf[9] == 0x00 && data_buf[10] != 0x00 && data_buf[11] == 0x00)
    {
        // Cause a crash if data[10] is not zero, but [9] and [11] are zero
        unsigned char invalid_read = *(unsigned char*)0x00000000;
    }

    return 0;
}
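Any input whose 21st byte is non-zero exercises the first crash branch above. A seed that triggers it can be generated like this (an added illustration; crash_seed.bin is a hypothetical file name, not shipped with the commit):

    # Build a 21-byte input with data[20] != 0 to hit the first crash branch
    with open("crash_seed.bin", "wb") as f:
        f.write(b"\x00" * 20 + b"\x01")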
170
unicorn_mode/samples/simple/simple_test_harness.py
Normal file
@ -0,0 +1,170 @@
"""
|
||||
Simple test harness for AFL's Unicorn Mode.
|
||||
|
||||
This loads the simple_target.bin binary (precompiled as MIPS code) into
|
||||
Unicorn's memory map for emulation, places the specified input into
|
||||
simple_target's buffer (hardcoded to be at 0x300000), and executes 'main()'.
|
||||
If any crashes occur during emulation, this script throws a matching signal
|
||||
to tell AFL that a crash occurred.
|
||||
|
||||
Run under AFL as follows:
|
||||
|
||||
$ cd <afl_path>/unicorn_mode/samples/simple/
|
||||
$ ../../../afl-fuzz -U -m none -i ./sample_inputs -o ./output -- python simple_test_harness.py @@
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import signal
|
||||
|
||||
from unicorn import *
|
||||
from unicorn.mips_const import *
|
||||
|
||||
# Path to the file containing the binary to emulate
|
||||
BINARY_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'simple_target.bin')
|
||||
|
||||
# Memory map for the code to be tested
|
||||
CODE_ADDRESS = 0x00100000 # Arbitrary address where code to test will be loaded
|
||||
CODE_SIZE_MAX = 0x00010000 # Max size for the code (64kb)
|
||||
STACK_ADDRESS = 0x00200000 # Address of the stack (arbitrarily chosen)
|
||||
STACK_SIZE = 0x00010000 # Size of the stack (arbitrarily chosen)
|
||||
DATA_ADDRESS = 0x00300000 # Address where mutated data will be placed
|
||||
DATA_SIZE_MAX = 0x00010000 # Maximum allowable size of mutated data
|
||||
|
||||
try:
|
||||
# If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
|
||||
from capstone import *
|
||||
cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
|
||||
def unicorn_debug_instruction(uc, address, size, user_data):
|
||||
mem = uc.mem_read(address, size)
|
||||
for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
|
||||
print(" Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
|
||||
except ImportError:
|
||||
def unicorn_debug_instruction(uc, address, size, user_data):
|
||||
print(" Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
|
||||
|
||||
def unicorn_debug_block(uc, address, size, user_data):
|
||||
print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
|
||||
|
||||
def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
|
||||
if access == UC_MEM_WRITE:
|
||||
print(" >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
|
||||
else:
|
||||
print(" >>> Read: addr=0x{0:016x} size={1}".format(address, size))
|
||||
|
||||
def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
|
||||
if access == UC_MEM_WRITE_UNMAPPED:
|
||||
print(" >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
|
||||
else:
|
||||
print(" >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))
|
||||
|
||||
def force_crash(uc_error):
|
||||
# This function should be called to indicate to AFL that a crash occurred during emulation.
|
||||
# Pass in the exception received from Uc.emu_start()
|
||||
mem_errors = [
|
||||
UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED,
|
||||
UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED,
|
||||
UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED,
|
||||
]
|
||||
if uc_error.errno in mem_errors:
|
||||
# Memory error - throw SIGSEGV
|
||||
os.kill(os.getpid(), signal.SIGSEGV)
|
||||
elif uc_error.errno == UC_ERR_INSN_INVALID:
|
||||
# Invalid instruction - throw SIGILL
|
||||
os.kill(os.getpid(), signal.SIGILL)
|
||||
else:
|
||||
# Not sure what happened - throw SIGABRT
|
||||
os.kill(os.getpid(), signal.SIGABRT)
|
||||
|
||||
def main():
|
||||
|
||||
parser = argparse.ArgumentParser(description="Test harness for simple_target.bin")
|
||||
parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input to load")
|
||||
parser.add_argument('-d', '--debug', default=False, action="store_true", help="Enables debug tracing")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Instantiate a MIPS32 big endian Unicorn Engine instance
|
||||
uc = Uc(UC_ARCH_MIPS, UC_MODE_MIPS32 + UC_MODE_BIG_ENDIAN)
|
||||
|
||||
if args.debug:
|
||||
uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
|
||||
uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
|
||||
uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
|
||||
uc.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, unicorn_debug_mem_invalid_access)
|
||||
|
||||
#---------------------------------------------------
|
||||
# Load the binary to emulate and map it into memory
|
||||
|
||||
print("Loading data input from {}".format(args.input_file))
|
||||
binary_file = open(BINARY_FILE, 'rb')
|
||||
binary_code = binary_file.read()
|
||||
binary_file.close()
|
||||
|
||||
# Apply constraints to the mutated input
|
||||
if len(binary_code) > CODE_SIZE_MAX:
|
||||
print("Binary code is too large (> {} bytes)".format(CODE_SIZE_MAX))
|
||||
return
|
||||
|
||||
# Write the mutated command into the data buffer
|
||||
uc.mem_map(CODE_ADDRESS, CODE_SIZE_MAX)
|
||||
uc.mem_write(CODE_ADDRESS, binary_code)
|
||||
|
||||
# Set the program counter to the start of the code
|
||||
start_address = CODE_ADDRESS # Address of entry point of main()
|
||||
end_address = CODE_ADDRESS + 0xf4 # Address of last instruction in main()
|
||||
uc.reg_write(UC_MIPS_REG_PC, start_address)
|
||||
|
||||
#-----------------
|
||||
# Setup the stack
|
||||
|
||||
uc.mem_map(STACK_ADDRESS, STACK_SIZE)
|
||||
uc.reg_write(UC_MIPS_REG_SP, STACK_ADDRESS + STACK_SIZE)
|
||||
|
||||
#-----------------------------------------------------
|
||||
# Emulate 1 instruction to kick off AFL's fork server
|
||||
# THIS MUST BE DONE BEFORE LOADING USER DATA!
|
||||
# If this isn't done every single run, the AFL fork server
|
||||
# will not be started appropriately and you'll get erratic results!
|
||||
# It doesn't matter what this returns with, it just has to execute at
|
||||
# least one instruction in order to get the fork server started.
|
||||
|
||||
# Execute 1 instruction just to startup the forkserver
|
||||
print("Starting the AFL forkserver by executing 1 instruction")
|
||||
try:
|
||||
uc.emu_start(uc.reg_read(UC_MIPS_REG_PC), 0, 0, count=1)
|
||||
except UcError as e:
|
||||
print("ERROR: Failed to execute a single instruction (error: {})!".format(e))
|
||||
return
|
||||
|
||||
#-----------------------------------------------
|
||||
# Load the mutated input and map it into memory
|
||||
|
||||
# Load the mutated input from disk
|
||||
print("Loading data input from {}".format(args.input_file))
|
||||
input_file = open(args.input_file, 'rb')
|
||||
input = input_file.read()
|
||||
input_file.close()
|
||||
|
||||
# Apply constraints to the mutated input
|
||||
if len(input) > DATA_SIZE_MAX:
|
||||
print("Test input is too long (> {} bytes)".format(DATA_SIZE_MAX))
|
||||
return
|
||||
|
||||
# Write the mutated command into the data buffer
|
||||
uc.mem_map(DATA_ADDRESS, DATA_SIZE_MAX)
|
||||
uc.mem_write(DATA_ADDRESS, input)
|
||||
|
||||
#------------------------------------------------------------
|
||||
# Emulate the code, allowing it to process the mutated input
|
||||
|
||||
print("Executing until a crash or execution reaches 0x{0:016x}".format(end_address))
|
||||
try:
|
||||
result = uc.emu_start(uc.reg_read(UC_MIPS_REG_PC), end_address, timeout=0, count=0)
|
||||
except UcError as e:
|
||||
print("Execution failed with error: {}".format(e))
|
||||
force_crash(e)
|
||||
|
||||
print("Done.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|