Added AFL-Unicorn mode

Dominik Maier
2019-07-25 02:26:51 +02:00
parent 9246f21f2a
commit 00dc8a0ad5
36 changed files with 2681 additions and 46 deletions

.gitignore

@@ -19,3 +19,5 @@ afl-tmin
 as
 qemu_mode/qemu-3.1.0
 qemu_mode/qemu-3.1.0.tar.xz
+unicorn_mode/unicorn
+unicorn_mode/unicorn-*

afl-analyze.c

@@ -77,7 +77,6 @@ static volatile u8
   child_timed_out; /* Child timed out? */

 /* Constants used for describing byte behavior. */

 #define RESP_NONE 0x00 /* Changing byte is a no-op. */

@@ -741,7 +740,8 @@ static void usage(u8* argv0) {
       " -f file - input file read by the tested program (stdin)\n"
       " -t msec - timeout for each run (%u ms)\n"
       " -m megs - memory limit for child process (%u MB)\n"
-      " -Q - use binary-only instrumentation (QEMU mode)\n\n"
+      " -Q - use binary-only instrumentation (QEMU mode)\n"
+      " -U - use unicorn-based instrumentation (Unicorn mode)\n\n"

       "Analysis settings:\n\n"

@@ -867,20 +867,19 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 }

 /* Main entry point */

 int main(int argc, char** argv) {

   s32 opt;
-  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
   char** use_argv;

   doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;

   SAYF(cCYA "afl-analyze" VERSION cRST " by <lcamtuf@google.com>\n");

-  while ((opt = getopt(argc,argv,"+i:f:m:t:eQ")) > 0)
+  while ((opt = getopt(argc,argv,"+i:f:m:t:eQU")) > 0)

     switch (opt) {

@@ -960,6 +959,14 @@ int main(int argc, char** argv) {
         qemu_mode = 1;
         break;

+      case 'U':
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+        unicorn_mode = 1;
+        break;
+
       default:
         usage(argv[0]);

afl-cmin

@@ -49,9 +49,9 @@ MEM_LIMIT=100
 TIMEOUT=none

 unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
-  AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE
+  AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE

-while getopts "+i:o:f:m:t:eQC" opt; do
+while getopts "+i:o:f:m:t:eQUC" opt; do

   case "$opt" in

@@ -83,6 +83,11 @@ while getopts "+i:o:f:m:t:eQC" opt; do
         test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
         QEMU_MODE=1
         ;;
+    "U")
+        EXTRA_PAR="$EXTRA_PAR -U"
+        test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
+        UNICORN_MODE=1
+        ;;
     "?")
         exit 1
         ;;

@@ -111,6 +116,7 @@ Execution control settings:
  -m megs - memory limit for child process ($MEM_LIMIT MB)
  -t msec - run time limit for child process (none)
  -Q - use binary-only instrumentation (QEMU mode)
+ -U - use unicorn-based instrumentation (Unicorn mode)

 Minimization settings:

@@ -196,7 +202,7 @@ if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then
 fi

-if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" ]; then
+if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then

   if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
     echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2

afl-fuzz.c

@@ -208,6 +208,7 @@ EXP_ST u8 skip_deterministic, /* Skip deterministic stages? */
   shuffle_queue, /* Shuffle input queue? */
   bitmap_changed = 1, /* Time to update bitmap? */
   qemu_mode, /* Running in QEMU mode? */
+  unicorn_mode, /* Running in Unicorn mode? */
   skip_requested, /* Skip request, via SIGUSR1 */
   run_over10m, /* Run time over 10 minutes? */
   persistent_mode, /* Running in persistent mode? */

@@ -1547,6 +1548,7 @@ static void minimize_bits(u8* dst, u8* src) {
 }

 /* Find first power of two greater or equal to val (assuming val under
    2^63). */

@@ -1569,6 +1571,7 @@ static u64 next_p2(u64 val) {
    for every byte in the bitmap. We win that slot if there is no previous
    contender, or if the contender has a more favorable speed x size factor. */

 static void update_bitmap_score(struct queue_entry* q) {

   u32 i;

@@ -1584,6 +1587,7 @@ static void update_bitmap_score(struct queue_entry* q) {
     if (top_rated[i]) {

+      /* Faster-executing or smaller test cases are favored. */
       u64 top_rated_fuzz_p2 = next_p2 (top_rated[i]->n_fuzz);
       u64 top_rated_fav_factor = top_rated[i]->exec_us * top_rated[i]->len;

@@ -1682,7 +1686,6 @@ static void cull_queue(void) {
 }

 /* Load postprocessor, if available. */

 static void setup_post(void) {

@@ -2301,6 +2304,8 @@ EXP_ST void init_forkserver(char** argv) {
   if (!forksrv_pid) {

+    /* CHILD PROCESS */
+
     struct rlimit r;

     /* Umpf. On OpenBSD, the default fd limit for root users is set to

@@ -2408,6 +2413,8 @@ EXP_ST void init_forkserver(char** argv) {
   }

+  /* PARENT PROCESS */
+
   /* Close the unneeded endpoints. */

   close(ctl_pipe[0]);

@@ -3755,7 +3762,7 @@ static void write_stats_file(double bitmap_cvg, double stability, double eps) {
              "exec_timeout : %u\n"
              "afl_banner : %s\n"
              "afl_version : " VERSION "\n"
-             "target_mode : %s%s%s%s%s%s%s\n"
+             "target_mode : %s%s%s%s%s%s%s%s\n"
              "command_line : %s\n",
              start_time / 1000, get_cur_time() / 1000, getpid(),
              queue_cycle ? (queue_cycle - 1) : 0, total_execs, eps,

@@ -3765,10 +3772,10 @@ static void write_stats_file(double bitmap_cvg, double stability, double eps) {
              unique_hangs, last_path_time / 1000, last_crash_time / 1000,
              last_hang_time / 1000, total_execs - last_crash_execs,
              exec_tmout, use_banner,
-             qemu_mode ? "qemu " : "", dumb_mode ? " dumb " : "",
+             unicorn_mode ? "unicorn" : "", qemu_mode ? "qemu " : "", dumb_mode ? " dumb " : "",
              no_forkserver ? "no_forksrv " : "", crash_mode ? "crash " : "",
              persistent_mode ? "persistent " : "", deferred_mode ? "deferred " : "",
-             (qemu_mode || dumb_mode || no_forkserver || crash_mode ||
+             (unicorn_mode || qemu_mode || dumb_mode || no_forkserver || crash_mode ||
               persistent_mode || deferred_mode) ? "" : "default",
              orig_cmdline);

   /* ignore errors */

@@ -4702,7 +4709,7 @@ static void show_init_stats(void) {
   SAYF("\n");

-  if (avg_us > (qemu_mode ? 50000 : 10000))
+  if (avg_us > ((qemu_mode || unicorn_mode) ? 50000 : 10000))
     WARNF(cLRD "The target binary is pretty slow! See %s/perf_tips.txt.",
           doc_path);

@@ -4779,6 +4786,7 @@ static void show_init_stats(void) {
 }

 #ifdef USE_PYTHON
 static u8 trim_case_python(char** argv, struct queue_entry* q, u8* in_buf) {

@@ -11090,7 +11098,7 @@ EXP_ST void check_binary(u8* fname) {
 #endif /* ^!__APPLE__ */

-  if (!qemu_mode && !dumb_mode &&
+  if (!qemu_mode && !unicorn_mode && !dumb_mode &&
       !memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) {

     SAYF("\n" cLRD "[-] " cRST

@@ -11110,15 +11118,15 @@ EXP_ST void check_binary(u8* fname) {
   }

-  if (qemu_mode &&
+  if ((qemu_mode || unicorn_mode) &&
       memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) {

     SAYF("\n" cLRD "[-] " cRST
          "This program appears to be instrumented with afl-gcc, but is being run in\n"
-         " QEMU mode (-Q). This is probably not what you want - this setup will be\n"
-         " slow and offer no practical benefits.\n");
+         " QEMU or Unicorn mode (-Q or -U). This is probably not what you want -\n"
+         " this setup will be slow and offer no practical benefits.\n");

-    FATAL("Instrumentation found in -Q mode");
+    FATAL("Instrumentation found in -Q or -U mode");
   }

@@ -11245,6 +11253,7 @@ static void usage(u8* argv0) {
       " -t msec - timeout for each run (auto-scaled, 50-%u ms)\n"
       " -m megs - memory limit for child process (%u MB)\n"
       " -Q - use binary-only instrumentation (QEMU mode)\n"
+      " -U - use Unicorn-based instrumentation (Unicorn mode)\n\n"
       " -L minutes - use MOpt(imize) mode and set the limit time for entering the\n"
       " pacemaker mode (minutes of no new paths, 0 = immediately).\n"
       " a recommended value is 10-60. see docs/README.MOpt\n\n"

@@ -11863,7 +11872,6 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 }

 /* Make a copy of the current command line. */

 static void save_cmdline(u32 argc, char** argv) {

@@ -11925,7 +11933,7 @@ int main(int argc, char** argv) {
   gettimeofday(&tv, &tz);
   init_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();

-  while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:Qe:p:s:V:E:L:")) > 0)
+  while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:QUe:p:s:V:E:L:")) > 0)

     switch (opt) {

@@ -12126,6 +12134,15 @@ int main(int argc, char** argv) {
         break;

+      case 'U': /* Unicorn mode */
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        unicorn_mode = 1;
+
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+        break;
+
       case 'V': {
         most_time_key = 1;
         if (sscanf(optarg, "%llu", &most_time) < 1 || optarg[0] == '-')

@@ -12259,6 +12276,7 @@ int main(int argc, char** argv) {
     if (crash_mode) FATAL("-C and -n are mutually exclusive");
     if (qemu_mode) FATAL("-Q and -n are mutually exclusive");
+    if (unicorn_mode) FATAL("-U and -n are mutually exclusive");

   }

afl-showmap.c

@@ -72,7 +72,6 @@ static volatile u8
   child_timed_out, /* Child timed out? */
   child_crashed; /* Child crashed? */

 /* Classify tuple counts. Instead of mapping to individual bits, as in
    afl-fuzz.c, we map to more user-friendly numbers between 1 and 8. */

@@ -405,7 +404,9 @@ static void usage(u8* argv0) {
       " -t msec - timeout for each run (none)\n"
       " -m megs - memory limit for child process (%u MB)\n"
-      " -Q - use binary-only instrumentation (QEMU mode)\n\n"
+      " -Q - use binary-only instrumentation (QEMU mode)\n"
+      " -U - use Unicorn-based instrumentation (Unicorn mode)\n"
+      " (Not necessary, here for consistency with other afl-* tools)\n\n"

       "Other settings:\n\n"

@@ -534,19 +535,18 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 }

 /* Main entry point */

 int main(int argc, char** argv) {

   s32 opt;
-  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
   u32 tcnt;
   char** use_argv;

   doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;

-  while ((opt = getopt(argc,argv,"+o:m:t:A:eqZQbc")) > 0)
+  while ((opt = getopt(argc,argv,"+o:m:t:A:eqZQUbc")) > 0)

     switch (opt) {

@@ -643,6 +643,14 @@ int main(int argc, char** argv) {
         qemu_mode = 1;
         break;

+      case 'U':
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+        unicorn_mode = 1;
+        break;
+
       case 'b':

         /* Secret undocumented mode. Writes output in raw binary format

afl-tmin.c

@@ -898,7 +898,9 @@ static void usage(u8* argv0) {
       " -f file - input file read by the tested program (stdin)\n"
       " -t msec - timeout for each run (%u ms)\n"
       " -m megs - memory limit for child process (%u MB)\n"
-      " -Q - use binary-only instrumentation (QEMU mode)\n\n"
+      " -Q - use binary-only instrumentation (QEMU mode)\n"
+      " -U - use Unicorn-based instrumentation (Unicorn mode)\n\n"
+      " (Not necessary, here for consistency with other afl-* tools)\n\n"

       "Minimization settings:\n\n"

@@ -1025,7 +1027,6 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 }

 /* Read mask bitmap from file. This is for the -B option. */

 static void read_bitmap(u8* fname) {

@@ -1047,14 +1048,14 @@ static void read_bitmap(u8* fname) {
 int main(int argc, char** argv) {

   s32 opt;
-  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+  u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
   char** use_argv;

   doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;

   SAYF(cCYA "afl-tmin" VERSION cRST " by <lcamtuf@google.com>\n");

-  while ((opt = getopt(argc,argv,"+i:o:f:m:t:B:xeQ")) > 0)
+  while ((opt = getopt(argc,argv,"+i:o:f:m:t:B:xeQU")) > 0)

     switch (opt) {
@@ -1146,6 +1147,14 @@ int main(int argc, char** argv) {
         qemu_mode = 1;
         break;

+      case 'U':
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+        unicorn_mode = 1;
+        break;
+
       case 'B': /* load bitmap */

         /* This is a secret undocumented option! It is speculated to be useful

config.h

@@ -59,6 +59,10 @@
 #define MEM_LIMIT_QEMU 200

+/* Default memory limit when running in Unicorn mode (MB): */
+
+#define MEM_LIMIT_UNICORN 200
+
 /* Number of calibration cycles per every new test case (and for test
    cases that show variable behavior): */

docs/binaryonly_fuzzing.txt

@@ -12,7 +12,7 @@ The following is a description of how these can be fuzzed with afl++
 !!!!!
 TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then
-       use afl -Q qemu_mode, or better: use both in parallel
+       use afl -Q qemu_mode.
 !!!!!
@@ -27,6 +27,16 @@ It is the easiest to use alternative and even works for cross-platform binaries.
 As it is included in afl++ this needs no URL.

+UNICORN
+-------
+Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
+In contrast to QEMU, Unicorn does not offer full-system or even userland emulation.
+Runtime environments and/or loaders have to be written from scratch, if needed.
+On top, block chaining has been removed. This means the speed boost introduced into
+the patched QEMU mode of afl++ cannot simply be ported over to Unicorn.
+For further information, check out ./unicorn_mode.txt.
+
 DYNINST
 -------
 Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio
@@ -111,21 +121,6 @@ Pintool solutions:
 https://github.com/spinpx/afl_pin_mode <= only old Pintool version supported

-Non-AFL solutions
------------------
-There are many binary-only fuzzing frameworks. Some are great for CTFs but don't
-work with large binaries, other are very slow but have good path discovery,
-some are very hard to set-up ...
-QSYM: https://github.com/sslab-gatech/qsym
-Manticore: https://github.com/trailofbits/manticore
-S2E: https://github.com/S2E
-<please send me any missing that are good>

 That's it!
 News, corrections, updates?
 Email vh@thc.org

docs/unicorn_mode.txt

@@ -0,0 +1,107 @@
=========================================================
Unicorn-based binary-only instrumentation for afl-fuzz
=========================================================
1) Introduction
---------------
The code in ./unicorn_mode allows you to build a standalone feature that
leverages the Unicorn Engine and allows callers to obtain instrumentation
output for black-box, closed-source binary code snippets. This mechanism
can then be used by afl-fuzz to stress-test targets that couldn't be built
with afl-gcc or used in QEMU mode, or with other extensions such as
TriforceAFL.
There is a significant performance penalty compared to native AFL,
but at least we're able to use AFL on these binaries, right?
The idea and much of the implementation comes from Nathan Voss <njvoss299@gmail.com>.
2) How to use
-------------
*** Building AFL's Unicorn Mode ***
First, make afl as usual.
Once that completes successfully you need to build and add in the Unicorn Mode
features:
$ cd unicorn_mode
$ ./build_unicorn_support.sh
NOTE: This script downloads a recent Unicorn Engine commit that has been tested
and is stable-ish from the Unicorn github page. If you are offline, you'll need
to hack up this script a little bit and supply your own copy of Unicorn's latest
stable release. It's not very hard, just check out the beginning of the
build_unicorn_support.sh script and adjust as necessary.
Building Unicorn will take a little bit (~5-10 minutes). Once it completes
it automatically compiles a sample application and verifies that it works.
*** Fuzzing with Unicorn Mode ***
To really use unicorn-mode effectively you need to prepare the following
(a minimal harness sketch follows this list):
* Relevant binary code to be fuzzed
* Knowledge of the memory map and good starting state
* Folder containing sample inputs to start fuzzing with
- Same ideas as any other AFL inputs
- Quality/speed of results will depend greatly on quality of starting
samples
- See AFL's guidance on how to create a sample corpus
* Unicorn-based test harness which:
- Adds memory map regions
- Loads binary code into memory
- Emulates at least one instruction*
- Yeah, this is lame. See 'Gotchas' section below for more info
- Loads and verifies data to fuzz from a command-line specified file
- AFL will provide mutated inputs by changing the file passed to
the test harness
- Presumably the data to be fuzzed is at a fixed buffer address
- If input constraints (size, invalid bytes, etc.) are known they
should be checked after the file is loaded. If a constraint
fails, just exit the test harness. AFL will treat the input as
'uninteresting' and move on.
- Sets up registers and memory state for beginning of test
- Emulates the code of interest from beginning to end
- If a crash is detected, the test harness must 'crash' by
  throwing a signal (SIGSEGV, SIGKILL, SIGABRT, etc.)
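To make this more concrete, here is a minimal sketch of such a harness in
Python. It assumes a small x86-64 code snippet that reads its input from a
fixed buffer; all addresses, sizes and the 'target_code.bin' file name are
hypothetical placeholders, not part of this commit. The real templates in
unicorn_mode/helper_scripts and unicorn_mode/samples are the authoritative
starting points.

  import os
  import signal
  import sys

  from unicorn import Uc, UcError, UC_ARCH_X86, UC_MODE_64
  from unicorn.x86_const import UC_X86_REG_RSP

  # Hypothetical, fixed memory layout of the snippet under test.
  CODE_ADDR  = 0x00100000        # where the dumped code bytes are mapped
  STACK_ADDR = 0x00200000
  INPUT_ADDR = 0x00300000        # fixed buffer the snippet reads from
  PAGE       = 0x1000
  END_ADDR   = CODE_ADDR + 0x42  # first address after the code under test

  def main():
      code = open("target_code.bin", "rb").read()

      uc = Uc(UC_ARCH_X86, UC_MODE_64)

      # Add memory map regions and load the binary code.
      uc.mem_map(CODE_ADDR, (len(code) + PAGE - 1) & ~(PAGE - 1))
      uc.mem_map(STACK_ADDR, PAGE)
      uc.mem_map(INPUT_ADDR, PAGE)
      uc.mem_write(CODE_ADDR, code)
      uc.reg_write(UC_X86_REG_RSP, STACK_ADDR + PAGE - 8)

      # Emulate a single instruction first, so the AFL fork server is
      # started before the input file is read (see 'Gotchas' below).
      uc.emu_start(CODE_ADDR, 0, 0, count=1)

      # Only now load the mutated input that AFL passes as a file argument.
      data = open(sys.argv[1], "rb").read()
      if len(data) > PAGE:
          return      # constraint violated: exit, AFL skips the input
      uc.mem_write(INPUT_ADDR, data)

      # Emulate the code of interest from beginning to end.
      try:
          uc.emu_start(CODE_ADDR, END_ADDR)
      except UcError:
          # 'Crash' in a way that afl-fuzz can detect.
          os.kill(os.getpid(), signal.SIGSEGV)

  if __name__ == "__main__":
      main()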
Once you have all those things ready to go you just need to run afl-fuzz in
'unicorn-mode' by passing in the '-U' flag:
$ afl-fuzz -U -m none -i /path/to/inputs -o /path/to/results -- ./test_harness @@
The normal afl-fuzz command line format applies to everything here. Refer to
AFL's main documentation for more info about how to use afl-fuzz effectively.
For a much clearer vision of what all of this looks like, please refer to the
sample provided in the 'unicorn_mode/samples' directory. There is also a blog
post that goes over the basics at:
https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf
The 'helper_scripts' directory also contains several helper scripts that allow you
to dump context from a running process, load it, and hook heap allocations. For details
on how to use these, check out the follow-up blog post to the one linked above.
An example use of AFL-Unicorn mode is discussed in the paper Unicorefuzz:
https://www.usenix.org/conference/woot19/presentation/maier
3) Gotchas, feedback, bugs
--------------------------
To make sure that AFL's fork server starts up correctly the Unicorn test
harness script must emulate at least one instruction before loading the
data that will be fuzzed from the input file. It doesn't matter what the
instruction is, nor if it is valid. This is an artifact of how the fork-server
is started and could likely be fixed with some clever re-arranging of the
patches applied to Unicorn.
Running the build script builds Unicorn and its python bindings and installs
them on your system. This installation will supersede any existing Unicorn
installation with the patched afl-unicorn version.
Refer to the unicorn_mode/samples/arm_example/arm_tester.c for an example
of how to do this properly! If you don't get this right, AFL will not
load any mutated inputs and your fuzzing will be useless!

unicorn_mode/README.md

@@ -0,0 +1,16 @@
```
__ _ _
__ _ / _| | _ _ _ __ (_) ___ ___ _ __ _ __
/ _` | |_| |___| | | | '_ \| |/ __/ _ \| '__| '_ \
| (_| | _| |___| |_| | | | | | (_| (_) | | | | | |
\__,_|_| |_| \__,_|_| |_|_|\___\___/|_| |_| |_|
```
afl-unicorn lets you fuzz any piece of binary code that can be emulated by [Unicorn Engine](http://www.unicorn-engine.org/).
For an in-depth description of what this is, how to install it, and how to use it check out this [blog post](https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf).
For general help with AFL, please refer to both the official [AFL website](http://lcamtuf.coredump.cx/afl/) and the documents in the /doc/ directory.
Created by Nathan Voss, originally funded by [Battelle](https://www.battelle.org/cyber).
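
A minimal quick-start, combining the build and fuzz commands from docs/unicorn_mode.txt (paths are relative to the afl++ checkout; the harness script name is a placeholder for your own Unicorn-based test harness):

```
cd unicorn_mode
./build_unicorn_support.sh    # builds the patched Unicorn plus python bindings
cd ..
afl-fuzz -U -m none -i inputs/ -o results/ -- python your_test_harness.py @@
```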

unicorn_mode/build_unicorn_support.sh

@@ -0,0 +1,186 @@
#!/bin/sh
#
# american fuzzy lop - Unicorn-Mode build script
# --------------------------------------
#
# Written by Nathan Voss <njvoss99@gmail.com>
#
# Adapted from code by Andrew Griffiths <agriffiths@google.com> and
# Michal Zalewski <lcamtuf@google.com>
#
# Adapted for Afl++ by Dominik Maier <mail@dmnk.co>
#
# Copyright 2017 Battelle Memorial Institute. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This script downloads, patches, and builds a version of Unicorn with
# minor tweaks to allow Unicorn-emulated binaries to be run under
# afl-fuzz.
#
# The modifications reside in patches/*. The standalone Unicorn library
# will be written to /usr/lib/libunicornafl.so, and the Python bindings
# will be installed system-wide.
#
# You must make sure that Unicorn Engine is not already installed before
# running this script. If it is, please uninstall it first.
UNICORN_URL="https://github.com/unicorn-engine/unicorn/archive/24f55a7973278f20f0de21b904851d99d4716263.tar.gz"
UNICORN_SHA384="7180d47ca52c99b4c073a343a2ead91da1a829fdc3809f3ceada5d872e162962eab98873a8bc7971449d5f34f41fdb93"
echo "================================================="
echo "Unicorn-AFL build script"
echo "================================================="
echo
echo "[*] Performing basic sanity checks..."
if [ ! "`uname -s`" = "Linux" ]; then
echo "[-] Error: Unicorn instrumentation is supported only on Linux."
exit 1
fi
if [ ! -f "patches/afl-unicorn-cpu-inl.h" -o ! -f "../config.h" ]; then
echo "[-] Error: key files not found - wrong working directory?"
exit 1
fi
if [ ! -f "../afl-showmap" ]; then
echo "[-] Error: ../afl-showmap not found - compile AFL first!"
exit 1
fi
for i in wget python automake autoconf sha384sum; do
T=`which "$i" 2>/dev/null`
if [ "$T" = "" ]; then
echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i'."
exit 1
fi
done
if ! which easy_install > /dev/null; then
echo "[-] Error: Python setup-tools not found. Run 'sudo apt-get install python-setuptools'."
exit 1
fi
if echo "$CC" | grep -qF /afl-; then
echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool."
exit 1
fi
echo "[+] All checks passed!"
ARCHIVE="`basename -- "$UNICORN_URL"`"
CKSUM=`sha384sum -- "$ARCHIVE" 2>/dev/null | cut -d' ' -f1`
if [ ! "$CKSUM" = "$UNICORN_SHA384" ]; then
echo "[*] Downloading Unicorn from the web..."
rm -f "$ARCHIVE"
wget -O "$ARCHIVE" -- "$UNICORN_URL" || exit 1
CKSUM=`sha384sum -- "$ARCHIVE" 2>/dev/null | cut -d' ' -f1`
fi
if [ "$CKSUM" = "$UNICORN_SHA384" ]; then
echo "[+] Cryptographic signature on $ARCHIVE checks out."
else
echo "[-] Error: signature mismatch on $ARCHIVE (perhaps download error?)."
exit 1
fi
echo "[*] Uncompressing archive (this will take a while)..."
rm -rf "unicorn" || exit 1
mkdir "unicorn" || exit 1
tar xzf "$ARCHIVE" -C ./unicorn --strip-components=1 || exit 1
echo "[+] Unpacking successful."
rm -rf "$ARCHIVE" || exit 1
echo "[*] Applying patches..."
cp patches/afl-unicorn-cpu-inl.h unicorn || exit 1
patch -p1 --directory unicorn <patches/patches.diff || exit 1
echo "[+] Patching done."
echo "[*] Configuring Unicorn build..."
cd "unicorn" || exit 1
echo "[+] Configuration complete."
echo "[*] Attempting to build Unicorn (fingers crossed!)..."
UNICORN_QEMU_FLAGS='--python=python2' make || exit 1
echo "[+] Build process successful!"
echo "[*] Installing Unicorn python bindings..."
cd bindings/python || exit 1
if [ -z "$VIRTUAL_ENV" ]; then
echo "[*] Info: Installing python unicorn using --user"
python setup.py install --user || exit 1
else
echo "[*] Info: Installing python unicorn to virtualenv: $VIRTUAL_ENV"
python setup.py install || exit 1
fi
export LIBUNICORN_PATH="$(pwd)" # in theory, this allows switching between the afl-unicorn and vanilla unicorn .so files.
cd ../../ || exit 1
echo "[+] Unicorn bindings installed successfully."
# Compile the sample, run it, verify that it works!
echo "[*] Testing unicorn-mode functionality by running a sample test harness under afl-unicorn"
cd ../samples/simple || exit 1
# Run afl-showmap on the sample application. If anything comes out then it must have worked!
unset AFL_INST_RATIO
echo 0 | ../../../afl-showmap -U -m none -q -o .test-instr0 -- python simple_test_harness.py ./sample_inputs/sample1.bin || exit 1
if [ -s .test-instr0 ]
then
echo "[+] Instrumentation tests passed. "
echo "[+] All set, you can now use Unicorn mode (-U) in afl-fuzz!"
RETVAL=0
else
echo "[-] Error: Unicorn mode doesn't seem to work!"
RETVAL=1
fi
rm -f .test-instr0
exit $RETVAL

unicorn_mode/helper_scripts/template_test_harness.py

@@ -0,0 +1,104 @@
"""
template_test_harness.py
Template which loads the context of a process into a Unicorn Engine
instance, loads a custom (mutated) input, and executes the
desired code. Designed to be used in conjunction with one of the
Unicorn Context Dumper scripts.
Author:
Nathan Voss <njvoss299@gmail.com>
"""
import argparse
from unicorn import *
from unicorn.x86_const import * # TODO: Set correct architecture here as necessary
import unicorn_loader
# Simple stand-in heap to prevent OS/kernel issues
unicorn_heap = None
# Start and end address of emulation
START_ADDRESS = None # TODO: Set start address here
END_ADDRESS   = None # TODO: Set end address here
"""
Implement target-specific hooks in here.
Stub out, skip past, and re-implement necessary functionality as appropriate
"""
def unicorn_hook_instruction(uc, address, size, user_data):
# TODO: Setup hooks and handle anything you need to here
# - For example, hook malloc/free/etc. and handle it internally
pass
#------------------------
#---- Main test function
def main():
parser = argparse.ArgumentParser()
parser.add_argument('context_dir', type=str, help="Directory containing process context")
parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input content")
parser.add_argument('-d', '--debug', default=False, action="store_true", help="Dump trace info")
args = parser.parse_args()
print("Loading context from {}".format(args.context_dir))
uc = unicorn_loader.AflUnicornEngine(args.context_dir, enable_trace=args.debug, debug_print=False)
# Instantiate the hook function to avoid emulation errors
global unicorn_heap
unicorn_heap = unicorn_loader.UnicornSimpleHeap(uc, debug_print=True)
uc.hook_add(UC_HOOK_CODE, unicorn_hook_instruction)
# Execute 1 instruction just to start up the forkserver
# NOTE: This instruction will be executed again later, so be sure that
# there are no negative consequences to the overall execution state.
# If there are, change the later call to emu_start to not re-execute
# the first instruction.
print("Starting the forkserver by executing 1 instruction")
try:
uc.emu_start(START_ADDRESS, 0, 0, count=1)
except UcError as e:
print("ERROR: Failed to execute a single instruction (error: {})!".format(e))
return
# Allocate a buffer and load a mutated input and put it into the right spot
if args.input_file:
print("Loading input content from {}".format(args.input_file))
input_file = open(args.input_file, 'rb')
input_content = input_file.read()
input_file.close()
# TODO: Apply constraints to mutated input here
raise NotImplementedError('No constraints on the mutated inputs have been set!')
# Allocate a new buffer and put the input into it
buf_addr = unicorn_heap.malloc(len(input_content))
uc.mem_write(buf_addr, input_content)
print("Allocated mutated input buffer @ 0x{0:016x}".format(buf_addr))
# TODO: Set the input into the state so it will be handled
raise NotImplementedError('The mutated input was not loaded into the Unicorn state!')
# Run the test
print("Executing from 0x{0:016x} to 0x{1:016x}".format(START_ADDRESS, END_ADDRESS))
try:
result = uc.emu_start(START_ADDRESS, END_ADDRESS, timeout=0, count=0)
except UcError as e:
# If something went wrong during emulation a signal is raised to force this
# script to crash in a way that AFL can detect ('uc.force_crash()' should be
# called for any condition that you want AFL to treat as a crash).
print("Execution failed with error: {}".format(e))
uc.dump_regs()
uc.force_crash(e)
print("Final register state:")
uc.dump_regs()
print("Done.")
if __name__ == "__main__":
main()

unicorn_mode/helper_scripts/unicorn_dumper_gdb.py

@@ -0,0 +1,190 @@
"""
unicorn_dumper_gdb.py
When run with GDB sitting at a debug breakpoint, this
dumps the current state (registers/memory/etc) of
the process to a directory consisting of an index
file with register and segment information and
sub-files containing all actual process memory.
The output of this script is expected to be used
to initialize context for Unicorn emulation.
-----------
In order to run this script, GEF needs to be running in the GDB session (gef.py)
# HELPERS from: https://github.com/hugsy/gef/blob/master/gef.py
It can be loaded with:
source <path_to_gef>/gef.py
Call this function when at a breakpoint in your process with:
source unicorn_dumper_gdb.py
-----------
"""
import datetime
import hashlib
import json
import os
import sys
import time
import zlib
# GDB Python SDK
import gdb
# Maximum segment size that we'll store
# Yep, this could break stuff pretty quickly if we
# omit something that's used during emulation.
MAX_SEG_SIZE = 128 * 1024 * 1024
# Name of the index file
INDEX_FILE_NAME = "_index.json"
#----------------------
#---- Helper Functions
def map_arch():
arch = get_arch() # from GEF
if 'x86_64' in arch or 'x86-64' in arch:
return "x64"
elif 'x86' in arch or 'i386' in arch:
return "x86"
elif 'aarch64' in arch or 'arm64' in arch:
return "arm64le"
elif 'aarch64_be' in arch:
return "arm64be"
elif 'armeb' in arch:
# check for THUMB mode
cpsr = get_register('cpsr')
if (cpsr & (1 << 5)):
return "armbethumb"
else:
return "armbe"
elif 'arm' in arch:
# check for THUMB mode
cpsr = get_register('cpsr')
if (cpsr & (1 << 5)):
return "armlethumb"
else:
return "armle"
else:
return ""
#-----------------------
#---- Dumping functions
def dump_arch_info():
arch_info = {}
arch_info["arch"] = map_arch()
return arch_info
def dump_regs():
reg_state = {}
for reg in current_arch.all_registers:
reg_val = get_register(reg)
# current dumper script looks for register values to be hex strings
# reg_str = "0x{:08x}".format(reg_val)
# if "64" in get_arch():
# reg_str = "0x{:016x}".format(reg_val)
# reg_state[reg.strip().strip('$')] = reg_str
reg_state[reg.strip().strip('$')] = reg_val
return reg_state
def dump_process_memory(output_dir):
# Segment information dictionary
final_segment_list = []
# GEF:
vmmap = get_process_maps()
if not vmmap:
print("No address mapping information found")
return final_segment_list
for entry in vmmap:
if entry.page_start == entry.page_end:
continue
seg_info = {'start': entry.page_start, 'end': entry.page_end, 'name': entry.path, 'permissions': {
"r": entry.is_readable() > 0,
"w": entry.is_writable() > 0,
"x": entry.is_executable() > 0
}, 'content_file': ''}
# "(deleted)" may or may not be valid, but don't push it.
if entry.is_readable() and not '(deleted)' in entry.path:
try:
# Compress and dump the content to a file
seg_content = read_memory(entry.page_start, entry.size)
if(seg_content == None):
print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.page_start, entry.path))
else:
print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.page_start, len(seg_content), entry.path, repr(seg_info['permissions'])))
compressed_seg_content = zlib.compress(seg_content)
md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
seg_info["content_file"] = md5_sum
# Write the compressed contents to disk
out_file = open(os.path.join(output_dir, md5_sum), 'wb')
out_file.write(compressed_seg_content)
out_file.close()
except:
print("Exception reading segment ({}): {}".format(entry.path, sys.exc_info()[0]))
else:
print("Skipping segment {0}@0x{1:016x}".format(entry.path, entry.page_start))
# Add the segment to the list
final_segment_list.append(seg_info)
return final_segment_list
#----------
#---- Main
def main():
print("----- Unicorn Context Dumper -----")
print("You must be actively debugging before running this!")
print("If it fails, double check that you are actively debugging before running.")
try:
GEF_TEST = set_arch()
except Exception as e:
print("!!! GEF not running in GDB. Please run gef.py by executing:")
print('\tpython execfile ("<path_to_gef>/gef.py")')
return
try:
# Create the output directory
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
output_path = "UnicornContext_" + timestamp
if not os.path.exists(output_path):
os.makedirs(output_path)
print("Process context will be output to {}".format(output_path))
# Get the context
context = {
"arch": dump_arch_info(),
"regs": dump_regs(),
"segments": dump_process_memory(output_path),
}
# Write the index file
index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
index_file.write(json.dumps(context, indent=4))
index_file.close()
print("Done.")
except Exception as e:
print("!!! ERROR:\n\t{}".format(repr(e)))
if __name__ == "__main__":
main()

unicorn_mode/helper_scripts/unicorn_dumper_ida.py

@@ -0,0 +1,209 @@
"""
unicorn_dumper_ida.py
When run with IDA (<v7) sitting at a debug breakpoint,
dumps the current state (registers/memory/etc) of
the process to a directory consisting of an index
file with register and segment information and
sub-files containing all actual process memory.
The output of this script is expected to be used
to initialize context for Unicorn emulation.
"""
import datetime
import hashlib
import json
import os
import sys
import time
import zlib
# IDA Python SDK
from idaapi import *
from idc import *
# Maximum segment size that we'll store
# Yep, this could break stuff pretty quickly if we
# omit something that's used during emulation.
MAX_SEG_SIZE = 128 * 1024 * 1024
# Name of the index file
INDEX_FILE_NAME = "_index.json"
#----------------------
#---- Helper Functions
def get_arch():
if ph.id == PLFM_386 and ph.flag & PR_USE64:
return "x64"
elif ph.id == PLFM_386 and ph.flag & PR_USE32:
return "x86"
elif ph.id == PLFM_ARM and ph.flag & PR_USE64:
if cvar.inf.is_be():
return "arm64be"
else:
return "arm64le"
elif ph.id == PLFM_ARM and ph.flag & PR_USE32:
if cvar.inf.is_be():
return "armbe"
else:
return "armle"
else:
return ""
def get_register_list(arch):
if arch == "arm64le" or arch == "arm64be":
arch = "arm64"
elif arch == "armle" or arch == "armbe":
arch = "arm"
registers = {
"x64" : [
"rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"rip", "rsp", "efl",
"cs", "ds", "es", "fs", "gs", "ss",
],
"x86" : [
"eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
"eip", "esp", "efl",
"cs", "ds", "es", "fs", "gs", "ss",
],
"arm" : [
"R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
"R8", "R9", "R10", "R11", "R12", "PC", "SP", "LR",
"PSR",
],
"arm64" : [
"X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",
"X8", "X9", "X10", "X11", "X12", "X13", "X14",
"X15", "X16", "X17", "X18", "X19", "X20", "X21",
"X22", "X23", "X24", "X25", "X26", "X27", "X28",
"PC", "SP", "FP", "LR", "CPSR"
# "NZCV",
]
}
return registers[arch]
#-----------------------
#---- Dumping functions
def dump_arch_info():
arch_info = {}
arch_info["arch"] = get_arch()
return arch_info
def dump_regs():
reg_state = {}
for reg in get_register_list(get_arch()):
reg_state[reg] = GetRegValue(reg)
return reg_state
def dump_process_memory(output_dir):
# Segment information dictionary
segment_list = []
# Loop over the segments, fill in the info dictionary
for seg_ea in Segments():
seg_start = SegStart(seg_ea)
seg_end = SegEnd(seg_ea)
seg_size = seg_end - seg_start
seg_info = {}
seg_info["name"] = SegName(seg_ea)
seg_info["start"] = seg_start
seg_info["end"] = seg_end
perms = getseg(seg_ea).perm
seg_info["permissions"] = {
"r": False if (perms & SEGPERM_READ) == 0 else True,
"w": False if (perms & SEGPERM_WRITE) == 0 else True,
"x": False if (perms & SEGPERM_EXEC) == 0 else True,
}
if (perms & SEGPERM_READ) and seg_size <= MAX_SEG_SIZE and isLoaded(seg_start):
try:
# Compress and dump the content to a file
seg_content = get_many_bytes(seg_start, seg_end - seg_start)
if(seg_content == None):
print("Segment empty: {0}@0x{1:016x} (size:UNKNOWN)".format(SegName(seg_ea), seg_ea))
seg_info["content_file"] = ""
else:
print("Dumping segment {0}@0x{1:016x} (size:{2})".format(SegName(seg_ea), seg_ea, len(seg_content)))
compressed_seg_content = zlib.compress(seg_content)
md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
seg_info["content_file"] = md5_sum
# Write the compressed contents to disk
out_file = open(os.path.join(output_dir, md5_sum), 'wb')
out_file.write(compressed_seg_content)
out_file.close()
except:
print("Exception reading segment: {}".format(sys.exc_info()[0]))
seg_info["content_file"] = ""
else:
print("Skipping segment {0}@0x{1:016x}".format(SegName(seg_ea), seg_ea))
seg_info["content_file"] = ""
# Add the segment to the list
segment_list.append(seg_info)
return segment_list
"""
TODO: FINISH IMPORT DUMPING
def import_callback(ea, name, ord):
if not name:
else:
# True -> Continue enumeration
# False -> End enumeration
return True
def dump_imports():
import_dict = {}
for i in xrange(0, number_of_import_modules):
enum_import_names(i, import_callback)
return import_dict
"""
#----------
#---- Main
def main():
try:
print("----- Unicorn Context Dumper -----")
print("You must be actively debugging before running this!")
print("If it fails, double check that you are actively debugging before running.")
# Create the output directory
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
output_path = os.path.dirname(os.path.abspath(GetIdbPath()))
output_path = os.path.join(output_path, "UnicornContext_" + timestamp)
if not os.path.exists(output_path):
os.makedirs(output_path)
print("Process context will be output to {}".format(output_path))
# Get the context
context = {
"arch": dump_arch_info(),
"regs": dump_regs(),
"segments": dump_process_memory(output_path),
#"imports": dump_imports(),
}
# Write the index file
index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
index_file.write(json.dumps(context, indent=4))
index_file.close()
print("Done.")
except Exception, e:
print("!!! ERROR:\n\t{}".format(str(e)))
if __name__ == "__main__":
main()

unicorn_mode/helper_scripts/unicorn_dumper_lldb.py

@@ -0,0 +1,299 @@
"""
unicorn_dumper_lldb.py
When run with LLDB sitting at a debug breakpoint, this
dumps the current state (registers/memory/etc) of
the process to a directory consisting of an index
file with register and segment information and
sub-files containing all actual process memory.
The output of this script is expected to be used
to initialize context for Unicorn emulation.
-----------
Call this function when at a breakpoint in your process with:
command script import -r unicorn_dumper_lldb
If there is trouble with "split on a NoneType", issue the following command:
script lldb.target.triple
and try to import the script again.
-----------
"""
from copy import deepcopy
import datetime
import hashlib
import json
import os
import sys
import time
import zlib
# LLDB Python SDK
import lldb
# Maximum segment size that we'll store
# Yep, this could break stuff pretty quickly if we
# omit something that's used during emulation.
MAX_SEG_SIZE = 128 * 1024 * 1024
# Name of the index file
INDEX_FILE_NAME = "_index.json"
DEBUG_MEM_FILE_NAME = "_memory.json"
# Page size required by Unicorn
UNICORN_PAGE_SIZE = 0x1000
# Alignment functions to align all memory segments to Unicorn page boundaries (4KB pages only)
ALIGN_PAGE_DOWN = lambda x: x & ~(UNICORN_PAGE_SIZE - 1)
ALIGN_PAGE_UP = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE-1)
#----------------------
#---- Helper Functions
def overlap_alignments(segments, memory):
final_list = []
curr_seg_idx = 0
curr_end_addr = 0
curr_node = None
current_segment = None
sorted_segments = sorted(segments, key=lambda k: (k['start'], k['end']))
if curr_seg_idx < len(sorted_segments):
current_segment = sorted_segments[curr_seg_idx]
for mem in sorted(memory, key=lambda k: (k['start'], -k['end'])):
if curr_node is None:
if current_segment is not None and current_segment['start'] == mem['start']:
curr_node = deepcopy(current_segment)
curr_node['permissions'] = mem['permissions']
else:
curr_node = deepcopy(mem)
curr_end_addr = curr_node['end']
while curr_end_addr <= mem['end']:
if curr_node['end'] == mem['end']:
if current_segment is not None and current_segment['start'] > curr_node['start'] and current_segment['start'] < curr_node['end']:
curr_node['end'] = current_segment['start']
if(curr_node['end'] > curr_node['start']):
final_list.append(curr_node)
curr_node = deepcopy(current_segment)
curr_node['permissions'] = mem['permissions']
curr_end_addr = curr_node['end']
else:
if(curr_node['end'] > curr_node['start']):
final_list.append(curr_node)
# if curr_node is a segment
if current_segment is not None and current_segment['end'] == mem['end']:
curr_seg_idx += 1
if curr_seg_idx < len(sorted_segments):
current_segment = sorted_segments[curr_seg_idx]
else:
current_segment = None
curr_node = None
break
# could only be a segment
else:
if curr_node['end'] < mem['end']:
# check for remaining segments and valid segments
if(curr_node['end'] > curr_node['start']):
final_list.append(curr_node)
curr_seg_idx += 1
if curr_seg_idx < len(sorted_segments):
current_segment = sorted_segments[curr_seg_idx]
else:
current_segment = None
if current_segment is not None and current_segment['start'] <= curr_end_addr and current_segment['start'] < mem['end']:
curr_node = deepcopy(current_segment)
curr_node['permissions'] = mem['permissions']
else:
# no more segments
curr_node = deepcopy(mem)
curr_node['start'] = curr_end_addr
curr_end_addr = curr_node['end']
return final_list
# https://github.com/llvm-mirror/llvm/blob/master/include/llvm/ADT/Triple.h
def get_arch():
arch, arch_vendor, arch_os = lldb.target.GetTriple().split('-')
if arch == 'x86_64':
return "x64"
elif arch == 'x86' or arch == 'i386':
return "x86"
elif arch == 'aarch64' or arch == 'arm64':
return "arm64le"
elif arch == 'aarch64_be':
return "arm64be"
elif arch == 'armeb':
return "armbe"
elif arch == 'arm':
return "armle"
else:
return ""
#-----------------------
#---- Dumping functions
def dump_arch_info():
arch_info = {}
arch_info["arch"] = get_arch()
return arch_info
def dump_regs():
reg_state = {}
for reg_list in lldb.frame.GetRegisters():
if 'general purpose registers' in reg_list.GetName().lower():
for reg in reg_list:
reg_state[reg.GetName()] = int(reg.GetValue(), 16)
return reg_state
def get_section_info(sec):
name = sec.name if sec.name is not None else ''
if sec.GetParent().name is not None:
name = sec.GetParent().name + '.' + sec.name
module_name = sec.addr.module.file.GetFilename()
module_name = module_name if module_name is not None else ''
long_name = module_name + '.' + name
return sec.addr.load_addr, (sec.addr.load_addr + sec.size), sec.size, long_name
def dump_process_memory(output_dir):
# Segment information dictionary
raw_segment_list = []
raw_memory_list = []
# 1st pass:
# Loop over the segments, fill in the segment info dictionary
for module in lldb.target.module_iter():
for seg_ea in module.section_iter():
seg_info = {'module': module.file.GetFilename() }
seg_info['start'], seg_info['end'], seg_size, seg_info['name'] = get_section_info(seg_ea)
# TODO: Ugly hack for -1 LONG address on 32-bit
if seg_info['start'] >= sys.maxint or seg_size <= 0:
print("Throwing away page: {}".format(seg_info['name']))
continue
# Page-align segment
seg_info['start'] = ALIGN_PAGE_DOWN(seg_info['start'])
seg_info['end'] = ALIGN_PAGE_UP(seg_info['end'])
print("Appending: {}".format(seg_info['name']))
raw_segment_list.append(seg_info)
# Add the stack memory region (just hardcode 0x1000 around the current SP)
sp = lldb.frame.GetSP()
start_sp = ALIGN_PAGE_DOWN(sp)
raw_segment_list.append({'start': start_sp, 'end': start_sp + 0x1000, 'name': 'STACK'})
# Write the original memory to file for debugging
index_file = open(os.path.join(output_dir, DEBUG_MEM_FILE_NAME), 'w')
index_file.write(json.dumps(raw_segment_list, indent=4))
index_file.close()
# Loop over raw memory regions
mem_info = lldb.SBMemoryRegionInfo()
start_addr = -1
next_region_addr = 0
while next_region_addr > start_addr:
err = lldb.process.GetMemoryRegionInfo(next_region_addr, mem_info)
# TODO: Should check err.success. If False, what do we do?
if not err.success:
break
next_region_addr = mem_info.GetRegionEnd()
if next_region_addr >= sys.maxsize:
break
start_addr = mem_info.GetRegionBase()
end_addr = mem_info.GetRegionEnd()
# Unknown region name
region_name = 'UNKNOWN'
# Ignore regions that aren't even mapped
if mem_info.IsMapped() and mem_info.IsReadable():
mem_info_obj = {'start': start_addr, 'end': end_addr, 'name': region_name, 'permissions': {
"r": mem_info.IsReadable(),
"w": mem_info.IsWritable(),
"x": mem_info.IsExecutable()
}}
raw_memory_list.append(mem_info_obj)
final_segment_list = overlap_alignments(raw_segment_list, raw_memory_list)
for seg_info in final_segment_list:
try:
seg_info['content_file'] = ''
start_addr = seg_info['start']
end_addr = seg_info['end']
region_name = seg_info['name']
# Compress and dump the content to a file
err = lldb.SBError()
seg_content = lldb.process.ReadMemory(start_addr, end_addr - start_addr, err)
if(seg_content == None):
print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(start_addr, region_name))
seg_info['content_file'] = ''
else:
print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(start_addr, len(seg_content), region_name, repr(seg_info['permissions'])))
compressed_seg_content = zlib.compress(seg_content)
md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
seg_info['content_file'] = md5_sum
# Write the compressed contents to disk
out_file = open(os.path.join(output_dir, md5_sum), 'wb')
out_file.write(compressed_seg_content)
out_file.close()
except:
print("Exception reading segment ({}): {}".format(region_name, sys.exc_info()[0]))
return final_segment_list
#----------
#---- Main
def main():
try:
print("----- Unicorn Context Dumper -----")
print("You must be actively debugging before running this!")
print("If it fails, double check that you are actively debugging before running.")
# Create the output directory
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
output_path = "UnicornContext_" + timestamp
if not os.path.exists(output_path):
os.makedirs(output_path)
print("Process context will be output to {}".format(output_path))
# Get the context
context = {
"arch": dump_arch_info(),
"regs": dump_regs(),
"segments": dump_process_memory(output_path),
}
# Write the index file
index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
index_file.write(json.dumps(context, indent=4))
index_file.close()
print("Done.")
except Exception, e:
print("!!! ERROR:\n\t{}".format(repr(e)))
if __name__ == "__main__":
main()
elif lldb.debugger:
main()

unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py

@@ -0,0 +1,224 @@
"""
unicorn_dumper_pwndbg.py
When run with GDB sitting at a debug breakpoint, this
dumps the current state (registers/memory/etc) of
the process to a directory consisting of an index
file with register and segment information and
sub-files containing all actual process memory.
The output of this script is expected to be used
to initialize context for Unicorn emulation.
-----------
In order to run this script, PWNDBG needs to be running in the GDB session (gdbinit.py)
# HELPERS from: https://github.com/pwndbg/pwndbg
It can be loaded with:
source <path_to_pwndbg>/gdbinit.py
Call this function when at a breakpoint in your process with:
source unicorn_dumper_pwndbg.py
-----------
"""
import datetime
import hashlib
import json
import os
import sys
import time
import zlib
# GDB Python SDK
import gdb
pwndbg_loaded = False
try:
import pwndbg.arch
import pwndbg.regs
import pwndbg.vmmap
import pwndbg.memory
pwndbg_loaded = True
except ImportError:
print("!!! PWNDBG not running in GDB. Please run gdbinit.py by executing:")
print('\tpython execfile ("<path_to_pwndbg>/gdbinit.py")')
# Maximum segment size that we'll store
# Yep, this could break stuff pretty quickly if we
# omit something that's used during emulation.
MAX_SEG_SIZE = 128 * 1024 * 1024
# Name of the index file
INDEX_FILE_NAME = "_index.json"
#----------------------
#---- Helper Functions
def map_arch():
arch = pwndbg.arch.current # from PWNDBG
if 'x86_64' in arch or 'x86-64' in arch:
return "x64"
elif 'x86' in arch or 'i386' in arch:
return "x86"
elif 'aarch64' in arch or 'arm64' in arch:
return "arm64le"
elif 'aarch64_be' in arch:
return "arm64be"
elif 'arm' in arch:
cpsr = pwndbg.regs['cpsr']
# check endianess
if pwndbg.arch.endian == 'big':
# check for THUMB mode
if (cpsr & (1 << 5)):
return "armbethumb"
else:
return "armbe"
else:
# check for THUMB mode
if (cpsr & (1 << 5)):
return "armlethumb"
else:
return "armle"
elif 'mips' in arch:
if pwndbg.arch.endian == 'little':
return 'mipsel'
else:
return 'mips'
else:
return ""
#-----------------------
#---- Dumping functions
def dump_arch_info():
arch_info = {}
arch_info["arch"] = map_arch()
return arch_info
def dump_regs():
reg_state = {}
for reg in pwndbg.regs.all:
reg_val = pwndbg.regs[reg]
# current dumper script looks for register values to be hex strings
# reg_str = "0x{:08x}".format(reg_val)
# if "64" in get_arch():
# reg_str = "0x{:016x}".format(reg_val)
# reg_state[reg.strip().strip('$')] = reg_str
reg_state[reg.strip().strip('$')] = reg_val
return reg_state
def dump_process_memory(output_dir):
# Segment information dictionary
final_segment_list = []
# PWNDBG:
vmmap = pwndbg.vmmap.get()
# Pointer to end of last dumped memory segment
segment_last_addr = 0x0
start = None
end = None
if not vmmap:
print("No address mapping information found")
return final_segment_list
# Assume segment entries are sorted by start address
for entry in vmmap:
if entry.start == entry.end:
continue
start = entry.start
end = entry.end
if (segment_last_addr > entry.start): # indicates overlap
if (segment_last_addr > entry.end): # indicates complete overlap, so we skip the segment entirely
continue
else:
start = segment_last_addr
seg_info = {'start': start, 'end': end, 'name': entry.objfile, 'permissions': {
"r": entry.read,
"w": entry.write,
"x": entry.execute
}, 'content_file': ''}
# "(deleted)" may or may not be valid, but don't push it.
if entry.read and not '(deleted)' in entry.objfile:
try:
# Compress and dump the content to a file
seg_content = pwndbg.memory.read(start, end - start)
if(seg_content == None):
print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.start, entry.objfile))
else:
print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.start, len(seg_content), entry.objfile, repr(seg_info['permissions'])))
compressed_seg_content = zlib.compress(seg_content)
md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
seg_info["content_file"] = md5_sum
# Write the compressed contents to disk
out_file = open(os.path.join(output_dir, md5_sum), 'wb')
out_file.write(compressed_seg_content)
out_file.close()
except:
print("Exception reading segment ({}): {}".format(entry.objfile, sys.exc_info()[0]))
else:
print("Skipping segment {0}@0x{1:016x}".format(entry.objfile, entry.start))
segment_last_addr = end
# Add the segment to the list
final_segment_list.append(seg_info)
return final_segment_list
#----------
#---- Main
def main():
print("----- Unicorn Context Dumper -----")
print("You must be actively debugging before running this!")
print("If it fails, double check that you are actively debugging before running.")
try:
# Create the output directory
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
output_path = "UnicornContext_" + timestamp
if not os.path.exists(output_path):
os.makedirs(output_path)
print("Process context will be output to {}".format(output_path))
# Get the context
context = {
"arch": dump_arch_info(),
"regs": dump_regs(),
"segments": dump_process_memory(output_path),
}
# Write the index file
index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
index_file.write(json.dumps(context, indent=4))
index_file.close()
print("Done.")
except Exception as e:
print("!!! ERROR:\n\t{}".format(repr(e)))
if __name__ == "__main__" and pwndbg_loaded:
main()

unicorn_mode/helper_scripts/unicorn_loader.py

@@ -0,0 +1,560 @@
"""
unicorn_loader.py
Loads a process context dump created using a
Unicorn Context Dumper script into a Unicorn Engine
instance. Once this is performed, emulation can be
started.
"""
import argparse
import binascii
from collections import namedtuple
import datetime
import hashlib
import json
import os
import signal
import struct
import time
import zlib
# Unicorn imports
from unicorn import *
from unicorn.arm_const import *
from unicorn.arm64_const import *
from unicorn.x86_const import *
from unicorn.mips_const import *
# Name of the index file
INDEX_FILE_NAME = "_index.json"
# Page size required by Unicorn
UNICORN_PAGE_SIZE = 0x1000
# Max allowable segment size (1G)
MAX_ALLOWABLE_SEG_SIZE = 1024 * 1024 * 1024
# Alignment functions to align all memory segments to Unicorn page boundaries (4KB pages only)
ALIGN_PAGE_DOWN = lambda x: x & ~(UNICORN_PAGE_SIZE - 1)
ALIGN_PAGE_UP = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE-1)
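# Example: ALIGN_PAGE_DOWN(0x1234) == 0x1000, ALIGN_PAGE_UP(0x1234) == 0x2000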
#---------------------------------------
#---- Unicorn-based heap implementation
class UnicornSimpleHeap(object):
""" Use this class to provide a simple heap implementation. This should
be used if malloc/free calls break things during emulation. This heap also
implements basic guard-page capabilities which enable immediate notice of
heap overflow and underflows.
"""
# Helper data-container used to track chunks
class HeapChunk(object):
def __init__(self, actual_addr, total_size, data_size):
self.total_size = total_size # Total size of the chunk (including padding and guard page)
self.actual_addr = actual_addr # Actual start address of the chunk
self.data_size = data_size # Size requested by the caller of actual malloc call
self.data_addr = actual_addr + UNICORN_PAGE_SIZE # Address where data actually starts
# Returns true if the specified buffer is completely within the chunk, else false
def is_buffer_in_chunk(self, addr, size):
if addr >= self.data_addr and ((addr + size) <= (self.data_addr + self.data_size)):
return True
else:
return False
# Skip the zero-page to avoid weird potential issues with segment registers
HEAP_MIN_ADDR = 0x00002000
HEAP_MAX_ADDR = 0xFFFFFFFF
_uc = None # Unicorn engine instance to interact with
_chunks = [] # List of all known chunks
_debug_print = False # True to print debug information
def __init__(self, uc, debug_print=False):
self._uc = uc
self._debug_print = debug_print
# Add the watchpoint hook that will be used to implement pseudo-guard-page support
self._uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, self.__check_mem_access)
def malloc(self, size):
# Figure out the overall size to be allocated/mapped
# - Allocate at least 1 4k page of memory to make Unicorn happy
# - Add guard pages at the start and end of the region
total_chunk_size = UNICORN_PAGE_SIZE + ALIGN_PAGE_UP(size) + UNICORN_PAGE_SIZE
# Crude but effective way to find space for the chunk:
chunk = None
for addr in xrange(self.HEAP_MIN_ADDR, self.HEAP_MAX_ADDR, UNICORN_PAGE_SIZE):
try:
self._uc.mem_map(addr, total_chunk_size, UC_PROT_READ | UC_PROT_WRITE)
chunk = self.HeapChunk(addr, total_chunk_size, size)
if self._debug_print:
print("Allocating 0x{0:x}-byte chunk @ 0x{1:016x}".format(chunk.data_size, chunk.data_addr))
break
except UcError as e:
continue
# Something went very wrong
if chunk is None:
return 0
self._chunks.append(chunk)
return chunk.data_addr
def calloc(self, size, count):
# Simple wrapper around malloc with calloc() args
return self.malloc(size*count)
def realloc(self, ptr, new_size):
# Wrapper around malloc(new_size) / memcpy(new, old, old_size) / free(old)
if self._debug_print:
print("Reallocating chunk @ 0x{0:016x} to be 0x{1:x} bytes".format(ptr, new_size))
old_chunk = None
for chunk in self._chunks:
if chunk.data_addr == ptr:
old_chunk = chunk
new_chunk_addr = self.malloc(new_size)
if old_chunk is not None:
self._uc.mem_write(new_chunk_addr, str(self._uc.mem_read(old_chunk.data_addr, old_chunk.data_size)))
self.free(old_chunk.data_addr)
return new_chunk_addr
def free(self, addr):
for chunk in self._chunks:
if chunk.is_buffer_in_chunk(addr, 1):
if self._debug_print:
print("Freeing 0x{0:x}-byte chunk @ 0x{0:016x}".format(chunk.req_size, chunk.data_addr))
self._uc.mem_unmap(chunk.actual_addr, chunk.total_size)
self._chunks.remove(chunk)
return True
return False
# Implements basic guard-page functionality
def __check_mem_access(self, uc, access, address, size, value, user_data):
for chunk in self._chunks:
if address >= chunk.actual_addr and ((address + size) <= (chunk.actual_addr + chunk.total_size)):
if not chunk.is_buffer_in_chunk(address, size):
if self._debug_print:
print("Heap over/underflow attempting to {0} 0x{1:x} bytes @ {2:016x}".format( \
"write" if access == UC_MEM_WRITE else "read", size, address))
# Force a memory-based crash
uc.force_crash(UcError(UC_ERR_READ_PROT))
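# Example usage (sketch): route the target's allocator to this heap during
# emulation, e.g.
#   heap = UnicornSimpleHeap(uc, debug_print=True)
#   buf = heap.malloc(0x100)   # returns 0 if no free region was found
#   ...make the emulated malloc() return `buf`...
#   heap.free(buf)
# Any *emulated* access that leaves the requested 0x100 bytes but stays inside
# the chunk's padding/guard pages triggers force_crash() via the hook above.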
#---------------------------
#---- Loading function
class AflUnicornEngine(Uc):
def __init__(self, context_directory, enable_trace=False, debug_print=False):
"""
Initializes an AflUnicornEngine instance, which extends the standard Unicorn engine
with a number of helper routines that are useful for creating afl-unicorn test harnesses.
Parameters:
- context_directory: Path to the directory generated by one of the context dumper scripts
- enable_trace: If True, trace information will be printed to STDOUT
- debug_print: If True, debugging information will be printed while loading the context
"""
# Make sure the index file exists and load it
index_file_path = os.path.join(context_directory, INDEX_FILE_NAME)
if not os.path.isfile(index_file_path):
raise Exception("Index file not found. Expected it to be at {}".format(index_file_path))
# Load the process context from the index file
if debug_print:
print("Loading process context index from {}".format(index_file_path))
index_file = open(index_file_path, 'r')
context = json.load(index_file)
index_file.close()
# Check the context to make sure we have the basic essential components
if 'arch' not in context:
raise Exception("Couldn't find architecture information in index file")
if 'regs' not in context:
raise Exception("Couldn't find register information in index file")
if 'segments' not in context:
raise Exception("Couldn't find segment/memory information in index file")
# Set the UnicornEngine instance's architecture and mode
self._arch_str = context['arch']['arch']
arch, mode = self.__get_arch_and_mode(self._arch_str)
Uc.__init__(self, arch, mode)
# Load the registers
regs = context['regs']
reg_map = self.__get_register_map(self._arch_str)
for register, value in regs.iteritems():
if debug_print:
print("Reg {0} = {1}".format(register, value))
if not reg_map.has_key(register.lower()):
if debug_print:
print("Skipping Reg: {}".format(register))
else:
reg_write_retry = True
try:
self.reg_write(reg_map[register.lower()], value)
reg_write_retry = False
except Exception as e:
if debug_print:
print("ERROR writing register: {}, value: {} -- {}".format(register, value, repr(e)))
if reg_write_retry:
if debug_print:
print("Trying to parse value ({}) as hex string".format(value))
try:
self.reg_write(reg_map[register.lower()], int(value, 16))
except Exception as e:
if debug_print:
print("ERROR writing hex string register: {}, value: {} -- {}".format(register, value, repr(e)))
# Setup the memory map and load memory content
self.__map_segments(context['segments'], context_directory, debug_print)
if enable_trace:
self.hook_add(UC_HOOK_BLOCK, self.__trace_block)
self.hook_add(UC_HOOK_CODE, self.__trace_instruction)
self.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, self.__trace_mem_access)
self.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, self.__trace_mem_invalid_access)
if debug_print:
print("Done loading context.")
def get_arch(self):
return self._arch
def get_mode(self):
return self._mode
def get_arch_str(self):
return self._arch_str
def force_crash(self, uc_error):
""" This function should be called to indicate to AFL that a crash occurred during emulation.
You can pass the exception received from Uc.emu_start
"""
mem_errors = [
UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED,
UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED,
UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED,
]
if uc_error.errno in mem_errors:
# Memory error - throw SIGSEGV
os.kill(os.getpid(), signal.SIGSEGV)
elif uc_error.errno == UC_ERR_INSN_INVALID:
# Invalid instruction - throw SIGILL
os.kill(os.getpid(), signal.SIGILL)
else:
# Not sure what happened - throw SIGABRT
os.kill(os.getpid(), signal.SIGABRT)
def dump_regs(self):
""" Dumps the contents of all the registers to STDOUT """
for reg in sorted(self.__get_register_map(self._arch_str).items(), key=lambda reg: reg[0]):
print(">>> {0:>4}: 0x{1:016x}".format(reg[0], self.reg_read(reg[1])))
# TODO: Make this dynamically get the stack pointer register and pointer width for the current architecture
"""
def dump_stack(self, window=10):
print(">>> Stack:")
stack_ptr_addr = self.reg_read(UC_X86_REG_RSP)
for i in xrange(-window, window + 1):
addr = stack_ptr_addr + (i*8)
print("{0}0x{1:016x}: 0x{2:016x}".format( \
'SP->' if i == 0 else ' ', addr, \
struct.unpack('<Q', self.mem_read(addr, 8))[0]))
"""
#-----------------------------
#---- Loader Helper Functions
def __map_segment(self, name, address, size, perms, debug_print=False):
# - size is unsigned and must be != 0
# - starting address must be aligned to 4KB
# - map size must be multiple of the page size (4KB)
mem_start = address
mem_end = address + size
mem_start_aligned = ALIGN_PAGE_DOWN(mem_start)
mem_end_aligned = ALIGN_PAGE_UP(mem_end)
if debug_print:
if mem_start_aligned != mem_start or mem_end_aligned != mem_end:
print("Aligning segment to page boundary:")
print(" name: {}".format(name))
print(" start: {0:016x} -> {1:016x}".format(mem_start, mem_start_aligned))
print(" end: {0:016x} -> {1:016x}".format(mem_end, mem_end_aligned))
print("Mapping segment from {0:016x} - {1:016x} with perm={2}: {3}".format(mem_start_aligned, mem_end_aligned, perms, name))
if mem_start_aligned < mem_end_aligned:
self.mem_map(mem_start_aligned, mem_end_aligned - mem_start_aligned, perms)
def __map_segments(self, segment_list, context_directory, debug_print=False):
for segment in segment_list:
# Get the segment information from the index
name = segment['name']
seg_start = segment['start']
seg_end = segment['end']
perms = \
(UC_PROT_READ if segment['permissions']['r'] else 0) | \
(UC_PROT_WRITE if segment['permissions']['w'] else 0) | \
(UC_PROT_EXEC if segment['permissions']['x'] else 0)
if debug_print:
print("Handling segment {}".format(name))
# Check for any overlap with existing segments. If there is, it must
# be consolidated and merged together before mapping since Unicorn
# doesn't allow overlapping segments.
found = False
overlap_start = False
overlap_end = False
tmp = 0
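# tmp records the clipping boundary: the end of the existing mapping for a
# start overlap, or its start for an end overlap.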
for (mem_start, mem_end, mem_perm) in self.mem_regions():
mem_end = mem_end + 1
if seg_start >= mem_start and seg_end < mem_end:
found = True
break
if seg_start >= mem_start and seg_start < mem_end:
overlap_start = True
tmp = mem_end
break
if seg_end >= mem_start and seg_end < mem_end:
overlap_end = True
tmp = mem_start
break
# Map memory into the address space if it is of an acceptable size.
if (seg_end - seg_start) > MAX_ALLOWABLE_SEG_SIZE:
if debug_print:
print("Skipping segment (LARGER THAN {0}) from {1:016x} - {2:016x} with perm={3}: {4}".format(MAX_ALLOWABLE_SEG_SIZE, seg_start, seg_end, perms, name))
continue
elif not found: # Make sure it's not already mapped
if overlap_start: # Partial overlap (start)
self.__map_segment(name, tmp, seg_end - tmp, perms, debug_print)
elif overlap_end: # Partial overlap (end)
self.__map_segment(name, seg_start, tmp - seg_start, perms, debug_print)
else: # Not found
self.__map_segment(name, seg_start, seg_end - seg_start, perms, debug_print)
else:
if debug_print:
print("Segment {} already mapped. Moving on.".format(name))
# Load the content (if available)
if 'content_file' in segment and len(segment['content_file']) > 0:
content_file_path = os.path.join(context_directory, segment['content_file'])
if not os.path.isfile(content_file_path):
raise Exception("Unable to find segment content file. Expected it to be at {}".format(content_file_path))
#if debug_print:
# print("Loading content for segment {} from {}".format(name, segment['content_file']))
content_file = open(content_file_path, 'rb')
compressed_content = content_file.read()
content_file.close()
self.mem_write(seg_start, zlib.decompress(compressed_content))
else:
if debug_print:
print("No content found for segment {0} @ {1:016x}".format(name, seg_start))
self.mem_write(seg_start, '\x00' * (seg_end - seg_start))
def __get_arch_and_mode(self, arch_str):
arch_map = {
"x64" : [ UC_X86_REG_RIP, UC_ARCH_X86, UC_MODE_64 ],
"x86" : [ UC_X86_REG_EIP, UC_ARCH_X86, UC_MODE_32 ],
"arm64be" : [ UC_ARM64_REG_PC, UC_ARCH_ARM64, UC_MODE_ARM | UC_MODE_BIG_ENDIAN ],
"arm64le" : [ UC_ARM64_REG_PC, UC_ARCH_ARM64, UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN ],
"armbe" : [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_ARM | UC_MODE_BIG_ENDIAN ],
"armle" : [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN ],
"armbethumb": [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_THUMB | UC_MODE_BIG_ENDIAN ],
"armlethumb": [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_THUMB | UC_MODE_LITTLE_ENDIAN ],
"mips" : [ UC_MIPS_REG_PC, UC_ARCH_MIPS, UC_MODE_MIPS32 | UC_MODE_BIG_ENDIAN ],
"mipsel" : [ UC_MIPS_REG_PC, UC_ARCH_MIPS, UC_MODE_MIPS32 | UC_MODE_LITTLE_ENDIAN ],
}
return (arch_map[arch_str][1], arch_map[arch_str][2])
def __get_register_map(self, arch):
if arch == "arm64le" or arch == "arm64be":
arch = "arm64"
elif arch == "armle" or arch == "armbe" or "thumb" in arch:
arch = "arm"
elif arch == "mipsel":
arch = "mips"
registers = {
"x64" : {
"rax": UC_X86_REG_RAX,
"rbx": UC_X86_REG_RBX,
"rcx": UC_X86_REG_RCX,
"rdx": UC_X86_REG_RDX,
"rsi": UC_X86_REG_RSI,
"rdi": UC_X86_REG_RDI,
"rbp": UC_X86_REG_RBP,
"rsp": UC_X86_REG_RSP,
"r8": UC_X86_REG_R8,
"r9": UC_X86_REG_R9,
"r10": UC_X86_REG_R10,
"r11": UC_X86_REG_R11,
"r12": UC_X86_REG_R12,
"r13": UC_X86_REG_R13,
"r14": UC_X86_REG_R14,
"r15": UC_X86_REG_R15,
"rip": UC_X86_REG_RIP,
"rsp": UC_X86_REG_RSP,
"efl": UC_X86_REG_EFLAGS,
"cs": UC_X86_REG_CS,
"ds": UC_X86_REG_DS,
"es": UC_X86_REG_ES,
"fs": UC_X86_REG_FS,
"gs": UC_X86_REG_GS,
"ss": UC_X86_REG_SS,
},
"x86" : {
"eax": UC_X86_REG_EAX,
"ebx": UC_X86_REG_EBX,
"ecx": UC_X86_REG_ECX,
"edx": UC_X86_REG_EDX,
"esi": UC_X86_REG_ESI,
"edi": UC_X86_REG_EDI,
"ebp": UC_X86_REG_EBP,
"esp": UC_X86_REG_ESP,
"eip": UC_X86_REG_EIP,
"esp": UC_X86_REG_ESP,
"efl": UC_X86_REG_EFLAGS,
# Segment registers removed...
# They caused segfaults (from unicorn?) when they were here
},
"arm" : {
"r0": UC_ARM_REG_R0,
"r1": UC_ARM_REG_R1,
"r2": UC_ARM_REG_R2,
"r3": UC_ARM_REG_R3,
"r4": UC_ARM_REG_R4,
"r5": UC_ARM_REG_R5,
"r6": UC_ARM_REG_R6,
"r7": UC_ARM_REG_R7,
"r8": UC_ARM_REG_R8,
"r9": UC_ARM_REG_R9,
"r10": UC_ARM_REG_R10,
"r11": UC_ARM_REG_R11,
"r12": UC_ARM_REG_R12,
"pc": UC_ARM_REG_PC,
"sp": UC_ARM_REG_SP,
"lr": UC_ARM_REG_LR,
"cpsr": UC_ARM_REG_CPSR
},
"arm64" : {
"x0": UC_ARM64_REG_X0,
"x1": UC_ARM64_REG_X1,
"x2": UC_ARM64_REG_X2,
"x3": UC_ARM64_REG_X3,
"x4": UC_ARM64_REG_X4,
"x5": UC_ARM64_REG_X5,
"x6": UC_ARM64_REG_X6,
"x7": UC_ARM64_REG_X7,
"x8": UC_ARM64_REG_X8,
"x9": UC_ARM64_REG_X9,
"x10": UC_ARM64_REG_X10,
"x11": UC_ARM64_REG_X11,
"x12": UC_ARM64_REG_X12,
"x13": UC_ARM64_REG_X13,
"x14": UC_ARM64_REG_X14,
"x15": UC_ARM64_REG_X15,
"x16": UC_ARM64_REG_X16,
"x17": UC_ARM64_REG_X17,
"x18": UC_ARM64_REG_X18,
"x19": UC_ARM64_REG_X19,
"x20": UC_ARM64_REG_X20,
"x21": UC_ARM64_REG_X21,
"x22": UC_ARM64_REG_X22,
"x23": UC_ARM64_REG_X23,
"x24": UC_ARM64_REG_X24,
"x25": UC_ARM64_REG_X25,
"x26": UC_ARM64_REG_X26,
"x27": UC_ARM64_REG_X27,
"x28": UC_ARM64_REG_X28,
"pc": UC_ARM64_REG_PC,
"sp": UC_ARM64_REG_SP,
"fp": UC_ARM64_REG_FP,
"lr": UC_ARM64_REG_LR,
"nzcv": UC_ARM64_REG_NZCV,
"cpsr": UC_ARM_REG_CPSR,
},
"mips" : {
"0" : UC_MIPS_REG_ZERO,
"at": UC_MIPS_REG_AT,
"v0": UC_MIPS_REG_V0,
"v1": UC_MIPS_REG_V1,
"a0": UC_MIPS_REG_A0,
"a1": UC_MIPS_REG_A1,
"a2": UC_MIPS_REG_A2,
"a3": UC_MIPS_REG_A3,
"t0": UC_MIPS_REG_T0,
"t1": UC_MIPS_REG_T1,
"t2": UC_MIPS_REG_T2,
"t3": UC_MIPS_REG_T3,
"t4": UC_MIPS_REG_T4,
"t5": UC_MIPS_REG_T5,
"t6": UC_MIPS_REG_T6,
"t7": UC_MIPS_REG_T7,
"t8": UC_MIPS_REG_T8,
"t9": UC_MIPS_REG_T9,
"s0": UC_MIPS_REG_S0,
"s1": UC_MIPS_REG_S1,
"s2": UC_MIPS_REG_S2,
"s3": UC_MIPS_REG_S3,
"s4": UC_MIPS_REG_S4,
"s5": UC_MIPS_REG_S5,
"s6": UC_MIPS_REG_S6,
"s7": UC_MIPS_REG_S7,
"s8": UC_MIPS_REG_S8,
"k0": UC_MIPS_REG_K0,
"k1": UC_MIPS_REG_K1,
"gp": UC_MIPS_REG_GP,
"pc": UC_MIPS_REG_PC,
"sp": UC_MIPS_REG_SP,
"fp": UC_MIPS_REG_FP,
"ra": UC_MIPS_REG_RA,
"hi": UC_MIPS_REG_HI,
"lo": UC_MIPS_REG_LO
}
}
return registers[arch]
#---------------------------
# Callbacks for tracing
# TODO: Make integer-printing fixed widths dependent on bitness of architecture
# (i.e. only show 4 bytes for 32-bit, 8 bytes for 64-bit)
# TODO: Figure out how best to determine the capstone mode and architecture here
"""
try:
# If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
from capstone import *
cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
def __trace_instruction(self, uc, address, size, user_data):
mem = uc.mem_read(address, size)
for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
print(" Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
except ImportError:
def __trace_instruction(self, uc, address, size, user_data):
print(" Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
"""
def __trace_instruction(self, uc, address, size, user_data):
print(" Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
def __trace_block(self, uc, address, size, user_data):
print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
def __trace_mem_access(self, uc, access, address, size, value, user_data):
if access == UC_MEM_WRITE:
print(" >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
else:
print(" >>> Read: addr=0x{0:016x} size={1}".format(address, size))
def __trace_mem_invalid_access(self, uc, access, address, size, value, user_data):
if access == UC_MEM_WRITE_UNMAPPED:
print(" >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
else:
print(" >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))

View File

@ -0,0 +1,290 @@
/*
american fuzzy lop - high-performance binary-only instrumentation
-----------------------------------------------------------------
Written by Andrew Griffiths <agriffiths@google.com> and
Michal Zalewski <lcamtuf@google.com>
TCG instrumentation and block chaining support by Andrea Biondo
<andrea.biondo965@gmail.com>
Adapted for afl-unicorn by Dominik Maier <mail@dmnk.co>
Idea & design very much by Andrew Griffiths.
Copyright 2015, 2016 Google Inc. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
This code is a shim patched into the separately-distributed source
code of Unicorn 1.0.1. It leverages the built-in QEMU tracing functionality
to implement AFL-style instrumentation and to take care of the remaining
parts of the AFL fork server logic.
The resulting QEMU binary is essentially a standalone instrumentation
tool; for an example of how to leverage it for other purposes, you can
have a look at afl-showmap.c.
*/
#include <sys/shm.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "../../config.h"
/***************************
* VARIOUS AUXILIARY STUFF *
***************************/
/* A snippet patched into tb_find_slow to inform the parent process that
we have hit a new block that hasn't been translated yet, and to tell
it to translate within its own context, too (this avoids translation
overhead in the next forked-off copy). */
#define AFL_UNICORN_CPU_SNIPPET1 do { \
afl_request_tsl(pc, cs_base, flags); \
} while (0)
/* This snippet kicks in when the instruction pointer is positioned at
_start and does the usual forkserver stuff, not very different from
regular instrumentation injected via afl-as.h. */
#define AFL_UNICORN_CPU_SNIPPET2 do { \
if(unlikely(afl_first_instr == 0)) { \
afl_setup(); \
afl_forkserver(env); \
afl_first_instr = 1; \
} \
afl_maybe_log(tb->pc); \
} while (0)
/* We use one additional file descriptor to relay "needs translation"
messages between the child and the fork server. */
#define TSL_FD (FORKSRV_FD - 1)
/* This is equivalent to afl-as.h: */
static unsigned char *afl_area_ptr;
/* Set in the child process in forkserver mode: */
static unsigned char afl_fork_child;
static unsigned int afl_forksrv_pid;
/* Instrumentation ratio: */
static unsigned int afl_inst_rms = MAP_SIZE;
/* Function declarations. */
static void afl_setup(void);
static void afl_forkserver(CPUArchState*);
static inline void afl_maybe_log(unsigned long);
static void afl_wait_tsl(CPUArchState*, int);
static void afl_request_tsl(target_ulong, target_ulong, uint64_t);
static TranslationBlock *tb_find_slow(CPUArchState*, target_ulong,
target_ulong, uint64_t);
/* Data structure passed around by the translate handlers: */
struct afl_tsl {
target_ulong pc;
target_ulong cs_base;
uint64_t flags;
};
/*************************
* ACTUAL IMPLEMENTATION *
*************************/
/* Set up SHM region and initialize other stuff. */
static void afl_setup(void) {
char *id_str = getenv(SHM_ENV_VAR),
*inst_r = getenv("AFL_INST_RATIO");
int shm_id;
if (inst_r) {
unsigned int r;
r = atoi(inst_r);
if (r > 100) r = 100;
if (!r) r = 1;
afl_inst_rms = MAP_SIZE * r / 100;
}
if (id_str) {
shm_id = atoi(id_str);
afl_area_ptr = shmat(shm_id, NULL, 0);
if (afl_area_ptr == (void*)-1) exit(1);
/* With AFL_INST_RATIO set to a low value, we want to touch the bitmap
so that the parent doesn't give up on us. */
if (inst_r) afl_area_ptr[0] = 1;
}
}
/* Fork server logic, invoked once we hit first emulated instruction. */
static void afl_forkserver(CPUArchState *env) {
static unsigned char tmp[4];
if (!afl_area_ptr) return;
/* Tell the parent that we're alive. If the parent doesn't want
to talk, assume that we're not running in forkserver mode. */
if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
afl_forksrv_pid = getpid();
/* All right, let's await orders... */
while (1) {
pid_t child_pid;
int status, t_fd[2];
/* Whoops, parent dead? */
if (read(FORKSRV_FD, tmp, 4) != 4) exit(2);
/* Establish a channel with child to grab translation commands. We'll
read from t_fd[0], child will write to TSL_FD. */
if (pipe(t_fd) || dup2(t_fd[1], TSL_FD) < 0) exit(3);
close(t_fd[1]);
child_pid = fork();
if (child_pid < 0) exit(4);
if (!child_pid) {
/* Child process. Close descriptors and run free. */
afl_fork_child = 1;
close(FORKSRV_FD);
close(FORKSRV_FD + 1);
close(t_fd[0]);
return;
}
/* Parent. */
close(TSL_FD);
if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) exit(5);
/* Collect translation requests until child dies and closes the pipe. */
afl_wait_tsl(env, t_fd[0]);
/* Get and relay exit status to parent. */
if (waitpid(child_pid, &status, 0) < 0) exit(6);
if (write(FORKSRV_FD + 1, &status, 4) != 4) exit(7);
}
}
/* The equivalent of the tuple logging routine from afl-as.h. */
static inline void afl_maybe_log(unsigned long cur_loc) {
static __thread unsigned long prev_loc;
// DEBUG
//printf("IN AFL_MAYBE_LOG 0x%lx\n", cur_loc);
// MODIFIED FOR UNICORN MODE -> We want to log all addresses,
// so the checks for 'start < addr < end' are removed
if(!afl_area_ptr)
return;
// DEBUG
//printf("afl_area_ptr = %p\n", afl_area_ptr);
/* Looks like QEMU always maps to fixed locations, so ASAN is not a
concern. Phew. But instruction addresses may be aligned. Let's mangle
the value to get something quasi-uniform. */
cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
cur_loc &= MAP_SIZE - 1;
/* Implement probabilistic instrumentation by looking at scrambled block
address. This keeps the instrumented locations stable across runs. */
// DEBUG
//printf("afl_inst_rms = 0x%lx\n", afl_inst_rms);
if (cur_loc >= afl_inst_rms) return;
// DEBUG
//printf("cur_loc = 0x%lx\n", cur_loc);
afl_area_ptr[cur_loc ^ prev_loc]++;
prev_loc = cur_loc >> 1;
}
/* This code is invoked whenever QEMU decides that it doesn't have a
translation of a particular block and needs to compute it. When this happens,
we tell the parent to mirror the operation, so that the next fork() has a
cached copy. */
static void afl_request_tsl(target_ulong pc, target_ulong cb, uint64_t flags) {
struct afl_tsl t;
if (!afl_fork_child) return;
t.pc = pc;
t.cs_base = cb;
t.flags = flags;
if (write(TSL_FD, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
return;
}
/* This is the other side of the same channel. Since timeouts are handled by
afl-fuzz simply killing the child, we can just wait until the pipe breaks. */
static void afl_wait_tsl(CPUArchState *env, int fd) {
struct afl_tsl t;
while (1) {
/* Broken pipe means it's time to return to the fork server routine. */
if (read(fd, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
break;
tb_find_slow(env, t.pc, t.cs_base, t.flags);
}
close(fd);
}

View File

@ -0,0 +1,107 @@
diff --git a/Makefile b/Makefile
index 7d73782..fb3ccfd 100644
--- a/Makefile
+++ b/Makefile
@@ -88,6 +88,10 @@ AR = llvm-ar
LDFLAGS := -fsanitize=address ${LDFLAGS}
endif
+ifeq ($(UNICORN_AFL),yes)
+UNICORN_CFLAGS += -DUNICORN_AFL
+endif
+
ifeq ($(CROSS),)
CC ?= cc
AR ?= ar
diff --git a/config.mk b/config.mk
index c3621fb..c7b4f7e 100644
--- a/config.mk
+++ b/config.mk
@@ -8,7 +8,7 @@
# Compile with debug info when you want to debug code.
# Change this to 'no' for release edition.
-UNICORN_DEBUG ?= yes
+UNICORN_DEBUG ?= no
################################################################################
# Specify which archs you want to compile in. By default, we build all archs.
@@ -28,3 +28,9 @@ UNICORN_STATIC ?= yes
# a shared library.
UNICORN_SHARED ?= yes
+
+
+################################################################################
+# Changing 'UNICORN_AFL = yes' to 'UNICORN_AFL = no' disables AFL instrumentation
+
+UNICORN_AFL ?= yes
diff --git a/qemu/cpu-exec.c b/qemu/cpu-exec.c
index 7755adf..8114b70 100644
--- a/qemu/cpu-exec.c
+++ b/qemu/cpu-exec.c
@@ -24,6 +24,11 @@
#include "uc_priv.h"
+#if defined(UNICORN_AFL)
+#include "../afl-unicorn-cpu-inl.h"
+static int afl_first_instr = 0;
+#endif
+
static tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr);
static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
target_ulong cs_base, uint64_t flags);
@@ -231,6 +236,10 @@ int cpu_exec(struct uc_struct *uc, CPUArchState *env) // qq
next_tb & TB_EXIT_MASK, tb);
}
+#if defined(UNICORN_AFL)
+ AFL_UNICORN_CPU_SNIPPET2;
+#endif
+
/* cpu_interrupt might be called while translating the
TB, but before it is linked into a potentially
infinite loop and becomes env->current_tb. Avoid
@@ -369,6 +378,11 @@ static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
not_found:
/* if no translated code available, then translate it now */
tb = tb_gen_code(cpu, pc, cs_base, (int)flags, 0); // qq
+
+#if defined(UNICORN_AFL)
+ /* There seems to be no chaining in unicorn ever? :( */
+ AFL_UNICORN_CPU_SNIPPET1;
+#endif
found:
/* Move the last found TB to the head of the list */
diff --git a/qemu/translate-all.c b/qemu/translate-all.c
index 1a96c34..7ef4878 100644
--- a/qemu/translate-all.c
+++ b/qemu/translate-all.c
@@ -403,11 +403,25 @@ static PageDesc *page_find_alloc(struct uc_struct *uc, tb_page_addr_t index, int
#if defined(CONFIG_USER_ONLY)
/* We can't use g_malloc because it may recurse into a locked mutex. */
+#if defined(UNICORN_AFL)
+ /* This was added by unicorn-afl to bail out semi-gracefully if out of memory. */
+# define ALLOC(P, SIZE) \
+ do { \
+ void* _tmp = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
+ if (_tmp == (void*)-1) { \
+ qemu_log(">>> Out of memory for stack, bailing out. <<<\n"); \
+ exit(1); \
+ } \
+ (P) = _tmp; \
+ } while (0)
+#else /* !UNICORN_AFL */
# define ALLOC(P, SIZE) \
do { \
P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
} while (0)
+#endif /* UNICORN_AFL */
#else
# define ALLOC(P, SIZE) \
do { P = g_malloc0(SIZE); } while (0)

View File

@ -0,0 +1,41 @@
Compiling simple_target.c
==========================
You shouldn't need to compile simple_target.c since a MIPS binary version is
pre-built and shipped with afl-unicorn. This file documents how the binary
was built in case you want to rebuild it or recompile it for any reason.
The pre-built binary (simple_target.bin) was built by cross-compiling
simple_target.c for MIPS using the mips-linux-gnu-gcc package on an Ubuntu
16.04 LTS system. This cross compiler (and associated binutils) was installed
from apt-get packages:
```
sudo apt-get install gcc-mips-linux-gnu
```
simple_target.c was compiled without optimization, as position-independent code,
and without the standard library using the following command line:
```
mips-linux-gnu-gcc -o simple_target.elf simple_target.c -fPIC -O0 -nostdlib
```
The .text section from the resulting ELF binary was then extracted to create
the raw binary blob that is loaded and emulated by simple_test_harness.py:
```
mips-linux-gnu-objcopy -O binary --only-section=.text simple_target.elf simple_target.bin
```
In summary, to recreate simple_target.bin execute the following:
```
mips-linux-gnu-gcc -o simple_target.elf simple_target.c -fPIC -O0 -nostdlib && \
mips-linux-gnu-objcopy -O binary --only-section=.text simple_target.elf simple_target.bin && \
rm simple_target.elf
```
Note that the resulting binary is padded with null bytes to a 16-byte
alignment. This matters when emulating it: on MIPS the null padding decodes
as NOPs, which is what executes after main() returns.
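As a quick sanity check (a sketch, assuming the file names used above), the padding can be verified from Python:

```
import os

size = os.path.getsize("simple_target.bin")
assert size % 16 == 0, "expected null padding to a 16-byte boundary"
print("simple_target.bin: {} bytes, 16-byte aligned".format(size))
```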

View File

@ -0,0 +1 @@
a

View File

@ -0,0 +1 @@
a

View File

@ -0,0 +1,3 @@
python
../samples/simple/simple_test_harness.py
../samples/simple/sample_inputs

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,28 @@
start_time : 1563137991
last_update : 1563155870
fuzzer_pid : 16972
cycles_done : 86
execs_done : 4807
execs_per_sec : 0.00
paths_total : 1
paths_favored : 1
paths_found : 0
paths_imported : 0
max_depth : 1
cur_path : 0
pending_favs : 0
pending_total : 0
variable_paths : 0
stability : 100.00%
bitmap_cvg : 0.00%
unique_crashes : 0
unique_hangs : 0
last_path : 0
last_crash : 0
last_hang : 0
execs_since_crash : 4807
exec_timeout : 9999999
afl_banner : python
afl_version : 2.52b
target_mode : unicorn
command_line : /mnt/c/Users/DMaier/tmp/afl-unicorn/afl-fuzz -U -m none -t 9999999 -i ../samples/simple/in -o ../samples/simple/out -- python ../samples/simple/simple_test_harness.py ../samples/simple/sample_inputs

View File

@ -0,0 +1,7 @@
# unix_time, cycles_done, cur_path, paths_total, pending_total, pending_favs, map_size, unique_crashes, unique_hangs, max_depth, execs_per_sec
1563154187, 0, 0, 1, 1, 1, 0.00%, 0, 0, 1, inf
1563154197, 2, 0, 1, 0, 0, 0.00%, 0, 0, 1, 36.23
1563154202, 9, 0, 1, 0, 0, 0.00%, 0, 0, 1, 32.83
1563154207, 15, 0, 1, 0, 0, 0.00%, 0, 0, 1, 31.33
1563154212, 22, 0, 1, 0, 0, 0.00%, 0, 0, 1, 31.05
1563154217, 29, 0, 1, 0, 0, 0.00%, 0, 0, 1, 33.90

View File

@ -0,0 +1 @@
a

View File

@ -0,0 +1 @@
abcd

Binary file not shown.

View File

@ -0,0 +1 @@


View File

@ -0,0 +1 @@


View File

@ -0,0 +1 @@


Binary file not shown.

View File

@ -0,0 +1,37 @@
/*
* Sample target file to test afl-unicorn fuzzing capabilities.
* This is a very trivial example that will crash pretty easily
* in several different exciting ways.
*
* Input is assumed to come from a buffer located at DATA_ADDRESS
* (0x00300000), so make sure that your Unicorn emulation of this
* puts user data there.
*
* Written by Nathan Voss <njvoss99@gmail.com>
*/
// Magic address where mutated data will be placed
#define DATA_ADDRESS 0x00300000
int main(void)
{
unsigned char* data_buf = (unsigned char*)DATA_ADDRESS;
if(data_buf[20] != 0)
{
// Cause an 'invalid read' crash if data[20] is non-zero
unsigned char invalid_read = *(unsigned char*)0x00000000;
}
else if(data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2])
{
// Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2]
unsigned char invalid_read = *(unsigned char*)0x00000000;
}
else if(data_buf[9] == 0x00 && data_buf[10] != 0x00 && data_buf[11] == 0x00)
{
// Cause a crash if data[10] is not zero, but [9] and [11] are zero
unsigned char invalid_read = *(unsigned char*)0x00000000;
}
return 0;
}
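Given these branches, crashing inputs are easy to construct by hand. A short sketch (output file names are illustrative) that writes one input per crash condition:

```
def write_case(name, buf):
    with open(name, "wb") as f:
        f.write(bytes(buf))

case = bytearray(32)
case[20] = 0x41                         # data_buf[20] != 0 -> first invalid read
write_case("crash_byte20", case)

case = bytearray(32)
case[0], case[1], case[2] = 0x15, 2, 1  # 0x10 < data[0] < 0x20 and data[1] > data[2]
write_case("crash_compare", case)

case = bytearray(32)
case[10] = 1                            # data[9] == 0, data[10] != 0, data[11] == 0
write_case("crash_sandwich", case)
```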

View File

@ -0,0 +1,170 @@
"""
Simple test harness for AFL's Unicorn Mode.
This loads the simple_target.bin binary (precompiled as MIPS code) into
Unicorn's memory map for emulation, places the specified input into
simple_target's buffer (hardcoded to be at 0x300000), and executes 'main()'.
If any crashes occur during emulation, this script throws a matching signal
to tell AFL that a crash occurred.
Run under AFL as follows:
$ cd <afl_path>/unicorn_mode/samples/simple/
$ ../../../afl-fuzz -U -m none -i ./sample_inputs -o ./output -- python simple_test_harness.py @@
"""
import argparse
import os
import signal
from unicorn import *
from unicorn.mips_const import *
# Path to the file containing the binary to emulate
BINARY_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'simple_target.bin')
# Memory map for the code to be tested
CODE_ADDRESS = 0x00100000 # Arbitrary address where code to test will be loaded
CODE_SIZE_MAX = 0x00010000 # Max size for the code (64kb)
STACK_ADDRESS = 0x00200000 # Address of the stack (arbitrarily chosen)
STACK_SIZE = 0x00010000 # Size of the stack (arbitrarily chosen)
DATA_ADDRESS = 0x00300000 # Address where mutated data will be placed
DATA_SIZE_MAX = 0x00010000 # Maximum allowable size of mutated data
try:
# If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
from capstone import *
cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
def unicorn_debug_instruction(uc, address, size, user_data):
mem = uc.mem_read(address, size)
for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
print(" Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
except ImportError:
def unicorn_debug_instruction(uc, address, size, user_data):
print(" Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
def unicorn_debug_block(uc, address, size, user_data):
print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
if access == UC_MEM_WRITE:
print(" >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
else:
print(" >>> Read: addr=0x{0:016x} size={1}".format(address, size))
def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
if access == UC_MEM_WRITE_UNMAPPED:
print(" >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
else:
print(" >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))
def force_crash(uc_error):
# This function should be called to indicate to AFL that a crash occurred during emulation.
# Pass in the exception received from Uc.emu_start()
mem_errors = [
UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED,
UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED,
UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED,
]
if uc_error.errno in mem_errors:
# Memory error - throw SIGSEGV
os.kill(os.getpid(), signal.SIGSEGV)
elif uc_error.errno == UC_ERR_INSN_INVALID:
# Invalid instruction - throw SIGILL
os.kill(os.getpid(), signal.SIGILL)
else:
# Not sure what happened - throw SIGABRT
os.kill(os.getpid(), signal.SIGABRT)
def main():
parser = argparse.ArgumentParser(description="Test harness for simple_target.bin")
parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input to load")
parser.add_argument('-d', '--debug', default=False, action="store_true", help="Enables debug tracing")
args = parser.parse_args()
# Instantiate a MIPS32 big endian Unicorn Engine instance
uc = Uc(UC_ARCH_MIPS, UC_MODE_MIPS32 + UC_MODE_BIG_ENDIAN)
if args.debug:
uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
uc.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, unicorn_debug_mem_invalid_access)
#---------------------------------------------------
# Load the binary to emulate and map it into memory
print("Loading data input from {}".format(args.input_file))
binary_file = open(BINARY_FILE, 'rb')
binary_code = binary_file.read()
binary_file.close()
# Make sure the binary fits within the allocated code region
if len(binary_code) > CODE_SIZE_MAX:
print("Binary code is too large (> {} bytes)".format(CODE_SIZE_MAX))
return
# Map the code region and write the binary into it
uc.mem_map(CODE_ADDRESS, CODE_SIZE_MAX)
uc.mem_write(CODE_ADDRESS, binary_code)
# Set the program counter to the start of the code
start_address = CODE_ADDRESS # Address of entry point of main()
end_address = CODE_ADDRESS + 0xf4 # Address of last instruction in main()
uc.reg_write(UC_MIPS_REG_PC, start_address)
#-----------------
# Setup the stack
uc.mem_map(STACK_ADDRESS, STACK_SIZE)
uc.reg_write(UC_MIPS_REG_SP, STACK_ADDRESS + STACK_SIZE)
#-----------------------------------------------------
# Emulate 1 instruction to kick off AFL's fork server
# THIS MUST BE DONE BEFORE LOADING USER DATA!
# If this isn't done every single run, the AFL fork server
# will not be started appropriately and you'll get erratic results!
# It doesn't matter what this returns with, it just has to execute at
# least one instruction in order to get the fork server started.
print("Starting the AFL forkserver by executing 1 instruction")
try:
uc.emu_start(uc.reg_read(UC_MIPS_REG_PC), 0, 0, count=1)
except UcError as e:
print("ERROR: Failed to execute a single instruction (error: {})!".format(e))
return
#-----------------------------------------------
# Load the mutated input and map it into memory
# Load the mutated input from disk
print("Loading data input from {}".format(args.input_file))
input_file = open(args.input_file, 'rb')
input_data = input_file.read()
input_file.close()
# Apply constraints to the mutated input
if len(input_data) > DATA_SIZE_MAX:
print("Test input is too long (> {} bytes)".format(DATA_SIZE_MAX))
return
# Write the mutated input into the data buffer
uc.mem_map(DATA_ADDRESS, DATA_SIZE_MAX)
uc.mem_write(DATA_ADDRESS, input_data)
#------------------------------------------------------------
# Emulate the code, allowing it to process the mutated input
print("Executing until a crash or execution reaches 0x{0:016x}".format(end_address))
try:
result = uc.emu_start(uc.reg_read(UC_MIPS_REG_PC), end_address, timeout=0, count=0)
except UcError as e:
print("Execution failed with error: {}".format(e))
force_crash(e)
print("Done.")
if __name__ == "__main__":
main()