From a0e6b98ce8c67270b4a6d31121896fea47b6c2a7 Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Wed, 15 Jan 2020 02:38:45 +0100 Subject: [PATCH 01/43] persistent mode harness --- src/afl-fuzz-init.c | 6 ++-- unicorn_mode/samples/c/harness.c | 50 +++++++++++++++++++------------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c index 5fe3689e..48b0d8ac 100644 --- a/src/afl-fuzz-init.c +++ b/src/afl-fuzz-init.c @@ -1940,17 +1940,17 @@ void check_binary(u8* fname) { } - if ((qemu_mode || unicorn_mode) && + if ((qemu_mode) && memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) { SAYF("\n" cLRD "[-] " cRST "This program appears to be instrumented with afl-gcc, but is being " "run in\n" - " QEMU or Unicorn mode (-Q or -U). This is probably not what you " + " QEMU mode (-Q). This is probably not what you " "want -\n" " this setup will be slow and offer no practical benefits.\n"); - FATAL("Instrumentation found in -Q or -U mode"); + FATAL("Instrumentation found in -Q mode"); } diff --git a/unicorn_mode/samples/c/harness.c b/unicorn_mode/samples/c/harness.c index cc81ba7f..4239b222 100644 --- a/unicorn_mode/samples/c/harness.c +++ b/unicorn_mode/samples/c/harness.c @@ -33,21 +33,24 @@ // Memory map for the code to be tested // Arbitrary address where code to test will be loaded -#define BASE_ADDRESS (0x100000) -#define CODE_ADDRESS (0x101119) -#define END_ADDRESS (0x1011d7) +static const int64_t BASE_ADDRESS = 0x100000; +static const int64_t CODE_ADDRESS = 0x101119; +static const int64_t END_ADDRESS = 0x1011d7; // Address of the stack (Some random address again) -#define STACK_ADDRESS (((int64_t) 0x01) << 58) +static const int64_t STACK_ADDRESS = (((int64_t) 0x01) << 58); // Size of the stack (arbitrarily chosen, just make it big enough) -#define STACK_SIZE (0x10000) +static const int64_t STACK_SIZE = 0x10000; // Location where the input will be placed (make sure the emulated program knows this somehow, too 
;) ) -#define INPUT_LOCATION (0x10000) +static const int64_t INPUT_LOCATION = 0x10000; // Inside the location, we have an ofset in our special case -#define INPUT_OFFSET (0x16) +static const int64_t INPUT_OFFSET = 0x16; // Maximum allowable size of mutated data from AFL -#define INPUT_SIZE_MAX (0x10000) +static const int64_t INPUT_SIZE_MAX = 0x10000; // Alignment for unicorn mappings (seems to be needed) -#define ALIGNMENT ((uint64_t) 0x1000) +static const int64_t ALIGNMENT = 0x1000; + +// In our special case, we emulate main(), so argc is needed. +static const uint64_t EMULATED_ARGC = 2; static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size); @@ -100,10 +103,22 @@ static bool place_input_callback( void *data ){ // printf("Placing input with len %ld to %x\n", input_len, DATA_ADDRESS); - if (input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) { - // Test input too long, ignore this testcase + if (input_len < 1 || input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) { + // Test input too short or too long, ignore this testcase return false; } + // We need a valid c string, make sure it never goes out of bounds. + input[input_len-1] = '\0'; + + // For persistent mode, we have to set up stack and memory each time. + uc_reg_write(uc, UC_X86_REG_RIP, &CODE_ADDRESS); // Set the instruction pointer back + // Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly) + uc_reg_write(uc, UC_X86_REG_RSI, &INPUT_LOCATION); // argv + uc_reg_write(uc, UC_X86_REG_RDI, &EMULATED_ARGC); // argc == 2 + + // Make sure the input is 0 terminated. + //input[input_len-1] = '\0'; + // Write the testcase to unicorn. 
uc_mem_write(uc, INPUT_LOCATION + INPUT_OFFSET, input, input_len); return true; } @@ -188,12 +203,7 @@ int main(int argc, char **argv, char **envp) { uc_mem_write(uc, 0x10008, "\x16\x00\x01", 3); // little endian of 0x10016, see above - // Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly) - uint64_t input_location = INPUT_LOCATION; - uc_reg_write(uc, UC_X86_REG_RSI, &input_location); // argv - uint64_t emulated_argc = 2; - uc_reg_write(uc, UC_X86_REG_RDI, &emulated_argc); // argc == 2 - + // If we want tracing output, set the callbacks here if (tracing) { // tracing all basic blocks with customized callback @@ -212,9 +222,9 @@ int main(int argc, char **argv, char **envp) { &end_address, // Where to exit (this is an array) 1, // Count of end addresses NULL, // Optional calback to run after each exec - false, - 1, // For persistent mode: How many rounds to run - NULL + false, // true, if the optional callback should be run also for non-crashes + 100, // For persistent mode: How many rounds to run + NULL // additional data pointer ); switch(afl_ret) { case UC_AFL_RET_ERROR: From 1ac31361ca61f71b6a419064de5063aef80203e5 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Fri, 17 Jan 2020 20:41:30 +0100 Subject: [PATCH 02/43] as suggested, added a comment, why NetBSD needs a higher memory limit --- include/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/config.h b/include/config.h index 69380282..4eac82e0 100644 --- a/include/config.h +++ b/include/config.h @@ -67,7 +67,7 @@ # else # define MEM_LIMIT 50 # endif /* ^!WORD_SIZE_64 */ -#else +#else /* NetBSD's kernel needs more space for stack, see discussion for issue #165 */ # define MEM_LIMIT 200 #endif /* Default memory limit when running in QEMU mode (MB): */ From 858b5da24e3b060e2ebf6ab48ded22fbdd7d3ceb Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 18 Jan 2020 14:28:31 +0000 Subject: [PATCH 03/43] libdislocator: reallocarray API 
introduction --- libdislocator/libdislocator.so.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/libdislocator/libdislocator.so.c b/libdislocator/libdislocator.so.c index 20649470..b9ba8967 100644 --- a/libdislocator/libdislocator.so.c +++ b/libdislocator/libdislocator.so.c @@ -397,6 +397,28 @@ void* aligned_alloc(size_t align, size_t len) { } +/* specific BSD api mainly checking possible overflow for the size */ + +void* reallocarray(void* ptr, size_t elem_len, size_t elem_cnt) { + + const size_t elem_lim = 1UL << (sizeof(size_t) * 4); + const size_t elem_tot = elem_len * elem_cnt; + void* ret = NULL; + + if ((elem_len >= elem_lim || elem_cnt >= elem_lim) && elem_len > 0 && + elem_cnt > (SIZE_MAX / elem_len)) { + + DEBUGF("reallocarray size overflow (%zu)", elem_tot); + + } else { + + ret = realloc(ptr, elem_tot); + + } + + return ret; +} + __attribute__((constructor)) void __dislocator_init(void) { u8* tmp = (u8*)getenv("AFL_LD_LIMIT_MB"); From 6b0950b03d8a9fd0c21b4be71fd4a4bd6ab68547 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sat, 18 Jan 2020 16:13:57 +0100 Subject: [PATCH 04/43] fix some syntax errors regarding $(filter ...) 
--- Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 703ed673..df5ad048 100644 --- a/Makefile +++ b/Makefile @@ -55,17 +55,17 @@ CFLAGS += -Wall -g -Wno-pointer-sign -I include/ \ AFL_FUZZ_FILES = $(wildcard src/afl-fuzz*.c) -ifneq "($filter %3.7m, $(shell python3.7m-config --includes 2>/dev/null)" "" +ifneq "$(filter %3.7m, $(shell python3.7m-config --includes 2>/dev/null))" "" PYTHON_INCLUDE ?= $(shell python3.7m-config --includes) PYTHON_LIB ?= $(shell python3.7m-config --ldflags) PYTHON_VERSION = 3.7m else - ifneq "($filter %3.7, $(shell python3.7-config --includes) 2> /dev/null" "" + ifneq "$(filter %3.7, $(shell python3.7-config --includes) 2> /dev/null)" "" PYTHON_INCLUDE ?= $(shell python3.7-config --includes) PYTHON_LIB ?= $(shell python3.7-config --ldflags) PYTHON_VERSION = 3.7 else - ifneq "($filter %2.7, $(shell python2.7-config --includes) 2> /dev/null" "" + ifneq "$(filter %2.7, $(shell python2.7-config --includes) 2> /dev/null)" "" PYTHON_INCLUDE ?= $(shell python2.7-config --includes) PYTHON_LIB ?= $(shell python2.7-config --ldflags) PYTHON_VERSION = 2.7 @@ -77,14 +77,14 @@ PYTHON_INCLUDE ?= $(shell test -e /usr/include/python3.7m && echo /usr/include/p PYTHON_INCLUDE ?= $(shell test -e /usr/include/python3.7 && echo /usr/include/python3.7) PYTHON_INCLUDE ?= $(shell test -e /usr/include/python2.7 && echo /usr/include/python2.7) -ifneq "($filter %3.7m, $(PYTHON_INCLUDE))" "" +ifneq "$(filter %3.7m, $(PYTHON_INCLUDE))" "" PYTHON_VERSION ?= 3.7m PYTHON_LIB ?= -lpython3.7m else - ifneq "($filter %3.7, $(PYTHON_INCLUDE))" "" + ifneq "$(filter %3.7, $(PYTHON_INCLUDE))" "" PYTHON_VERSION ?= 3.7 else - ifneq "($filter %2.7, $(PYTHON_INCLUDE))" "" + ifneq "$(filter %2.7, $(PYTHON_INCLUDE))" "" PYTHON_VERSION ?= 2.7 PYTHON_LIB ?= -lpython2.7 else From 00b1d16ac61e9f86cd0c1defec6299e0a5e3fdde Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sat, 18 Jan 2020 16:28:13 +0100 Subject: [PATCH 05/43] 
more fixes for python checks --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index df5ad048..dbb37feb 100644 --- a/Makefile +++ b/Makefile @@ -60,12 +60,12 @@ ifneq "$(filter %3.7m, $(shell python3.7m-config --includes 2>/dev/null))" "" PYTHON_LIB ?= $(shell python3.7m-config --ldflags) PYTHON_VERSION = 3.7m else - ifneq "$(filter %3.7, $(shell python3.7-config --includes) 2> /dev/null)" "" + ifneq "$(filter %3.7, $(shell python3.7-config --includes 2>/dev/null))" "" PYTHON_INCLUDE ?= $(shell python3.7-config --includes) PYTHON_LIB ?= $(shell python3.7-config --ldflags) PYTHON_VERSION = 3.7 else - ifneq "$(filter %2.7, $(shell python2.7-config --includes) 2> /dev/null)" "" + ifneq "$(filter %2.7, $(shell python2.7-config --includes 2>/dev/null))" "" PYTHON_INCLUDE ?= $(shell python2.7-config --includes) PYTHON_LIB ?= $(shell python2.7-config --ldflags) PYTHON_VERSION = 2.7 From db5d5017155a24cb04bef97a0cf97d45456e7901 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sat, 18 Jan 2020 16:46:14 +0100 Subject: [PATCH 06/43] set AFL_CC for libradamsa test (needed on FreeBSD) --- test/test.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index 8f40773c..5bab0d7a 100755 --- a/test/test.sh +++ b/test/test.sh @@ -457,7 +457,13 @@ test -e ../libdislocator.so && { } rm -f test-compcov test -e ../libradamsa.so && { - test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 + # on FreeBSD need to set AFL_CC + if which clang >/dev/null; then + export AFL_CC=`which clang` + else + export AFL_CC=`$LLVM_CONFIG --bindir`/clang + fi + test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c test -e test-instr.plain || ../afl-gcc-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain && { 
From 08691fcc974a9fcf2df3e926959b21199df7e946 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sat, 18 Jan 2020 16:58:20 +0100 Subject: [PATCH 07/43] add forgotten stderr redirect --- test/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index 5bab0d7a..c770c1b7 100755 --- a/test/test.sh +++ b/test/test.sh @@ -463,7 +463,7 @@ test -e ../libradamsa.so && { else export AFL_CC=`$LLVM_CONFIG --bindir`/clang fi - test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c + test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../afl-gcc-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain && { From 0eec6221554c260b2d93de73e88c2279c4479753 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sat, 18 Jan 2020 16:35:21 +0100 Subject: [PATCH 08/43] Intel test taken from lto branch, extended (as in test.sh), and tested on RaspberryPi --- Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Makefile b/Makefile index dbb37feb..7260ee47 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,14 @@ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .te CFLAGS_OPT = -march=native endif +ifneq "$(shell uname -m)" "x86_64" + ifneq "$(shell uname -m)" "i386" + ifneq "$(shell uname -m)" "amd64" + AFL_NO_X86=1 + endif + endif +endif + CFLAGS ?= -O3 -funroll-loops $(CFLAGS_OPT) CFLAGS += -Wall -g -Wno-pointer-sign -I include/ \ -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \ From e7770a70023381bc7ff96b1d346b0ff9741f62de Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sun, 19 Jan 2020 12:25:32 +0100 Subject: [PATCH 09/43] make exporting AFL_CC FreeBSD specific, since it seems to harm the libradamsa test on travis/arm64 --- test/test.sh | 13 ++++++++----- 1 file changed, 8 
insertions(+), 5 deletions(-) diff --git a/test/test.sh b/test/test.sh index c770c1b7..43b278b4 100755 --- a/test/test.sh +++ b/test/test.sh @@ -458,11 +458,14 @@ test -e ../libdislocator.so && { rm -f test-compcov test -e ../libradamsa.so && { # on FreeBSD need to set AFL_CC - if which clang >/dev/null; then - export AFL_CC=`which clang` - else - export AFL_CC=`$LLVM_CONFIG --bindir`/clang - fi + + test `uname -s` = 'FreeBSD' && { + if which clang >/dev/null; then + export AFL_CC=`which clang` + else + export AFL_CC=`$LLVM_CONFIG --bindir`/clang + fi + } test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../afl-gcc-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1 From f706e210ec07d8797850781ed82d2279df9a88b9 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sun, 19 Jan 2020 21:20:51 +0100 Subject: [PATCH 10/43] add missing test cases for qemu_mode unsigaction library --- test/test.sh | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index 43b278b4..97cc1511 100755 --- a/test/test.sh +++ b/test/test.sh @@ -569,8 +569,64 @@ test -e ../afl-qemu-trace && { CODE=1 exit 1 } - $ECHO "$YELLOW[-] we need a test case for qemu_mode unsigaction library" rm -rf in out errors + test -e ../qemu_mode/unsigaction/unsigaction32.so && { + ${AFL_CC} -o test-unsigaction32 -m32 test-unsigaction.c >> errors 2>&1 && { + ./test-unsigaction32 + RETVAL_NORMAL32=$? + LD_PRELOAD=../qemu_mode/unsigaction/unsigaction32.so ./test-unsigaction32 + RETVAL_LIBUNSIGACTION32=$? + test $RETVAL_NORMAL32 = "2" -a $RETVAL_LIBUNSIGACTION32 = "0" && { + $ECHO "$GREEN[+] qemu_mode unsigaction library (32 bit) ignores signals" + } || { + test $RETVAL_NORMAL32 != "2" && { + $ECHO "$RED[!] 
cannot trigger signal in test program (32 bit)" + } + test $RETVAL_LIBUNSIGACTION32 != "0" && { + $ECHO "$RED[!] signal in test program (32 bit) is not ignored with unsigaction" + } + CODE=1 + } + } || { + echo CUT------------------------------------------------------------------CUT + cat errors + echo CUT------------------------------------------------------------------CUT + $ECHO "$RED[!] cannot compile test program (32 bit) for unsigaction library" + CODE=1 + } + } || { + $ECHO "$YELLOW[-] we cannot test qemu_mode unsigaction library (32 bit) because it is not present" + INCOMPLETE=1 + } + test -e ../qemu_mode/unsigaction/unsigaction64.so && { + ${AFL_CC} -o test-unsigaction64 -m64 test-unsigaction.c >> errors 2>&1 && { + ./test-unsigaction64 + RETVAL_NORMAL64=$? + LD_PRELOAD=../qemu_mode/unsigaction/unsigaction64.so ./test-unsigaction64 + RETVAL_LIBUNSIGACTION64=$? + test $RETVAL_NORMAL64 = "2" -a $RETVAL_LIBUNSIGACTION64 = "0" && { + $ECHO "$GREEN[+] qemu_mode unsigaction library (64 bit) ignores signals" + } || { + test $RETVAL_NORMAL64 != "2" && { + $ECHO "$RED[!] cannot trigger signal in test program (64 bit)" + } + test $RETVAL_LIBUNSIGACTION64 != "0" && { + $ECHO "$RED[!] signal in test program (64 bit) is not ignored with unsigaction" + } + CODE=1 + } + } || { + echo CUT------------------------------------------------------------------CUT + cat errors + echo CUT------------------------------------------------------------------CUT + $ECHO "$RED[!] cannot compile test program (64 bit) for unsigaction library" + CODE=1 + } + } || { + $ECHO "$YELLOW[-] we cannot test qemu_mode unsigaction library (64 bit) because it is not present" + INCOMPLETE=1 + } + rm -rf errors test-unsigaction32 test-unsigaction64 } } || { $ECHO "$RED[!] gcc compilation of test targets failed - what is going on??" 
From 274c8d7d3cff7ad61f2a57c7f69914a3948711d2 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sun, 19 Jan 2020 21:22:41 +0100 Subject: [PATCH 11/43] add missing test program (oops) --- test/test-unsigaction.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 test/test-unsigaction.c diff --git a/test/test-unsigaction.c b/test/test-unsigaction.c new file mode 100644 index 00000000..1a5e4b26 --- /dev/null +++ b/test/test-unsigaction.c @@ -0,0 +1,25 @@ +#include <signal.h> /* sigemptyset(), sigaction(), kill(), SIGUSR1 */ +#include <stdlib.h> /* exit() */ +#include <unistd.h> /* getpid() */ +#include <errno.h> /* errno */ +#include <stdio.h> /* fprintf() */ + +static void mysig_handler(int sig) +{ + exit(2); +} + +int main() +{ + /* setup sig handler */ + struct sigaction sa; + sa.sa_handler = mysig_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + if (sigaction(SIGCHLD, &sa, NULL)) { + fprintf(stderr, "could not set signal handler %d, aborted\n", errno); + exit(1); + } + kill(getpid(), SIGCHLD); + return 0; +} From 72058fdcbcdc707824bd4211ce528237afc1140e Mon Sep 17 00:00:00 2001 From: van Hauser Date: Mon, 20 Jan 2020 12:56:55 +0100 Subject: [PATCH 12/43] another freebsd fix in test.sh --- test/test.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/test.sh b/test/test.sh index 97cc1511..23d98278 100755 --- a/test/test.sh +++ b/test/test.sh @@ -179,11 +179,13 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { $ECHO "$BLUE[*] Testing: llvm_mode" test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { # on FreeBSD need to set AFL_CC - if which clang >/dev/null; then - export AFL_CC=`which clang` - else - export AFL_CC=`$LLVM_CONFIG --bindir`/clang - fi + test `uname -s` = 'FreeBSD' && { + if which clang >/dev/null; then + export AFL_CC=`which clang` + else + export AFL_CC=`$LLVM_CONFIG --bindir`/clang + fi + } ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 AFL_HARDEN=1 ../afl-clang-fast -o 
test-compcov.harden test-compcov.c > /dev/null 2>&1 test -e test-instr.plain && { From 0d5a8f69e9785cdaec4c9b62e186050112a7cb8f Mon Sep 17 00:00:00 2001 From: van Hauser Date: Mon, 20 Jan 2020 19:21:44 +0100 Subject: [PATCH 13/43] fixed Heiko's global search-replace :) --- include/afl-as.h | 2 +- include/afl-fuzz.h | 2 +- include/alloc-inl.h | 2 +- include/android-ashmem.h | 2 +- include/common.h | 2 +- include/config.h | 17 +++++++++-------- include/debug.h | 2 +- include/forkserver.h | 2 +- include/sharedmem.h | 2 +- include/types.h | 2 +- libdislocator/libdislocator.so.c | 1 + src/afl-analyze.c | 2 +- src/afl-as.c | 2 +- src/afl-common.c | 2 +- src/afl-forkserver.c | 2 +- src/afl-fuzz-bitmap.c | 9 +++++++-- src/afl-fuzz-extras.c | 2 +- src/afl-fuzz-globals.c | 2 +- src/afl-fuzz-init.c | 2 +- src/afl-fuzz-misc.c | 2 +- src/afl-fuzz-one.c | 2 +- src/afl-fuzz-python.c | 2 +- src/afl-fuzz-queue.c | 2 +- src/afl-fuzz-run.c | 2 +- src/afl-fuzz-stats.c | 2 +- src/afl-fuzz.c | 2 +- src/afl-gcc.c | 2 +- src/afl-gotcpu.c | 2 +- src/afl-sharedmem.c | 2 +- src/afl-showmap.c | 2 +- src/afl-tmin.c | 2 +- 31 files changed, 45 insertions(+), 38 deletions(-) diff --git a/include/afl-as.h b/include/afl-as.h index 3af42205..bd5e734a 100644 --- a/include/afl-as.h +++ b/include/afl-as.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 9ecf1f29..00d29f76 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/alloc-inl.h b/include/alloc-inl.h index 48598ed3..5592b295 100644 --- a/include/alloc-inl.h +++ b/include/alloc-inl.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc 
Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/android-ashmem.h b/include/android-ashmem.h index 35a5ba5e..adddc05f 100755 --- a/include/android-ashmem.h +++ b/include/android-ashmem.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/common.h b/include/common.h index 8ab78b41..3b953470 100644 --- a/include/common.h +++ b/include/common.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/config.h b/include/config.h index 4eac82e0..83fcb8f9 100644 --- a/include/config.h +++ b/include/config.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi @@ -62,13 +62,14 @@ /* Default memory limit for child process (MB): */ #ifndef __NetBSD__ -# ifndef WORD_SIZE_64 -# define MEM_LIMIT 25 -# else -# define MEM_LIMIT 50 -# endif /* ^!WORD_SIZE_64 */ -#else /* NetBSD's kernel needs more space for stack, see discussion for issue #165 */ -# define MEM_LIMIT 200 +#ifndef WORD_SIZE_64 +#define MEM_LIMIT 25 +#else +#define MEM_LIMIT 50 +#endif /* ^!WORD_SIZE_64 */ +#else /* NetBSD's kernel needs more space for stack, see discussion for issue \ + #165 */ +#define MEM_LIMIT 200 #endif /* Default memory limit when running in QEMU mode (MB): */ diff --git a/include/debug.h b/include/debug.h index 68109927..d6c04935 100644 --- a/include/debug.h +++ b/include/debug.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/forkserver.h b/include/forkserver.h index 17bc65af..0fdcba48 100644 --- a/include/forkserver.h +++ b/include/forkserver.h @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now 
maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/sharedmem.h b/include/sharedmem.h index 69291330..7604d64c 100644 --- a/include/sharedmem.h +++ b/include/sharedmem.h @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/types.h b/include/types.h index eba47be7..9e681e81 100644 --- a/include/types.h +++ b/include/types.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/libdislocator/libdislocator.so.c b/libdislocator/libdislocator.so.c index b9ba8967..221a629b 100644 --- a/libdislocator/libdislocator.so.c +++ b/libdislocator/libdislocator.so.c @@ -417,6 +417,7 @@ void* reallocarray(void* ptr, size_t elem_len, size_t elem_cnt) { } return ret; + } __attribute__((constructor)) void __dislocator_init(void) { diff --git a/src/afl-analyze.c b/src/afl-analyze.c index 3d4e636e..3de8c037 100644 --- a/src/afl-analyze.c +++ b/src/afl-analyze.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-as.c b/src/afl-as.c index 77ac2f97..8d689385 100644 --- a/src/afl-as.c +++ b/src/afl-as.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-common.c b/src/afl-common.c index 8c2f2b9a..6cb97cdf 100644 --- a/src/afl-common.c +++ b/src/afl-common.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c index de50c73c..77e1d648 100644 --- a/src/afl-forkserver.c +++ 
b/src/afl-forkserver.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c index 515a7a79..3ffda284 100644 --- a/src/afl-fuzz-bitmap.c +++ b/src/afl-fuzz-bitmap.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi @@ -524,7 +524,12 @@ u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) { struct queue_entry* q = queue; while (q) { - if (q->exec_cksum == cksum) { q->n_fuzz = q->n_fuzz + 1; break ; } + if (q->exec_cksum == cksum) { + + q->n_fuzz = q->n_fuzz + 1; + break; + + } q = q->next; diff --git a/src/afl-fuzz-extras.c b/src/afl-fuzz-extras.c index fcc7749d..6c6dc28c 100644 --- a/src/afl-fuzz-extras.c +++ b/src/afl-fuzz-extras.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-globals.c b/src/afl-fuzz-globals.c index b3476778..f0d98192 100644 --- a/src/afl-fuzz-globals.c +++ b/src/afl-fuzz-globals.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c index 5fe3689e..219be822 100644 --- a/src/afl-fuzz-init.c +++ b/src/afl-fuzz-init.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-misc.c b/src/afl-fuzz-misc.c index b8f376be..0da0cb0a 100644 --- a/src/afl-fuzz-misc.c +++ b/src/afl-fuzz-misc.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git 
a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 74123300..b04683be 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-python.c b/src/afl-fuzz-python.c index f1cdecde..f06c8e25 100644 --- a/src/afl-fuzz-python.c +++ b/src/afl-fuzz-python.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c index 1b51e3aa..0880de75 100644 --- a/src/afl-fuzz-queue.c +++ b/src/afl-fuzz-queue.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c index fa7a872a..a006194d 100644 --- a/src/afl-fuzz-run.c +++ b/src/afl-fuzz-run.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c index 7679403b..f2afb295 100644 --- a/src/afl-fuzz-stats.c +++ b/src/afl-fuzz-stats.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 0af8b35f..9a7495ef 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-gcc.c b/src/afl-gcc.c index 301e2034..e46fe5cd 100644 --- a/src/afl-gcc.c +++ b/src/afl-gcc.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , 
Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c index 9a56159c..5be30238 100644 --- a/src/afl-gotcpu.c +++ b/src/afl-gotcpu.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-sharedmem.c b/src/afl-sharedmem.c index 16eb14a7..04fcaa1c 100644 --- a/src/afl-sharedmem.c +++ b/src/afl-sharedmem.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-showmap.c b/src/afl-showmap.c index 8c899c9d..b9da3208 100644 --- a/src/afl-showmap.c +++ b/src/afl-showmap.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-tmin.c b/src/afl-tmin.c index 3e33b72f..7ce0ccaa 100644 --- a/src/afl-tmin.c +++ b/src/afl-tmin.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi From 00d086f816d6b517a6817d6093a83ed8a65b18fa Mon Sep 17 00:00:00 2001 From: van Hauser Date: Tue, 21 Jan 2020 12:53:36 +0100 Subject: [PATCH 14/43] USE_TRACE_PC unnecessary, set env AFL_LLVM_USE_TRACE_PC instead --- docs/ChangeLog | 8 +++++--- llvm_mode/README.md | 19 +++++++++---------- llvm_mode/afl-clang-fast.c | 33 ++++++++++++++++++++++++--------- 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/docs/ChangeLog b/docs/ChangeLog index 5347d244..bb3537dd 100644 --- a/docs/ChangeLog +++ b/docs/ChangeLog @@ -21,9 +21,11 @@ Version ++2.60d (develop): - afl-fuzz: - now prints the real python version support compiled in - set stronger performance compile options and little tweaks - - afl-clang-fast now shows in the help output for which llvm version it - was compiled for - - added blacklisted function check in 
llvm_mode + - afl-clang-fast: + - show in the help output for which llvm version it was compiled for + - now does not need to be recompiled between trace-pc and pass + instrumentation. compile normally and set AFL_LLVM_USE_TRACE_PC :) + - added blacklisted function check in all modules of llvm_mode - added fix from Debian project to compile libdislocator and libtokencap diff --git a/llvm_mode/README.md b/llvm_mode/README.md index 5afa4dfd..150d1a17 100644 --- a/llvm_mode/README.md +++ b/llvm_mode/README.md @@ -198,24 +198,23 @@ PS. Because there are task switches still involved, the mode isn't as fast as faster than the normal fork() model, and compared to in-process fuzzing, should be a lot more robust. -## 8) Bonus feature #3: new 'trace-pc-guard' mode +## 8) Bonus feature #3: 'trace-pc-guard' mode -Recent versions of LLVM are shipping with a built-in execution tracing feature +LLVM is shipping with a built-in execution tracing feature that provides AFL with the necessary tracing data without the need to post-process the assembly or install any compiler plugins. See: http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards -If you have a sufficiently recent compiler and want to give it a try, build -afl-clang-fast this way: +If you have not an outdated compiler and want to give it a try, build +targets this way: ``` - AFL_TRACE_PC=1 make clean all + libtarget-1.0 $ AFL_LLVM_USE_TRACE_PC=1 make ``` -Note that this mode is currently about 20% slower than "vanilla" afl-clang-fast, +Note that this mode is about 20% slower than "vanilla" afl-clang-fast, and about 5-10% slower than afl-clang. This is likely because the -instrumentation is not inlined, and instead involves a function call. On systems -that support it, compiling your target with -flto should help. - - +instrumentation is not inlined, and instead involves a function call. +On systems that support it, compiling your target with -flto can help +a bit. 
diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c index b322b762..7da7c5a3 100644 --- a/llvm_mode/afl-clang-fast.c +++ b/llvm_mode/afl-clang-fast.c @@ -204,13 +204,24 @@ static void edit_params(u32 argc, char** argv) { // "-fsanitize-coverage=trace-cmp,trace-div,trace-gep"; // cc_params[cc_par_cnt++] = "-sanitizer-coverage-block-threshold=0"; #else - cc_params[cc_par_cnt++] = "-Xclang"; - cc_params[cc_par_cnt++] = "-load"; - cc_params[cc_par_cnt++] = "-Xclang"; - if (getenv("AFL_LLVM_INSTRIM") != NULL || getenv("INSTRIM_LIB") != NULL) - cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path); - else - cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); + if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") || + getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) { + + cc_params[cc_par_cnt++] = + "-fsanitize-coverage=trace-pc-guard"; // edge coverage by default + + } else { + + cc_params[cc_par_cnt++] = "-Xclang"; + cc_params[cc_par_cnt++] = "-load"; + cc_params[cc_par_cnt++] = "-Xclang"; + if (getenv("AFL_LLVM_INSTRIM") != NULL || getenv("INSTRIM_LIB") != NULL) + cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path); + else + cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); + + } + #endif /* ^USE_TRACE_PC */ cc_params[cc_par_cnt++] = "-Qunused-arguments"; @@ -282,8 +293,10 @@ static void edit_params(u32 argc, char** argv) { #ifdef USE_TRACE_PC - if (getenv("AFL_INST_RATIO")) - FATAL("AFL_INST_RATIO not available at compile time with 'trace-pc'."); + if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") || + getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) + if (getenv("AFL_INST_RATIO")) + FATAL("AFL_INST_RATIO not available at compile time with 'trace-pc'."); #endif /* USE_TRACE_PC */ @@ -455,6 +468,8 @@ int main(int argc, char** argv) { #ifdef USE_TRACE_PC SAYF(cCYA "afl-clang-fast" VERSION cRST " [tpcg] by \n"); +#warning \ + "You do not 
need to specifically compile with USE_TRACE_PC anymore, setting the environment variable AFL_LLVM_USE_TRACE_PC is enough." #else SAYF(cCYA "afl-clang-fast" VERSION cRST " by \n"); #endif /* ^USE_TRACE_PC */ From 8b92a40e19c1a90a31e7514de1c90f0cf558a62a Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Wed, 22 Jan 2020 02:08:30 +0100 Subject: [PATCH 15/43] c example now uses persistent mode --- unicorn_mode/samples/c/a.out | Bin 17184 -> 0 bytes unicorn_mode/samples/c/harness.c | 43 ++++++++++++++---- .../{simple_target.c => persistent_target.c} | 27 ++++++----- .../samples/c/persistent_target_x86_64 | Bin 0 -> 16544 bytes 4 files changed, 50 insertions(+), 20 deletions(-) delete mode 100644 unicorn_mode/samples/c/a.out rename unicorn_mode/samples/c/{simple_target.c => persistent_target.c} (51%) create mode 100644 unicorn_mode/samples/c/persistent_target_x86_64 diff --git a/unicorn_mode/samples/c/a.out b/unicorn_mode/samples/c/a.out deleted file mode 100644 index 176c25e1232741d579008dd24852586a98600468..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 17184 zcmeHOdu$xV86W!+Vn}i}c|b}iSsrN}R3FZR5CTr_96MQ8PH+=DB?YoM->vN<_pt8v zh=WRN$q;c31`D;6A_{4xRw@0X5rP0wn+AeFl&T6yr72BYLxG-K9_Cfj6uADr+4=VN z)_0UvYSlk(q`Pmv-}jiAZ)WypZg#%j-nqWErbck`i8X?_$p#w_DgzP}tHexv ze^#6;P6IzpVzxYJ6G-heP+Lmt6z&5hyZuV;rROR=rd&g!WLGJ5sREb^)ef?oPDYCN z(ub)bNK9Gol&49t5FQ$&h9EI@+mXxtO;RlOx0KRx)$Y8aY)35FJ*w;;Rd!4@rO%Y( zNim_%lS=NTZIDr7O1hM~UbUi>A3I;NQ)ssh3#M%MCD^HuDUDAi9QMtsyh?GoTGgK^ z#h=n8@mTN56_>=LP4QSVJJd9^YGu>P6@g4DuuQgl9(1THHf-t<*G$ZaKH&est@9Sf z?mGILhUG20zx@ZQH`Rgskq#x&SH(%3WLzG}KTUcBN^{C6-dO>^5V%%G|A7kn->rbZ zRRKR%0e`OoJ{=7%SN{_g^cPmpr}%65c+=Mal=FYI0=}mLz81KKk2lRieJ=2V#ACc< zld(uDoecNIX`Cz-hS8r$C5?;~PFsc{jE?S&M$}B3{jrQ?rn@(;i>H!icepoh+Oo0| z_#Lzc(q=eHPMfKm1m#vzE^XN*Wg|vv&`b)@jFmRi=~SBX2{U0N!h@XMm5x~^F(}Az zn1NJkrxA`uNnj?jc6C{c%B6fHp30b1V#F?cFl%MdJ~S<|(})c0H2T7^xEM^wl2#w^ z$WYkmizUPH*o|nLL3Sz(iEu0_HgtBht}~VeRyet3flEt+O7p}gXA935lGkFs)QVS; 
zom;s`iBmvEZyCc--!ui=r2F)&9{w@)S zv??fx^zVMY233gUD_7WlVTaH+@g=6~2lXbL0 z7ou?K%YX}uU%_|%q9Ag_50GeLynwX$MdE2GG;vJw&l68epot@r{{`{1M4LDu`6q~{ zrO?Da$sZz~mOv9@l7EnRTKY^3Oa1}kX}K~nDEaRbPfMMN9g@F?cv|92bV>eB;%R9! z5tRHlh^M9CL^F75@6tCqa?fqob1&;7uS|AzcaBcK3+3yhvlmIy@-}K$nEymTi1)_y z(UveA=%drWLgez3)_jEOKFBCkrx(WS7Y)O=II1Z0mrDIhFDA0&V?8&iKmO)g{qd7N zz2+(XxsR=RaFA6F>I&n1^^4ky_Ef*&mNy_4+2uX@NXur*>bX~~+4|m=Ux6vi58^u? zL1Uhp{v@(B*B$K()GxZ3sv(Z{74y_zxej@jDel;K{5kqzSt5e)d z@G!9nBM%K2e~d=RA=)DzriHlpAN$qcij2A1-hEgvLHSrN8;&}C^C z01LEX8jPK4Hlaowj|=U1TuhZL#$&D%kLSqYXv=B<`EOBPR>{@8F@Sr~3lzqXV|EX# zA^`W<`IAw7x!rZv^!)X}oRLtuQw!Kt<V^wtU&+L3j2j_q} z9jxVU`j8$Edafs*Uxqg2YcXvwxxA zn`^EMT^qVCv^`|FpJDX;qF*AS2+;Wr zr8R5TXx-^>B$n*gdc&DmMC*;GB0IIPr8N&Njt_l7@fn;TX)S~e$>RgJT`Y;iu584L zrIIc?H|n((qCJs~hb>dv*wM8`i=?8a*4P{CZ!(k7SU9;vi1j$fGNT$i;?#@I)Ebv) zyJFUWCJ)uLv}tA2Ni(`yTb$Jz7iX57Ay%BfVynl_IY#@J#xf3eov0O290&%qRx_U3 zrNuIuHDGG%x_VI0q-73SQhD=ICmb!6Q@M7=W0n;+ou+O@#j#~v=)0mD{Xu_djJ;VX z+zPtmtwLcCblYU1Z~*jn&|{!SK_@})ey31qz&>^Ndxb(X=swUc(1!O5g}Z70ibUtL zlxl9=ENX@tY8K9#Q8xw|;q;D09%bPV3%npOqk+gp_@FvM0H?BqzhS+9!58ai?y4IW zYtOmj;^h}zKrq=}i_iB^M|orz@;B_MT{ru*K@_0mUj;q*3G#a(e-`pzQ(f8KcdUjy zkpG|JGyHa;@KTBVgd;yeX73gXRAR-_6Z)o+`$+OHhls^bR^k-EyRU=T1 zKs5r@2vj3bjX*U5)d*B0@ZXF8zbD7<#nDojl9z6?VF!6~tRq;4Fv6Ec=3xN|H znf9M$)6X!k-qTyCcv|CAI>#o0-{YfoJ0+IC|8XHDc{*!D;x+VfIuk?UcikG5oZn4b zry58HxRe&yM9f!|i88}MqK(+QS|K!L&-))bCZu$>O+>Q-7#~y>;d&oYdbA&-#O33R zMz#l#RK+2QFpeL>OJ6y@@UgLZDix^=6yM!cE5M7un&4DZ!`isZv6=X(Qgb$U;g z-+wQ~eZqO)-X-wi4f47&9pS?$4L{wDSn1C zE=zID0#E!%?h~_wRu-S_+~X+q;}<-yOYwS47&l_)eJGUg2e_p^ai++h??zaEj`NYo##U-K2MYzhd#`5e$Tg59}lIV8?o~~%<7@C_yTcjS)8A0MPD>ur-`=XCpocv6vdh%=E4=MaS_3}RBIA53gTNEz;vjO(3!W-3tR{mE5 ztWw;LLso3Uf9I~QpdSIQm3Xk31t?c5GoHU*!Ov|K@b6T>e_R27 z1b73=<#EVW+f4ekQ=c1$EBJY_0{&(N{DTVkS%?GG$2+etl=0!(73nhIXV#uAhJ!_n za%%|}#pjz^c8E>D=V1IdtNt>T*>;JS`XKHdd>SxM$AZKa)49qI-8lFT$xxhW(6W5lD0Bd7B7)SMDc2j zVI}CsNz$Y{Cef79A5Zm$<3`jGMOJ@ecGBcKuhz({8v)9eU 
z;dlM!(8hM7eN!7<`6=}r6-L{(O`(k)>q;f^h7ceuM~rq|dC=Q73u8m)Rjr{;>ELJvnLqj8#;6^N9oF| zK!zC&TVYhS^yW{|V1>E_R78{u`v%dJyFjIS=xw-HSCG8dr;KPOWekLqQF&kLDriMx zNh6yv5mx((5k-NzlBz84Lm5VICZpWRn@12hhcl$*+?Fy7+{fZxql;Q)FJqP7Y$~n0 z2xN9AtZ*-=mA2^s=Ws2`Ob?1cGG&>8{$w`Ln~mXWVJxbkP-{n%748>OHW1DXh(L6A z68>yzrESR#W;%moq7ue{EN#ZaWT0|`aZ3bb90Ky#C_pm-vXtz*fH|PX{y-GA%-I(9 zBoNYq^Kcwa#3Cp)1;6+T$O3FIwmm1T(kQ>&eA>)k7oP^+KJcOOnIND#8Q{@ zMCs?9_Ev1q>vE?2+#_8|{9Lv{}2kDGJbLpxa~D%$MhH*D(Z1BzUi^&-*=`DbHhFM?Digy zJ@31i^8YNXPqB8VePF2m9DkvKp`^)gaR&)qiQ~u6eq`MCypLnr&5DY8%RlI`=lvd2 zTGzUhxBMdz;&pF12S|_iC4B$etIs^sU&02fHpia-Upml4-iqVJ=88Pq(>{a74sY*y zpTPI)H5FJMUu?(Y;3e2n6WN~kqsP?(uMskLVtb~qLFl#@L8aYn(;>w&VLPU8LEyIM zb?}g~=X!eWnSUP{^2PP%b>gkcp37l // Path to the file containing the binary to emulate -#define BINARY_FILE ("simple_target_x86_64") +#define BINARY_FILE ("persistent_target_x86_64") // Memory map for the code to be tested // Arbitrary address where code to test will be loaded static const int64_t BASE_ADDRESS = 0x100000; -static const int64_t CODE_ADDRESS = 0x101119; -static const int64_t END_ADDRESS = 0x1011d7; +static const int64_t CODE_ADDRESS = 0x101139; +static const int64_t END_ADDRESS = 0x10120d; // Address of the stack (Some random address again) static const int64_t STACK_ADDRESS = (((int64_t) 0x01) << 58); // Size of the stack (arbitrarily chosen, just make it big enough) @@ -52,15 +52,33 @@ static const int64_t ALIGNMENT = 0x1000; // In our special case, we emulate main(), so argc is needed. 
static const uint64_t EMULATED_ARGC = 2; +// The return from our fake strlen +static size_t current_input_len = 0; + static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size); } -static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) -{ +static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size); } +/* +The sample uses strlen, since we don't have a loader or libc, we'll fake it. +We know the strlen will return the length of argv[1] that we just planted. +It will be a lot faster than an actual strlen for this specific purpose. +*/ +static void hook_strlen(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + //Hook + //116b: e8 c0 fe ff ff call 1030 + // We place the return at RAX + //printf("Strlen hook at addr 0x%lx (size: 0x%x), result: %ld\n", address, size, current_input_len); + uc_reg_write(uc, UC_X86_REG_RAX, &current_input_len); + // We skip the actual call by updating RIP + uint64_t next_addr = address + size; + uc_reg_write(uc, UC_X86_REG_RIP, &next_addr); +} + /* Unicorn page needs to be 0x1000 aligned, apparently */ static uint64_t pad(uint64_t size) { if (size % ALIGNMENT == 0) return size; @@ -107,8 +125,6 @@ static bool place_input_callback( // Test input too short or too long, ignore this testcase return false; } - // We need a valid c string, make sure it never goes out of bounds. - input[input_len-1] = '\0'; // For persistent mode, we have to set up stack and memory each time. 
uc_reg_write(uc, UC_X86_REG_RIP, &CODE_ADDRESS); // Set the instruction pointer back @@ -116,10 +132,14 @@ static bool place_input_callback( uc_reg_write(uc, UC_X86_REG_RSI, &INPUT_LOCATION); // argv uc_reg_write(uc, UC_X86_REG_RDI, &EMULATED_ARGC); // argc == 2 - // Make sure the input is 0 terminated. - //input[input_len-1] = '\0'; + // We need a valid c string, make sure it never goes out of bounds. + input[input_len-1] = '\0'; // Write the testcase to unicorn. uc_mem_write(uc, INPUT_LOCATION + INPUT_OFFSET, input, input_len); + + // store input_len for the faux strlen hook + current_input_len = input_len; + return true; } @@ -211,6 +231,11 @@ int main(int argc, char **argv, char **envp) { uc_hook_add(uc, &hooks[1], UC_HOOK_CODE, hook_code, NULL, BASE_ADDRESS, BASE_ADDRESS + len - 1); } + // Add our strlen hook (for this specific testcase only) + int strlen_hook_pos = BASE_ADDRESS + 0x116b; + uc_hook strlen_hook; + uc_hook_add(uc, &strlen_hook, UC_HOOK_CODE, hook_strlen, NULL, strlen_hook_pos, strlen_hook_pos); + printf("Starting to fuzz :)\n"); fflush(stdout); diff --git a/unicorn_mode/samples/c/simple_target.c b/unicorn_mode/samples/c/persistent_target.c similarity index 51% rename from unicorn_mode/samples/c/simple_target.c rename to unicorn_mode/samples/c/persistent_target.c index dbf10911..5b866f86 100644 --- a/unicorn_mode/samples/c/simple_target.c +++ b/unicorn_mode/samples/c/persistent_target.c @@ -10,25 +10,30 @@ * Written by Nathan Voss * Adapted by Lukas Seidel */ +#include +#include int main(int argc, char** argv) { - if(argc < 2){ - return -1; - } + if (argc < 2) return -1; char *data_buf = argv[1]; + uint64_t data_len = strlen(data_buf); + if (data_len < 20) return -2; - if (data_buf[20] != 0) { - // Cause an 'invalid read' crash if data[0..3] == '\x01\x02\x03\x04' - unsigned char invalid_read = *(unsigned char *) 0x00000000; - } else if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) { + for (; data_len --> 0 ;) { + if 
(data_len >= 18) continue; + if (data_len > 2 && data_len < 18) { + ((char *)data_len)[(uint64_t)data_buf] = data_buf[data_len + 1]; + } else if (data_buf[9] == 0x90 && data_buf[10] != 0x00 && data_buf[11] == 0x90) { + // Cause a crash if data[10] is not zero, but [9] and [11] are zero + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + } + if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) { // Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2] unsigned char invalid_read = *(unsigned char *) 0x00000000; - } else if (data_buf[9] == 0x00 && data_buf[10] != 0x00 && data_buf[11] == 0x00) { - // Cause a crash if data[10] is not zero, but [9] and [11] are zero - unsigned char invalid_read = *(unsigned char *) 0x00000000; - } + } return 0; } diff --git a/unicorn_mode/samples/c/persistent_target_x86_64 b/unicorn_mode/samples/c/persistent_target_x86_64 new file mode 100644 index 0000000000000000000000000000000000000000..22e04357ebd63bb86b55a9ee5f451aa4851a0f9a GIT binary patch literal 16544 zcmeHOYit}>6~4P`C(gs`&5M{M#nU#49FR9|;uxFO&93dW$H-37)Gm)EnXbKSdttpB z?an54Xc`N*AuHDa6dssMyUlUN1c zw}~xc8OUXlGxZ@8Kx>v^uN^Ch_W`2cv!r+87P4dH9wMUOQstp?Q^P3eIM7e3NM@e# zN$Lk9^TqZQYSDoK>KLQd4|6kDXZp>x*iohS4iK^*is&~_e)HtVI7{s@ay~I9__;{& zoY)RJER1N2<-k=vhV?Q9rN!7_(t?rw?gKyU=ld<3_Jh>kQhBMS{xj0yik-GO@{$6gh} zu;17RjE6Q@(4NzeJbzI4e#>4kR7x1H0&Z;y{`MvK2Z2{g*negT9%J88!d^h01~4s+ zBCBVT@s!ZCfzhZom`FvFi4$=U(L&4W(TuK*L=!2|+1=IJrtJ#u3hohFS8tCNi)Z3P ziL4&a^!BtR)2VoGv_FZSLnG-Fd1|IyoI-p|LTxb zi{$`(%qHW0yBTmZ;AX(hfSUm~18xS~47eHi-^svVt3LFuI`NkZb+Y`|JB3gmo7TO? 
zC3WIr#fOcU4UYYA6q*?%J$j80df3>s|TFN7Vc)>cp#s1HIi-<=bHj ztWH%v2QzSWs^u~m8yhm<_x4#3pNEP%RsJo6=6QVsG`$b&(DVvpwyI_vgU=9xeO}t{ z`8>joTWY?bUU*}#dSTwDdR|a3-_qAZfNd0@!k8Vbs%c;F$Nr7CL_jTayZh9M7B5!S z{8hbDoov|-g0Z0xgy-}DIG7j8w*cWeJTn-qs(BQUjJZ@7fU9_CX9tTz&rJ z%W6J%Y2xI|o>ME&gTlD>9c*DTa!H*GRjB#MJk;ma{Ez0QZy5&21$E+NVcqf2c@T|j zUqE9Z)*SQxxehDdW80;CyaxOV>eK;`^!Grxo`O2rH?MkrTy;*I@sb)s_7xy`x#!Ui zH1`A^spo3Vscq7GV%k%6EdwE>p8r$W zSpR!8xEXLW@SnX||d>!y% zz_$VMeJd8viG#v38t`mhy}aTqJnkcp@9gKU8wNgUi}vO6jontLHo)f_z$<{lAL#H` zf4pkd@rrS=chlV;-n~`6&P3ls@cAmV{}S4T{ejcow#sGROVEP({`z5{KMDPcqrSr* z_@cMVU;TtH;#a21BL3RP%iI0+rJPN~E9AT2 zi{L&Eajft6Fr-`D47eF^GvH>x&48N$Hv?`4+zhxGa5M1#oB>`>$?GS1Z6xDrJ~&!m zSxNF4TEzN6l6jrxW|DdR$-W4Sh=7WDOc-&abVSV>9!Q;!$ zFBg0s?0kjb^JnK*2p$)9er0j|*m)SOj{KqCCsqrkBwtzFZ()z)7kvHM`KpqO#eL9d znPRmx>prnYG?nDn3cjALUOur-@O5nG*Nal)#V0lhUUzM`|3LA2wDTK<60$O8-6yI= zeMz40J_|m0U6qM9jRnT6;l)^rUkAKbREndH{NsHD`DYw_E%CDs{$AicmVWZ}-6Q$M zdDAHl-+w`rWgNaeutUI^|E~aFs{czJ*K;z?8nOCzs5nZgINz5xYZsm0p9AlKfA5^< z_g7@^?B{EeUp(G!$~cS9`*$HxUc8Gt^0^N9wUGaD`kmq5g*$;)WE=Lc&q;pq_}L45 z0PgC}@pBvSYrTuFqYmWlp{4ZcCHOIl?;N+Mfmcezc@p?F-i?Cqn{4tm-b zGtDNj0Sz|248^gVn$~eJ+!N6v``R_w%3^nj39bE8`@%h4ZFY^^GXoU-`e_lBBB<>L zh1S`g$7EvV#P zAN%eid)GuTJ2s+6`vLWgiNjn=q+ol|Q4vg~^>}b7l?(Rg60o@|5hE(x+Eu4VhlJD( zN3+8s7#mALJQMYdsW}$UWE1I>P0&D>i6^7zK((VuT?FM}2KD$T)Nw31lWXt=#*q`U=4}c!0jO@?zLB>bPp8<1iEk{7Wxjg&xyphpQ z#@v6lWBe@W@!5c7o?kL56?yizB9FbI5mZzTNQuYm r7*8qphp&J9--G)O)nZZchXKlZM{sV})Z$#XwEv+d)57O)aERg`mmOeI literal 0 HcmV?d00001 From fb221db8ae4d640aa6261633ca249a86305292c4 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Wed, 22 Jan 2020 08:35:41 +0100 Subject: [PATCH 16/43] clarify gcc plugin test case result --- test/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index 23d98278..0ae6fd09 100755 --- a/test/test.sh +++ b/test/test.sh @@ -336,7 +336,7 @@ test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && { $ECHO "$GREEN[+] gcc_plugin run reported $TUPLES instrumented locations 
which is fine" } || { $ECHO "$RED[!] gcc_plugin instrumentation produces a weird number of instrumented locations: $TUPLES" - $ECHO "$YELLOW[-] the gcc_plugin instrumentation issue is not flagged as an error because travis builds would all fail otherwise :-(" + $ECHO "$YELLOW[-] this is a known issue in gcc, not afl++. It is not flagged as an error because travis builds would all fail otherwise :-(" #CODE=1 } } From 4fbcc37f8450136759913875b6234d2e3ab2f032 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Wed, 22 Jan 2020 09:26:54 +0100 Subject: [PATCH 17/43] awk version for portability, tested on linux and FreeBSD so far --- afl-cmin.awk | 440 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 440 insertions(+) create mode 100755 afl-cmin.awk diff --git a/afl-cmin.awk b/afl-cmin.awk new file mode 100755 index 00000000..021f7059 --- /dev/null +++ b/afl-cmin.awk @@ -0,0 +1,440 @@ +#!/usr/bin/awk -f + +# getopt.awk --- Do C library getopt(3) function in awk + +# External variables: +# Optind -- index in ARGV of first nonoption argument +# Optarg -- string value of argument to current option +# Opterr -- if nonzero, print our own diagnostic +# Optopt -- current option letter + +# Returns: +# -1 at end of options +# "?" for unrecognized option +# a character representing the current option + +# Private Data: +# _opti -- index in multiflag option, e.g., -abc + +function getopt(argc, argv, options, thisopt, i) +{ + if (length(options) == 0) # no options given + return -1 + + if (argv[Optind] == "--") { # all done + Optind++ + _opti = 0 + return -1 + } else if (argv[Optind] !~ /^-[^:[:space:]]/) { + _opti = 0 + return -1 + } + if (_opti == 0) + _opti = 2 + thisopt = substr(argv[Optind], _opti, 1) + Optopt = thisopt + i = index(options, thisopt) + if (i == 0) { + if (Opterr) + printf("%c -- invalid option\n", thisopt) > "/dev/stderr" + if (_opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return "?" 
+ } + if (substr(options, i + 1, 1) == ":") { + # get option argument + if (length(substr(argv[Optind], _opti + 1)) > 0) + Optarg = substr(argv[Optind], _opti + 1) + else + Optarg = argv[++Optind] + _opti = 0 + } else + Optarg = "" + if (_opti == 0 || _opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return thisopt +} + +BEGIN { + Opterr = 1 # default is to diagnose + Optind = 1 # skip ARGV[0] + + # test program + if (_getopt_test) { + while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) + printf("c = <%c>, Optarg = <%s>\n", + _go_c, Optarg) + printf("non-option arguments:\n") + for (; Optind < ARGC; Optind++) + printf("\tARGV[%d] = <%s>\n", + Optind, ARGV[Optind]) + } +} + +function usage() { + print \ +"Usage: afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \ +"\n" \ +"Required parameters:\n" \ +"\n" \ +" -i dir - input directory with starting corpus\n" \ +" -o dir - output directory for minimized files\n" \ +"\n" \ +"Execution control settings:\n" \ +"\n" \ +" -f file - location read by the fuzzed program (stdin)\n" \ +" -m megs - memory limit for child process ("mem_limit" MB)\n" \ +" -t msec - run time limit for child process (none)\n" \ +" -Q - use binary-only instrumentation (QEMU mode)\n" \ +" -U - use unicorn-based instrumentation (unicorn mode)\n" \ +"\n" \ +"Minimization settings:\n" \ +" -C - keep crashing inputs, reject everything else\n" \ +" -e - solve for edge coverage only, ignore hit counts\n" \ +"\n" \ +"For additional tips, please consult docs/README.md\n" \ +"\n" \ + > "/dev/stderr" + exit 1 +} + +function exists_and_is_executable(binarypath) { + return 0 == system("test -f "binarypath" -a -x "binarypath) +} + +BEGIN { + print "corpus minimization tool for afl-fuzz++ (awk version)\n" +print "PATH="ENVIRON["PATH"] + + # defaults + extra_par = "" + # process options + Opterr = 1 # default is to diagnose + Optind = 1 # skip ARGV[0] + while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCQU?")) != -1) { + if (_go_c == 
"i") { + if (!Optarg) usage() + if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + in_dir = Optarg + continue + } else + if (_go_c == "o") { + if (!Optarg) usage() + if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + out_dir = Optarg + continue + } else + if (_go_c == "f") { + if (!Optarg) usage() + if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + stdin_file = Optarg + continue + } else + if (_go_c == "m") { + if (!Optarg) usage() + if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + mem_limit = Optarg + mem_limit_given = 1 + continue + } else + if (_go_c == "t") { + if (!Optarg) usage() + if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + timeout = Optarg + continue + } else + if (_go_c == "C") { + ENVIRON["AFL_CMIN_CRASHES_ONLY"] = 1 + continue + } else + if (_go_c == "e") { + extra_par = extra_par " -e" + continue + } else + if (_go_c == "Q") { + if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + extra_par = extra_par " -Q" + if ( !mem_limit_given ) mem_limit = "250" + qemu_mode = 1 + continue + } else + if (_go_c == "U") { + if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + extra_par = extra_par " -U" + if ( !mem_limit_given ) mem_limit = "250" + unicorn_mode = 1 + continue + } else + if (_go_c == "?") { + exit 1 + } else + usage() + } # while options + + if (!mem_limit) mem_limit = 200 + if (!timeout) timeout = "none" + + # get program args + i = 0 + prog_args_string = "" + for (; Optind < ARGC; Optind++) { + prog_args[i++] = ARGV[Optind] + if (i > 1) + prog_args_string = prog_args_string" "ARGV[Optind] + } + + # sanity checks + if (!prog_args[0] || !in_dir || !out_dir) usage() + + target_bin = prog_args[0] + + # Do a sanity check to discourage the use of /tmp, since we can't really + # handle this safely from an awk script. 
+ + if (!ENVIRON["AFL_ALLOW_TMP"]) { + dirlist[0] = in_dir + dirlist[1] = target_bin + dirlist[2] = out_dir + dirlist[3] = stdin_file + "pwd" | getline dirlist[4] # current directory + for (dirind in dirlist) { + dir = dirlist[dirind] + + if (dir ~ /^(\/var)?\/tmp/) { + print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr" + exit 1 + } + } + delete dirlist + } + + # If @@ is specified, but there's no -f, let's come up with a temporary input + # file name. + + trace_dir = out_dir "/.traces" + + if (!stdin_file) { + found_atat = 0 + for (prog_args_ind in prog_args) { + if ("@@" == prog_args[prog_args_ind]) { + found_atat = 1 + break + } + } + if (found_atat) { + stdin_file = trace_dir "/.cur_input" + } + } + + # Check for obvious errors. + + if (mem_limit && mem_limit != "none" && mem_limit < 5) { + print "[-] Error: dangerously low memory limit." > "/dev/stderr" + exit 1 + } + + if (timeout && timeout != "none" && timeout < 10) { + print "[-] Error: dangerously low timeout." > "/dev/stderr" + exit 1 + } + + if (target_bin && !exists_and_is_executable(target_bin)) { + + "which "target_bin" 2>/dev/null" | getline tnew + if (!tnew || !exists_and_is_executable(tnew)) { + print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr" + exit 1 + } + target_bin = tnew + } + + if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !unicorn_mode) { + if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) { + print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr" + exit 1 + } + } + + if (0 != system( "test -d "in_dir )) { + print "[-] Error: directory '"in_dir"' not found." 
> "/dev/stderr" + exit 1 + } + + if (0 == system( "test -d "in_dir"/queue" )) { + in_dir = in_dir "/queue" + } + + system("rm -rf "trace_dir" 2>/dev/null"); + system("rm "out_dir"/id[:_]* 2>/dev/null") + + if (0 == system( "test -d "out_dir" -a -e "out_dir"/*" )) { + print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr" + exit 1 + } + + if (stdin_file) { + # truncate input file + printf "" > stdin_file + close( stdin_file ) + } + + if (!ENVIRON["AFL_PATH"]) { + if (0 == system("test -f afl-cmin.awk")) { + path = "." + } else { + "which afl-showmap 2>/dev/null" | getline path + } + showmap = path + } else { + showmap = ENVIRON["AFL_PATH"] "/afl-showmap" + } + + if (!showmap || 0 != system("test -x "showmap )) { + print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr" + exit 1 + } + + # get list of input filenames sorted by size + i = 0 + while ("find "in_dir" -type f -exec stat -f '%z %N' \{\} \; | sort -n | cut -d' ' -f2-" | getline) { + infilesSmallToBig[i++] = $0 + } + in_count = i + + first_file = infilesSmallToBig[0] + + # Make sure that we're not dealing with a directory. + + if (0 == system("test -d "in_dir"/"first_file)) { + print "[-] Error: The input directory contains subdirectories - please fix." > "/dev/stderr" + exit 1 + } + + # Check for the more efficient way to copy files... + if (0 != system("mkdir -p -m 0700 "trace_dir)) { + print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" + exit 1 + } + + if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) { + cp_tool = "ln" + } else { + cp_tool = "cp" + } + + # Make sure that we can actually get anything out of afl-showmap before we + # waste too much time. + + print "[*] Testing the target binary..." 
+ + if (!stdin_file) { + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"") + } else { + system("cp "in_dir"/"first_file" "stdin_file) + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { + ++first_count + } + + if (first_count) { + print "[+] OK, "first_count" tuples recorded." + } else { + print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr" + if (!ENVIRON["AFL_KEEP_TRACES"]) { + system("rm -rf "trace_dir" 2>/dev/null") + } + exit 1 + } + + # Let's roll! + + ############################# + # STEP 1: Collecting traces # + ############################# + + print "[*] Obtaining traces for "in_count" input files in '"in_dir"'." + + cur = 0; + if (!stdin_file) { + while (cur < in_count) { + fn = infilesSmallToBig[cur] + ++cur; + printf "\r Processing file "cur"/"in_count + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"fn"\"") + } + } else { + while (cur < in_count) { + fn = infilesSmallToBig[cur] + ++cur + printf "\r Processing file "cur"/"in_count + system("cp "in_dir"/"fn" "stdin_file) + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { + key = line + if (!(key in key_count)) { + ++tuple_count + } + ++key_count[key] + if (! (key in best_file)) { + # this is the best file for this key + best_file[key] = fn + # copy file unless already done + if (! 
(fn in file_already_copied)) { + system(cp_tool" "in_dir"/"fn" "out_dir"/"fn) + file_already_copied[fn] = "" + ++out_count + } + } + } + close(tracefile_path) + } + + print "" + print "[+] Found "tuple_count" unique tuples across "in_count" files." + + if (out_count == 1) { + print "[!] WARNING: All test cases had the same traces, check syntax!" + } + print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'." + + if (!ENVIRON["AFL_KEEP_TRACES"]) { + system("rm -rf "trace_dir" 2>/dev/null") + } + + exit 0 +} From 7ce627c92e9b0536e254422d5ef604c3f58e43ce Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Wed, 22 Jan 2020 18:38:41 +0100 Subject: [PATCH 18/43] Oops, only this version works with FreeBSD, OpenBSD, NetBSD, MacOS, raspbian --- afl-cmin.awk | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/afl-cmin.awk b/afl-cmin.awk index 021f7059..28c460e8 100755 --- a/afl-cmin.awk +++ b/afl-cmin.awk @@ -110,7 +110,6 @@ function exists_and_is_executable(binarypath) { BEGIN { print "corpus minimization tool for afl-fuzz++ (awk version)\n" -print "PATH="ENVIRON["PATH"] # defaults extra_par = "" @@ -302,7 +301,13 @@ print "PATH="ENVIRON["PATH"] # get list of input filenames sorted by size i = 0 - while ("find "in_dir" -type f -exec stat -f '%z %N' \{\} \; | sort -n | cut -d' ' -f2-" | getline) { + # yuck, gnu stat is incompatible to bsd stat + if ("stat --version 2>/dev/null" !~ /GNU coreutils/) { + stat_format = "-f '%z %N'" + } else { + stat_format = "-c '%s %n'" + } + while ("cd "in_dir" && find . 
-type f -exec stat "stat_format" \{\} \\; | sort -n | cut -d' ' -f2-" | getline) { infilesSmallToBig[i++] = $0 } in_count = i From ce0b9dae5971f22cd0ae0b468322f78ee2a8a766 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Wed, 22 Jan 2020 19:07:02 +0100 Subject: [PATCH 19/43] final step: rename afl-cmin to afl-cmin.bash and add a wrapper afl-cmin for afl-cmin.awk --- afl-cmin | 474 +------------------------------------------------- afl-cmin.bash | 470 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 474 insertions(+), 470 deletions(-) create mode 100755 afl-cmin.bash diff --git a/afl-cmin b/afl-cmin index 1dd782d8..75dc63a7 100755 --- a/afl-cmin +++ b/afl-cmin @@ -1,470 +1,4 @@ -#!/usr/bin/env bash -# -# american fuzzy lop++ - corpus minimization tool -# --------------------------------------------- -# -# Originally written by Michal Zalewski -# -# Copyright 2014, 2015 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# This tool tries to find the smallest subset of files in the input directory -# that still trigger the full range of instrumentation data points seen in -# the starting corpus. This has two uses: -# -# - Screening large corpora of input files before using them as a seed for -# afl-fuzz. The tool will remove functionally redundant files and likely -# leave you with a much smaller set. -# -# (In this case, you probably also want to consider running afl-tmin on -# the individual files later on to reduce their size.) -# -# - Minimizing the corpus generated organically by afl-fuzz, perhaps when -# planning to feed it to more resource-intensive tools. The tool achieves -# this by removing all entries that used to trigger unique behaviors in the -# past, but have been made obsolete by later finds. 
-# -# Note that the tool doesn't modify the files themselves. For that, you want -# afl-tmin. -# -# This script must use bash because other shells may have hardcoded limits on -# array sizes. -# - -echo "corpus minimization tool for afl-fuzz by Michal Zalewski" -echo - -######### -# SETUP # -######### - -# Process command-line options... - -MEM_LIMIT=200 -TIMEOUT=none - -unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \ - AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE - -while getopts "+i:o:f:m:t:eQUCh" opt; do - - case "$opt" in - - "h") - ;; - - "i") - IN_DIR="$OPTARG" - ;; - - "o") - OUT_DIR="$OPTARG" - ;; - "f") - STDIN_FILE="$OPTARG" - ;; - "m") - MEM_LIMIT="$OPTARG" - MEM_LIMIT_GIVEN=1 - ;; - "t") - TIMEOUT="$OPTARG" - ;; - "e") - EXTRA_PAR="$EXTRA_PAR -e" - ;; - "C") - export AFL_CMIN_CRASHES_ONLY=1 - ;; - "Q") - EXTRA_PAR="$EXTRA_PAR -Q" - test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 - QEMU_MODE=1 - ;; - "U") - EXTRA_PAR="$EXTRA_PAR -U" - test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 - UNICORN_MODE=1 - ;; - "?") - exit 1 - ;; - - esac - -done - -shift $((OPTIND-1)) - -TARGET_BIN="$1" - -if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then - - cat 1>&2 <<_EOF_ -Usage: $0 [ options ] -- /path/to/target_app [ ... ] - -Required parameters: - - -i dir - input directory with the starting corpus - -o dir - output directory for minimized files - -Execution control settings: - - -f file - location read by the fuzzed program (stdin) - -m megs - memory limit for child process ($MEM_LIMIT MB) - -t msec - run time limit for child process (none) - -Q - use binary-only instrumentation (QEMU mode) - -U - use unicorn-based instrumentation (Unicorn mode) - -Minimization settings: - - -C - keep crashing inputs, reject everything else - -e - solve for edge coverage only, ignore hit counts - -For additional tips, please consult docs/README. 
- -_EOF_ - exit 1 -fi - -# Do a sanity check to discourage the use of /tmp, since we can't really -# handle this safely from a shell script. - -if [ "$AFL_ALLOW_TMP" = "" ]; then - - echo "$IN_DIR" | grep -qE '^(/var)?/tmp/' - T1="$?" - - echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/' - T2="$?" - - echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/' - T3="$?" - - echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/' - T4="$?" - - echo "$PWD" | grep -qE '^(/var)?/tmp/' - T5="$?" - - if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then - echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2 - exit 1 - fi - -fi - -# If @@ is specified, but there's no -f, let's come up with a temporary input -# file name. - -TRACE_DIR="$OUT_DIR/.traces" - -if [ "$STDIN_FILE" = "" ]; then - - if echo "$*" | grep -qF '@@'; then - STDIN_FILE="$TRACE_DIR/.cur_input" - fi - -fi - -# Check for obvious errors. - -if [ ! "$MEM_LIMIT" = "none" ]; then - - if [ "$MEM_LIMIT" -lt "5" ]; then - echo "[-] Error: dangerously low memory limit." 1>&2 - exit 1 - fi - -fi - -if [ ! "$TIMEOUT" = "none" ]; then - - if [ "$TIMEOUT" -lt "10" ]; then - echo "[-] Error: dangerously low timeout." 1>&2 - exit 1 - fi - -fi - -if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then - - TNEW="`which "$TARGET_BIN" 2>/dev/null`" - - if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then - echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2 - exit 1 - fi - - TARGET_BIN="$TNEW" - -fi - -if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then - - if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then - echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2 - exit 1 - fi - -fi - -if [ ! -d "$IN_DIR" ]; then - echo "[-] Error: directory '$IN_DIR' not found." 
1>&2 - exit 1 -fi - -test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue" - -find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null -rm -rf "$TRACE_DIR" 2>/dev/null - -rmdir "$OUT_DIR" 2>/dev/null - -if [ -d "$OUT_DIR" ]; then - echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2 - exit 1 -fi - -mkdir -m 700 -p "$TRACE_DIR" || exit 1 - -if [ ! "$STDIN_FILE" = "" ]; then - rm -f "$STDIN_FILE" || exit 1 - touch "$STDIN_FILE" || exit 1 -fi - -if [ "$AFL_PATH" = "" ]; then - SHOWMAP="${0%/afl-cmin}/afl-showmap" -else - SHOWMAP="$AFL_PATH/afl-showmap" -fi - -if [ ! -x "$SHOWMAP" ]; then - echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2 - rm -rf "$TRACE_DIR" - exit 1 -fi - -IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`)) - -if [ "$IN_COUNT" = "0" ]; then - echo "[+] Hmm, no inputs in the target directory. Nothing to be done." - rm -rf "$TRACE_DIR" - exit 1 -fi - -FIRST_FILE=`ls "$IN_DIR" | head -1` - -# Make sure that we're not dealing with a directory. - -if [ -d "$IN_DIR/$FIRST_FILE" ]; then - echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2 - rm -rf "$TRACE_DIR" - exit 1 -fi - -# Check for the more efficient way to copy files... - -if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then - CP_TOOL=ln -else - CP_TOOL=cp -fi - -# Make sure that we can actually get anything out of afl-showmap before we -# waste too much time. - -echo "[*] Testing the target binary..." - -if [ "$STDIN_FILE" = "" ]; then - - AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE" - -else - - cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE" - AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" &2 - test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" - exit 1 - -fi - -# Let's roll! 
- -############################# -# STEP 1: COLLECTING TRACES # -############################# - -echo "[*] Obtaining traces for input files in '$IN_DIR'..." - -( - - CUR=0 - - if [ "$STDIN_FILE" = "" ]; then - - ls "$IN_DIR" | while read -r fn; do - - CUR=$((CUR+1)) - printf "\\r Processing file $CUR/$IN_COUNT... " - - "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn" - - done - - else - - ls "$IN_DIR" | while read -r fn; do - - CUR=$((CUR+1)) - printf "\\r Processing file $CUR/$IN_COUNT... " - - cp "$IN_DIR/$fn" "$STDIN_FILE" - - "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" "$TRACE_DIR/.all_uniq" - -TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`)) - -echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files." - -##################################### -# STEP 3: SELECTING CANDIDATE FILES # -##################################### - -# The next step is to find the best candidate for each tuple. The "best" -# part is understood simply as the smallest input that includes a particular -# tuple in its trace. Empirical evidence suggests that this produces smaller -# datasets than more involved algorithms that could be still pulled off in -# a shell script. - -echo "[*] Finding best candidates for each tuple..." - -CUR=0 - -ls -rS "$IN_DIR" | while read -r fn; do - - CUR=$((CUR+1)) - printf "\\r Processing file $CUR/$IN_COUNT... " - - sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list" - -done - -echo - -############################## -# STEP 4: LOADING CANDIDATES # -############################## - -# At this point, we have a file of tuple-file pairs, sorted by file size -# in ascending order (as a consequence of ls -rS). By doing sort keyed -# only by tuple (-k 1,1) and configured to output only the first line for -# every key (-s -u), we end up with the smallest file for each tuple. - -echo "[*] Sorting candidate list (be patient)..." 
- -sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \ - sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script" - -if [ ! -s "$TRACE_DIR/.candidate_script" ]; then - echo "[-] Error: no traces obtained from test cases, check syntax!" 1>&2 - test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" - exit 1 -fi - -# The sed command converted the sorted list to a shell script that populates -# BEST_FILE[tuple]="fname". Let's load that! - -. "$TRACE_DIR/.candidate_script" - -########################## -# STEP 5: WRITING OUTPUT # -########################## - -# The final trick is to grab the top pick for each tuple, unless said tuple is -# already set due to the inclusion of an earlier candidate; and then put all -# tuples associated with the newly-added file to the "already have" list. The -# loop works from least popular tuples and toward the most common ones. - -echo "[*] Processing candidates and writing output files..." - -CUR=0 - -touch "$TRACE_DIR/.already_have" - -while read -r cnt tuple; do - - CUR=$((CUR+1)) - printf "\\r Processing tuple $CUR/$TUPLE_COUNT... " - - # If we already have this tuple, skip it. - - grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue - - FN=${BEST_FILE[tuple]} - - $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN" - - if [ "$((CUR % 5))" = "0" ]; then - sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp" - mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have" - else - cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have" - fi - -done <"$TRACE_DIR/.all_uniq" - -echo - -OUT_COUNT=`ls -- "$OUT_DIR" | wc -l` - -if [ "$OUT_COUNT" = "1" ]; then - echo "[!] WARNING: All test cases had the same traces, check syntax!" -fi - -echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'." 
-echo - -test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" - -exit 0 +#!/usr/bin/env sh +THISPATH=`dirname ${0}` +export PATH=${THISPATH}:$PATH +awk -f ${0}.awk -- ${@+"$@"} diff --git a/afl-cmin.bash b/afl-cmin.bash new file mode 100755 index 00000000..1dd782d8 --- /dev/null +++ b/afl-cmin.bash @@ -0,0 +1,470 @@ +#!/usr/bin/env bash +# +# american fuzzy lop++ - corpus minimization tool +# --------------------------------------------- +# +# Originally written by Michal Zalewski +# +# Copyright 2014, 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# This tool tries to find the smallest subset of files in the input directory +# that still trigger the full range of instrumentation data points seen in +# the starting corpus. This has two uses: +# +# - Screening large corpora of input files before using them as a seed for +# afl-fuzz. The tool will remove functionally redundant files and likely +# leave you with a much smaller set. +# +# (In this case, you probably also want to consider running afl-tmin on +# the individual files later on to reduce their size.) +# +# - Minimizing the corpus generated organically by afl-fuzz, perhaps when +# planning to feed it to more resource-intensive tools. The tool achieves +# this by removing all entries that used to trigger unique behaviors in the +# past, but have been made obsolete by later finds. +# +# Note that the tool doesn't modify the files themselves. For that, you want +# afl-tmin. +# +# This script must use bash because other shells may have hardcoded limits on +# array sizes. +# + +echo "corpus minimization tool for afl-fuzz by Michal Zalewski" +echo + +######### +# SETUP # +######### + +# Process command-line options... 
+ +MEM_LIMIT=200 +TIMEOUT=none + +unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \ + AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE + +while getopts "+i:o:f:m:t:eQUCh" opt; do + + case "$opt" in + + "h") + ;; + + "i") + IN_DIR="$OPTARG" + ;; + + "o") + OUT_DIR="$OPTARG" + ;; + "f") + STDIN_FILE="$OPTARG" + ;; + "m") + MEM_LIMIT="$OPTARG" + MEM_LIMIT_GIVEN=1 + ;; + "t") + TIMEOUT="$OPTARG" + ;; + "e") + EXTRA_PAR="$EXTRA_PAR -e" + ;; + "C") + export AFL_CMIN_CRASHES_ONLY=1 + ;; + "Q") + EXTRA_PAR="$EXTRA_PAR -Q" + test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 + QEMU_MODE=1 + ;; + "U") + EXTRA_PAR="$EXTRA_PAR -U" + test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 + UNICORN_MODE=1 + ;; + "?") + exit 1 + ;; + + esac + +done + +shift $((OPTIND-1)) + +TARGET_BIN="$1" + +if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then + + cat 1>&2 <<_EOF_ +Usage: $0 [ options ] -- /path/to/target_app [ ... ] + +Required parameters: + + -i dir - input directory with the starting corpus + -o dir - output directory for minimized files + +Execution control settings: + + -f file - location read by the fuzzed program (stdin) + -m megs - memory limit for child process ($MEM_LIMIT MB) + -t msec - run time limit for child process (none) + -Q - use binary-only instrumentation (QEMU mode) + -U - use unicorn-based instrumentation (Unicorn mode) + +Minimization settings: + + -C - keep crashing inputs, reject everything else + -e - solve for edge coverage only, ignore hit counts + +For additional tips, please consult docs/README. + +_EOF_ + exit 1 +fi + +# Do a sanity check to discourage the use of /tmp, since we can't really +# handle this safely from a shell script. + +if [ "$AFL_ALLOW_TMP" = "" ]; then + + echo "$IN_DIR" | grep -qE '^(/var)?/tmp/' + T1="$?" + + echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/' + T2="$?" + + echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/' + T3="$?" + + echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/' + T4="$?" 
+ + echo "$PWD" | grep -qE '^(/var)?/tmp/' + T5="$?" + + if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then + echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2 + exit 1 + fi + +fi + +# If @@ is specified, but there's no -f, let's come up with a temporary input +# file name. + +TRACE_DIR="$OUT_DIR/.traces" + +if [ "$STDIN_FILE" = "" ]; then + + if echo "$*" | grep -qF '@@'; then + STDIN_FILE="$TRACE_DIR/.cur_input" + fi + +fi + +# Check for obvious errors. + +if [ ! "$MEM_LIMIT" = "none" ]; then + + if [ "$MEM_LIMIT" -lt "5" ]; then + echo "[-] Error: dangerously low memory limit." 1>&2 + exit 1 + fi + +fi + +if [ ! "$TIMEOUT" = "none" ]; then + + if [ "$TIMEOUT" -lt "10" ]; then + echo "[-] Error: dangerously low timeout." 1>&2 + exit 1 + fi + +fi + +if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then + + TNEW="`which "$TARGET_BIN" 2>/dev/null`" + + if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then + echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2 + exit 1 + fi + + TARGET_BIN="$TNEW" + +fi + +if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then + + if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then + echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2 + exit 1 + fi + +fi + +if [ ! -d "$IN_DIR" ]; then + echo "[-] Error: directory '$IN_DIR' not found." 1>&2 + exit 1 +fi + +test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue" + +find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null +rm -rf "$TRACE_DIR" 2>/dev/null + +rmdir "$OUT_DIR" 2>/dev/null + +if [ -d "$OUT_DIR" ]; then + echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2 + exit 1 +fi + +mkdir -m 700 -p "$TRACE_DIR" || exit 1 + +if [ ! 
"$STDIN_FILE" = "" ]; then + rm -f "$STDIN_FILE" || exit 1 + touch "$STDIN_FILE" || exit 1 +fi + +if [ "$AFL_PATH" = "" ]; then + SHOWMAP="${0%/afl-cmin}/afl-showmap" +else + SHOWMAP="$AFL_PATH/afl-showmap" +fi + +if [ ! -x "$SHOWMAP" ]; then + echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2 + rm -rf "$TRACE_DIR" + exit 1 +fi + +IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`)) + +if [ "$IN_COUNT" = "0" ]; then + echo "[+] Hmm, no inputs in the target directory. Nothing to be done." + rm -rf "$TRACE_DIR" + exit 1 +fi + +FIRST_FILE=`ls "$IN_DIR" | head -1` + +# Make sure that we're not dealing with a directory. + +if [ -d "$IN_DIR/$FIRST_FILE" ]; then + echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2 + rm -rf "$TRACE_DIR" + exit 1 +fi + +# Check for the more efficient way to copy files... + +if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then + CP_TOOL=ln +else + CP_TOOL=cp +fi + +# Make sure that we can actually get anything out of afl-showmap before we +# waste too much time. + +echo "[*] Testing the target binary..." + +if [ "$STDIN_FILE" = "" ]; then + + AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE" + +else + + cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE" + AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" &2 + test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" + exit 1 + +fi + +# Let's roll! + +############################# +# STEP 1: COLLECTING TRACES # +############################# + +echo "[*] Obtaining traces for input files in '$IN_DIR'..." + +( + + CUR=0 + + if [ "$STDIN_FILE" = "" ]; then + + ls "$IN_DIR" | while read -r fn; do + + CUR=$((CUR+1)) + printf "\\r Processing file $CUR/$IN_COUNT... 
" + + "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn" + + done + + else + + ls "$IN_DIR" | while read -r fn; do + + CUR=$((CUR+1)) + printf "\\r Processing file $CUR/$IN_COUNT... " + + cp "$IN_DIR/$fn" "$STDIN_FILE" + + "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" "$TRACE_DIR/.all_uniq" + +TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`)) + +echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files." + +##################################### +# STEP 3: SELECTING CANDIDATE FILES # +##################################### + +# The next step is to find the best candidate for each tuple. The "best" +# part is understood simply as the smallest input that includes a particular +# tuple in its trace. Empirical evidence suggests that this produces smaller +# datasets than more involved algorithms that could be still pulled off in +# a shell script. + +echo "[*] Finding best candidates for each tuple..." + +CUR=0 + +ls -rS "$IN_DIR" | while read -r fn; do + + CUR=$((CUR+1)) + printf "\\r Processing file $CUR/$IN_COUNT... " + + sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list" + +done + +echo + +############################## +# STEP 4: LOADING CANDIDATES # +############################## + +# At this point, we have a file of tuple-file pairs, sorted by file size +# in ascending order (as a consequence of ls -rS). By doing sort keyed +# only by tuple (-k 1,1) and configured to output only the first line for +# every key (-s -u), we end up with the smallest file for each tuple. + +echo "[*] Sorting candidate list (be patient)..." + +sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \ + sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script" + +if [ ! -s "$TRACE_DIR/.candidate_script" ]; then + echo "[-] Error: no traces obtained from test cases, check syntax!" 
1>&2 + test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" + exit 1 +fi + +# The sed command converted the sorted list to a shell script that populates +# BEST_FILE[tuple]="fname". Let's load that! + +. "$TRACE_DIR/.candidate_script" + +########################## +# STEP 5: WRITING OUTPUT # +########################## + +# The final trick is to grab the top pick for each tuple, unless said tuple is +# already set due to the inclusion of an earlier candidate; and then put all +# tuples associated with the newly-added file to the "already have" list. The +# loop works from least popular tuples and toward the most common ones. + +echo "[*] Processing candidates and writing output files..." + +CUR=0 + +touch "$TRACE_DIR/.already_have" + +while read -r cnt tuple; do + + CUR=$((CUR+1)) + printf "\\r Processing tuple $CUR/$TUPLE_COUNT... " + + # If we already have this tuple, skip it. + + grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue + + FN=${BEST_FILE[tuple]} + + $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN" + + if [ "$((CUR % 5))" = "0" ]; then + sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp" + mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have" + else + cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have" + fi + +done <"$TRACE_DIR/.all_uniq" + +echo + +OUT_COUNT=`ls -- "$OUT_DIR" | wc -l` + +if [ "$OUT_COUNT" = "1" ]; then + echo "[!] WARNING: All test cases had the same traces, check syntax!" +fi + +echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'." 
+echo + +test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" + +exit 0 From 9da167dffdc14468d17ac3c1c942e483baf17433 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Wed, 22 Jan 2020 21:08:47 +0100 Subject: [PATCH 20/43] fix for modern linux --- afl-cmin.awk | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/afl-cmin.awk b/afl-cmin.awk index 28c460e8..fcdfb71f 100755 --- a/afl-cmin.awk +++ b/afl-cmin.awk @@ -109,7 +109,7 @@ function exists_and_is_executable(binarypath) { } BEGIN { - print "corpus minimization tool for afl-fuzz++ (awk version)\n" + print "corpus minimization tool for afl++ (awk version)\n" # defaults extra_par = "" @@ -289,7 +289,7 @@ BEGIN { } else { "which afl-showmap 2>/dev/null" | getline path } - showmap = path + showmap = path "/afl-showmap" } else { showmap = ENVIRON["AFL_PATH"] "/afl-showmap" } @@ -303,11 +303,12 @@ BEGIN { i = 0 # yuck, gnu stat is incompatible to bsd stat if ("stat --version 2>/dev/null" !~ /GNU coreutils/) { - stat_format = "-f '%z %N'" - } else { + # I dont get it why this does not work, output is "stat (GNU coreutils) 8.30" and still it goes here ... stat_format = "-c '%s %n'" + } else { + stat_format = "-f '%z %N'" } - while ("cd "in_dir" && find . -type f -exec stat "stat_format" \{\} \\; | sort -n | cut -d' ' -f2-" | getline) { + while ("cd "in_dir" && find . 
-type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) { infilesSmallToBig[i++] = $0 } in_count = i From c51f89b58e56338a5a430344548d1385432d173e Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Wed, 22 Jan 2020 21:50:35 +0100 Subject: [PATCH 21/43] rectification of vanhauser's fix, made it a bit more robust, enabled error output for travis debugging --- afl-cmin.awk | 40 ++++++++++++++++++++++++++++++++-------- test/test.sh | 2 +- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/afl-cmin.awk b/afl-cmin.awk index fcdfb71f..967c4e87 100755 --- a/afl-cmin.awk +++ b/afl-cmin.awk @@ -1,5 +1,28 @@ #!/usr/bin/awk -f +# awk script to minimize a test corpus of input files +# +# based on afl-cmin bash script written by Michal Zalewski +# rewritten by Heiko Eißfeldt (hexcoder-) +# +# uses getopt.awk package from Arnold Robbins +# +# external tools used by this script: +# test +# grep +# rm +# mkdir +# ln +# cp +# pwd +# which +# cd +# find +# stat +# sort +# cut +# and afl-showmap from this project :-) + # getopt.awk --- Do C library getopt(3) function in awk # External variables: @@ -285,11 +308,10 @@ BEGIN { if (!ENVIRON["AFL_PATH"]) { if (0 == system("test -f afl-cmin.awk")) { - path = "." + showmap = "./afl-showmap" } else { - "which afl-showmap 2>/dev/null" | getline path + "which afl-showmap 2>/dev/null" | getline showmap } - showmap = path "/afl-showmap" } else { showmap = ENVIRON["AFL_PATH"] "/afl-showmap" } @@ -301,12 +323,14 @@ BEGIN { # get list of input filenames sorted by size i = 0 - # yuck, gnu stat is incompatible to bsd stat - if ("stat --version 2>/dev/null" !~ /GNU coreutils/) { - # I dont get it why this does not work, output is "stat (GNU coreutils) 8.30" and still it goes here ... 
- stat_format = "-c '%s %n'" + # yuck, gnu stat is option incompatible to bsd stat + # we use a heuristic to differentiate between + # GNU stat and other stats + "stat --version 2>/dev/null" | getline statversion + if (statversion ~ /GNU coreutils/) { + stat_format = "-c '%s %n'" # GNU } else { - stat_format = "-f '%z %N'" + stat_format = "-f '%z %N'" # *BSD, MacOS } while ("cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) { infilesSmallToBig[i++] = $0 diff --git a/test/test.sh b/test/test.sh index 0ae6fd09..cc7fe224 100755 --- a/test/test.sh +++ b/test/test.sh @@ -150,7 +150,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { } echo 000000000000000000000000 > in/in2 mkdir -p in2 - ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null 2>&1 + ../afl-cmin -i in -o in2 -- ./test-instr.plain CNT=`ls in2/ | wc -l` case "$CNT" in 1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;; From 7e7ab8f5415409fd1bb643f4dfef44c5a3935006 Mon Sep 17 00:00:00 2001 From: hexcoder Date: Wed, 22 Jan 2020 22:24:00 +0100 Subject: [PATCH 22/43] Update binaryonly_fuzzing.txt --- docs/binaryonly_fuzzing.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/binaryonly_fuzzing.txt b/docs/binaryonly_fuzzing.txt index 239fb4b0..f8d68cd8 100644 --- a/docs/binaryonly_fuzzing.txt +++ b/docs/binaryonly_fuzzing.txt @@ -5,10 +5,10 @@ Fuzzing binary-only programs with afl++ afl++, libfuzzer and others are great if you have the source code, and it allows for very fast and coverage guided fuzzing. -However, if there is only the binary program and not source code available, -then standard afl++ (dumb mode) is not effective. +However, if there is only the binary program and no source code available, +then standard `afl-fuzz -n` (dumb mode) is not effective. 
-The following is a description of how these can be fuzzed with afl++ +The following is a description of how these binaries can be fuzzed with afl++ !!!!! TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then @@ -28,7 +28,7 @@ As it is included in afl++ this needs no URL. WINE+QEMU --------- -Wine mode can run Win32 PE with the QEMU instrumentation. +Wine mode can run Win32 PE binaries with the QEMU instrumentation. It needs Wine, python3 and the pefile python package installed. UNICORN @@ -37,7 +37,7 @@ Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. In contrast to QEMU, Unicorn does not offer a full system or even userland emulation. Runtime environment and/or loaders have to be written from scratch, if needed. On top, block chaining has been removed. This means the speed boost introduced in -to the patched QEMU Mode of afl++ cannot simply be ported over to Unicorn. +the patched QEMU Mode of afl++ cannot simply be ported over to Unicorn. For further information, check out ./unicorn_mode.txt. 
From c490b9aa3694ba9c33ba0657ddd5e19dd979f2ed Mon Sep 17 00:00:00 2001 From: hexcoder Date: Thu, 23 Jan 2020 09:11:35 +0100 Subject: [PATCH 23/43] afl-cmin debugging is done now, so suppress stdout messages again (but not stderr) --- test/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index cc7fe224..3473155f 100755 --- a/test/test.sh +++ b/test/test.sh @@ -150,7 +150,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { } echo 000000000000000000000000 > in/in2 mkdir -p in2 - ../afl-cmin -i in -o in2 -- ./test-instr.plain + ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null CNT=`ls in2/ | wc -l` case "$CNT" in 1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;; From 8b17cac71c5196bae11a5a7ee8f6a17bdb3917e0 Mon Sep 17 00:00:00 2001 From: hexcoder Date: Thu, 23 Jan 2020 09:46:07 +0100 Subject: [PATCH 24/43] add socket_fuzz description --- experimental/README.experiments | 3 +++ 1 file changed, 3 insertions(+) diff --git a/experimental/README.experiments b/experimental/README.experiments index af9739bd..5a505ad7 100644 --- a/experimental/README.experiments +++ b/experimental/README.experiments @@ -28,6 +28,9 @@ Here's a quick overview of the stuff you can find in this directory: mode to speed up certain fuzzing jobs. - post_library - an example of how to build postprocessors for AFL. + + - socketfuzz - a LD_PRELOAD library 'redirects' a socket to stdin + for fuzzing access with afl++ Note that the minimize_corpus.sh tool has graduated from the experimental/ directory and is now available as ../afl-cmin. 
The LLVM mode has likewise From a58800b90122f3d612a0badb243d2c1b6fc9c742 Mon Sep 17 00:00:00 2001 From: hexcoder Date: Thu, 23 Jan 2020 09:46:59 +0100 Subject: [PATCH 25/43] typo --- experimental/README.experiments | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/README.experiments b/experimental/README.experiments index 5a505ad7..543c078c 100644 --- a/experimental/README.experiments +++ b/experimental/README.experiments @@ -29,7 +29,7 @@ Here's a quick overview of the stuff you can find in this directory: - post_library - an example of how to build postprocessors for AFL. - - socketfuzz - a LD_PRELOAD library 'redirects' a socket to stdin + - socket_fuzzing - a LD_PRELOAD library 'redirects' a socket to stdin for fuzzing access with afl++ Note that the minimize_corpus.sh tool has graduated from the experimental/ From e7c95ebf5a4828b662252b10052a89923dd25030 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Thu, 23 Jan 2020 10:15:33 +0100 Subject: [PATCH 26/43] afl-cmin final touches --- Makefile | 2 +- afl-cmin | 473 ++++++++++++++++++++++++++++++++++++++++++++++++- afl-cmin.awk | 470 ------------------------------------------------ docs/ChangeLog | 2 + test/test.sh | 2 +- 5 files changed, 476 insertions(+), 473 deletions(-) delete mode 100755 afl-cmin.awk diff --git a/Makefile b/Makefile index 7260ee47..459cae5f 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ VERSION = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2) # PROGS intentionally omit afl-as, which gets installed elsewhere. 
PROGS = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze -SH_PROGS = afl-plot afl-cmin afl-whatsup afl-system-config +SH_PROGS = afl-plot afl-cmin afl-cmin.bash afl-whatsup afl-system-config MANPAGES=$(foreach p, $(PROGS) $(SH_PROGS), $(p).8) ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" diff --git a/afl-cmin b/afl-cmin index 75dc63a7..a072a62a 100755 --- a/afl-cmin +++ b/afl-cmin @@ -1,4 +1,475 @@ #!/usr/bin/env sh THISPATH=`dirname ${0}` export PATH=${THISPATH}:$PATH -awk -f ${0}.awk -- ${@+"$@"} +awk -f - -- ${@+"$@"} <<'EOF' +#!/usr/bin/awk -f + +# awk script to minimize a test corpus of input files +# +# based on afl-cmin bash script written by Michal Zalewski +# rewritten by Heiko Eißfeldt (hexcoder-) +# +# uses getopt.awk package from Arnold Robbins +# +# external tools used by this script: +# test +# grep +# rm +# mkdir +# ln +# cp +# pwd +# which +# cd +# find +# stat +# sort +# cut +# and afl-showmap from this project :-) + +# getopt.awk --- Do C library getopt(3) function in awk + +# External variables: +# Optind -- index in ARGV of first nonoption argument +# Optarg -- string value of argument to current option +# Opterr -- if nonzero, print our own diagnostic +# Optopt -- current option letter + +# Returns: +# -1 at end of options +# "?" 
for unrecognized option +# a character representing the current option + +# Private Data: +# _opti -- index in multiflag option, e.g., -abc + +function getopt(argc, argv, options, thisopt, i) +{ + if (length(options) == 0) # no options given + return -1 + + if (argv[Optind] == "--") { # all done + Optind++ + _opti = 0 + return -1 + } else if (argv[Optind] !~ /^-[^:[:space:]]/) { + _opti = 0 + return -1 + } + if (_opti == 0) + _opti = 2 + thisopt = substr(argv[Optind], _opti, 1) + Optopt = thisopt + i = index(options, thisopt) + if (i == 0) { + if (Opterr) + printf("%c -- invalid option\n", thisopt) > "/dev/stderr" + if (_opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return "?" + } + if (substr(options, i + 1, 1) == ":") { + # get option argument + if (length(substr(argv[Optind], _opti + 1)) > 0) + Optarg = substr(argv[Optind], _opti + 1) + else + Optarg = argv[++Optind] + _opti = 0 + } else + Optarg = "" + if (_opti == 0 || _opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return thisopt +} + +BEGIN { + Opterr = 1 # default is to diagnose + Optind = 1 # skip ARGV[0] + + # test program + if (_getopt_test) { + while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) + printf("c = <%c>, Optarg = <%s>\n", + _go_c, Optarg) + printf("non-option arguments:\n") + for (; Optind < ARGC; Optind++) + printf("\tARGV[%d] = <%s>\n", + Optind, ARGV[Optind]) + } +} + +function usage() { + print \ +"Usage: afl-cmin [ options ] -- /path/to/target_app [ ... 
]\n" \ +"\n" \ +"Required parameters:\n" \ +"\n" \ +" -i dir - input directory with starting corpus\n" \ +" -o dir - output directory for minimized files\n" \ +"\n" \ +"Execution control settings:\n" \ +"\n" \ +" -f file - location read by the fuzzed program (stdin)\n" \ +" -m megs - memory limit for child process ("mem_limit" MB)\n" \ +" -t msec - run time limit for child process (none)\n" \ +" -Q - use binary-only instrumentation (QEMU mode)\n" \ +" -U - use unicorn-based instrumentation (unicorn mode)\n" \ +"\n" \ +"Minimization settings:\n" \ +" -C - keep crashing inputs, reject everything else\n" \ +" -e - solve for edge coverage only, ignore hit counts\n" \ +"\n" \ +"For additional tips, please consult docs/README.md\n" \ +"\n" \ + > "/dev/stderr" + exit 1 +} + +function exists_and_is_executable(binarypath) { + return 0 == system("test -f "binarypath" -a -x "binarypath) +} + +BEGIN { + print "corpus minimization tool for afl++ (awk version)\n" + + # defaults + extra_par = "" + # process options + Opterr = 1 # default is to diagnose + Optind = 1 # skip ARGV[0] + while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCQU?")) != -1) { + if (_go_c == "i") { + if (!Optarg) usage() + if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + in_dir = Optarg + continue + } else + if (_go_c == "o") { + if (!Optarg) usage() + if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + out_dir = Optarg + continue + } else + if (_go_c == "f") { + if (!Optarg) usage() + if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + stdin_file = Optarg + continue + } else + if (_go_c == "m") { + if (!Optarg) usage() + if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + mem_limit = Optarg + mem_limit_given = 1 + continue + } else + if (_go_c == "t") { + if (!Optarg) usage() + if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + timeout = Optarg + continue + } else + if 
(_go_c == "C") { + ENVIRON["AFL_CMIN_CRASHES_ONLY"] = 1 + continue + } else + if (_go_c == "e") { + extra_par = extra_par " -e" + continue + } else + if (_go_c == "Q") { + if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + extra_par = extra_par " -Q" + if ( !mem_limit_given ) mem_limit = "250" + qemu_mode = 1 + continue + } else + if (_go_c == "U") { + if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + extra_par = extra_par " -U" + if ( !mem_limit_given ) mem_limit = "250" + unicorn_mode = 1 + continue + } else + if (_go_c == "?") { + exit 1 + } else + usage() + } # while options + + if (!mem_limit) mem_limit = 200 + if (!timeout) timeout = "none" + + # get program args + i = 0 + prog_args_string = "" + for (; Optind < ARGC; Optind++) { + prog_args[i++] = ARGV[Optind] + if (i > 1) + prog_args_string = prog_args_string" "ARGV[Optind] + } + + # sanity checks + if (!prog_args[0] || !in_dir || !out_dir) usage() + + target_bin = prog_args[0] + + # Do a sanity check to discourage the use of /tmp, since we can't really + # handle this safely from an awk script. + + if (!ENVIRON["AFL_ALLOW_TMP"]) { + dirlist[0] = in_dir + dirlist[1] = target_bin + dirlist[2] = out_dir + dirlist[3] = stdin_file + "pwd" | getline dirlist[4] # current directory + for (dirind in dirlist) { + dir = dirlist[dirind] + + if (dir ~ /^(\/var)?\/tmp/) { + print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr" + exit 1 + } + } + delete dirlist + } + + # If @@ is specified, but there's no -f, let's come up with a temporary input + # file name. + + trace_dir = out_dir "/.traces" + + if (!stdin_file) { + found_atat = 0 + for (prog_args_ind in prog_args) { + if ("@@" == prog_args[prog_args_ind]) { + found_atat = 1 + break + } + } + if (found_atat) { + stdin_file = trace_dir "/.cur_input" + } + } + + # Check for obvious errors. 
+ + if (mem_limit && mem_limit != "none" && mem_limit < 5) { + print "[-] Error: dangerously low memory limit." > "/dev/stderr" + exit 1 + } + + if (timeout && timeout != "none" && timeout < 10) { + print "[-] Error: dangerously low timeout." > "/dev/stderr" + exit 1 + } + + if (target_bin && !exists_and_is_executable(target_bin)) { + + "which "target_bin" 2>/dev/null" | getline tnew + if (!tnew || !exists_and_is_executable(tnew)) { + print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr" + exit 1 + } + target_bin = tnew + } + + if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !unicorn_mode) { + if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) { + print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr" + exit 1 + } + } + + if (0 != system( "test -d "in_dir )) { + print "[-] Error: directory '"in_dir"' not found." > "/dev/stderr" + exit 1 + } + + if (0 == system( "test -d "in_dir"/queue" )) { + in_dir = in_dir "/queue" + } + + system("rm -rf "trace_dir" 2>/dev/null"); + system("rm "out_dir"/id[:_]* 2>/dev/null") + + if (0 == system( "test -d "out_dir" -a -e "out_dir"/*" )) { + print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr" + exit 1 + } + + if (stdin_file) { + # truncate input file + printf "" > stdin_file + close( stdin_file ) + } + + if (!ENVIRON["AFL_PATH"]) { + if (0 == system("test -f afl-cmin.awk")) { + showmap = "./afl-showmap" + } else { + "which afl-showmap 2>/dev/null" | getline showmap + } + } else { + showmap = ENVIRON["AFL_PATH"] "/afl-showmap" + } + + if (!showmap || 0 != system("test -x "showmap )) { + print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 
> "/dev/stderr" + exit 1 + } + + # get list of input filenames sorted by size + i = 0 + # yuck, gnu stat is option incompatible to bsd stat + # we use a heuristic to differentiate between + # GNU stat and other stats + "stat --version 2>/dev/null" | getline statversion + if (statversion ~ /GNU coreutils/) { + stat_format = "-c '%s %n'" # GNU + } else { + stat_format = "-f '%z %N'" # *BSD, MacOS + } + while ("cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) { + infilesSmallToBig[i++] = $0 + } + in_count = i + + first_file = infilesSmallToBig[0] + + # Make sure that we're not dealing with a directory. + + if (0 == system("test -d "in_dir"/"first_file)) { + print "[-] Error: The input directory contains subdirectories - please fix." > "/dev/stderr" + exit 1 + } + + # Check for the more efficient way to copy files... + if (0 != system("mkdir -p -m 0700 "trace_dir)) { + print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" + exit 1 + } + + if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) { + cp_tool = "ln" + } else { + cp_tool = "cp" + } + + # Make sure that we can actually get anything out of afl-showmap before we + # waste too much time. + + print "[*] Testing the target binary..." + + if (!stdin_file) { + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"") + } else { + system("cp "in_dir"/"first_file" "stdin_file) + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { + ++first_count + } + + if (first_count) { + print "[+] OK, "first_count" tuples recorded." + } else { + print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 
> "/dev/stderr" + if (!ENVIRON["AFL_KEEP_TRACES"]) { + system("rm -rf "trace_dir" 2>/dev/null") + } + exit 1 + } + + # Let's roll! + + ############################# + # STEP 1: Collecting traces # + ############################# + + print "[*] Obtaining traces for "in_count" input files in '"in_dir"'." + + cur = 0; + if (!stdin_file) { + while (cur < in_count) { + fn = infilesSmallToBig[cur] + ++cur; + printf "\r Processing file "cur"/"in_count + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"fn"\"") + } + } else { + while (cur < in_count) { + fn = infilesSmallToBig[cur] + ++cur + printf "\r Processing file "cur"/"in_count + system("cp "in_dir"/"fn" "stdin_file) + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { + key = line + if (!(key in key_count)) { + ++tuple_count + } + ++key_count[key] + if (! (key in best_file)) { + # this is the best file for this key + best_file[key] = fn + # copy file unless already done + if (! (fn in file_already_copied)) { + system(cp_tool" "in_dir"/"fn" "out_dir"/"fn) + file_already_copied[fn] = "" + ++out_count + } + } + } + close(tracefile_path) + } + + print "" + print "[+] Found "tuple_count" unique tuples across "in_count" files." + + if (out_count == 1) { + print "[!] WARNING: All test cases had the same traces, check syntax!" + } + print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'." 
+ + if (!ENVIRON["AFL_KEEP_TRACES"]) { + system("rm -rf "trace_dir" 2>/dev/null") + } + + exit 0 +} +EOF diff --git a/afl-cmin.awk b/afl-cmin.awk deleted file mode 100755 index 967c4e87..00000000 --- a/afl-cmin.awk +++ /dev/null @@ -1,470 +0,0 @@ -#!/usr/bin/awk -f - -# awk script to minimize a test corpus of input files -# -# based on afl-cmin bash script written by Michal Zalewski -# rewritten by Heiko Eißfeldt (hexcoder-) -# -# uses getopt.awk package from Arnold Robbins -# -# external tools used by this script: -# test -# grep -# rm -# mkdir -# ln -# cp -# pwd -# which -# cd -# find -# stat -# sort -# cut -# and afl-showmap from this project :-) - -# getopt.awk --- Do C library getopt(3) function in awk - -# External variables: -# Optind -- index in ARGV of first nonoption argument -# Optarg -- string value of argument to current option -# Opterr -- if nonzero, print our own diagnostic -# Optopt -- current option letter - -# Returns: -# -1 at end of options -# "?" for unrecognized option -# a character representing the current option - -# Private Data: -# _opti -- index in multiflag option, e.g., -abc - -function getopt(argc, argv, options, thisopt, i) -{ - if (length(options) == 0) # no options given - return -1 - - if (argv[Optind] == "--") { # all done - Optind++ - _opti = 0 - return -1 - } else if (argv[Optind] !~ /^-[^:[:space:]]/) { - _opti = 0 - return -1 - } - if (_opti == 0) - _opti = 2 - thisopt = substr(argv[Optind], _opti, 1) - Optopt = thisopt - i = index(options, thisopt) - if (i == 0) { - if (Opterr) - printf("%c -- invalid option\n", thisopt) > "/dev/stderr" - if (_opti >= length(argv[Optind])) { - Optind++ - _opti = 0 - } else - _opti++ - return "?" 
- } - if (substr(options, i + 1, 1) == ":") { - # get option argument - if (length(substr(argv[Optind], _opti + 1)) > 0) - Optarg = substr(argv[Optind], _opti + 1) - else - Optarg = argv[++Optind] - _opti = 0 - } else - Optarg = "" - if (_opti == 0 || _opti >= length(argv[Optind])) { - Optind++ - _opti = 0 - } else - _opti++ - return thisopt -} - -BEGIN { - Opterr = 1 # default is to diagnose - Optind = 1 # skip ARGV[0] - - # test program - if (_getopt_test) { - while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, Optarg = <%s>\n", - _go_c, Optarg) - printf("non-option arguments:\n") - for (; Optind < ARGC; Optind++) - printf("\tARGV[%d] = <%s>\n", - Optind, ARGV[Optind]) - } -} - -function usage() { - print \ -"Usage: afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \ -"\n" \ -"Required parameters:\n" \ -"\n" \ -" -i dir - input directory with starting corpus\n" \ -" -o dir - output directory for minimized files\n" \ -"\n" \ -"Execution control settings:\n" \ -"\n" \ -" -f file - location read by the fuzzed program (stdin)\n" \ -" -m megs - memory limit for child process ("mem_limit" MB)\n" \ -" -t msec - run time limit for child process (none)\n" \ -" -Q - use binary-only instrumentation (QEMU mode)\n" \ -" -U - use unicorn-based instrumentation (unicorn mode)\n" \ -"\n" \ -"Minimization settings:\n" \ -" -C - keep crashing inputs, reject everything else\n" \ -" -e - solve for edge coverage only, ignore hit counts\n" \ -"\n" \ -"For additional tips, please consult docs/README.md\n" \ -"\n" \ - > "/dev/stderr" - exit 1 -} - -function exists_and_is_executable(binarypath) { - return 0 == system("test -f "binarypath" -a -x "binarypath) -} - -BEGIN { - print "corpus minimization tool for afl++ (awk version)\n" - - # defaults - extra_par = "" - # process options - Opterr = 1 # default is to diagnose - Optind = 1 # skip ARGV[0] - while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCQU?")) != -1) { - if (_go_c == "i") { - if (!Optarg) usage() - if 
(in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - in_dir = Optarg - continue - } else - if (_go_c == "o") { - if (!Optarg) usage() - if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - out_dir = Optarg - continue - } else - if (_go_c == "f") { - if (!Optarg) usage() - if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - stdin_file = Optarg - continue - } else - if (_go_c == "m") { - if (!Optarg) usage() - if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - mem_limit = Optarg - mem_limit_given = 1 - continue - } else - if (_go_c == "t") { - if (!Optarg) usage() - if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - timeout = Optarg - continue - } else - if (_go_c == "C") { - ENVIRON["AFL_CMIN_CRASHES_ONLY"] = 1 - continue - } else - if (_go_c == "e") { - extra_par = extra_par " -e" - continue - } else - if (_go_c == "Q") { - if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - extra_par = extra_par " -Q" - if ( !mem_limit_given ) mem_limit = "250" - qemu_mode = 1 - continue - } else - if (_go_c == "U") { - if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - extra_par = extra_par " -U" - if ( !mem_limit_given ) mem_limit = "250" - unicorn_mode = 1 - continue - } else - if (_go_c == "?") { - exit 1 - } else - usage() - } # while options - - if (!mem_limit) mem_limit = 200 - if (!timeout) timeout = "none" - - # get program args - i = 0 - prog_args_string = "" - for (; Optind < ARGC; Optind++) { - prog_args[i++] = ARGV[Optind] - if (i > 1) - prog_args_string = prog_args_string" "ARGV[Optind] - } - - # sanity checks - if (!prog_args[0] || !in_dir || !out_dir) usage() - - target_bin = prog_args[0] - - # Do a sanity check to discourage the use of /tmp, since we can't really - # handle this safely from an awk script. 
- - if (!ENVIRON["AFL_ALLOW_TMP"]) { - dirlist[0] = in_dir - dirlist[1] = target_bin - dirlist[2] = out_dir - dirlist[3] = stdin_file - "pwd" | getline dirlist[4] # current directory - for (dirind in dirlist) { - dir = dirlist[dirind] - - if (dir ~ /^(\/var)?\/tmp/) { - print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr" - exit 1 - } - } - delete dirlist - } - - # If @@ is specified, but there's no -f, let's come up with a temporary input - # file name. - - trace_dir = out_dir "/.traces" - - if (!stdin_file) { - found_atat = 0 - for (prog_args_ind in prog_args) { - if ("@@" == prog_args[prog_args_ind]) { - found_atat = 1 - break - } - } - if (found_atat) { - stdin_file = trace_dir "/.cur_input" - } - } - - # Check for obvious errors. - - if (mem_limit && mem_limit != "none" && mem_limit < 5) { - print "[-] Error: dangerously low memory limit." > "/dev/stderr" - exit 1 - } - - if (timeout && timeout != "none" && timeout < 10) { - print "[-] Error: dangerously low timeout." > "/dev/stderr" - exit 1 - } - - if (target_bin && !exists_and_is_executable(target_bin)) { - - "which "target_bin" 2>/dev/null" | getline tnew - if (!tnew || !exists_and_is_executable(tnew)) { - print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr" - exit 1 - } - target_bin = tnew - } - - if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !unicorn_mode) { - if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) { - print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr" - exit 1 - } - } - - if (0 != system( "test -d "in_dir )) { - print "[-] Error: directory '"in_dir"' not found." 
> "/dev/stderr" - exit 1 - } - - if (0 == system( "test -d "in_dir"/queue" )) { - in_dir = in_dir "/queue" - } - - system("rm -rf "trace_dir" 2>/dev/null"); - system("rm "out_dir"/id[:_]* 2>/dev/null") - - if (0 == system( "test -d "out_dir" -a -e "out_dir"/*" )) { - print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr" - exit 1 - } - - if (stdin_file) { - # truncate input file - printf "" > stdin_file - close( stdin_file ) - } - - if (!ENVIRON["AFL_PATH"]) { - if (0 == system("test -f afl-cmin.awk")) { - showmap = "./afl-showmap" - } else { - "which afl-showmap 2>/dev/null" | getline showmap - } - } else { - showmap = ENVIRON["AFL_PATH"] "/afl-showmap" - } - - if (!showmap || 0 != system("test -x "showmap )) { - print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr" - exit 1 - } - - # get list of input filenames sorted by size - i = 0 - # yuck, gnu stat is option incompatible to bsd stat - # we use a heuristic to differentiate between - # GNU stat and other stats - "stat --version 2>/dev/null" | getline statversion - if (statversion ~ /GNU coreutils/) { - stat_format = "-c '%s %n'" # GNU - } else { - stat_format = "-f '%z %N'" # *BSD, MacOS - } - while ("cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) { - infilesSmallToBig[i++] = $0 - } - in_count = i - - first_file = infilesSmallToBig[0] - - # Make sure that we're not dealing with a directory. - - if (0 == system("test -d "in_dir"/"first_file)) { - print "[-] Error: The input directory contains subdirectories - please fix." > "/dev/stderr" - exit 1 - } - - # Check for the more efficient way to copy files... 
- if (0 != system("mkdir -p -m 0700 "trace_dir)) { - print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" - exit 1 - } - - if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) { - cp_tool = "ln" - } else { - cp_tool = "cp" - } - - # Make sure that we can actually get anything out of afl-showmap before we - # waste too much time. - - print "[*] Testing the target binary..." - - if (!stdin_file) { - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"") - } else { - system("cp "in_dir"/"first_file" "stdin_file) - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { - ++first_count - } - - if (first_count) { - print "[+] OK, "first_count" tuples recorded." - } else { - print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr" - if (!ENVIRON["AFL_KEEP_TRACES"]) { - system("rm -rf "trace_dir" 2>/dev/null") - } - exit 1 - } - - # Let's roll! - - ############################# - # STEP 1: Collecting traces # - ############################# - - print "[*] Obtaining traces for "in_count" input files in '"in_dir"'." 
- - cur = 0; - if (!stdin_file) { - while (cur < in_count) { - fn = infilesSmallToBig[cur] - ++cur; - printf "\r Processing file "cur"/"in_count - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"fn"\"") - } - } else { - while (cur < in_count) { - fn = infilesSmallToBig[cur] - ++cur - printf "\r Processing file "cur"/"in_count - system("cp "in_dir"/"fn" "stdin_file) - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { - key = line - if (!(key in key_count)) { - ++tuple_count - } - ++key_count[key] - if (! (key in best_file)) { - # this is the best file for this key - best_file[key] = fn - # copy file unless already done - if (! (fn in file_already_copied)) { - system(cp_tool" "in_dir"/"fn" "out_dir"/"fn) - file_already_copied[fn] = "" - ++out_count - } - } - } - close(tracefile_path) - } - - print "" - print "[+] Found "tuple_count" unique tuples across "in_count" files." - - if (out_count == 1) { - print "[!] WARNING: All test cases had the same traces, check syntax!" - } - print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'." - - if (!ENVIRON["AFL_KEEP_TRACES"]) { - system("rm -rf "trace_dir" 2>/dev/null") - } - - exit 0 -} diff --git a/docs/ChangeLog b/docs/ChangeLog index bb3537dd..33c6f618 100644 --- a/docs/ChangeLog +++ b/docs/ChangeLog @@ -25,6 +25,8 @@ Version ++2.60d (develop): - show in the help output for which llvm version it was compiled for - now does not need to be recompiled between trace-pc and pass instrumentation. 
compile normally and set AFL_LLVM_USE_TRACE_PC :) + - afl-cmin is now a sh script (invoking awk) instead of bash for portability + the original script is still present as afl-cmin.bash - added blacklisted function check in all modules of llvm_mode - added fix from Debian project to compile libdislocator and libtokencap diff --git a/test/test.sh b/test/test.sh index 3473155f..0ae6fd09 100755 --- a/test/test.sh +++ b/test/test.sh @@ -150,7 +150,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { } echo 000000000000000000000000 > in/in2 mkdir -p in2 - ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null + ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null 2>&1 CNT=`ls in2/ | wc -l` case "$CNT" in 1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;; From 436873a19abe5858e56555db02095f4eb7e6febd Mon Sep 17 00:00:00 2001 From: van Hauser Date: Thu, 23 Jan 2020 11:55:53 +0100 Subject: [PATCH 27/43] show stderr on afl-cmin test.sh --- test/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index 0ae6fd09..3473155f 100755 --- a/test/test.sh +++ b/test/test.sh @@ -150,7 +150,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { } echo 000000000000000000000000 > in/in2 mkdir -p in2 - ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null 2>&1 + ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null CNT=`ls in2/ | wc -l` case "$CNT" in 1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;; From 6abe33030396c8f15f00b4fe3d083f3841de3212 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Fri, 24 Jan 2020 20:58:15 +0100 Subject: [PATCH 28/43] afl-cmin more awk portability (mawk), add afl-cmin/afl-tmin tests for non-x86 platforms --- afl-cmin | 27 ++++++++------------------- test/test.sh | 26 +++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/afl-cmin b/afl-cmin index a072a62a..de5a66ed 100755 --- 
a/afl-cmin +++ b/afl-cmin @@ -8,6 +8,10 @@ awk -f - -- ${@+"$@"} <<'EOF' # # based on afl-cmin bash script written by Michal Zalewski # rewritten by Heiko Eißfeldt (hexcoder-) +# tested with: +# gnu awk (x86 Linux) +# bsd awk (x86 *BSD) +# mawk (arm32 raspbian) # # uses getopt.awk package from Arnold Robbins # @@ -52,7 +56,7 @@ function getopt(argc, argv, options, thisopt, i) Optind++ _opti = 0 return -1 - } else if (argv[Optind] !~ /^-[^:[:space:]]/) { + } else if (argv[Optind] !~ /^-[^:\t ]/) { _opti = 0 return -1 } @@ -88,22 +92,6 @@ function getopt(argc, argv, options, thisopt, i) return thisopt } -BEGIN { - Opterr = 1 # default is to diagnose - Optind = 1 # skip ARGV[0] - - # test program - if (_getopt_test) { - while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, Optarg = <%s>\n", - _go_c, Optarg) - printf("non-option arguments:\n") - for (; Optind < ARGC; Optind++) - printf("\tARGV[%d] = <%s>\n", - Optind, ARGV[Optind]) - } -} - function usage() { print \ "Usage: afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \ @@ -311,7 +299,7 @@ BEGIN { } if (!ENVIRON["AFL_PATH"]) { - if (0 == system("test -f afl-cmin.awk")) { + if (0 == system("test -f afl-cmin")) { showmap = "./afl-showmap" } else { "which afl-showmap 2>/dev/null" | getline showmap @@ -336,7 +324,8 @@ BEGIN { } else { stat_format = "-f '%z %N'" # *BSD, MacOS } - while ("cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) { + cmdline = "cd "in_dir" && find . 
-type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" + while (cmdline | getline) { infilesSmallToBig[i++] = $0 } in_count = i diff --git a/test/test.sh b/test/test.sh index 3473155f..93a4e008 100755 --- a/test/test.sh +++ b/test/test.sh @@ -153,8 +153,8 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null CNT=`ls in2/ | wc -l` case "$CNT" in -1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;; -*) $ECHO "$RED[!] afl-cmin did not correctly minimize testcase numbers" +1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; +*) $ECHO "$RED[!] afl-cmin did not correctly minimizethe number of testcases" CODE=1 ;; esac @@ -176,7 +176,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { $ECHO "$YELLOW[-] not an intel platform, cannot test afl-gcc" } -$ECHO "$BLUE[*] Testing: llvm_mode" +$ECHO "$BLUE[*] Testing: llvm_mode, afl-showmap, afl-fuzz, afl-cmin and afl-tmin" test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { # on FreeBSD need to set AFL_CC test `uname -s` = 'FreeBSD' && { @@ -253,6 +253,26 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { $ECHO "$RED[!] afl-fuzz is not working correctly with llvm_mode" CODE=1 } + test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" || { + echo 000000000000000000000000 > in/in2 + mkdir -p in2 + ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null + CNT=`ls in2/ | wc -l` + case "$CNT" in +1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; +*) $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases" + CODE=1 + ;; + esac + ../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1 + SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'` + test "$SIZE" = 1 && $ECHO "$GREEN[+] afl-tmin correctly minimized the testcase" + test "$SIZE" = 1 || { + $ECHO "$RED[!] 
afl-tmin did incorrectly minimize the testcase to $SIZE" + CODE=1 + } + rm -rf in2 + } rm -rf in out errors } rm -f test-instr.plain From 5d2330f04e45225588a11c64b26a7dbb1a2fbe1a Mon Sep 17 00:00:00 2001 From: van Hauser Date: Sat, 25 Jan 2020 05:27:10 +0100 Subject: [PATCH 29/43] nicer output for afl-system-config --- afl-system-config | 93 ++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/afl-system-config b/afl-system-config index 2a7df17f..1e180d8b 100755 --- a/afl-system-config +++ b/afl-system-config @@ -1,6 +1,6 @@ #!/bin/sh test "$1" = "-h" && { - echo afl-system-config by Marc Heuse + echo 'afl-system-config by Marc Heuse ' echo echo $0 echo @@ -12,55 +12,72 @@ test "$1" = "-h" && { exit 1 } +DONE= PLATFORM=`uname -s` -echo This reconfigures the system to have a better fuzzing performance +echo This reconfigures the system to have a better fuzzing performance. if [ '!' "$EUID" = 0 ] && [ '!' `id -u` = 0 ] ; then - echo Error you need to be root to run this - exit 1 + echo "Warning: you need to be root to run this!" + # we do not exit as other mechanisms exist that allows to do this than + # being root. let the errors speak for themselves. 
fi if [ "$PLATFORM" = "Linux" ] ; then -sysctl -w kernel.core_pattern=core -sysctl -w kernel.randomize_va_space=0 -sysctl -w kernel.sched_child_runs_first=1 -sysctl -w kernel.sched_autogroup_enabled=1 -sysctl -w kernel.sched_migration_cost_ns=50000000 -sysctl -w kernel.sched_latency_ns=250000000 -echo never > /sys/kernel/mm/transparent_hugepage/enabled -test -e /sys/devices/system/cpu/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/scaling_governor -test -e /sys/devices/system/cpu/cpufreq/policy0/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/policy*/scaling_governor -test -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor -test -e /sys/devices/system/cpu/intel_pstate/no_turbo && echo 0 > /sys/devices/system/cpu/intel_pstate/no_turbo -test -e /sys/devices/system/cpu/cpufreq/boost && echo 1 > /sys/devices/system/cpu/cpufreq/boost -echo -echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: -echo '/etc/default/grub:GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"' +{ + sysctl -w kernel.core_pattern=core + sysctl -w kernel.randomize_va_space=0 + sysctl -w kernel.sched_child_runs_first=1 + sysctl -w kernel.sched_autogroup_enabled=1 + sysctl -w kernel.sched_migration_cost_ns=50000000 + sysctl -w kernel.sched_latency_ns=250000000 + echo never > /sys/kernel/mm/transparent_hugepage/enabled + test -e /sys/devices/system/cpu/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/scaling_governor + test -e /sys/devices/system/cpu/cpufreq/policy0/scaling_governor && echo performance | tee 
/sys/devices/system/cpu/cpufreq/policy*/scaling_governor + test -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + test -e /sys/devices/system/cpu/intel_pstate/no_turbo && echo 0 > /sys/devices/system/cpu/intel_pstate/no_turbo + test -e /sys/devices/system/cpu/cpufreq/boost && echo 1 > /sys/devices/system/cpu/cpufreq/boost +} > /dev/null + echo Settings applied. + dmesg | egrep -q 'nospectre_v2|spectre_v2=off' || { + echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: + echo ' /etc/default/grub:GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"' + } + DONE=1 fi if [ "$PLATFORM" = "FreeBSD" ] ; then -sysctl kern.elf32.aslr.enable=0 -sysctl kern.elf64.aslr.enable=0 -echo -echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: -echo 'sysctl hw.ibrs_disable=1' -echo -echo 'Setting kern.pmap.pg_ps_enabled=0 into /boot/loader.conf might be helpful too.' +{ + sysctl kern.elf32.aslr.enable=0 + sysctl kern.elf64.aslr.enable=0 +} > /dev/null + echo Settings applied. + echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: + echo ' sysctl hw.ibrs_disable=1' + echo 'Setting kern.pmap.pg_ps_enabled=0 into /boot/loader.conf might be helpful too.' + DONE=1 fi if [ "$PLATFORM" = "OpenBSD" ] ; then -echo -echo 'System security features cannot be disabled on OpenBSD.' + echo + echo 'System security features cannot be disabled on OpenBSD.' 
+ DONE=1 fi if [ "$PLATFORM" = "NetBSD" ] ; then -echo -echo It is recommended to enable unprivileged users to set cpu affinity -echo to be able to use afl-gotcpu meaningfully. -/sbin/sysctl -w security.models.extensions.user_set_cpu_affinity=1 +{ + #echo It is recommended to enable unprivileged users to set cpu affinity + #echo to be able to use afl-gotcpu meaningfully. + /sbin/sysctl -w security.models.extensions.user_set_cpu_affinity=1 +} > /dev/null + echo Settings applied. + DONE=1 fi if [ "$PLATFORM" = "Darwin" ] ; then if [ $(launchctl list 2>/dev/null | grep -q '\.ReportCrash$') ] ; then -echo We unload the default crash reporter here -SL=/System/Library; PL=com.apple.ReportCrash -launchctl unload -w ${SL}/LaunchAgents/${PL}.plist -sudo launchctl unload -w ${SL}/LaunchDaemons/${PL}.Root.plist + echo We unload the default crash reporter here + SL=/System/Library; PL=com.apple.ReportCrash + launchctl unload -w ${SL}/LaunchAgents/${PL}.plist + sudo launchctl unload -w ${SL}/LaunchDaemons/${PL}.Root.plist + echo Settings applied. + else + echo Nothing to do. 
fi + DONE=1 fi -echo -echo Also use AFL_TMPDIR to use a tmpfs for the input file +test -z "$DONE" && echo Error: Unknown platform: $PLATFORM +test -z "$AFL_TMPDIR" && echo Also use AFL_TMPDIR and point it to a tmpfs for the input file caching From 2c6847bfa0b57f3330b1aab9b91d935757db51b7 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Sat, 25 Jan 2020 16:11:42 +0100 Subject: [PATCH 30/43] added whitelist+blacklist to all llvm_mode passes --- docs/ChangeLog | 2 +- llvm_mode/LLVMInsTrim.so.cc | 29 +----- llvm_mode/MarkNodes.cc | 19 ++-- llvm_mode/compare-transform-pass.so.cc | 94 ++++++++++++++++++++ llvm_mode/split-compares-pass.so.cc | 118 +++++++++++++++++++++++++ llvm_mode/split-switches-pass.so.cc | 113 +++++++++++++++++++++++ test/test.sh | 16 ++-- 7 files changed, 343 insertions(+), 48 deletions(-) diff --git a/docs/ChangeLog b/docs/ChangeLog index 33c6f618..c1d53379 100644 --- a/docs/ChangeLog +++ b/docs/ChangeLog @@ -27,7 +27,7 @@ Version ++2.60d (develop): instrumentation. compile normally and set AFL_LLVM_USE_TRACE_PC :) - afl-cmin is now a sh script (invoking awk) instead of bash for portability the original script is still present as afl-cmin.bash - - added blacklisted function check in all modules of llvm_mode + - added blacklist and whitelisting function check in all modules of llvm_mode - added fix from Debian project to compile libdislocator and libtokencap diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 11451b43..24df6d42 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -144,19 +144,6 @@ struct InsTrim : public ModulePass { // this is our default MarkSetOpt = true; - /* // I dont think this makes sense to port into LLVMInsTrim - char* inst_ratio_str = getenv("AFL_INST_RATIO"); - unsigned int inst_ratio = 100; - if (inst_ratio_str) { - - if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || - inst_ratio > 100) FATAL("Bad value of AFL_INST_RATIO (must be between 1 - and 100)"); - 
- } - - */ - LLVMContext &C = M.getContext(); IntegerType *Int8Ty = IntegerType::getInt8Ty(C); IntegerType *Int32Ty = IntegerType::getInt32Ty(C); @@ -203,8 +190,7 @@ struct InsTrim : public ModulePass { if (instFilename.str().empty()) { - /* If the original location is empty, try using the inlined location - */ + /* If the original location is empty, try using the inlined location */ DILocation *oDILoc = cDILoc->getInlinedAt(); if (oDILoc) { @@ -432,28 +418,19 @@ struct InsTrim : public ModulePass { IRB.CreateStore(Incr, MapPtrIdx) ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - /* Set prev_loc to cur_loc >> 1 */ - /* - StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, L >> 1), - OldPrev); Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, - None)); - */ - total_instr++; } } - OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n" /*", ratio - %u%%)."*/ - , + OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", total_instr, total_rs, total_hs, getenv("AFL_HARDEN") ? "hardened" : ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) ? 
"ASAN/MSAN" - : "non-hardened") /*, inst_ratio*/); + : "non-hardened")); return false; } diff --git a/llvm_mode/MarkNodes.cc b/llvm_mode/MarkNodes.cc index 2aeeda8d..caa8cede 100644 --- a/llvm_mode/MarkNodes.cc +++ b/llvm_mode/MarkNodes.cc @@ -65,16 +65,11 @@ void buildCFG(Function *F) { } - // uint32_t FakeID = 0; for (auto S = F->begin(), E = F->end(); S != E; ++S) { BasicBlock *BB = &*S; uint32_t MyID = LMap[BB]; - // if (succ_begin(BB) == succ_end(BB)) { - // Succs[MyID].push_back(FakeID); - // Marked.insert(MyID); - //} for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { Succs[MyID].push_back(LMap[*I]); @@ -113,7 +108,7 @@ void DFStree(size_t now_id) { } -void turnCFGintoDAG(Function *F) { +void turnCFGintoDAG() { tSuccs = Succs; tag.resize(Blocks.size()); @@ -176,7 +171,7 @@ void DFS(uint32_t now) { } -void DominatorTree(Function *F) { +void DominatorTree() { if (Blocks.empty()) return; uint32_t s = start_point; @@ -390,7 +385,7 @@ void MarkSubGraph(uint32_t ss, uint32_t tt) { } -void MarkVertice(Function *F) { +void MarkVertice() { uint32_t s = start_point; @@ -411,8 +406,6 @@ void MarkVertice(Function *F) { timeStamp = 0; uint32_t t = 0; - // MarkSubGraph(s, t); - // return; while (s != t) { @@ -432,9 +425,9 @@ std::pair, std::vector > markNodes( reset(); labelEachBlock(F); buildCFG(F); - turnCFGintoDAG(F); - DominatorTree::DominatorTree(F); - MarkVertice(F); + turnCFGintoDAG(); + DominatorTree::DominatorTree(); + MarkVertice(); std::vector Result, ResultAbove; for (uint32_t x : Markabove) { diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc index 0ccce875..5d924b63 100644 --- a/llvm_mode/compare-transform-pass.so.cc +++ b/llvm_mode/compare-transform-pass.so.cc @@ -18,7 +18,13 @@ #include #include +#include +#include +#include +#include + #include "llvm/ADT/Statistic.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ 
-42,6 +48,23 @@ class CompareTransform : public ModulePass { static char ID; CompareTransform() : ModulePass(ID) { + char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST"); + if (instWhiteListFilename) { + + std::string line; + std::ifstream fileStream; + fileStream.open(instWhiteListFilename); + if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST"); + getline(fileStream, line); + while (fileStream) { + + myWhitelist.push_back(line); + getline(fileStream, line); + + } + + } + } bool runOnModule(Module &M) override; @@ -57,6 +80,9 @@ class CompareTransform : public ModulePass { } + protected: + std::list myWhitelist; + private: bool transformCmps(Module &M, const bool processStrcmp, const bool processMemcmp, const bool processStrncmp, @@ -104,6 +130,74 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, for (auto &BB : F) { + if (!myWhitelist.empty()) { + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + + bool instrumentBlock = false; + + /* Get the current location using debug information. + * For now, just instrument the block if we are not able + * to determine our location. */ + DebugLoc Loc = IP->getDebugLoc(); + if (Loc) { + + DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); + + unsigned int instLine = cDILoc->getLine(); + StringRef instFilename = cDILoc->getFilename(); + + if (instFilename.str().empty()) { + + /* If the original location is empty, try using the inlined location + */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { + + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); + + } + + } + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. 
Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. */ + if (!instrumentBlock) continue; + + } + for (auto &IN : BB) { CallInst *callInst = nullptr; diff --git a/llvm_mode/split-compares-pass.so.cc b/llvm_mode/split-compares-pass.so.cc index eeac4a55..bc25b322 100644 --- a/llvm_mode/split-compares-pass.so.cc +++ b/llvm_mode/split-compares-pass.so.cc @@ -15,7 +15,17 @@ * limitations under the License. */ +#include +#include +#include + +#include +#include +#include +#include + #include "llvm/Pass.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/Support/raw_ostream.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" @@ -35,6 +45,41 @@ class SplitComparesTransform : public ModulePass { static char ID; SplitComparesTransform() : ModulePass(ID) { + char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST"); + if (instWhiteListFilename) { + + std::string line; + std::ifstream fileStream; + fileStream.open(instWhiteListFilename); + if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST"); + getline(fileStream, line); + while (fileStream) { + + myWhitelist.push_back(line); + getline(fileStream, line); + + } + + } + + } + + static bool isBlacklisted(const Function *F) { + + static const SmallVector Blacklist = { + + "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign." 
+ + }; + + for (auto const &BlacklistFunc : Blacklist) { + + if (F->getName().startswith(BlacklistFunc)) { return true; } + + } + + return false; + } bool runOnModule(Module &M) override; @@ -49,6 +94,9 @@ class SplitComparesTransform : public ModulePass { } + protected: + std::list myWhitelist; + private: int enableFPSplit; @@ -77,8 +125,78 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { * all integer comparisons with >= and <= predicates to the icomps vector */ for (auto &F : M) { + if (isBlacklisted(&F)) continue; + for (auto &BB : F) { + if (!myWhitelist.empty()) { + + bool instrumentBlock = false; + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + + /* Get the current location using debug information. + * For now, just instrument the block if we are not able + * to determine our location. */ + DebugLoc Loc = IP->getDebugLoc(); + if (Loc) { + + DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); + + unsigned int instLine = cDILoc->getLine(); + StringRef instFilename = cDILoc->getFilename(); + + if (instFilename.str().empty()) { + + /* If the original location is empty, try using the inlined location + */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { + + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); + + } + + } + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. 
*/ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. */ + if (!instrumentBlock) continue; + + } + for (auto &IN : BB) { CmpInst *selectcmpInst = nullptr; diff --git a/llvm_mode/split-switches-pass.so.cc b/llvm_mode/split-switches-pass.so.cc index 2743a71a..3a2838c0 100644 --- a/llvm_mode/split-switches-pass.so.cc +++ b/llvm_mode/split-switches-pass.so.cc @@ -18,7 +18,13 @@ #include #include +#include +#include +#include +#include + #include "llvm/ADT/Statistic.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -42,6 +48,41 @@ class SplitSwitchesTransform : public ModulePass { static char ID; SplitSwitchesTransform() : ModulePass(ID) { + char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST"); + if (instWhiteListFilename) { + + std::string line; + std::ifstream fileStream; + fileStream.open(instWhiteListFilename); + if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST"); + getline(fileStream, line); + while (fileStream) { + + myWhitelist.push_back(line); + getline(fileStream, line); + + } + + } + + } + + static bool isBlacklisted(const Function *F) { + + static const SmallVector Blacklist = { + + "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign." 
+ + }; + + for (auto const &BlacklistFunc : Blacklist) { + + if (F->getName().startswith(BlacklistFunc)) { return true; } + + } + + return false; + } bool runOnModule(Module &M) override; @@ -71,6 +112,9 @@ class SplitSwitchesTransform : public ModulePass { typedef std::vector CaseVector; + protected: + std::list myWhitelist; + private: bool splitSwitches(Module &M); bool transformCmps(Module &M, const bool processStrcmp, @@ -268,10 +312,79 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { * all switches to switches vector for later processing */ for (auto &F : M) { + if (isBlacklisted(&F)) continue; + for (auto &BB : F) { SwitchInst *switchInst = nullptr; + if (!myWhitelist.empty()) { + + bool instrumentBlock = false; + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + + /* Get the current location using debug information. + * For now, just instrument the block if we are not able + * to determine our location. */ + DebugLoc Loc = IP->getDebugLoc(); + if (Loc) { + + DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); + + unsigned int instLine = cDILoc->getLine(); + StringRef instFilename = cDILoc->getFilename(); + + if (instFilename.str().empty()) { + + /* If the original location is empty, try using the inlined location + */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { + + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); + + } + + } + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. 
*/ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. */ + if (!instrumentBlock) continue; + + } + if ((switchInst = dyn_cast(BB.getTerminator()))) { if (switchInst->getNumCases() < 1) continue; diff --git a/test/test.sh b/test/test.sh index 93a4e008..9676d22d 100755 --- a/test/test.sh +++ b/test/test.sh @@ -153,10 +153,10 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null CNT=`ls in2/ | wc -l` case "$CNT" in -1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; -*) $ECHO "$RED[!] afl-cmin did not correctly minimizethe number of testcases" - CODE=1 - ;; + *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; + *) $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases" + CODE=1 + ;; esac ../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1 SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'` @@ -259,10 +259,10 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null CNT=`ls in2/ | wc -l` case "$CNT" in -1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; -*) $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases" - CODE=1 - ;; + *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; + *) $ECHO "$RED[!] 
afl-cmin did not correctly minimize the number of testcases" + CODE=1 + ;; esac ../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1 SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'` From 3561a1b775989a0cf37221f810eec601cdb14bcf Mon Sep 17 00:00:00 2001 From: van Hauser Date: Mon, 27 Jan 2020 00:19:59 +0100 Subject: [PATCH 31/43] dockerfile update --- Dockerfile | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1947f211..7bb60610 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,9 @@ RUN apt-get update && apt-get install -y \ clang \ clang-9 \ flex \ + git \ + python3.7 \ + python3.7-dev \ gcc-9 \ gcc-9-plugin-dev \ gcc-9-multilib \ @@ -23,10 +26,12 @@ RUN apt-get update && apt-get install -y \ ca-certificates \ libpixman-1-dev \ && rm -rf /var/lib/apt/lists/* + ARG CC=gcc-9 ARG CXX=g++-9 ARG LLVM_CONFIG=llvm-config-9 -COPY . /app -RUN cd /app && make clean && make distrib && \ - make install && cd .. && rm -rf /app -WORKDIR /work + +RUN git clone https://github.com/vanhauser-thc/AFLplusplus + +RUN cd AFLplusplus && make clean && make distrib && \ + make install && cd .. && rm -rf AFLplusplus From fa64c0d4a5a6eb1eddd13071e3b326778bf6db5a Mon Sep 17 00:00:00 2001 From: van Hauser Date: Mon, 27 Jan 2020 10:40:13 +0100 Subject: [PATCH 32/43] important fixes for afl-cmin --- afl-cmin | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/afl-cmin b/afl-cmin index de5a66ed..865809e1 100755 --- a/afl-cmin +++ b/afl-cmin @@ -292,6 +292,24 @@ BEGIN { exit 1 } + # Check for the more efficient way to copy files... 
+ if (0 != system("mkdir -p -m 0700 "trace_dir)) { + print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" + exit 1 + } + if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/auto_extras")) { + print "[-] Error: Cannot create directory "trace_dir"/.state/auto_extras" > "/dev/stderr" + exit 1 + } + if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/redundant_edges")) { + print "[-] Error: Cannot create directory "trace_dir"/.state/redundant_edges" > "/dev/stderr" + exit 1 + } + if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/deterministic_done")) { + print "[-] Error: Cannot create directory "trace_dir"/.state/deterministic_done" > "/dev/stderr" + exit 1 + } + if (stdin_file) { # truncate input file printf "" > stdin_file @@ -339,12 +357,6 @@ BEGIN { exit 1 } - # Check for the more efficient way to copy files... - if (0 != system("mkdir -p -m 0700 "trace_dir)) { - print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" - exit 1 - } - if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) { cp_tool = "ln" } else { @@ -360,7 +372,7 @@ BEGIN { system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"") } else { system("cp "in_dir"/"first_file" "stdin_file) - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" Date: Mon, 27 Jan 2020 11:47:39 +0100 Subject: [PATCH 33/43] nearing afl-cmin perfection :-) --- afl-cmin | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/afl-cmin b/afl-cmin index 865809e1..f6e76263 100755 --- a/afl-cmin +++ b/afl-cmin @@ -297,18 +297,6 @@ BEGIN { print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" exit 1 } - if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/auto_extras")) { - print "[-] Error: Cannot 
create directory "trace_dir"/.state/auto_extras" > "/dev/stderr" - exit 1 - } - if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/redundant_edges")) { - print "[-] Error: Cannot create directory "trace_dir"/.state/redundant_edges" > "/dev/stderr" - exit 1 - } - if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/deterministic_done")) { - print "[-] Error: Cannot create directory "trace_dir"/.state/deterministic_done" > "/dev/stderr" - exit 1 - } if (stdin_file) { # truncate input file @@ -342,7 +330,7 @@ BEGIN { } else { stat_format = "-f '%z %N'" # *BSD, MacOS } - cmdline = "cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" + cmdline = "cd "in_dir" && find . -maxdepth 1 -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" while (cmdline | getline) { infilesSmallToBig[i++] = $0 } From 3374ada561e5dcfe052c41837fc15bd29287b285 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Mon, 27 Jan 2020 11:48:49 +0100 Subject: [PATCH 34/43] nearing afl-cmin perfection :-) --- afl-cmin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/afl-cmin b/afl-cmin index f6e76263..e9d713aa 100755 --- a/afl-cmin +++ b/afl-cmin @@ -360,7 +360,7 @@ BEGIN { system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"") } else { system("cp "in_dir"/"first_file" "stdin_file) - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" < /dev/null") + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" Date: Mon, 27 Jan 2020 13:06:00 +0100 Subject: [PATCH 35/43] updated binary_fuzzing document --- ...only_fuzzing.txt => binaryonly_fuzzing.md} | 103 
++++++++++-------- 1 file changed, 56 insertions(+), 47 deletions(-) rename docs/{binaryonly_fuzzing.txt => binaryonly_fuzzing.md} (57%) diff --git a/docs/binaryonly_fuzzing.txt b/docs/binaryonly_fuzzing.md similarity index 57% rename from docs/binaryonly_fuzzing.txt rename to docs/binaryonly_fuzzing.md index f8d68cd8..d22e4ce2 100644 --- a/docs/binaryonly_fuzzing.txt +++ b/docs/binaryonly_fuzzing.md @@ -1,6 +1,4 @@ - -Fuzzing binary-only programs with afl++ -======================================= +#Fuzzing binary-only programs with afl++ afl++, libfuzzer and others are great if you have the source code, and it allows for very fast and coverage guided fuzzing. @@ -16,36 +14,42 @@ TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then !!!!! -QEMU ----- +##QEMU Qemu is the "native" solution to the program. It is available in the ./qemu_mode/ directory and once compiled it can be accessed by the afl-fuzz -Q command line option. The speed decrease is at about 50% It is the easiest to use alternative and even works for cross-platform binaries. +Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz) +which now has a qemu_mode, but its performance is just 1.5%! + As it is included in afl++ this needs no URL. -WINE+QEMU ---------- + +##WINE+QEMU Wine mode can run Win32 PE binaries with the QEMU instrumentation. It needs Wine, python3 and the pefile python package installed. -UNICORN -------- +As it is included in afl++ this needs no URL. + + +##UNICORN Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. -In contrast to QEMU, Unicorn does not offer a full system or even userland emulation. -Runtime environment and/or loaders have to be written from scratch, if needed. -On top, block chaining has been removed. This means the speed boost introduced in -the patched QEMU Mode of afl++ cannot simply be ported over to Unicorn. -For further information, check out ./unicorn_mode.txt. 
+In contrast to QEMU, Unicorn does not offer a full system or even userland +emulation. Runtime environment and/or loaders have to be written from scratch, +if needed. On top, block chaining has been removed. This means the speed boost +introduced in the patched QEMU Mode of afl++ cannot simply be ported over to +Unicorn. For further information, check out ./unicorn_mode.txt. + +As it is included in afl++ this needs no URL. -DYNINST -------- +##DYNINST Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio (see far below). However whereas Pintool and Dynamorio work at runtime, dyninst -instruments the target at load time, and then let it run. +instruments the target at load time, and then let it run - or save the +binary with the changes. This is great for some things, e.g. fuzzing, and not so effective for others, e.g. malware analysis. @@ -53,9 +57,9 @@ So what we can do with dyninst is taking every basic block, and put afl's instrumention code in there - and then save the binary. Afterwards we can just fuzz the newly saved target binary with afl-fuzz. Sounds great? It is. The issue though - it is a non-trivial problem to -insert instructions, which change addresses in the process space, so +insert instructions, which change addresses in the process space, so that everything is still working afterwards. Hence more often than not binaries -crash when they are run (because of instrumentation). +crash when they are run. The speed decrease is about 15-35%, depending on the optimization options used with afl-dyninst. @@ -63,11 +67,10 @@ used with afl-dyninst. So if dyninst works, it is the best option available. Otherwise it just doesn't work well. -https://github.com/vanhauser-thc/afl-dyninst +[https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) -INTEL-PT --------- +##INTEL-PT If you have a newer Intel CPU, you can make use of Intels processor trace. 
The big issue with Intel's PT is the small buffer size and the complex encoding of the debug information collected through PT. @@ -77,30 +80,39 @@ the implementation and other factors). There are two afl intel-pt implementations: -1. https://github.com/junxzm1990/afl-pt +1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt) => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel. -2. https://github.com/hunter-ht-2018/ptfuzzer +2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer) => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must be used. This one is faster than the other. +Note that there is also honggfuzz: https://github.com/google/honggfuzz +But its IPT performance is just 6%! -CORESIGHT ---------- +##CORESIGHT Coresight is ARM's answer to Intel's PT. There is no implementation so far which handle coresight and getting it working on an ARM Linux is very difficult due to custom kernel building on embedded systems is difficult. And finding one that has coresight in the ARM chip is difficult too. My guess is that it is slower than Qemu, but faster than Intel PT. + If anyone finds any coresight implementation for afl please ping me: vh@thc.org -PIN & DYNAMORIO ---------------- +##FRIDA + +Frida is a dynamic instrumentation engine like Pintool, Dyninst and Dynamorio. +What is special is that it is written in Python, and scripted with JavaScript. +It is mostly used to reverse binaries on mobile phones, but it can be used +everywhere. + +There is a WIP fuzzer available at [https://github.com/andreafioraldi/frida-fuzzer](https://github.com/andreafioraldi/frida-fuzzer) + + +##PIN & DYNAMORIO Pintool and Dynamorio are dynamic instrumentation engines, and they can be used for getting basic block information at runtime. 
Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows @@ -115,30 +127,27 @@ Hence Dynamorio is the option to go for if everything fails, and Pintool only if Dynamorio fails too. Dynamorio solutions: - https://github.com/vanhauser-thc/afl-dynamorio - https://github.com/mxmssh/drAFL - https://github.com/googleprojectzero/winafl/ <= very good but windows only + * [https://github.com/vanhauser-thc/afl-dynamorio](https://github.com/vanhauser-thc/afl-dynamorio) + * [https://github.com/mxmssh/drAFL](https://github.com/mxmssh/drAFL) + * [https://github.com/googleprojectzero/winafl/](https://github.com/googleprojectzero/winafl/) <= very good but windows only Pintool solutions: - https://github.com/vanhauser-thc/afl-pin - https://github.com/mothran/aflpin - https://github.com/spinpx/afl_pin_mode <= only old Pintool version supported + * [https://github.com/vanhauser-thc/afl-pin](https://github.com/vanhauser-thc/afl-pin) + * [https://github.com/mothran/aflpin](https://github.com/mothran/aflpin) + * [https://github.com/spinpx/afl_pin_mode](https://github.com/spinpx/afl_pin_mode) <= only old Pintool version supported -Non-AFL solutions ------------------ +##Non-AFL solutions +There are many binary-only fuzzing frameworks. +Some are great for CTFs but don't work with large binaries, others are very +slow but have good path discovery, some are very hard to set-up ... -There are many binary-only fuzzing frameworks. Some are great for CTFs but don't -work with large binaries, others are very slow but have good path discovery, -some are very hard to set-up ... - -QSYM: https://github.com/sslab-gatech/qsym -Manticore: https://github.com/trailofbits/manticore -S2E: https://github.com/S2E - +* QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym) +* Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore) +* S2E: [https://github.com/S2E](https://github.com/S2E) +* +## Closing words -That's it! 
-News, corrections, updates? -Email vh@thc.org +That's it! News, corrections, updates? Send an email to vh@thc.org From 38232979587b6c37b024f22849b311d7e6962edf Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Mon, 27 Jan 2020 13:29:22 +0100 Subject: [PATCH 36/43] Added persistent mode sample --- unicorn_mode/samples/persistent/.gitignore | 3 + unicorn_mode/samples/persistent/COMPILE.md | 24 ++ unicorn_mode/samples/persistent/Makefile | 42 +++ unicorn_mode/samples/persistent/harness.c | 269 ++++++++++++++++++ .../samples/persistent/persistent_target.c | 39 +++ .../persistent/persistent_target_x86_64 | Bin 0 -> 16544 bytes unicorn_mode/samples/persistent/sample_all.sh | 18 ++ .../persistent/sample_inputs/sample1.bin | 1 + .../persistent/sample_inputs/sample2.bin | Bin 0 -> 1 bytes .../persistent/sample_inputs/sample3.bin | 1 + .../persistent/sample_inputs/sample4.bin | 1 + .../persistent/sample_inputs/sample5.bin | 1 + .../persistent/simple_target_noncrashing.c | 33 +++ .../samples/persistent/simple_target_x86_64 | Bin 0 -> 17624 bytes 14 files changed, 432 insertions(+) create mode 100644 unicorn_mode/samples/persistent/.gitignore create mode 100644 unicorn_mode/samples/persistent/COMPILE.md create mode 100644 unicorn_mode/samples/persistent/Makefile create mode 100644 unicorn_mode/samples/persistent/harness.c create mode 100644 unicorn_mode/samples/persistent/persistent_target.c create mode 100644 unicorn_mode/samples/persistent/persistent_target_x86_64 create mode 100644 unicorn_mode/samples/persistent/sample_all.sh create mode 100644 unicorn_mode/samples/persistent/sample_inputs/sample1.bin create mode 100644 unicorn_mode/samples/persistent/sample_inputs/sample2.bin create mode 100644 unicorn_mode/samples/persistent/sample_inputs/sample3.bin create mode 100644 unicorn_mode/samples/persistent/sample_inputs/sample4.bin create mode 100644 unicorn_mode/samples/persistent/sample_inputs/sample5.bin create mode 100644 
unicorn_mode/samples/persistent/simple_target_noncrashing.c create mode 100644 unicorn_mode/samples/persistent/simple_target_x86_64 diff --git a/unicorn_mode/samples/persistent/.gitignore b/unicorn_mode/samples/persistent/.gitignore new file mode 100644 index 00000000..3e446132 --- /dev/null +++ b/unicorn_mode/samples/persistent/.gitignore @@ -0,0 +1,3 @@ +harness +harness-debug +out diff --git a/unicorn_mode/samples/persistent/COMPILE.md b/unicorn_mode/samples/persistent/COMPILE.md new file mode 100644 index 00000000..781f15c0 --- /dev/null +++ b/unicorn_mode/samples/persistent/COMPILE.md @@ -0,0 +1,24 @@ +# C Sample + +This shows a simple persistent harness for unicornafl in C. +In contrast to the normal c harness, this harness manually resets the unicorn state on each new input. +Thanks to this, we can rerun the testcase in unicorn multiple times, without the need to fork again. + +## Compiling harness.c + +The target can be built using the `make` command. +Just make sure you have built unicorn support first: +```bash +cd /path/to/afl/unicorn_mode +./build_unicorn_support.sh +``` + +## Compiling persistent_target.c + +You don't need to compile persistent_target.c since an X86_64 binary version is +pre-built and shipped in this sample folder. This file documents how the binary +was built in case you want to rebuild it or recompile it for any reason. + +The pre-built binary (persistent_target_x86_64.bin) was built using -g -O0 in gcc. + +We then load the binary and execute the main function directly. diff --git a/unicorn_mode/samples/persistent/Makefile b/unicorn_mode/samples/persistent/Makefile new file mode 100644 index 00000000..fe100490 --- /dev/null +++ b/unicorn_mode/samples/persistent/Makefile @@ -0,0 +1,42 @@ +# UnicornAFL Usage +# Original Unicorn Example Makefile by Nguyen Anh Quynh , 2015 +# Adapted for AFL++ by domenukk , 2020 + +UNAME_S := $(shell uname -s) + +LIBDIR = ../../unicornafl +BIN_EXT = +AR_EXT = a + +# Verbose output? 
+V ?= 0 + +CFLAGS += -Wall -Werror -I../../unicornafl/include + +LDFLAGS += -L$(LIBDIR) -lpthread -lm +ifeq ($(UNAME_S), Linux) +LDFLAGS += -lrt +endif + +ifneq ($(CROSS),) +CC = $(CROSS)gcc +endif + +.PHONY: all clean + +all: harness + +clean: + rm -rf *.o harness harness-debug + +harness.o: harness.c ../../unicornafl/include/unicorn/*.h + ${CC} ${CFLAGS} -O3 -c $< + +harness-debug.o: harness.c ../../unicornafl/include/unicorn/*.h + ${CC} ${CFLAGS} -g -c $< -o $@ + +harness: harness.o + ${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o $@ + +debug: harness-debug.o + ${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o harness-debug diff --git a/unicorn_mode/samples/persistent/harness.c b/unicorn_mode/samples/persistent/harness.c new file mode 100644 index 00000000..d8ebffbc --- /dev/null +++ b/unicorn_mode/samples/persistent/harness.c @@ -0,0 +1,269 @@ +/* + Persistent test harness for AFL++'s unicornafl c mode. + + This loads the persistent_target.bin binary (precompiled as X86_64 code) into + Unicorn's memory map for emulation, places the specified input into + the argv buffer (handed in as first parameter), and executes 'main()'. + Any crashes during emulation will automatically be handled by the afl-fuzz() function. + + Run under AFL as follows: + + $ cd /unicorn_mode/samples/persistent/ + $ make + $ ../../../afl-fuzz -m none -i sample_inputs -o out -- ./harness @@ + + (Re)run a simgle input with block tracing using: + + $ ./harness -t [inputfile] +*/ + +// This is not your everyday Unicorn. 
+#define UNICORN_AFL + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// Path to the file containing the binary to emulate +#define BINARY_FILE ("persistent_target_x86_64") + +// Memory map for the code to be tested +// Arbitrary address where code to test will be loaded +static const int64_t BASE_ADDRESS = 0x100000; +static const int64_t CODE_ADDRESS = 0x101139; +static const int64_t END_ADDRESS = 0x10120d; +// Address of the stack (Some random address again) +static const int64_t STACK_ADDRESS = (((int64_t) 0x01) << 58); +// Size of the stack (arbitrarily chosen, just make it big enough) +static const int64_t STACK_SIZE = 0x10000; +// Location where the input will be placed (make sure the emulated program knows this somehow, too ;) ) +static const int64_t INPUT_LOCATION = 0x10000; +// Inside the location, we have an offset in our special case +static const int64_t INPUT_OFFSET = 0x16; +// Maximum allowable size of mutated data from AFL +static const int64_t INPUT_SIZE_MAX = 0x10000; +// Alignment for unicorn mappings (seems to be needed) +static const int64_t ALIGNMENT = 0x1000; + +// In our special case, we emulate main(), so argc is needed. +static const uint64_t EMULATED_ARGC = 2; + +// The return from our fake strlen +static size_t current_input_len = 0; + +static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size); +} + +static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size); +} + +/* +The sample uses strlen, since we don't have a loader or libc, we'll fake it. +We know the strlen will return the length of argv[1] that we just planted. +It will be a lot faster than an actual strlen for this specific purpose. 
+*/ +static void hook_strlen(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + //Hook + //116b: e8 c0 fe ff ff call 1030 + // We place the return at RAX + uc_reg_write(uc, UC_X86_REG_RAX, ¤t_input_len); + // We skip the actual call by updating RIP + //printf("Strlen hook at addr 0x%lx (size: 0x%x), result: %ld\n", address, size, current_input_len); + uint64_t next_addr = address + size; + uc_reg_write(uc, UC_X86_REG_RIP, &next_addr); +} + +/* Unicorn page needs to be 0x1000 aligned, apparently */ +static uint64_t pad(uint64_t size) { + if (size % ALIGNMENT == 0) return size; + return ((size / ALIGNMENT) + 1) * ALIGNMENT; +} + +/* returns the filesize in bytes, -1 or error. */ +static off_t afl_mmap_file(char *filename, char **buf_ptr) { + + off_t ret = -1; + + int fd = open(filename, O_RDONLY); + + struct stat st = {0}; + if (fstat(fd, &st)) goto exit; + + off_t in_len = st.st_size; + if (in_len == -1) { + /* This can only ever happen on 32 bit if the file is exactly 4gb. */ + fprintf(stderr, "Filesize of %s too large", filename); + goto exit; + } + + *buf_ptr = mmap(0, in_len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + + if (*buf_ptr != MAP_FAILED) ret = in_len; + +exit: + close(fd); + return ret; + +} + +/* Place the input at the right spot inside unicorn */ +static bool place_input_callback( + uc_engine *uc, + char *input, + size_t input_len, + uint32_t persistent_round, + void *data +){ + // printf("Placing input with len %ld to %x\n", input_len, DATA_ADDRESS); + if (input_len < 1 || input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) { + // Test input too short or too long, ignore this testcase + return false; + } + + // For persistent mode, we have to set up stack and memory each time. 
+ uc_reg_write(uc, UC_X86_REG_RIP, &CODE_ADDRESS); // Set the instruction pointer back + // Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly) + uc_reg_write(uc, UC_X86_REG_RSI, &INPUT_LOCATION); // argv + uc_reg_write(uc, UC_X86_REG_RDI, &EMULATED_ARGC); // argc == 2 + + // We need a valid c string, make sure it never goes out of bounds. + input[input_len-1] = '\0'; + // Write the testcase to unicorn. + uc_mem_write(uc, INPUT_LOCATION + INPUT_OFFSET, input, input_len); + + // store input_len for the faux strlen hook + current_input_len = input_len; + + return true; +} + +static void mem_map_checked(uc_engine *uc, uint64_t addr, size_t size, uint32_t mode) { + size = pad(size); + //printf("SIZE %lx, align: %lx\n", size, ALIGNMENT); + uc_err err = uc_mem_map(uc, addr, size, mode); + if (err != UC_ERR_OK) { + printf("Error mapping %ld bytes at 0x%lx: %s (mode: %d)\n", size, addr, uc_strerror(err), mode); + exit(1); + } +} + +int main(int argc, char **argv, char **envp) { + if (argc == 1) { + printf("Test harness for simple_target.bin. Usage: harness [-t] \n"); + exit(1); + } + bool tracing = false; + char *filename = argv[1]; + if (argc > 2 && !strcmp(argv[1], "-t")) { + tracing = true; + filename = argv[2]; + } + + uc_engine *uc; + uc_err err; + uc_hook hooks[2]; + char *file_contents; + + // Initialize emulator in X86_64 mode + err = uc_open(UC_ARCH_X86, UC_MODE_64, &uc); + if (err) { + printf("Failed on uc_open() with error returned: %u (%s)\n", + err, uc_strerror(err)); + return -1; + } + + printf("Loading data input from %s\n", BINARY_FILE); + off_t len = afl_mmap_file(BINARY_FILE, &file_contents); + if (len < 0) { + perror("Could not read binary to emulate"); + return -2; + } + if (len == 0) { + fprintf(stderr, "File at '%s' is empty\n", BINARY_FILE); + return -3; + } + + // Map memory. 
+ mem_map_checked(uc, BASE_ADDRESS, len, UC_PROT_ALL); + printf("Len: %lx", len); + fflush(stdout); + + // write machine code to be emulated to memory + if (uc_mem_write(uc, BASE_ADDRESS, file_contents, len) != UC_ERR_OK) { + printf("Error writing to CODE"); + } + + // Release copied contents + munmap(file_contents, len); + + // Set the program counter to the start of the code + uint64_t start_address = CODE_ADDRESS; // address of entry point of main() + uint64_t end_address = END_ADDRESS; // Address of last instruction in main() + uc_reg_write(uc, UC_X86_REG_RIP, &start_address); // address of entry point of main() + + // Setup the Stack + mem_map_checked(uc, STACK_ADDRESS - STACK_SIZE, STACK_SIZE, UC_PROT_READ | UC_PROT_WRITE); + uint64_t stack_val = STACK_ADDRESS; + printf("%ld", stack_val); + uc_reg_write(uc, UC_X86_REG_RSP, &stack_val); + + // reserve some space for our input data + mem_map_checked(uc, INPUT_LOCATION, INPUT_SIZE_MAX, UC_PROT_READ); + + // build a "dummy" argv with lenth 2 at 0x10000: + // 0x10000 argv[0] NULL + // 0x10008 argv[1] (char *)0x10016 --. points to the next offset. + // 0x10016 argv[1][0], ... <-^ contains the acutal input data. 
(INPUT_LOCATION + INPUT_OFFSET) + + uc_mem_write(uc, 0x10008, "\x16\x00\x01", 3); // little endian of 0x10016, see above + + + // If we want tracing output, set the callbacks here + if (tracing) { + // tracing all basic blocks with customized callback + uc_hook_add(uc, &hooks[0], UC_HOOK_BLOCK, hook_block, NULL, 1, 0); + uc_hook_add(uc, &hooks[1], UC_HOOK_CODE, hook_code, NULL, BASE_ADDRESS, BASE_ADDRESS + len - 1); + } + + // Add our strlen hook (for this specific testcase only) + int strlen_hook_pos = BASE_ADDRESS + 0x116b; + uc_hook strlen_hook; + uc_hook_add(uc, &strlen_hook, UC_HOOK_CODE, hook_strlen, NULL, strlen_hook_pos, strlen_hook_pos); + + printf("Starting to fuzz :)\n"); + fflush(stdout); + + // let's gooo + uc_afl_ret afl_ret = uc_afl_fuzz( + uc, // The unicorn instance we prepared + filename, // Filename of the input to process. In AFL this is usually the '@@' placeholder, outside it's any input file. + place_input_callback, // Callback that places the input (automatically loaded from the file at filename) in the unicorninstance + &end_address, // Where to exit (this is an array) + 1, // Count of end addresses + NULL, // Optional calback to run after each exec + false, // true, if the optional callback should be run also for non-crashes + 1000, // For persistent mode: How many rounds to run + NULL // additional data pointer + ); + switch(afl_ret) { + case UC_AFL_RET_ERROR: + printf("Error starting to fuzz"); + return -3; + break; + case UC_AFL_RET_NO_AFL: + printf("No AFL attached - We are done with a single run."); + break; + default: + break; + } + return 0; +} diff --git a/unicorn_mode/samples/persistent/persistent_target.c b/unicorn_mode/samples/persistent/persistent_target.c new file mode 100644 index 00000000..5b866f86 --- /dev/null +++ b/unicorn_mode/samples/persistent/persistent_target.c @@ -0,0 +1,39 @@ +/* + * Sample target file to test afl-unicorn fuzzing capabilities. 
+ * This is a very trivial example that will crash pretty easily + * in several different exciting ways. + * + * Input is assumed to come from a buffer located at DATA_ADDRESS + * (0x00300000), so make sure that your Unicorn emulation of this + * puts user data there. + * + * Written by Nathan Voss + * Adapted by Lukas Seidel + */ +#include +#include + + +int main(int argc, char** argv) { + if (argc < 2) return -1; + + char *data_buf = argv[1]; + uint64_t data_len = strlen(data_buf); + if (data_len < 20) return -2; + + for (; data_len --> 0 ;) { + if (data_len >= 18) continue; + if (data_len > 2 && data_len < 18) { + ((char *)data_len)[(uint64_t)data_buf] = data_buf[data_len + 1]; + } else if (data_buf[9] == 0x90 && data_buf[10] != 0x00 && data_buf[11] == 0x90) { + // Cause a crash if data[10] is not zero, but [9] and [11] are zero + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + } + if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) { + // Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2] + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + + return 0; +} diff --git a/unicorn_mode/samples/persistent/persistent_target_x86_64 b/unicorn_mode/samples/persistent/persistent_target_x86_64 new file mode 100644 index 0000000000000000000000000000000000000000..22e04357ebd63bb86b55a9ee5f451aa4851a0f9a GIT binary patch literal 16544 zcmeHOYit}>6~4P`C(gs`&5M{M#nU#49FR9|;uxFO&93dW$H-37)Gm)EnXbKSdttpB z?an54Xc`N*AuHDa6dssMyUlUN1c zw}~xc8OUXlGxZ@8Kx>v^uN^Ch_W`2cv!r+87P4dH9wMUOQstp?Q^P3eIM7e3NM@e# zN$Lk9^TqZQYSDoK>KLQd4|6kDXZp>x*iohS4iK^*is&~_e)HtVI7{s@ay~I9__;{& zoY)RJER1N2<-k=vhV?Q9rN!7_(t?rw?gKyU=ld<3_Jh>kQhBMS{xj0yik-GO@{$6gh} zu;17RjE6Q@(4NzeJbzI4e#>4kR7x1H0&Z;y{`MvK2Z2{g*negT9%J88!d^h01~4s+ zBCBVT@s!ZCfzhZom`FvFi4$=U(L&4W(TuK*L=!2|+1=IJrtJ#u3hohFS8tCNi)Z3P ziL4&a^!BtR)2VoGv_FZSLnG-Fd1|IyoI-p|LTxb zi{$`(%qHW0yBTmZ;AX(hfSUm~18xS~47eHi-^svVt3LFuI`NkZb+Y`|JB3gmo7TO? 
zC3WIr#fOcU4UYYA6q*?%J$j80df3>s|TFN7Vc)>cp#s1HIi-<=bHj ztWH%v2QzSWs^u~m8yhm<_x4#3pNEP%RsJo6=6QVsG`$b&(DVvpwyI_vgU=9xeO}t{ z`8>joTWY?bUU*}#dSTwDdR|a3-_qAZfNd0@!k8Vbs%c;F$Nr7CL_jTayZh9M7B5!S z{8hbDoov|-g0Z0xgy-}DIG7j8w*cWeJTn-qs(BQUjJZ@7fU9_CX9tTz&rJ z%W6J%Y2xI|o>ME&gTlD>9c*DTa!H*GRjB#MJk;ma{Ez0QZy5&21$E+NVcqf2c@T|j zUqE9Z)*SQxxehDdW80;CyaxOV>eK;`^!Grxo`O2rH?MkrTy;*I@sb)s_7xy`x#!Ui zH1`A^spo3Vscq7GV%k%6EdwE>p8r$W zSpR!8xEXLW@SnX||d>!y% zz_$VMeJd8viG#v38t`mhy}aTqJnkcp@9gKU8wNgUi}vO6jontLHo)f_z$<{lAL#H` zf4pkd@rrS=chlV;-n~`6&P3ls@cAmV{}S4T{ejcow#sGROVEP({`z5{KMDPcqrSr* z_@cMVU;TtH;#a21BL3RP%iI0+rJPN~E9AT2 zi{L&Eajft6Fr-`D47eF^GvH>x&48N$Hv?`4+zhxGa5M1#oB>`>$?GS1Z6xDrJ~&!m zSxNF4TEzN6l6jrxW|DdR$-W4Sh=7WDOc-&abVSV>9!Q;!$ zFBg0s?0kjb^JnK*2p$)9er0j|*m)SOj{KqCCsqrkBwtzFZ()z)7kvHM`KpqO#eL9d znPRmx>prnYG?nDn3cjALUOur-@O5nG*Nal)#V0lhUUzM`|3LA2wDTK<60$O8-6yI= zeMz40J_|m0U6qM9jRnT6;l)^rUkAKbREndH{NsHD`DYw_E%CDs{$AicmVWZ}-6Q$M zdDAHl-+w`rWgNaeutUI^|E~aFs{czJ*K;z?8nOCzs5nZgINz5xYZsm0p9AlKfA5^< z_g7@^?B{EeUp(G!$~cS9`*$HxUc8Gt^0^N9wUGaD`kmq5g*$;)WE=Lc&q;pq_}L45 z0PgC}@pBvSYrTuFqYmWlp{4ZcCHOIl?;N+Mfmcezc@p?F-i?Cqn{4tm-b zGtDNj0Sz|248^gVn$~eJ+!N6v``R_w%3^nj39bE8`@%h4ZFY^^GXoU-`e_lBBB<>L zh1S`g$7EvV#P zAN%eid)GuTJ2s+6`vLWgiNjn=q+ol|Q4vg~^>}b7l?(Rg60o@|5hE(x+Eu4VhlJD( zN3+8s7#mALJQMYdsW}$UWE1I>P0&D>i6^7zK((VuT?FM}2KD$T)Nw31lWXt=#*q`U=4}c!0jO@?zLB>bPp8<1iEk{7Wxjg&xyphpQ z#@v6lWBe@W@!5c7o?kL56?yizB9FbI5mZzTNQuYm r7*8qphp&J9--G)O)nZZchXKlZM{sV})Z$#XwEv+d)57O)aERg`mmOeI literal 0 HcmV?d00001 diff --git a/unicorn_mode/samples/persistent/sample_all.sh b/unicorn_mode/samples/persistent/sample_all.sh new file mode 100644 index 00000000..01daf365 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_all.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +[ -z "${UNAME}" ] && UNAME=$(uname) + +DIR=`dirname $0` + +if [ "$UNAME" = Darwin ]; then + export DYLD_LIBRARY_PATH=../../unicorn +else + export LD_LIBRARY_PATH=../../unicorn +fi + + + +if [ ! test -e $DIR/harness]; then + echo "[!] 
harness not found in $DIR" + exit 1 +fi \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample1.bin b/unicorn_mode/samples/persistent/sample_inputs/sample1.bin new file mode 100644 index 00000000..85df5078 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample1.bin @@ -0,0 +1 @@ +abcd \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample2.bin b/unicorn_mode/samples/persistent/sample_inputs/sample2.bin new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample3.bin b/unicorn_mode/samples/persistent/sample_inputs/sample3.bin new file mode 100644 index 00000000..6b2aaa76 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample3.bin @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample4.bin b/unicorn_mode/samples/persistent/sample_inputs/sample4.bin new file mode 100644 index 00000000..71bd63e6 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample4.bin @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample5.bin b/unicorn_mode/samples/persistent/sample_inputs/sample5.bin new file mode 100644 index 00000000..aed2973e --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample5.bin @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/simple_target_noncrashing.c b/unicorn_mode/samples/persistent/simple_target_noncrashing.c new file mode 100644 index 00000000..00764473 --- /dev/null +++ b/unicorn_mode/samples/persistent/simple_target_noncrashing.c @@ -0,0 +1,33 @@ +/* + * Sample target file to test afl-unicorn fuzzing capabilities. 
+ * This is a very trivial example that will crash pretty easily + * in several different exciting ways. + * + * Input is assumed to come from a buffer located at DATA_ADDRESS + * (0x00300000), so make sure that your Unicorn emulation of this + * puts user data there. + * + * Written by Nathan Voss + * Adapted by Lukas Seidel + */ + + +int main(int argc, char** argv) { + if(argc < 2){ + return -1; + } + + char *data_buf = argv[1]; + + if len(data_buf < 20) { + if (data_buf[20] != 0) { + printf("Not crashing"); + } else if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) { + printf("Also not crashing with databuf[0] == %c", data_buf[0]) + } else if (data_buf[9] == 0x00 && data_buf[10] != 0x00 && data_buf[11] == 0x00) { + // Cause a crash if data[10] is not zero, but [9] and [11] are zero + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + + return 0; +} diff --git a/unicorn_mode/samples/persistent/simple_target_x86_64 b/unicorn_mode/samples/persistent/simple_target_x86_64 new file mode 100644 index 0000000000000000000000000000000000000000..560264fd8f2329821a6aee4da5eb5bf308facfbd GIT binary patch literal 17624 zcmeHOeQX@X6`#HJ+UHz+zSv2~2jFdh;z;q?2}!UCPVQ`ocYN(W=spbob`{-p9a5?6tu-SblJrq@e7qEthqXg6DG)r5p7lkGq|E|NUq zL@iklqJ&SCr^^yubW*}Zopu?Sh@ee2tFWOu9nAj40WC0qn4zpEgn0x6AToi_1CLKBAIk?W39D zwX54llWn7!Y;j-PzRtC6YgdO0x$r99?)gv=98(*&>|i(i`T71Yz5KOI)30B-rKNA^ zxxHszIfnJdI-oz)!G!u0e#Gg6vQL{jK~b-wyaceOD)X74` z@RQPk9uacFi9feN;5hcMzbH*QaHpj^&;R!z>9zv z0WShx1iT1%5%41Le;946w#WJ% z?A`@pR=jP8G2XolvqtHH-C!K*UJp!pK?uGtCd%dVS#33Bm0hPt!i_BtV>Q_6k?DM6 zON61nH&URx1ALSoc-tryC&wRnTX|^C2>{D)`~bZlicK1aB7UP3y8`(u#`wwkjmO!; zC+&t4=o9kai2dWOBFZ3tXXwFIlx=zFR%xL$9uLW^50pAzdeC8f`K?YVtLKi3%OyL9 zv-T3n281*)UF-)uy9>fA+K26l-ZMDSy$*I_(YpsqzlrV`C|!;YMN9wOVNA5G0Qj!K z7BmPU!vvte>BEVD@NBJif`ih_~NeUebHUf z`=h2ahFBhs9He|)O^qasjF-F!coFa-;6=cTfENKT0$v2X2zU|jBH%^fe-!}*-q9&b z)pe~<0ela3{iSjlw%2UId*$+dK+gmH3eds#%jGjb@jWXh<-m5P>S8Q`27Xq(O~EiUvERL??tc_@1yaZ=uRldLV15M^ex{& 
zaM9yxET~V^#DcAlX??+tM{12==Xl-5U?dytj0QWR!PcIj-VC0VivXS1r1P0{_LAsaax2eoHb{P_ zJOsN~@^lV#spRQ=C!UwZMCUv`oR^042jA;$vMX0Fe^`F|8ZH=2SBcRP4b4chj+!iz7!wW{&Ie;lz%9B zSzYP^F z6dVD(W)tdM*X)Ce`JPBI?J<|q*(s$ZQpsYqXI)Cd%MDUtgt59BEt9pG-;ZVc{U5`3 zy;A!mC;k!;rp!}r2{s4R;L_kcrKv7-9h4tdHY`?ycLdf2x&pT@9%x2EWYJSSiR!I3c z*Z|-B8VE!~VTH`to>9w$BjhQ|4kthdZcluU)%&Qk)CNV zSP8p*qV>W#SsQgOR!9KFC3arnt3ap#A^Y1@5|)mH zPFYzipGg24&nME|;@W}UmeSj{cCcc$kV$2&q@GB}^NgRhcSjD2*~`QC312EiobJCk z)l~Rzj_z?(;wrl+etS+Bdb5W4t2|ItxcNfQMW=xZH%;jIS&7#%dQMg1en!u?O1z%Y zbF&hMr!ZXza0pbmJ)~E~=Q5fv9O}?6m`!viBJwH=PTgB^N>$d#I;!f27|lPG`tzpd zol3lkReL_FteMe0gi3w*7c}BTL|%p2W>HmqA#1IQ!y}h4xTDiad^nUs12}fuKvaO7V(2Fyj9|~t^k%NycVLc>G)th zFl~m9!e;iD4LJIzbqeZYItU*H2BEv(p8@bFwgUC%c;K69L3<$h+4x91>GJ~s1r*9>?Q_KRGP(fa8oz;#|mJdU{I`5>29?0bM? zdne_5ZApV)0u8~q>WGjBL3c}iU5?LoDSiQP)OWY{CsLm#L|XRz9B{0sTmNOi^~xeS z_d4Lqs^Bm+WpL(A@Bs2T&-4fkA&|4|tSJVK4iPwx7(u2{rotYa4xAsH_&8_|Ycg2Rx zp=i%w488J;ZHYpWSN)T(vROFMlgad5xJabpGMvem15}U7+izyFkTcWqY!YwA4Qz#L zamh^9EEX(i&U8&MGQbZP3gX_JSOu6ST&$yD;ElVg*XSxQ?p0Qeg$w(~?D#NHJ1=OO za7c+1%GLw{0v}d5rj;9!xrQ?M(3n%wy!JnXZUP$;tDVMEa zOpxWR(Ks5gFmGHKmQ6Mb<7fd{j>k1@rRDfYC&7X^VIc;Ju%JA+i;razP*x6n!B=?C zC}e~)2p$W=ki~TzVAgOk1$Oal3dXl&Gue?G<>SM{d226ma2?d5JOrrPPwBH@I77lu z6P~3a(fW+2d!|HPXSxX(+*6W0t>cK^>{Nz4J+}y63mK<9t@nuPq$sJ|z7OyZV2?@V z)4Gr-)t}}8oX?$U3oy7_B70gt5$?Cwtl--whZpMaiDlnM7${4k|pk<99!x zxQ8ZtT8|Q?{~M_Oq(^i&*yEm@cv`m-)p@DnuC=rN5@rV=gE1w0THg{qDHW-Fcl#d( zIr_!;@I>n#N!=?jcmI78jGXo~FA-fs6?3_lEW`eB+S9t4DE&V}`tI_-E$ye~Ze40h z>eeTo==Y^PRkMcdB!y$=6^ZN!U-x+^tVM z(U-us(P2;jOLc?+ghYCRo8}3}bsqLTl~3zEy4U|Kb`B&OA7n?v1lNP8LiV)ZI42h- zt!V5>WKZ-w$ei{pB9%J?rQfMUr`kjI7eL^&r*+yQX;1ZZ+Y|p5WY8DYpVmXI(w@p8 zeHjj7*(Jy!Ce^2KvX{%cDV*$yekkpy6lA7LI&}LbJJR<9;Kopvk%n_zLKn;C&thL# zDN1;h3X;@qzibx!kEH#J|HeKn?cMGC@hV|>Y!-W}2U7a&IaSlQ39ds9+#7+#GN?ak jT;czv)c3+de1E4)axUjoF6m Date: Mon, 27 Jan 2020 13:34:59 +0100 Subject: [PATCH 37/43] update binary_fuzzing doc --- docs/binaryonly_fuzzing.md | 212 +++++++++++++++++++------------------ 1 file changed, 
110 insertions(+), 102 deletions(-) diff --git a/docs/binaryonly_fuzzing.md b/docs/binaryonly_fuzzing.md index d22e4ce2..6eff30d7 100644 --- a/docs/binaryonly_fuzzing.md +++ b/docs/binaryonly_fuzzing.md @@ -1,153 +1,161 @@ -#Fuzzing binary-only programs with afl++ +# Fuzzing binary-only programs with afl++ -afl++, libfuzzer and others are great if you have the source code, and -it allows for very fast and coverage guided fuzzing. + afl++, libfuzzer and others are great if you have the source code, and + it allows for very fast and coverage guided fuzzing. -However, if there is only the binary program and no source code available, -then standard `afl-fuzz -n` (dumb mode) is not effective. + However, if there is only the binary program and no source code available, + then standard `afl-fuzz -n` (dumb mode) is not effective. -The following is a description of how these binaries can be fuzzed with afl++ + The following is a description of how these binaries can be fuzzed with afl++ -!!!!! -TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then - use afl -Q qemu_mode, or better: use both in parallel. -!!!!! + !!!!! + TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then + use afl -Q qemu_mode, or better: use both in parallel. + !!!!! -##QEMU -Qemu is the "native" solution to the program. -It is available in the ./qemu_mode/ directory and once compiled it can -be accessed by the afl-fuzz -Q command line option. -The speed decrease is at about 50% -It is the easiest to use alternative and even works for cross-platform binaries. +## QEMU -Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz) -which now has a qemu_mode, but its performance is just 1.5%! + Qemu is the "native" solution to the program. + It is available in the ./qemu_mode/ directory and once compiled it can + be accessed by the afl-fuzz -Q command line option. + The speed decrease is at about 50%. 
+ It is the easiest to use alternative and even works for cross-platform binaries. -As it is included in afl++ this needs no URL. + Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz) + which now has a qemu_mode, but its performance is just 1.5%! + + As it is included in afl++ this needs no URL. -##WINE+QEMU -Wine mode can run Win32 PE binaries with the QEMU instrumentation. -It needs Wine, python3 and the pefile python package installed. +## WINE+QEMU -As it is included in afl++ this needs no URL. + Wine mode can run Win32 PE binaries with the QEMU instrumentation. + It needs Wine, python3 and the pefile python package installed. + + As it is included in afl++ this needs no URL. -##UNICORN -Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. -In contrast to QEMU, Unicorn does not offer a full system or even userland -emulation. Runtime environment and/or loaders have to be written from scratch, -if needed. On top, block chaining has been removed. This means the speed boost -introduced in the patched QEMU Mode of afl++ cannot simply be ported over to -Unicorn. For further information, check out ./unicorn_mode.txt. +## UNICORN -As it is included in afl++ this needs no URL. + Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. + In contrast to QEMU, Unicorn does not offer a full system or even userland + emulation. Runtime environment and/or loaders have to be written from scratch, + if needed. On top, block chaining has been removed. This means the speed boost + introduced in the patched QEMU Mode of afl++ cannot simply be ported over to + Unicorn. For further information, check out ./unicorn_mode.txt. + + As it is included in afl++ this needs no URL. -##DYNINST -Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio -(see far below). 
However whereas Pintool and Dynamorio work at runtime, dyninst -instruments the target at load time, and then let it run - or save the -binary with the changes. -This is great for some things, e.g. fuzzing, and not so effective for others, -e.g. malware analysis. +## DYNINST -So what we can do with dyninst is taking every basic block, and put afl's -instrumention code in there - and then save the binary. -Afterwards we can just fuzz the newly saved target binary with afl-fuzz. -Sounds great? It is. The issue though - it is a non-trivial problem to -insert instructions, which change addresses in the process space, so that -everything is still working afterwards. Hence more often than not binaries -crash when they are run. + Dyninst is a binary instrumentation framework similar to Pintool and + Dynamorio (see far below). However whereas Pintool and Dynamorio work at + runtime, dyninst instruments the target at load time, and then lets it run - + or save the binary with the changes. + This is great for some things, e.g. fuzzing, and not so effective for others, + e.g. malware analysis. -The speed decrease is about 15-35%, depending on the optimization options -used with afl-dyninst. + So what we can do with dyninst is taking every basic block, and put afl's + instrumentation code in there - and then save the binary. + Afterwards we can just fuzz the newly saved target binary with afl-fuzz. + Sounds great? It is. The issue though - it is a non-trivial problem to + insert instructions, which change addresses in the process space, so that + everything is still working afterwards. Hence more often than not binaries + crash when they are run. -So if dyninst works, it is the best option available. Otherwise it just doesn't -work well. + The speed decrease is about 15-35%, depending on the optimization options + used with afl-dyninst. 
-[https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) + So if Dyninst works, it is the best option available. Otherwise it just + doesn't work well. + + [https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) -##INTEL-PT -If you have a newer Intel CPU, you can make use of Intels processor trace. -The big issue with Intel's PT is the small buffer size and the complex -encoding of the debug information collected through PT. -This makes the decoding very CPU intensive and hence slow. -As a result, the overall speed decrease is about 70-90% (depending on -the implementation and other factors). +## INTEL-PT -There are two afl intel-pt implementations: + If you have a newer Intel CPU, you can make use of Intels processor trace. + The big issue with Intel's PT is the small buffer size and the complex + encoding of the debug information collected through PT. + This makes the decoding very CPU intensive and hence slow. + As a result, the overall speed decrease is about 70-90% (depending on + the implementation and other factors). -1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt) - => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel. + There are two afl intel-pt implementations: -2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer) - => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must - be used. This one is faster than the other. + 1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt) + => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel. -Note that there is also honggfuzz: https://github.com/google/honggfuzz -But its IPT performance is just 6%! + 2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer) + => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must + be used. 
This one is faster than the other. + + Note that there is also honggfuzz: https://github.com/google/honggfuzz + But its IPT performance is just 6%! -##CORESIGHT -Coresight is ARM's answer to Intel's PT. -There is no implementation so far which handle coresight and getting -it working on an ARM Linux is very difficult due to custom kernel building -on embedded systems is difficult. And finding one that has coresight in -the ARM chip is difficult too. -My guess is that it is slower than Qemu, but faster than Intel PT. +## CORESIGHT -If anyone finds any coresight implementation for afl please ping me: -vh@thc.org + Coresight is ARM's answer to Intel's PT. + There is no implementation so far which handles coresight, and getting + it working on an ARM Linux is very difficult because custom kernel building + on embedded systems is difficult. And finding one that has coresight in + the ARM chip is difficult too. + My guess is that it is slower than Qemu, but faster than Intel PT. + + If anyone finds any coresight implementation for afl please ping me: vh@thc.org -##FRIDA -Frida is a dynamic instrumentation engine like Pintool, Dyninst and Dynamorio. -What is special is that it is written Python, and scripted with Javascript. -It is mostly used to reverse binaries on mobile phones however can be used -everywhere. +## FRIDA + + Frida is a dynamic instrumentation engine like Pintool, Dyninst and Dynamorio. + What is special is that it is written in Python, and scripted with Javascript. + It is mostly used to reverse binaries on mobile phones however can be used + everywhere. 
+ + There is a WIP fuzzer available at [https://github.com/andreafioraldi/frida-fuzzer](https://github.com/andreafioraldi/frida-fuzzer) -##PIN & DYNAMORIO -Pintool and Dynamorio are dynamic instrumentation engines, and they can be -used for getting basic block information at runtime. -Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows -whereas Dynamorio is additionally available for ARM and AARCH64. -Dynamorio is also 10x faster than Pintool. +## PIN & DYNAMORIO -The big issue with Dynamorio (and therefore Pintool too) is speed. -Dynamorio has a speed decrease of 98-99% -Pintool has a speed decrease of 99.5% + Pintool and Dynamorio are dynamic instrumentation engines, and they can be + used for getting basic block information at runtime. + Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows + whereas Dynamorio is additionally available for ARM and AARCH64. + Dynamorio is also 10x faster than Pintool. -Hence Dynamorio is the option to go for if everything fails, and Pintool -only if Dynamorio fails too. + The big issue with Dynamorio (and therefore Pintool too) is speed. + Dynamorio has a speed decrease of 98-99% + Pintool has a speed decrease of 99.5% -Dynamorio solutions: + Hence Dynamorio is the option to go for if everything fails, and Pintool + only if Dynamorio fails too. 
+ + Dynamorio solutions: * [https://github.com/vanhauser-thc/afl-dynamorio](https://github.com/vanhauser-thc/afl-dynamorio) * [https://github.com/mxmssh/drAFL](https://github.com/mxmssh/drAFL) * [https://github.com/googleprojectzero/winafl/](https://github.com/googleprojectzero/winafl/) <= very good but windows only -Pintool solutions: + Pintool solutions: * [https://github.com/vanhauser-thc/afl-pin](https://github.com/vanhauser-thc/afl-pin) * [https://github.com/mothran/aflpin](https://github.com/mothran/aflpin) * [https://github.com/spinpx/afl_pin_mode](https://github.com/spinpx/afl_pin_mode) <= only old Pintool version supported -##Non-AFL solutions -There are many binary-only fuzzing frameworks. -Some are great for CTFs but don't work with large binaries, others are very -slow but have good path discovery, some are very hard to set-up ... +## Non-AFL solutions -* QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym) -* Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore) -* S2E: [https://github.com/S2E](https://github.com/S2E) -* + There are many binary-only fuzzing frameworks. + Some are great for CTFs but don't work with large binaries, others are very + slow but have good path discovery, some are very hard to set-up ... + + * QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym) + * Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore) + * S2E: [https://github.com/S2E](https://github.com/S2E) + * ... please send me any missing that are good ## Closing words -That's it! News, corrections, updates? Send an email to vh@thc.org + That's it! News, corrections, updates? 
Send an email to vh@thc.org From d3dcc352da80929d2dfffc853a4aecd313175cb8 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 28 Jan 2020 09:17:55 +0000 Subject: [PATCH 38/43] First tests with LLVM 11 --- llvm_mode/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index ebe6b9de..e952e5fb 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -29,14 +29,14 @@ ifeq "$(shell uname)" "OpenBSD" LLVM_CONFIG ?= $(BIN_PATH)/llvm-config HAS_OPT = $(shell test -x $(BIN_PATH)/opt && echo 0 || echo 1) ifeq "$(HAS_OPT)" "1" - $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 10) -> e.g. "pkg_add llvm-7.0.1p9") + $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 11) -> e.g. "pkg_add llvm-7.0.1p9") endif else LLVM_CONFIG ?= llvm-config endif LLVMVER = $(shell $(LLVM_CONFIG) --version 2>/dev/null ) -LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[1-9]' && echo 1 || echo 0 ) +LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 ) LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 ) LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//') LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) @@ -48,7 +48,7 @@ ifeq "$(LLVMVER)" "" endif ifeq "$(LLVM_UNSUPPORTED)" "1" - $(warning llvm_mode only supports llvm versions 3.8.0 up to 10) + $(warning llvm_mode only supports llvm versions 3.8.0 up to 11) endif ifeq "$(LLVM_MAJOR)" "9" From 465033b04a4ebfb7693925303620613a8d4a223e Mon Sep 17 00:00:00 2001 From: van Hauser Date: Tue, 28 Jan 2020 11:00:51 +0100 Subject: [PATCH 39/43] bump llvm version --- README.md | 8 ++++---- llvm_mode/README.md | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2edca8af..dc43d5d2 100644 --- a/README.md +++ 
b/README.md @@ -27,7 +27,7 @@ get any feature improvements since November 2017. Among other changes afl++ has a more performant llvm_mode, supports - llvm up to version 10, QEMU 3.1, more speed and crashfixes for QEMU, + llvm up to version 11, QEMU 3.1, more speed and crashfixes for QEMU, better *BSD and Android support and much, much more. Additionally the following features and patches have been integrated: @@ -204,7 +204,7 @@ superior to blind fuzzing or coverage-only tools. PLEASE NOTE: llvm_mode compilation with afl-clang-fast/afl-clang-fast++ instead of afl-gcc/afl-g++ is much faster and has a few cool features. See llvm_mode/ - however few code does not compile with llvm. -We support llvm versions 3.8.0 to 10. +We support llvm versions 3.8.0 to 11. When source code is available, instrumentation can be injected by a companion tool that works as a drop-in replacement for gcc or clang in any standard build @@ -227,7 +227,7 @@ For C++ programs, you'd would also want to set `CXX=/path/to/afl/afl-g++`. The clang wrappers (afl-clang and afl-clang++) can be used in the same way; clang users may also opt to leverage a higher-performance instrumentation mode, as described in [llvm_mode/README.md](llvm_mode/README.md). -Clang/LLVM has a much better performance and works with LLVM version 3.8.0 to 10. +Clang/LLVM has a much better performance and works with LLVM version 3.8.0 to 11. Using the LAF Intel performance enhancements are also recommended, see [llvm_mode/README.laf-intel.md](llvm_mode/README.laf-intel.md) @@ -272,7 +272,7 @@ $ ./build_qemu_support.sh For additional instructions and caveats, see [qemu_mode/README.md](qemu_mode/README.md). The mode is approximately 2-5x slower than compile-time instrumentation, is -less conductive to parallelization, and may have some other quirks. +less conducive to parallelization, and may have some other quirks. 
If [afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) works for your binary, then you can use afl-fuzz normally and it will have twice diff --git a/llvm_mode/README.md b/llvm_mode/README.md index 150d1a17..54788aba 100644 --- a/llvm_mode/README.md +++ b/llvm_mode/README.md @@ -5,7 +5,7 @@ ## 1) Introduction -! llvm_mode works with llvm versions 3.8.0 up to 10 ! +! llvm_mode works with llvm versions 3.8.0 up to 11 ! The code in this directory allows you to instrument programs for AFL using true compiler-level instrumentation, instead of the more crude From bb88d98ff8f8f1b1a434643ccd30dcd48b529a64 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Tue, 28 Jan 2020 19:23:04 +0100 Subject: [PATCH 40/43] android: prefer bigcores --- docs/ChangeLog | 2 ++ llvm_mode/LLVMInsTrim.so.cc | 7 ++++--- src/afl-fuzz-init.c | 38 +++++++++++++++++++++++++++---------- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/docs/ChangeLog b/docs/ChangeLog index c1d53379..5017a803 100644 --- a/docs/ChangeLog +++ b/docs/ChangeLog @@ -21,10 +21,12 @@ Version ++2.60d (develop): - afl-fuzz: - now prints the real python version support compiled in - set stronger performance compile options and little tweaks + - Android: prefer bigcores when selecting a CPU - afl-clang-fast: - show in the help output for which llvm version it was compiled for - now does not need to be recompiled between trace-pc and pass instrumentation. 
compile normally and set AFL_LLVM_USE_TRACE_PC :) + - llvm 11 is supported - afl-cmin is now a sh script (invoking awk) instead of bash for portability the original script is still present as afl-cmin.bash - added blacklist and whitelisting function check in all modules of llvm_mode diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 24df6d42..39b2dedd 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -190,7 +190,8 @@ struct InsTrim : public ModulePass { if (instFilename.str().empty()) { - /* If the original location is empty, try using the inlined location */ + /* If the original location is empty, try using the inlined location + */ DILocation *oDILoc = cDILoc->getInlinedAt(); if (oDILoc) { @@ -424,8 +425,8 @@ struct InsTrim : public ModulePass { } - OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", - total_instr, total_rs, total_hs, + OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", total_instr, + total_rs, total_hs, getenv("AFL_HARDEN") ? "hardened" : ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c index 2ef2c4e7..6efa6227 100644 --- a/src/afl-fuzz-init.c +++ b/src/afl-fuzz-init.c @@ -184,11 +184,21 @@ void bind_to_free_cpu(void) { "For this platform we do not have free CPU binding code yet. If possible, please supply a PR to https://github.com/vanhauser-thc/AFLplusplus" #endif - for (i = 0; i < cpu_core_count; ++i) - if (!cpu_used[i]) break; + size_t cpu_start = 0; + try: +#ifndef __ANDROID__ + for (i = cpu_start; i < cpu_core_count; i++) + if (!cpu_used[i]) break; if (i == cpu_core_count) { +#else + for (i = cpu_core_count - cpu_start - 1; i > -1; i--) + if (!cpu_used[i]) break; + if (i == -1) { + +#endif + SAYF("\n" cLRD "[-] " cRST "Uh-oh, looks like all %d CPU cores on your system are allocated to\n" " other instances of afl-fuzz (or similar CPU-locked tasks). 
" @@ -197,12 +207,11 @@ void bind_to_free_cpu(void) { "you are\n" " absolutely sure, you can set AFL_NO_AFFINITY and try again.\n", cpu_core_count); - FATAL("No more free CPU cores"); } - OKF("Found a free CPU core, binding to #%u.", i); + OKF("Found a free CPU core, try binding to #%u.", i); cpu_aff = i; @@ -212,22 +221,31 @@ void bind_to_free_cpu(void) { #elif defined(__NetBSD__) c = cpuset_create(); if (c == NULL) PFATAL("cpuset_create failed"); - cpuset_set(i, c); #endif #if defined(__linux__) - if (sched_setaffinity(0, sizeof(c), &c)) PFATAL("sched_setaffinity failed"); + if (sched_setaffinity(0, sizeof(c), &c)) { + + if (cpu_start == cpu_core_count) + PFATAL("sched_setaffinity failed for CPU %d, exit", i); + WARNF("sched_setaffinity failed to CPU %d, trying next CPU", i); + cpu_start++; + goto try + ; + + } + #elif defined(__FreeBSD__) || defined(__DragonFly__) if (pthread_setaffinity_np(pthread_self(), sizeof(c), &c)) PFATAL("pthread_setaffinity failed"); #elif defined(__NetBSD__) - if (pthread_setaffinity_np(pthread_self(), cpuset_size(c), c)) - PFATAL("pthread_setaffinity failed"); +if (pthread_setaffinity_np(pthread_self(), cpuset_size(c), c)) + PFATAL("pthread_setaffinity failed"); - cpuset_destroy(c); +cpuset_destroy(c); #else - // this will need something for other platforms +// this will need something for other platforms #endif } From b13bb64c3b0fb938e7807ab999cbb79906a8c2a4 Mon Sep 17 00:00:00 2001 From: hexcoder Date: Tue, 28 Jan 2020 23:15:06 +0100 Subject: [PATCH 41/43] replace -maxdepth with posix -prune (portability) --- afl-cmin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/afl-cmin b/afl-cmin index e9d713aa..9179628e 100755 --- a/afl-cmin +++ b/afl-cmin @@ -330,7 +330,7 @@ BEGIN { } else { stat_format = "-f '%z %N'" # *BSD, MacOS } - cmdline = "cd "in_dir" && find . -maxdepth 1 -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" + cmdline = "cd "in_dir" && find . \\( ! -name . 
-a -type d -prune \\) -o -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" while (cmdline | getline) { infilesSmallToBig[i++] = $0 } From ceed66930ef15922cd25e70a4770eaa31309e0ce Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Thu, 30 Jan 2020 21:32:08 +0100 Subject: [PATCH 42/43] lower requirements for lower llvm/clang versions 3.7.1 works with the exception of InsTrim, 3.8.1 and above is ok --- llvm_mode/LLVMInsTrim.so.cc | 61 +++++++++- llvm_mode/Makefile | 4 +- llvm_mode/MarkNodes.cc | 11 ++ llvm_mode/afl-llvm-pass.so.cc | 71 ++++++++++- llvm_mode/compare-transform-pass.so.cc | 58 ++++++++- llvm_mode/split-compares-pass.so.cc | 156 ++++++++++++++++++------- llvm_mode/split-switches-pass.so.cc | 90 ++++++++++++-- 7 files changed, 387 insertions(+), 64 deletions(-) diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 39b2dedd..5b7b79e1 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -3,10 +3,23 @@ #include #include +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/DebugInfo.h" +#else +#include "llvm/Support/CFG.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/DebugInfo.h" +#endif #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LegacyPassManager.h" @@ -16,9 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" #include #include #include @@ -97,7 +108,7 @@ struct InsTrim : public ModulePass { // ripped from aflgo static bool isBlacklisted(const Function *F) { 
- static const SmallVector Blacklist = { + static const char *Blacklist[] = { "asan.", "llvm.", @@ -173,6 +184,8 @@ struct InsTrim : public ModulePass { StringRef instFilename; unsigned int instLine = 0; +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) for (auto &BB : F) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); @@ -227,6 +240,48 @@ struct InsTrim : public ModulePass { } +#else + for (auto &BB : F) { + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + if (Loc.isUnknown()) Loc = IP->getDebugLoc(); + + } + + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + instLine = cDILoc.getLineNumber(); + instFilename = cDILoc.getFilename(); + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif /* Either we couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. 
*/ if (!instrumentBlock) { diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index e952e5fb..50b1d48c 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -36,7 +36,7 @@ else endif LLVMVER = $(shell $(LLVM_CONFIG) --version 2>/dev/null ) -LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 ) +LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-3]|^1[2-9]' && echo 1 || echo 0 ) LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 ) LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//') LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) @@ -201,7 +201,7 @@ endif ln -sf afl-clang-fast ../afl-clang-fast++ ../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps - $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) + -$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) ../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) diff --git a/llvm_mode/MarkNodes.cc b/llvm_mode/MarkNodes.cc index caa8cede..7b22bac0 100644 --- a/llvm_mode/MarkNodes.cc +++ b/llvm_mode/MarkNodes.cc @@ -3,11 +3,22 @@ #include #include #include + +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) #include "llvm/IR/CFG.h" +#else +#include "llvm/Support/CFG.h" +#endif #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" diff --git 
a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc index 15cc6127..2cd23adf 100644 --- a/llvm_mode/afl-llvm-pass.so.cc +++ b/llvm_mode/afl-llvm-pass.so.cc @@ -37,14 +37,26 @@ #include #include -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/BasicBlock.h" +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/CFG.h" +#else +#include "llvm/DebugInfo.h" +#include "llvm/Support/CFG.h" +#endif using namespace llvm; @@ -78,7 +90,7 @@ class AFLCoverage : public ModulePass { // ripped from aflgo static bool isBlacklisted(const Function *F) { - static const SmallVector Blacklist = { + static const char *Blacklist[] = { "asan.", "llvm.", @@ -197,6 +209,8 @@ bool AFLCoverage::runOnModule(Module &M) { * For now, just instrument the block if we are not able * to determine our location. */ DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) if (Loc) { DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); @@ -249,6 +263,47 @@ bool AFLCoverage::runOnModule(Module &M) { } +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. 
Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + /* Either we couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. */ if (!instrumentBlock) continue; @@ -273,13 +328,19 @@ bool AFLCoverage::runOnModule(Module &M) { // result: a little more speed and less map pollution int more_than_one = -1; // fprintf(stderr, "BB %u: ", cur_loc); - for (BasicBlock *Pred : predecessors(&BB)) { + for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB); PI != E; + ++PI) { + + BasicBlock *Pred = *PI; int count = 0; if (more_than_one == -1) more_than_one = 0; // fprintf(stderr, " %p=>", Pred); - for (BasicBlock *Succ : successors(Pred)) { + for (succ_iterator SI = succ_begin(Pred), E = succ_end(Pred); SI != E; + ++SI) { + + BasicBlock *Succ = *SI; // if (count > 0) // fprintf(stderr, "|"); diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc index 5d924b63..e1332a9d 100644 --- a/llvm_mode/compare-transform-pass.so.cc +++ b/llvm_mode/compare-transform-pass.so.cc @@ -22,9 +22,9 @@ #include #include #include +#include "llvm/Config/llvm-config.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -32,10 +32,19 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/Verifier.h" +#include "llvm/IR/DebugInfo.h" +#else 
+#include "llvm/Analysis/Verifier.h" +#include "llvm/DebugInfo.h" +#define nullptr 0 +#endif + #include using namespace llvm; @@ -115,7 +124,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, c = M.getOrInsertFunction("tolower", Int32Ty, Int32Ty #if LLVM_VERSION_MAJOR < 5 , - nullptr + NULL #endif ); #if LLVM_VERSION_MAJOR < 9 @@ -140,6 +149,8 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, * For now, just instrument the block if we are not able * to determine our location. */ DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) if (Loc) { DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); @@ -192,6 +203,47 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, } +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + /* Either we couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. 
*/ if (!instrumentBlock) continue; diff --git a/llvm_mode/split-compares-pass.so.cc b/llvm_mode/split-compares-pass.so.cc index bc25b322..e16993d6 100644 --- a/llvm_mode/split-compares-pass.so.cc +++ b/llvm_mode/split-compares-pass.so.cc @@ -24,16 +24,25 @@ #include #include +#include "llvm/Config/llvm-config.h" + #include "llvm/Pass.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/Support/raw_ostream.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/Verifier.h" #include "llvm/IR/Module.h" #include "llvm/IR/IRBuilder.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/Verifier.h" +#include "llvm/IR/DebugInfo.h" +#else +#include "llvm/Analysis/Verifier.h" +#include "llvm/DebugInfo.h" +#define nullptr 0 +#endif using namespace llvm; @@ -66,7 +75,7 @@ class SplitComparesTransform : public ModulePass { static bool isBlacklisted(const Function *F) { - static const SmallVector Blacklist = { + static const char *Blacklist[] = { "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign." @@ -139,6 +148,8 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { * For now, just instrument the block if we are not able * to determine our location. 
*/ DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) if (Loc) { DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); @@ -191,6 +202,47 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { } +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + /* Either we couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. 
*/ if (!instrumentBlock) continue; @@ -283,7 +335,8 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { * block bb it is now at the position where the old IcmpInst was */ Instruction *icmp_np; icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_np); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_np); /* create a new basic block which holds the new EQ icmp */ Instruction *icmp_eq; @@ -348,7 +401,8 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { * block bb it is now at the position where the old IcmpInst was */ Instruction *fcmp_np; fcmp_np = CmpInst::Create(Instruction::FCmp, new_pred, op0, op1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), fcmp_np); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + fcmp_np); /* create a new basic block which holds the new EQ fcmp */ Instruction *fcmp_eq; @@ -469,20 +523,21 @@ bool SplitComparesTransform::simplifyIntSignedness(Module &M) { s_op0 = BinaryOperator::Create(Instruction::LShr, op0, ConstantInt::get(IntType, bitw - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); t_op0 = new TruncInst(s_op0, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op0); s_op1 = BinaryOperator::Create(Instruction::LShr, op1, ConstantInt::get(IntType, bitw - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); t_op1 = new TruncInst(s_op1, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op1); /* compare of the sign bits */ icmp_sign_bit = 
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_op0, t_op1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_sign_bit); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_sign_bit); /* create a new basic block which is executed if the signedness bit is * different */ @@ -557,6 +612,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { LLVMContext &C = M.getContext(); +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) const DataLayout &dl = M.getDataLayout(); /* define unions with floating point and (sign, exponent, mantissa) triples @@ -571,6 +628,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } +#endif + std::vector fcomps; /* get all EQ, NE, GT, and LT fcmps. if the other two @@ -669,11 +728,11 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction *b_op0, *b_op1; b_op0 = CastInst::Create(Instruction::BitCast, op0, IntegerType::get(C, op_size)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), b_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op0); b_op1 = CastInst::Create(Instruction::BitCast, op1, IntegerType::get(C, op_size)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), b_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op1); /* isolate signs of value of floating point type */ @@ -684,21 +743,22 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { s_s0 = BinaryOperator::Create(Instruction::LShr, b_op0, ConstantInt::get(b_op0->getType(), op_size - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_s0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s0); t_s0 = new TruncInst(s_s0, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_s0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s0); s_s1 = BinaryOperator::Create(Instruction::LShr, 
b_op1, ConstantInt::get(b_op1->getType(), op_size - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_s1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s1); t_s1 = new TruncInst(s_s1, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_s1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s1); /* compare of the sign bits */ icmp_sign_bit = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_s0, t_s1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_sign_bit); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_sign_bit); /* create a new basic block which is executed if the signedness bits are * equal */ @@ -730,16 +790,16 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::LShr, b_op1, ConstantInt::get(b_op1->getType(), shiftR_exponent)); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), s_e0); + BasicBlock::iterator(signequal_bb->getTerminator()), s_e0); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), s_e1); + BasicBlock::iterator(signequal_bb->getTerminator()), s_e1); t_e0 = new TruncInst(s_e0, IntExponentTy); t_e1 = new TruncInst(s_e1, IntExponentTy); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), t_e0); + BasicBlock::iterator(signequal_bb->getTerminator()), t_e0); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), t_e1); + BasicBlock::iterator(signequal_bb->getTerminator()), t_e1); if (sizeInBits - precision < exTySizeBytes * 8) { @@ -750,9 +810,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::And, t_e1, ConstantInt::get(t_e1->getType(), mask_exponent)); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), m_e0); + BasicBlock::iterator(signequal_bb->getTerminator()), m_e0); 
signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), m_e1); + BasicBlock::iterator(signequal_bb->getTerminator()), m_e1); } else { @@ -780,7 +840,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, m_e0, m_e1); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), icmp_exponent); + BasicBlock::iterator(signequal_bb->getTerminator()), icmp_exponent); icmp_exponent_result = BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0); break; @@ -789,7 +849,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, m_e0, m_e1); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), icmp_exponent); + BasicBlock::iterator(signequal_bb->getTerminator()), icmp_exponent); icmp_exponent_result = BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0); break; @@ -798,7 +858,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), icmp_exponent_result); + BasicBlock::iterator(signequal_bb->getTerminator()), + icmp_exponent_result); { @@ -822,19 +883,19 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { m_f1 = BinaryOperator::Create( Instruction::And, b_op1, ConstantInt::get(b_op1->getType(), mask_fraction)); - middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(), - m_f0); - middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(), - m_f1); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), m_f0); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), m_f1); if (needTrunc) { t_f0 = new TruncInst(m_f0, IntFractionTy); t_f1 = new TruncInst(m_f1, IntFractionTy); middle_bb->getInstList().insert( - 
middle_bb->getTerminator()->getIterator(), t_f0); + BasicBlock::iterator(middle_bb->getTerminator()), t_f0); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), t_f1); + BasicBlock::iterator(middle_bb->getTerminator()), t_f1); } else { @@ -850,9 +911,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { t_f0 = new TruncInst(b_op0, IntFractionTy); t_f1 = new TruncInst(b_op1, IntFractionTy); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), t_f0); + BasicBlock::iterator(middle_bb->getTerminator()), t_f0); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), t_f1); + BasicBlock::iterator(middle_bb->getTerminator()), t_f1); } else { @@ -882,7 +943,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_fraction = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), icmp_fraction); + BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction); icmp_fraction_result = BinaryOperator::Create(Instruction::Xor, icmp_fraction, t_s0); break; @@ -891,7 +952,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_fraction = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), icmp_fraction); + BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction); icmp_fraction_result = BinaryOperator::Create(Instruction::Xor, icmp_fraction, t_s0); break; @@ -899,8 +960,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } - middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(), - icmp_fraction_result); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction_result); PHINode *PN = PHINode::Create(Int1Ty, 3, ""); @@ -1037,18 +1098,21 @@ size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned 
bitw) { s_op0 = BinaryOperator::Create(Instruction::LShr, op0, ConstantInt::get(OldIntType, bitw / 2)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); op0_high = new TruncInst(s_op0, NewIntType); - bb->getInstList().insert(bb->getTerminator()->getIterator(), op0_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + op0_high); s_op1 = BinaryOperator::Create(Instruction::LShr, op1, ConstantInt::get(OldIntType, bitw / 2)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); op1_high = new TruncInst(s_op1, NewIntType); - bb->getInstList().insert(bb->getTerminator()->getIterator(), op1_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + op1_high); icmp_high = CmpInst::Create(Instruction::ICmp, pred, op0_high, op1_high); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_high); /* now we have to destinguish between == != and > < */ if (pred == CmpInst::ICMP_EQ || pred == CmpInst::ICMP_NE) { @@ -1194,13 +1258,19 @@ bool SplitComparesTransform::runOnModule(Module &M) { << "bit: " << splitIntCompares(M, bitw) << " splitted\n"; bitw >>= 1; +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ +#endif case 32: errs() << "Split-integer-compare-pass " << bitw << "bit: " << splitIntCompares(M, bitw) << " splitted\n"; bitw >>= 1; +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ +#endif case 16: errs() << "Split-integer-compare-pass " << bitw << "bit: " << splitIntCompares(M, bitw) << " splitted\n"; diff --git a/llvm_mode/split-switches-pass.so.cc 
b/llvm_mode/split-switches-pass.so.cc index 3a2838c0..9101dc26 100644 --- a/llvm_mode/split-switches-pass.so.cc +++ b/llvm_mode/split-switches-pass.so.cc @@ -23,8 +23,9 @@ #include #include +#include "llvm/Config/llvm-config.h" + #include "llvm/ADT/Statistic.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -32,10 +33,20 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/Verifier.h" +#include "llvm/IR/DebugInfo.h" +#else +#include "llvm/Analysis/Verifier.h" +#include "llvm/DebugInfo.h" +#define nullptr 0 +#endif + #include using namespace llvm; @@ -69,7 +80,7 @@ class SplitSwitchesTransform : public ModulePass { static bool isBlacklisted(const Function *F) { - static const SmallVector Blacklist = { + static const char *Blacklist[] = { "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign." 
@@ -140,7 +151,7 @@ BasicBlock *SplitSwitchesTransform::switchConvert( IntegerType * ByteType = IntegerType::get(OrigBlock->getContext(), 8); unsigned BytesInValue = bytesChecked.size(); std::vector setSizes; - std::vector> byteSets(BytesInValue, std::set()); + std::vector > byteSets(BytesInValue, std::set()); assert(ValTypeBitWidth >= 8 && ValTypeBitWidth <= 64); @@ -213,8 +224,25 @@ BasicBlock *SplitSwitchesTransform::switchConvert( NewNode->getInstList().push_back(Comp); bytesChecked[smallestIndex] = true; - if (std::all_of(bytesChecked.begin(), bytesChecked.end(), - [](bool b) { return b; })) { + bool allBytesAreChecked = true; + + for (std::vector::iterator BCI = bytesChecked.begin(), + E = bytesChecked.end(); + BCI != E; ++BCI) { + + if (!*BCI) { + + allBytesAreChecked = false; + break; + + } + + } + + // if (std::all_of(bytesChecked.begin(), bytesChecked.end(), + // [](bool b) { return b; })) { + + if (allBytesAreChecked) { assert(Cases.size() == 1); BranchInst::Create(Cases[0].BB, NewDefault, Comp, NewNode); @@ -306,6 +334,10 @@ BasicBlock *SplitSwitchesTransform::switchConvert( bool SplitSwitchesTransform::splitSwitches(Module &M) { +#if (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7) + LLVMContext &C = M.getContext(); +#endif + std::vector switches; /* iterate over all functions, bbs and instruction and add @@ -327,6 +359,8 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { * For now, just instrument the block if we are not able * to determine our location. 
*/ DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) if (Loc) { DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); @@ -379,6 +413,47 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { } +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + /* Either we couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. */ if (!instrumentBlock) continue; @@ -426,8 +501,7 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { * if the default block is set as an unreachable we avoid creating one * because will never be a valid target.*/ BasicBlock *NewDefault = nullptr; - NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); - NewDefault->insertInto(F, Default); + NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault", F, Default); BranchInst::Create(Default, NewDefault); /* Prepare cases vector. 
*/ From b050c1158398dd07e25a6cd65234da84e5656fa6 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Thu, 30 Jan 2020 21:50:57 +0100 Subject: [PATCH 43/43] for partial functionality ignore the LLVMInsTrim build result --- llvm_mode/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index 50b1d48c..5f65b55e 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -36,7 +36,7 @@ else endif LLVMVER = $(shell $(LLVM_CONFIG) --version 2>/dev/null ) -LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-3]|^1[2-9]' && echo 1 || echo 0 ) +LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 ) LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 ) LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//') LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null)