Merge branch 'dev' of https://github.com/AFLplusplus/AFLplusplus into dev

2025-06-10 01:01:33 +00:00 · 2020-03-21 10:36:18 +01:00 · 2020-03-21 10:36:18 +01:00 · 993bf03af9
commit 993bf03af9
parent 2508008b53 a0012c9e82
13 changed files with 209 additions and 44 deletions
--- a/README.md
+++ b/README.md
@ -73,6 +73,8 @@

  * The new CmpLog instrumentation for LLVM and QEMU inspired by [Redqueen](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf)

+  * llvm_mode ngram coverage by Adrian Herrera [https://github.com/adrianherrera/afl-ngram-pass](https://github.com/adrianherrera/afl-ngram-pass)
+
  A more thorough list is available in the PATCHES file.

  | Feature/Instrumentation | afl-gcc | llvm_mode | gcc_plugin | qemu_mode        | unicorn_mode |
@ -84,6 +86,7 @@
  | Whitelist               |         |     x     |     x      |        (x)(3)    |              |
  | non-colliding coverage  |         |     x(4)  |            |        (x)(5)    |              |
  | InsTrim                 |         |     x     |            |                  |              |
+  | ngram prev_loc coverage |         |     x(6)  |            |                  |              |

  neverZero:

@ -97,6 +100,8 @@

  (5) upcoming, development in branch

+  (6) not compatible with LTO and InsTrim modes
+
  So all in all this is the best-of afl that is currently out there :-)

  For new versions and additional information, check out:
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@ -31,6 +31,9 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
              runtime
  - LTO collision free instrumented added in llvm_mode with afl-clang-lto -
    note that this mode is amazing, but quite some targets won't compile
+  - Added llvm_mode NGRAM prev_loc coverage by Adrian Herrera
+    (https://github.com/adrianherrera/afl-ngram-pass/), activate by setting
+    AFL_LLVM_NGRAM_SIZE
  - llvm_mode InsTrim mode:
    - removed workaround for bug where paths were not instrumented and 
      imported fix by author
--- a/docs/PATCHES.md
+++ b/docs/PATCHES.md
@ -20,6 +20,7 @@ afl-qemu-speed.diff			by abiondo on github
 afl-qemu-optimize-map.diff		by mh(at)mh-sec(dot)de
 ```

+ llvm_mode ngram prev_loc coverage (github.com/adrianherrera/afl-ngram-pass)
 + Custom mutator (native library) (by kyakdan)
 + unicorn_mode (modernized and updated by domenukk)
 + instrim (https://github.com/csienslab/instrim) was integrated
--- a/docs/env_variables.md
+++ b/docs/env_variables.md
@ -110,6 +110,9 @@ are then later combined.
   - AFL_LLVM_LTO_DONTWRITEID prevents that the highest location ID written
     into the instrumentation is set in a global variable

+    InsTrim, LTO and ngram modes cannot be used together.
+    See llvm_mode/README.LTO.md for more information.
+
 ### LAF-INTEL

    This great feature will split compares to series of single byte comparisons
@ -149,8 +152,17 @@ are then later combined.
      functions with a single basic block. This is useful for most C and
      some C++ targets.

+    InsTrim, LTO and ngram modes cannot be used together.
    See llvm_mode/README.instrim.md

+### NGRAM
+
+    - Setting AFL_LLVM_NGRAM_SIZE activates ngram prev_loc coverage, good
+      values are 2, 4 or 8.
+
+    InsTrim, LTO and ngram modes cannot be used together.
+    See llvm_mode/README.ngram.md
+
 ### NOT_ZERO

    - Setting AFL_LLVM_NOT_ZERO=1 during compilation will use counters
--- a/gcc_plugin/Makefile
+++ b/gcc_plugin/Makefile
@ -132,7 +132,7 @@ vpath  % ..
 	@../$* -h 2>&1 | tail -n +4 >> ../$@
 	@echo >> ../$@
 	@echo .SH AUTHOR >> ../$@
-	@echo "afl++ was written by Michal \"lcamtuf\" Zalewski and is maintained by Marc \"van Hauser\" Heuse <mh@mh-sec.de>, Heiko \"hexcoder-\" Eissfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com>" >> ../$@
+	@echo "afl++ was written by Michal \"lcamtuf\" Zalewski and is maintained by Marc \"van Hauser\" Heuse <mh@mh-sec.de>, Heiko \"hexcoder-\" Eissfeldt <heiko.eissfeldt@hexco.de>, Andrea Fioraldi <andreafioraldi@gmail.com> and Dominik Maier <domenukk@gmail.com>" >> ../$@
 	@echo  The homepage of afl++ is: https://github.com/AFLplusplus/AFLplusplus >> ../$@
 	@echo >> ../$@
 	@echo .SH LICENSE >> ../$@
@ -143,7 +143,7 @@ install: all
 	install -m 755 ../afl-gcc-fast $${DESTDIR}$(BIN_PATH)
 	install -m 755 ../afl-gcc-pass.so ../afl-gcc-rt.o $${DESTDIR}$(HELPER_PATH)
 	install -m 644 -T README.md $${DESTDIR}$(DOC_PATH)/README.gcc_plugin.md
-	install -m 644 -T README.whitelist.md $${DESTDIR}$(DOC_PATH)/README.gcc_plugin_whitelist.md
+	install -m 644 -T README.whitelist.md $${DESTDIR}$(DOC_PATH)/README.gcc_plugin.whitelist.md

 clean:
 	rm -f *.o *.so *~ a.out core core.[1-9][0-9]* test-instr .test-instr0 .test-instr1 .test2
--- a/llvm_mode/README.ngram.md
+++ b/llvm_mode/README.ngram.md
@ -0,0 +1,20 @@
+# AFL N-Gram Branch Coverage
+
+## Source
+
+This is an LLVM-based implementation of the n-gram branch coverage proposed in
+the paper ["Be Sensitive and Collaborative: Analyzing Impact of Coverage Metrics
+in Greybox Fuzzing"](https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf),
+by Jinghan Wang, et al.
+
+Note that the original implementation (available
+[here](https://github.com/bitsecurerlab/afl-sensitive))
+is built on top of AFL's QEMU mode.
+This is essentially a port that uses LLVM vectorized instructions to achieve
+the same results when compiling source code.
+
+## Usage
+
+The size of `n` (i.e., the number of branches to remember) is an option
+that is specified in the `AFL_LLVM_NGRAM_SIZE` environment variable.
+Good values are 2, 4 or 8.
--- a/llvm_mode/afl-clang-fast.c
+++ b/llvm_mode/afl-clang-fast.c
@ -158,14 +158,20 @@ static void edit_params(u32 argc, char **argv) {
 #endif
    if (lto_flag[0] != '-')
      FATAL(
-          "afl-clang-lto not possible because Makefile magic did not identify "
-          "the correct -flto flag");
+          "Using afl-clang-lto is not possible because Makefile magic did not "
+          "identify the correct -flto flag");
    if (getenv("AFL_LLVM_INSTRIM") != NULL)
      FATAL("afl-clang-lto does not work with InsTrim mode");
+    if (getenv("AFL_LLVM_NGRAM_SIZE") != NULL)
+      FATAL("afl-clang-lto does not work with ngram coverage mode");
    lto_mode = 1;

  }

+  if (getenv("AFL_LLVM_NGRAM_SIZE") != NULL &&
+      getenv("AFL_LLVM_INSTRIM") != NULL)
+    FATAL("AFL_LLVM_NGRAM_SIZE and AFL_LLVM_INSTRIM can not be used together");
+
  if (!strcmp(name, "afl-clang-fast++") || !strcmp(name, "afl-clang-lto++")) {

    u8 *alt_cxx = getenv("AFL_CXX");
@ -605,6 +611,7 @@ int main(int argc, char **argv, char **envp) {
            "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n"
            "AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n"
            "AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed\n"
+            "AFL_LLVM_NGRAM_SIZE: use ngram prev_loc coverage\n"
            "AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen mutator)\n"
            "\nafl-clang-fast was built for llvm %s with the llvm binary path "
            "of "
--- a/llvm_mode/afl-llvm-pass.so.cc
+++ b/llvm_mode/afl-llvm-pass.so.cc
@ -2,12 +2,15 @@
   american fuzzy lop++ - LLVM-mode instrumentation pass
   ---------------------------------------------------

-   Written by Laszlo Szekeres <lszekeres@google.com> and
+   Written by Laszlo Szekeres <lszekeres@google.com>,
+              Adrian Herrera <adrian.herrera@anu.edu.au>,
              Michal Zalewski

   LLVM integration design comes from Laszlo Szekeres. C bits copied-and-pasted
   from afl-as.c are Michal's fault.

+   NGRAM previous location coverage comes from Adrian Herrera.
+
   Copyright 2015, 2016 Google Inc. All rights reserved.
   Copyright 2019-2020 AFLplusplus Project. All rights reserved.

@ -27,7 +30,6 @@

 #include "config.h"
 #include "debug.h"
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@ -47,6 +49,7 @@ typedef long double max_align_t;
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"

 #if LLVM_VERSION_MAJOR > 3 || \
@ -58,6 +61,8 @@ typedef long double max_align_t;
 #include "llvm/Support/CFG.h"
 #endif

+#include "llvm-ngram-coverage.h"
+
 using namespace llvm;

 namespace {
@ -118,6 +123,7 @@ class AFLCoverage : public ModulePass {

 protected:
  std::list<std::string> myWhitelist;
+  uint32_t               ngram_size = 0;

 };

@ -131,6 +137,8 @@ bool AFLCoverage::runOnModule(Module &M) {

  IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
  IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
+  IntegerType *IntLocTy =
+      IntegerType::getIntNTy(C, sizeof(PREV_LOC_T) * CHAR_BIT);
  struct timeval  tv;
  struct timezone tz;
  u32             rand_seed;
@ -147,7 +155,8 @@ bool AFLCoverage::runOnModule(Module &M) {

  if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {

-    SAYF(cCYA "afl-llvm-pass" VERSION cRST " by <lszekeres@google.com>\n");
+    SAYF(cCYA "afl-llvm-pass" VERSION cRST
+              " by <lszekeres@google.com> and <adrian.herrera@anu.edu.au>\n");

  } else

@ -170,21 +179,73 @@ bool AFLCoverage::runOnModule(Module &M) {
  char *neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO");
 #endif

+  /* Decide previous location vector size (must be a power of two) */
+
+  char *ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
+  if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
+
+  if (ngram_size_str)
+    if (sscanf(ngram_size_str, "%u", &ngram_size) != 1 || ngram_size < 2 ||
+        ngram_size > MAX_NGRAM_SIZE)
+      FATAL(
+          "Bad value of AFL_NGRAM_SIZE (must be between 2 and MAX_NGRAM_SIZE)");
+
+  unsigned PrevLocSize;
+  if (ngram_size == 1) ngram_size = 0;
+  if (ngram_size)
+    PrevLocSize = ngram_size - 1;
+  else
+    PrevLocSize = 1;
+  uint64_t    PrevLocVecSize = PowerOf2Ceil(PrevLocSize);
+  VectorType *PrevLocTy;
+
+  if (ngram_size) PrevLocTy = VectorType::get(IntLocTy, PrevLocVecSize);
+
  /* Get globals for the SHM region and the previous location. Note that
     __afl_prev_loc is thread-local. */

  GlobalVariable *AFLMapPtr =
      new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
                         GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
+  GlobalVariable *AFLPrevLoc;

+  if (ngram_size)
 #ifdef __ANDROID__
-  GlobalVariable *AFLPrevLoc = new GlobalVariable(
+    AFLPrevLoc = new GlobalVariable(
+        M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+        /* Initializer */ nullptr, "__afl_prev_loc");
+#else
+    AFLPrevLoc = new GlobalVariable(
+        M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+        /* Initializer */ nullptr, "__afl_prev_loc",
+        /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel,
+        /* AddressSpace */ 0, /* IsExternallyInitialized */ false);
+#endif
+  else
+#ifdef __ANDROID__
+    AFLPrevLoc = new GlobalVariable(
        M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc");
 #else
-  GlobalVariable *AFLPrevLoc = new GlobalVariable(
+    AFLPrevLoc = new GlobalVariable(
        M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0,
        GlobalVariable::GeneralDynamicTLSModel, 0, false);
 #endif
+
+  /* Create the vector shuffle mask for updating the previous block history.
+     Note that the first element of the vector will store cur_loc, so just set
+     it to undef to allow the optimizer to do its thing. */
+
+  SmallVector<Constant *, 32> PrevLocShuffle = {UndefValue::get(Int32Ty)};
+
+  for (unsigned I = 0; I < PrevLocSize - 1; ++I)
+    PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, I));
+
+  for (unsigned I = PrevLocSize; I < PrevLocVecSize; ++I)
+    PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize));
+
+  Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle);
+
+  // other constants we need
  ConstantInt *Zero = ConstantInt::get(Int8Ty, 0);
  ConstantInt *One = ConstantInt::get(Int8Ty, 1);

@ -356,20 +417,41 @@ bool AFLCoverage::runOnModule(Module &M) {
      // fprintf(stderr, " == %d\n", more_than_one);
      if (more_than_one != 1) continue;
 #endif
-      ConstantInt *CurLoc = ConstantInt::get(Int32Ty, cur_loc);
+
+      ConstantInt *CurLoc;
+
+      if (ngram_size)
+        CurLoc = ConstantInt::get(IntLocTy, cur_loc);
+      else
+        CurLoc = ConstantInt::get(Int32Ty, cur_loc);

      /* Load prev_loc */

      LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc);
      PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
-      Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
+      Value *PrevLocTrans;
+
+      /* "For efficiency, we propose to hash the tuple as a key into the
+         hit_count map as (prev_block_trans << 1) ^ curr_block_trans, where
+         prev_block_trans = (block_trans_1 ^ ... ^ block_trans_(n-1)" */
+
+      if (ngram_size)
+        PrevLocTrans = IRB.CreateXorReduce(PrevLoc);
+      else
+        PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());

      /* Load SHM pointer */

      LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
      MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
-      Value *MapPtrIdx =
-          IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, CurLoc));
+
+      Value *MapPtrIdx;
+      if (ngram_size)
+        MapPtrIdx = IRB.CreateGEP(
+            MapPtr,
+            IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, CurLoc), Int32Ty));
+      else
+        MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocTrans, CurLoc));

      /* Update bitmap */

@ -449,12 +531,28 @@ bool AFLCoverage::runOnModule(Module &M) {
      IRB.CreateStore(Incr, MapPtrIdx)
          ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));

-      /* Set prev_loc to cur_loc >> 1 */
+      /* Update prev_loc history vector (by placing cur_loc at the head of the
+         vector and shuffle the other elements back by one) */

-      StoreInst *Store =
-          IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc);
+      StoreInst *Store;
+
+      if (ngram_size) {
+
+        Value *ShuffledPrevLoc = IRB.CreateShuffleVector(
+            PrevLoc, UndefValue::get(PrevLocTy), PrevLocShuffleMask);
+        Value *UpdatedPrevLoc = IRB.CreateInsertElement(
+            ShuffledPrevLoc, IRB.CreateLShr(CurLoc, (uint64_t)1), (uint64_t)0);
+
+        Store = IRB.CreateStore(UpdatedPrevLoc, AFLPrevLoc);
        Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));

+      } else {
+
+        Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1),
+                                AFLPrevLoc);
+
+      }
+
      inst_blocks++;

    }
--- a/llvm_mode/afl-llvm-rt.o.c
+++ b/llvm_mode/afl-llvm-rt.o.c
@ -26,6 +26,7 @@
 #include "config.h"
 #include "types.h"
 #include "cmplog.h"
+#include "llvm-ngram-coverage.h"

 #include <stdio.h>
 #include <stdlib.h>
@ -62,10 +63,10 @@ u8  __afl_area_initial[MAP_SIZE];
 u8 *__afl_area_ptr = __afl_area_initial;

 #ifdef __ANDROID__
-u32 __afl_prev_loc;
+PREV_LOC_T __afl_prev_loc[MAX_NGRAM_SIZE];
 u32        __afl_final_loc;
 #else
-__thread u32 __afl_prev_loc;
+__thread PREV_LOC_T __afl_prev_loc[MAX_NGRAM_SIZE];
 __thread u32        __afl_final_loc;
 #endif

@ -281,7 +282,7 @@ int __afl_persistent_loop(unsigned int max_cnt) {

      memset(__afl_area_ptr, 0, MAP_SIZE);
      __afl_area_ptr[0] = 1;
-      __afl_prev_loc = 0;
+      memset(__afl_prev_loc, 0, MAX_NGRAM_SIZE * sizeof(PREV_LOC_T));

    }

@ -298,7 +299,7 @@ int __afl_persistent_loop(unsigned int max_cnt) {
      raise(SIGSTOP);

      __afl_area_ptr[0] = 1;
-      __afl_prev_loc = 0;
+      memset(__afl_prev_loc, 0, MAX_NGRAM_SIZE * sizeof(PREV_LOC_T));

      return 1;

--- a/llvm_mode/llvm-ngram-coverage.h
+++ b/llvm_mode/llvm-ngram-coverage.h
@ -0,0 +1,18 @@
+#ifndef AFL_NGRAM_CONFIG_H
+#define AFL_NGRAM_CONFIG_H
+
+#include "../config.h"
+
+#if (MAP_SIZE_POW2 <= 16)
+typedef u16 PREV_LOC_T;
+#elif (MAP_SIZE_POW2 <= 32)
+typedef u32 PREV_LOC_T;
+#else
+typedef u64 PREV_LOC_T;
+#endif
+
+/* Maximum ngram size */
+#define MAX_NGRAM_SIZE 128
+
+#endif
+
--- a/src/afl-common.c
+++ b/src/afl-common.c
@ -57,10 +57,10 @@ char *    afl_environment_variables[] = {
    "AFL_LLVM_INSTRIM_LOOPHEAD", "AFL_LLVM_INSTRIM_SKIPSINGLEBLOCK",
    "AFL_LLVM_LAF_SPLIT_COMPARES", "AFL_LLVM_LAF_SPLIT_COMPARES_BITW",
    "AFL_LLVM_LAF_SPLIT_FLOATS", "AFL_LLVM_LAF_SPLIT_SWITCHES",
-    "AFL_LLVM_LAF_TRANSFORM_COMPARES", "AFL_LLVM_NOT_ZERO",
-    "AFL_LLVM_WHITELIST", "AFL_NO_AFFINITY", "AFL_LLVM_LTO_STARTID",
-    "AFL_LLVM_LTO_DONTWRITEID", "AFL_NO_ARITH", "AFL_NO_BUILTIN",
-    "AFL_NO_CPU_RED", "AFL_NO_FORKSRV", "AFL_NO_UI",
+    "AFL_LLVM_LAF_TRANSFORM_COMPARES", "AFL_LLVM_NGRAM_SIZE", "AFL_NGRAM_SIZE",
+    "AFL_LLVM_NOT_ZERO", "AFL_LLVM_WHITELIST", "AFL_NO_AFFINITY",
+    "AFL_LLVM_LTO_STARTID", "AFL_LLVM_LTO_DONTWRITEID", "AFL_NO_ARITH",
+    "AFL_NO_BUILTIN", "AFL_NO_CPU_RED", "AFL_NO_FORKSRV", "AFL_NO_UI",
    "AFL_NO_X86",  // not really an env but we dont want to warn on it
    "AFL_PATH", "AFL_PERFORMANCE_FILE",
    //"AFL_PERSISTENT", // not implemented anymore, so warn additionally
--- a/src/afl-fuzz-stats.c
+++ b/src/afl-fuzz-stats.c
@ -105,7 +105,7 @@ void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
      afl->start_time / 1000, cur_time / 1000,
      (cur_time - afl->start_time) / 1000, getpid(),
      afl->queue_cycle ? (afl->queue_cycle - 1) : 0, afl->cycles_wo_finds,
-      afl->total_execs, /*eps,*/
+      afl->total_execs,
      afl->total_execs / ((double)(get_cur_time() - afl->start_time) / 1000),
      afl->queued_paths, afl->queued_favored, afl->queued_discovered,
      afl->queued_imported, afl->max_depth, afl->current_entry,
--- a/unicorn_mode/build_unicorn_support.sh
+++ b/unicorn_mode/build_unicorn_support.sh
@ -176,7 +176,7 @@ cd ../samples/simple || exit 1

 # Run afl-showmap on the sample application. If anything comes out then it must have worked!
 unset AFL_INST_RATIO
-echo 0 | ../../../afl-showmap -U -m none -q -o .test-instr0 -- $PYTHONBIN simple_test_harness.py ./sample_inputs/sample1.bin || exit 1
+echo 0 | ../../../afl-showmap -U -m none -t 2000 -q -o .test-instr0 -- $PYTHONBIN simple_test_harness.py ./sample_inputs/sample1.bin || exit 1

 if [ -s .test-instr0 ]
 then