From e3dadbfe0f9fad435a6fa201131315500f1a348a Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 13 Jan 2023 18:27:22 +0100 Subject: [PATCH 01/77] autotokens --- custom_mutators/autotokens/Makefile | 7 + custom_mutators/autotokens/autotokens.cpp | 391 ++++++++++++++++++++++ qemu_mode/qemuafl | 2 +- 3 files changed, 399 insertions(+), 1 deletion(-) create mode 100644 custom_mutators/autotokens/Makefile create mode 100644 custom_mutators/autotokens/autotokens.cpp diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile new file mode 100644 index 00000000..1ee7f5c4 --- /dev/null +++ b/custom_mutators/autotokens/Makefile @@ -0,0 +1,7 @@ +all: autotokens.so + +autotokens.so: autotokens.cpp + $(CXX) -O3 -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o + +clean: + rm -f autotokens.so *~ core \ No newline at end of file diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp new file mode 100644 index 00000000..afde8c26 --- /dev/null +++ b/custom_mutators/autotokens/autotokens.cpp @@ -0,0 +1,391 @@ +extern "C" { +#include "afl-fuzz.h" +} + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define AUTOTOKENS_DEBUG 1 +#define AUTOTOKENS_LEN_MIN 12 +#define AUTOTOKENS_CHANGE_MIN_PERCENT 5 +#define AUTOTOKENS_CHANGE_MAX_PERCENT 10 + +using namespace std; + +typedef struct my_mutator { + + afl_state *afl; + +} my_mutator_t; + +#define DEBUG \ + if (unlikely(debug)) fprintf + +static afl_state *afl_ptr; +static int debug = AUTOTOKENS_DEBUG; +static u32 current_id = 0; +static unordered_map *> file_mapping; +static unordered_map token_to_id; +static unordered_map id_to_token; +static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static regex regex_comment_star("/\\*(.|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); +static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); +static vector *s; + +extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, + u8 **out_buf, uint8_t *add_buf, + size_t add_buf_size, size_t max_size) { + + DEBUG(stderr, "MUT!\n"); + + if (s == NULL) { return 0; } + + vector m = *s; + u32 i, m_size = (u32)m.size(); + + u32 rounds = MAX(8, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256)); + DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + + for (i = 0; i < rounds; ++i) { + + u32 item, new_item; + + switch(rand_below(afl_ptr, 4)) { + /* CHANGE */ + case 0: /* fall through */ + case 1: + item = rand_below(afl_ptr, m_size); + do { + new_item = 1 + rand_below(afl_ptr, current_id); + } while(unlikely(new_item == m[item])); + m[item] = new_item; + break; + /* INSERT (+1 so we insert also after last place) */ + case 2: + new_item = 1 + rand_below(afl_ptr, current_id); + m.insert(m.begin() + rand_below(afl_ptr, m_size + 1), new_item); + ++m_size; + break; + /* ERASE - only if large enough */ + case 3: + if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); } + --m_size; + break; + } + + } + + string output; + u32 m_size_1 = m_size - 1; + for (i = 0; i < m_size; ++i) { + output += id_to_token[m[i]]; + if (likely(i < m_size_1)) { output += " "; } + } + + u32 mutated_size = output.size(); + u8 *mutated_out = (u8*)afl_realloc((void**)out_buf, mutated_size); + + if (unlikely(!mutated_out)) 
{ + + *out_buf = NULL; + return 0; + + } + + /* + *out_buf = buf; + return buf_size; + */ + memcpy(mutated_out, output.data(), mutated_size); + *out_buf = mutated_out; + DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size, mutated_out); + return mutated_size; + +} + + +/* We are not using afl_custom_queue_new_entry() because not every corpus entry + will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ + +extern "C" unsigned char afl_custom_queue_get(void *data, + const unsigned char *filename) { + + if (likely(!debug)) + if (!afl_ptr->queue_cur->is_ascii) { s = NULL; return 0; } + + vector *structure = NULL; + string fn = (char *)filename; + + auto entry = file_mapping.find(fn); + if (entry == file_mapping.end()) { + + // this input file was not analyzed for tokens yet, so let's do it! + + FILE *fp = fopen((char *)filename, "rb"); + if (!fp) { s = NULL; return 0; } // should not happen + fseek(fp, 0, SEEK_END); + size_t len = (size_t)ftell(fp); + if (len < AUTOTOKENS_LEN_MIN) { + + fclose(fp); + file_mapping[fn] = structure; // NULL ptr so we don't read the file again + DEBUG(stderr, "Too short (%lu) %s\n", len, filename); + s = NULL; + return 0; + + } + + string input; + input.resize(len); + rewind(fp); + fread(input.data(), input.size(), 1, fp); + fclose(fp); + + // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", + // input.size(), filename, input.c_str()); + + input = regex_replace(input, regex_comment_slash, "$2"); + input = regex_replace(input, regex_comment_star, ""); + + DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), + filename, input.c_str()); + + /* + u32 spaces = count(input.begin(), input.end(), ' '); + u32 tabs = count(input.begin(), input.end(), '\t'); + u32 linefeeds = count(input.begin(), input.end(), '\n'); + bool ends_with_linefeed = input[input.length() - 1] == '\n'; + DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, + linefeeds, ends_with_linefeed); + */ + + // now extract all tokens + vector tokens; + smatch match; + string::const_iterator cur = input.begin(), ende = input.end(), last = cur, + found, prev; + + DEBUG(stderr, "MATCHES:\n"); + while (regex_search(cur, ende, match, regex_string)) { + + prev = cur; + found = match[1].first; + cur = match[1].second; + DEBUG(stderr, + "string \"%s\" found at start %lu offset %lu continue at %lu\n", + match[1].str().c_str(), prev - input.begin(), match.position(), + cur - input.begin()); + if (prev < found) { // there are items between search start and find + sregex_token_iterator it{prev, found, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase( + std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { return s.size() == 0; }), + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; + + } + + for (auto token : tokenized) { + + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; + + while (regex_search(c, e, m, regex_word)) { + + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(p, f)); + + } + + DEBUG(stderr, + "SUBstring \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - 
token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(c, e)); + + } + + } + + } + + if (match[1].length() > 0) { tokens.push_back(match[1]); } + + } + + if (cur < ende) { + + DEBUG(stderr, "REST!\n"); + + sregex_token_iterator it{cur, ende, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase( + std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { return s.size() == 0; }), + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; + + } + + for (auto token : tokenized) { + + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; + + while (regex_search(c, e, m, regex_word)) { + + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(p, f)); + + } + + DEBUG(stderr, + "SUB2string \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(c, e)); + + } + + } + + } + + DEBUG(stderr, "DUMPING TOKENS:\n"); + if (unlikely(debug)) + for (u32 i = 0; i < tokens.size(); ++i) { + + DEBUG(stderr, "%s ", tokens[i].c_str()); + + } + + DEBUG(stderr, "---------------------------\n"); + + /* Now we transform the tokens into an ID list and saved that */ + + structure = new vector(); + u32 id; + + for (u32 i = 0; i < tokens.size(); ++i) { + + if ((id = token_to_id[tokens[i]]) == 0) { + + // First time we see this token, add it to the list + ++current_id; + token_to_id[tokens[i]] = current_id; + id_to_token[current_id] = tokens[i]; + structure->push_back(current_id); + + } else { + + structure->push_back(id); + + } + + } + + // save the token structure to the file mapping + file_mapping[fn] = structure; + s = structure; + + // we are done! + DEBUG(stderr, "DONE! 
We have %lu tokens in the structure\n", + structure->size()); + + } else { + + if (entry->second == NULL) { + + DEBUG(stderr, "Skipping %s\n", filename); + s = NULL; + return 0; + + } + + s = entry->second; + DEBUG(stderr, "OK %s\n", filename); + + } + + return 1; // we always fuzz unless non-ascii or too small + +} + +extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { + + (void)(seed); + my_mutator_t *data = (my_mutator_t *)calloc(1, sizeof(my_mutator_t)); + if (!data) { + + perror("afl_custom_init alloc"); + return NULL; + + } + + data->afl = afl_ptr = afl; + + return data; + +} + +extern "C" void afl_custom_deinit(my_mutator_t *data) { + + free(data); + +} + diff --git a/qemu_mode/qemuafl b/qemu_mode/qemuafl index a8af9cbd..a120c3fe 160000 --- a/qemu_mode/qemuafl +++ b/qemu_mode/qemuafl @@ -1 +1 @@ -Subproject commit a8af9cbde71e333ce72a46f15e655d0b82ed0939 +Subproject commit a120c3feb573d4cade292cdeb7c1f6b1ce109efe From 9548af52b266ecc2aed81f388f7a1a7a3fcfb181 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 14 Jan 2023 09:30:25 +0100 Subject: [PATCH 02/77] texts --- custom_mutators/autotokens/README | 12 ++++++++++++ custom_mutators/autotokens/TODO | 13 +++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 custom_mutators/autotokens/README create mode 100644 custom_mutators/autotokens/TODO diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README new file mode 100644 index 00000000..6849279e --- /dev/null +++ b/custom_mutators/autotokens/README @@ -0,0 +1,12 @@ +# autotokens + +This implements an improved autotoken idea presented in +[Token-Level Fuzzing][https://www.usenix.org/system/files/sec21-salls.pdf]. +It is a grammar fuzzer without actually knowing the grammar. + +It is recommended to run with together in an instance with `CMPLOG`. + +If you have a dictionary (`-x`) this improves this custom grammar mutator. + +If **not** run with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY`, +to concentrate on grammar bug classes. diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO new file mode 100644 index 00000000..700b3fa7 --- /dev/null +++ b/custom_mutators/autotokens/TODO @@ -0,0 +1,13 @@ +whitespace belassen oder notieren? MAYBE +0=space 1=tab 2=linefeed + +dictionary mitverwenden? JA aber nur ascii +-> neue liste? +wie mache ich das bei honggfuzz? +ansonsten neuer custom mutator entrypoint? + +nur is_ascii wenn cmplog aktiv, ansonsten eigene implementierung +die aber dann dafür sorgt dass eine leere struktur da ist. +is is_ascii in afl-common.o ? + +cmplog: only add tokens that were found to fit? From 35801bed7a5feb8cc3a363bafbd577f256c467f6 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 15 Jan 2023 13:47:31 +0100 Subject: [PATCH 03/77] dictionary support --- custom_mutators/autotokens/TODO | 17 +- custom_mutators/autotokens/autotokens.cpp | 248 +++++++++++++++++----- include/config.h | 2 +- 3 files changed, 199 insertions(+), 68 deletions(-) diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 700b3fa7..2e5e384f 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,13 +1,12 @@ whitespace belassen oder notieren? MAYBE 0=space 1=tab 2=linefeed -dictionary mitverwenden? JA aber nur ascii --> neue liste? -wie mache ich das bei honggfuzz? -ansonsten neuer custom mutator entrypoint? 
- -nur is_ascii wenn cmplog aktiv, ansonsten eigene implementierung -die aber dann dafür sorgt dass eine leere struktur da ist. -is is_ascii in afl-common.o ? - cmplog: only add tokens that were found to fit? + +create from thin air if no good seed after a cycle and dict large enough? +(static u32 no_of_struct_inputs;) + +splice insert, splice overwrite +(linefeed, semicolon) + + diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index afde8c26..2fad8dd7 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1,5 +1,7 @@ extern "C" { + #include "afl-fuzz.h" + } #include @@ -13,9 +15,7 @@ extern "C" { #include #define AUTOTOKENS_DEBUG 1 -#define AUTOTOKENS_LEN_MIN 12 -#define AUTOTOKENS_CHANGE_MIN_PERCENT 5 -#define AUTOTOKENS_CHANGE_MAX_PERCENT 10 +#define AUTOTOKENS_CHANGE_MIN 8 using namespace std; @@ -31,43 +31,55 @@ typedef struct my_mutator { static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; static u32 current_id = 0; +static u32 valid_structures = 0; +static u32 extras_cnt = 0, a_extras_cnt = 0; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); -static regex regex_comment_star("/\\*(.|\n)*?\\*/", - regex::multiline | regex::optimize); -static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); -static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); -static regex regex_whitespace(R"([ \t]+)", regex::optimize); -static vector *s; +static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static regex regex_comment_star("/\\*(.|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); +static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); +static vector *s; // the structure of the currently selected input -extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, - u8 **out_buf, uint8_t *add_buf, - size_t add_buf_size, size_t max_size) { +extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, + u8 **out_buf, u8 *add_buf, + size_t add_buf_size, size_t max_size) { - DEBUG(stderr, "MUT!\n"); + if (s == NULL) { - if (s == NULL) { return 0; } + *out_buf = NULL; + return 0; - vector m = *s; - u32 i, m_size = (u32)m.size(); + } - u32 rounds = MAX(8, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256)); - DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + vector m = *s; // copy of the structure we will modify + u32 i, m_size = (u32)m.size(); + + u32 rounds = + MAX(AUTOTOKENS_CHANGE_MIN, + MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * + afl_ptr->havoc_div / 256)); + // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); for (i = 0; i < rounds; ++i) { - + u32 item, new_item; - - switch(rand_below(afl_ptr, 4)) { + + switch (rand_below(afl_ptr, 4)) { + /* CHANGE */ - case 0: /* fall through */ + case 0: /* fall through */ case 1: item = rand_below(afl_ptr, m_size); do { + new_item = 1 + rand_below(afl_ptr, current_id); - } while(unlikely(new_item == m[item])); + + } while (unlikely(new_item == m[item])); + m[item] = new_item; break; /* INSERT (+1 so we insert also after last place) */ @@ -81,31 +93,32 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t 
*buf, size_t buf_s if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); } --m_size; break; + // TODO: add full line insert splice, replace splace, delete + } - + } - + string output; - u32 m_size_1 = m_size - 1; + u32 m_size_1 = m_size - 1; + for (i = 0; i < m_size; ++i) { + output += id_to_token[m[i]]; if (likely(i < m_size_1)) { output += " "; } + } u32 mutated_size = output.size(); - u8 *mutated_out = (u8*)afl_realloc((void**)out_buf, mutated_size); + u8 *mutated_out = (u8 *)afl_realloc((void **)out_buf, mutated_size); if (unlikely(!mutated_out)) { - + *out_buf = NULL; return 0; - + } - /* - *out_buf = buf; - return buf_size; - */ memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size, mutated_out); @@ -113,29 +126,106 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_s } - /* We are not using afl_custom_queue_new_entry() because not every corpus entry will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { - if (likely(!debug)) - if (!afl_ptr->queue_cur->is_ascii) { s = NULL; return 0; } + if (likely(!debug)) { + + if (afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) { + + s = NULL; + return 0; + + } + + } + + // check if there are new dictionary entries and add them to the tokens + if (valid_structures) { + + while (extras_cnt < afl_ptr->extras_cnt) { + + u32 ok = 1, l = afl_ptr->extras[extras_cnt].len; + u8 *ptr = afl_ptr->extras[extras_cnt].data; + + for (u32 i = 0; i < l; ++i) { + + if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) { + + ok = 0; + break; + + } + + } + + if (ok) { + + ++current_id; + token_to_id[(char *)ptr] = current_id; + id_to_token[current_id] = (char *)ptr; + + } + + ++extras_cnt; + DEBUG(stderr, "Added from dictionary: \"%s\"\n", ptr); + + } + + while (a_extras_cnt < afl_ptr->a_extras_cnt) { + + u32 ok = 1, l = afl_ptr->a_extras[a_extras_cnt].len; + u8 *ptr = afl_ptr->a_extras[a_extras_cnt].data; + + for (u32 i = 0; i < l; ++i) { + + if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) { + + ok = 0; + break; + + } + + } + + if (ok) { + + ++current_id; + token_to_id[(char *)ptr] = current_id; + id_to_token[current_id] = (char *)ptr; + + } + + ++a_extras_cnt; + DEBUG(stderr, "Added from auto dictionary: \"%s\"\n", ptr); + + } + + } vector *structure = NULL; string fn = (char *)filename; + auto entry = file_mapping.find(fn); - auto entry = file_mapping.find(fn); if (entry == file_mapping.end()) { // this input file was not analyzed for tokens yet, so let's do it! FILE *fp = fopen((char *)filename, "rb"); - if (!fp) { s = NULL; return 0; } // should not happen + if (!fp) { + + s = NULL; + return 0; + + } // should not happen + fseek(fp, 0, SEEK_END); size_t len = (size_t)ftell(fp); - if (len < AUTOTOKENS_LEN_MIN) { + + if (len < AFL_TXT_MIN_LEN) { fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again @@ -151,6 +241,30 @@ extern "C" unsigned char afl_custom_queue_get(void *data, fread(input.data(), input.size(), 1, fp); fclose(fp); + if (!afl_ptr->shm.cmplog_mode) { + + // not running with CMPLOG? bad choice, but whatever ... + // we only want text inputs, so we have to check it ourselves. + + u32 valid_chars = 0; + for (u32 i = 0; i < len; ++i) { + + if (isascii((int)input[i]) || isprint((int)input[i])) { ++valid_chars; } + + } + + // we want at least 95% of text characters ... 
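+      // (AFL_TXT_MIN_PERCENT is 95 in include/config.h, so e.g. a 1000 byte
+      // input needs at least 950 ASCII/printable bytes, otherwise the file
+      // is mapped to NULL below and never looked at again.)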
+ if (((len * AFL_TXT_MIN_PERCENT) / 100) > valid_chars) { + + file_mapping[fn] = NULL; + DEBUG(stderr, "Not text (%lu) %s\n", len, filename); + s = NULL; + return 0; + + } + + } + // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); @@ -175,7 +289,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator cur = input.begin(), ende = input.end(), last = cur, found, prev; - DEBUG(stderr, "MATCHES:\n"); while (regex_search(cur, ende, match, regex_string)) { prev = cur; @@ -196,11 +309,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), input.size()); - for (auto x : tokenized) { + if (unlikely(debug)) + for (auto x : tokenized) { - cerr << x << endl; + cerr << x << endl; - } + } for (auto token : tokenized) { @@ -232,8 +346,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (c < e) { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(c, e)); } @@ -248,8 +367,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (cur < ende) { - DEBUG(stderr, "REST!\n"); - sregex_token_iterator it{cur, ende, regex_whitespace, -1}; vector tokenized{it, {}}; tokenized.erase( @@ -260,11 +377,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), input.size()); - for (auto x : tokenized) { + if (unlikely(debug)) + for (auto x : tokenized) { - cerr << x << endl; + cerr << x << endl; - } + } for (auto token : tokenized) { @@ -279,8 +397,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (p < f) { // there are items between search start and find - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(p, f)); } @@ -296,8 +419,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (c < e) { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(c, e)); } @@ -306,15 +434,18 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - DEBUG(stderr, "DUMPING TOKENS:\n"); - if (unlikely(debug)) + if (unlikely(debug)) { + + DEBUG(stderr, "DUMPING TOKENS:\n"); for (u32 i = 0; i < tokens.size(); ++i) { DEBUG(stderr, "%s ", tokens[i].c_str()); } - DEBUG(stderr, "---------------------------\n"); + DEBUG(stderr, "---------------------------\n"); + + } /* Now we transform the tokens into an ID list and saved that */ @@ -342,6 +473,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // save the token structure to the file mapping file_mapping[fn] = structure; s = structure; + ++valid_structures; // we are done! DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", diff --git a/include/config.h b/include/config.h index a5a4c473..6cfaac11 100644 --- a/include/config.h +++ b/include/config.h @@ -494,7 +494,7 @@ /* What is the minimum percentage of ascii characters present to be classifed as "is_ascii"? 
*/ -#define AFL_TXT_MIN_PERCENT 94 +#define AFL_TXT_MIN_PERCENT 95 /* How often to perform ASCII mutations 0 = disable, 1-8 are good values */ From 10b82c72772f40f703119fc7cd1c9063500a6bbe Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 15 Jan 2023 18:17:28 +0100 Subject: [PATCH 04/77] fixes --- custom_mutators/autotokens/Makefile | 2 +- custom_mutators/autotokens/autotokens.cpp | 40 ++++++++++++++++------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 1ee7f5c4..5dd52dee 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,7 +1,7 @@ all: autotokens.so autotokens.so: autotokens.cpp - $(CXX) -O3 -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o + $(CXX) -g -O3 $(CFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o clean: rm -f autotokens.so *~ core \ No newline at end of file diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 2fad8dd7..9fbdf52a 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -14,7 +14,7 @@ extern "C" { #include #include -#define AUTOTOKENS_DEBUG 1 +#define AUTOTOKENS_DEBUG 0 #define AUTOTOKENS_CHANGE_MIN 8 using namespace std; @@ -64,11 +64,13 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + u32 max_rand = 4; + for (i = 0; i < rounds; ++i) { u32 item, new_item; - switch (rand_below(afl_ptr, 4)) { + switch (rand_below(afl_ptr, max_rand)) { /* CHANGE */ case 0: /* fall through */ @@ -90,9 +92,19 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; /* ERASE - only if large enough */ case 3: - if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); } - --m_size; + if (m_size > 8) { + + m.erase(m.begin() + rand_below(afl_ptr, m_size)); + --m_size; + + } else { + + max_rand = 3; + + } + break; + // TODO: add full line insert splice, replace splace, delete } @@ -119,9 +131,16 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + if (unlikely(debug)) { + + DEBUG(stderr, "MUTATED to %u bytes:\n", mutated_size); + fwrite(output.data(), 1, mutated_size, stderr); + DEBUG(stderr, "\n---\n"); + + } + memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; - DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size, mutated_out); return mutated_size; } @@ -292,11 +311,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, while (regex_search(cur, ende, match, regex_string)) { prev = cur; - found = match[1].first; - cur = match[1].second; - DEBUG(stderr, - "string \"%s\" found at start %lu offset %lu continue at %lu\n", - match[1].str().c_str(), prev - input.begin(), match.position(), + found = match[0].first; + cur = match[0].second; + DEBUG(stderr, "string %s found at start %lu offset %lu continue at %lu\n", + match[0].str().c_str(), prev - input.begin(), match.position(), cur - input.begin()); if (prev < found) { // there are items between search start and find sregex_token_iterator it{prev, found, regex_whitespace, -1}; @@ -361,7 +379,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[1].length() > 0) { tokens.push_back(match[1]); } + if (match[0].length() > 0) { 
tokens.push_back(match[0]); } } From 8cc1c6c54edbeb5ac7a8bcb050eb7976009517fa Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 16 Jan 2023 10:18:08 +0100 Subject: [PATCH 05/77] nits --- src/afl-fuzz-one.c | 32 +++++++++++++++++++------------- src/afl-fuzz.c | 23 +++++++++++++++++++++++ 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 97855607..eaf65987 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -584,7 +584,7 @@ u8 fuzz_one_original(afl_state_t *afl) { if it has gone through deterministic testing in earlier, resumed runs (passed_det). */ - if (likely(afl->queue_cur->passed_det) || likely(afl->skip_deterministic) || + if (likely(afl->skip_deterministic) || likely(afl->queue_cur->passed_det) || likely(perf_score < (afl->queue_cur->depth * 30 <= afl->havoc_max_mult * 100 ? afl->queue_cur->depth * 30 @@ -1908,9 +1908,10 @@ custom_mutator_stage: afl->stage_name = "custom mutator"; afl->stage_short = "custom"; - afl->stage_max = HAVOC_CYCLES * perf_score / afl->havoc_div / 100; afl->stage_val_type = STAGE_VAL_NONE; bool has_custom_fuzz = false; + u32 shift = unlikely(afl->custom_only) ? 7 : 8; + afl->stage_max = (HAVOC_CYCLES * perf_score / afl->havoc_div) >> shift; if (afl->stage_max < HAVOC_MIN) { afl->stage_max = HAVOC_MIN; } @@ -2063,8 +2064,9 @@ havoc_stage: afl->stage_name = "havoc"; afl->stage_short = "havoc"; - afl->stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * - perf_score / afl->havoc_div / 100; + afl->stage_max = ((doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * + perf_score / afl->havoc_div) >> + 7; } else { @@ -2073,7 +2075,7 @@ havoc_stage: snprintf(afl->stage_name_buf, STAGE_BUF_SIZE, "splice %u", splice_cycle); afl->stage_name = afl->stage_name_buf; afl->stage_short = "splice"; - afl->stage_max = SPLICE_HAVOC * perf_score / afl->havoc_div / 100; + afl->stage_max = (SPLICE_HAVOC * perf_score / afl->havoc_div) >> 7; } @@ -4621,8 +4623,9 @@ pacemaker_fuzzing: afl->stage_name = MOpt_globals.havoc_stagename; afl->stage_short = MOpt_globals.havoc_stagenameshort; - afl->stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * - perf_score / afl->havoc_div / 100; + afl->stage_max = ((doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * + perf_score / afl->havoc_div) >> + 7; } else { @@ -4632,7 +4635,7 @@ pacemaker_fuzzing: MOpt_globals.splice_stageformat, splice_cycle); afl->stage_name = afl->stage_name_buf; afl->stage_short = MOpt_globals.splice_stagenameshort; - afl->stage_max = SPLICE_HAVOC * perf_score / afl->havoc_div / 100; + afl->stage_max = (SPLICE_HAVOC * perf_score / afl->havoc_div) >> 7; } @@ -5792,10 +5795,8 @@ void pso_updating(afl_state_t *afl) { } -/* larger change for MOpt implementation: the original fuzz_one was renamed - to fuzz_one_original. All documentation references to fuzz_one therefore - mean fuzz_one_original */ - +/* The entry point for the mutator, choosing the default mutator, and/or MOpt + depending on the configuration. 
*/ u8 fuzz_one(afl_state_t *afl) { int key_val_lv_1 = 0, key_val_lv_2 = 0; @@ -5818,7 +5819,12 @@ u8 fuzz_one(afl_state_t *afl) { #endif - // if limit_time_sig == -1 then both are run after each other + /* + -L command line paramter => limit_time_sig value + limit_time_sig == 0 then run the default mutator + limit_time_sig > 0 then run MOpt + limit_time_sig < 0 both are run + */ if (afl->limit_time_sig <= 0) { key_val_lv_1 = fuzz_one_original(afl); } diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 138df26c..5e0ecd1e 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -1580,6 +1580,29 @@ int main(int argc, char **argv_orig, char **envp) { } + if (afl->limit_time_sig > 0 && afl->custom_mutators_count) { + + if (afl->custom_only) { + + FATAL("Custom mutators are incompatible with MOpt (-L)"); + + } + + u32 custom_fuzz = 0; + LIST_FOREACH(&afl->custom_mutator_list, struct custom_mutator, { + + if (el->afl_custom_fuzz) { custom_fuzz = 1; } + + }); + + if (custom_fuzz) { + + WARNF("afl_custom_fuzz is incompatible with MOpt (-L)"); + + } + + } + if (afl->afl_env.afl_max_det_extras) { s32 max_det_extras = atoi(afl->afl_env.afl_max_det_extras); From 4b915207c42f8100f306778f617d7003c3e2193f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 16 Jan 2023 17:05:04 +0100 Subject: [PATCH 06/77] autotokens - much better tokenizer --- custom_mutators/autotokens/autotokens.cpp | 311 +++++++++++++--------- 1 file changed, 181 insertions(+), 130 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 9fbdf52a..850692a1 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -28,22 +28,41 @@ typedef struct my_mutator { #define DEBUG \ if (unlikely(debug)) fprintf -static afl_state *afl_ptr; -static int debug = AUTOTOKENS_DEBUG; -static u32 current_id = 0; -static u32 valid_structures = 0; -static u32 extras_cnt = 0, a_extras_cnt = 0; +static afl_state *afl_ptr; +static int debug = AUTOTOKENS_DEBUG; +static u32 current_id; +static u32 valid_structures; +static u32 whitespace_ids; +static u32 extras_cnt, a_extras_cnt; +static u64 all_spaces, all_tabs, all_lf, all_ws; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); -static regex regex_comment_star("/\\*(.|\n)*?\\*/", - regex::multiline | regex::optimize); -static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); -static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); -static regex regex_whitespace(R"([ \t]+)", regex::optimize); +// static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); static vector *s; // the structure of the currently selected input +u32 good_whitespace_or_singleval() { + + u32 i = rand_below(afl_ptr, current_id); + if (id_to_token[i].size() == 1) { return i; } + i = rand_below(afl_ptr, all_ws); + if (i < all_spaces) { + + return 0; + + } else if (i < all_tabs) { + + return 1; + + } else + + return 2; // linefeed + +} + extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u8 **out_buf, u8 *add_buf, size_t add_buf_size, size_t max_size) { @@ -68,30 +87,76 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, for (i = 0; i < rounds; ++i) { - 
u32 item, new_item; - switch (rand_below(afl_ptr, max_rand)) { /* CHANGE */ case 0: /* fall through */ - case 1: - item = rand_below(afl_ptr, m_size); + case 1: { + + u32 pos = rand_below(afl_ptr, m_size); + u32 cur_item = m[pos], new_item; do { - new_item = 1 + rand_below(afl_ptr, current_id); + new_item = rand_below(afl_ptr, current_id); - } while (unlikely(new_item == m[item])); + } while (unlikely( - m[item] = new_item; + new_item == cur_item || + (whitespace_ids < new_item && whitespace_ids >= cur_item) || + (whitespace_ids >= new_item && whitespace_ids < cur_item))); + + DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item); + m[pos] = new_item; break; - /* INSERT (+1 so we insert also after last place) */ - case 2: - new_item = 1 + rand_below(afl_ptr, current_id); - m.insert(m.begin() + rand_below(afl_ptr, m_size + 1), new_item); + + } + + /* INSERT (m_size +1 so we insert also after last place) */ + case 2: { + + u32 new_item; + do { + + new_item = rand_below(afl_ptr, current_id); + + } while (new_item >= whitespace_ids); + + u32 pos = rand_below(afl_ptr, m_size + 1); + m.insert(m.begin() + pos, new_item); ++m_size; + + // if we insert an identifier or string we might need whitespace + if (id_to_token[new_item].size() > 1) { + + // need to insert before? + + if (pos && m[pos - 1] >= whitespace_ids && + id_to_token[m[pos - 1]].size() > 1) { + + m.insert(m.begin() + pos, good_whitespace_or_singleval()); + ++m_size; + + } + + if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && + id_to_token[m[pos + 1]].size() > 1) { + + // need to insert after? + + m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); + ++m_size; + + } + + } + break; + + } + /* ERASE - only if large enough */ - case 3: + case 3: { + if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); @@ -105,6 +170,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; + } + // TODO: add full line insert splice, replace splace, delete } @@ -112,12 +179,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } string output; - u32 m_size_1 = m_size - 1; for (i = 0; i < m_size; ++i) { output += id_to_token[m[i]]; - if (likely(i < m_size_1)) { output += " "; } } @@ -183,9 +248,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (ok) { - ++current_id; token_to_id[(char *)ptr] = current_id; id_to_token[current_id] = (char *)ptr; + ++current_id; } @@ -212,9 +277,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (ok) { - ++current_id; token_to_id[(char *)ptr] = current_id; id_to_token[current_id] = (char *)ptr; + ++current_id; } @@ -257,7 +322,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string input; input.resize(len); rewind(fp); - fread(input.data(), input.size(), 1, fp); + fread((void *)input.data(), input.size(), 1, fp); fclose(fp); if (!afl_ptr->shm.cmplog_mode) { @@ -287,28 +352,34 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); - input = regex_replace(input, regex_comment_slash, "$2"); + // input = regex_replace(input, regex_comment_slash, "$2"); input = regex_replace(input, regex_comment_star, ""); DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), filename, input.c_str()); - /* - u32 spaces = count(input.begin(), input.end(), ' '); - u32 tabs = count(input.begin(), input.end(), '\t'); - u32 linefeeds = count(input.begin(), input.end(), '\n'); + u32 
spaces = count(input.begin(), input.end(), ' '); + u32 tabs = count(input.begin(), input.end(), '\t'); + u32 linefeeds = count(input.begin(), input.end(), '\n'); bool ends_with_linefeed = input[input.length() - 1] == '\n'; DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, linefeeds, ends_with_linefeed); - */ + all_spaces += spaces; + all_tabs += tabs; + all_lf += linefeeds; + all_ws = all_spaces + all_tabs + all_lf; // now extract all tokens vector tokens; smatch match; - string::const_iterator cur = input.begin(), ende = input.end(), last = cur, - found, prev; + string::const_iterator cur = input.begin(), ende = input.end(), found, prev; - while (regex_search(cur, ende, match, regex_string)) { + DEBUG(stderr, "START!\n"); + + while (regex_search(cur, ende, match, regex_string, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { prev = cur; found = match[0].first; @@ -316,62 +387,42 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "string %s found at start %lu offset %lu continue at %lu\n", match[0].str().c_str(), prev - input.begin(), match.position(), cur - input.begin()); + if (prev < found) { // there are items between search start and find - sregex_token_iterator it{prev, found, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase( - std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { return s.size() == 0; }), - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + while (prev < found) { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); - if (unlikely(debug)) - for (auto x : tokenized) { + if (isspace(*prev)) { - cerr << x << endl; + auto start = prev; + while (isspace(*prev)) { - } - - for (auto token : tokenized) { - - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; - - while (regex_search(c, e, m, regex_word)) { - - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { - - // there are items between search start and find - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); - tokens.push_back(std::string(p, f)); + ++prev; } - DEBUG(stderr, - "SUBstring \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); - tokens.push_back(m[0].str()); + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); - } + } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { - if (c < e) { + auto start = prev; + while (isalnum(*prev) || *prev == '$' || *prev == '_' || + *prev == '.' 
|| *prev == '/') { - if (unlikely(debug)) { - - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + ++prev; } - tokens.push_back(std::string(c, e)); + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); + + } else { + + tokens.push_back(std::string(prev, prev + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *prev); + ++prev; } @@ -383,68 +434,44 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } + DEBUG(stderr, "AFTER all strings\n"); + if (cur < ende) { - sregex_token_iterator it{cur, ende, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase( - std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { return s.size() == 0; }), - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + while (cur < ende) { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); - if (unlikely(debug)) - for (auto x : tokenized) { + if (isspace(*cur)) { - cerr << x << endl; + auto start = cur; + while (isspace(*cur)) { - } - - for (auto token : tokenized) { - - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; - - while (regex_search(c, e, m, regex_word)) { - - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { - - // there are items between search start and find - if (unlikely(debug)) { - - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(p, f)); + ++cur; } - DEBUG(stderr, - "SUB2string \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); - tokens.push_back(m[0].str()); + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); - } + } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { - if (c < e) { + auto start = cur; + while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' 
|| + *cur == '/') { - if (unlikely(debug)) { - - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + ++cur; } - tokens.push_back(std::string(c, e)); + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); + + } else { + + tokens.push_back(std::string(cur, cur + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *cur); + ++cur; } @@ -457,7 +484,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "DUMPING TOKENS:\n"); for (u32 i = 0; i < tokens.size(); ++i) { - DEBUG(stderr, "%s ", tokens[i].c_str()); + DEBUG(stderr, "%s", tokens[i].c_str()); } @@ -475,10 +502,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if ((id = token_to_id[tokens[i]]) == 0) { // First time we see this token, add it to the list - ++current_id; token_to_id[tokens[i]] = current_id; id_to_token[current_id] = tokens[i]; structure->push_back(current_id); + ++current_id; } else { @@ -529,6 +556,30 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { data->afl = afl_ptr = afl; + // set common whitespace tokens + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t"] = current_id; + id_to_token[current_id] = "\t"; + ++current_id; + token_to_id["\n"] = current_id; + id_to_token[current_id] = "\n"; + ++current_id; + token_to_id["\r\n"] = current_id; + id_to_token[current_id] = "\r\n"; + ++current_id; + token_to_id[" \n"] = current_id; + id_to_token[current_id] = " \n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t"] = current_id; + id_to_token[current_id] = "\t\t"; + ++current_id; + whitespace_ids = current_id; + return data; } From 33f41e3974348d3b0b71b3a30a6483bb0418068c Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 17 Jan 2023 09:52:35 +0100 Subject: [PATCH 07/77] autotokens: print stats at exit --- custom_mutators/autotokens/README | 7 ++++--- custom_mutators/autotokens/autotokens.cpp | 12 ++++++++++++ include/config.h | 4 ++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 6849279e..0dcc6a3e 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -1,6 +1,6 @@ # autotokens -This implements an improved autotoken idea presented in +This implements an improved autotoken grammar fuzzing idea presented in [Token-Level Fuzzing][https://www.usenix.org/system/files/sec21-salls.pdf]. It is a grammar fuzzer without actually knowing the grammar. @@ -8,5 +8,6 @@ It is recommended to run with together in an instance with `CMPLOG`. If you have a dictionary (`-x`) this improves this custom grammar mutator. -If **not** run with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY`, -to concentrate on grammar bug classes. +If **not** running with `CMPLOG`, it is possible to set +`AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes. 
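+
+Like any custom mutator it is loaded via `AFL_CUSTOM_MUTATOR_LIBRARY`; a
+typical invocation (paths and dictionary name are only illustrative) could be:
+`AFL_CUSTOM_MUTATOR_LIBRARY=custom_mutators/autotokens/autotokens.so afl-fuzz -i in -o out -x dict.dct -c ./target.cmplog -- ./target`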
+ diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 850692a1..d6b269fd 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -35,6 +35,7 @@ static u32 valid_structures; static u32 whitespace_ids; static u32 extras_cnt, a_extras_cnt; static u64 all_spaces, all_tabs, all_lf, all_ws; +static u64 all_structure_items; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; @@ -519,6 +520,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = structure; s = structure; ++valid_structures; + all_structure_items += structure->size(); // we are done! DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", @@ -586,6 +588,16 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { extern "C" void afl_custom_deinit(my_mutator_t *data) { + /* we use this to print statistics at exit :-) + needs to be stderr as stdout is filtered */ + + fprintf(stderr, + "\n\nAutotoken mutator statistics:\n" + " Number of all seen tokens: %lu\n" + " Number of input structures: %lu\n" + " Number of all items in structures: %lu\n\n", + current_id - 1, valid_structures, all_structure_items); + free(data); } diff --git a/include/config.h b/include/config.h index 6cfaac11..f8a742f2 100644 --- a/include/config.h +++ b/include/config.h @@ -364,9 +364,9 @@ * * ***********************************************************/ -/* Call count interval between reseeding the libc PRNG from /dev/urandom: */ +/* Call count interval between reseeding the PRNG from /dev/urandom: */ -#define RESEED_RNG 100000 +#define RESEED_RNG 2500000 /* The default maximum testcase cache size in MB, 0 = disable. A value between 50 and 250 is a good default value. Note that the From efe57c936880608a2de452340d63f262470d9fcd Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 17 Jan 2023 09:57:23 +0100 Subject: [PATCH 08/77] more whitespace --- custom_mutators/autotokens/autotokens.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index d6b269fd..5580512a 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -559,6 +559,8 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { data->afl = afl_ptr = afl; // set common whitespace tokens + // we deliberately do not put uncommon ones here to these will count as + // identifier tokens. 
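+  // these common whitespace strings get the lowest token ids; whitespace_ids
+  // is set right after them, and the mutator uses that boundary to avoid
+  // swapping whitespace with non-whitespace tokens and to decide when a
+  // separator needs to be inserted between two multi-character tokens.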
token_to_id[" "] = current_id; id_to_token[current_id] = " "; ++current_id; @@ -580,6 +582,21 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { token_to_id["\t\t"] = current_id; id_to_token[current_id] = "\t\t"; ++current_id; + token_to_id["\n\n"] = current_id; + id_to_token[current_id] = "\n\n"; + ++current_id; + token_to_id["\r\n\r\n"] = current_id; + id_to_token[current_id] = "\r\n\r\n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t\t\t"] = current_id; + id_to_token[current_id] = "\t\t\t\t"; + ++current_id; + token_to_id["\n\n\n\n"] = current_id; + id_to_token[current_id] = "\n\n\n\n"; + ++current_id; whitespace_ids = current_id; return data; From a41fd5cc5c4a5073f38adf06270e2985c88da9d5 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 11:46:28 +0100 Subject: [PATCH 09/77] alternate tokenize, options --- custom_mutators/autotokens/README | 9 + custom_mutators/autotokens/autotokens.cpp | 432 ++++++++++++++++++---- 2 files changed, 365 insertions(+), 76 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 0dcc6a3e..f6e9c753 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -11,3 +11,12 @@ If you have a dictionary (`-x`) this improves this custom grammar mutator. If **not** running with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes. +## Configuration via environment variables + +`AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items +`AUTOTOKENS_COMMENT` - what character or string starts a comment which will be + removed. Default: `/* ... */` +`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation + (experimental) +`AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, + default is " " diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 5580512a..28ef91e2 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -15,7 +15,10 @@ extern "C" { #include #define AUTOTOKENS_DEBUG 0 +#define AUTOTOKENS_ONLY_FAV 0 +#define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 +#define AUTOTOKENS_WHITESPACE " " using namespace std; @@ -30,6 +33,8 @@ typedef struct my_mutator { static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; +static int only_fav = AUTOTOKENS_ONLY_FAV; +static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -39,9 +44,12 @@ static u64 all_structure_items; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -// static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static string whitespace = AUTOTOKENS_WHITESPACE; +static regex *regex_comment_custom; static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", regex::multiline | regex::optimize); +static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); static vector *s; // the structure of the currently selected input @@ -84,15 +92,15 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: 
%u \n", m.size(), rounds); - u32 max_rand = 4; + u32 max_rand = 7; for (i = 0; i < rounds; ++i) { switch (rand_below(afl_ptr, max_rand)) { /* CHANGE */ - case 0: /* fall through */ - case 1: { + case 0 ... 3: /* fall through */ + { u32 pos = rand_below(afl_ptr, m_size); u32 cur_item = m[pos], new_item; @@ -103,8 +111,9 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } while (unlikely( new_item == cur_item || - (whitespace_ids < new_item && whitespace_ids >= cur_item) || - (whitespace_ids >= new_item && whitespace_ids < cur_item))); + (!alternative_tokenize && + ((whitespace_ids < new_item && whitespace_ids >= cur_item) || + (whitespace_ids >= new_item && whitespace_ids < cur_item))))); DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; @@ -113,7 +122,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* INSERT (m_size +1 so we insert also after last place) */ - case 2: { + case 4 ... 5: { u32 new_item; do { @@ -126,26 +135,30 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, m.insert(m.begin() + pos, new_item); ++m_size; - // if we insert an identifier or string we might need whitespace - if (id_to_token[new_item].size() > 1) { + if (likely(!alternative_tokenize)) { - // need to insert before? + // if we insert an identifier or string we might need whitespace + if (id_to_token[new_item].size() > 1) { - if (pos && m[pos - 1] >= whitespace_ids && - id_to_token[m[pos - 1]].size() > 1) { + // need to insert before? - m.insert(m.begin() + pos, good_whitespace_or_singleval()); - ++m_size; + if (pos && m[pos - 1] >= whitespace_ids && + id_to_token[m[pos - 1]].size() > 1) { - } + m.insert(m.begin() + pos, good_whitespace_or_singleval()); + ++m_size; - if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && - id_to_token[m[pos + 1]].size() > 1) { + } - // need to insert after? + if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && + id_to_token[m[pos + 1]].size() > 1) { - m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); - ++m_size; + // need to insert after? 
+ + m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); + ++m_size; + + } } @@ -156,7 +169,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* ERASE - only if large enough */ - case 3: { + case 6: { if (m_size > 8) { @@ -165,7 +178,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } else { - max_rand = 3; + max_rand = 6; } @@ -180,10 +193,16 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } string output; + u32 m_size_1 = m_size - 1; for (i = 0; i < m_size; ++i) { output += id_to_token[m[i]]; + if (unlikely(alternative_tokenize && i < m_size_1)) { + + output += whitespace; + + } } @@ -219,7 +238,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (likely(!debug)) { - if (afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) { + if ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; return 0; @@ -353,8 +373,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); - // input = regex_replace(input, regex_comment_slash, "$2"); - input = regex_replace(input, regex_comment_star, ""); + if (regex_comment_custom) { + + input = regex_replace(input, *regex_comment_custom, "$2"); + + } else { + + input = regex_replace(input, regex_comment_star, ""); + + } DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), filename, input.c_str()); @@ -377,53 +404,105 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "START!\n"); - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + if (likely(!alternative_tokenize)) { - prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + while (regex_search(cur, ende, match, regex_string, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { - if (prev < found) { // there are items between search start and find - while (prev < found) { + prev = cur; + found = match[0].first; + cur = match[0].second; + DEBUG(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + match[0].str().c_str(), prev - input.begin(), match.position(), + cur - input.begin()); - if (isspace(*prev)) { + if (prev < found) { // there are items between search start and find + while (prev < found) { - auto start = prev; - while (isspace(*prev)) { + if (isspace(*prev)) { + auto start = prev; + while (isspace(*prev)) { + + ++prev; + + } + + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); + + } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { + + auto start = prev; + while (isalnum(*prev) || *prev == '$' || *prev == '_' || + *prev == '.' 
|| *prev == '/') { + + ++prev; + + } + + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); + + } else { + + tokens.push_back(std::string(prev, prev + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *prev); ++prev; } - tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + } + + } + + if (match[0].length() > 0) { tokens.push_back(match[0]); } + + } + + DEBUG(stderr, "AFTER all strings\n"); + + if (cur < ende) { + + while (cur < ende) { + + if (isspace(*cur)) { + + auto start = cur; + while (isspace(*cur)) { + + ++cur; + + } + + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, tokens[tokens.size() - 1].c_str()); - } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { + } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { - auto start = prev; - while (isalnum(*prev) || *prev == '$' || *prev == '_' || - *prev == '.' || *prev == '/') { + auto start = cur; + while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' || + *cur == '/') { - ++prev; + ++cur; } - tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, tokens[tokens.size() - 1].c_str()); } else { - tokens.push_back(std::string(prev, prev + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *prev); - ++prev; + tokens.push_back(std::string(cur, cur + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *cur); + ++cur; } @@ -431,48 +510,227 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[0].length() > 0) { tokens.push_back(match[0]); } + } else { - } + // alternative tokenize - DEBUG(stderr, "AFTER all strings\n"); + while (regex_search(cur, ende, match, regex_string)) { - if (cur < ende) { + prev = cur; + found = match[0].first; + cur = match[0].second; + DEBUG(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + match[0].str().c_str(), prev - input.begin(), match.position(), + cur - input.begin()); + if (prev < found) { // there are items between search start and find + sregex_token_iterator it{prev, found, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { - while (cur < ende) { + return s.size() == 0; - if (isspace(*cur)) { + }), - auto start = cur; - while (isspace(*cur)) { + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - ++cur; + if (unlikely(debug)) { + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; + + } } - tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + for (auto token : tokenized) { - } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; - auto start = cur; - while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' 
|| - *cur == '/') { + while (regex_search(c, e, m, regex_word)) { - ++cur; + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + while (p < f) { + + if (unlikely(debug)) { + + string foo(p, p + 1); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(p, p + 1)); + ++p; + + } + + /* + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", + foo.c_str()); tokens.push_back(std::string(p, f)); + */ + + } + + DEBUG( + stderr, + "SUBstring \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + while (c < e) { + + if (unlikely(debug)) { + + string foo(c, c + 1); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(c, c + 1)); + ++c; + + } + + /* + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", + foo.c_str()); + + } + + tokens.push_back(std::string(c, e)); + */ + + } } - tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + } - } else { + if (match[0].length() > 0) { tokens.push_back(match[0]); } - tokens.push_back(std::string(cur, cur + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *cur); - ++cur; + } + + if (cur < ende) { + + sregex_token_iterator it{cur, ende, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase( + std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { return s.size() == 0; }), + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + + if (unlikely(debug)) { + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; + + } + + } + + for (auto token : tokenized) { + + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; + + while (regex_search(c, e, m, regex_word)) { + + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + while (p < f) { + + if (unlikely(debug)) { + + string foo(p, p + 1); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(p, p + 1)); + ++p; + + } + + /* + if (unlikely(debug)) { + + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", + foo.c_str()); + + } + + tokens.push_back(std::string(p, f)); + */ + + } + + DEBUG(stderr, + "SUB2string \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + while (c < e) { + + if (unlikely(debug)) { + + string foo(c, c + 1); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(c, c + 1)); + ++c; + + } + + /* + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(c, e)); + */ + + } } @@ -483,9 +741,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (unlikely(debug)) { DEBUG(stderr, "DUMPING TOKENS:\n"); + u32 size_1 = tokens.size() - 1; for (u32 i = 0; i < tokens.size(); ++i) { DEBUG(stderr, "%s", tokens[i].c_str()); + if (unlikely(alternative_tokenize && i < size_1)) { + + DEBUG(stderr, "%s", whitespace.c_str()); + + } } @@ -556,6 +820,22 @@ extern "C" 
my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } + if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } + if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } + if (getenv("AUTOTOKENS_WHITESPACE")) { + + whitespace = getenv("AUTOTOKENS_WHITESPACE"); + + } + + if (getenv("AUTOTOKENS_COMMENT")) { + + char buf[256]; + snprintf(buf, sizeof(buf), "(%s.*)([\r\n]?)", getenv("AUTOTOKENS_COMMENT")); + regex_comment_custom = new regex(buf, regex::optimize); + + } + data->afl = afl_ptr = afl; // set common whitespace tokens From 70f4b456faf8e361f6e0a34246708380c94cb36e Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 13:58:27 +0100 Subject: [PATCH 10/77] fixes --- custom_mutators/autotokens/Makefile | 7 ++++++- custom_mutators/autotokens/autotokens.cpp | 24 ++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 5dd52dee..8af63635 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,7 +1,12 @@ +ifdef debug + CFLAGS += "-fsanitize=address -Wall" + CXX := clang++ +endif + all: autotokens.so autotokens.so: autotokens.cpp $(CXX) -g -O3 $(CFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o clean: - rm -f autotokens.so *~ core \ No newline at end of file + rm -f autotokens.so *~ core diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 28ef91e2..57c35846 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -48,7 +48,7 @@ static string whitespace = AUTOTOKENS_WHITESPACE; static regex *regex_comment_custom; static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", regex::multiline | regex::optimize); -static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); static regex regex_whitespace(R"([ \t]+)", regex::optimize); static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); static vector *s; // the structure of the currently selected input @@ -514,7 +514,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // alternative tokenize - while (regex_search(cur, ende, match, regex_string)) { + while (regex_search(cur, ende, match, regex_string, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { prev = cur; found = match[0].first; @@ -553,7 +556,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word)) { + while (regex_search(c, e, m, regex_word, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { p = c; f = m[0].first; @@ -658,7 +664,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word)) { + while (regex_search(c, e, m, regex_word, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { p = c; f = m[0].first; @@ -820,6 +829,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } + if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if 
(getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } if (getenv("AUTOTOKENS_WHITESPACE")) { @@ -890,9 +900,9 @@ extern "C" void afl_custom_deinit(my_mutator_t *data) { fprintf(stderr, "\n\nAutotoken mutator statistics:\n" - " Number of all seen tokens: %lu\n" - " Number of input structures: %lu\n" - " Number of all items in structures: %lu\n\n", + " Number of all seen tokens: %u\n" + " Number of input structures: %u\n" + " Number of all items in structures: %llu\n\n", current_id - 1, valid_structures, all_structure_items); free(data); From 0db662db7b433a08b01de7f5a989843450919b88 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 14:21:44 +0100 Subject: [PATCH 11/77] fix --- custom_mutators/autotokens/autotokens.cpp | 78 ++++++++++++----------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 57c35846..94f86413 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -851,43 +851,47 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { // set common whitespace tokens // we deliberately do not put uncommon ones here to these will count as // identifier tokens. - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t"] = current_id; - id_to_token[current_id] = "\t"; - ++current_id; - token_to_id["\n"] = current_id; - id_to_token[current_id] = "\n"; - ++current_id; - token_to_id["\r\n"] = current_id; - id_to_token[current_id] = "\r\n"; - ++current_id; - token_to_id[" \n"] = current_id; - id_to_token[current_id] = " \n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t"] = current_id; - id_to_token[current_id] = "\t\t"; - ++current_id; - token_to_id["\n\n"] = current_id; - id_to_token[current_id] = "\n\n"; - ++current_id; - token_to_id["\r\n\r\n"] = current_id; - id_to_token[current_id] = "\r\n\r\n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t\t\t"] = current_id; - id_to_token[current_id] = "\t\t\t\t"; - ++current_id; - token_to_id["\n\n\n\n"] = current_id; - id_to_token[current_id] = "\n\n\n\n"; - ++current_id; - whitespace_ids = current_id; + if (!alternative_tokenize) { + + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t"] = current_id; + id_to_token[current_id] = "\t"; + ++current_id; + token_to_id["\n"] = current_id; + id_to_token[current_id] = "\n"; + ++current_id; + token_to_id["\r\n"] = current_id; + id_to_token[current_id] = "\r\n"; + ++current_id; + token_to_id[" \n"] = current_id; + id_to_token[current_id] = " \n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t"] = current_id; + id_to_token[current_id] = "\t\t"; + ++current_id; + token_to_id["\n\n"] = current_id; + id_to_token[current_id] = "\n\n"; + ++current_id; + token_to_id["\r\n\r\n"] = current_id; + id_to_token[current_id] = "\r\n\r\n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t\t\t"] = current_id; + id_to_token[current_id] = "\t\t\t\t"; + ++current_id; + token_to_id["\n\n\n\n"] = current_id; + id_to_token[current_id] = "\n\n\n\n"; + ++current_id; + whitespace_ids = current_id; + + } return data; From 
22f757a169d3da3081306c0f861ef99a509073fe Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 14:33:06 +0100 Subject: [PATCH 12/77] fix --- custom_mutators/autotokens/autotokens.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 94f86413..7aecb010 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -129,7 +129,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, new_item = rand_below(afl_ptr, current_id); - } while (new_item >= whitespace_ids); + } while (!alternative_tokenize && new_item >= whitespace_ids); u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); From 8fe5e29104fc514551bbc926c5142dac68562b43 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 14:56:26 +0100 Subject: [PATCH 13/77] ignore timeout env option --- docs/env_variables.md | 3 +++ include/afl-fuzz.h | 2 +- include/envs.h | 1 + src/afl-fuzz-bitmap.c | 6 ++++++ src/afl-fuzz-state.c | 7 +++++++ src/afl-fuzz.c | 3 ++- 6 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/env_variables.md b/docs/env_variables.md index 22a5c386..0a57d190 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -354,6 +354,9 @@ checks or alter some of the more exotic semantics of the tool: - Setting `AFL_KEEP_TIMEOUTS` will keep longer running inputs if they reach new coverage + - On the contrary, if you are not interested in any timeouts, you can set + `AFL_IGNORE_TIMEOUTS` to get a bit of speed instead. + - `AFL_EXIT_ON_SEED_ISSUES` will restore the vanilla afl-fuzz behavior which does not allow crashes or timeout seeds in the initial -i corpus. diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index edef9207..69fea579 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -398,7 +398,7 @@ typedef struct afl_env_vars { afl_cycle_schedules, afl_expand_havoc, afl_statsd, afl_cmplog_only_new, afl_exit_on_seed_issues, afl_try_affinity, afl_ignore_problems, afl_keep_timeouts, afl_pizza_mode, afl_no_crash_readme, - afl_no_startup_calibration; + afl_ignore_timeouts, afl_no_startup_calibration; u8 *afl_tmpdir, *afl_custom_mutator_library, *afl_python_module, *afl_path, *afl_hang_tmout, *afl_forksrv_init_tmout, *afl_preload, diff --git a/include/envs.h b/include/envs.h index f4cdf390..0770f94d 100644 --- a/include/envs.h +++ b/include/envs.h @@ -103,6 +103,7 @@ static char *afl_environment_variables[] = { "AFL_HARDEN", "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES", "AFL_IGNORE_PROBLEMS", + "AFL_IGNORE_TIMEOUTS", "AFL_IGNORE_UNKNOWN_ENVS", "AFL_IMPORT_FIRST", "AFL_INPUT_LEN_MIN", diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c index 485b82db..b4e9537e 100644 --- a/src/afl-fuzz-bitmap.c +++ b/src/afl-fuzz-bitmap.c @@ -457,6 +457,12 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) { if (unlikely(len == 0)) { return 0; } + if (unlikely(fault == FSRV_RUN_TMOUT && afl->afl_env.afl_ignore_timeouts)) { + + return 0; + + } + u8 fn[PATH_MAX]; u8 *queue_fn = ""; u8 new_bits = 0, keeping = 0, res, classified = 0, is_timeout = 0; diff --git a/src/afl-fuzz-state.c b/src/afl-fuzz-state.c index 896b5f71..104b1e4b 100644 --- a/src/afl-fuzz-state.c +++ b/src/afl-fuzz-state.c @@ -292,6 +292,13 @@ void read_afl_environment(afl_state_t *afl, char **envp) { afl->afl_env.afl_ignore_problems = get_afl_env(afl_environment_variables[i]) ? 
1 : 0; + } else if (!strncmp(env, "AFL_IGNORE_TIMEOUTS", + + afl_environment_variable_len)) { + + afl->afl_env.afl_ignore_timeouts = + get_afl_env(afl_environment_variables[i]) ? 1 : 0; + } else if (!strncmp(env, "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES", afl_environment_variable_len)) { diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 5e0ecd1e..4db55b5e 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -258,8 +258,9 @@ static void usage(u8 *argv0, int more_help) { "AFL_FORKSRV_INIT_TMOUT: time spent waiting for forkserver during startup (in ms)\n" "AFL_HANG_TMOUT: override timeout value (in milliseconds)\n" "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES: don't warn about core dump handlers\n" - "AFL_IGNORE_UNKNOWN_ENVS: don't warn on unknown env vars\n" "AFL_IGNORE_PROBLEMS: do not abort fuzzing if an incorrect setup is detected\n" + "AFL_IGNORE_TIMEOUTS: do not process or save any timeouts\n" + "AFL_IGNORE_UNKNOWN_ENVS: don't warn on unknown env vars\n" "AFL_IMPORT_FIRST: sync and import test cases from other fuzzer instances first\n" "AFL_INPUT_LEN_MIN/AFL_INPUT_LEN_MAX: like -g/-G set min/max fuzz length produced\n" "AFL_PIZZA_MODE: 1 - enforce pizza mode, 0 - disable for April 1st\n" From 14d8eb9e40a6329abcb2f153174b543349c68c13 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 22:17:14 +0100 Subject: [PATCH 14/77] autotoken: splicing; splice_optout --- custom_mutators/autotokens/Makefile | 6 +- custom_mutators/autotokens/autotokens.cpp | 103 ++++++++++++++++++++-- docs/custom_mutators.md | 11 +++ include/afl-fuzz.h | 14 +++ src/afl-fuzz-mutators.c | 13 +++ src/afl-fuzz-one.c | 3 +- src/afl-fuzz-python.c | 16 ++++ 7 files changed, 155 insertions(+), 11 deletions(-) diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 8af63635..ab1da4b6 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,5 +1,9 @@ ifdef debug - CFLAGS += "-fsanitize=address -Wall" + CFLAGS += -fsanitize=address -Wall + CXX := clang++ +endif +ifdef DEBUG + CFLAGS += -fsanitize=address -Wall CXX := clang++ endif diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 7aecb010..c9ec4352 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -19,6 +19,13 @@ extern "C" { #define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_WHITESPACE " " +#define AUTOTOKENS_SIZE_MIN 8 +#define AUTOTOKENS_SPLICE_MIN 4 +#define AUTOTOKENS_SPLICE_MAX 64 + +#if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN + #error SPLICE_MIN must be lower than SIZE_MIN +#endif using namespace std; @@ -42,6 +49,7 @@ static u32 extras_cnt, a_extras_cnt; static u64 all_spaces, all_tabs, all_lf, all_ws; static u64 all_structure_items; static unordered_map *> file_mapping; +static unordered_map *> id_mapping; static unordered_map token_to_id; static unordered_map id_to_token; static string whitespace = AUTOTOKENS_WHITESPACE; @@ -76,6 +84,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u8 **out_buf, u8 *add_buf, size_t add_buf_size, size_t max_size) { + (void)(data); + if (s == NULL) { *out_buf = NULL; @@ -92,14 +102,14 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); - u32 max_rand = 7; + u32 max_rand = 14; for (i = 0; i < rounds; ++i) { switch 
(rand_below(afl_ptr, max_rand)) { /* CHANGE */ - case 0 ... 3: /* fall through */ + case 0 ... 7: /* fall through */ { u32 pos = rand_below(afl_ptr, m_size); @@ -122,18 +132,19 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* INSERT (m_size +1 so we insert also after last place) */ - case 4 ... 5: { + case 8 ... 9: { u32 new_item; do { new_item = rand_below(afl_ptr, current_id); - } while (!alternative_tokenize && new_item >= whitespace_ids); + } while (unlikely(!alternative_tokenize && new_item >= whitespace_ids)); u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; + DEBUG(stderr, "INS: %u at %u\n", new_item, pos); if (likely(!alternative_tokenize)) { @@ -168,8 +179,63 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + /* SPLICING */ + case 10 ... 11: { + + u32 strategy = rand_below(afl_ptr, 4), dst_off, n; + auto src = id_mapping[rand_below(afl_ptr, valid_structures)]; + u32 src_size = src->size(); + u32 src_off = rand_below(afl_ptr, src_size - AUTOTOKENS_SPLICE_MIN); + u32 rand_r = 1 + MAX(AUTOTOKENS_SPLICE_MIN, + MIN(AUTOTOKENS_SPLICE_MAX, src_size - src_off)); + + switch (strategy) { + + // insert + case 0: { + + dst_off = rand_below(afl_ptr, m_size); + n = AUTOTOKENS_SPLICE_MIN + + rand_below(afl_ptr, MIN(AUTOTOKENS_SPLICE_MAX, + rand_r - AUTOTOKENS_SPLICE_MIN)); + m.insert(m.begin() + dst_off, src->begin() + src_off, + src->begin() + src_off + n); + m_size += n; + DEBUG(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + break; + + } + + // overwrite + default: { + + dst_off = rand_below(afl_ptr, m_size - AUTOTOKENS_SPLICE_MIN); + n = AUTOTOKENS_SPLICE_MIN + + rand_below( + afl_ptr, + MIN(AUTOTOKENS_SPLICE_MAX - AUTOTOKENS_SPLICE_MIN, + MIN(m_size - dst_off - AUTOTOKENS_SPLICE_MIN, + src_size - src_off - AUTOTOKENS_SPLICE_MIN))); + + for (u32 i = 0; i < n; ++i) { + + m[dst_off + i] = (*src)[src_off + i]; + + } + + DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); + break; + + } + + } + + break; + + } + /* ERASE - only if large enough */ - case 6: { + case 12 ... 
13: { if (m_size > 8) { @@ -178,7 +244,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } else { - max_rand = 6; + max_rand = 12; } @@ -236,12 +302,15 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { + (void)(data); + if (likely(!debug)) { if ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; + DEBUG(stderr, "cmplog not ascii or only_fav and not favorite\n"); return 0; } @@ -334,8 +403,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again - DEBUG(stderr, "Too short (%lu) %s\n", len, filename); s = NULL; + DEBUG(stderr, "Too short (%lu) %s\n", len, filename); return 0; } @@ -362,8 +431,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (((len * AFL_TXT_MIN_PERCENT) / 100) > valid_chars) { file_mapping[fn] = NULL; - DEBUG(stderr, "Not text (%lu) %s\n", len, filename); s = NULL; + DEBUG(stderr, "Not text (%lu) %s\n", len, filename); return 0; } @@ -766,6 +835,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } + if (tokens.size() < AUTOTOKENS_SIZE_MIN) { + + file_mapping[fn] = NULL; + s = NULL; + DEBUG(stderr, "too few tokens\n"); + return 0; + + } + /* Now we transform the tokens into an ID list and saved that */ structure = new vector(); @@ -791,8 +869,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // save the token structure to the file mapping file_mapping[fn] = structure; - s = structure; + id_mapping[valid_structures] = structure; ++valid_structures; + s = structure; all_structure_items += structure->size(); // we are done! @@ -897,6 +976,12 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } +extern "C" void afl_custom_splice_optout(my_mutator_t *data) { + + (void)(data); + +} + extern "C" void afl_custom_deinit(my_mutator_t *data) { /* we use this to print statistics at exit :-) diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md index 4ffeda7a..322caa5b 100644 --- a/docs/custom_mutators.md +++ b/docs/custom_mutators.md @@ -48,6 +48,7 @@ C/C++: ```c void *afl_custom_init(afl_state_t *afl, unsigned int seed); unsigned int afl_custom_fuzz_count(void *data, const unsigned char *buf, size_t buf_size); +void afl_custom_splice_optout(void *data); size_t afl_custom_fuzz(void *data, unsigned char *buf, size_t buf_size, unsigned char **out_buf, unsigned char *add_buf, size_t add_buf_size, size_t max_size); const char *afl_custom_describe(void *data, size_t max_description_len); size_t afl_custom_post_process(void *data, unsigned char *buf, size_t buf_size, unsigned char **out_buf); @@ -72,6 +73,9 @@ def init(seed): def fuzz_count(buf): return cnt +def splice_optout() + pass + def fuzz(buf, add_buf, max_size): return mutated_out @@ -132,6 +136,13 @@ def deinit(): # optional for Python for a specific queue entry, use this function. This function is most useful if `AFL_CUSTOM_MUTATOR_ONLY` is **not** used. +- `splice_optout` (optional): + + If this function is present, no splicing target is passed to the `fuzz` + function. This saves time if splicing data is not needed by the custom + fuzzing function. + This function is never called, just needs to be present to activate. + - `fuzz` (optional): This method performs custom mutations on a given input. 
It also accepts an diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 69fea579..1e8d085d 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -344,6 +344,7 @@ enum { /* 12 */ PY_FUNC_INTROSPECTION, /* 13 */ PY_FUNC_DESCRIBE, /* 14 */ PY_FUNC_FUZZ_SEND, + /* 15 */ PY_FUNC_SPLICE_OPTOUT, PY_FUNC_COUNT }; @@ -495,6 +496,7 @@ typedef struct afl_state { no_unlink, /* do not unlink cur_input */ debug, /* Debug mode */ custom_only, /* Custom mutator only mode */ + custom_splice_optout, /* Custom mutator no splice buffer */ is_main_node, /* if this is the main node */ is_secondary_node, /* if this is a secondary instance */ pizza_is_served; /* pizza mode */ @@ -828,6 +830,17 @@ struct custom_mutator { */ u32 (*afl_custom_fuzz_count)(void *data, const u8 *buf, size_t buf_size); + /** + * Opt-out of a splicing input for the fuzz mutator + * + * Empty dummy function. It's presence tells afl-fuzz not to pass a + * splice data pointer and len. + * + * @param data pointer returned in afl_custom_init by this custom mutator + * @noreturn + */ + void (*afl_custom_splice_optout)(void *data); + /** * Perform custom mutations on a given input * @@ -1057,6 +1070,7 @@ u8 havoc_mutation_probability_py(void *); u8 queue_get_py(void *, const u8 *); const char *introspection_py(void *); u8 queue_new_entry_py(void *, const u8 *, const u8 *); +void splice_optout(void *); void deinit_py(void *); #endif diff --git a/src/afl-fuzz-mutators.c b/src/afl-fuzz-mutators.c index 22e5262e..ce43064a 100644 --- a/src/afl-fuzz-mutators.c +++ b/src/afl-fuzz-mutators.c @@ -358,6 +358,19 @@ struct custom_mutator *load_custom_mutator(afl_state_t *afl, const char *fn) { } + /* "afl_custom_splice_optout", optional, never called */ + mutator->afl_custom_splice_optout = dlsym(dh, "afl_custom_splice_optout"); + if (!mutator->afl_custom_splice_optout) { + + ACTF("optional symbol 'afl_custom_splice_optout' not found."); + + } else { + + OKF("Found 'afl_custom_splice_optout'."); + afl->custom_splice_optout = 1; + + } + /* "afl_custom_fuzz_send", optional */ mutator->afl_custom_fuzz_send = dlsym(dh, "afl_custom_fuzz_send"); if (!mutator->afl_custom_fuzz_send) { diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index eaf65987..5e352dcb 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -1954,7 +1954,8 @@ custom_mutator_stage: u32 target_len = 0; /* check if splicing makes sense yet (enough entries) */ - if (likely(afl->ready_for_splicing_count > 1)) { + if (likely(!afl->custom_splice_optout && + afl->ready_for_splicing_count > 1)) { /* Pick a random other queue entry for passing to external API that has the necessary length */ diff --git a/src/afl-fuzz-python.c b/src/afl-fuzz-python.c index b509b936..69c305f7 100644 --- a/src/afl-fuzz-python.c +++ b/src/afl-fuzz-python.c @@ -248,6 +248,8 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) { PyObject_GetAttrString(py_module, "queue_get"); py_functions[PY_FUNC_FUZZ_SEND] = PyObject_GetAttrString(py_module, "fuzz_send"); + py_functions[PY_FUNC_SPLICE_OPTOUT] = + PyObject_GetAttrString(py_module, "splice_optout"); py_functions[PY_FUNC_QUEUE_NEW_ENTRY] = PyObject_GetAttrString(py_module, "queue_new_entry"); py_functions[PY_FUNC_INTROSPECTION] = @@ -394,6 +396,13 @@ void deinit_py(void *py_mutator) { } +void splice_optout_py(void *py_mutator) { + + // this is never called + (void)(py_mutator); + +} + struct custom_mutator *load_custom_mutator_py(afl_state_t *afl, char *module_name) { @@ -474,6 +483,13 @@ struct custom_mutator 
*load_custom_mutator_py(afl_state_t *afl, } + if (py_functions[PY_FUNC_SPLICE_OPTOUT]) { + + mutator->afl_custom_splice_optout = splice_optout_py; + afl->custom_splice_optout = 1; + + } + if (py_functions[PY_FUNC_QUEUE_NEW_ENTRY]) { mutator->afl_custom_queue_new_entry = queue_new_entry_py; From 17752465e6b3c70fd0104fae7bb1f84c1cb8bb66 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 22:31:55 +0100 Subject: [PATCH 15/77] nit --- custom_mutators/autotokens/README | 2 ++ custom_mutators/autotokens/TODO | 8 +------- custom_mutators/autotokens/autotokens.cpp | 7 ++----- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index f6e9c753..f82dcd98 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -11,6 +11,8 @@ If you have a dictionary (`-x`) this improves this custom grammar mutator. If **not** running with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes. +Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! + ## Configuration via environment variables `AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 2e5e384f..95b79373 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,12 +1,6 @@ -whitespace belassen oder notieren? MAYBE -0=space 1=tab 2=linefeed - cmplog: only add tokens that were found to fit? create from thin air if no good seed after a cycle and dict large enough? (static u32 no_of_struct_inputs;) -splice insert, splice overwrite -(linefeed, semicolon) - - +splicing -> check if whitespace/token is needed \ No newline at end of file diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index c9ec4352..5e683455 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -217,11 +217,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, MIN(m_size - dst_off - AUTOTOKENS_SPLICE_MIN, src_size - src_off - AUTOTOKENS_SPLICE_MIN))); - for (u32 i = 0; i < n; ++i) { - - m[dst_off + i] = (*src)[src_off + i]; - - } + copy(src->begin() + src_off, src->begin() + src_off + n, + m.begin() + dst_off); DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); break; From 45567791c66e128361a7533481b385497ced881f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 23:09:16 +0100 Subject: [PATCH 16/77] autotokens: define disable splice --- custom_mutators/autotokens/autotokens.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 5e683455..f6ab9ddd 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -22,6 +22,7 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 +#define AUTOTOKENS_SPLICE_DISABLE 0 #if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN #error SPLICE_MIN must be lower than SIZE_MIN @@ -102,7 +103,13 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); - u32 max_rand = 14; +#if AUTOTOKENS_SPLICE_DISABLE == 1 + #define AUTOTOKENS_MUT_MAX 12 +#else + #define AUTOTOKENS_MUT_MAX 14 
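  // max_rand starts at AUTOTOKENS_MUT_MAX; rand_below() then selects the
  // strategy: 0-7 change, 8-9 insert, 10-11 splice (when compiled in), and
  // any remaining value falls through to the default case, which erases.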
+#endif + + u32 max_rand = AUTOTOKENS_MUT_MAX; for (i = 0; i < rounds; ++i) { @@ -179,6 +186,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } +#if AUTOTOKENS_SPLICE_DISABLE != 1 /* SPLICING */ case 10 ... 11: { @@ -230,9 +238,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; } +#endif /* ERASE - only if large enough */ - case 12 ... 13: { + default: { if (m_size > 8) { @@ -241,7 +250,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } else { - max_rand = 12; + max_rand = AUTOTOKENS_MUT_MAX - 2; } From 151a8facae2048a26c65658dfec507233a677fb0 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 23:16:18 +0100 Subject: [PATCH 17/77] autotokens: stats --- custom_mutators/autotokens/autotokens.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f6ab9ddd..4f3289c9 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -22,7 +22,9 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 -#define AUTOTOKENS_SPLICE_DISABLE 0 +#ifndef AUTOTOKENS_SPLICE_DISABLE + #define AUTOTOKENS_SPLICE_DISABLE 0 +#endif #if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN #error SPLICE_MIN must be lower than SIZE_MIN @@ -49,6 +51,7 @@ static u32 whitespace_ids; static u32 extras_cnt, a_extras_cnt; static u64 all_spaces, all_tabs, all_lf, all_ws; static u64 all_structure_items; +static u64 fuzz_count; static unordered_map *> file_mapping; static unordered_map *> id_mapping; static unordered_map token_to_id; @@ -238,6 +241,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; } + #endif /* ERASE - only if large enough */ @@ -298,6 +302,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; + ++fuzz_count; return mutated_size; } @@ -997,8 +1002,9 @@ extern "C" void afl_custom_deinit(my_mutator_t *data) { "\n\nAutotoken mutator statistics:\n" " Number of all seen tokens: %u\n" " Number of input structures: %u\n" - " Number of all items in structures: %llu\n\n", - current_id - 1, valid_structures, all_structure_items); + " Number of all items in structures: %llu\n" + " Number of total fuzzes: %llu\n\n", + current_id - 1, valid_structures, all_structure_items, fuzz_count); free(data); From eeca3a0b2939c605497e9b3a615ee4a466f4a3f2 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 11:52:19 +0100 Subject: [PATCH 18/77] lots of fixes --- custom_mutators/autotokens/TODO | 2 +- custom_mutators/autotokens/autotokens.cpp | 424 ++++++++++++++-------- docs/custom_mutators.md | 1 + include/afl-fuzz.h | 11 +- src/afl-fuzz-one.c | 3 +- 5 files changed, 279 insertions(+), 162 deletions(-) diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 95b79373..2e39511c 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -3,4 +3,4 @@ cmplog: only add tokens that were found to fit? create from thin air if no good seed after a cycle and dict large enough? 
(static u32 no_of_struct_inputs;) -splicing -> check if whitespace/token is needed \ No newline at end of file +splicing -> check if whitespace/token is needed diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 4f3289c9..102bea0f 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -38,8 +38,10 @@ typedef struct my_mutator { } my_mutator_t; -#define DEBUG \ +#undef DEBUGF +#define DEBUGF \ if (unlikely(debug)) fprintf +#define IFDEBUG if (unlikely(debug)) static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; @@ -57,12 +59,12 @@ static unordered_map *> id_mapping; static unordered_map token_to_id; static unordered_map id_to_token; static string whitespace = AUTOTOKENS_WHITESPACE; +static string output; static regex *regex_comment_custom; -static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", - regex::multiline | regex::optimize); -static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); -static regex regex_whitespace(R"([ \t]+)", regex::optimize); -static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); +static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); static vector *s; // the structure of the currently selected input u32 good_whitespace_or_singleval() { @@ -104,7 +106,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, MAX(AUTOTOKENS_CHANGE_MIN, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256)); - // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); #if AUTOTOKENS_SPLICE_DISABLE == 1 #define AUTOTOKENS_MUT_MAX 12 @@ -112,7 +114,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, #define AUTOTOKENS_MUT_MAX 14 #endif - u32 max_rand = AUTOTOKENS_MUT_MAX; + u32 max_rand = AUTOTOKENS_MUT_MAX, new_item, pos; for (i = 0; i < rounds; ++i) { @@ -122,8 +124,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, case 0 ... 7: /* fall through */ { - u32 pos = rand_below(afl_ptr, m_size); - u32 cur_item = m[pos], new_item; + pos = rand_below(afl_ptr, m_size); + u32 cur_item = m[pos]; do { new_item = rand_below(afl_ptr, current_id); @@ -135,7 +137,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, ((whitespace_ids < new_item && whitespace_ids >= cur_item) || (whitespace_ids >= new_item && whitespace_ids < cur_item))))); - DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item); + DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; break; @@ -144,7 +146,6 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, /* INSERT (m_size +1 so we insert also after last place) */ case 8 ... 
9: { - u32 new_item; do { new_item = rand_below(afl_ptr, current_id); @@ -154,7 +155,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; - DEBUG(stderr, "INS: %u at %u\n", new_item, pos); + DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); if (likely(!alternative_tokenize)) { @@ -212,7 +213,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, m.insert(m.begin() + dst_off, src->begin() + src_off, src->begin() + src_off + n); m_size += n; - DEBUG(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + DEBUGF(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + break; } @@ -231,13 +233,36 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, copy(src->begin() + src_off, src->begin() + src_off + n, m.begin() + dst_off); - DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); + DEBUGF(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); break; } } + if (likely(!alternative_tokenize)) { + + // do we need a whitespace/token at the beginning? + if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && + id_to_token[m[dst_off]].size() > 1) { + + m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); + ++m_size; + + } + + // do we need a whitespace/token at the end? + if (dst_off + n < m_size && + id_to_token[m[dst_off + n - 1]].size() > 1 && + id_to_token[m[dst_off + n]].size() > 1) { + + m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); + ++m_size; + + } + + } + break; } @@ -249,11 +274,32 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, if (m_size > 8) { - m.erase(m.begin() + rand_below(afl_ptr, m_size)); - --m_size; + do { + + pos = rand_below(afl_ptr, m_size); + + } while (unlikely(pos < whitespace_ids)); + + // if what we delete will result in a missing whitespace/token, + // instead of deleting we switch the item to a whitespace or token. + if (likely(!alternative_tokenize) && pos && pos < m_size && + id_to_token[m[pos - 1]].size() > 1 && + id_to_token[m[pos + 1]].size() > 1) { + + m[pos] = good_whitespace_or_singleval(); + + } else { + + m.erase(m.begin() + pos); + --m_size; + + } } else { + // if the data is already too small do not try to make it smaller + // again this run. 
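          // (lowering max_rand by two removes the only values that reach
          // this default ERASE branch, so no further erases happen during
          // the rest of this call)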
+ max_rand = AUTOTOKENS_MUT_MAX - 2; } @@ -262,14 +308,12 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - // TODO: add full line insert splice, replace splace, delete - } } - string output; - u32 m_size_1 = m_size - 1; + u32 m_size_1 = m_size - 1; + output = ""; for (i = 0; i < m_size; ++i) { @@ -282,31 +326,108 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - u32 mutated_size = output.size(); - u8 *mutated_out = (u8 *)afl_realloc((void **)out_buf, mutated_size); + u32 mutated_size = (u32)output.size(); + u8 *mutated_out = (u8 *)output.data(); - if (unlikely(!mutated_out)) { + if (unlikely(mutated_size > max_size)) { mutated_size = max_size; } - *out_buf = NULL; - return 0; + IFDEBUG { - } - - if (unlikely(debug)) { - - DEBUG(stderr, "MUTATED to %u bytes:\n", mutated_size); + DEBUGF(stderr, "MUTATED to %u bytes:\n", mutated_size); fwrite(output.data(), 1, mutated_size, stderr); - DEBUG(stderr, "\n---\n"); + DEBUGF(stderr, "\n---\n"); } - memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; ++fuzz_count; return mutated_size; } +/* I get f*cking stack overflow using C++ regex with a regex of + "\"[[:print:]]*?\"" if this matches a long string even with regex::optimize + enabled :-( */ +u8 my_search_string(string::const_iterator cur, string::const_iterator ende, + string::const_iterator *match_begin, + string::const_iterator *match_end) { + + string::const_iterator start = cur, found_begin; + u8 quote_type = 0; + + while (cur < ende) { + + switch (*cur) { + + case '"': { + + if (cur == start || *(cur - 1) != '\\') { + + if (!quote_type) { + + found_begin = cur; + quote_type = 1; + + } else if (quote_type == 1) { + + *match_begin = found_begin; + *match_end = cur + 1; + return 1; + + } + + } + + break; + + } + + case '\'': { + + if (cur == start || *(cur - 1) != '\\') { + + if (!quote_type) { + + found_begin = cur; + quote_type = 2; + + } else if (quote_type == 2) { + + *match_begin = found_begin; + *match_end = cur + 1; + return 1; + + } + + } + + break; + + } + + case '\n': + case '\r': + case 0: { + + quote_type = 0; + break; + + } + + default: + if (unlikely(quote_type && !isprint(*cur))) { quote_type = 0; } + break; + + } + + ++cur; + + } + + return 0; + +} + /* We are not using afl_custom_queue_new_entry() because not every corpus entry will be necessarily fuzzed. 
so we use afl_custom_queue_get() instead */ @@ -321,7 +442,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; - DEBUG(stderr, "cmplog not ascii or only_fav and not favorite\n"); + DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); return 0; } @@ -356,7 +477,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } ++extras_cnt; - DEBUG(stderr, "Added from dictionary: \"%s\"\n", ptr); + DEBUGF(stderr, "Added from dictionary: \"%s\"\n", ptr); } @@ -385,7 +506,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } ++a_extras_cnt; - DEBUG(stderr, "Added from auto dictionary: \"%s\"\n", ptr); + DEBUGF(stderr, "Added from auto dictionary: \"%s\"\n", ptr); } @@ -415,7 +536,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; - DEBUG(stderr, "Too short (%lu) %s\n", len, filename); + DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); return 0; } @@ -443,14 +564,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; - DEBUG(stderr, "Not text (%lu) %s\n", len, filename); + DEBUGF(stderr, "Not text (%lu) %s\n", len, filename); return 0; } } - // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", + // DEBUGF(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); if (regex_comment_custom) { @@ -463,15 +584,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), - filename, input.c_str()); + DEBUGF(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), + filename, input.c_str()); u32 spaces = count(input.begin(), input.end(), ' '); u32 tabs = count(input.begin(), input.end(), '\t'); u32 linefeeds = count(input.begin(), input.end(), '\n'); bool ends_with_linefeed = input[input.length() - 1] == '\n'; - DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, - linefeeds, ends_with_linefeed); + DEBUGF(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, + linefeeds, ends_with_linefeed); all_spaces += spaces; all_tabs += tabs; all_lf += linefeeds; @@ -479,25 +600,28 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // now extract all tokens vector tokens; - smatch match; - string::const_iterator cur = input.begin(), ende = input.end(), found, prev; + string::const_iterator cur = input.begin(), ende = input.end(), found, prev, + match_begin, match_end; - DEBUG(stderr, "START!\n"); + DEBUGF(stderr, "START!\n"); if (likely(!alternative_tokenize)) { - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (my_search_string(cur, ende, &match_begin, &match_end)) { prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + found = match_begin; + cur = match_end; + + IFDEBUG { + + string foo(match_begin, match_end); + DEBUGF(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + foo.c_str(), prev - input.begin(), found - prev, + cur - input.begin()); + + } if (prev < found) { // there are items between search start and find while (prev < found) { @@ -512,8 +636,8 @@ extern "C" 
unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { @@ -525,14 +649,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); + tokens.push_back(string(start, prev)); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); } else { - tokens.push_back(std::string(prev, prev + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *prev); + tokens.push_back(string(prev, prev + 1)); + DEBUGF(stderr, "OTHER \"%c\"\n", *prev); ++prev; } @@ -541,11 +665,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[0].length() > 0) { tokens.push_back(match[0]); } + tokens.push_back(string(match_begin, match_end)); + DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str()); } - DEBUG(stderr, "AFTER all strings\n"); + DEBUGF(stderr, "AFTER all strings\n"); if (cur < ende) { @@ -561,8 +686,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { @@ -575,13 +700,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); } else { tokens.push_back(std::string(cur, cur + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *cur); + DEBUGF(stderr, "OTHER \"%c\"\n", *cur); ++cur; } @@ -593,19 +718,21 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } else { // alternative tokenize - - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (my_search_string(cur, ende, &match_begin, &match_end)) { prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + found = match_begin; + cur = match_end; + IFDEBUG { + + string foo(match_begin, match_end); + DEBUGF(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + foo.c_str(), prev - input.begin(), found - prev, + cur - input.begin()); + + } + if (prev < found) { // there are items between search start and find sregex_token_iterator it{prev, found, regex_whitespace, -1}; vector tokenized{it, {}}; @@ -619,10 +746,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, tokenized.end()); tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); + DEBUGF(stderr, "tokens1: %lu input size: %lu\n", tokenized.size(), + input.size()); for (auto x : tokenized) { cerr << x << endl; @@ -636,10 +763,7 @@ extern "C" unsigned char 
afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (regex_search(c, e, m, regex_word)) { p = c; f = m[0].first; @@ -649,10 +773,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // there are items between search start and find while (p < f) { - if (unlikely(debug)) { + IFDEBUG { string foo(p, p + 1); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); } @@ -661,20 +785,21 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", - foo.c_str()); tokens.push_back(std::string(p, f)); - */ + IFDEBUG { + + string foo(p, f); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(p, f)); + + } } - DEBUG( - stderr, - "SUBstring \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); + DEBUGF(stderr, + "SUBstring \"%s\" found at start %lu offset %lu continue " + "at %lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); tokens.push_back(m[0].str()); } @@ -683,10 +808,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, while (c < e) { - if (unlikely(debug)) { + IFDEBUG { string foo(c, c + 1); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); } @@ -695,17 +820,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", - foo.c_str()); + string foo(c, e); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(c, e)); - */ + tokens.push_back(std::string(c, e)); } @@ -713,7 +835,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[0].length() > 0) { tokens.push_back(match[0]); } + tokens.push_back(string(match_begin, match_end)); } @@ -727,10 +849,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, tokenized.end()); tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); + DEBUGF(stderr, "tokens2: %lu input size: %lu\n", tokenized.size(), + input.size()); for (auto x : tokenized) { cerr << x << endl; @@ -744,10 +866,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (regex_search(c, e, m, regex_word)) { p = c; f = m[0].first; @@ -757,10 +876,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // there are items between search start and find while (p < f) { - if (unlikely(debug)) { + IFDEBUG { string foo(p, p + 1); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); } @@ -769,25 +888,22 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", - foo.c_str()); + string foo(p, f); + DEBUGF(stderr, "before 
string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(p, f)); - */ + tokens.push_back(std::string(p, f)); } - DEBUG(stderr, - "SUB2string \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); + DEBUGF(stderr, + "SUB2string \"%s\" found at start %lu offset %lu continue " + "at %lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); tokens.push_back(m[0].str()); } @@ -796,10 +912,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, while (c < e) { - if (unlikely(debug)) { + IFDEBUG { string foo(c, c + 1); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); } @@ -808,16 +924,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + string foo(c, e); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(c, e)); - */ + tokens.push_back(std::string(c, e)); } @@ -827,22 +941,22 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "DUMPING TOKENS:\n"); + DEBUGF(stderr, "DUMPING TOKENS:\n"); u32 size_1 = tokens.size() - 1; for (u32 i = 0; i < tokens.size(); ++i) { - DEBUG(stderr, "%s", tokens[i].c_str()); + DEBUGF(stderr, "%s", tokens[i].c_str()); if (unlikely(alternative_tokenize && i < size_1)) { - DEBUG(stderr, "%s", whitespace.c_str()); + DEBUGF(stderr, "%s", whitespace.c_str()); } } - DEBUG(stderr, "---------------------------\n"); + DEBUGF(stderr, "---------------------------\n"); } @@ -850,7 +964,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; - DEBUG(stderr, "too few tokens\n"); + DEBUGF(stderr, "too few tokens\n"); return 0; } @@ -886,21 +1000,23 @@ extern "C" unsigned char afl_custom_queue_get(void *data, all_structure_items += structure->size(); // we are done! - DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", - structure->size()); + DEBUGF(stderr, "DONE! We have %lu tokens in the structure\n", + structure->size()); - } else { + } + + else { if (entry->second == NULL) { - DEBUG(stderr, "Skipping %s\n", filename); + DEBUGF(stderr, "Skipping %s\n", filename); s = NULL; return 0; } s = entry->second; - DEBUG(stderr, "OK %s\n", filename); + DEBUGF(stderr, "OK %s\n", filename); } diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md index 322caa5b..82131c92 100644 --- a/docs/custom_mutators.md +++ b/docs/custom_mutators.md @@ -150,6 +150,7 @@ def deinit(): # optional for Python sense to use it. You would only skip this if `post_process` is used to fix checksums etc. so if you are using it, e.g., as a post processing library. Note that a length > 0 *must* be returned! + The returned output buffer is under **your** memory management! - `describe` (optional): diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 1e8d085d..229bc025 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -844,15 +844,16 @@ struct custom_mutator { /** * Perform custom mutations on a given input * - * (Optional for now. Required in the future) + * (Optional) * - * @param data pointer returned in afl_custom_init by this custom mutator + * Getting an add_buf can be skipped by using afl_custom_splice_optout(). 
+ * + * @param[in] data Pointer returned in afl_custom_init by this custom mutator * @param[in] buf Pointer to the input data to be mutated and the mutated * output * @param[in] buf_size Size of the input/output data - * @param[out] out_buf the new buffer. We may reuse *buf if large enough. - * *out_buf = NULL is treated as FATAL. - * @param[in] add_buf Buffer containing the additional test case + * @param[out] out_buf The new buffer, under your memory mgmt. + * @param[in] add_buf Buffer containing an additional test case (splicing) * @param[in] add_buf_size Size of the additional test case * @param[in] max_size Maximum size of the mutated output. The mutation must * not produce data larger than max_size. diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 5e352dcb..bd482562 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -564,8 +564,7 @@ u8 fuzz_one_original(afl_state_t *afl) { if (afl->cmplog_lvl == 3 || (afl->cmplog_lvl == 2 && afl->queue_cur->tc_ref) || afl->queue_cur->favored || - !(afl->fsrv.total_execs % afl->queued_items) || - get_cur_time() - afl->last_find_time > 300000) { // 300 seconds + get_cur_time() - afl->last_find_time > 600000) { // 600 seconds if (input_to_state_stage(afl, in_buf, out_buf, len)) { From afff6f642c77e4986fdb8a4e9799c1a52e80ce32 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 13:41:48 +0100 Subject: [PATCH 19/77] optimize --- custom_mutators/autotokens/autotokens.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 102bea0f..149ae430 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -109,9 +109,9 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); #if AUTOTOKENS_SPLICE_DISABLE == 1 - #define AUTOTOKENS_MUT_MAX 12 + #define AUTOTOKENS_MUT_MAX 18 #else - #define AUTOTOKENS_MUT_MAX 14 + #define AUTOTOKENS_MUT_MAX 27 #endif u32 max_rand = AUTOTOKENS_MUT_MAX, new_item, pos; @@ -120,8 +120,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, switch (rand_below(afl_ptr, max_rand)) { - /* CHANGE */ - case 0 ... 7: /* fall through */ + /* CHANGE/MUTATE single item */ + case 0 ... 9: { pos = rand_below(afl_ptr, m_size); @@ -144,7 +144,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* INSERT (m_size +1 so we insert also after last place) */ - case 8 ... 9: { + case 10 ... 13: { do { @@ -192,7 +192,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, #if AUTOTOKENS_SPLICE_DISABLE != 1 /* SPLICING */ - case 10 ... 11: { + case 14 ... 22: { u32 strategy = rand_below(afl_ptr, 4), dst_off, n; auto src = id_mapping[rand_below(afl_ptr, valid_structures)]; @@ -278,11 +278,11 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, pos = rand_below(afl_ptr, m_size); - } while (unlikely(pos < whitespace_ids)); + } while (unlikely(m[pos] < whitespace_ids)); // if what we delete will result in a missing whitespace/token, // instead of deleting we switch the item to a whitespace or token. 
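          // m[pos - 1] and m[pos + 1] are read in this check, so pos has to
          // stay strictly inside the vector: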
- if (likely(!alternative_tokenize) && pos && pos < m_size && + if (likely(!alternative_tokenize) && pos && pos + 1 < m_size && id_to_token[m[pos - 1]].size() > 1 && id_to_token[m[pos + 1]].size() > 1) { @@ -300,7 +300,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, // if the data is already too small do not try to make it smaller // again this run. - max_rand = AUTOTOKENS_MUT_MAX - 2; + max_rand -= 4; } @@ -734,6 +734,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } if (prev < found) { // there are items between search start and find + sregex_token_iterator it{prev, found, regex_whitespace, -1}; vector tokenized{it, {}}; tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(), From 86d3c65559209ce12452e18daf96946222c19b46 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 15:59:57 +0100 Subject: [PATCH 20/77] nit --- custom_mutators/autotokens/autotokens.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 149ae430..f4b96c7b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -121,8 +121,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, switch (rand_below(afl_ptr, max_rand)) { /* CHANGE/MUTATE single item */ - case 0 ... 9: - { + case 0 ... 9: { pos = rand_below(afl_ptr, m_size); u32 cur_item = m[pos]; @@ -438,8 +437,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (likely(!debug)) { - if ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || - (only_fav && !afl_ptr->queue_cur->favored)) { + if (unlikely(!afl_ptr->custom_only) && + ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored))) { s = NULL; DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); From 628b4b60021a0d62a2eccddca4fe321c9d57c663 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 17:24:56 +0100 Subject: [PATCH 21/77] enhance examples --- custom_mutators/README.md | 10 ++++++++++ custom_mutators/examples/custom_send.c | 9 ++++++++- custom_mutators/examples/example.c | 4 +++- custom_mutators/examples/post_library_gif.so.c | 4 ++-- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/custom_mutators/README.md b/custom_mutators/README.md index 0289e150..8d01856f 100644 --- a/custom_mutators/README.md +++ b/custom_mutators/README.md @@ -11,6 +11,16 @@ The `./examples` folder contains examples for custom mutators in python and C. In `./rust`, you will find rust bindings, including a simple example in `./rust/example` and an example for structured fuzzing, based on lain, in`./rust/example_lain`. +## The AFL++ grammar agnostic grammar mutator + +In `./autotokens` you find a token-level fuzzer that does not need to know +anything about the grammar of an input as long as it is in ascii and allows +whitespace. +It is very fast and effective. + +If you are looking for an example of how to effectively create a custom +mutator take a look at this one. 
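For reference, the C interface such a custom mutator implements is small. The following is a minimal illustrative sketch — not part of these patches — with signatures as documented in `include/afl-fuzz.h` and used by the `custom_mutators/examples/` directory; like the other examples it assumes a build with `-I/path/to/AFLplusplus/include`:

```c
#include "afl-fuzz.h"   /* needs -I/path/to/AFLplusplus/include */
#include <stdlib.h>
#include <string.h>

typedef struct my_mutator {

  afl_state_t *afl;

} my_mutator_t;

/* Called once at startup. */
my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {

  (void)seed;
  my_mutator_t *data = calloc(1, sizeof(my_mutator_t));
  if (!data) { return NULL; }
  data->afl = afl;
  return data;

}

/* Called for every mutation. *out_buf stays under the mutator's own memory
   management and the returned length must be > 0 and <= max_size. */
size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
                       u8 **out_buf, u8 *add_buf, size_t add_buf_size,
                       size_t max_size) {

  (void)data;
  (void)add_buf;
  (void)add_buf_size;

  u8 *mutated = afl_realloc((void **)out_buf, buf_size);
  if (!mutated) { return 0; }

  memcpy(mutated, buf, buf_size);
  /* ... mutate the copy here, staying within max_size ... */

  return buf_size <= max_size ? buf_size : max_size;

}

/* Called once at shutdown. */
void afl_custom_deinit(my_mutator_t *data) {

  free(data);

}
```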
+ ## The AFL++ Grammar Mutator If you use git to clone AFL++, then the following will incorporate our diff --git a/custom_mutators/examples/custom_send.c b/custom_mutators/examples/custom_send.c index ffea927e..7de72819 100644 --- a/custom_mutators/examples/custom_send.c +++ b/custom_mutators/examples/custom_send.c @@ -1,7 +1,14 @@ +// +// This is an example on how to use afl_custom_send +// It writes each mutated data set to /tmp/foo +// You can modify this to send to IPC, shared memory, etc. +// // cc -O3 -fPIC -shared -g -o custom_send.so -I../../include custom_send.c // cd ../.. // afl-cc -o test-instr test-instr.c -// afl-fuzz -i in -o out -- ./test-instr -f /tmp/foo +// AFL_CUSTOM_MUTATOR_LIBRARY=custom_mutators/examples/custom_send.so \ +// afl-fuzz -i in -o out -- ./test-instr -f /tmp/foo +// #include "custom_mutator_helpers.h" diff --git a/custom_mutators/examples/example.c b/custom_mutators/examples/example.c index 3f299508..e680ec8e 100644 --- a/custom_mutators/examples/example.c +++ b/custom_mutators/examples/example.c @@ -6,7 +6,7 @@ Dominik Maier */ -// You need to use -I /path/to/AFLplusplus/include +// You need to use -I/path/to/AFLplusplus/include -I. #include "custom_mutator_helpers.h" #include @@ -118,6 +118,8 @@ size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, } + if (max_size > mutated_size) { mutated_size = max_size; } + *out_buf = mutated_out; return mutated_size; diff --git a/custom_mutators/examples/post_library_gif.so.c b/custom_mutators/examples/post_library_gif.so.c index 9cd224f4..3cb018a6 100644 --- a/custom_mutators/examples/post_library_gif.so.c +++ b/custom_mutators/examples/post_library_gif.so.c @@ -129,8 +129,8 @@ size_t afl_custom_post_process(post_state_t *data, unsigned char *in_buf, /* Allocate memory for new buffer, reusing previous allocation if possible. Note we have to use afl-fuzz's own realloc! - Note that you should only do this if you need to grow the buffer, - otherwise work with in_buf, and assign it to *out_buf instead. */ + We use afl_realloc because it is effective. + You can also work within in_buf, and assign it to *out_buf. 
*/ *out_buf = afl_realloc(out_buf, len); From 67cfe4f6d4a03c596a5c3e1aa97d64d79263746a Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 22:24:24 +0100 Subject: [PATCH 22/77] nits --- custom_mutators/autotokens/autotokens.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f4b96c7b..16ee8109 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -544,7 +544,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string input; input.resize(len); rewind(fp); - fread((void *)input.data(), input.size(), 1, fp); + + if (fread((void *)input.data(), 1, len, fp) != len) { + + s = NULL; + DEBUGF(stderr, "Too short read %s\n", len, filename); + return 0; + + } + fclose(fp); if (!afl_ptr->shm.cmplog_mode) { From bd2cb4cd1c2f07d5406875771cd41fb9a6e1f84d Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 20 Jan 2023 12:22:29 +0100 Subject: [PATCH 23/77] more default tokens --- custom_mutators/autotokens/autotokens.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 16ee8109..f9b5bd2e 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1105,6 +1105,12 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { id_to_token[current_id] = "\n\n\n\n"; ++current_id; whitespace_ids = current_id; + token_to_id["\""] = current_id; + id_to_token[current_id] = "\""; + ++current_id; + token_to_id["'"] = current_id; + id_to_token[current_id] = "'"; + ++current_id; } From 1b4e1d75b32c6024765ab27b36591ae97cb33f6b Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 25 Jan 2023 13:52:22 +0100 Subject: [PATCH 24/77] cmplog decision updates --- src/afl-fuzz-one.c | 16 ++++++++++------ utils/aflpp_driver/GNUmakefile | 23 ++++++++++++++--------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index bd482562..b25398c4 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -446,9 +446,12 @@ u8 fuzz_one_original(afl_state_t *afl) { ACTF( "Fuzzing test case #%u (%u total, %llu crashes saved, " - "perf_score=%0.0f, exec_us=%llu, hits=%u, map=%u, ascii=%u)...", + "perf_score=%0.0f, weight=%0.0f, favorite=%u, was_fuzzed=%u, " + "exec_us=%llu, hits=%u, map=%u, ascii=%u)...", afl->current_entry, afl->queued_items, afl->saved_crashes, - afl->queue_cur->perf_score, afl->queue_cur->exec_us, + afl->queue_cur->perf_score, afl->queue_cur->weight, + afl->queue_cur->favored, afl->queue_cur->was_fuzzed, + afl->queue_cur->exec_us, likely(afl->n_fuzz) ? 
afl->n_fuzz[afl->queue_cur->n_fuzz_entry] : 0, afl->queue_cur->bitmap_size, afl->queue_cur->is_ascii); fflush(stdout); @@ -561,10 +564,11 @@ u8 fuzz_one_original(afl_state_t *afl) { } else { - if (afl->cmplog_lvl == 3 || - (afl->cmplog_lvl == 2 && afl->queue_cur->tc_ref) || - afl->queue_cur->favored || - get_cur_time() - afl->last_find_time > 600000) { // 600 seconds + if (afl->queue_cur->favored || afl->cmplog_lvl == 3 || + (afl->cmplog_lvl == 2 && + (afl->queue_cur->tc_ref || + afl->fsrv.total_execs % afl->queued_items <= 10)) || + get_cur_time() - afl->last_find_time > 250000) { // 250 seconds if (input_to_state_stage(afl, in_buf, out_buf, len)) { diff --git a/utils/aflpp_driver/GNUmakefile b/utils/aflpp_driver/GNUmakefile index 234a1c31..b973f96a 100644 --- a/utils/aflpp_driver/GNUmakefile +++ b/utils/aflpp_driver/GNUmakefile @@ -8,9 +8,14 @@ ifeq "$(shell uname -s)" "Darwin" LDFLAGS += $(SDK_LD) endif +ifeq "" "$(LLVM_CONFIG)" + LLVM_CONFIG := llvm-config +endif LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) ifneq "" "$(LLVM_BINDIR)" - LLVM_BINDIR := $(LLVM_BINDIR)/ + ifeq "$(shell test -x $(LLVM_BINDIR)/clang && echo 1)" "1" + CC := $(LLVM_BINDIR)/clang + endif endif CFLAGS := -O3 -funroll-loops -g -fPIC @@ -18,31 +23,31 @@ CFLAGS := -O3 -funroll-loops -g -fPIC all: libAFLDriver.a libAFLQemuDriver.a aflpp_qemu_driver_hook.so aflpp_driver.o: aflpp_driver.c - -$(LLVM_BINDIR)clang -I. -I../../include $(CFLAGS) -c aflpp_driver.c + -$(CC) -I. -I../../include $(CFLAGS) -c aflpp_driver.c libAFLDriver.a: aflpp_driver.o @ar rc libAFLDriver.a aflpp_driver.o @cp -vf libAFLDriver.a ../../ debug: - $(LLVM_BINDIR)clang -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.o ../../src/afl-performance.c - $(LLVM_BINDIR)clang -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c - #$(LLVM_BINDIR)clang -S -emit-llvm -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.ll ../../src/afl-performance.c - #$(LLVM_BINDIR)clang -S -emit-llvm -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c + $(CC) -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.o ../../src/afl-performance.c + $(CC) -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c + #$(CC) -S -emit-llvm -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.ll ../../src/afl-performance.c + #$(CC) -S -emit-llvm -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c ar rc libAFLDriver.a afl-performance.o aflpp_driver.o aflpp_qemu_driver.o: aflpp_qemu_driver.c - -$(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c + -$(CC) $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c libAFLQemuDriver.a: aflpp_qemu_driver.o @-ar rc libAFLQemuDriver.a aflpp_qemu_driver.o @-cp -vf libAFLQemuDriver.a ../../ aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o - @-test -e aflpp_qemu_driver_hook.o && $(LLVM_BINDIR)clang $(LDFLAGS) -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so || echo "Note: Optional aflpp_qemu_driver_hook.so not built." + @-test -e aflpp_qemu_driver_hook.o && $(CC) $(LDFLAGS) -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so || echo "Note: Optional aflpp_qemu_driver_hook.so not built." aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c - @-test -e ../../qemu_mode/qemuafl/qemuafl/api.h && $(LLVM_BINDIR)clang $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c || echo "Note: Optional aflpp_qemu_driver_hook.o not built." 
+ @-test -e ../../qemu_mode/qemuafl/qemuafl/api.h && $(CC) $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c || echo "Note: Optional aflpp_qemu_driver_hook.o not built." test: debug #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c From 47f35d29ac53ed1cdb87f65591b62947a7965060 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 27 Jan 2023 14:32:18 +0100 Subject: [PATCH 25/77] fix --- custom_mutators/autotokens/autotokens.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f9b5bd2e..4a2cc08f 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -61,8 +61,10 @@ static unordered_map id_to_token; static string whitespace = AUTOTOKENS_WHITESPACE; static string output; static regex *regex_comment_custom; -static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", - regex::multiline | regex::optimize); +// multiline requires g++-11 libs :( +static regex regex_comment_star( + "/\\*([:print:]|\n)*?\\*/", + regex_constants::optimize /* | regex_constants::multiline */); static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); static regex regex_whitespace(R"([ \t]+)", regex::optimize); static vector *s; // the structure of the currently selected input @@ -548,7 +550,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (fread((void *)input.data(), 1, len, fp) != len) { s = NULL; - DEBUGF(stderr, "Too short read %s\n", len, filename); + DEBUGF(stderr, "Too short read %s\n", filename); return 0; } From b5d8d4c866137a8a6bd55225b0eaf723123c46c9 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 29 Jan 2023 10:07:33 +0100 Subject: [PATCH 26/77] comment --- custom_mutators/autotokens/autotokens.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 4a2cc08f..0a010f0b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1,3 +1,9 @@ +/* + token level fuzzing custom mutator for afl++ + (c) by Marc Heuse + License: Apache 2.0 +*/ + extern "C" { #include "afl-fuzz.h" From 91ccbf3f68ab9e6e4bc277f86c3efed666867132 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 1 Feb 2023 17:16:51 +0100 Subject: [PATCH 27/77] fix --- custom_mutators/autotokens/autotokens.cpp | 18 ++++++++---------- src/afl-fuzz-one.c | 5 +++-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 0a010f0b..548e1be9 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -451,7 +451,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, s = NULL; DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); - return 0; + return 1; } @@ -532,7 +532,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (!fp) { s = NULL; - return 0; + return 1; } // should not happen @@ -545,7 +545,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); - return 0; + return 1; } @@ -557,7 +557,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, s = NULL; DEBUGF(stderr, "Too short read %s\n", 
filename); - return 0; + return 1; } @@ -581,7 +581,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; DEBUGF(stderr, "Not text (%lu) %s\n", len, filename); - return 0; + return 1; } @@ -982,7 +982,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; DEBUGF(stderr, "too few tokens\n"); - return 0; + return 1; } @@ -1020,15 +1020,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "DONE! We have %lu tokens in the structure\n", structure->size()); - } - - else { + } else { if (entry->second == NULL) { DEBUGF(stderr, "Skipping %s\n", filename); s = NULL; - return 0; + return 1; } diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index b25398c4..2f016217 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -1988,7 +1988,8 @@ custom_mutator_stage: if (unlikely(!mutated_buf)) { - FATAL("Error in custom_fuzz. Size returned: %zu", mutated_size); + //FATAL("Error in custom_fuzz. Size returned: %zu", mutated_size); + break; } @@ -2040,7 +2041,7 @@ custom_mutator_stage: new_hit_cnt = afl->queued_items + afl->saved_crashes; afl->stage_finds[STAGE_CUSTOM_MUTATOR] += new_hit_cnt - orig_hit_cnt; - afl->stage_cycles[STAGE_CUSTOM_MUTATOR] += afl->stage_max; + afl->stage_cycles[STAGE_CUSTOM_MUTATOR] += afl->stage_cur; #ifdef INTROSPECTION afl->queue_cur->stats_mutated += afl->stage_max; #endif From 4946e9cc3a340efd9b08807ae5cb0a657e0214a9 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 2 Feb 2023 12:08:45 +0100 Subject: [PATCH 28/77] small fix to compiler rt --- instrumentation/afl-compiler-rt.o.c | 66 ++++++++--------------------- 1 file changed, 17 insertions(+), 49 deletions(-) diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index d6d6c38c..6ba19b5a 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -1534,6 +1534,16 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { if (start == stop || *start) return; + x = getenv("AFL_INST_RATIO"); + if (x) { inst_ratio = (u32)atoi(x); } + + if (!inst_ratio || inst_ratio > 100) { + + fprintf(stderr, "[-] ERROR: Invalid AFL_INST_RATIO (must be 1-100).\n"); + abort(); + + } + // If a dlopen of an instrumented library happens after the forkserver then // we have a problem as we cannot increase the coverage map anymore. if (__afl_already_initialized_forkserver) { @@ -1554,62 +1564,20 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { while (start < stop) { - *(start++) = offset; + if (likely(inst_ratio == 100) || R(100) < inst_ratio) + *start = offset; + else + *start = 0; // write to map[0] if (unlikely(++offset >= __afl_final_loc)) { offset = 4; } } } - } - - x = getenv("AFL_INST_RATIO"); - if (x) { inst_ratio = (u32)atoi(x); } - - if (!inst_ratio || inst_ratio > 100) { - - fprintf(stderr, "[-] ERROR: Invalid AFL_INST_RATIO (must be 1-100).\n"); - abort(); + return; // we are done for this special case } - /* instrumented code is loaded *after* our forkserver is up. this is a - problem. 
We cannot prevent collisions then :( */ - /* - if (__afl_already_initialized_forkserver && - __afl_final_loc + 1 + stop - start > __afl_map_size) { - - if (__afl_debug) { - - fprintf(stderr, "Warning: new instrumented code after the forkserver!\n"); - - } - - __afl_final_loc = 2; - - if (1 + stop - start > __afl_map_size) { - - *(start++) = ++__afl_final_loc; - - while (start < stop) { - - if (R(100) < inst_ratio) - *start = ++__afl_final_loc % __afl_map_size; - else - *start = 4; - - start++; - - } - - return; - - } - - } - - */ - /* Make sure that the first element in the range is always set - we use that to avoid duplicate calls (which can happen as an artifact of the underlying implementation in LLVM). */ @@ -1618,10 +1586,10 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { while (start < stop) { - if (R(100) < inst_ratio) + if (likely(inst_ratio == 100) || R(100) < inst_ratio) *start = ++__afl_final_loc; else - *start = 4; + *start = 0; // write to map[0] start++; From 25b4b32627a1ef1e65b328f90f3ad1fd25d8f906 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 2 Feb 2023 12:13:48 +0100 Subject: [PATCH 29/77] small fix to compiler rt --- instrumentation/afl-compiler-rt.o.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index 6ba19b5a..b1ce4427 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -1582,6 +1582,8 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { to avoid duplicate calls (which can happen as an artifact of the underlying implementation in LLVM). */ + if (__afl_final_loc < 3) __afl_final_loc = 3; // we skip the first 4 entries + *(start++) = ++__afl_final_loc; while (start < stop) { From df9ef84f5e042bdc1db764e83baa83cb30a80d31 Mon Sep 17 00:00:00 2001 From: Nikolay Shaplov Date: Fri, 3 Feb 2023 14:32:17 +0000 Subject: [PATCH 30/77] Explicitly print error code if sched_setaffinity fails --- src/afl-gotcpu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c index c5b8a27a..1762cfe2 100644 --- a/src/afl-gotcpu.c +++ b/src/afl-gotcpu.c @@ -214,7 +214,13 @@ int main(int argc, char **argv) { #if defined(__linux__) if (sched_setaffinity(0, sizeof(c), &c)) { - PFATAL("sched_setaffinity failed for cpu %d", i); + const char *error_code = "Unkown error code"; + if (errno == EFAULT) error_code = "EFAULT"; + if (errno == EINVAL) error_code = "EINVAL"; + if (errno == EPERM) error_code = "EPERM"; + if (errno == ESRCH) error_code = "ESRCH"; + + PFATAL("sched_setaffinity failed for cpu %d, error: %s", i, error_code); } From 53c19a807c701760af577cea1f44916d9133a971 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 4 Feb 2023 14:09:50 +0100 Subject: [PATCH 31/77] code indent --- instrumentation/split-compares-pass.so.cc | 102 ++++++++++++++-------- src/afl-gotcpu.c | 4 +- 2 files changed, 66 insertions(+), 40 deletions(-) diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc index dd7b09a6..8a07610c 100644 --- a/instrumentation/split-compares-pass.so.cc +++ b/instrumentation/split-compares-pass.so.cc @@ -1152,10 +1152,14 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { b_op1 = SelectInst::Create(isMzero_op1, ConstantInt::get(intType, PlusZero), bpre_op1); #if LLVM_MAJOR >= 16 - isMzero_op0->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); - 
isMzero_op1->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); - b_op0->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); - b_op1->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); + isMzero_op0->insertInto(nonan_bb, + BasicBlock::iterator(nonan_bb->getTerminator())); + isMzero_op1->insertInto(nonan_bb, + BasicBlock::iterator(nonan_bb->getTerminator())); + b_op0->insertInto(nonan_bb, + BasicBlock::iterator(nonan_bb->getTerminator())); + b_op1->insertInto(nonan_bb, + BasicBlock::iterator(nonan_bb->getTerminator())); #else nonan_bb->getInstList().insert( BasicBlock::iterator(nonan_bb->getTerminator()), isMzero_op0); @@ -1192,7 +1196,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { t_s0->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); s_s1->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); t_s1->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); - icmp_sign_bit->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); + icmp_sign_bit->insertInto(nonan_bb, + BasicBlock::iterator(nonan_bb->getTerminator())); #else nonan_bb->getInstList().insert( BasicBlock::iterator(nonan_bb->getTerminator()), s_s0); @@ -1239,8 +1244,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::LShr, b_op1, ConstantInt::get(b_op1->getType(), shiftR_exponent)); #if LLVM_MAJOR >= 16 - s_e0->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); - s_e1->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); + s_e0->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); + s_e1->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); #else signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), s_e0); @@ -1251,15 +1258,16 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { t_e0 = new TruncInst(s_e0, IntExponentTy); t_e1 = new TruncInst(s_e1, IntExponentTy); #if LLVM_MAJOR >= 16 - t_e0->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); - t_e1->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); + t_e0->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); + t_e1->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); #else signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), t_e0); signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), t_e1); #endif - if (sizeInBits - precision < exTySizeBytes * 8) { @@ -1270,8 +1278,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::And, t_e1, ConstantInt::get(t_e1->getType(), mask_exponent)); #if LLVM_MAJOR >= 16 - m_e0->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); - m_e1->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); + m_e0->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); + m_e1->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); #else signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), m_e0); @@ -1312,7 +1322,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponents_equal = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, m_e0, m_e1); #if LLVM_MAJOR >= 16 - 
icmp_exponents_equal->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); + icmp_exponents_equal->insertInto( + signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); #else signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), @@ -1332,7 +1343,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, m_e0, m_e1); #if LLVM_MAJOR >= 16 - icmp_exponent->insertInto(signequal2_bb, BasicBlock::iterator(signequal2_bb->getTerminator())); + icmp_exponent->insertInto( + signequal2_bb, + BasicBlock::iterator(signequal2_bb->getTerminator())); #else signequal2_bb->getInstList().insert( BasicBlock::iterator(signequal2_bb->getTerminator()), @@ -1346,7 +1359,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponents_equal = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, m_e0, m_e1); #if LLVM_MAJOR >= 16 - icmp_exponents_equal->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); + icmp_exponents_equal->insertInto( + signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); #else signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), @@ -1366,7 +1380,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, m_e0, m_e1); #if LLVM_MAJOR >= 16 - icmp_exponent->insertInto(signequal2_bb, BasicBlock::iterator(signequal2_bb->getTerminator())); + icmp_exponent->insertInto( + signequal2_bb, + BasicBlock::iterator(signequal2_bb->getTerminator())); #else signequal2_bb->getInstList().insert( BasicBlock::iterator(signequal2_bb->getTerminator()), @@ -1381,7 +1397,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } #if LLVM_MAJOR >= 16 - icmp_exponent_result->insertInto(signequal2_bb, BasicBlock::iterator(signequal2_bb->getTerminator())); + icmp_exponent_result->insertInto( + signequal2_bb, BasicBlock::iterator(signequal2_bb->getTerminator())); #else signequal2_bb->getInstList().insert( BasicBlock::iterator(signequal2_bb->getTerminator()), @@ -1437,8 +1454,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::And, b_op1, ConstantInt::get(b_op1->getType(), mask_fraction)); #if LLVM_MAJOR >= 16 - m_f0->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); - m_f1->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); + m_f0->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); + m_f1->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); #else middle_bb->getInstList().insert( BasicBlock::iterator(middle_bb->getTerminator()), m_f0); @@ -1451,8 +1470,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { t_f0 = new TruncInst(m_f0, IntFractionTy); t_f1 = new TruncInst(m_f1, IntFractionTy); #if LLVM_MAJOR >= 16 - t_f0->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); - t_f1->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); + t_f0->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); + t_f1->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); #else middle_bb->getInstList().insert( BasicBlock::iterator(middle_bb->getTerminator()), t_f0); @@ -1474,8 +1495,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { t_f0 = new TruncInst(b_op0, IntFractionTy); t_f1 = new TruncInst(b_op1, 
IntFractionTy); #if LLVM_MAJOR >= 16 - t_f0->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); - t_f1->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); + t_f0->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); + t_f1->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); #else middle_bb->getInstList().insert( BasicBlock::iterator(middle_bb->getTerminator()), t_f0); @@ -1503,7 +1526,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_fraction_result = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_f0, t_f1); #if LLVM_MAJOR >= 16 - icmp_fraction_result->insertInto(middle2_bb, BasicBlock::iterator(middle2_bb->getTerminator())); + icmp_fraction_result->insertInto( + middle2_bb, BasicBlock::iterator(middle2_bb->getTerminator())); #else middle2_bb->getInstList().insert( BasicBlock::iterator(middle2_bb->getTerminator()), @@ -1516,7 +1540,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_fraction_result = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_NE, t_f0, t_f1); #if LLVM_MAJOR >= 16 - icmp_fraction_result->insertInto(middle2_bb, BasicBlock::iterator(middle2_bb->getTerminator())); + icmp_fraction_result->insertInto( + middle2_bb, BasicBlock::iterator(middle2_bb->getTerminator())); #else middle2_bb->getInstList().insert( BasicBlock::iterator(middle2_bb->getTerminator()), @@ -1542,13 +1567,13 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { if (FcmpInst->getPredicate() == CmpInst::FCMP_OGT || FcmpInst->getPredicate() == CmpInst::FCMP_UGT) { - icmp_fraction_result = CmpInst::Create( - Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1); - icmp_fraction_result2 = CmpInst::Create( - Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1); + icmp_fraction_result = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1); + icmp_fraction_result2 = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1); #if LLVM_MAJOR >= 16 - icmp_fraction_result->insertInto(negative_bb, negative_bb->end()); - icmp_fraction_result2->insertInto(positive_bb, negative_bb->end()); + icmp_fraction_result->insertInto(negative_bb, negative_bb->end()); + icmp_fraction_result2->insertInto(positive_bb, negative_bb->end()); #else negative_bb->getInstList().push_back(icmp_fraction_result); positive_bb->getInstList().push_back(icmp_fraction_result2); @@ -1556,13 +1581,13 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } else { - icmp_fraction_result = CmpInst::Create( - Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1); - icmp_fraction_result2 = CmpInst::Create( - Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1); + icmp_fraction_result = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1); + icmp_fraction_result2 = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1); #if LLVM_MAJOR >= 16 - icmp_fraction_result->insertInto(negative_bb, negative_bb->end()); - icmp_fraction_result2->insertInto(positive_bb, negative_bb->end()); + icmp_fraction_result->insertInto(negative_bb, negative_bb->end()); + icmp_fraction_result2->insertInto(positive_bb, negative_bb->end()); #else negative_bb->getInstList().push_back(icmp_fraction_result); positive_bb->getInstList().push_back(icmp_fraction_result2); @@ -1581,7 +1606,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { PN2->addIncoming(icmp_fraction_result, negative_bb); PN2->addIncoming(icmp_fraction_result2, positive_bb); #if LLVM_MAJOR >= 16 - 
PN2->insertInto(middle2_bb, BasicBlock::iterator(middle2_bb->getTerminator())); + PN2->insertInto(middle2_bb, + BasicBlock::iterator(middle2_bb->getTerminator())); #else middle2_bb->getInstList().insert( BasicBlock::iterator(middle2_bb->getTerminator()), PN2); diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c index 1762cfe2..fd9e9f54 100644 --- a/src/afl-gotcpu.c +++ b/src/afl-gotcpu.c @@ -217,8 +217,8 @@ int main(int argc, char **argv) { const char *error_code = "Unkown error code"; if (errno == EFAULT) error_code = "EFAULT"; if (errno == EINVAL) error_code = "EINVAL"; - if (errno == EPERM) error_code = "EPERM"; - if (errno == ESRCH) error_code = "ESRCH"; + if (errno == EPERM) error_code = "EPERM"; + if (errno == ESRCH) error_code = "ESRCH"; PFATAL("sched_setaffinity failed for cpu %d, error: %s", i, error_code); From e1434bcfcd8c13de838559fd7b797d1a3cd5a672 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 4 Feb 2023 14:34:47 +0100 Subject: [PATCH 32/77] more autotoken options --- custom_mutators/autotokens/TODO | 17 +++++++++ custom_mutators/autotokens/autotokens.cpp | 45 ++++++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 2e39511c..3cae3060 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -4,3 +4,20 @@ create from thin air if no good seed after a cycle and dict large enough? (static u32 no_of_struct_inputs;) splicing -> check if whitespace/token is needed + +whitespace/token check only AFTER mutation + +analyse welche einen DICT haben, und welche davon rein ascii + +corpus analyse: + + libxml + - hardbuzz + - sqlite + - libpcap +min len, max len, % wenn 95/98/99/100 ascii + +funktion und env für menge an mutationen + +env für menge an per mutation run + +only add inital dictionary, not furher finds, e.g. cmplog diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 548e1be9..a0125851 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -28,6 +28,9 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 +#define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 +// 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog +#define AUTOTOKENS_LEARN_DICT 2 #ifndef AUTOTOKENS_SPLICE_DISABLE #define AUTOTOKENS_SPLICE_DISABLE 0 #endif @@ -53,6 +56,8 @@ static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; static int only_fav = AUTOTOKENS_ONLY_FAV; static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; +static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; +static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -94,6 +99,22 @@ u32 good_whitespace_or_singleval() { } +extern "C" u32 afl_custom_fuzz_count(void *data, const u8 *buf, + size_t buf_size) { + + if (s == NULL) return 0; + + u32 shift = unlikely(afl_ptr->custom_only) ? 
7 : 8; + u32 stage_max = (u32)((HAVOC_CYCLES * afl_ptr->queue_cur->perf_score) / + afl_ptr->havoc_div) >> + shift; + if (fuzz_count_shift) { stage_max >>= (u32)fuzz_count_shift; }; + DEBUGF(stderr, "fuzz count: %u\n", stage_max); + + return stage_max; + +} + extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u8 **out_buf, u8 *add_buf, size_t add_buf_size, size_t max_size) { @@ -441,6 +462,7 @@ u8 my_search_string(string::const_iterator cur, string::const_iterator ende, extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { + static int learn_state; (void)(data); if (likely(!debug)) { @@ -458,7 +480,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } // check if there are new dictionary entries and add them to the tokens - if (valid_structures) { + if (valid_structures && learn_state < learn_dictionary_tokens) { + + if (unlikely(!learn_state)) { learn_state = 1; } while (extras_cnt < afl_ptr->extras_cnt) { @@ -1053,6 +1077,25 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } + + if (getenv("AUTOTOKENS_LEARN_DICT")) { + + learn_dictionary_tokens = atoi(getenv("AUTOTOKENS_LEARN_DICT")); + if (learn_dictionary_tokens < 0 || learn_dictionary_tokens > 2) { + + learn_dictionary_tokens = 2; + + } + + } + + if (getenv("AUTOTOKENS_FUZZ_COUNT_SHIFT")) { + + fuzz_count_shift = atoi(getenv("AUTOTOKENS_FUZZ_COUNT_SHIFT")); + if (fuzz_count_shift < 0 || fuzz_count_shift > 16) { fuzz_count_shift = 0; } + + } + if (getenv("AUTOTOKENS_WHITESPACE")) { whitespace = getenv("AUTOTOKENS_WHITESPACE"); From ec87abda93d68f489f26ed2a2ae75b4f1e26d0bb Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 4 Feb 2023 14:37:28 +0100 Subject: [PATCH 33/77] readme --- custom_mutators/autotokens/README | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index f82dcd98..86e7c9b3 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -18,6 +18,12 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! `AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items `AUTOTOKENS_COMMENT` - what character or string starts a comment which will be removed. Default: `/* ... */` +`AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the number of fuzzing performed, shifting + the value by this number set, e.g. 1. +`AUTOTOKENS_LEARN_DICT` - learn from dictionaries? + 0 = none + 1 = only -x or autodict + 2 = -x, autodict and `CMPLOG` `AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation (experimental) `AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, From 90f61552f794fc0fae5dc2585f81f31d32db1e89 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 4 Feb 2023 15:39:03 +0100 Subject: [PATCH 34/77] changes --- custom_mutators/autotokens/TODO | 9 ++++----- custom_mutators/autotokens/autotokens.cpp | 12 ++++++++++++ include/config.h | 4 ++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 3cae3060..528dff1f 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,5 +1,3 @@ -cmplog: only add tokens that were found to fit? 
- create from thin air if no good seed after a cycle and dict large enough? (static u32 no_of_struct_inputs;) @@ -16,8 +14,9 @@ corpus analyse: - libpcap min len, max len, % wenn 95/98/99/100 ascii -funktion und env für menge an mutationen - env für menge an per mutation run -only add inital dictionary, not furher finds, e.g. cmplog +AFL_TXT_MAX_LEN 65535 +AFL_TXT_MIN_LEN 16 +AFL_TXT_MIN_PERCENT=99 + diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index a0125851..46a347f8 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -34,6 +34,9 @@ extern "C" { #ifndef AUTOTOKENS_SPLICE_DISABLE #define AUTOTOKENS_SPLICE_DISABLE 0 #endif +#ifndef AFL_TXT_MAX_LEN + #define AFL_TXT_MAX_LEN 65535 +#endif #if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN #error SPLICE_MIN must be lower than SIZE_MIN @@ -571,6 +574,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); return 1; + } else + if (len > AFL_TXT_MAX_LEN) { + + fclose(fp); + file_mapping[fn] = structure; // NULL ptr so we don't read the file again + s = NULL; + DEBUGF(stderr, "Too long (%lu) %s\n", len, filename); + return 1; + } string input; diff --git a/include/config.h b/include/config.h index f8a742f2..ed8b844c 100644 --- a/include/config.h +++ b/include/config.h @@ -489,12 +489,12 @@ /* Minimum length of a queue input to be evaluated for "is_ascii"? */ -#define AFL_TXT_MIN_LEN 12 +#define AFL_TXT_MIN_LEN 16 /* What is the minimum percentage of ascii characters present to be classifed as "is_ascii"? */ -#define AFL_TXT_MIN_PERCENT 95 +#define AFL_TXT_MIN_PERCENT 98 /* How often to perform ASCII mutations 0 = disable, 1-8 are good values */ From f99656e22bffb4bfac8e201ad973a1ea5a6abaa0 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 5 Feb 2023 13:15:06 +0100 Subject: [PATCH 35/77] create from thin air, max mutation --- custom_mutators/autotokens/autotokens.cpp | 97 ++++++++++++++++++----- 1 file changed, 78 insertions(+), 19 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 46a347f8..f1263600 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -24,10 +24,12 @@ extern "C" { #define AUTOTOKENS_ONLY_FAV 0 #define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 +#define AUTOTOKENS_CHANGE_MAX 64 #define AUTOTOKENS_WHITESPACE " " #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 +#define AUTOTOKENS_CREATE_FROM_THIN_AIR 1 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog #define AUTOTOKENS_LEARN_DICT 2 @@ -61,6 +63,7 @@ static int only_fav = AUTOTOKENS_ONLY_FAV; static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; +static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -83,7 +86,18 @@ static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); static regex regex_whitespace(R"([ \t]+)", regex::optimize); static vector *s; // the structure of the currently selected input -u32 good_whitespace_or_singleval() { +// FUNCTIONS + +/* This function is called once after everything is set up but before + any fuzzing attempt 
has been performed. + This is called in afl_custom_queue_get() */ +static void first_run(void *data) { + + (void)(data); + +} + +static u32 good_whitespace_or_singleval() { u32 i = rand_below(afl_ptr, current_id); if (id_to_token[i].size() == 1) { return i; } @@ -105,6 +119,8 @@ u32 good_whitespace_or_singleval() { extern "C" u32 afl_custom_fuzz_count(void *data, const u8 *buf, size_t buf_size) { + (void)(data); + if (s == NULL) return 0; u32 shift = unlikely(afl_ptr->custom_only) ? 7 : 8; @@ -135,9 +151,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 i, m_size = (u32)m.size(); u32 rounds = - MAX(AUTOTOKENS_CHANGE_MIN, - MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * - afl_ptr->havoc_div / 256)); + MIN(AUTOTOKENS_CHANGE_MAX, + MAX(AUTOTOKENS_CHANGE_MIN, + MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * + afl_ptr->havoc_div / 256))); // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); #if AUTOTOKENS_SPLICE_DISABLE == 1 @@ -379,9 +396,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, /* I get f*cking stack overflow using C++ regex with a regex of "\"[[:print:]]*?\"" if this matches a long string even with regex::optimize enabled :-( */ -u8 my_search_string(string::const_iterator cur, string::const_iterator ende, - string::const_iterator *match_begin, - string::const_iterator *match_end) { +static u8 my_search_string(string::const_iterator cur, + string::const_iterator ende, + string::const_iterator *match_begin, + string::const_iterator *match_end) { string::const_iterator start = cur, found_begin; u8 quote_type = 0; @@ -460,25 +478,30 @@ u8 my_search_string(string::const_iterator cur, string::const_iterator ende, } /* We are not using afl_custom_queue_new_entry() because not every corpus entry - will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ + will be necessarily fuzzed with this custom mutator. + So we use afl_custom_queue_get() instead. */ extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { - static int learn_state; + static int learn_state = 0; + static int is_first_run = 1; (void)(data); - if (likely(!debug)) { + if (unlikely(is_first_run)) { - if (unlikely(!afl_ptr->custom_only) && - ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || - (only_fav && !afl_ptr->queue_cur->favored))) { + is_first_run = 0; + first_run(data); - s = NULL; - DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); - return 1; + } - } + if (unlikely(!afl_ptr->custom_only) && !create_from_thin_air && + ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored))) { + + s = NULL; + DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); + return 1; } @@ -551,6 +574,42 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string fn = (char *)filename; auto entry = file_mapping.find(fn); + // if there is only one active queue item at start and it is very small + // the we create once a structure randomly. 
+ if (unlikely(create_from_thin_air)) { + + if (current_id > whitespace_ids + 6 && afl_ptr->active_items == 1 && + afl_ptr->queue_cur->len < AFL_TXT_MIN_LEN) { + + DEBUGF(stderr, "Creating an entry from thin air...\n"); + structure = new vector(); + u32 item, prev, cnt = current_id >> 1; + structure->reserve(cnt + 4); + for (u32 i = 0; i < cnt; i++) { + + item = rand_below(afl_ptr, current_id); + if (i && id_to_token[item].length() > 1 && + id_to_token[prev].length() > 1) { + + structure->push_back(good_whitespace_or_singleval()); + + } + + structure->push_back(item); + prev = item; + + } + + file_mapping[fn] = structure; + s = structure; + return 1; + + } + + create_from_thin_air = 0; + + } + if (entry == file_mapping.end()) { // this input file was not analyzed for tokens yet, so let's do it! @@ -574,8 +633,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); return 1; - } else - if (len > AFL_TXT_MAX_LEN) { + } else if (len > AFL_TXT_MAX_LEN) { fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again @@ -1088,6 +1146,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } + if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; } if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } if (getenv("AUTOTOKENS_LEARN_DICT")) { From e6120282556e4df79c01236849e5f6f225b8e428 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 5 Feb 2023 14:19:10 +0100 Subject: [PATCH 36/77] dict fix --- custom_mutators/autotokens/README | 3 +++ custom_mutators/autotokens/autotokens.cpp | 22 +++++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 86e7c9b3..d8613232 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -24,6 +24,9 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! 0 = none 1 = only -x or autodict 2 = -x, autodict and `CMPLOG` +`AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and + a dictionary loaded then create one initial + structure based on the dictionary. 
`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation (experimental) `AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f1263600..d3ae7e9c 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -29,7 +29,7 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 -#define AUTOTOKENS_CREATE_FROM_THIN_AIR 1 +#define AUTOTOKENS_CREATE_FROM_THIN_AIR 0 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog #define AUTOTOKENS_LEARN_DICT 2 @@ -506,14 +506,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } // check if there are new dictionary entries and add them to the tokens - if (valid_structures && learn_state < learn_dictionary_tokens) { + if (likely(valid_structures || create_from_thin_air) && + learn_state < learn_dictionary_tokens) { if (unlikely(!learn_state)) { learn_state = 1; } while (extras_cnt < afl_ptr->extras_cnt) { u32 ok = 1, l = afl_ptr->extras[extras_cnt].len; - u8 *ptr = afl_ptr->extras[extras_cnt].data; + u8 *buf, *ptr = afl_ptr->extras[extras_cnt].data; for (u32 i = 0; i < l; ++i) { @@ -528,14 +529,17 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (ok) { - token_to_id[(char *)ptr] = current_id; - id_to_token[current_id] = (char *)ptr; + buf = (u8 *)malloc(afl_ptr->extras[extras_cnt].len + 1); + memcpy(buf, afl_ptr->extras[extras_cnt].data, + afl_ptr->extras[extras_cnt].len); + buf[afl_ptr->extras[extras_cnt].len] = 0; + token_to_id[(char *)buf] = current_id; + id_to_token[current_id] = (char *)buf; ++current_id; } ++extras_cnt; - DEBUGF(stderr, "Added from dictionary: \"%s\"\n", ptr); } @@ -600,8 +604,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - file_mapping[fn] = structure; s = structure; + file_mapping[fn] = structure; + id_mapping[valid_structures] = structure; + ++valid_structures; + all_structure_items += structure->size(); + return 1; } From 8a2547073c500fcd637a7b276b7a38313bb70b5f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 6 Feb 2023 08:51:20 +0100 Subject: [PATCH 37/77] more options --- custom_mutators/autotokens/README | 2 ++ custom_mutators/autotokens/TODO | 4 +++- custom_mutators/autotokens/autotokens.cpp | 26 +++++++++++++++++++---- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index d8613232..e9c48662 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -24,6 +24,8 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! 0 = none 1 = only -x or autodict 2 = -x, autodict and `CMPLOG` +`AUTOTOKENS_CHANGE_MIN` - minimum number of mutations (1-256, default 8) +`AUTOTOKENS_CHANGE_MAX` - maximum number of mutations (1-4096, default 64) `AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and a dictionary loaded then create one initial structure based on the dictionary. 
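To make the effect of the two new bounds concrete, here is a simplified sketch (not taken from the patch) of how the per-call mutation budget is clamped; the actual code in `autotokens.cpp` additionally scales the middle term with the queue entry's `perf_score` and the havoc divisor:

```c
/* Simplified: derive the number of token mutations per fuzz call from the
   structure size and the AUTOTOKENS_CHANGE_MIN / AUTOTOKENS_CHANGE_MAX
   bounds (u32 as in the AFL++ headers). */
static u32 change_budget(u32 m_size, u32 change_min, u32 change_max) {

  u32 rounds = m_size >> 3;                            /* ~1/8 of the tokens */
  if (rounds < change_min) { rounds = change_min; }    /* at least _MIN      */
  if (rounds > change_max) { rounds = change_max; }    /* at most _MAX       */
  return rounds;

}
```

Under this simplification and the defaults (8 and 64), a 40-token structure gets 8 mutation rounds per call, while a 4000-token structure is capped at 64.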
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 528dff1f..496bfd45 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -9,7 +9,6 @@ analyse welche einen DICT haben, und welche davon rein ascii corpus analyse: + libxml - - hardbuzz - sqlite - libpcap min len, max len, % wenn 95/98/99/100 ascii @@ -20,3 +19,6 @@ AFL_TXT_MAX_LEN 65535 AFL_TXT_MIN_LEN 16 AFL_TXT_MIN_PERCENT=99 +-> KEIN FAV! + +change_min/_max werte diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index d3ae7e9c..ee35c68b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -32,7 +32,7 @@ extern "C" { #define AUTOTOKENS_CREATE_FROM_THIN_AIR 0 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog -#define AUTOTOKENS_LEARN_DICT 2 +#define AUTOTOKENS_LEARN_DICT 1 #ifndef AUTOTOKENS_SPLICE_DISABLE #define AUTOTOKENS_SPLICE_DISABLE 0 #endif @@ -64,6 +64,8 @@ static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR; +static int change_min = AUTOTOKENS_CHANGE_MIN; +static int change_max = AUTOTOKENS_CHANGE_MAX; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -151,8 +153,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 i, m_size = (u32)m.size(); u32 rounds = - MIN(AUTOTOKENS_CHANGE_MAX, - MAX(AUTOTOKENS_CHANGE_MIN, + MIN(change_max, + MAX(change_min, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256))); // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); @@ -1162,7 +1164,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { learn_dictionary_tokens = atoi(getenv("AUTOTOKENS_LEARN_DICT")); if (learn_dictionary_tokens < 0 || learn_dictionary_tokens > 2) { - learn_dictionary_tokens = 2; + learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; } @@ -1175,6 +1177,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } + if (getenv("AUTOTOKENS_CHANGE_MIN")) { + + change_min = atoi(getenv("AUTOTOKENS_CHANGE_MIN")); + if (change_min < 1 || change_min > 256) { change_min = AUTOTOKENS_CHANGE_MIN; } + + } + + if (getenv("AUTOTOKENS_CHANGE_MAX")) { + + change_max = atoi(getenv("AUTOTOKENS_CHANGE_MAX")); + if (change_max < 1 || change_max > 4096) { change_max = AUTOTOKENS_CHANGE_MAX; } + + } + + if (change_max < change_min) { change_max = change_min + 1; } + if (getenv("AUTOTOKENS_WHITESPACE")) { whitespace = getenv("AUTOTOKENS_WHITESPACE"); From ca063c92d20f4dee6ae9fd1d48dc531768b14ca5 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 6 Feb 2023 08:52:12 +0100 Subject: [PATCH 38/77] more options --- include/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/config.h b/include/config.h index ed8b844c..49d09174 100644 --- a/include/config.h +++ b/include/config.h @@ -494,7 +494,7 @@ /* What is the minimum percentage of ascii characters present to be classifed as "is_ascii"? 
*/ -#define AFL_TXT_MIN_PERCENT 98 +#define AFL_TXT_MIN_PERCENT 99 /* How often to perform ASCII mutations 0 = disable, 1-8 are good values */ From ca2e8a1bf65d6f5d33244c9c7971a21294dc932b Mon Sep 17 00:00:00 2001 From: Dawin Schmidt Date: Mon, 6 Feb 2023 08:38:20 -0500 Subject: [PATCH 39/77] Add Qemu deferred initialization example --- .../README.deferred_initialization_example.md | 201 ++++++++++++++++++ qemu_mode/README.md | 2 + 2 files changed, 203 insertions(+) create mode 100644 qemu_mode/README.deferred_initialization_example.md diff --git a/qemu_mode/README.deferred_initialization_example.md b/qemu_mode/README.deferred_initialization_example.md new file mode 100644 index 00000000..0ba04b79 --- /dev/null +++ b/qemu_mode/README.deferred_initialization_example.md @@ -0,0 +1,201 @@ +# Fuzz ARM32 Python Native Extensions in Binary-only Mode (LLVM fork-based) + +This is an example on how to fuzz Python native extensions in LLVM mode with deferred initialization on ARM32. + +We use Ubuntu x86_64 to run AFL++ and an Alpine ARMv7 Chroot to build the fuzzing target. + +Check [Resources](#resources) for the code used in this example. + +## Setup Alpine ARM Chroot on your x86_64 Linux Host + +### Use systemd-nspawn + +1. Install `qemu-user-binfmt`, `qemu-user-static` and `systemd-container` dependencies. +2. Restart the systemd-binfmt service: `systemctl restart systemd-binfmt.service` +3. Download an Alpine ARM RootFS from https://alpinelinux.org/downloads/ +4. Create a new `alpine_sysroot` folder and extract: `tar xfz alpine-minirootfs-3.17.1-armv7.tar.gz -C alpine_sysroot/` +5. Copy `qemu-arm-static` to Alpine's RootFS: `cp $(which qemu-arm-static) ./alpine/usr/bin/` +6. Chroot into the container: `sudo systemd-nspawn -D alpine/ --bind-ro=/etc/resolv.conf` +7. Install dependencies: `apk update && apk add build-base musl-dev clang15 python3 python3-dev py3-pip` +8. Exit the container with `exit` + +### Alternatively use Docker + +1. Install `qemu-user-binfmt` and `qemu-user-static` +2. Run Qemu container: ```$ docker run --rm --privileged multiarch/qemu-user-static --reset -p yes``` +3. Run Alpine container: ```$ docker run -it --rm arm32v7/alpine sh``` + +## Build AFL++ Qemu Mode with ARM Support + +First, build AFL++ as described [here](https://github.com/AFLplusplus/AFLplusplus/blob/dev/docs/INSTALL.md). 
Then, run the Qemu build script: + +```bash +cd qemu_mode && CPU_TARGET=arm ./build_qemu_support.sh +``` + +## Compile and Build the Fuzzing Project +Build the native extension and the fuzzing harness for ARM using the Alpine container (check [Resources](#resources) for the code): +```bash +ALPINE_ROOT= +FUZZ= +sudo systemd-nspawn -D $ALPINE_ROOT --bind=$FUZZ:/fuzz +CC=$(which clang) CFLAGS="-g" LDSHARED="clang -shared" python3 -m pip install /fuzz +clang $(python3-config --embed --cflags) $(python3-config --embed --ldflags) -o /fuzz/fuzz_harness.a /fuzz/fuzz_harness.c +exit +``` + +Manually trigger bug: +```bash +echo -n "FUZZ" | qemu-arm-static -L $ALPINE_ROOT $FUZZ/fuzz_harness.a +``` + +## Run AFL++ +Make sure to start the forkserver *after* loading all the shared objects by setting the `AFL_ENTRYPOINT` environment variable (see [here](https://aflplus.plus/docs/env_variables/#5-settings-for-afl-qemu-trace) for details): + +Choose an address just before the `while()` loop, for example: +```bash +qemu-arm-static -L $ALPINE_ROOT $ALPINE_ROOT/usr/bin/objdump -d $FUZZ/fuzz_harness.a | grep -A 1 "PyObject_GetAttrString" + +00000584 : + 584: e28fc600 add ip, pc, #0, 12 +-- + 7c8: ebffff6d bl 584 + 7cc: e58d0008 str r0, [sp, #8] +... +``` + +Check Qemu memory maps using the instructions from [here](https://aflplus.plus/docs/tutorials/libxml2_tutorial/): +>The binary is position independent and QEMU persistent needs the real addresses, not the offsets. Fortunately, QEMU loads PIE executables at a fixed address, 0x4000000000 for x86_64. +> +> We can check it using `AFL_QEMU_DEBUG_MAPS`. You don’t need this step if your binary is not PIE. + +Setup Python environment variables and run `afl-qemu-trace`: +```bash +PYTHONPATH=$ALPINE_ROOT/usr/lib/python3.10/ PYTHONHOME=$ALPINE_ROOT/usr/bin/ QEMU_LD_PREFIX=$ALPINE_ROOT AFL_QEMU_DEBUG_MAPS=1 afl-qemu-trace $FUZZ/fuzz_harness.a + +... +40000000-40001000 r-xp 00000000 103:03 8002276 fuzz_harness.a +40001000-4001f000 ---p 00000000 00:00 0 +4001f000-40020000 r--p 0000f000 103:03 8002276 fuzz_harness.a +40020000-40021000 rw-p 00010000 103:03 8002276 fuzz_harness.a +40021000-40022000 ---p 00000000 00:00 0 +40022000-40023000 rw-p 00000000 00:00 0 +``` + +Finally, setup Qemu environment variables... +```bash +export QEMU_SET_ENV=PYTHONPATH=$ALPINE_ROOT/usr/lib/python310.zip:$ALPINE_ROOT/usr/lib/python3.10:$ALPINE_ROOT/usr/lib/python3.10/lib-dynload:$ALPINE_ROOT/usr/lib/python3.10/site-packages,PYTHONHOME=$ALPINE_ROOT/usr/bin/ +export QEMU_LD_PREFIX=$ALPINE_ROOT +``` + +... and run AFL++: +```bash +mkdir -p $FUZZ/in && echo -n "FU" > $FUZZ/in/seed +AFL_ENTRYPOINT=0x400007cc afl-fuzz -i $FUZZ/in -o $FUZZ/out -Q -- $FUZZ/fuzz_harness.a +``` + +## Resources + +### setup.py + +```python +from distutils.core import setup, Extension + +module = Extension("memory", sources=["fuzz_target.c"]) + +setup( + name="memory", + version="1.0", + description='A simple "BOOM!" 
extension', + ext_modules=[module], +) +``` + +### fuzz_target.c + +```c +#define PY_SSIZE_T_CLEAN +#include + +#pragma clang optimize off + +static PyObject *corruption(PyObject* self, PyObject* args) { + char arr[3]; + Py_buffer name; + + if (!PyArg_ParseTuple(args, "y*", &name)) + return NULL; + + if (name.buf != NULL) { + if (strcmp(name.buf, "FUZZ") == 0) { + arr[0] = 'B'; + arr[1] = 'O'; + arr[2] = 'O'; + arr[3] = 'M'; + } + } + + PyBuffer_Release(&name); + Py_RETURN_NONE; +} + +static PyMethodDef MemoryMethods[] = { + {"corruption", corruption, METH_VARARGS, "BOOM!"}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef memory_module = { + PyModuleDef_HEAD_INIT, + "memory", + "BOOM!", + -1, + MemoryMethods +}; + +PyMODINIT_FUNC PyInit_memory(void) { + return PyModule_Create(&memory_module); +} +``` + +### fuzz_harness.c + +```c +#include + +#pragma clang optimize off + +int main(int argc, char **argv) { + unsigned char buf[1024000]; + ssize_t size; + + Py_Initialize(); + PyObject* name = PyUnicode_DecodeFSDefault("memory"); + PyObject* module = PyImport_Import(name); + Py_DECREF(name); + + if (module != NULL) { + PyObject* corruption_func = PyObject_GetAttrString(module, "corruption"); + + while ((size = read(0, buf, sizeof(buf))) > 0 ? 1 : 0) { + PyObject* arg = PyBytes_FromStringAndSize((char *)buf, size); + + if (arg != NULL) { + PyObject* res = PyObject_CallFunctionObjArgs(corruption_func, arg, NULL); + + if (res != NULL) { + Py_XDECREF(res); + } + + Py_DECREF(arg); + } + } + + Py_DECREF(corruption_func); + Py_DECREF(module); + } + + // Py_Finalize() leaks memory on certain Python versions (see https://bugs.python.org/issue1635741) + // Py_Finalize(); + return 0; +} +``` diff --git a/qemu_mode/README.md b/qemu_mode/README.md index 4ed2f298..92038737 100644 --- a/qemu_mode/README.md +++ b/qemu_mode/README.md @@ -66,6 +66,8 @@ allows to move the forkserver to a different part, e.g., just before the file is opened (e.g., way after command line parsing and config file loading, etc.) which can be a huge speed improvement. +For an example, see [README.deferred_initialization_example.md](README.deferred_initialization_example.md). 
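As a quick sanity check of the `AFL_ENTRYPOINT` value used in the example above: the address is simply the fixed QEMU load base reported by `AFL_QEMU_DEBUG_MAPS` plus the instruction offset picked from the `objdump` output. A minimal sketch, assuming the `0x40000000` base and the `0x7cc` offset shown in the outputs earlier:

```bash
# Derive the entry point from the two outputs above:
# load base of fuzz_harness (first AFL_QEMU_DEBUG_MAPS line) + objdump offset
QEMU_BASE=0x40000000   # 40000000-40001000 r-xp ... fuzz_harness
CALL_RET=0x7cc         # instruction right after the bl to PyObject_GetAttrString
printf 'AFL_ENTRYPOINT=0x%x\n' $((QEMU_BASE + CALL_RET))
# prints: AFL_ENTRYPOINT=0x400007cc
```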
+ ## 4) Persistent mode AFL++'s QEMU mode now supports also persistent mode for x86, x86_64, arm, and From 24e36212d507422bbbff78a514791d7f8d47301e Mon Sep 17 00:00:00 2001 From: Dawin Schmidt Date: Mon, 6 Feb 2023 09:04:33 -0500 Subject: [PATCH 40/77] Rename fuzzing harness --- .../README.deferred_initialization_example.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/qemu_mode/README.deferred_initialization_example.md b/qemu_mode/README.deferred_initialization_example.md index 0ba04b79..d940d6b5 100644 --- a/qemu_mode/README.deferred_initialization_example.md +++ b/qemu_mode/README.deferred_initialization_example.md @@ -40,13 +40,13 @@ ALPINE_ROOT= FUZZ= sudo systemd-nspawn -D $ALPINE_ROOT --bind=$FUZZ:/fuzz CC=$(which clang) CFLAGS="-g" LDSHARED="clang -shared" python3 -m pip install /fuzz -clang $(python3-config --embed --cflags) $(python3-config --embed --ldflags) -o /fuzz/fuzz_harness.a /fuzz/fuzz_harness.c +clang $(python3-config --embed --cflags) $(python3-config --embed --ldflags) -o /fuzz/fuzz_harness /fuzz/fuzz_harness.c exit ``` Manually trigger bug: ```bash -echo -n "FUZZ" | qemu-arm-static -L $ALPINE_ROOT $FUZZ/fuzz_harness.a +echo -n "FUZZ" | qemu-arm-static -L $ALPINE_ROOT $FUZZ/fuzz_harness ``` ## Run AFL++ @@ -54,7 +54,7 @@ Make sure to start the forkserver *after* loading all the shared objects by sett Choose an address just before the `while()` loop, for example: ```bash -qemu-arm-static -L $ALPINE_ROOT $ALPINE_ROOT/usr/bin/objdump -d $FUZZ/fuzz_harness.a | grep -A 1 "PyObject_GetAttrString" +qemu-arm-static -L $ALPINE_ROOT $ALPINE_ROOT/usr/bin/objdump -d $FUZZ/fuzz_harness | grep -A 1 "PyObject_GetAttrString" 00000584 : 584: e28fc600 add ip, pc, #0, 12 @@ -71,13 +71,13 @@ Check Qemu memory maps using the instructions from [here](https://aflplus.plus/d Setup Python environment variables and run `afl-qemu-trace`: ```bash -PYTHONPATH=$ALPINE_ROOT/usr/lib/python3.10/ PYTHONHOME=$ALPINE_ROOT/usr/bin/ QEMU_LD_PREFIX=$ALPINE_ROOT AFL_QEMU_DEBUG_MAPS=1 afl-qemu-trace $FUZZ/fuzz_harness.a +PYTHONPATH=$ALPINE_ROOT/usr/lib/python3.10/ PYTHONHOME=$ALPINE_ROOT/usr/bin/ QEMU_LD_PREFIX=$ALPINE_ROOT AFL_QEMU_DEBUG_MAPS=1 afl-qemu-trace $FUZZ/fuzz_harness ... -40000000-40001000 r-xp 00000000 103:03 8002276 fuzz_harness.a +40000000-40001000 r-xp 00000000 103:03 8002276 fuzz_harness 40001000-4001f000 ---p 00000000 00:00 0 -4001f000-40020000 r--p 0000f000 103:03 8002276 fuzz_harness.a -40020000-40021000 rw-p 00010000 103:03 8002276 fuzz_harness.a +4001f000-40020000 r--p 0000f000 103:03 8002276 fuzz_harness +40020000-40021000 rw-p 00010000 103:03 8002276 fuzz_harness 40021000-40022000 ---p 00000000 00:00 0 40022000-40023000 rw-p 00000000 00:00 0 ``` @@ -91,7 +91,7 @@ export QEMU_LD_PREFIX=$ALPINE_ROOT ... 
and run AFL++: ```bash mkdir -p $FUZZ/in && echo -n "FU" > $FUZZ/in/seed -AFL_ENTRYPOINT=0x400007cc afl-fuzz -i $FUZZ/in -o $FUZZ/out -Q -- $FUZZ/fuzz_harness.a +AFL_ENTRYPOINT=0x400007cc afl-fuzz -i $FUZZ/in -o $FUZZ/out -Q -- $FUZZ/fuzz_harness ``` ## Resources From dbfa23b40a6bdd1b8affc3920c68f11a6e63b231 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 6 Feb 2023 16:38:46 +0100 Subject: [PATCH 41/77] fixes --- instrumentation/afl-compiler-rt.o.c | 10 +++++++--- src/afl-fuzz-one.c | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index b1ce4427..9871d7f4 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -1518,9 +1518,13 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { _is_sancov = 1; - __afl_auto_first(); - __afl_auto_second(); - __afl_auto_early(); + if (!getenv("AFL_DUMP_MAP_SIZE")) { + + __afl_auto_first(); + __afl_auto_second(); + __afl_auto_early(); + + } if (__afl_debug) { diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 97855607..6367f597 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -5798,7 +5798,7 @@ void pso_updating(afl_state_t *afl) { u8 fuzz_one(afl_state_t *afl) { - int key_val_lv_1 = 0, key_val_lv_2 = 0; + int key_val_lv_1 = -1, key_val_lv_2 = -1; #ifdef _AFL_DOCUMENT_MUTATIONS @@ -5840,7 +5840,7 @@ u8 fuzz_one(afl_state_t *afl) { } - return (key_val_lv_1 | key_val_lv_2); + return (key_val_lv_1 == 0 || key_val_lv_2 == 0 ? 0 : 1 ); } From 6596284cc41484ec5062ca53109ec5bd7899e56f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 6 Feb 2023 17:59:17 +0100 Subject: [PATCH 42/77] endless loop fix --- src/afl-fuzz.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index b8114a7f..748c7acf 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -2210,8 +2210,8 @@ int main(int argc, char **argv_orig, char **envp) { cull_queue(afl); // ensure we have at least one seed that is not disabled. 
- u32 entry, valid_seeds = 0; - for (entry = 0; entry < afl->queued_items; ++entry) + u32 valid_seeds = 0; + for (u32 entry = 0; entry < afl->queued_items; ++entry) if (!afl->queue_buf[entry]->disabled) { ++valid_seeds; } if (!afl->pending_not_fuzzed || !valid_seeds) { @@ -2241,7 +2241,7 @@ int main(int argc, char **argv_orig, char **envp) { u64 max_ms = 0; - for (entry = 0; entry < afl->queued_items; ++entry) + for (u32 entry = 0; entry < afl->queued_items; ++entry) if (!afl->queue_buf[entry]->disabled) if (afl->queue_buf[entry]->exec_us > max_ms) max_ms = afl->queue_buf[entry]->exec_us; @@ -2285,7 +2285,7 @@ int main(int argc, char **argv_orig, char **envp) { #ifdef INTROSPECTION u32 prev_saved_crashes = 0, prev_saved_tmouts = 0; #endif - u32 prev_queued_items = 0, runs_in_current_cycle = (u32)-1; + u32 skip_count = 0, prev_queued_items = 0, runs_in_current_cycle = (u32)-1; u8 skipped_fuzz; #ifdef INTROSPECTION @@ -2547,8 +2547,57 @@ int main(int argc, char **argv_orig, char **envp) { } skipped_fuzz = fuzz_one(afl); + + if (unlikely(skipped_fuzz)) { + + ++skip_count; + + if (unlikely(skip_count > afl->active_items)) { + + if (afl->active_items > 1 && !afl->old_seed_selection) { + + u32 found = 0; + for (u32 i = 0; i < afl->queued_items; ++i) { + + if (likely(afl->queue_buf[i]->disabled && + !afl->queue_buf[i]->perf_score)) { + + ++found; + + } + + } + + if (found >= afl->active_items) { + + // all active items have a perf_score of 0 ... damn + for (u32 i = 0; i < afl->queued_items; ++i) { + + if (likely(afl->queue_buf[i]->disabled)) { + + afl->queue_buf[i]->perf_score = afl->queue_buf[i]->weight; + + } + + } + + } + + } + + skip_count = 0; + + } + + } else { + + skip_count = 0; + + } + #ifdef INTROSPECTION ++afl->queue_cur->stats_selected; + if (unlikely(skipped_fuzz)) { ++afl->queue_cur->stats_skipped; From 03e6d33a4044115c44afeb6c1ae735c0310018af Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 7 Feb 2023 15:27:31 +0100 Subject: [PATCH 43/77] fix perfscore 0 check --- src/afl-fuzz.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 748c7acf..8c2eb5b7 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -2559,8 +2559,8 @@ int main(int argc, char **argv_orig, char **envp) { u32 found = 0; for (u32 i = 0; i < afl->queued_items; ++i) { - if (likely(afl->queue_buf[i]->disabled && - !afl->queue_buf[i]->perf_score)) { + if (likely(!afl->queue_buf[i]->disabled && + afl->queue_buf[i]->perf_score == 0)) { ++found; @@ -2573,7 +2573,7 @@ int main(int argc, char **argv_orig, char **envp) { // all active items have a perf_score of 0 ... 
damn for (u32 i = 0; i < afl->queued_items; ++i) { - if (likely(afl->queue_buf[i]->disabled)) { + if (likely(!afl->queue_buf[i]->disabled)) { afl->queue_buf[i]->perf_score = afl->queue_buf[i]->weight; From ab26356bf73f2242555e6be72a004082fa22d402 Mon Sep 17 00:00:00 2001 From: Daniil Kutz Date: Tue, 7 Feb 2023 19:50:07 +0300 Subject: [PATCH 44/77] Increase fuzz_level for mopt_common_fuzzing Change performance score calculation for lin and quad power schedules --- src/afl-fuzz-one.c | 1 + src/afl-fuzz-queue.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 6367f597..76826945 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -5683,6 +5683,7 @@ pacemaker_fuzzing: } /* block */ + ++afl->queue_cur->fuzz_level; return ret_val; } diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c index e3faa392..ebfc252c 100644 --- a/src/afl-fuzz-queue.c +++ b/src/afl-fuzz-queue.c @@ -1007,10 +1007,16 @@ u32 calculate_score(afl_state_t *afl, struct queue_entry *q) { break; case LIN: + // Don't modify perf_score for unfuzzed seeds + if (!q->fuzz_level) break; + factor = q->fuzz_level / (afl->n_fuzz[q->n_fuzz_entry] + 1); break; case QUAD: + // Don't modify perf_score for unfuzzed seeds + if (!q->fuzz_level) break; + factor = q->fuzz_level * q->fuzz_level / (afl->n_fuzz[q->n_fuzz_entry] + 1); break; From 846e910e0c6d09808ea6f87b59e2cf79769979dc Mon Sep 17 00:00:00 2001 From: Daniil Kutz Date: Wed, 8 Feb 2023 13:50:03 +0300 Subject: [PATCH 45/77] Validate -M and -p power schedule options --- src/afl-fuzz.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 8c2eb5b7..de41600b 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -1297,6 +1297,12 @@ int main(int argc, char **argv_orig, char **envp) { } + if (afl->is_main_node == 1 && afl->schedule != FAST && afl->schedule != EXPLORE) { + + FATAL("-M is compatible only with fast and explore -p power schedules"); + + } + if (optind == argc || !afl->in_dir || !afl->out_dir || show_help) { usage(argv[0], show_help); From 05b1189a55b573a4021abed078dab098f4591ad6 Mon Sep 17 00:00:00 2001 From: Marcello Maugeri Date: Wed, 8 Feb 2023 15:53:49 +0100 Subject: [PATCH 46/77] Update afl-forkserver.c Fix typo --- src/afl-forkserver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c index 89d01460..5aa4c2ff 100644 --- a/src/afl-forkserver.c +++ b/src/afl-forkserver.c @@ -1370,7 +1370,7 @@ afl_fsrv_run_target(afl_forkserver_t *fsrv, u32 timeout, case Crash: case Asan: return FSRV_RUN_CRASH; - case Timout: + case Timeout: return FSRV_RUN_TMOUT; case InvalidWriteToPayload: /* ??? 
*/ From c86d06849b46865f126a522b7b4c8eb0f72c6ba1 Mon Sep 17 00:00:00 2001 From: Marcello Maugeri Date: Wed, 8 Feb 2023 15:54:27 +0100 Subject: [PATCH 47/77] Update forkserver.h Fix typo --- include/forkserver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/forkserver.h b/include/forkserver.h index 35bc1771..50898a08 100644 --- a/include/forkserver.h +++ b/include/forkserver.h @@ -43,7 +43,7 @@ typedef enum NyxReturnValue { Normal, Crash, Asan, - Timout, + Timeout, InvalidWriteToPayload, Error, IoError, From f2be73186e2e16c3992f92b65ae9ba598d6fff2f Mon Sep 17 00:00:00 2001 From: Yaakov Saxon Date: Thu, 9 Feb 2023 21:37:35 +0000 Subject: [PATCH 48/77] cmplog exec with target_path --- src/afl-fuzz-cmplog.c | 2 +- src/afl-fuzz.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/afl-fuzz-cmplog.c b/src/afl-fuzz-cmplog.c index 8967d4bc..2bf26d19 100644 --- a/src/afl-fuzz-cmplog.c +++ b/src/afl-fuzz-cmplog.c @@ -41,7 +41,7 @@ void cmplog_exec_child(afl_forkserver_t *fsrv, char **argv) { } - execv(argv[0], argv); + execv(fsrv->target_path, argv); } diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 8c2eb5b7..e7fd3dfe 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -2081,6 +2081,7 @@ int main(int argc, char **argv_orig, char **envp) { afl->cmplog_fsrv.qemu_mode = afl->fsrv.qemu_mode; afl->cmplog_fsrv.frida_mode = afl->fsrv.frida_mode; afl->cmplog_fsrv.cmplog_binary = afl->cmplog_binary; + afl->cmplog_fsrv.target_path = afl->fsrv.target_path; afl->cmplog_fsrv.init_child_func = cmplog_exec_child; if ((map_size <= DEFAULT_SHMEM_SIZE || From 673a0a3866783bf28e31d14fbd7a9009c7816ec3 Mon Sep 17 00:00:00 2001 From: Yaakov Saxon Date: Thu, 9 Feb 2023 22:02:47 +0000 Subject: [PATCH 49/77] add test for unprefixed path --- frida_mode/test/cmplog/GNUmakefile | 17 +++++++++++++++-- frida_mode/test/cmplog/Makefile | 3 +++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/frida_mode/test/cmplog/GNUmakefile b/frida_mode/test/cmplog/GNUmakefile index bcaff42d..fca52f82 100644 --- a/frida_mode/test/cmplog/GNUmakefile +++ b/frida_mode/test/cmplog/GNUmakefile @@ -2,8 +2,9 @@ PWD:=$(shell pwd)/ ROOT:=$(PWD)../../../ BUILD_DIR:=$(PWD)build/ +TEST_CMPLOG_BASENAME=compcovtest TEST_CMPLOG_SRC=$(PWD)cmplog.c -TEST_CMPLOG_OBJ=$(BUILD_DIR)compcovtest +TEST_CMPLOG_OBJ=$(BUILD_DIR)$(TEST_CMPLOG_BASENAME) TEST_BIN:=$(PWD)../../build/test @@ -13,7 +14,7 @@ CMP_LOG_INPUT:=$(TEST_DATA_DIR)in QEMU_OUT:=$(BUILD_DIR)qemu-out FRIDA_OUT:=$(BUILD_DIR)frida-out -.PHONY: all 32 clean qemu frida frida-nocmplog format +.PHONY: all 32 clean qemu frida frida-nocmplog frida-unprefixedpath format all: $(TEST_CMPLOG_OBJ) make -C $(ROOT)frida_mode/ @@ -64,6 +65,18 @@ frida-nocmplog: $(TEST_CMPLOG_OBJ) $(CMP_LOG_INPUT) -- \ $(TEST_CMPLOG_OBJ) @@ + +frida-unprefixedpath: $(TEST_CMPLOG_OBJ) $(CMP_LOG_INPUT) + PATH=$(BUILD_DIR) $(ROOT)afl-fuzz \ + -O \ + -i $(TEST_DATA_DIR) \ + -o $(FRIDA_OUT) \ + -c 0 \ + -l 3AT \ + -Z \ + -- \ + $(TEST_CMPLOG_BASENAME) @@ + debug: $(TEST_CMPLOG_OBJ) $(CMP_LOG_INPUT) gdb \ --ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \ diff --git a/frida_mode/test/cmplog/Makefile b/frida_mode/test/cmplog/Makefile index 7ca9a9a5..b84e9218 100644 --- a/frida_mode/test/cmplog/Makefile +++ b/frida_mode/test/cmplog/Makefile @@ -19,6 +19,9 @@ frida: frida-nocmplog: @gmake frida-nocmplog +frida-unprefixedpath: + @gmake frida-unprefixedpath + format: @gmake format From d3cdeabf9297ed2b5a5c06ce5b59980d41cdcb40 Mon Sep 17 00:00:00 2001 From: Yaakov Saxon Date: 
Thu, 9 Feb 2023 22:04:18 +0000 Subject: [PATCH 50/77] Add myself to contributors :) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index eeab7aa1..821b8cb7 100644 --- a/README.md +++ b/README.md @@ -228,6 +228,7 @@ Thank you! (For people sending pull requests - please add yourself to this list Thomas Rooijakkers David Carlier Ruben ten Hove Joey Jiao fuzzah @intrigus-lgtm + Yaakov Saxon ``` From 141c324eb935ddd25a9ea898bf94ed4f3afb7a79 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 12 Feb 2023 17:55:16 +0100 Subject: [PATCH 51/77] revert perfscore 0 fix attempt --- src/afl-fuzz.c | 56 ++++---------------------------------------------- 1 file changed, 4 insertions(+), 52 deletions(-) diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index e7fd3dfe..6bd81304 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -2211,8 +2211,8 @@ int main(int argc, char **argv_orig, char **envp) { cull_queue(afl); // ensure we have at least one seed that is not disabled. - u32 valid_seeds = 0; - for (u32 entry = 0; entry < afl->queued_items; ++entry) + u32 entry, valid_seeds = 0; + for (entry = 0; entry < afl->queued_items; ++entry) if (!afl->queue_buf[entry]->disabled) { ++valid_seeds; } if (!afl->pending_not_fuzzed || !valid_seeds) { @@ -2242,7 +2242,7 @@ int main(int argc, char **argv_orig, char **envp) { u64 max_ms = 0; - for (u32 entry = 0; entry < afl->queued_items; ++entry) + for (entry = 0; entry < afl->queued_items; ++entry) if (!afl->queue_buf[entry]->disabled) if (afl->queue_buf[entry]->exec_us > max_ms) max_ms = afl->queue_buf[entry]->exec_us; @@ -2286,7 +2286,7 @@ int main(int argc, char **argv_orig, char **envp) { #ifdef INTROSPECTION u32 prev_saved_crashes = 0, prev_saved_tmouts = 0; #endif - u32 skip_count = 0, prev_queued_items = 0, runs_in_current_cycle = (u32)-1; + u32 prev_queued_items = 0, runs_in_current_cycle = (u32)-1; u8 skipped_fuzz; #ifdef INTROSPECTION @@ -2548,54 +2548,6 @@ int main(int argc, char **argv_orig, char **envp) { } skipped_fuzz = fuzz_one(afl); - - if (unlikely(skipped_fuzz)) { - - ++skip_count; - - if (unlikely(skip_count > afl->active_items)) { - - if (afl->active_items > 1 && !afl->old_seed_selection) { - - u32 found = 0; - for (u32 i = 0; i < afl->queued_items; ++i) { - - if (likely(!afl->queue_buf[i]->disabled && - afl->queue_buf[i]->perf_score == 0)) { - - ++found; - - } - - } - - if (found >= afl->active_items) { - - // all active items have a perf_score of 0 ... 
damn - for (u32 i = 0; i < afl->queued_items; ++i) { - - if (likely(!afl->queue_buf[i]->disabled)) { - - afl->queue_buf[i]->perf_score = afl->queue_buf[i]->weight; - - } - - } - - } - - } - - skip_count = 0; - - } - - } else { - - skip_count = 0; - - } - #ifdef INTROSPECTION ++afl->queue_cur->stats_selected; From 7eaef449a1e92999c89df23ab474b3be3da595f8 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 08:14:04 +0100 Subject: [PATCH 52/77] remove ALTERNATIVE_TOKENIZE --- custom_mutators/autotokens/autotokens.cpp | 548 ++++++---------------- 1 file changed, 149 insertions(+), 399 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index ee35c68b..a027ac2b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -22,7 +22,6 @@ extern "C" { #define AUTOTOKENS_DEBUG 0 #define AUTOTOKENS_ONLY_FAV 0 -#define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_CHANGE_MAX 64 #define AUTOTOKENS_WHITESPACE " " @@ -60,7 +59,6 @@ typedef struct my_mutator { static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; static int only_fav = AUTOTOKENS_ONLY_FAV; -static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR; @@ -142,7 +140,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, (void)(data); - if (s == NULL) { + if (unlikely(s == NULL)) { *out_buf = NULL; return 0; @@ -183,9 +181,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } while (unlikely( new_item == cur_item || - (!alternative_tokenize && - ((whitespace_ids < new_item && whitespace_ids >= cur_item) || - (whitespace_ids >= new_item && whitespace_ids < cur_item))))); + ((whitespace_ids < new_item && whitespace_ids >= cur_item) || + (whitespace_ids >= new_item && whitespace_ids < cur_item)))); DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; @@ -200,37 +197,33 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, new_item = rand_below(afl_ptr, current_id); - } while (unlikely(!alternative_tokenize && new_item >= whitespace_ids)); + } while (unlikely(new_item >= whitespace_ids)); u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); - if (likely(!alternative_tokenize)) { + // if we insert an identifier or string we might need whitespace + if (id_to_token[new_item].size() > 1) { - // if we insert an identifier or string we might need whitespace - if (id_to_token[new_item].size() > 1) { + // need to insert before? - // need to insert before? + if (pos && m[pos - 1] >= whitespace_ids && + id_to_token[m[pos - 1]].size() > 1) { - if (pos && m[pos - 1] >= whitespace_ids && - id_to_token[m[pos - 1]].size() > 1) { + m.insert(m.begin() + pos, good_whitespace_or_singleval()); + ++m_size; - m.insert(m.begin() + pos, good_whitespace_or_singleval()); - ++m_size; + } - } + if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && + id_to_token[m[pos + 1]].size() > 1) { - if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && - id_to_token[m[pos + 1]].size() > 1) { + // need to insert after? - // need to insert after? 
- - m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); - ++m_size; - - } + m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); + ++m_size; } @@ -290,26 +283,22 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - if (likely(!alternative_tokenize)) { + // do we need a whitespace/token at the beginning? + if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && + id_to_token[m[dst_off]].size() > 1) { - // do we need a whitespace/token at the beginning? - if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && - id_to_token[m[dst_off]].size() > 1) { + m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); + ++m_size; - m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); - ++m_size; + } - } + // do we need a whitespace/token at the end? + if (dst_off + n < m_size && + id_to_token[m[dst_off + n - 1]].size() > 1 && + id_to_token[m[dst_off + n]].size() > 1) { - // do we need a whitespace/token at the end? - if (dst_off + n < m_size && - id_to_token[m[dst_off + n - 1]].size() > 1 && - id_to_token[m[dst_off + n]].size() > 1) { - - m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); - ++m_size; - - } + m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); + ++m_size; } @@ -332,8 +321,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, // if what we delete will result in a missing whitespace/token, // instead of deleting we switch the item to a whitespace or token. - if (likely(!alternative_tokenize) && pos && pos + 1 < m_size && - id_to_token[m[pos - 1]].size() > 1 && + if (pos && pos + 1 < m_size && id_to_token[m[pos - 1]].size() > 1 && id_to_token[m[pos + 1]].size() > 1) { m[pos] = good_whitespace_or_singleval(); @@ -362,17 +350,11 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - u32 m_size_1 = m_size - 1; output = ""; for (i = 0; i < m_size; ++i) { output += id_to_token[m[i]]; - if (unlikely(alternative_tokenize && i < m_size_1)) { - - output += whitespace; - - } } @@ -725,336 +707,107 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "START!\n"); - if (likely(!alternative_tokenize)) { + while (my_search_string(cur, ende, &match_begin, &match_end)) { - while (my_search_string(cur, ende, &match_begin, &match_end)) { + prev = cur; + found = match_begin; + cur = match_end; - prev = cur; - found = match_begin; - cur = match_end; + IFDEBUG { - IFDEBUG { + string foo(match_begin, match_end); + DEBUGF(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + foo.c_str(), prev - input.begin(), found - prev, + cur - input.begin()); - string foo(match_begin, match_end); - DEBUGF(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - foo.c_str(), prev - input.begin(), found - prev, - cur - input.begin()); + } - } + if (prev < found) { // there are items between search start and find + while (prev < found) { - if (prev < found) { // there are items between search start and find - while (prev < found) { + if (isspace(*prev)) { - if (isspace(*prev)) { + auto start = prev; + while (isspace(*prev)) { - auto start = prev; - while (isspace(*prev)) { - - ++prev; - - } - - tokens.push_back(std::string(start, prev)); - DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); - - } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { - - auto start = prev; - while (isalnum(*prev) || *prev == '$' || *prev == '_' || - *prev == '.' 
|| *prev == '/') { - - ++prev; - - } - - tokens.push_back(string(start, prev)); - DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); - - } else { - - tokens.push_back(string(prev, prev + 1)); - DEBUGF(stderr, "OTHER \"%c\"\n", *prev); ++prev; } - } - - } - - tokens.push_back(string(match_begin, match_end)); - DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str()); - - } - - DEBUGF(stderr, "AFTER all strings\n"); - - if (cur < ende) { - - while (cur < ende) { - - if (isspace(*cur)) { - - auto start = cur; - while (isspace(*cur)) { - - ++cur; - - } - - tokens.push_back(std::string(start, cur)); - DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens.push_back(std::string(start, prev)); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, tokens[tokens.size() - 1].c_str()); - } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { + } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { - auto start = cur; - while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' || - *cur == '/') { + auto start = prev; + while (isalnum(*prev) || *prev == '$' || *prev == '_' || + *prev == '.' || *prev == '/') { - ++cur; + ++prev; } - tokens.push_back(std::string(start, cur)); - DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens.push_back(string(start, prev)); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, tokens[tokens.size() - 1].c_str()); } else { - tokens.push_back(std::string(cur, cur + 1)); - DEBUGF(stderr, "OTHER \"%c\"\n", *cur); + tokens.push_back(string(prev, prev + 1)); + DEBUGF(stderr, "OTHER \"%c\"\n", *prev); + ++prev; + + } + + } + + } + + tokens.push_back(string(match_begin, match_end)); + DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str()); + + } + + DEBUGF(stderr, "AFTER all strings\n"); + + if (cur < ende) { + + while (cur < ende) { + + if (isspace(*cur)) { + + auto start = cur; + while (isspace(*cur)) { + ++cur; } - } + tokens.push_back(std::string(start, cur)); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); - } + } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { - } else { + auto start = cur; + while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' 
|| + *cur == '/') { - // alternative tokenize - while (my_search_string(cur, ende, &match_begin, &match_end)) { - - prev = cur; - found = match_begin; - cur = match_end; - IFDEBUG { - - string foo(match_begin, match_end); - DEBUGF(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - foo.c_str(), prev - input.begin(), found - prev, - cur - input.begin()); - - } - - if (prev < found) { // there are items between search start and find - - sregex_token_iterator it{prev, found, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { - - return s.size() == 0; - - }), - - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - - IFDEBUG { - - DEBUGF(stderr, "tokens1: %lu input size: %lu\n", tokenized.size(), - input.size()); - for (auto x : tokenized) { - - cerr << x << endl; - - } + ++cur; } - for (auto token : tokenized) { + tokens.push_back(std::string(start, cur)); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; + } else { - while (regex_search(c, e, m, regex_word)) { - - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { - - // there are items between search start and find - while (p < f) { - - IFDEBUG { - - string foo(p, p + 1); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(p, p + 1)); - ++p; - - } - - IFDEBUG { - - string foo(p, f); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - tokens.push_back(std::string(p, f)); - - } - - } - - DEBUGF(stderr, - "SUBstring \"%s\" found at start %lu offset %lu continue " - "at %lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); - tokens.push_back(m[0].str()); - - } - - if (c < e) { - - while (c < e) { - - IFDEBUG { - - string foo(c, c + 1); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(c, c + 1)); - ++c; - - } - - IFDEBUG { - - string foo(c, e); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(c, e)); - - } - - } - - } - - tokens.push_back(string(match_begin, match_end)); - - } - - if (cur < ende) { - - sregex_token_iterator it{cur, ende, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase( - std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { return s.size() == 0; }), - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - - IFDEBUG { - - DEBUGF(stderr, "tokens2: %lu input size: %lu\n", tokenized.size(), - input.size()); - for (auto x : tokenized) { - - cerr << x << endl; - - } - - } - - for (auto token : tokenized) { - - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; - - while (regex_search(c, e, m, regex_word)) { - - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { - - // there are items between search start and find - while (p < f) { - - IFDEBUG { - - string foo(p, p + 1); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(p, p + 1)); - ++p; - - } - - IFDEBUG { - - string foo(p, f); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(p, f)); - - } - - DEBUGF(stderr, - "SUB2string \"%s\" found at start %lu offset %lu continue " - "at %lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - 
c - token.begin()); - tokens.push_back(m[0].str()); - - } - - if (c < e) { - - while (c < e) { - - IFDEBUG { - - string foo(c, c + 1); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(c, c + 1)); - ++c; - - } - - IFDEBUG { - - string foo(c, e); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(c, e)); - - } + tokens.push_back(std::string(cur, cur + 1)); + DEBUGF(stderr, "OTHER \"%c\"\n", *cur); + ++cur; } @@ -1065,15 +818,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, IFDEBUG { DEBUGF(stderr, "DUMPING TOKENS:\n"); - u32 size_1 = tokens.size() - 1; for (u32 i = 0; i < tokens.size(); ++i) { DEBUGF(stderr, "%s", tokens[i].c_str()); - if (unlikely(alternative_tokenize && i < size_1)) { - - DEBUGF(stderr, "%s", whitespace.c_str()); - - } } @@ -1157,7 +904,6 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; } - if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } if (getenv("AUTOTOKENS_LEARN_DICT")) { @@ -1180,14 +926,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_CHANGE_MIN")) { change_min = atoi(getenv("AUTOTOKENS_CHANGE_MIN")); - if (change_min < 1 || change_min > 256) { change_min = AUTOTOKENS_CHANGE_MIN; } + if (change_min < 1 || change_min > 256) { + + change_min = AUTOTOKENS_CHANGE_MIN; + + } } if (getenv("AUTOTOKENS_CHANGE_MAX")) { change_max = atoi(getenv("AUTOTOKENS_CHANGE_MAX")); - if (change_max < 1 || change_max > 4096) { change_max = AUTOTOKENS_CHANGE_MAX; } + if (change_max < 1 || change_max > 4096) { + + change_max = AUTOTOKENS_CHANGE_MAX; + + } } @@ -1212,53 +966,49 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { // set common whitespace tokens // we deliberately do not put uncommon ones here to these will count as // identifier tokens. 
- if (!alternative_tokenize) { - - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t"] = current_id; - id_to_token[current_id] = "\t"; - ++current_id; - token_to_id["\n"] = current_id; - id_to_token[current_id] = "\n"; - ++current_id; - token_to_id["\r\n"] = current_id; - id_to_token[current_id] = "\r\n"; - ++current_id; - token_to_id[" \n"] = current_id; - id_to_token[current_id] = " \n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t"] = current_id; - id_to_token[current_id] = "\t\t"; - ++current_id; - token_to_id["\n\n"] = current_id; - id_to_token[current_id] = "\n\n"; - ++current_id; - token_to_id["\r\n\r\n"] = current_id; - id_to_token[current_id] = "\r\n\r\n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t\t\t"] = current_id; - id_to_token[current_id] = "\t\t\t\t"; - ++current_id; - token_to_id["\n\n\n\n"] = current_id; - id_to_token[current_id] = "\n\n\n\n"; - ++current_id; - whitespace_ids = current_id; - token_to_id["\""] = current_id; - id_to_token[current_id] = "\""; - ++current_id; - token_to_id["'"] = current_id; - id_to_token[current_id] = "'"; - ++current_id; - - } + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t"] = current_id; + id_to_token[current_id] = "\t"; + ++current_id; + token_to_id["\n"] = current_id; + id_to_token[current_id] = "\n"; + ++current_id; + token_to_id["\r\n"] = current_id; + id_to_token[current_id] = "\r\n"; + ++current_id; + token_to_id[" \n"] = current_id; + id_to_token[current_id] = " \n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t"] = current_id; + id_to_token[current_id] = "\t\t"; + ++current_id; + token_to_id["\n\n"] = current_id; + id_to_token[current_id] = "\n\n"; + ++current_id; + token_to_id["\r\n\r\n"] = current_id; + id_to_token[current_id] = "\r\n\r\n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t\t\t"] = current_id; + id_to_token[current_id] = "\t\t\t\t"; + ++current_id; + token_to_id["\n\n\n\n"] = current_id; + id_to_token[current_id] = "\n\n\n\n"; + ++current_id; + whitespace_ids = current_id; + token_to_id["\""] = current_id; + id_to_token[current_id] = "\""; + ++current_id; + token_to_id["'"] = current_id; + id_to_token[current_id] = "'"; + ++current_id; return data; From 240f6421d8240b4b4d4d5bd509c0c3277a083896 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 08:23:47 +0100 Subject: [PATCH 53/77] optimize performance --- custom_mutators/autotokens/autotokens.cpp | 80 +++++++---------------- 1 file changed, 23 insertions(+), 57 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index a027ac2b..ca738d0b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -204,31 +204,6 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, ++m_size; DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); - // if we insert an identifier or string we might need whitespace - if (id_to_token[new_item].size() > 1) { - - // need to insert before? 
- - if (pos && m[pos - 1] >= whitespace_ids && - id_to_token[m[pos - 1]].size() > 1) { - - m.insert(m.begin() + pos, good_whitespace_or_singleval()); - ++m_size; - - } - - if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && - id_to_token[m[pos + 1]].size() > 1) { - - // need to insert after? - - m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); - ++m_size; - - } - - } - break; } @@ -283,25 +258,6 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - // do we need a whitespace/token at the beginning? - if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && - id_to_token[m[dst_off]].size() > 1) { - - m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); - ++m_size; - - } - - // do we need a whitespace/token at the end? - if (dst_off + n < m_size && - id_to_token[m[dst_off + n - 1]].size() > 1 && - id_to_token[m[dst_off + n]].size() > 1) { - - m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); - ++m_size; - - } - break; } @@ -319,19 +275,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } while (unlikely(m[pos] < whitespace_ids)); - // if what we delete will result in a missing whitespace/token, - // instead of deleting we switch the item to a whitespace or token. - if (pos && pos + 1 < m_size && id_to_token[m[pos - 1]].size() > 1 && - id_to_token[m[pos + 1]].size() > 1) { - - m[pos] = good_whitespace_or_singleval(); - - } else { - - m.erase(m.begin() + pos); - --m_size; - - } + m.erase(m.begin() + pos); + --m_size; } else { @@ -350,10 +295,31 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + /* Now we create the output */ + output = ""; + u32 prev_size = 0; for (i = 0; i < m_size; ++i) { + if (likely(i + 1 < m_size)) { + + u32 this_size = id_to_token[m[i]].size(); + + /* The output we are generating might need repairing. + General rule: two items that have a size larger than 2 are strings + or identifizers and need a whitespace or an item of length 1 in + between. */ + if (unlikely(prev_size > 1 && this_size > 1)) { + + output += id_to_token[good_whitespace_or_singleval()]; + + } + + prev_size = this_size; + + } + output += id_to_token[m[i]]; } From 61439859cece05cd3e204af60bb5ff08556c490d Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 08:26:30 +0100 Subject: [PATCH 54/77] cleanup --- custom_mutators/autotokens/README | 4 ---- custom_mutators/autotokens/autotokens.cpp | 8 -------- 2 files changed, 12 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index e9c48662..904b5fa3 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -29,7 +29,3 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! `AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and a dictionary loaded then create one initial structure based on the dictionary. 
-`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation - (experimental) -`AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, - default is " " diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index ca738d0b..10afa2c2 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -24,7 +24,6 @@ extern "C" { #define AUTOTOKENS_ONLY_FAV 0 #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_CHANGE_MAX 64 -#define AUTOTOKENS_WHITESPACE " " #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 @@ -75,7 +74,6 @@ static unordered_map *> file_mapping; static unordered_map *> id_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -static string whitespace = AUTOTOKENS_WHITESPACE; static string output; static regex *regex_comment_custom; // multiline requires g++-11 libs :( @@ -913,12 +911,6 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (change_max < change_min) { change_max = change_min + 1; } - if (getenv("AUTOTOKENS_WHITESPACE")) { - - whitespace = getenv("AUTOTOKENS_WHITESPACE"); - - } - if (getenv("AUTOTOKENS_COMMENT")) { char buf[256]; From 54fa78d32ce6779117a656c72f5c630713e7033f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 09:52:57 +0100 Subject: [PATCH 55/77] autodisable and better performance --- custom_mutators/autotokens/Makefile | 12 +- custom_mutators/autotokens/TODO | 21 ---- custom_mutators/autotokens/autotokens.cpp | 143 +++++++++++++++++----- include/config.h | 4 + src/afl-fuzz-queue.c | 89 +++++++++----- 5 files changed, 179 insertions(+), 90 deletions(-) diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index ab1da4b6..6ee7d324 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,16 +1,22 @@ ifdef debug - CFLAGS += -fsanitize=address -Wall + CPPLAGS += -fsanitize=address + CXXFLAGS += -Wall + CC := clang CXX := clang++ endif ifdef DEBUG - CFLAGS += -fsanitize=address -Wall + CPPFLAGS += -fsanitize=address + CXXFLAGS += -Wall + CC := clang CXX := clang++ endif all: autotokens.so autotokens.so: autotokens.cpp - $(CXX) -g -O3 $(CFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o + $(CC) -D_STANDALONE_MODULE=1 -I../../include -g -O3 $(CPPFLAGS) -fPIC -c -o ./afl-fuzz-queue.o ../../src/afl-fuzz-queue.c + $(CC) -I../../include -g -O3 $(CPPFLAGS) -DBIN_PATH=\"dummy\" -Wno-pointer-sign -fPIC -c -o ./afl-common.o ../../src/afl-common.c + $(CXX) -Wno-deprecated -g -O3 $(CXXFLAGS) $(CPPFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ./afl-fuzz-queue.o ../../src/afl-performance.o ./afl-common.o clean: rm -f autotokens.so *~ core diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 496bfd45..2e99e147 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,24 +1,3 @@ -create from thin air if no good seed after a cycle and dict large enough? 
-(static u32 no_of_struct_inputs;) - -splicing -> check if whitespace/token is needed - -whitespace/token check only AFTER mutation - -analyse welche einen DICT haben, und welche davon rein ascii - -corpus analyse: - + libxml - - sqlite - - libpcap -min len, max len, % wenn 95/98/99/100 ascii - env für menge an per mutation run -AFL_TXT_MAX_LEN 65535 -AFL_TXT_MIN_LEN 16 -AFL_TXT_MIN_PERCENT=99 - --> KEIN FAV! - change_min/_max werte diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 10afa2c2..cda90a38 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -25,10 +25,12 @@ extern "C" { #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_CHANGE_MAX 64 #define AUTOTOKENS_SIZE_MIN 8 +#define AUTOTOKENS_SIZE_MAX 65535 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 #define AUTOTOKENS_CREATE_FROM_THIN_AIR 0 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 +#define AUTOTOKENS_AUTO_DISABLE 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog #define AUTOTOKENS_LEARN_DICT 1 #ifndef AUTOTOKENS_SPLICE_DISABLE @@ -56,6 +58,8 @@ typedef struct my_mutator { #define IFDEBUG if (unlikely(debug)) static afl_state *afl_ptr; +static int module_disabled = 0; +static int auto_disable = AUTOTOKENS_AUTO_DISABLE; static int debug = AUTOTOKENS_DEBUG; static int only_fav = AUTOTOKENS_ONLY_FAV; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; @@ -93,6 +97,99 @@ static void first_run(void *data) { (void)(data); + /* For auto-loading this module we check here if we can analyze from the + input if the inputs look like text inputs and disable the module if + not. */ + + if (afl_ptr->custom_only || !auto_disable) { return; } + + if (unlikely(afl_ptr->active_items == 1 && + afl_ptr->queue_cur->len < AFL_TXT_MIN_LEN)) { + + if (afl_ptr->extras_cnt > 8) { + + u32 valid = 0; + + while (extras_cnt < afl_ptr->extras_cnt) { + + u32 ok = 1, l = afl_ptr->extras[extras_cnt].len; + u8 *buf, *ptr = afl_ptr->extras[extras_cnt].data; + + for (u32 i = 0; i < l; ++i) { + + if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) { + + ok = 0; + break; + + } + + } + + if (ok) { + + buf = (u8 *)malloc(afl_ptr->extras[extras_cnt].len + 1); + memcpy(buf, afl_ptr->extras[extras_cnt].data, + afl_ptr->extras[extras_cnt].len); + buf[afl_ptr->extras[extras_cnt].len] = 0; + token_to_id[(char *)buf] = current_id; + id_to_token[current_id] = (char *)buf; + ++current_id; + ++valid; + + } + + ++extras_cnt; + + } + + if ((valid * 100) / afl_ptr->extras_cnt < 95) { module_disabled = 1; } + + } else { + + module_disabled = 1; + + } + + return; + + } + + u32 is_ascii = 0, valid = 0; + + for (u32 i = 0; i < afl_ptr->queued_items; ++i) { + + struct queue_entry *q; + + q = afl_ptr->queue_buf[i]; + + if (!q->disabled && q->len >= AUTOTOKENS_SIZE_MIN && + q->len <= AFL_TXT_MAX_LEN) { + + ++valid; + u8 *input = queue_testcase_get(afl_ptr, q); + + u32 valid_chars = 0; + for (u32 i = 0; i < q->len; ++i) { + + if (isascii((int)input[i]) || isprint((int)input[i])) { ++valid_chars; } + + } + + // we want at least 99% of text characters ... 
+ if (((q->len * AFL_TXT_MIN_PERCENT) / 100) <= valid_chars) { + + ++is_ascii; + q->is_ascii = 1; + + } + + } + + } + + if ((is_ascii * 100) / valid < 70) { module_disabled = 1; } + } static u32 good_whitespace_or_singleval() { @@ -441,21 +538,25 @@ extern "C" unsigned char afl_custom_queue_get(void *data, is_first_run = 0; first_run(data); + if (module_disabled) { WARNF("Autotokens custom module is disabled."); } + } - if (unlikely(!afl_ptr->custom_only) && !create_from_thin_air && - ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || - (only_fav && !afl_ptr->queue_cur->favored))) { + if (likely(module_disabled) || + (unlikely(!afl_ptr->custom_only) && !create_from_thin_air && + ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored)))) { s = NULL; - DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); + DEBUGF(stderr, + "cmplog not ascii or only_fav and not favorite or disabled\n"); return 1; } // check if there are new dictionary entries and add them to the tokens - if (likely(valid_structures || create_from_thin_air) && - learn_state < learn_dictionary_tokens) { + if (unlikely(learn_state < learn_dictionary_tokens) && + likely(valid_structures || create_from_thin_air)) { if (unlikely(!learn_state)) { learn_state = 1; } @@ -569,21 +670,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (entry == file_mapping.end()) { // this input file was not analyzed for tokens yet, so let's do it! - - FILE *fp = fopen((char *)filename, "rb"); - if (!fp) { - - s = NULL; - return 1; - - } // should not happen - - fseek(fp, 0, SEEK_END); - size_t len = (size_t)ftell(fp); + size_t len = afl_ptr->queue_cur->len; if (len < AFL_TXT_MIN_LEN) { - fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); @@ -591,7 +681,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } else if (len > AFL_TXT_MAX_LEN) { - fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; DEBUGF(stderr, "Too long (%lu) %s\n", len, filename); @@ -599,19 +688,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - string input; - input.resize(len); - rewind(fp); - - if (fread((void *)input.data(), 1, len, fp) != len) { - - s = NULL; - DEBUGF(stderr, "Too short read %s\n", filename); - return 1; - - } - - fclose(fp); + u8 *input_buf = queue_testcase_get(afl_ptr, afl_ptr->queue_cur); + string input((char *)input_buf, afl_ptr->queue_cur->len); if (!afl_ptr->shm.cmplog_mode) { @@ -866,6 +944,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } + if (getenv("AUTOTOKENS_AUTO_DISABLE")) { auto_disable = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; } diff --git a/include/config.h b/include/config.h index 49d09174..ad8b76a8 100644 --- a/include/config.h +++ b/include/config.h @@ -491,6 +491,10 @@ #define AFL_TXT_MIN_LEN 16 +/* Maximum length of a queue input to be evaluated for "is_ascii"? */ + +#define AFL_TXT_MAX_LEN 65535 + /* What is the minimum percentage of ascii characters present to be classifed as "is_ascii"? 
*/ diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c index e3faa392..3c8a3e46 100644 --- a/src/afl-fuzz-queue.c +++ b/src/afl-fuzz-queue.c @@ -27,6 +27,22 @@ #include #include +#ifdef _STANDALONE_MODULE +void minimize_bits(afl_state_t *afl, u8 *dst, u8 *src) { + + return; + +} + +void run_afl_custom_queue_new_entry(afl_state_t *afl, struct queue_entry *q, + u8 *a, u8 *b) { + + return; + +} + +#endif + /* select next queue entry based on alias algo - fast! */ inline u32 select_next_queue_entry(afl_state_t *afl) { @@ -78,8 +94,8 @@ void create_alias_table(afl_state_t *afl) { afl->alias_probability = (double *)afl_realloc( (void **)&afl->alias_probability, n * sizeof(double)); double *P = (double *)afl_realloc(AFL_BUF_PARAM(out), n * sizeof(double)); - int *S = (u32 *)afl_realloc(AFL_BUF_PARAM(out_scratch), n * sizeof(u32)); - int *L = (u32 *)afl_realloc(AFL_BUF_PARAM(in_scratch), n * sizeof(u32)); + int *S = (int *)afl_realloc(AFL_BUF_PARAM(out_scratch), n * sizeof(u32)); + int *L = (int *)afl_realloc(AFL_BUF_PARAM(in_scratch), n * sizeof(u32)); if (!P || !S || !L || !afl->alias_table || !afl->alias_probability) { @@ -247,11 +263,11 @@ void create_alias_table(afl_state_t *afl) { void mark_as_det_done(afl_state_t *afl, struct queue_entry *q) { - u8 fn[PATH_MAX]; - s32 fd; + char fn[PATH_MAX]; + s32 fd; snprintf(fn, PATH_MAX, "%s/queue/.state/deterministic_done/%s", afl->out_dir, - strrchr(q->fname, '/') + 1); + strrchr((char *)q->fname, '/') + 1); fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION); if (fd < 0) { PFATAL("Unable to create '%s'", fn); } @@ -266,10 +282,10 @@ void mark_as_det_done(afl_state_t *afl, struct queue_entry *q) { void mark_as_variable(afl_state_t *afl, struct queue_entry *q) { - u8 fn[PATH_MAX]; - u8 ldest[PATH_MAX]; + char fn[PATH_MAX]; + char ldest[PATH_MAX]; - u8 *fn_name = strrchr(q->fname, '/') + 1; + char *fn_name = strrchr((char *)q->fname, '/') + 1; sprintf(ldest, "../../%s", fn_name); sprintf(fn, "%s/queue/.state/variable_behavior/%s", afl->out_dir, fn_name); @@ -293,12 +309,12 @@ void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) { if (likely(state == q->fs_redundant)) { return; } - u8 fn[PATH_MAX]; + char fn[PATH_MAX]; q->fs_redundant = state; sprintf(fn, "%s/queue/.state/redundant_edges/%s", afl->out_dir, - strrchr(q->fname, '/') + 1); + strrchr((char *)q->fname, '/') + 1); if (state) { @@ -409,7 +425,7 @@ u8 check_if_text_buf(u8 *buf, u32 len) { static u8 check_if_text(afl_state_t *afl, struct queue_entry *q) { - if (q->len < AFL_TXT_MIN_LEN) return 0; + if (q->len < AFL_TXT_MIN_LEN || q->len < AFL_TXT_MAX_LEN) return 0; u8 *buf; int fd; @@ -417,8 +433,8 @@ static u8 check_if_text(afl_state_t *afl, struct queue_entry *q) { ssize_t comp; if (len >= MAX_FILE) len = MAX_FILE - 1; - if ((fd = open(q->fname, O_RDONLY)) < 0) return 0; - buf = afl_realloc(AFL_BUF_PARAM(in_scratch), len + 1); + if ((fd = open((char *)q->fname, O_RDONLY)) < 0) return 0; + buf = (u8 *)afl_realloc(AFL_BUF_PARAM(in_scratch), len + 1); comp = read(fd, buf, len); close(fd); if (comp != (ssize_t)len) return 0; @@ -520,7 +536,8 @@ static u8 check_if_text(afl_state_t *afl, struct queue_entry *q) { void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) { - struct queue_entry *q = ck_alloc(sizeof(struct queue_entry)); + struct queue_entry *q = + (struct queue_entry *)ck_alloc(sizeof(struct queue_entry)); q->fname = fname; q->len = len; @@ -554,7 +571,7 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) { 
afl->cycles_wo_finds = 0; - struct queue_entry **queue_buf = afl_realloc( + struct queue_entry **queue_buf = (struct queue_entry **)afl_realloc( AFL_BUF_PARAM(queue), afl->queued_items * sizeof(struct queue_entry *)); if (unlikely(!queue_buf)) { PFATAL("alloc"); } queue_buf[afl->queued_items - 1] = q; @@ -574,7 +591,11 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) { } /* only redqueen currently uses is_ascii */ - if (afl->shm.cmplog_mode) q->is_ascii = check_if_text(afl, q); + if (unlikely(afl->shm.cmplog_mode && !q->is_ascii)) { + + q->is_ascii = check_if_text(afl, q); + + } } @@ -704,7 +725,7 @@ void update_bitmap_score(afl_state_t *afl, struct queue_entry *q) { if (!q->trace_mini) { u32 len = (afl->fsrv.map_size >> 3); - q->trace_mini = ck_alloc(len); + q->trace_mini = (u8 *)ck_alloc(len); minimize_bits(afl, q->trace_mini, afl->fsrv.trace_bits); } @@ -1090,19 +1111,19 @@ inline void queue_testcase_retake(afl_state_t *afl, struct queue_entry *q, if (len != old_len) { afl->q_testcase_cache_size = afl->q_testcase_cache_size + len - old_len; - q->testcase_buf = realloc(q->testcase_buf, len); + q->testcase_buf = (u8 *)realloc(q->testcase_buf, len); if (unlikely(!q->testcase_buf)) { - PFATAL("Unable to malloc '%s' with len %u", q->fname, len); + PFATAL("Unable to malloc '%s' with len %u", (char *)q->fname, len); } } - int fd = open(q->fname, O_RDONLY); + int fd = open((char *)q->fname, O_RDONLY); - if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", q->fname); } + if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", (char *)q->fname); } ck_read(fd, q->testcase_buf, len, q->fname); close(fd); @@ -1122,7 +1143,7 @@ inline void queue_testcase_retake_mem(afl_state_t *afl, struct queue_entry *q, if (likely(len != old_len)) { - u8 *ptr = realloc(q->testcase_buf, len); + u8 *ptr = (u8 *)realloc(q->testcase_buf, len); if (likely(ptr)) { @@ -1154,23 +1175,23 @@ inline u8 *queue_testcase_get(afl_state_t *afl, struct queue_entry *q) { if (unlikely(q == afl->queue_cur)) { - buf = afl_realloc((void **)&afl->testcase_buf, len); + buf = (u8 *)afl_realloc((void **)&afl->testcase_buf, len); } else { - buf = afl_realloc((void **)&afl->splicecase_buf, len); + buf = (u8 *)afl_realloc((void **)&afl->splicecase_buf, len); } if (unlikely(!buf)) { - PFATAL("Unable to malloc '%s' with len %u", q->fname, len); + PFATAL("Unable to malloc '%s' with len %u", (char *)q->fname, len); } - int fd = open(q->fname, O_RDONLY); + int fd = open((char *)q->fname, O_RDONLY); - if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", q->fname); } + if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", (char *)q->fname); } ck_read(fd, buf, len, q->fname); close(fd); @@ -1214,7 +1235,7 @@ inline u8 *queue_testcase_get(afl_state_t *afl, struct queue_entry *q) { do_once = 1; // release unneeded memory - afl->q_testcase_cache = ck_realloc( + afl->q_testcase_cache = (struct queue_entry **)ck_realloc( afl->q_testcase_cache, (afl->q_testcase_max_cache_entries + 1) * sizeof(size_t)); @@ -1261,15 +1282,15 @@ inline u8 *queue_testcase_get(afl_state_t *afl, struct queue_entry *q) { /* Map the test case into memory. 
*/ - int fd = open(q->fname, O_RDONLY); + int fd = open((char *)q->fname, O_RDONLY); - if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", q->fname); } + if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", (char *)q->fname); } - q->testcase_buf = malloc(len); + q->testcase_buf = (u8 *)malloc(len); if (unlikely(!q->testcase_buf)) { - PFATAL("Unable to malloc '%s' with len %u", q->fname, len); + PFATAL("Unable to malloc '%s' with len %u", (char *)q->fname, len); } @@ -1332,11 +1353,11 @@ inline void queue_testcase_store_mem(afl_state_t *afl, struct queue_entry *q, /* Map the test case into memory. */ - q->testcase_buf = malloc(len); + q->testcase_buf = (u8 *)malloc(len); if (unlikely(!q->testcase_buf)) { - PFATAL("Unable to malloc '%s' with len %u", q->fname, len); + PFATAL("Unable to malloc '%s' with len %u", (char *)q->fname, len); } From 5a0100c6eece0d668c7040ec6e6ed3f59ef0d1ba Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 10:01:02 +0100 Subject: [PATCH 56/77] add to readme --- custom_mutators/autotokens/README | 2 ++ 1 file changed, 2 insertions(+) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 904b5fa3..295cd736 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -20,6 +20,8 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! removed. Default: `/* ... */` `AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the number of fuzzing performed, shifting the value by this number set, e.g. 1. +`AUTOTOKENS_AUTO_DISABLE` - disable this module if the seeds are not ascii + (or no input and no (ascii) dictionary) `AUTOTOKENS_LEARN_DICT` - learn from dictionaries? 0 = none 1 = only -x or autodict From 80eabd6e8a30c2ffc0f084ab34df8b9d582419c3 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 11:34:14 +0100 Subject: [PATCH 57/77] AFL_LLVM_DICT2FILE_NO_MAIN support --- TODO.md | 3 +-- docs/Changelog.md | 1 + docs/env_variables.md | 3 +++ docs/fuzzing_in_depth.md | 4 +++- include/envs.h | 1 + instrumentation/README.llvm.md | 4 ++++ instrumentation/SanitizerCoverageLTO.so.cc | 11 ++++++++++- instrumentation/afl-llvm-dict2file.so.cc | 17 ++++++++++++++--- src/afl-cc.c | 5 ++++- 9 files changed, 41 insertions(+), 8 deletions(-) diff --git a/TODO.md b/TODO.md index 862224f0..187fa191 100644 --- a/TODO.md +++ b/TODO.md @@ -9,13 +9,12 @@ - afl-plot to support multiple plot_data - parallel builds for source-only targets - get rid of check_binary, replace with more forkserver communication - - first fuzzer should be a main automatically + - first fuzzer should be a main automatically? not sure. ## Maybe - forkserver tells afl-fuzz if cmplog is supported and if so enable it by default, with AFL_CMPLOG_NO=1 (?) set to skip? - - afl_custom_fuzz_splice_optin() - afl_custom_splice() - cmdline option from-to range for mutations diff --git a/docs/Changelog.md b/docs/Changelog.md index eee88a51..89c37912 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -10,6 +10,7 @@ - add CFI sanitizer variant to gcc targets - llvm 16 support (thanks to @devnexen!) 
- support llvm 15 native pcguard changes + - LTO autoken and llvm_mode: added AFL_LLVM_DICT2FILE_NO_MAIN support - better sanitizer default options support for all tools - unicorn_mode: updated and minor issues fixed - frida_mode: fix issue on MacOS diff --git a/docs/env_variables.md b/docs/env_variables.md index 0a57d190..61fb1e2b 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -129,6 +129,9 @@ subset of the settings discussed in section 1, with the exception of: write all constant string comparisons to this file to be used later with afl-fuzz' `-x` option. + - An option to `AFL_LLVM_DICT2FILE` is `AFL_LLVM_DICT2FILE_NO_MAIN=1` which + skill not parse `main()`. + - `TMPDIR` and `AFL_KEEP_ASSEMBLY`, since no temporary assembly files are created. diff --git a/docs/fuzzing_in_depth.md b/docs/fuzzing_in_depth.md index 87f31a58..efab0633 100644 --- a/docs/fuzzing_in_depth.md +++ b/docs/fuzzing_in_depth.md @@ -534,6 +534,8 @@ dictionaries/FORMAT.dict`. * With `afl-clang-fast`, you can set `AFL_LLVM_DICT2FILE=/full/path/to/new/file.dic` to automatically generate a dictionary during target compilation. + Adding `AFL_LLVM_DICT2FILE_NO_MAIN=1` to not parse main (usually command line + parameter parsing) is often a good idea too. * You also have the option to generate a dictionary yourself during an independent run of the target, see [utils/libtokencap/README.md](../utils/libtokencap/README.md). @@ -935,7 +937,7 @@ phase and start fuzzing at once. 3. Also randomize the afl-fuzz runtime options, e.g.: * 65% for `AFL_DISABLE_TRIM` * 50% for `AFL_KEEP_TIMEOUTS` - * 50% use a dictionary generated by `AFL_LLVM_DICT2FILE` + * 50% use a dictionary generated by `AFL_LLVM_DICT2FILE` + `AFL_LLVM_DICT2FILE_NO_MAIN=1` * 40% use MOpt (`-L 0`) * 40% for `AFL_EXPAND_HAVOC_NOW` * 20% for old queue processing (`-Z`) diff --git a/include/envs.h b/include/envs.h index 0770f94d..5018b0f8 100644 --- a/include/envs.h +++ b/include/envs.h @@ -133,6 +133,7 @@ static char *afl_environment_variables[] = { "AFL_LLVM_CTX", "AFL_LLVM_CTX_K", "AFL_LLVM_DICT2FILE", + "AFL_LLVM_DICT2FILE_NO_MAIN", "AFL_LLVM_DOCUMENT_IDS", "AFL_LLVM_INSTRIM_LOOPHEAD", "AFL_LLVM_INSTRUMENT", diff --git a/instrumentation/README.llvm.md b/instrumentation/README.llvm.md index 9da1b0f6..c0677474 100644 --- a/instrumentation/README.llvm.md +++ b/instrumentation/README.llvm.md @@ -167,6 +167,10 @@ Just specify `AFL_LLVM_DICT2FILE=/absolute/path/file.txt` and during compilation all constant string compare parameters will be written to this file to be used with afl-fuzz' `-x` option. +Adding `AFL_LLVM_DICT2FILE_NO_MAIN=1` will skip parsing `main()` which often +does command line parsing which has string comparisons that are not helpful +for fuzzing. + ## 6) AFL++ Context Sensitive Branch Coverage ### What is this? 
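
To make concrete what `AFL_LLVM_DICT2FILE_NO_MAIN` is meant to filter out, here is a small stand-alone sketch - not part of these patches; the target, its option strings and the paths in the build comment are invented for illustration. The string comparisons inside `main()` typically match command line flags, which make poor dictionary tokens, while the comparisons in the actual input parser are the ones worth writing to the `-x` file.

// illustration only: a hypothetical target, not AFL++ code
#include <cstddef>
#include <cstdio>
#include <cstring>

// Comparisons in here yield useful dictionary tokens for afl-fuzz -x.
static int parse_input(const char *buf, size_t len) {

  if (len >= 8 && memcmp(buf, "MAGIC_V1", 8) == 0) { return 1; }
  if (len >= 12 && strncmp(buf, "BEGIN_RECORD", 12) == 0) { return 2; }
  return 0;

}

int main(int argc, char **argv) {

  // Typical option parsing: with AFL_LLVM_DICT2FILE_NO_MAIN=1 these
  // comparisons are skipped, so "--help" and "--verbose" never end up
  // in the generated dictionary file.
  for (int i = 1; i < argc; i++) {

    if (strcmp(argv[i], "--help") == 0) { return 0; }
    if (strcmp(argv[i], "--verbose") == 0) { /* ... */ }

  }

  char buf[64] = {0};
  size_t n = fread(buf, 1, sizeof(buf) - 1, stdin);
  return parse_input(buf, n);

}

// Build/run sketch (paths are examples):
//   AFL_LLVM_DICT2FILE=/tmp/target.dict AFL_LLVM_DICT2FILE_NO_MAIN=1 \
//     afl-clang-fast++ -o target target.cpp
//   afl-fuzz -i in -o out -x /tmp/target.dict -- ./target
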
diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc index 231151f5..f82224ed 100644 --- a/instrumentation/SanitizerCoverageLTO.so.cc +++ b/instrumentation/SanitizerCoverageLTO.so.cc @@ -236,6 +236,7 @@ class ModuleSanitizerCoverageLTO // const SpecialCaseList * Allowlist; // const SpecialCaseList * Blocklist; uint32_t autodictionary = 1; + uint32_t autodictionary_no_main = 0; uint32_t inst = 0; uint32_t afl_global_id = 0; uint32_t unhandled = 0; @@ -411,7 +412,8 @@ bool ModuleSanitizerCoverageLTO::instrumentModule( /* Show a banner */ setvbuf(stdout, NULL, _IONBF, 0); - if (getenv("AFL_DEBUG")) debug = 1; + if (getenv("AFL_DEBUG")) { debug = 1; } + if (getenv("AFL_LLVM_DICT2FILE_NO_MAIN")) { autodictionary_no_main = 1; } if ((isatty(2) && !getenv("AFL_QUIET")) || debug) { @@ -503,6 +505,13 @@ bool ModuleSanitizerCoverageLTO::instrumentModule( if (!isInInstrumentList(&F, MNAME) || !F.size()) { continue; } + if (autodictionary_no_main && + (!F.getName().compare("main") || !F.getName().compare("_main"))) { + + continue; + + } + for (auto &BB : F) { for (auto &IN : BB) { diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc index bbbbe32c..97f1d47f 100644 --- a/instrumentation/afl-llvm-dict2file.so.cc +++ b/instrumentation/afl-llvm-dict2file.so.cc @@ -182,7 +182,7 @@ bool AFLdict2filePass::runOnModule(Module &M) { DenseMap valueMap; char *ptr; - int found = 0; + int found = 0, handle_main = 1; /* Show a banner */ setvbuf(stdout, NULL, _IONBF, 0); @@ -192,10 +192,14 @@ bool AFLdict2filePass::runOnModule(Module &M) { SAYF(cCYA "afl-llvm-dict2file" VERSION cRST " by Marc \"vanHauser\" Heuse \n"); - } else + } else { be_quiet = 1; + } + + if (getenv("AFL_LLVM_DICT2FILE_NO_MAIN")) { handle_main = 0; } + scanForDangerousFunctions(&M); ptr = getenv("AFL_LLVM_DICT2FILE"); @@ -210,7 +214,14 @@ bool AFLdict2filePass::runOnModule(Module &M) { for (auto &F : M) { - if (isIgnoreFunction(&F)) continue; + if (!handle_main && + (!F.getName().compare("main") || !F.getName().compare("_main"))) { + + continue; + + } + + if (isIgnoreFunction(&F)) { continue; } if (!isInInstrumentList(&F, MNAME) || !F.size()) { continue; } /* Some implementation notes. 
diff --git a/src/afl-cc.c b/src/afl-cc.c index 7c3682fb..7b059d40 100644 --- a/src/afl-cc.c +++ b/src/afl-cc.c @@ -2041,6 +2041,8 @@ int main(int argc, char **argv, char **envp) { " AFL_LLVM_DICT2FILE: generate an afl dictionary based on found " "comparisons\n" + " AFL_LLVM_DICT2FILE_NO_MAIN: skip parsing main() for the " + "dictionary\n" " AFL_LLVM_LAF_ALL: enables all LAF splits/transforms\n" " AFL_LLVM_LAF_SPLIT_COMPARES: enable cascaded comparisons\n" " AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n" @@ -2128,7 +2130,8 @@ int main(int argc, char **argv, char **envp) { "defaults.\n" "Recommended is afl-clang-lto with AFL_LLVM_CMPLOG or afl-clang-fast " "with\n" - "AFL_LLVM_CMPLOG and AFL_LLVM_DICT2FILE.\n\n"); + "AFL_LLVM_CMPLOG and " + "AFL_LLVM_DICT2FILE+AFL_LLVM_DICT2FILE_NO_MAIN.\n\n"); exit(1); From 8bc3fa1df286aac46a0a724f64e2e07010d2497e Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Mon, 13 Feb 2023 23:00:15 +0000 Subject: [PATCH 58/77] LLVM cmplog factoring custom Instruction iterator with added restriction --- instrumentation/afl-llvm-common.cc | 18 ++++++++++++++++++ instrumentation/afl-llvm-common.h | 2 ++ instrumentation/cmplog-instructions-pass.cc | 15 --------------- instrumentation/cmplog-switches-pass.cc | 15 --------------- 4 files changed, 20 insertions(+), 30 deletions(-) diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc index dc34d191..b50269fe 100644 --- a/instrumentation/afl-llvm-common.cc +++ b/instrumentation/afl-llvm-common.cc @@ -582,6 +582,24 @@ bool isInInstrumentList(llvm::Function *F, std::string Filename) { } +template +Iterator Unique(Iterator first, Iterator last) { + static_assert(std::is_trivially_copyable< + typename std::iterator_traits + >::value_type, "Invalid underlying type"); + + while (first != last) { + + Iterator next(first); + last = std::remove(++next, last, *first); + first = next; + + } + + return last; + +} + // Calculate the number of average collisions that would occur if all // location IDs would be assigned randomly (like normal afl/afl++). // This uses the "balls in bins" algorithm. 
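
The `Unique()` helper factored out above drops later duplicates while keeping the first occurrence of each value in its original position, unlike plain `std::unique`, which only collapses adjacent duplicates. A stand-alone sketch of one way such a helper is typically used, on toy data rather than the passes' instruction lists:

// stand-alone illustration of the order-preserving Unique() above
#include <algorithm>
#include <cassert>
#include <vector>

template <class Iterator>
Iterator Unique(Iterator first, Iterator last) {

  while (first != last) {

    Iterator next(first);
    last = std::remove(++next, last, *first);  // drop later copies of *first
    first = next;

  }

  return last;

}

int main() {

  std::vector<int> v = {3, 1, 3, 2, 1, 3};
  v.erase(Unique(v.begin(), v.end()), v.end());

  // first occurrences survive and keep their original order: {3, 1, 2}
  assert((v == std::vector<int>{3, 1, 2}));

  // std::unique would only merge *adjacent* duplicates, so it needs a
  // sorted range to deduplicate fully - at the cost of the ordering.
  return 0;

}
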
diff --git a/instrumentation/afl-llvm-common.h b/instrumentation/afl-llvm-common.h index 0112c325..8b8dc756 100644 --- a/instrumentation/afl-llvm-common.h +++ b/instrumentation/afl-llvm-common.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "llvm/Config/llvm-config.h" @@ -53,6 +54,7 @@ void initInstrumentList(); bool isInInstrumentList(llvm::Function *F, std::string Filename); unsigned long long int calculateCollisions(uint32_t edges); void scanForDangerousFunctions(llvm::Module *M); +template Iterator Unique(Iterator, Iterator); #ifndef IS_EXTERN #define IS_EXTERN diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc index bca1f927..c6fd7c56 100644 --- a/instrumentation/cmplog-instructions-pass.cc +++ b/instrumentation/cmplog-instructions-pass.cc @@ -138,21 +138,6 @@ llvmGetPassPluginInfo() { char CmpLogInstructions::ID = 0; #endif -template -Iterator Unique(Iterator first, Iterator last) { - - while (first != last) { - - Iterator next(first); - last = std::remove(++next, last, *first); - first = next; - - } - - return last; - -} - bool CmpLogInstructions::hookInstrs(Module &M) { std::vector icomps; diff --git a/instrumentation/cmplog-switches-pass.cc b/instrumentation/cmplog-switches-pass.cc index cd0ae76d..f4a9fbd7 100644 --- a/instrumentation/cmplog-switches-pass.cc +++ b/instrumentation/cmplog-switches-pass.cc @@ -131,21 +131,6 @@ llvmGetPassPluginInfo() { char CmplogSwitches::ID = 0; #endif -template -Iterator Unique(Iterator first, Iterator last) { - - while (first != last) { - - Iterator next(first); - last = std::remove(++next, last, *first); - first = next; - - } - - return last; - -} - bool CmplogSwitches::hookInstrs(Module &M) { std::vector switches; From a7c43484e1e3afe6d1db440927e72e0f103ba977 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 07:45:45 +0100 Subject: [PATCH 59/77] bettern custom mut warning --- src/afl-fuzz-mutators.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/afl-fuzz-mutators.c b/src/afl-fuzz-mutators.c index 22e5262e..f722374f 100644 --- a/src/afl-fuzz-mutators.c +++ b/src/afl-fuzz-mutators.c @@ -312,12 +312,18 @@ struct custom_mutator *load_custom_mutator(afl_state_t *afl, const char *fn) { if (notrim) { + if (mutator->afl_custom_init_trim || mutator->afl_custom_trim || + mutator->afl_custom_post_trim) { + + WARNF( + "Custom mutator does not implement all three trim APIs, standard " + "trimming will be used."); + + } + mutator->afl_custom_init_trim = NULL; mutator->afl_custom_trim = NULL; mutator->afl_custom_post_trim = NULL; - ACTF( - "Custom mutator does not implement all three trim APIs, standard " - "trimming will be used."); } From 668f5e1fa9c126bb8c751a6e4ef038ae60a442fa Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 09:32:32 +0100 Subject: [PATCH 60/77] debug output --- custom_mutators/autotokens/Makefile | 8 ++++++-- custom_mutators/autotokens/autotokens.cpp | 17 ++++++++++++++++- docs/Changelog.md | 1 + docs/env_variables.md | 2 ++ include/afl-fuzz.h | 2 +- include/envs.h | 1 + src/afl-fuzz-init.c | 2 +- src/afl-fuzz-one.c | 2 +- src/afl-fuzz-run.c | 2 +- src/afl-fuzz-state.c | 7 +++++++ 10 files changed, 37 insertions(+), 7 deletions(-) diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 6ee7d324..0daba17d 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -13,10 +13,14 @@ endif all: autotokens.so -autotokens.so: 
autotokens.cpp +afl-fuzz-queue.o: ../../src/afl-fuzz-queue.c $(CC) -D_STANDALONE_MODULE=1 -I../../include -g -O3 $(CPPFLAGS) -fPIC -c -o ./afl-fuzz-queue.o ../../src/afl-fuzz-queue.c + +afl-common.o: ../../src/afl-common.c $(CC) -I../../include -g -O3 $(CPPFLAGS) -DBIN_PATH=\"dummy\" -Wno-pointer-sign -fPIC -c -o ./afl-common.o ../../src/afl-common.c + +autotokens.so: afl-fuzz-queue.o afl-common.o autotokens.cpp $(CXX) -Wno-deprecated -g -O3 $(CXXFLAGS) $(CPPFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ./afl-fuzz-queue.o ../../src/afl-performance.o ./afl-common.o clean: - rm -f autotokens.so *~ core + rm -f autotokens.so *.o *~ core diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index cda90a38..043d9588 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -145,6 +145,9 @@ static void first_run(void *data) { if ((valid * 100) / afl_ptr->extras_cnt < 95) { module_disabled = 1; } + DEBUGF(stderr, "DICT: valid %u, total %u, %u < 95 == disable\n", valid, + afl_ptr->extras_cnt, (u32)((valid * 100) / afl_ptr->extras_cnt)); + } else { module_disabled = 1; @@ -190,6 +193,10 @@ static void first_run(void *data) { if ((is_ascii * 100) / valid < 70) { module_disabled = 1; } + DEBUGF(stderr, "seeds: total %u, valid %u, ascii %u, %u < 70 == disabled\n", + afl_ptr->active_items, valid, is_ascii, + (u32)((is_ascii * 100) / valid)); + } static u32 good_whitespace_or_singleval() { @@ -538,7 +545,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, is_first_run = 0; first_run(data); - if (module_disabled) { WARNF("Autotokens custom module is disabled."); } + if (module_disabled) { + + WARNF("Autotokens custom module is disabled."); + + } else if (auto_disable) { + + OKF("Autotokens custom module is enabled."); + + } } diff --git a/docs/Changelog.md b/docs/Changelog.md index 89c37912..5f253064 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -6,6 +6,7 @@ ### Version ++4.06a (dev) - afl-fuzz: - ensure temporary file descriptor is closed when not used + - added `AFL_NO_WARN_INSTABILITY` - afl-cc: - add CFI sanitizer variant to gcc targets - llvm 16 support (thanks to @devnexen!) diff --git a/docs/env_variables.md b/docs/env_variables.md index 61fb1e2b..7a574e59 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -482,6 +482,8 @@ checks or alter some of the more exotic semantics of the tool: - Setting `AFL_NO_STARTUP_CALIBRATION` will skip the initial calibration of all starting seeds, and start fuzzing at once. + - Setting `AFL_NO_WARN_INSTABILITY` will suppress instability warnings. + - In QEMU mode (-Q) and FRIDA mode (-O), `AFL_PATH` will be searched for afl-qemu-trace and afl-frida-trace.so. 
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 229bc025..9bf91faf 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -399,7 +399,7 @@ typedef struct afl_env_vars { afl_cycle_schedules, afl_expand_havoc, afl_statsd, afl_cmplog_only_new, afl_exit_on_seed_issues, afl_try_affinity, afl_ignore_problems, afl_keep_timeouts, afl_pizza_mode, afl_no_crash_readme, - afl_ignore_timeouts, afl_no_startup_calibration; + afl_ignore_timeouts, afl_no_startup_calibration, afl_no_warn_instability; u8 *afl_tmpdir, *afl_custom_mutator_library, *afl_python_module, *afl_path, *afl_hang_tmout, *afl_forksrv_init_tmout, *afl_preload, diff --git a/include/envs.h b/include/envs.h index 5018b0f8..56675eda 100644 --- a/include/envs.h +++ b/include/envs.h @@ -172,6 +172,7 @@ static char *afl_environment_variables[] = { "AFL_NO_UI", "AFL_NO_PYTHON", "AFL_NO_STARTUP_CALIBRATION", + "AFL_NO_WARN_INSTABILITY", "AFL_UNTRACER_FILE", "AFL_LLVM_USE_TRACE_PC", "AFL_MAP_SIZE", diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c index 1182bd41..c20965b4 100644 --- a/src/afl-fuzz-init.c +++ b/src/afl-fuzz-init.c @@ -1120,7 +1120,7 @@ void perform_dry_run(afl_state_t *afl) { } - if (q->var_behavior) { + if (unlikely(q->var_behavior && !afl->afl_env.afl_no_warn_instability)) { WARNF("Instrumentation output varies across runs."); diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 2f016217..e97db273 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -1988,7 +1988,7 @@ custom_mutator_stage: if (unlikely(!mutated_buf)) { - //FATAL("Error in custom_fuzz. Size returned: %zu", mutated_size); + // FATAL("Error in custom_fuzz. Size returned: %zu", mutated_size); break; } diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c index 7dd83150..f5425011 100644 --- a/src/afl-fuzz-run.c +++ b/src/afl-fuzz-run.c @@ -523,7 +523,7 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem, } - if (unlikely(!var_detected)) { + if (unlikely(!var_detected && !afl->afl_env.afl_no_warn_instability)) { // note: from_queue seems to only be set during initialization if (afl->afl_env.afl_no_ui || from_queue) { diff --git a/src/afl-fuzz-state.c b/src/afl-fuzz-state.c index 104b1e4b..6d8c8758 100644 --- a/src/afl-fuzz-state.c +++ b/src/afl-fuzz-state.c @@ -204,6 +204,13 @@ void read_afl_environment(afl_state_t *afl, char **envp) { afl->afl_env.afl_no_affinity = get_afl_env(afl_environment_variables[i]) ? 1 : 0; + } else if (!strncmp(env, "AFL_NO_WARN_INSTABILITY", + + afl_environment_variable_len)) { + + afl->afl_env.afl_no_warn_instability = + get_afl_env(afl_environment_variables[i]) ? 
1 : 0; + } else if (!strncmp(env, "AFL_TRY_AFFINITY", afl_environment_variable_len)) { From 2090f17a9bb9cc225c1d24e8b21ed0c993a2665f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:23:42 +0100 Subject: [PATCH 61/77] opt --- custom_mutators/autotokens/autotokens.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 043d9588..a2b2814f 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -143,9 +143,9 @@ static void first_run(void *data) { } - if ((valid * 100) / afl_ptr->extras_cnt < 95) { module_disabled = 1; } + if ((valid * 100) / afl_ptr->extras_cnt <= 70) { module_disabled = 1; } - DEBUGF(stderr, "DICT: valid %u, total %u, %u < 95 == disable\n", valid, + DEBUGF(stderr, "DICT: valid %u, total %u, %u <= 70 == disable\n", valid, afl_ptr->extras_cnt, (u32)((valid * 100) / afl_ptr->extras_cnt)); } else { @@ -191,9 +191,9 @@ static void first_run(void *data) { } - if ((is_ascii * 100) / valid < 70) { module_disabled = 1; } + if ((is_ascii * 100) / valid <= 70) { module_disabled = 1; } - DEBUGF(stderr, "seeds: total %u, valid %u, ascii %u, %u < 70 == disabled\n", + DEBUGF(stderr, "seeds: total %u, valid %u, ascii %u, %u <= 70 == disabled\n", afl_ptr->active_items, valid, is_ascii, (u32)((is_ascii * 100) / valid)); From 04356ecbbe2c6cb72d279081702a6044fcc3ae92 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:28:43 +0100 Subject: [PATCH 62/77] fix --- custom_mutators/autotokens/autotokens.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index a2b2814f..b1f1542e 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -145,8 +145,9 @@ static void first_run(void *data) { if ((valid * 100) / afl_ptr->extras_cnt <= 70) { module_disabled = 1; } - DEBUGF(stderr, "DICT: valid %u, total %u, %u <= 70 == disable\n", valid, - afl_ptr->extras_cnt, (u32)((valid * 100) / afl_ptr->extras_cnt)); + DEBUGF(stderr, "DICT: total %u, valid %u, %u <= 70 == disable\n", + afl_ptr->extras_cnt, valid, + (u32)((valid * 100) / afl_ptr->extras_cnt)); } else { From ae94499503596d1e7f45e1a93bc5f7148c6163b6 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:48:49 +0100 Subject: [PATCH 63/77] fix --- custom_mutators/autotokens/autotokens.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index b1f1542e..e6b9931d 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1078,6 +1078,8 @@ extern "C" void afl_custom_deinit(my_mutator_t *data) { /* we use this to print statistics at exit :-) needs to be stderr as stdout is filtered */ + if (module_disabled) { return; } + fprintf(stderr, "\n\nAutotoken mutator statistics:\n" " Number of all seen tokens: %u\n" From 7f2bafbb8b709720cd3703789071c08064e518bd Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:54:39 +0100 Subject: [PATCH 64/77] remove some debug --- custom_mutators/autotokens/autotokens.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index e6b9931d..22c78a60 100644 --- 
a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -287,7 +287,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, ((whitespace_ids < new_item && whitespace_ids >= cur_item) || (whitespace_ids >= new_item && whitespace_ids < cur_item)))); - DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); + // DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; break; @@ -305,7 +305,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; - DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); + // DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); break; @@ -334,7 +334,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, m.insert(m.begin() + dst_off, src->begin() + src_off, src->begin() + src_off + n); m_size += n; - DEBUGF(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + // DEBUGF(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); break; @@ -354,7 +354,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, copy(src->begin() + src_off, src->begin() + src_off + n, m.begin() + dst_off); - DEBUGF(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); + // DEBUGF(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); break; } @@ -432,6 +432,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, if (unlikely(mutated_size > max_size)) { mutated_size = max_size; } + /* IFDEBUG { DEBUGF(stderr, "MUTATED to %u bytes:\n", mutated_size); @@ -440,6 +441,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + */ + *out_buf = mutated_out; ++fuzz_count; return mutated_size; @@ -633,7 +636,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } ++a_extras_cnt; - DEBUGF(stderr, "Added from auto dictionary: \"%s\"\n", ptr); } @@ -751,8 +753,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, u32 tabs = count(input.begin(), input.end(), '\t'); u32 linefeeds = count(input.begin(), input.end(), '\n'); bool ends_with_linefeed = input[input.length() - 1] == '\n'; + DEBUGF(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, linefeeds, ends_with_linefeed); + all_spaces += spaces; all_tabs += tabs; all_lf += linefeeds; From 1faf6f67313e726c645ac3b9ecd2d8b5e65f605a Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 16 Feb 2023 07:47:36 +0100 Subject: [PATCH 65/77] fix --- custom_mutators/autotokens/autotokens.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 22c78a60..8135aba1 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -401,25 +401,28 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, /* Now we create the output */ output = ""; - u32 prev_size = 0; + u32 prev_size = 1, was_whitespace = 1; for (i = 0; i < m_size; ++i) { if (likely(i + 1 < m_size)) { u32 this_size = id_to_token[m[i]].size(); + u32 is_whitespace = m[i] < whitespace_ids; /* The output we are generating might need repairing. General rule: two items that have a size larger than 2 are strings or identifizers and need a whitespace or an item of length 1 in between. 
*/ - if (unlikely(prev_size > 1 && this_size > 1)) { + if (unlikely(!(prev_size == 1 || was_whitespace || this_size == 1 || + is_whitespace))) { output += id_to_token[good_whitespace_or_singleval()]; } prev_size = this_size; + was_whitespace = is_whitespace; } From 9da3a2ed4522d1a980ad7ddc7806f02833dd99fc Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 16 Feb 2023 13:11:11 +0100 Subject: [PATCH 66/77] fixes --- src/afl-fuzz-redqueen.c | 2 ++ src/afl-gotcpu.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 8da1df13..290be881 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -1624,6 +1624,8 @@ static void try_to_add_to_dictN(afl_state_t *afl, u128 v, u8 size) { } + if (cons_0 > 1 || cons_ff > 1) { return; } + } maybe_add_auto(afl, (u8 *)&v + off, size); diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c index fd9e9f54..8988fd54 100644 --- a/src/afl-gotcpu.c +++ b/src/afl-gotcpu.c @@ -92,7 +92,7 @@ static u32 measure_preemption(u32 target_ms) { volatile u32 v1, v2 = 0; u64 st_t, en_t, st_c, en_c, real_delta, slice_delta; - s32 loop_repeats = 0; + //s32 loop_repeats = 0; st_t = get_cur_time_us(); st_c = get_cpu_usage_us(); @@ -113,7 +113,7 @@ repeat_loop: if (en_t - st_t < target_ms * 1000) { - loop_repeats++; + //loop_repeats++; goto repeat_loop; } From ebaac23a514cd3950d4a6cb597bd921e13ab9baa Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 20 Feb 2023 11:42:40 +0100 Subject: [PATCH 67/77] clarify AFL_NO_STARTUP_CALIBRATION --- docs/env_variables.md | 3 ++- docs/fuzzing_in_depth.md | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/env_variables.md b/docs/env_variables.md index 22a5c386..646db3f2 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -474,7 +474,8 @@ checks or alter some of the more exotic semantics of the tool: output from afl-fuzz is redirected to a file or to a pipe. - Setting `AFL_NO_STARTUP_CALIBRATION` will skip the initial calibration - of all starting seeds, and start fuzzing at once. + of all starting seeds, and start fuzzing at once. Use with care, this + degrades the fuzzing performance! - In QEMU mode (-Q) and FRIDA mode (-O), `AFL_PATH` will be searched for afl-qemu-trace and afl-frida-trace.so. diff --git a/docs/fuzzing_in_depth.md b/docs/fuzzing_in_depth.md index 87f31a58..2a088201 100644 --- a/docs/fuzzing_in_depth.md +++ b/docs/fuzzing_in_depth.md @@ -628,7 +628,8 @@ If you have a large corpus, a corpus from a previous run or are fuzzing in a CI, then also set `export AFL_CMPLOG_ONLY_NEW=1` and `export AFL_FAST_CAL=1`. If the queue in the CI is huge and/or the execution time is slow then you can also add `AFL_NO_STARTUP_CALIBRATION=1` to skip the initial queue calibration -phase and start fuzzing at once. +phase and start fuzzing at once - but only do this if the calibration phase +would be too long for your fuzz run time. You can also use different fuzzers. If you are using AFL spinoffs or AFL conforming fuzzers, then just use the same -o directory and give it a unique @@ -914,7 +915,8 @@ normal fuzzing campaigns as these are much shorter runnings. If the queue in the CI is huge and/or the execution time is slow then you can also add `AFL_NO_STARTUP_CALIBRATION=1` to skip the initial queue calibration -phase and start fuzzing at once. +phase and start fuzzing at once. But only do that if the calibration time is +too long for your overall available fuzz run time. 1. 
Always: * LTO has a much longer compile time which is diametrical to short fuzzing - From b786558dea5fd5dca471a0e36a8b420ff6a65846 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 20 Feb 2023 15:43:54 +0100 Subject: [PATCH 68/77] Revert "LLVM cmplog factoring custom Instruction iterator with added restriction" This reverts commit 8bc3fa1df286aac46a0a724f64e2e07010d2497e. --- instrumentation/afl-llvm-common.cc | 18 ------------------ instrumentation/afl-llvm-common.h | 2 -- instrumentation/cmplog-instructions-pass.cc | 15 +++++++++++++++ instrumentation/cmplog-switches-pass.cc | 15 +++++++++++++++ 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc index b50269fe..dc34d191 100644 --- a/instrumentation/afl-llvm-common.cc +++ b/instrumentation/afl-llvm-common.cc @@ -582,24 +582,6 @@ bool isInInstrumentList(llvm::Function *F, std::string Filename) { } -template -Iterator Unique(Iterator first, Iterator last) { - static_assert(std::is_trivially_copyable< - typename std::iterator_traits - >::value_type, "Invalid underlying type"); - - while (first != last) { - - Iterator next(first); - last = std::remove(++next, last, *first); - first = next; - - } - - return last; - -} - // Calculate the number of average collisions that would occur if all // location IDs would be assigned randomly (like normal afl/afl++). // This uses the "balls in bins" algorithm. diff --git a/instrumentation/afl-llvm-common.h b/instrumentation/afl-llvm-common.h index 8b8dc756..0112c325 100644 --- a/instrumentation/afl-llvm-common.h +++ b/instrumentation/afl-llvm-common.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include "llvm/Config/llvm-config.h" @@ -54,7 +53,6 @@ void initInstrumentList(); bool isInInstrumentList(llvm::Function *F, std::string Filename); unsigned long long int calculateCollisions(uint32_t edges); void scanForDangerousFunctions(llvm::Module *M); -template Iterator Unique(Iterator, Iterator); #ifndef IS_EXTERN #define IS_EXTERN diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc index c6fd7c56..bca1f927 100644 --- a/instrumentation/cmplog-instructions-pass.cc +++ b/instrumentation/cmplog-instructions-pass.cc @@ -138,6 +138,21 @@ llvmGetPassPluginInfo() { char CmpLogInstructions::ID = 0; #endif +template +Iterator Unique(Iterator first, Iterator last) { + + while (first != last) { + + Iterator next(first); + last = std::remove(++next, last, *first); + first = next; + + } + + return last; + +} + bool CmpLogInstructions::hookInstrs(Module &M) { std::vector icomps; diff --git a/instrumentation/cmplog-switches-pass.cc b/instrumentation/cmplog-switches-pass.cc index f4a9fbd7..cd0ae76d 100644 --- a/instrumentation/cmplog-switches-pass.cc +++ b/instrumentation/cmplog-switches-pass.cc @@ -131,6 +131,21 @@ llvmGetPassPluginInfo() { char CmplogSwitches::ID = 0; #endif +template +Iterator Unique(Iterator first, Iterator last) { + + while (first != last) { + + Iterator next(first); + last = std::remove(++next, last, *first); + first = next; + + } + + return last; + +} + bool CmplogSwitches::hookInstrs(Module &M) { std::vector switches; From 91b7f1c9f2dc429b7d4beaafb7497203f456bcd3 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 21 Feb 2023 01:05:46 +0100 Subject: [PATCH 69/77] fix regression --- src/afl-fuzz-one.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 76826945..0f237126 100644 
--- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -5841,7 +5841,10 @@ u8 fuzz_one(afl_state_t *afl) { } - return (key_val_lv_1 == 0 || key_val_lv_2 == 0 ? 0 : 1 ); + if (key_val_lv_1 == -1) { key_val_lv_1 = 0; } + if (key_val_lv_2 == -1) { key_val_lv_2 = 0; } + + return (key_val_lv_1 | key_val_lv_2); } From 6f4b5ae0832774389b12c5a8cd3fb95821b438e5 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 21 Feb 2023 01:07:02 +0100 Subject: [PATCH 70/77] nit --- src/afl-fuzz-one.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 0f237126..cce3d7cf 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -5841,8 +5841,8 @@ u8 fuzz_one(afl_state_t *afl) { } - if (key_val_lv_1 == -1) { key_val_lv_1 = 0; } - if (key_val_lv_2 == -1) { key_val_lv_2 = 0; } + if (unlikely(key_val_lv_1 == -1)) { key_val_lv_1 = 0; } + if (likely(key_val_lv_2 == -1)) { key_val_lv_2 = 0; } return (key_val_lv_1 | key_val_lv_2); From 8a8e350f34fa4fe5eb862d1a71921be9b739e8bb Mon Sep 17 00:00:00 2001 From: lazymio Date: Wed, 22 Feb 2023 22:48:03 +0100 Subject: [PATCH 71/77] Also install libclang-rt-dev --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fd47a59f..59ce8778 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,7 +47,7 @@ RUN apt-get update && \ clang-${LLVM_VERSION} clang-tools-${LLVM_VERSION} libc++1-${LLVM_VERSION} \ libc++-${LLVM_VERSION}-dev libc++abi1-${LLVM_VERSION} libc++abi-${LLVM_VERSION}-dev \ libclang1-${LLVM_VERSION} libclang-${LLVM_VERSION}-dev \ - libclang-common-${LLVM_VERSION}-dev libclang-cpp${LLVM_VERSION} \ + libclang-common-${LLVM_VERSION}-dev libclang-rt-${LLVM_VERSION}-dev libclang-cpp${LLVM_VERSION} \ libclang-cpp${LLVM_VERSION}-dev liblld-${LLVM_VERSION} \ liblld-${LLVM_VERSION}-dev liblldb-${LLVM_VERSION} liblldb-${LLVM_VERSION}-dev \ libllvm${LLVM_VERSION} libomp-${LLVM_VERSION}-dev libomp5-${LLVM_VERSION} \ From 0c0a6c3bfabf0facaed33fae1aa5ad54a6a11b32 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 23 Feb 2023 11:22:40 +0100 Subject: [PATCH 72/77] regression fix --- include/config.h | 2 +- src/afl-forkserver.c | 2 +- src/afl-fuzz-cmplog.c | 8 ++++++-- src/afl-fuzz.c | 3 ++- src/afl-gotcpu.c | 4 ++-- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/config.h b/include/config.h index ad8b76a8..e46f515a 100644 --- a/include/config.h +++ b/include/config.h @@ -489,7 +489,7 @@ /* Minimum length of a queue input to be evaluated for "is_ascii"? */ -#define AFL_TXT_MIN_LEN 16 +#define AFL_TXT_MIN_LEN 12 /* Maximum length of a queue input to be evaluated for "is_ascii"? 
*/ diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c index 5aa4c2ff..50dc7a26 100644 --- a/src/afl-forkserver.c +++ b/src/afl-forkserver.c @@ -59,7 +59,7 @@ static list_t fsrv_list = {.element_prealloc_count = 0}; static void fsrv_exec_child(afl_forkserver_t *fsrv, char **argv) { - if (fsrv->qemu_mode || fsrv->cs_mode) { + if (fsrv->qemu_mode || fsrv->frida_mode || fsrv->cs_mode) { setenv("AFL_DISABLE_LLVM_INSTRUMENTATION", "1", 0); diff --git a/src/afl-fuzz-cmplog.c b/src/afl-fuzz-cmplog.c index 2bf26d19..229aef09 100644 --- a/src/afl-fuzz-cmplog.c +++ b/src/afl-fuzz-cmplog.c @@ -33,11 +33,15 @@ void cmplog_exec_child(afl_forkserver_t *fsrv, char **argv) { setenv("___AFL_EINS_ZWEI_POLIZEI___", "1", 1); - if (fsrv->qemu_mode) { setenv("AFL_DISABLE_LLVM_INSTRUMENTATION", "1", 0); } + if (fsrv->qemu_mode || fsrv->frida_mode || fsrv->cs_mode) { + + setenv("AFL_DISABLE_LLVM_INSTRUMENTATION", "1", 0); + + } if (!fsrv->qemu_mode && !fsrv->frida_mode && argv[0] != fsrv->cmplog_binary) { - argv[0] = fsrv->cmplog_binary; + fsrv->target_path = argv[0] = fsrv->cmplog_binary; } diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index ea467401..4914ce0b 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -1298,7 +1298,8 @@ int main(int argc, char **argv_orig, char **envp) { } - if (afl->is_main_node == 1 && afl->schedule != FAST && afl->schedule != EXPLORE) { + if (afl->is_main_node == 1 && afl->schedule != FAST && + afl->schedule != EXPLORE) { FATAL("-M is compatible only with fast and explore -p power schedules"); diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c index 8988fd54..4f851099 100644 --- a/src/afl-gotcpu.c +++ b/src/afl-gotcpu.c @@ -92,7 +92,7 @@ static u32 measure_preemption(u32 target_ms) { volatile u32 v1, v2 = 0; u64 st_t, en_t, st_c, en_c, real_delta, slice_delta; - //s32 loop_repeats = 0; + // s32 loop_repeats = 0; st_t = get_cur_time_us(); st_c = get_cpu_usage_us(); @@ -113,7 +113,7 @@ repeat_loop: if (en_t - st_t < target_ms * 1000) { - //loop_repeats++; + // loop_repeats++; goto repeat_loop; } From eeccb2da69d7e6f32ee74c431e7c5053e8379dff Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 23 Feb 2023 11:45:26 +0100 Subject: [PATCH 73/77] nits --- docs/Changelog.md | 1 + qemu_mode/qemuafl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/Changelog.md b/docs/Changelog.md index 5f253064..8f71fd83 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -11,6 +11,7 @@ - add CFI sanitizer variant to gcc targets - llvm 16 support (thanks to @devnexen!) 
- support llvm 15 native pcguard changes + - new custom module: autotoken, grammar free fuzzer for text inputs - LTO autoken and llvm_mode: added AFL_LLVM_DICT2FILE_NO_MAIN support - better sanitizer default options support for all tools - unicorn_mode: updated and minor issues fixed diff --git a/qemu_mode/qemuafl b/qemu_mode/qemuafl index a120c3fe..a8af9cbd 160000 --- a/qemu_mode/qemuafl +++ b/qemu_mode/qemuafl @@ -1 +1 @@ -Subproject commit a120c3feb573d4cade292cdeb7c1f6b1ce109efe +Subproject commit a8af9cbde71e333ce72a46f15e655d0b82ed0939 From ffdb5ec9b1d92e9feb226d83c78d057cb613eeb0 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 23 Feb 2023 14:32:54 +0100 Subject: [PATCH 74/77] improve cmplog ci --- test/test-llvm.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test-llvm.sh b/test/test-llvm.sh index ce64d76c..52be04ef 100755 --- a/test/test-llvm.sh +++ b/test/test-llvm.sh @@ -257,12 +257,13 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { } rm -f test-compcov test.out instrumentlist.txt AFL_LLVM_CMPLOG=1 ../afl-clang-fast -o test-cmplog test-cmplog.c > /dev/null 2>&1 + ../afl-clang-fast -o test-c test-cmplog.c > /dev/null 2>&1 test -e test-cmplog && { $ECHO "$GREY[*] running afl-fuzz for llvm_mode cmplog, this will take approx 10 seconds" { mkdir -p in echo 00000000000000000000000000000000 > in/in - AFL_BENCH_UNTIL_CRASH=1 ../afl-fuzz -m none -V60 -i in -o out -c./test-cmplog -- ./test-cmplog >>errors 2>&1 + AFL_BENCH_UNTIL_CRASH=1 ../afl-fuzz -m none -V60 -i in -o out -c./test-cmplog -- ./test-c >>errors 2>&1 } >>errors 2>&1 test -n "$( ls out/default/crashes/id:000000* out/default/hangs/id:000000* 2>/dev/null )" & { $ECHO "$GREEN[+] afl-fuzz is working correctly with llvm_mode cmplog" @@ -277,7 +278,7 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { $ECHO "$YELLOW[-] we cannot test llvm_mode cmplog because it is not present" INCOMPLETE=1 } - rm -rf errors test-cmplog in core.* + rm -rf errors test-cmplog test-c in core.* ../afl-clang-fast -o test-persistent ../utils/persistent_mode/persistent_demo.c > /dev/null 2>&1 test -e test-persistent && { echo foo | AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -q -r ./test-persistent && { From add2eb42c0f0e2b590fcb17427e5fce29c2fdd54 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 23 Feb 2023 15:26:41 +0100 Subject: [PATCH 75/77] nits --- .gitignore | 133 +++++++++++++++--------------- custom_mutators/autotokens/README | 7 +- custom_mutators/autotokens/TODO | 3 - 3 files changed, 72 insertions(+), 71 deletions(-) delete mode 100644 custom_mutators/autotokens/TODO diff --git a/.gitignore b/.gitignore index 45d8676c..c01750e1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,104 +1,107 @@ -.test -.test2 -.sync_tmp -.vscode +!coresight_mode +!coresight_mode/coresight-trace +*.dSYM *.o +*.o.tmp +*.pyc *.so *.swp -*.pyc -*.dSYM -as -a.out -ld -in -out -core* -compile_commands.json +.sync_tmp +.test +.test2 +.vscode afl-analyze +afl-analyze.8 afl-as +afl-as.8 +afl-c++ +afl-c++.8 +afl-cc +afl-cc.8 afl-clang afl-clang++ afl-clang-fast afl-clang-fast++ -afl-clang-lto -afl-clang-lto++ -afl-fuzz -afl-g++ -afl-gcc -afl-gcc-fast -afl-g++-fast -afl-gotcpu -afl-ld -afl-ld-lto -afl-cs-proxy -afl-qemu-trace -afl-showmap -afl-tmin -afl-analyze.8 -afl-as.8 afl-clang-fast++.8 afl-clang-fast.8 -afl-clang-lto.8 +afl-clang-lto +afl-clang-lto++ afl-clang-lto++.8 +afl-clang-lto.8 afl-cmin.8 afl-cmin.bash.8 +afl-cs-proxy +afl-frida-trace.so +afl-fuzz afl-fuzz.8 -afl-c++.8 -afl-cc.8 
-afl-gcc.8 +afl-g++ afl-g++.8 +afl-gcc +afl-gcc.8 +afl-gcc-fast afl-gcc-fast.8 +afl-g++-fast afl-g++-fast.8 +afl-gotcpu afl-gotcpu.8 -afl-plot.8 -afl-showmap.8 -afl-system-config.8 -afl-tmin.8 -afl-whatsup.8 -afl-persistent-config.8 -afl-c++ -afl-cc +afl-ld +afl-ld-lto afl-lto afl-lto++ afl-lto++.8 afl-lto.8 +afl-persistent-config.8 +afl-plot.8 +afl-qemu-trace +afl-showmap +afl-showmap.8 +afl-system-config.8 +afl-tmin +afl-tmin.8 +afl-whatsup.8 +a.out +as +compile_commands.json +core* +examples/afl_frida/afl-frida +examples/afl_frida/frida-gum-example.c +examples/afl_frida/frida-gum.h +examples/afl_frida/libtestinstr.so +examples/afl_network_proxy/afl-network-client +examples/afl_network_proxy/afl-network-server +examples/aflpp_driver/libAFLDriver.a +examples/aflpp_driver/libAFLQemuDriver.a +gmon.out +in +ld +libAFLDriver.a +libAFLQemuDriver.a +out qemu_mode/libcompcov/compcovtest qemu_mode/qemu-* qemu_mode/qemuafl -unicorn_mode/samples/*/\.test-* -unicorn_mode/samples/*/output/ -test/unittests/unit_maybe_alloc -test/unittests/unit_preallocable -test/unittests/unit_list -test/unittests/unit_rand -test/unittests/unit_hash -examples/afl_network_proxy/afl-network-server -examples/afl_network_proxy/afl-network-client -examples/afl_frida/afl-frida -examples/afl_frida/libtestinstr.so -examples/afl_frida/frida-gum-example.c -examples/afl_frida/frida-gum.h -examples/aflpp_driver/libAFLDriver.a -examples/aflpp_driver/libAFLQemuDriver.a -libAFLDriver.a -libAFLQemuDriver.a test/.afl_performance test-instr test/output +test/test-c +test/test-cmplog +test/test-compcov test/test-instr.ts test/test-persistent -gmon.out -afl-frida-trace.so +test/unittests/unit_hash +test/unittests/unit_list +test/unittests/unit_maybe_alloc +test/unittests/unit_preallocable +test/unittests/unit_rand +unicorn_mode/samples/*/output/ +unicorn_mode/samples/*/\.test-* utils/afl_network_proxy/afl-network-client utils/afl_network_proxy/afl-network-server -utils/plot_ui/afl-plot-ui -*.o.tmp utils/afl_proxy/afl-proxy utils/optimin/build utils/optimin/optimin utils/persistent_mode/persistent_demo utils/persistent_mode/persistent_demo_new utils/persistent_mode/test-instr -!coresight_mode -!coresight_mode/coresight-trace -vuln_prog \ No newline at end of file +utils/plot_ui/afl-plot-ui +vuln_prog diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 295cd736..cca168fd 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -1,8 +1,9 @@ -# autotokens +# Autotokens This implements an improved autotoken grammar fuzzing idea presented in [Token-Level Fuzzing][https://www.usenix.org/system/files/sec21-salls.pdf]. -It is a grammar fuzzer without actually knowing the grammar. +It is a grammar fuzzer without actually knowing the grammar, but only works +with text based inputs. It is recommended to run with together in an instance with `CMPLOG`. @@ -19,7 +20,7 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! `AUTOTOKENS_COMMENT` - what character or string starts a comment which will be removed. Default: `/* ... */` `AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the number of fuzzing performed, shifting - the value by this number set, e.g. 1. + the value by this number, e.g. 1. `AUTOTOKENS_AUTO_DISABLE` - disable this module if the seeds are not ascii (or no input and no (ascii) dictionary) `AUTOTOKENS_LEARN_DICT` - learn from dictionaries? 
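
As a rough illustration of the token-level approach this README describes (a toy sketch, not the module's actual implementation; the input string and the regular expression are invented): the input is split into word and punctuation tokens, one token is overwritten with another token that was already seen, and everything is glued back together with naive whitespace repair so the result stays plain text.

// toy token-level mutation, illustration only
#include <iostream>
#include <regex>
#include <string>
#include <vector>

int main() {

  std::string input = "if (len == 1) { return len; }";

  // split into identifier-like tokens and runs of punctuation
  std::regex               tok("[A-Za-z0-9_]+|[^A-Za-z0-9_\\s]+");
  std::vector<std::string> tokens;
  for (std::sregex_iterator it(input.begin(), input.end(), tok), end;
       it != end; ++it) {

    tokens.push_back(it->str());

  }

  // a CHANGE-style mutation: overwrite one token with a known one
  tokens[3] = tokens[0];  // the "==" slot now holds "if"

  // naive whitespace repair while reassembling the output
  std::string out;
  for (std::size_t i = 0; i < tokens.size(); ++i) {

    if (i) { out += " "; }
    out += tokens[i];

  }

  std::cout << out << "\n";  // if ( len if 1 ) { return len ; }
  return 0;

}
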
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO deleted file mode 100644 index 2e99e147..00000000 --- a/custom_mutators/autotokens/TODO +++ /dev/null @@ -1,3 +0,0 @@ -env für menge an per mutation run - -change_min/_max werte From 2bea77e28a969fcb62921862bef61cd751d7b9d5 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 23 Feb 2023 17:09:06 +0100 Subject: [PATCH 76/77] fix custom python splice optout --- src/afl-fuzz-python.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/afl-fuzz-python.c b/src/afl-fuzz-python.c index 69c305f7..2799268b 100644 --- a/src/afl-fuzz-python.c +++ b/src/afl-fuzz-python.c @@ -231,8 +231,12 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) { PyObject_GetAttrString(py_module, "describe"); py_functions[PY_FUNC_FUZZ_COUNT] = PyObject_GetAttrString(py_module, "fuzz_count"); - if (!py_functions[PY_FUNC_FUZZ]) + if (!py_functions[PY_FUNC_FUZZ]) { + WARNF("fuzz function not found in python module"); + + } + py_functions[PY_FUNC_POST_PROCESS] = PyObject_GetAttrString(py_module, "post_process"); py_functions[PY_FUNC_INIT_TRIM] = @@ -250,6 +254,7 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) { PyObject_GetAttrString(py_module, "fuzz_send"); py_functions[PY_FUNC_SPLICE_OPTOUT] = PyObject_GetAttrString(py_module, "splice_optout"); + if (py_functions[PY_FUNC_SPLICE_OPTOUT]) { afl->custom_splice_optout = 1; } py_functions[PY_FUNC_QUEUE_NEW_ENTRY] = PyObject_GetAttrString(py_module, "queue_new_entry"); py_functions[PY_FUNC_INTROSPECTION] = From 65d4d10762a14e2dab6b89962b0ccf0cbdc8de2e Mon Sep 17 00:00:00 2001 From: "Dongjia \"toka\" Zhang" Date: Fri, 24 Feb 2023 23:14:40 +0900 Subject: [PATCH 77/77] Update afl-fuzz-redqueen.c --- src/afl-fuzz-redqueen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 8da1df13..07736537 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -1035,7 +1035,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, } else { - diff = 0; + o_diff = 0; }
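
Both the Python loader fix above and the earlier trim warning added to afl-fuzz-mutators.c deal with optional custom mutator callbacks that are looked up by name when the module is loaded, and the trim callbacks only take effect as a complete trio. Below is a skeletal C++ custom mutator showing that trio; the callback names and signatures follow the documented custom mutator API but should be double-checked against docs/custom_mutators.md, and the trimming strategy (halve the input a few times) is invented purely for illustration, so treat this as a sketch rather than a usable module.

// skeletal custom mutator: illustration of the "all three trim callbacks
// or none" rule; the halving strategy is made up for this example
extern "C" {
#include "afl-fuzz.h"
}

#include <cstdint>
#include <cstdlib>

typedef struct trim_state {

  unsigned char *buf;
  size_t         size;       // current candidate length
  size_t         last_good;  // last length that kept the same behavior
  int32_t        step;       // current trim iteration

} trim_state_t;

extern "C" void *afl_custom_init(afl_state_t *afl, unsigned int seed) {

  (void)afl;
  (void)seed;
  return calloc(1, sizeof(trim_state_t));

}

// announce how many trim iterations we want for this input
extern "C" int32_t afl_custom_init_trim(void *data, unsigned char *buf,
                                        size_t buf_size) {

  trim_state_t *t = (trim_state_t *)data;
  t->buf = buf;
  t->size = t->last_good = buf_size;
  t->step = 0;
  return 4;

}

// produce the next candidate: keep a prefix of half the current length
extern "C" size_t afl_custom_trim(void *data, unsigned char **out_buf) {

  trim_state_t *t = (trim_state_t *)data;
  t->size = t->size > 1 ? t->size / 2 : 1;
  *out_buf = t->buf;
  return t->size;

}

// success says whether the shorter candidate behaved the same;
// return the index of the next trim iteration
extern "C" int32_t afl_custom_post_trim(void *data, unsigned char success) {

  trim_state_t *t = (trim_state_t *)data;
  if (success) { t->last_good = t->size; } else { t->size = t->last_good; }
  return ++t->step;

}

extern "C" void afl_custom_deinit(void *data) {

  free(data);

}
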