From e3dadbfe0f9fad435a6fa201131315500f1a348a Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 13 Jan 2023 18:27:22 +0100 Subject: [PATCH 01/77] autotokens --- custom_mutators/autotokens/Makefile | 7 + custom_mutators/autotokens/autotokens.cpp | 391 ++++++++++++++++++++++ qemu_mode/qemuafl | 2 +- 3 files changed, 399 insertions(+), 1 deletion(-) create mode 100644 custom_mutators/autotokens/Makefile create mode 100644 custom_mutators/autotokens/autotokens.cpp diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile new file mode 100644 index 00000000..1ee7f5c4 --- /dev/null +++ b/custom_mutators/autotokens/Makefile @@ -0,0 +1,7 @@ +all: autotokens.so + +autotokens.so: autotokens.cpp + $(CXX) -O3 -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o + +clean: + rm -f autotokens.so *~ core \ No newline at end of file diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp new file mode 100644 index 00000000..afde8c26 --- /dev/null +++ b/custom_mutators/autotokens/autotokens.cpp @@ -0,0 +1,391 @@ +extern "C" { +#include "afl-fuzz.h" +} + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define AUTOTOKENS_DEBUG 1 +#define AUTOTOKENS_LEN_MIN 12 +#define AUTOTOKENS_CHANGE_MIN_PERCENT 5 +#define AUTOTOKENS_CHANGE_MAX_PERCENT 10 + +using namespace std; + +typedef struct my_mutator { + + afl_state *afl; + +} my_mutator_t; + +#define DEBUG \ + if (unlikely(debug)) fprintf + +static afl_state *afl_ptr; +static int debug = AUTOTOKENS_DEBUG; +static u32 current_id = 0; +static unordered_map *> file_mapping; +static unordered_map token_to_id; +static unordered_map id_to_token; +static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static regex regex_comment_star("/\\*(.|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); +static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); +static vector *s; + +extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, + u8 **out_buf, uint8_t *add_buf, + size_t add_buf_size, size_t max_size) { + + DEBUG(stderr, "MUT!\n"); + + if (s == NULL) { return 0; } + + vector m = *s; + u32 i, m_size = (u32)m.size(); + + u32 rounds = MAX(8, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256)); + DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + + for (i = 0; i < rounds; ++i) { + + u32 item, new_item; + + switch(rand_below(afl_ptr, 4)) { + /* CHANGE */ + case 0: /* fall through */ + case 1: + item = rand_below(afl_ptr, m_size); + do { + new_item = 1 + rand_below(afl_ptr, current_id); + } while(unlikely(new_item == m[item])); + m[item] = new_item; + break; + /* INSERT (+1 so we insert also after last place) */ + case 2: + new_item = 1 + rand_below(afl_ptr, current_id); + m.insert(m.begin() + rand_below(afl_ptr, m_size + 1), new_item); + ++m_size; + break; + /* ERASE - only if large enough */ + case 3: + if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); } + --m_size; + break; + } + + } + + string output; + u32 m_size_1 = m_size - 1; + for (i = 0; i < m_size; ++i) { + output += id_to_token[m[i]]; + if (likely(i < m_size_1)) { output += " "; } + } + + u32 mutated_size = output.size(); + u8 *mutated_out = (u8*)afl_realloc((void**)out_buf, mutated_size); + + if (unlikely(!mutated_out)) 
{ + + *out_buf = NULL; + return 0; + + } + + /* + *out_buf = buf; + return buf_size; + */ + memcpy(mutated_out, output.data(), mutated_size); + *out_buf = mutated_out; + DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size, mutated_out); + return mutated_size; + +} + + +/* We are not using afl_custom_queue_new_entry() because not every corpus entry + will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ + +extern "C" unsigned char afl_custom_queue_get(void *data, + const unsigned char *filename) { + + if (likely(!debug)) + if (!afl_ptr->queue_cur->is_ascii) { s = NULL; return 0; } + + vector *structure = NULL; + string fn = (char *)filename; + + auto entry = file_mapping.find(fn); + if (entry == file_mapping.end()) { + + // this input file was not analyzed for tokens yet, so let's do it! + + FILE *fp = fopen((char *)filename, "rb"); + if (!fp) { s = NULL; return 0; } // should not happen + fseek(fp, 0, SEEK_END); + size_t len = (size_t)ftell(fp); + if (len < AUTOTOKENS_LEN_MIN) { + + fclose(fp); + file_mapping[fn] = structure; // NULL ptr so we don't read the file again + DEBUG(stderr, "Too short (%lu) %s\n", len, filename); + s = NULL; + return 0; + + } + + string input; + input.resize(len); + rewind(fp); + fread(input.data(), input.size(), 1, fp); + fclose(fp); + + // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", + // input.size(), filename, input.c_str()); + + input = regex_replace(input, regex_comment_slash, "$2"); + input = regex_replace(input, regex_comment_star, ""); + + DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), + filename, input.c_str()); + + /* + u32 spaces = count(input.begin(), input.end(), ' '); + u32 tabs = count(input.begin(), input.end(), '\t'); + u32 linefeeds = count(input.begin(), input.end(), '\n'); + bool ends_with_linefeed = input[input.length() - 1] == '\n'; + DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, + linefeeds, ends_with_linefeed); + */ + + // now extract all tokens + vector tokens; + smatch match; + string::const_iterator cur = input.begin(), ende = input.end(), last = cur, + found, prev; + + DEBUG(stderr, "MATCHES:\n"); + while (regex_search(cur, ende, match, regex_string)) { + + prev = cur; + found = match[1].first; + cur = match[1].second; + DEBUG(stderr, + "string \"%s\" found at start %lu offset %lu continue at %lu\n", + match[1].str().c_str(), prev - input.begin(), match.position(), + cur - input.begin()); + if (prev < found) { // there are items between search start and find + sregex_token_iterator it{prev, found, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase( + std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { return s.size() == 0; }), + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; + + } + + for (auto token : tokenized) { + + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; + + while (regex_search(c, e, m, regex_word)) { + + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(p, f)); + + } + + DEBUG(stderr, + "SUBstring \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - 
token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(c, e)); + + } + + } + + } + + if (match[1].length() > 0) { tokens.push_back(match[1]); } + + } + + if (cur < ende) { + + DEBUG(stderr, "REST!\n"); + + sregex_token_iterator it{cur, ende, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase( + std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { return s.size() == 0; }), + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; + + } + + for (auto token : tokenized) { + + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; + + while (regex_search(c, e, m, regex_word)) { + + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(p, f)); + + } + + DEBUG(stderr, + "SUB2string \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(c, e)); + + } + + } + + } + + DEBUG(stderr, "DUMPING TOKENS:\n"); + if (unlikely(debug)) + for (u32 i = 0; i < tokens.size(); ++i) { + + DEBUG(stderr, "%s ", tokens[i].c_str()); + + } + + DEBUG(stderr, "---------------------------\n"); + + /* Now we transform the tokens into an ID list and saved that */ + + structure = new vector(); + u32 id; + + for (u32 i = 0; i < tokens.size(); ++i) { + + if ((id = token_to_id[tokens[i]]) == 0) { + + // First time we see this token, add it to the list + ++current_id; + token_to_id[tokens[i]] = current_id; + id_to_token[current_id] = tokens[i]; + structure->push_back(current_id); + + } else { + + structure->push_back(id); + + } + + } + + // save the token structure to the file mapping + file_mapping[fn] = structure; + s = structure; + + // we are done! + DEBUG(stderr, "DONE! 
We have %lu tokens in the structure\n", + structure->size()); + + } else { + + if (entry->second == NULL) { + + DEBUG(stderr, "Skipping %s\n", filename); + s = NULL; + return 0; + + } + + s = entry->second; + DEBUG(stderr, "OK %s\n", filename); + + } + + return 1; // we always fuzz unless non-ascii or too small + +} + +extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { + + (void)(seed); + my_mutator_t *data = (my_mutator_t *)calloc(1, sizeof(my_mutator_t)); + if (!data) { + + perror("afl_custom_init alloc"); + return NULL; + + } + + data->afl = afl_ptr = afl; + + return data; + +} + +extern "C" void afl_custom_deinit(my_mutator_t *data) { + + free(data); + +} + diff --git a/qemu_mode/qemuafl b/qemu_mode/qemuafl index a8af9cbd..a120c3fe 160000 --- a/qemu_mode/qemuafl +++ b/qemu_mode/qemuafl @@ -1 +1 @@ -Subproject commit a8af9cbde71e333ce72a46f15e655d0b82ed0939 +Subproject commit a120c3feb573d4cade292cdeb7c1f6b1ce109efe From 9548af52b266ecc2aed81f388f7a1a7a3fcfb181 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 14 Jan 2023 09:30:25 +0100 Subject: [PATCH 02/77] texts --- custom_mutators/autotokens/README | 12 ++++++++++++ custom_mutators/autotokens/TODO | 13 +++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 custom_mutators/autotokens/README create mode 100644 custom_mutators/autotokens/TODO diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README new file mode 100644 index 00000000..6849279e --- /dev/null +++ b/custom_mutators/autotokens/README @@ -0,0 +1,12 @@ +# autotokens + +This implements an improved autotoken idea presented in +[Token-Level Fuzzing][https://www.usenix.org/system/files/sec21-salls.pdf]. +It is a grammar fuzzer without actually knowing the grammar. + +It is recommended to run with together in an instance with `CMPLOG`. + +If you have a dictionary (`-x`) this improves this custom grammar mutator. + +If **not** run with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY`, +to concentrate on grammar bug classes. diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO new file mode 100644 index 00000000..700b3fa7 --- /dev/null +++ b/custom_mutators/autotokens/TODO @@ -0,0 +1,13 @@ +whitespace belassen oder notieren? MAYBE +0=space 1=tab 2=linefeed + +dictionary mitverwenden? JA aber nur ascii +-> neue liste? +wie mache ich das bei honggfuzz? +ansonsten neuer custom mutator entrypoint? + +nur is_ascii wenn cmplog aktiv, ansonsten eigene implementierung +die aber dann dafür sorgt dass eine leere struktur da ist. +is is_ascii in afl-common.o ? + +cmplog: only add tokens that were found to fit? From 35801bed7a5feb8cc3a363bafbd577f256c467f6 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 15 Jan 2023 13:47:31 +0100 Subject: [PATCH 03/77] dictionary support --- custom_mutators/autotokens/TODO | 17 +- custom_mutators/autotokens/autotokens.cpp | 248 +++++++++++++++++----- include/config.h | 2 +- 3 files changed, 199 insertions(+), 68 deletions(-) diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 700b3fa7..2e5e384f 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,13 +1,12 @@ whitespace belassen oder notieren? MAYBE 0=space 1=tab 2=linefeed -dictionary mitverwenden? JA aber nur ascii --> neue liste? -wie mache ich das bei honggfuzz? -ansonsten neuer custom mutator entrypoint? 
- -nur is_ascii wenn cmplog aktiv, ansonsten eigene implementierung -die aber dann dafür sorgt dass eine leere struktur da ist. -is is_ascii in afl-common.o ? - cmplog: only add tokens that were found to fit? + +create from thin air if no good seed after a cycle and dict large enough? +(static u32 no_of_struct_inputs;) + +splice insert, splice overwrite +(linefeed, semicolon) + + diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index afde8c26..2fad8dd7 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1,5 +1,7 @@ extern "C" { + #include "afl-fuzz.h" + } #include @@ -13,9 +15,7 @@ extern "C" { #include #define AUTOTOKENS_DEBUG 1 -#define AUTOTOKENS_LEN_MIN 12 -#define AUTOTOKENS_CHANGE_MIN_PERCENT 5 -#define AUTOTOKENS_CHANGE_MAX_PERCENT 10 +#define AUTOTOKENS_CHANGE_MIN 8 using namespace std; @@ -31,43 +31,55 @@ typedef struct my_mutator { static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; static u32 current_id = 0; +static u32 valid_structures = 0; +static u32 extras_cnt = 0, a_extras_cnt = 0; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); -static regex regex_comment_star("/\\*(.|\n)*?\\*/", - regex::multiline | regex::optimize); -static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); -static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); -static regex regex_whitespace(R"([ \t]+)", regex::optimize); -static vector *s; +static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static regex regex_comment_star("/\\*(.|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); +static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); +static vector *s; // the structure of the currently selected input -extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, - u8 **out_buf, uint8_t *add_buf, - size_t add_buf_size, size_t max_size) { +extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, + u8 **out_buf, u8 *add_buf, + size_t add_buf_size, size_t max_size) { - DEBUG(stderr, "MUT!\n"); + if (s == NULL) { - if (s == NULL) { return 0; } + *out_buf = NULL; + return 0; - vector m = *s; - u32 i, m_size = (u32)m.size(); + } - u32 rounds = MAX(8, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256)); - DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + vector m = *s; // copy of the structure we will modify + u32 i, m_size = (u32)m.size(); + + u32 rounds = + MAX(AUTOTOKENS_CHANGE_MIN, + MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * + afl_ptr->havoc_div / 256)); + // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); for (i = 0; i < rounds; ++i) { - + u32 item, new_item; - - switch(rand_below(afl_ptr, 4)) { + + switch (rand_below(afl_ptr, 4)) { + /* CHANGE */ - case 0: /* fall through */ + case 0: /* fall through */ case 1: item = rand_below(afl_ptr, m_size); do { + new_item = 1 + rand_below(afl_ptr, current_id); - } while(unlikely(new_item == m[item])); + + } while (unlikely(new_item == m[item])); + m[item] = new_item; break; /* INSERT (+1 so we insert also after last place) */ @@ -81,31 +93,32 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t 
*buf, size_t buf_s if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); } --m_size; break; + // TODO: add full line insert splice, replace splace, delete + } - + } - + string output; - u32 m_size_1 = m_size - 1; + u32 m_size_1 = m_size - 1; + for (i = 0; i < m_size; ++i) { + output += id_to_token[m[i]]; if (likely(i < m_size_1)) { output += " "; } + } u32 mutated_size = output.size(); - u8 *mutated_out = (u8*)afl_realloc((void**)out_buf, mutated_size); + u8 *mutated_out = (u8 *)afl_realloc((void **)out_buf, mutated_size); if (unlikely(!mutated_out)) { - + *out_buf = NULL; return 0; - + } - /* - *out_buf = buf; - return buf_size; - */ memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size, mutated_out); @@ -113,29 +126,106 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_s } - /* We are not using afl_custom_queue_new_entry() because not every corpus entry will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { - if (likely(!debug)) - if (!afl_ptr->queue_cur->is_ascii) { s = NULL; return 0; } + if (likely(!debug)) { + + if (afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) { + + s = NULL; + return 0; + + } + + } + + // check if there are new dictionary entries and add them to the tokens + if (valid_structures) { + + while (extras_cnt < afl_ptr->extras_cnt) { + + u32 ok = 1, l = afl_ptr->extras[extras_cnt].len; + u8 *ptr = afl_ptr->extras[extras_cnt].data; + + for (u32 i = 0; i < l; ++i) { + + if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) { + + ok = 0; + break; + + } + + } + + if (ok) { + + ++current_id; + token_to_id[(char *)ptr] = current_id; + id_to_token[current_id] = (char *)ptr; + + } + + ++extras_cnt; + DEBUG(stderr, "Added from dictionary: \"%s\"\n", ptr); + + } + + while (a_extras_cnt < afl_ptr->a_extras_cnt) { + + u32 ok = 1, l = afl_ptr->a_extras[a_extras_cnt].len; + u8 *ptr = afl_ptr->a_extras[a_extras_cnt].data; + + for (u32 i = 0; i < l; ++i) { + + if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) { + + ok = 0; + break; + + } + + } + + if (ok) { + + ++current_id; + token_to_id[(char *)ptr] = current_id; + id_to_token[current_id] = (char *)ptr; + + } + + ++a_extras_cnt; + DEBUG(stderr, "Added from auto dictionary: \"%s\"\n", ptr); + + } + + } vector *structure = NULL; string fn = (char *)filename; + auto entry = file_mapping.find(fn); - auto entry = file_mapping.find(fn); if (entry == file_mapping.end()) { // this input file was not analyzed for tokens yet, so let's do it! FILE *fp = fopen((char *)filename, "rb"); - if (!fp) { s = NULL; return 0; } // should not happen + if (!fp) { + + s = NULL; + return 0; + + } // should not happen + fseek(fp, 0, SEEK_END); size_t len = (size_t)ftell(fp); - if (len < AUTOTOKENS_LEN_MIN) { + + if (len < AFL_TXT_MIN_LEN) { fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again @@ -151,6 +241,30 @@ extern "C" unsigned char afl_custom_queue_get(void *data, fread(input.data(), input.size(), 1, fp); fclose(fp); + if (!afl_ptr->shm.cmplog_mode) { + + // not running with CMPLOG? bad choice, but whatever ... + // we only want text inputs, so we have to check it ourselves. + + u32 valid_chars = 0; + for (u32 i = 0; i < len; ++i) { + + if (isascii((int)input[i]) || isprint((int)input[i])) { ++valid_chars; } + + } + + // we want at least 95% of text characters ... 
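+      // (AFL_TXT_MIN_PERCENT is 95 in include/config.h, so e.g. a 1000 byte
+      // input needs at least 950 ASCII/printable bytes, otherwise the file
+      // is mapped to NULL below and never looked at again.)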
+ if (((len * AFL_TXT_MIN_PERCENT) / 100) > valid_chars) { + + file_mapping[fn] = NULL; + DEBUG(stderr, "Not text (%lu) %s\n", len, filename); + s = NULL; + return 0; + + } + + } + // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); @@ -175,7 +289,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator cur = input.begin(), ende = input.end(), last = cur, found, prev; - DEBUG(stderr, "MATCHES:\n"); while (regex_search(cur, ende, match, regex_string)) { prev = cur; @@ -196,11 +309,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), input.size()); - for (auto x : tokenized) { + if (unlikely(debug)) + for (auto x : tokenized) { - cerr << x << endl; + cerr << x << endl; - } + } for (auto token : tokenized) { @@ -232,8 +346,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (c < e) { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(c, e)); } @@ -248,8 +367,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (cur < ende) { - DEBUG(stderr, "REST!\n"); - sregex_token_iterator it{cur, ende, regex_whitespace, -1}; vector tokenized{it, {}}; tokenized.erase( @@ -260,11 +377,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), input.size()); - for (auto x : tokenized) { + if (unlikely(debug)) + for (auto x : tokenized) { - cerr << x << endl; + cerr << x << endl; - } + } for (auto token : tokenized) { @@ -279,8 +397,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (p < f) { // there are items between search start and find - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(p, f)); } @@ -296,8 +419,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (c < e) { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(c, e)); } @@ -306,15 +434,18 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - DEBUG(stderr, "DUMPING TOKENS:\n"); - if (unlikely(debug)) + if (unlikely(debug)) { + + DEBUG(stderr, "DUMPING TOKENS:\n"); for (u32 i = 0; i < tokens.size(); ++i) { DEBUG(stderr, "%s ", tokens[i].c_str()); } - DEBUG(stderr, "---------------------------\n"); + DEBUG(stderr, "---------------------------\n"); + + } /* Now we transform the tokens into an ID list and saved that */ @@ -342,6 +473,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // save the token structure to the file mapping file_mapping[fn] = structure; s = structure; + ++valid_structures; // we are done! DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", diff --git a/include/config.h b/include/config.h index a5a4c473..6cfaac11 100644 --- a/include/config.h +++ b/include/config.h @@ -494,7 +494,7 @@ /* What is the minimum percentage of ascii characters present to be classifed as "is_ascii"? 
*/ -#define AFL_TXT_MIN_PERCENT 94 +#define AFL_TXT_MIN_PERCENT 95 /* How often to perform ASCII mutations 0 = disable, 1-8 are good values */ From 10b82c72772f40f703119fc7cd1c9063500a6bbe Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 15 Jan 2023 18:17:28 +0100 Subject: [PATCH 04/77] fixes --- custom_mutators/autotokens/Makefile | 2 +- custom_mutators/autotokens/autotokens.cpp | 40 ++++++++++++++++------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 1ee7f5c4..5dd52dee 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,7 +1,7 @@ all: autotokens.so autotokens.so: autotokens.cpp - $(CXX) -O3 -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o + $(CXX) -g -O3 $(CFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o clean: rm -f autotokens.so *~ core \ No newline at end of file diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 2fad8dd7..9fbdf52a 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -14,7 +14,7 @@ extern "C" { #include #include -#define AUTOTOKENS_DEBUG 1 +#define AUTOTOKENS_DEBUG 0 #define AUTOTOKENS_CHANGE_MIN 8 using namespace std; @@ -64,11 +64,13 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + u32 max_rand = 4; + for (i = 0; i < rounds; ++i) { u32 item, new_item; - switch (rand_below(afl_ptr, 4)) { + switch (rand_below(afl_ptr, max_rand)) { /* CHANGE */ case 0: /* fall through */ @@ -90,9 +92,19 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; /* ERASE - only if large enough */ case 3: - if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); } - --m_size; + if (m_size > 8) { + + m.erase(m.begin() + rand_below(afl_ptr, m_size)); + --m_size; + + } else { + + max_rand = 3; + + } + break; + // TODO: add full line insert splice, replace splace, delete } @@ -119,9 +131,16 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + if (unlikely(debug)) { + + DEBUG(stderr, "MUTATED to %u bytes:\n", mutated_size); + fwrite(output.data(), 1, mutated_size, stderr); + DEBUG(stderr, "\n---\n"); + + } + memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; - DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size, mutated_out); return mutated_size; } @@ -292,11 +311,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, while (regex_search(cur, ende, match, regex_string)) { prev = cur; - found = match[1].first; - cur = match[1].second; - DEBUG(stderr, - "string \"%s\" found at start %lu offset %lu continue at %lu\n", - match[1].str().c_str(), prev - input.begin(), match.position(), + found = match[0].first; + cur = match[0].second; + DEBUG(stderr, "string %s found at start %lu offset %lu continue at %lu\n", + match[0].str().c_str(), prev - input.begin(), match.position(), cur - input.begin()); if (prev < found) { // there are items between search start and find sregex_token_iterator it{prev, found, regex_whitespace, -1}; @@ -361,7 +379,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[1].length() > 0) { tokens.push_back(match[1]); } + if (match[0].length() > 0) { 
tokens.push_back(match[0]); } } From 8cc1c6c54edbeb5ac7a8bcb050eb7976009517fa Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 16 Jan 2023 10:18:08 +0100 Subject: [PATCH 05/77] nits --- src/afl-fuzz-one.c | 32 +++++++++++++++++++------------- src/afl-fuzz.c | 23 +++++++++++++++++++++++ 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 97855607..eaf65987 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -584,7 +584,7 @@ u8 fuzz_one_original(afl_state_t *afl) { if it has gone through deterministic testing in earlier, resumed runs (passed_det). */ - if (likely(afl->queue_cur->passed_det) || likely(afl->skip_deterministic) || + if (likely(afl->skip_deterministic) || likely(afl->queue_cur->passed_det) || likely(perf_score < (afl->queue_cur->depth * 30 <= afl->havoc_max_mult * 100 ? afl->queue_cur->depth * 30 @@ -1908,9 +1908,10 @@ custom_mutator_stage: afl->stage_name = "custom mutator"; afl->stage_short = "custom"; - afl->stage_max = HAVOC_CYCLES * perf_score / afl->havoc_div / 100; afl->stage_val_type = STAGE_VAL_NONE; bool has_custom_fuzz = false; + u32 shift = unlikely(afl->custom_only) ? 7 : 8; + afl->stage_max = (HAVOC_CYCLES * perf_score / afl->havoc_div) >> shift; if (afl->stage_max < HAVOC_MIN) { afl->stage_max = HAVOC_MIN; } @@ -2063,8 +2064,9 @@ havoc_stage: afl->stage_name = "havoc"; afl->stage_short = "havoc"; - afl->stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * - perf_score / afl->havoc_div / 100; + afl->stage_max = ((doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * + perf_score / afl->havoc_div) >> + 7; } else { @@ -2073,7 +2075,7 @@ havoc_stage: snprintf(afl->stage_name_buf, STAGE_BUF_SIZE, "splice %u", splice_cycle); afl->stage_name = afl->stage_name_buf; afl->stage_short = "splice"; - afl->stage_max = SPLICE_HAVOC * perf_score / afl->havoc_div / 100; + afl->stage_max = (SPLICE_HAVOC * perf_score / afl->havoc_div) >> 7; } @@ -4621,8 +4623,9 @@ pacemaker_fuzzing: afl->stage_name = MOpt_globals.havoc_stagename; afl->stage_short = MOpt_globals.havoc_stagenameshort; - afl->stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * - perf_score / afl->havoc_div / 100; + afl->stage_max = ((doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * + perf_score / afl->havoc_div) >> + 7; } else { @@ -4632,7 +4635,7 @@ pacemaker_fuzzing: MOpt_globals.splice_stageformat, splice_cycle); afl->stage_name = afl->stage_name_buf; afl->stage_short = MOpt_globals.splice_stagenameshort; - afl->stage_max = SPLICE_HAVOC * perf_score / afl->havoc_div / 100; + afl->stage_max = (SPLICE_HAVOC * perf_score / afl->havoc_div) >> 7; } @@ -5792,10 +5795,8 @@ void pso_updating(afl_state_t *afl) { } -/* larger change for MOpt implementation: the original fuzz_one was renamed - to fuzz_one_original. All documentation references to fuzz_one therefore - mean fuzz_one_original */ - +/* The entry point for the mutator, choosing the default mutator, and/or MOpt + depending on the configuration. 
*/ u8 fuzz_one(afl_state_t *afl) { int key_val_lv_1 = 0, key_val_lv_2 = 0; @@ -5818,7 +5819,12 @@ u8 fuzz_one(afl_state_t *afl) { #endif - // if limit_time_sig == -1 then both are run after each other + /* + -L command line paramter => limit_time_sig value + limit_time_sig == 0 then run the default mutator + limit_time_sig > 0 then run MOpt + limit_time_sig < 0 both are run + */ if (afl->limit_time_sig <= 0) { key_val_lv_1 = fuzz_one_original(afl); } diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 138df26c..5e0ecd1e 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -1580,6 +1580,29 @@ int main(int argc, char **argv_orig, char **envp) { } + if (afl->limit_time_sig > 0 && afl->custom_mutators_count) { + + if (afl->custom_only) { + + FATAL("Custom mutators are incompatible with MOpt (-L)"); + + } + + u32 custom_fuzz = 0; + LIST_FOREACH(&afl->custom_mutator_list, struct custom_mutator, { + + if (el->afl_custom_fuzz) { custom_fuzz = 1; } + + }); + + if (custom_fuzz) { + + WARNF("afl_custom_fuzz is incompatible with MOpt (-L)"); + + } + + } + if (afl->afl_env.afl_max_det_extras) { s32 max_det_extras = atoi(afl->afl_env.afl_max_det_extras); From 4b915207c42f8100f306778f617d7003c3e2193f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 16 Jan 2023 17:05:04 +0100 Subject: [PATCH 06/77] autotokens - much better tokenizer --- custom_mutators/autotokens/autotokens.cpp | 311 +++++++++++++--------- 1 file changed, 181 insertions(+), 130 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 9fbdf52a..850692a1 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -28,22 +28,41 @@ typedef struct my_mutator { #define DEBUG \ if (unlikely(debug)) fprintf -static afl_state *afl_ptr; -static int debug = AUTOTOKENS_DEBUG; -static u32 current_id = 0; -static u32 valid_structures = 0; -static u32 extras_cnt = 0, a_extras_cnt = 0; +static afl_state *afl_ptr; +static int debug = AUTOTOKENS_DEBUG; +static u32 current_id; +static u32 valid_structures; +static u32 whitespace_ids; +static u32 extras_cnt, a_extras_cnt; +static u64 all_spaces, all_tabs, all_lf, all_ws; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); -static regex regex_comment_star("/\\*(.|\n)*?\\*/", - regex::multiline | regex::optimize); -static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); -static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); -static regex regex_whitespace(R"([ \t]+)", regex::optimize); +// static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); static vector *s; // the structure of the currently selected input +u32 good_whitespace_or_singleval() { + + u32 i = rand_below(afl_ptr, current_id); + if (id_to_token[i].size() == 1) { return i; } + i = rand_below(afl_ptr, all_ws); + if (i < all_spaces) { + + return 0; + + } else if (i < all_tabs) { + + return 1; + + } else + + return 2; // linefeed + +} + extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u8 **out_buf, u8 *add_buf, size_t add_buf_size, size_t max_size) { @@ -68,30 +87,76 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, for (i = 0; i < rounds; ++i) { - 
u32 item, new_item; - switch (rand_below(afl_ptr, max_rand)) { /* CHANGE */ case 0: /* fall through */ - case 1: - item = rand_below(afl_ptr, m_size); + case 1: { + + u32 pos = rand_below(afl_ptr, m_size); + u32 cur_item = m[pos], new_item; do { - new_item = 1 + rand_below(afl_ptr, current_id); + new_item = rand_below(afl_ptr, current_id); - } while (unlikely(new_item == m[item])); + } while (unlikely( - m[item] = new_item; + new_item == cur_item || + (whitespace_ids < new_item && whitespace_ids >= cur_item) || + (whitespace_ids >= new_item && whitespace_ids < cur_item))); + + DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item); + m[pos] = new_item; break; - /* INSERT (+1 so we insert also after last place) */ - case 2: - new_item = 1 + rand_below(afl_ptr, current_id); - m.insert(m.begin() + rand_below(afl_ptr, m_size + 1), new_item); + + } + + /* INSERT (m_size +1 so we insert also after last place) */ + case 2: { + + u32 new_item; + do { + + new_item = rand_below(afl_ptr, current_id); + + } while (new_item >= whitespace_ids); + + u32 pos = rand_below(afl_ptr, m_size + 1); + m.insert(m.begin() + pos, new_item); ++m_size; + + // if we insert an identifier or string we might need whitespace + if (id_to_token[new_item].size() > 1) { + + // need to insert before? + + if (pos && m[pos - 1] >= whitespace_ids && + id_to_token[m[pos - 1]].size() > 1) { + + m.insert(m.begin() + pos, good_whitespace_or_singleval()); + ++m_size; + + } + + if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && + id_to_token[m[pos + 1]].size() > 1) { + + // need to insert after? + + m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); + ++m_size; + + } + + } + break; + + } + /* ERASE - only if large enough */ - case 3: + case 3: { + if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); @@ -105,6 +170,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; + } + // TODO: add full line insert splice, replace splace, delete } @@ -112,12 +179,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } string output; - u32 m_size_1 = m_size - 1; for (i = 0; i < m_size; ++i) { output += id_to_token[m[i]]; - if (likely(i < m_size_1)) { output += " "; } } @@ -183,9 +248,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (ok) { - ++current_id; token_to_id[(char *)ptr] = current_id; id_to_token[current_id] = (char *)ptr; + ++current_id; } @@ -212,9 +277,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (ok) { - ++current_id; token_to_id[(char *)ptr] = current_id; id_to_token[current_id] = (char *)ptr; + ++current_id; } @@ -257,7 +322,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string input; input.resize(len); rewind(fp); - fread(input.data(), input.size(), 1, fp); + fread((void *)input.data(), input.size(), 1, fp); fclose(fp); if (!afl_ptr->shm.cmplog_mode) { @@ -287,28 +352,34 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); - input = regex_replace(input, regex_comment_slash, "$2"); + // input = regex_replace(input, regex_comment_slash, "$2"); input = regex_replace(input, regex_comment_star, ""); DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), filename, input.c_str()); - /* - u32 spaces = count(input.begin(), input.end(), ' '); - u32 tabs = count(input.begin(), input.end(), '\t'); - u32 linefeeds = count(input.begin(), input.end(), '\n'); + u32 
spaces = count(input.begin(), input.end(), ' '); + u32 tabs = count(input.begin(), input.end(), '\t'); + u32 linefeeds = count(input.begin(), input.end(), '\n'); bool ends_with_linefeed = input[input.length() - 1] == '\n'; DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, linefeeds, ends_with_linefeed); - */ + all_spaces += spaces; + all_tabs += tabs; + all_lf += linefeeds; + all_ws = all_spaces + all_tabs + all_lf; // now extract all tokens vector tokens; smatch match; - string::const_iterator cur = input.begin(), ende = input.end(), last = cur, - found, prev; + string::const_iterator cur = input.begin(), ende = input.end(), found, prev; - while (regex_search(cur, ende, match, regex_string)) { + DEBUG(stderr, "START!\n"); + + while (regex_search(cur, ende, match, regex_string, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { prev = cur; found = match[0].first; @@ -316,62 +387,42 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "string %s found at start %lu offset %lu continue at %lu\n", match[0].str().c_str(), prev - input.begin(), match.position(), cur - input.begin()); + if (prev < found) { // there are items between search start and find - sregex_token_iterator it{prev, found, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase( - std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { return s.size() == 0; }), - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + while (prev < found) { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); - if (unlikely(debug)) - for (auto x : tokenized) { + if (isspace(*prev)) { - cerr << x << endl; + auto start = prev; + while (isspace(*prev)) { - } - - for (auto token : tokenized) { - - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; - - while (regex_search(c, e, m, regex_word)) { - - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { - - // there are items between search start and find - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); - tokens.push_back(std::string(p, f)); + ++prev; } - DEBUG(stderr, - "SUBstring \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); - tokens.push_back(m[0].str()); + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); - } + } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { - if (c < e) { + auto start = prev; + while (isalnum(*prev) || *prev == '$' || *prev == '_' || + *prev == '.' 
|| *prev == '/') { - if (unlikely(debug)) { - - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + ++prev; } - tokens.push_back(std::string(c, e)); + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); + + } else { + + tokens.push_back(std::string(prev, prev + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *prev); + ++prev; } @@ -383,68 +434,44 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } + DEBUG(stderr, "AFTER all strings\n"); + if (cur < ende) { - sregex_token_iterator it{cur, ende, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase( - std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { return s.size() == 0; }), - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + while (cur < ende) { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); - if (unlikely(debug)) - for (auto x : tokenized) { + if (isspace(*cur)) { - cerr << x << endl; + auto start = cur; + while (isspace(*cur)) { - } - - for (auto token : tokenized) { - - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; - - while (regex_search(c, e, m, regex_word)) { - - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { - - // there are items between search start and find - if (unlikely(debug)) { - - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(p, f)); + ++cur; } - DEBUG(stderr, - "SUB2string \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); - tokens.push_back(m[0].str()); + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); - } + } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { - if (c < e) { + auto start = cur; + while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' 
|| + *cur == '/') { - if (unlikely(debug)) { - - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + ++cur; } - tokens.push_back(std::string(c, e)); + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); + + } else { + + tokens.push_back(std::string(cur, cur + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *cur); + ++cur; } @@ -457,7 +484,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "DUMPING TOKENS:\n"); for (u32 i = 0; i < tokens.size(); ++i) { - DEBUG(stderr, "%s ", tokens[i].c_str()); + DEBUG(stderr, "%s", tokens[i].c_str()); } @@ -475,10 +502,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if ((id = token_to_id[tokens[i]]) == 0) { // First time we see this token, add it to the list - ++current_id; token_to_id[tokens[i]] = current_id; id_to_token[current_id] = tokens[i]; structure->push_back(current_id); + ++current_id; } else { @@ -529,6 +556,30 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { data->afl = afl_ptr = afl; + // set common whitespace tokens + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t"] = current_id; + id_to_token[current_id] = "\t"; + ++current_id; + token_to_id["\n"] = current_id; + id_to_token[current_id] = "\n"; + ++current_id; + token_to_id["\r\n"] = current_id; + id_to_token[current_id] = "\r\n"; + ++current_id; + token_to_id[" \n"] = current_id; + id_to_token[current_id] = " \n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t"] = current_id; + id_to_token[current_id] = "\t\t"; + ++current_id; + whitespace_ids = current_id; + return data; } From 33f41e3974348d3b0b71b3a30a6483bb0418068c Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 17 Jan 2023 09:52:35 +0100 Subject: [PATCH 07/77] autotokens: print stats at exit --- custom_mutators/autotokens/README | 7 ++++--- custom_mutators/autotokens/autotokens.cpp | 12 ++++++++++++ include/config.h | 4 ++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 6849279e..0dcc6a3e 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -1,6 +1,6 @@ # autotokens -This implements an improved autotoken idea presented in +This implements an improved autotoken grammar fuzzing idea presented in [Token-Level Fuzzing][https://www.usenix.org/system/files/sec21-salls.pdf]. It is a grammar fuzzer without actually knowing the grammar. @@ -8,5 +8,6 @@ It is recommended to run with together in an instance with `CMPLOG`. If you have a dictionary (`-x`) this improves this custom grammar mutator. -If **not** run with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY`, -to concentrate on grammar bug classes. +If **not** running with `CMPLOG`, it is possible to set +`AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes. 
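+
+Like any custom mutator it is loaded via `AFL_CUSTOM_MUTATOR_LIBRARY`; a
+typical invocation (paths and dictionary name are only illustrative) could be:
+`AFL_CUSTOM_MUTATOR_LIBRARY=custom_mutators/autotokens/autotokens.so afl-fuzz -i in -o out -x dict.dct -c ./target.cmplog -- ./target`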
+ diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 850692a1..d6b269fd 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -35,6 +35,7 @@ static u32 valid_structures; static u32 whitespace_ids; static u32 extras_cnt, a_extras_cnt; static u64 all_spaces, all_tabs, all_lf, all_ws; +static u64 all_structure_items; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; @@ -519,6 +520,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = structure; s = structure; ++valid_structures; + all_structure_items += structure->size(); // we are done! DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", @@ -586,6 +588,16 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { extern "C" void afl_custom_deinit(my_mutator_t *data) { + /* we use this to print statistics at exit :-) + needs to be stderr as stdout is filtered */ + + fprintf(stderr, + "\n\nAutotoken mutator statistics:\n" + " Number of all seen tokens: %lu\n" + " Number of input structures: %lu\n" + " Number of all items in structures: %lu\n\n", + current_id - 1, valid_structures, all_structure_items); + free(data); } diff --git a/include/config.h b/include/config.h index 6cfaac11..f8a742f2 100644 --- a/include/config.h +++ b/include/config.h @@ -364,9 +364,9 @@ * * ***********************************************************/ -/* Call count interval between reseeding the libc PRNG from /dev/urandom: */ +/* Call count interval between reseeding the PRNG from /dev/urandom: */ -#define RESEED_RNG 100000 +#define RESEED_RNG 2500000 /* The default maximum testcase cache size in MB, 0 = disable. A value between 50 and 250 is a good default value. Note that the From efe57c936880608a2de452340d63f262470d9fcd Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 17 Jan 2023 09:57:23 +0100 Subject: [PATCH 08/77] more whitespace --- custom_mutators/autotokens/autotokens.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index d6b269fd..5580512a 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -559,6 +559,8 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { data->afl = afl_ptr = afl; // set common whitespace tokens + // we deliberately do not put uncommon ones here to these will count as + // identifier tokens. 
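+  // these common whitespace strings get the lowest token ids; whitespace_ids
+  // is set right after them, and the mutator uses that boundary to avoid
+  // swapping whitespace with non-whitespace tokens and to decide when a
+  // separator needs to be inserted between two multi-character tokens.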
token_to_id[" "] = current_id; id_to_token[current_id] = " "; ++current_id; @@ -580,6 +582,21 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { token_to_id["\t\t"] = current_id; id_to_token[current_id] = "\t\t"; ++current_id; + token_to_id["\n\n"] = current_id; + id_to_token[current_id] = "\n\n"; + ++current_id; + token_to_id["\r\n\r\n"] = current_id; + id_to_token[current_id] = "\r\n\r\n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t\t\t"] = current_id; + id_to_token[current_id] = "\t\t\t\t"; + ++current_id; + token_to_id["\n\n\n\n"] = current_id; + id_to_token[current_id] = "\n\n\n\n"; + ++current_id; whitespace_ids = current_id; return data; From a41fd5cc5c4a5073f38adf06270e2985c88da9d5 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 11:46:28 +0100 Subject: [PATCH 09/77] alternate tokenize, options --- custom_mutators/autotokens/README | 9 + custom_mutators/autotokens/autotokens.cpp | 432 ++++++++++++++++++---- 2 files changed, 365 insertions(+), 76 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 0dcc6a3e..f6e9c753 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -11,3 +11,12 @@ If you have a dictionary (`-x`) this improves this custom grammar mutator. If **not** running with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes. +## Configuration via environment variables + +`AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items +`AUTOTOKENS_COMMENT` - what character or string starts a comment which will be + removed. Default: `/* ... */` +`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation + (experimental) +`AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, + default is " " diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 5580512a..28ef91e2 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -15,7 +15,10 @@ extern "C" { #include #define AUTOTOKENS_DEBUG 0 +#define AUTOTOKENS_ONLY_FAV 0 +#define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 +#define AUTOTOKENS_WHITESPACE " " using namespace std; @@ -30,6 +33,8 @@ typedef struct my_mutator { static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; +static int only_fav = AUTOTOKENS_ONLY_FAV; +static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -39,9 +44,12 @@ static u64 all_structure_items; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -// static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static string whitespace = AUTOTOKENS_WHITESPACE; +static regex *regex_comment_custom; static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", regex::multiline | regex::optimize); +static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); static vector *s; // the structure of the currently selected input @@ -84,15 +92,15 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: 
%u \n", m.size(), rounds); - u32 max_rand = 4; + u32 max_rand = 7; for (i = 0; i < rounds; ++i) { switch (rand_below(afl_ptr, max_rand)) { /* CHANGE */ - case 0: /* fall through */ - case 1: { + case 0 ... 3: /* fall through */ + { u32 pos = rand_below(afl_ptr, m_size); u32 cur_item = m[pos], new_item; @@ -103,8 +111,9 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } while (unlikely( new_item == cur_item || - (whitespace_ids < new_item && whitespace_ids >= cur_item) || - (whitespace_ids >= new_item && whitespace_ids < cur_item))); + (!alternative_tokenize && + ((whitespace_ids < new_item && whitespace_ids >= cur_item) || + (whitespace_ids >= new_item && whitespace_ids < cur_item))))); DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; @@ -113,7 +122,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* INSERT (m_size +1 so we insert also after last place) */ - case 2: { + case 4 ... 5: { u32 new_item; do { @@ -126,26 +135,30 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, m.insert(m.begin() + pos, new_item); ++m_size; - // if we insert an identifier or string we might need whitespace - if (id_to_token[new_item].size() > 1) { + if (likely(!alternative_tokenize)) { - // need to insert before? + // if we insert an identifier or string we might need whitespace + if (id_to_token[new_item].size() > 1) { - if (pos && m[pos - 1] >= whitespace_ids && - id_to_token[m[pos - 1]].size() > 1) { + // need to insert before? - m.insert(m.begin() + pos, good_whitespace_or_singleval()); - ++m_size; + if (pos && m[pos - 1] >= whitespace_ids && + id_to_token[m[pos - 1]].size() > 1) { - } + m.insert(m.begin() + pos, good_whitespace_or_singleval()); + ++m_size; - if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && - id_to_token[m[pos + 1]].size() > 1) { + } - // need to insert after? + if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && + id_to_token[m[pos + 1]].size() > 1) { - m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); - ++m_size; + // need to insert after? 
+ + m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); + ++m_size; + + } } @@ -156,7 +169,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* ERASE - only if large enough */ - case 3: { + case 6: { if (m_size > 8) { @@ -165,7 +178,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } else { - max_rand = 3; + max_rand = 6; } @@ -180,10 +193,16 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } string output; + u32 m_size_1 = m_size - 1; for (i = 0; i < m_size; ++i) { output += id_to_token[m[i]]; + if (unlikely(alternative_tokenize && i < m_size_1)) { + + output += whitespace; + + } } @@ -219,7 +238,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (likely(!debug)) { - if (afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) { + if ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; return 0; @@ -353,8 +373,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); - // input = regex_replace(input, regex_comment_slash, "$2"); - input = regex_replace(input, regex_comment_star, ""); + if (regex_comment_custom) { + + input = regex_replace(input, *regex_comment_custom, "$2"); + + } else { + + input = regex_replace(input, regex_comment_star, ""); + + } DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), filename, input.c_str()); @@ -377,53 +404,105 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "START!\n"); - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + if (likely(!alternative_tokenize)) { - prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + while (regex_search(cur, ende, match, regex_string, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { - if (prev < found) { // there are items between search start and find - while (prev < found) { + prev = cur; + found = match[0].first; + cur = match[0].second; + DEBUG(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + match[0].str().c_str(), prev - input.begin(), match.position(), + cur - input.begin()); - if (isspace(*prev)) { + if (prev < found) { // there are items between search start and find + while (prev < found) { - auto start = prev; - while (isspace(*prev)) { + if (isspace(*prev)) { + auto start = prev; + while (isspace(*prev)) { + + ++prev; + + } + + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); + + } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { + + auto start = prev; + while (isalnum(*prev) || *prev == '$' || *prev == '_' || + *prev == '.' 
|| *prev == '/') { + + ++prev; + + } + + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); + + } else { + + tokens.push_back(std::string(prev, prev + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *prev); ++prev; } - tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + } + + } + + if (match[0].length() > 0) { tokens.push_back(match[0]); } + + } + + DEBUG(stderr, "AFTER all strings\n"); + + if (cur < ende) { + + while (cur < ende) { + + if (isspace(*cur)) { + + auto start = cur; + while (isspace(*cur)) { + + ++cur; + + } + + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, tokens[tokens.size() - 1].c_str()); - } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { + } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { - auto start = prev; - while (isalnum(*prev) || *prev == '$' || *prev == '_' || - *prev == '.' || *prev == '/') { + auto start = cur; + while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' || + *cur == '/') { - ++prev; + ++cur; } - tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, tokens[tokens.size() - 1].c_str()); } else { - tokens.push_back(std::string(prev, prev + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *prev); - ++prev; + tokens.push_back(std::string(cur, cur + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *cur); + ++cur; } @@ -431,48 +510,227 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[0].length() > 0) { tokens.push_back(match[0]); } + } else { - } + // alternative tokenize - DEBUG(stderr, "AFTER all strings\n"); + while (regex_search(cur, ende, match, regex_string)) { - if (cur < ende) { + prev = cur; + found = match[0].first; + cur = match[0].second; + DEBUG(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + match[0].str().c_str(), prev - input.begin(), match.position(), + cur - input.begin()); + if (prev < found) { // there are items between search start and find + sregex_token_iterator it{prev, found, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { - while (cur < ende) { + return s.size() == 0; - if (isspace(*cur)) { + }), - auto start = cur; - while (isspace(*cur)) { + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - ++cur; + if (unlikely(debug)) { + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; + + } } - tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + for (auto token : tokenized) { - } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; - auto start = cur; - while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' 
|| - *cur == '/') { + while (regex_search(c, e, m, regex_word)) { - ++cur; + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + while (p < f) { + + if (unlikely(debug)) { + + string foo(p, p + 1); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(p, p + 1)); + ++p; + + } + + /* + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", + foo.c_str()); tokens.push_back(std::string(p, f)); + */ + + } + + DEBUG( + stderr, + "SUBstring \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + while (c < e) { + + if (unlikely(debug)) { + + string foo(c, c + 1); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(c, c + 1)); + ++c; + + } + + /* + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", + foo.c_str()); + + } + + tokens.push_back(std::string(c, e)); + */ + + } } - tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + } - } else { + if (match[0].length() > 0) { tokens.push_back(match[0]); } - tokens.push_back(std::string(cur, cur + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *cur); - ++cur; + } + + if (cur < ende) { + + sregex_token_iterator it{cur, ende, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase( + std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { return s.size() == 0; }), + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + + if (unlikely(debug)) { + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; + + } + + } + + for (auto token : tokenized) { + + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; + + while (regex_search(c, e, m, regex_word)) { + + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + while (p < f) { + + if (unlikely(debug)) { + + string foo(p, p + 1); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(p, p + 1)); + ++p; + + } + + /* + if (unlikely(debug)) { + + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", + foo.c_str()); + + } + + tokens.push_back(std::string(p, f)); + */ + + } + + DEBUG(stderr, + "SUB2string \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + while (c < e) { + + if (unlikely(debug)) { + + string foo(c, c + 1); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(c, c + 1)); + ++c; + + } + + /* + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(c, e)); + */ + + } } @@ -483,9 +741,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (unlikely(debug)) { DEBUG(stderr, "DUMPING TOKENS:\n"); + u32 size_1 = tokens.size() - 1; for (u32 i = 0; i < tokens.size(); ++i) { DEBUG(stderr, "%s", tokens[i].c_str()); + if (unlikely(alternative_tokenize && i < size_1)) { + + DEBUG(stderr, "%s", whitespace.c_str()); + + } } @@ -556,6 +820,22 @@ extern "C" 
my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } + if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } + if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } + if (getenv("AUTOTOKENS_WHITESPACE")) { + + whitespace = getenv("AUTOTOKENS_WHITESPACE"); + + } + + if (getenv("AUTOTOKENS_COMMENT")) { + + char buf[256]; + snprintf(buf, sizeof(buf), "(%s.*)([\r\n]?)", getenv("AUTOTOKENS_COMMENT")); + regex_comment_custom = new regex(buf, regex::optimize); + + } + data->afl = afl_ptr = afl; // set common whitespace tokens From 70f4b456faf8e361f6e0a34246708380c94cb36e Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 13:58:27 +0100 Subject: [PATCH 10/77] fixes --- custom_mutators/autotokens/Makefile | 7 ++++++- custom_mutators/autotokens/autotokens.cpp | 24 ++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 5dd52dee..8af63635 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,7 +1,12 @@ +ifdef debug + CFLAGS += "-fsanitize=address -Wall" + CXX := clang++ +endif + all: autotokens.so autotokens.so: autotokens.cpp $(CXX) -g -O3 $(CFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o clean: - rm -f autotokens.so *~ core \ No newline at end of file + rm -f autotokens.so *~ core diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 28ef91e2..57c35846 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -48,7 +48,7 @@ static string whitespace = AUTOTOKENS_WHITESPACE; static regex *regex_comment_custom; static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", regex::multiline | regex::optimize); -static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); static regex regex_whitespace(R"([ \t]+)", regex::optimize); static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); static vector *s; // the structure of the currently selected input @@ -514,7 +514,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // alternative tokenize - while (regex_search(cur, ende, match, regex_string)) { + while (regex_search(cur, ende, match, regex_string, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { prev = cur; found = match[0].first; @@ -553,7 +556,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word)) { + while (regex_search(c, e, m, regex_word, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { p = c; f = m[0].first; @@ -658,7 +664,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word)) { + while (regex_search(c, e, m, regex_word, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { p = c; f = m[0].first; @@ -820,6 +829,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } + if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if 
(getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } if (getenv("AUTOTOKENS_WHITESPACE")) { @@ -890,9 +900,9 @@ extern "C" void afl_custom_deinit(my_mutator_t *data) { fprintf(stderr, "\n\nAutotoken mutator statistics:\n" - " Number of all seen tokens: %lu\n" - " Number of input structures: %lu\n" - " Number of all items in structures: %lu\n\n", + " Number of all seen tokens: %u\n" + " Number of input structures: %u\n" + " Number of all items in structures: %llu\n\n", current_id - 1, valid_structures, all_structure_items); free(data); From 0db662db7b433a08b01de7f5a989843450919b88 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 14:21:44 +0100 Subject: [PATCH 11/77] fix --- custom_mutators/autotokens/autotokens.cpp | 78 ++++++++++++----------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 57c35846..94f86413 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -851,43 +851,47 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { // set common whitespace tokens // we deliberately do not put uncommon ones here to these will count as // identifier tokens. - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t"] = current_id; - id_to_token[current_id] = "\t"; - ++current_id; - token_to_id["\n"] = current_id; - id_to_token[current_id] = "\n"; - ++current_id; - token_to_id["\r\n"] = current_id; - id_to_token[current_id] = "\r\n"; - ++current_id; - token_to_id[" \n"] = current_id; - id_to_token[current_id] = " \n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t"] = current_id; - id_to_token[current_id] = "\t\t"; - ++current_id; - token_to_id["\n\n"] = current_id; - id_to_token[current_id] = "\n\n"; - ++current_id; - token_to_id["\r\n\r\n"] = current_id; - id_to_token[current_id] = "\r\n\r\n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t\t\t"] = current_id; - id_to_token[current_id] = "\t\t\t\t"; - ++current_id; - token_to_id["\n\n\n\n"] = current_id; - id_to_token[current_id] = "\n\n\n\n"; - ++current_id; - whitespace_ids = current_id; + if (!alternative_tokenize) { + + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t"] = current_id; + id_to_token[current_id] = "\t"; + ++current_id; + token_to_id["\n"] = current_id; + id_to_token[current_id] = "\n"; + ++current_id; + token_to_id["\r\n"] = current_id; + id_to_token[current_id] = "\r\n"; + ++current_id; + token_to_id[" \n"] = current_id; + id_to_token[current_id] = " \n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t"] = current_id; + id_to_token[current_id] = "\t\t"; + ++current_id; + token_to_id["\n\n"] = current_id; + id_to_token[current_id] = "\n\n"; + ++current_id; + token_to_id["\r\n\r\n"] = current_id; + id_to_token[current_id] = "\r\n\r\n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t\t\t"] = current_id; + id_to_token[current_id] = "\t\t\t\t"; + ++current_id; + token_to_id["\n\n\n\n"] = current_id; + id_to_token[current_id] = "\n\n\n\n"; + ++current_id; + whitespace_ids = current_id; + + } return data; From 
22f757a169d3da3081306c0f861ef99a509073fe Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 14:33:06 +0100 Subject: [PATCH 12/77] fix --- custom_mutators/autotokens/autotokens.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 94f86413..7aecb010 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -129,7 +129,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, new_item = rand_below(afl_ptr, current_id); - } while (new_item >= whitespace_ids); + } while (!alternative_tokenize && new_item >= whitespace_ids); u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); From 8fe5e29104fc514551bbc926c5142dac68562b43 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 14:56:26 +0100 Subject: [PATCH 13/77] ignore timeout env option --- docs/env_variables.md | 3 +++ include/afl-fuzz.h | 2 +- include/envs.h | 1 + src/afl-fuzz-bitmap.c | 6 ++++++ src/afl-fuzz-state.c | 7 +++++++ src/afl-fuzz.c | 3 ++- 6 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/env_variables.md b/docs/env_variables.md index 22a5c386..0a57d190 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -354,6 +354,9 @@ checks or alter some of the more exotic semantics of the tool: - Setting `AFL_KEEP_TIMEOUTS` will keep longer running inputs if they reach new coverage + - On the contrary, if you are not interested in any timeouts, you can set + `AFL_IGNORE_TIMEOUTS` to get a bit of speed instead. + - `AFL_EXIT_ON_SEED_ISSUES` will restore the vanilla afl-fuzz behavior which does not allow crashes or timeout seeds in the initial -i corpus. diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index edef9207..69fea579 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -398,7 +398,7 @@ typedef struct afl_env_vars { afl_cycle_schedules, afl_expand_havoc, afl_statsd, afl_cmplog_only_new, afl_exit_on_seed_issues, afl_try_affinity, afl_ignore_problems, afl_keep_timeouts, afl_pizza_mode, afl_no_crash_readme, - afl_no_startup_calibration; + afl_ignore_timeouts, afl_no_startup_calibration; u8 *afl_tmpdir, *afl_custom_mutator_library, *afl_python_module, *afl_path, *afl_hang_tmout, *afl_forksrv_init_tmout, *afl_preload, diff --git a/include/envs.h b/include/envs.h index f4cdf390..0770f94d 100644 --- a/include/envs.h +++ b/include/envs.h @@ -103,6 +103,7 @@ static char *afl_environment_variables[] = { "AFL_HARDEN", "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES", "AFL_IGNORE_PROBLEMS", + "AFL_IGNORE_TIMEOUTS", "AFL_IGNORE_UNKNOWN_ENVS", "AFL_IMPORT_FIRST", "AFL_INPUT_LEN_MIN", diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c index 485b82db..b4e9537e 100644 --- a/src/afl-fuzz-bitmap.c +++ b/src/afl-fuzz-bitmap.c @@ -457,6 +457,12 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) { if (unlikely(len == 0)) { return 0; } + if (unlikely(fault == FSRV_RUN_TMOUT && afl->afl_env.afl_ignore_timeouts)) { + + return 0; + + } + u8 fn[PATH_MAX]; u8 *queue_fn = ""; u8 new_bits = 0, keeping = 0, res, classified = 0, is_timeout = 0; diff --git a/src/afl-fuzz-state.c b/src/afl-fuzz-state.c index 896b5f71..104b1e4b 100644 --- a/src/afl-fuzz-state.c +++ b/src/afl-fuzz-state.c @@ -292,6 +292,13 @@ void read_afl_environment(afl_state_t *afl, char **envp) { afl->afl_env.afl_ignore_problems = get_afl_env(afl_environment_variables[i]) ? 
1 : 0; + } else if (!strncmp(env, "AFL_IGNORE_TIMEOUTS", + + afl_environment_variable_len)) { + + afl->afl_env.afl_ignore_timeouts = + get_afl_env(afl_environment_variables[i]) ? 1 : 0; + } else if (!strncmp(env, "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES", afl_environment_variable_len)) { diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 5e0ecd1e..4db55b5e 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -258,8 +258,9 @@ static void usage(u8 *argv0, int more_help) { "AFL_FORKSRV_INIT_TMOUT: time spent waiting for forkserver during startup (in ms)\n" "AFL_HANG_TMOUT: override timeout value (in milliseconds)\n" "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES: don't warn about core dump handlers\n" - "AFL_IGNORE_UNKNOWN_ENVS: don't warn on unknown env vars\n" "AFL_IGNORE_PROBLEMS: do not abort fuzzing if an incorrect setup is detected\n" + "AFL_IGNORE_TIMEOUTS: do not process or save any timeouts\n" + "AFL_IGNORE_UNKNOWN_ENVS: don't warn on unknown env vars\n" "AFL_IMPORT_FIRST: sync and import test cases from other fuzzer instances first\n" "AFL_INPUT_LEN_MIN/AFL_INPUT_LEN_MAX: like -g/-G set min/max fuzz length produced\n" "AFL_PIZZA_MODE: 1 - enforce pizza mode, 0 - disable for April 1st\n" From 14d8eb9e40a6329abcb2f153174b543349c68c13 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 22:17:14 +0100 Subject: [PATCH 14/77] autotoken: splicing; splice_optout --- custom_mutators/autotokens/Makefile | 6 +- custom_mutators/autotokens/autotokens.cpp | 103 ++++++++++++++++++++-- docs/custom_mutators.md | 11 +++ include/afl-fuzz.h | 14 +++ src/afl-fuzz-mutators.c | 13 +++ src/afl-fuzz-one.c | 3 +- src/afl-fuzz-python.c | 16 ++++ 7 files changed, 155 insertions(+), 11 deletions(-) diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 8af63635..ab1da4b6 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,5 +1,9 @@ ifdef debug - CFLAGS += "-fsanitize=address -Wall" + CFLAGS += -fsanitize=address -Wall + CXX := clang++ +endif +ifdef DEBUG + CFLAGS += -fsanitize=address -Wall CXX := clang++ endif diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 7aecb010..c9ec4352 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -19,6 +19,13 @@ extern "C" { #define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_WHITESPACE " " +#define AUTOTOKENS_SIZE_MIN 8 +#define AUTOTOKENS_SPLICE_MIN 4 +#define AUTOTOKENS_SPLICE_MAX 64 + +#if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN + #error SPLICE_MIN must be lower than SIZE_MIN +#endif using namespace std; @@ -42,6 +49,7 @@ static u32 extras_cnt, a_extras_cnt; static u64 all_spaces, all_tabs, all_lf, all_ws; static u64 all_structure_items; static unordered_map *> file_mapping; +static unordered_map *> id_mapping; static unordered_map token_to_id; static unordered_map id_to_token; static string whitespace = AUTOTOKENS_WHITESPACE; @@ -76,6 +84,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u8 **out_buf, u8 *add_buf, size_t add_buf_size, size_t max_size) { + (void)(data); + if (s == NULL) { *out_buf = NULL; @@ -92,14 +102,14 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); - u32 max_rand = 7; + u32 max_rand = 14; for (i = 0; i < rounds; ++i) { switch 
(rand_below(afl_ptr, max_rand)) { /* CHANGE */ - case 0 ... 3: /* fall through */ + case 0 ... 7: /* fall through */ { u32 pos = rand_below(afl_ptr, m_size); @@ -122,18 +132,19 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* INSERT (m_size +1 so we insert also after last place) */ - case 4 ... 5: { + case 8 ... 9: { u32 new_item; do { new_item = rand_below(afl_ptr, current_id); - } while (!alternative_tokenize && new_item >= whitespace_ids); + } while (unlikely(!alternative_tokenize && new_item >= whitespace_ids)); u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; + DEBUG(stderr, "INS: %u at %u\n", new_item, pos); if (likely(!alternative_tokenize)) { @@ -168,8 +179,63 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + /* SPLICING */ + case 10 ... 11: { + + u32 strategy = rand_below(afl_ptr, 4), dst_off, n; + auto src = id_mapping[rand_below(afl_ptr, valid_structures)]; + u32 src_size = src->size(); + u32 src_off = rand_below(afl_ptr, src_size - AUTOTOKENS_SPLICE_MIN); + u32 rand_r = 1 + MAX(AUTOTOKENS_SPLICE_MIN, + MIN(AUTOTOKENS_SPLICE_MAX, src_size - src_off)); + + switch (strategy) { + + // insert + case 0: { + + dst_off = rand_below(afl_ptr, m_size); + n = AUTOTOKENS_SPLICE_MIN + + rand_below(afl_ptr, MIN(AUTOTOKENS_SPLICE_MAX, + rand_r - AUTOTOKENS_SPLICE_MIN)); + m.insert(m.begin() + dst_off, src->begin() + src_off, + src->begin() + src_off + n); + m_size += n; + DEBUG(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + break; + + } + + // overwrite + default: { + + dst_off = rand_below(afl_ptr, m_size - AUTOTOKENS_SPLICE_MIN); + n = AUTOTOKENS_SPLICE_MIN + + rand_below( + afl_ptr, + MIN(AUTOTOKENS_SPLICE_MAX - AUTOTOKENS_SPLICE_MIN, + MIN(m_size - dst_off - AUTOTOKENS_SPLICE_MIN, + src_size - src_off - AUTOTOKENS_SPLICE_MIN))); + + for (u32 i = 0; i < n; ++i) { + + m[dst_off + i] = (*src)[src_off + i]; + + } + + DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); + break; + + } + + } + + break; + + } + /* ERASE - only if large enough */ - case 6: { + case 12 ... 
13: { if (m_size > 8) { @@ -178,7 +244,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } else { - max_rand = 6; + max_rand = 12; } @@ -236,12 +302,15 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { + (void)(data); + if (likely(!debug)) { if ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; + DEBUG(stderr, "cmplog not ascii or only_fav and not favorite\n"); return 0; } @@ -334,8 +403,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again - DEBUG(stderr, "Too short (%lu) %s\n", len, filename); s = NULL; + DEBUG(stderr, "Too short (%lu) %s\n", len, filename); return 0; } @@ -362,8 +431,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (((len * AFL_TXT_MIN_PERCENT) / 100) > valid_chars) { file_mapping[fn] = NULL; - DEBUG(stderr, "Not text (%lu) %s\n", len, filename); s = NULL; + DEBUG(stderr, "Not text (%lu) %s\n", len, filename); return 0; } @@ -766,6 +835,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } + if (tokens.size() < AUTOTOKENS_SIZE_MIN) { + + file_mapping[fn] = NULL; + s = NULL; + DEBUG(stderr, "too few tokens\n"); + return 0; + + } + /* Now we transform the tokens into an ID list and saved that */ structure = new vector(); @@ -791,8 +869,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // save the token structure to the file mapping file_mapping[fn] = structure; - s = structure; + id_mapping[valid_structures] = structure; ++valid_structures; + s = structure; all_structure_items += structure->size(); // we are done! @@ -897,6 +976,12 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } +extern "C" void afl_custom_splice_optout(my_mutator_t *data) { + + (void)(data); + +} + extern "C" void afl_custom_deinit(my_mutator_t *data) { /* we use this to print statistics at exit :-) diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md index 4ffeda7a..322caa5b 100644 --- a/docs/custom_mutators.md +++ b/docs/custom_mutators.md @@ -48,6 +48,7 @@ C/C++: ```c void *afl_custom_init(afl_state_t *afl, unsigned int seed); unsigned int afl_custom_fuzz_count(void *data, const unsigned char *buf, size_t buf_size); +void afl_custom_splice_optout(void *data); size_t afl_custom_fuzz(void *data, unsigned char *buf, size_t buf_size, unsigned char **out_buf, unsigned char *add_buf, size_t add_buf_size, size_t max_size); const char *afl_custom_describe(void *data, size_t max_description_len); size_t afl_custom_post_process(void *data, unsigned char *buf, size_t buf_size, unsigned char **out_buf); @@ -72,6 +73,9 @@ def init(seed): def fuzz_count(buf): return cnt +def splice_optout() + pass + def fuzz(buf, add_buf, max_size): return mutated_out @@ -132,6 +136,13 @@ def deinit(): # optional for Python for a specific queue entry, use this function. This function is most useful if `AFL_CUSTOM_MUTATOR_ONLY` is **not** used. +- `splice_optout` (optional): + + If this function is present, no splicing target is passed to the `fuzz` + function. This saves time if splicing data is not needed by the custom + fuzzing function. + This function is never called, just needs to be present to activate. + - `fuzz` (optional): This method performs custom mutations on a given input. 
It also accepts an diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 69fea579..1e8d085d 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -344,6 +344,7 @@ enum { /* 12 */ PY_FUNC_INTROSPECTION, /* 13 */ PY_FUNC_DESCRIBE, /* 14 */ PY_FUNC_FUZZ_SEND, + /* 15 */ PY_FUNC_SPLICE_OPTOUT, PY_FUNC_COUNT }; @@ -495,6 +496,7 @@ typedef struct afl_state { no_unlink, /* do not unlink cur_input */ debug, /* Debug mode */ custom_only, /* Custom mutator only mode */ + custom_splice_optout, /* Custom mutator no splice buffer */ is_main_node, /* if this is the main node */ is_secondary_node, /* if this is a secondary instance */ pizza_is_served; /* pizza mode */ @@ -828,6 +830,17 @@ struct custom_mutator { */ u32 (*afl_custom_fuzz_count)(void *data, const u8 *buf, size_t buf_size); + /** + * Opt-out of a splicing input for the fuzz mutator + * + * Empty dummy function. It's presence tells afl-fuzz not to pass a + * splice data pointer and len. + * + * @param data pointer returned in afl_custom_init by this custom mutator + * @noreturn + */ + void (*afl_custom_splice_optout)(void *data); + /** * Perform custom mutations on a given input * @@ -1057,6 +1070,7 @@ u8 havoc_mutation_probability_py(void *); u8 queue_get_py(void *, const u8 *); const char *introspection_py(void *); u8 queue_new_entry_py(void *, const u8 *, const u8 *); +void splice_optout(void *); void deinit_py(void *); #endif diff --git a/src/afl-fuzz-mutators.c b/src/afl-fuzz-mutators.c index 22e5262e..ce43064a 100644 --- a/src/afl-fuzz-mutators.c +++ b/src/afl-fuzz-mutators.c @@ -358,6 +358,19 @@ struct custom_mutator *load_custom_mutator(afl_state_t *afl, const char *fn) { } + /* "afl_custom_splice_optout", optional, never called */ + mutator->afl_custom_splice_optout = dlsym(dh, "afl_custom_splice_optout"); + if (!mutator->afl_custom_splice_optout) { + + ACTF("optional symbol 'afl_custom_splice_optout' not found."); + + } else { + + OKF("Found 'afl_custom_splice_optout'."); + afl->custom_splice_optout = 1; + + } + /* "afl_custom_fuzz_send", optional */ mutator->afl_custom_fuzz_send = dlsym(dh, "afl_custom_fuzz_send"); if (!mutator->afl_custom_fuzz_send) { diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index eaf65987..5e352dcb 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -1954,7 +1954,8 @@ custom_mutator_stage: u32 target_len = 0; /* check if splicing makes sense yet (enough entries) */ - if (likely(afl->ready_for_splicing_count > 1)) { + if (likely(!afl->custom_splice_optout && + afl->ready_for_splicing_count > 1)) { /* Pick a random other queue entry for passing to external API that has the necessary length */ diff --git a/src/afl-fuzz-python.c b/src/afl-fuzz-python.c index b509b936..69c305f7 100644 --- a/src/afl-fuzz-python.c +++ b/src/afl-fuzz-python.c @@ -248,6 +248,8 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) { PyObject_GetAttrString(py_module, "queue_get"); py_functions[PY_FUNC_FUZZ_SEND] = PyObject_GetAttrString(py_module, "fuzz_send"); + py_functions[PY_FUNC_SPLICE_OPTOUT] = + PyObject_GetAttrString(py_module, "splice_optout"); py_functions[PY_FUNC_QUEUE_NEW_ENTRY] = PyObject_GetAttrString(py_module, "queue_new_entry"); py_functions[PY_FUNC_INTROSPECTION] = @@ -394,6 +396,13 @@ void deinit_py(void *py_mutator) { } +void splice_optout_py(void *py_mutator) { + + // this is never called + (void)(py_mutator); + +} + struct custom_mutator *load_custom_mutator_py(afl_state_t *afl, char *module_name) { @@ -474,6 +483,13 @@ struct custom_mutator 
*load_custom_mutator_py(afl_state_t *afl, } + if (py_functions[PY_FUNC_SPLICE_OPTOUT]) { + + mutator->afl_custom_splice_optout = splice_optout_py; + afl->custom_splice_optout = 1; + + } + if (py_functions[PY_FUNC_QUEUE_NEW_ENTRY]) { mutator->afl_custom_queue_new_entry = queue_new_entry_py; From 17752465e6b3c70fd0104fae7bb1f84c1cb8bb66 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 22:31:55 +0100 Subject: [PATCH 15/77] nit --- custom_mutators/autotokens/README | 2 ++ custom_mutators/autotokens/TODO | 8 +------- custom_mutators/autotokens/autotokens.cpp | 7 ++----- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index f6e9c753..f82dcd98 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -11,6 +11,8 @@ If you have a dictionary (`-x`) this improves this custom grammar mutator. If **not** running with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes. +Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! + ## Configuration via environment variables `AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 2e5e384f..95b79373 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,12 +1,6 @@ -whitespace belassen oder notieren? MAYBE -0=space 1=tab 2=linefeed - cmplog: only add tokens that were found to fit? create from thin air if no good seed after a cycle and dict large enough? (static u32 no_of_struct_inputs;) -splice insert, splice overwrite -(linefeed, semicolon) - - +splicing -> check if whitespace/token is needed \ No newline at end of file diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index c9ec4352..5e683455 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -217,11 +217,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, MIN(m_size - dst_off - AUTOTOKENS_SPLICE_MIN, src_size - src_off - AUTOTOKENS_SPLICE_MIN))); - for (u32 i = 0; i < n; ++i) { - - m[dst_off + i] = (*src)[src_off + i]; - - } + copy(src->begin() + src_off, src->begin() + src_off + n, + m.begin() + dst_off); DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); break; From 45567791c66e128361a7533481b385497ced881f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 23:09:16 +0100 Subject: [PATCH 16/77] autotokens: define disable splice --- custom_mutators/autotokens/autotokens.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 5e683455..f6ab9ddd 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -22,6 +22,7 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 +#define AUTOTOKENS_SPLICE_DISABLE 0 #if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN #error SPLICE_MIN must be lower than SIZE_MIN @@ -102,7 +103,13 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); - u32 max_rand = 14; +#if AUTOTOKENS_SPLICE_DISABLE == 1 + #define AUTOTOKENS_MUT_MAX 12 +#else + #define AUTOTOKENS_MUT_MAX 14 
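  // max_rand starts at AUTOTOKENS_MUT_MAX; rand_below() then selects the
  // strategy: 0-7 change, 8-9 insert, 10-11 splice (when compiled in), and
  // any remaining value falls through to the default case, which erases.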
+#endif + + u32 max_rand = AUTOTOKENS_MUT_MAX; for (i = 0; i < rounds; ++i) { @@ -179,6 +186,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } +#if AUTOTOKENS_SPLICE_DISABLE != 1 /* SPLICING */ case 10 ... 11: { @@ -230,9 +238,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; } +#endif /* ERASE - only if large enough */ - case 12 ... 13: { + default: { if (m_size > 8) { @@ -241,7 +250,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } else { - max_rand = 12; + max_rand = AUTOTOKENS_MUT_MAX - 2; } From 151a8facae2048a26c65658dfec507233a677fb0 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 23:16:18 +0100 Subject: [PATCH 17/77] autotokens: stats --- custom_mutators/autotokens/autotokens.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f6ab9ddd..4f3289c9 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -22,7 +22,9 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 -#define AUTOTOKENS_SPLICE_DISABLE 0 +#ifndef AUTOTOKENS_SPLICE_DISABLE + #define AUTOTOKENS_SPLICE_DISABLE 0 +#endif #if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN #error SPLICE_MIN must be lower than SIZE_MIN @@ -49,6 +51,7 @@ static u32 whitespace_ids; static u32 extras_cnt, a_extras_cnt; static u64 all_spaces, all_tabs, all_lf, all_ws; static u64 all_structure_items; +static u64 fuzz_count; static unordered_map *> file_mapping; static unordered_map *> id_mapping; static unordered_map token_to_id; @@ -238,6 +241,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; } + #endif /* ERASE - only if large enough */ @@ -298,6 +302,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; + ++fuzz_count; return mutated_size; } @@ -997,8 +1002,9 @@ extern "C" void afl_custom_deinit(my_mutator_t *data) { "\n\nAutotoken mutator statistics:\n" " Number of all seen tokens: %u\n" " Number of input structures: %u\n" - " Number of all items in structures: %llu\n\n", - current_id - 1, valid_structures, all_structure_items); + " Number of all items in structures: %llu\n" + " Number of total fuzzes: %llu\n\n", + current_id - 1, valid_structures, all_structure_items, fuzz_count); free(data); From eeca3a0b2939c605497e9b3a615ee4a466f4a3f2 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 11:52:19 +0100 Subject: [PATCH 18/77] lots of fixes --- custom_mutators/autotokens/TODO | 2 +- custom_mutators/autotokens/autotokens.cpp | 424 ++++++++++++++-------- docs/custom_mutators.md | 1 + include/afl-fuzz.h | 11 +- src/afl-fuzz-one.c | 3 +- 5 files changed, 279 insertions(+), 162 deletions(-) diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 95b79373..2e39511c 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -3,4 +3,4 @@ cmplog: only add tokens that were found to fit? create from thin air if no good seed after a cycle and dict large enough? 
(static u32 no_of_struct_inputs;) -splicing -> check if whitespace/token is needed \ No newline at end of file +splicing -> check if whitespace/token is needed diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 4f3289c9..102bea0f 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -38,8 +38,10 @@ typedef struct my_mutator { } my_mutator_t; -#define DEBUG \ +#undef DEBUGF +#define DEBUGF \ if (unlikely(debug)) fprintf +#define IFDEBUG if (unlikely(debug)) static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; @@ -57,12 +59,12 @@ static unordered_map *> id_mapping; static unordered_map token_to_id; static unordered_map id_to_token; static string whitespace = AUTOTOKENS_WHITESPACE; +static string output; static regex *regex_comment_custom; -static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", - regex::multiline | regex::optimize); -static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); -static regex regex_whitespace(R"([ \t]+)", regex::optimize); -static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); +static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); static vector *s; // the structure of the currently selected input u32 good_whitespace_or_singleval() { @@ -104,7 +106,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, MAX(AUTOTOKENS_CHANGE_MIN, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256)); - // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); #if AUTOTOKENS_SPLICE_DISABLE == 1 #define AUTOTOKENS_MUT_MAX 12 @@ -112,7 +114,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, #define AUTOTOKENS_MUT_MAX 14 #endif - u32 max_rand = AUTOTOKENS_MUT_MAX; + u32 max_rand = AUTOTOKENS_MUT_MAX, new_item, pos; for (i = 0; i < rounds; ++i) { @@ -122,8 +124,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, case 0 ... 7: /* fall through */ { - u32 pos = rand_below(afl_ptr, m_size); - u32 cur_item = m[pos], new_item; + pos = rand_below(afl_ptr, m_size); + u32 cur_item = m[pos]; do { new_item = rand_below(afl_ptr, current_id); @@ -135,7 +137,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, ((whitespace_ids < new_item && whitespace_ids >= cur_item) || (whitespace_ids >= new_item && whitespace_ids < cur_item))))); - DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item); + DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; break; @@ -144,7 +146,6 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, /* INSERT (m_size +1 so we insert also after last place) */ case 8 ... 
9: { - u32 new_item; do { new_item = rand_below(afl_ptr, current_id); @@ -154,7 +155,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; - DEBUG(stderr, "INS: %u at %u\n", new_item, pos); + DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); if (likely(!alternative_tokenize)) { @@ -212,7 +213,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, m.insert(m.begin() + dst_off, src->begin() + src_off, src->begin() + src_off + n); m_size += n; - DEBUG(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + DEBUGF(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + break; } @@ -231,13 +233,36 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, copy(src->begin() + src_off, src->begin() + src_off + n, m.begin() + dst_off); - DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); + DEBUGF(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); break; } } + if (likely(!alternative_tokenize)) { + + // do we need a whitespace/token at the beginning? + if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && + id_to_token[m[dst_off]].size() > 1) { + + m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); + ++m_size; + + } + + // do we need a whitespace/token at the end? + if (dst_off + n < m_size && + id_to_token[m[dst_off + n - 1]].size() > 1 && + id_to_token[m[dst_off + n]].size() > 1) { + + m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); + ++m_size; + + } + + } + break; } @@ -249,11 +274,32 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, if (m_size > 8) { - m.erase(m.begin() + rand_below(afl_ptr, m_size)); - --m_size; + do { + + pos = rand_below(afl_ptr, m_size); + + } while (unlikely(pos < whitespace_ids)); + + // if what we delete will result in a missing whitespace/token, + // instead of deleting we switch the item to a whitespace or token. + if (likely(!alternative_tokenize) && pos && pos < m_size && + id_to_token[m[pos - 1]].size() > 1 && + id_to_token[m[pos + 1]].size() > 1) { + + m[pos] = good_whitespace_or_singleval(); + + } else { + + m.erase(m.begin() + pos); + --m_size; + + } } else { + // if the data is already too small do not try to make it smaller + // again this run. 
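          // (lowering max_rand by two removes the only values that reach
          // this default ERASE branch, so no further erases happen during
          // the rest of this call)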
+ max_rand = AUTOTOKENS_MUT_MAX - 2; } @@ -262,14 +308,12 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - // TODO: add full line insert splice, replace splace, delete - } } - string output; - u32 m_size_1 = m_size - 1; + u32 m_size_1 = m_size - 1; + output = ""; for (i = 0; i < m_size; ++i) { @@ -282,31 +326,108 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - u32 mutated_size = output.size(); - u8 *mutated_out = (u8 *)afl_realloc((void **)out_buf, mutated_size); + u32 mutated_size = (u32)output.size(); + u8 *mutated_out = (u8 *)output.data(); - if (unlikely(!mutated_out)) { + if (unlikely(mutated_size > max_size)) { mutated_size = max_size; } - *out_buf = NULL; - return 0; + IFDEBUG { - } - - if (unlikely(debug)) { - - DEBUG(stderr, "MUTATED to %u bytes:\n", mutated_size); + DEBUGF(stderr, "MUTATED to %u bytes:\n", mutated_size); fwrite(output.data(), 1, mutated_size, stderr); - DEBUG(stderr, "\n---\n"); + DEBUGF(stderr, "\n---\n"); } - memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; ++fuzz_count; return mutated_size; } +/* I get f*cking stack overflow using C++ regex with a regex of + "\"[[:print:]]*?\"" if this matches a long string even with regex::optimize + enabled :-( */ +u8 my_search_string(string::const_iterator cur, string::const_iterator ende, + string::const_iterator *match_begin, + string::const_iterator *match_end) { + + string::const_iterator start = cur, found_begin; + u8 quote_type = 0; + + while (cur < ende) { + + switch (*cur) { + + case '"': { + + if (cur == start || *(cur - 1) != '\\') { + + if (!quote_type) { + + found_begin = cur; + quote_type = 1; + + } else if (quote_type == 1) { + + *match_begin = found_begin; + *match_end = cur + 1; + return 1; + + } + + } + + break; + + } + + case '\'': { + + if (cur == start || *(cur - 1) != '\\') { + + if (!quote_type) { + + found_begin = cur; + quote_type = 2; + + } else if (quote_type == 2) { + + *match_begin = found_begin; + *match_end = cur + 1; + return 1; + + } + + } + + break; + + } + + case '\n': + case '\r': + case 0: { + + quote_type = 0; + break; + + } + + default: + if (unlikely(quote_type && !isprint(*cur))) { quote_type = 0; } + break; + + } + + ++cur; + + } + + return 0; + +} + /* We are not using afl_custom_queue_new_entry() because not every corpus entry will be necessarily fuzzed. 
so we use afl_custom_queue_get() instead */ @@ -321,7 +442,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; - DEBUG(stderr, "cmplog not ascii or only_fav and not favorite\n"); + DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); return 0; } @@ -356,7 +477,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } ++extras_cnt; - DEBUG(stderr, "Added from dictionary: \"%s\"\n", ptr); + DEBUGF(stderr, "Added from dictionary: \"%s\"\n", ptr); } @@ -385,7 +506,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } ++a_extras_cnt; - DEBUG(stderr, "Added from auto dictionary: \"%s\"\n", ptr); + DEBUGF(stderr, "Added from auto dictionary: \"%s\"\n", ptr); } @@ -415,7 +536,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; - DEBUG(stderr, "Too short (%lu) %s\n", len, filename); + DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); return 0; } @@ -443,14 +564,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; - DEBUG(stderr, "Not text (%lu) %s\n", len, filename); + DEBUGF(stderr, "Not text (%lu) %s\n", len, filename); return 0; } } - // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", + // DEBUGF(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); if (regex_comment_custom) { @@ -463,15 +584,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), - filename, input.c_str()); + DEBUGF(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), + filename, input.c_str()); u32 spaces = count(input.begin(), input.end(), ' '); u32 tabs = count(input.begin(), input.end(), '\t'); u32 linefeeds = count(input.begin(), input.end(), '\n'); bool ends_with_linefeed = input[input.length() - 1] == '\n'; - DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, - linefeeds, ends_with_linefeed); + DEBUGF(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, + linefeeds, ends_with_linefeed); all_spaces += spaces; all_tabs += tabs; all_lf += linefeeds; @@ -479,25 +600,28 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // now extract all tokens vector tokens; - smatch match; - string::const_iterator cur = input.begin(), ende = input.end(), found, prev; + string::const_iterator cur = input.begin(), ende = input.end(), found, prev, + match_begin, match_end; - DEBUG(stderr, "START!\n"); + DEBUGF(stderr, "START!\n"); if (likely(!alternative_tokenize)) { - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (my_search_string(cur, ende, &match_begin, &match_end)) { prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + found = match_begin; + cur = match_end; + + IFDEBUG { + + string foo(match_begin, match_end); + DEBUGF(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + foo.c_str(), prev - input.begin(), found - prev, + cur - input.begin()); + + } if (prev < found) { // there are items between search start and find while (prev < found) { @@ -512,8 +636,8 @@ extern "C" 
unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { @@ -525,14 +649,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); + tokens.push_back(string(start, prev)); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); } else { - tokens.push_back(std::string(prev, prev + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *prev); + tokens.push_back(string(prev, prev + 1)); + DEBUGF(stderr, "OTHER \"%c\"\n", *prev); ++prev; } @@ -541,11 +665,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[0].length() > 0) { tokens.push_back(match[0]); } + tokens.push_back(string(match_begin, match_end)); + DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str()); } - DEBUG(stderr, "AFTER all strings\n"); + DEBUGF(stderr, "AFTER all strings\n"); if (cur < ende) { @@ -561,8 +686,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { @@ -575,13 +700,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); } else { tokens.push_back(std::string(cur, cur + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *cur); + DEBUGF(stderr, "OTHER \"%c\"\n", *cur); ++cur; } @@ -593,19 +718,21 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } else { // alternative tokenize - - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (my_search_string(cur, ende, &match_begin, &match_end)) { prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + found = match_begin; + cur = match_end; + IFDEBUG { + + string foo(match_begin, match_end); + DEBUGF(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + foo.c_str(), prev - input.begin(), found - prev, + cur - input.begin()); + + } + if (prev < found) { // there are items between search start and find sregex_token_iterator it{prev, found, regex_whitespace, -1}; vector tokenized{it, {}}; @@ -619,10 +746,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, tokenized.end()); tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); + DEBUGF(stderr, "tokens1: %lu input size: %lu\n", tokenized.size(), + input.size()); for (auto x : tokenized) { cerr << x << endl; @@ -636,10 +763,7 @@ extern "C" unsigned char 
afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (regex_search(c, e, m, regex_word)) { p = c; f = m[0].first; @@ -649,10 +773,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // there are items between search start and find while (p < f) { - if (unlikely(debug)) { + IFDEBUG { string foo(p, p + 1); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); } @@ -661,20 +785,21 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", - foo.c_str()); tokens.push_back(std::string(p, f)); - */ + IFDEBUG { + + string foo(p, f); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(p, f)); + + } } - DEBUG( - stderr, - "SUBstring \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); + DEBUGF(stderr, + "SUBstring \"%s\" found at start %lu offset %lu continue " + "at %lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); tokens.push_back(m[0].str()); } @@ -683,10 +808,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, while (c < e) { - if (unlikely(debug)) { + IFDEBUG { string foo(c, c + 1); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); } @@ -695,17 +820,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", - foo.c_str()); + string foo(c, e); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(c, e)); - */ + tokens.push_back(std::string(c, e)); } @@ -713,7 +835,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[0].length() > 0) { tokens.push_back(match[0]); } + tokens.push_back(string(match_begin, match_end)); } @@ -727,10 +849,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, tokenized.end()); tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); + DEBUGF(stderr, "tokens2: %lu input size: %lu\n", tokenized.size(), + input.size()); for (auto x : tokenized) { cerr << x << endl; @@ -744,10 +866,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (regex_search(c, e, m, regex_word)) { p = c; f = m[0].first; @@ -757,10 +876,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // there are items between search start and find while (p < f) { - if (unlikely(debug)) { + IFDEBUG { string foo(p, p + 1); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); } @@ -769,25 +888,22 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", - foo.c_str()); + string foo(p, f); + DEBUGF(stderr, "before 
string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(p, f)); - */ + tokens.push_back(std::string(p, f)); } - DEBUG(stderr, - "SUB2string \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); + DEBUGF(stderr, + "SUB2string \"%s\" found at start %lu offset %lu continue " + "at %lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); tokens.push_back(m[0].str()); } @@ -796,10 +912,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, while (c < e) { - if (unlikely(debug)) { + IFDEBUG { string foo(c, c + 1); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); } @@ -808,16 +924,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + string foo(c, e); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(c, e)); - */ + tokens.push_back(std::string(c, e)); } @@ -827,22 +941,22 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "DUMPING TOKENS:\n"); + DEBUGF(stderr, "DUMPING TOKENS:\n"); u32 size_1 = tokens.size() - 1; for (u32 i = 0; i < tokens.size(); ++i) { - DEBUG(stderr, "%s", tokens[i].c_str()); + DEBUGF(stderr, "%s", tokens[i].c_str()); if (unlikely(alternative_tokenize && i < size_1)) { - DEBUG(stderr, "%s", whitespace.c_str()); + DEBUGF(stderr, "%s", whitespace.c_str()); } } - DEBUG(stderr, "---------------------------\n"); + DEBUGF(stderr, "---------------------------\n"); } @@ -850,7 +964,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; - DEBUG(stderr, "too few tokens\n"); + DEBUGF(stderr, "too few tokens\n"); return 0; } @@ -886,21 +1000,23 @@ extern "C" unsigned char afl_custom_queue_get(void *data, all_structure_items += structure->size(); // we are done! - DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", - structure->size()); + DEBUGF(stderr, "DONE! We have %lu tokens in the structure\n", + structure->size()); - } else { + } + + else { if (entry->second == NULL) { - DEBUG(stderr, "Skipping %s\n", filename); + DEBUGF(stderr, "Skipping %s\n", filename); s = NULL; return 0; } s = entry->second; - DEBUG(stderr, "OK %s\n", filename); + DEBUGF(stderr, "OK %s\n", filename); } diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md index 322caa5b..82131c92 100644 --- a/docs/custom_mutators.md +++ b/docs/custom_mutators.md @@ -150,6 +150,7 @@ def deinit(): # optional for Python sense to use it. You would only skip this if `post_process` is used to fix checksums etc. so if you are using it, e.g., as a post processing library. Note that a length > 0 *must* be returned! + The returned output buffer is under **your** memory management! - `describe` (optional): diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 1e8d085d..229bc025 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -844,15 +844,16 @@ struct custom_mutator { /** * Perform custom mutations on a given input * - * (Optional for now. Required in the future) + * (Optional) * - * @param data pointer returned in afl_custom_init by this custom mutator + * Getting an add_buf can be skipped by using afl_custom_splice_optout(). 
+ * + * @param[in] data Pointer returned in afl_custom_init by this custom mutator * @param[in] buf Pointer to the input data to be mutated and the mutated * output * @param[in] buf_size Size of the input/output data - * @param[out] out_buf the new buffer. We may reuse *buf if large enough. - * *out_buf = NULL is treated as FATAL. - * @param[in] add_buf Buffer containing the additional test case + * @param[out] out_buf The new buffer, under your memory mgmt. + * @param[in] add_buf Buffer containing an additional test case (splicing) * @param[in] add_buf_size Size of the additional test case * @param[in] max_size Maximum size of the mutated output. The mutation must * not produce data larger than max_size. diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 5e352dcb..bd482562 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -564,8 +564,7 @@ u8 fuzz_one_original(afl_state_t *afl) { if (afl->cmplog_lvl == 3 || (afl->cmplog_lvl == 2 && afl->queue_cur->tc_ref) || afl->queue_cur->favored || - !(afl->fsrv.total_execs % afl->queued_items) || - get_cur_time() - afl->last_find_time > 300000) { // 300 seconds + get_cur_time() - afl->last_find_time > 600000) { // 600 seconds if (input_to_state_stage(afl, in_buf, out_buf, len)) { From afff6f642c77e4986fdb8a4e9799c1a52e80ce32 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 13:41:48 +0100 Subject: [PATCH 19/77] optimize --- custom_mutators/autotokens/autotokens.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 102bea0f..149ae430 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -109,9 +109,9 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); #if AUTOTOKENS_SPLICE_DISABLE == 1 - #define AUTOTOKENS_MUT_MAX 12 + #define AUTOTOKENS_MUT_MAX 18 #else - #define AUTOTOKENS_MUT_MAX 14 + #define AUTOTOKENS_MUT_MAX 27 #endif u32 max_rand = AUTOTOKENS_MUT_MAX, new_item, pos; @@ -120,8 +120,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, switch (rand_below(afl_ptr, max_rand)) { - /* CHANGE */ - case 0 ... 7: /* fall through */ + /* CHANGE/MUTATE single item */ + case 0 ... 9: { pos = rand_below(afl_ptr, m_size); @@ -144,7 +144,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* INSERT (m_size +1 so we insert also after last place) */ - case 8 ... 9: { + case 10 ... 13: { do { @@ -192,7 +192,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, #if AUTOTOKENS_SPLICE_DISABLE != 1 /* SPLICING */ - case 10 ... 11: { + case 14 ... 22: { u32 strategy = rand_below(afl_ptr, 4), dst_off, n; auto src = id_mapping[rand_below(afl_ptr, valid_structures)]; @@ -278,11 +278,11 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, pos = rand_below(afl_ptr, m_size); - } while (unlikely(pos < whitespace_ids)); + } while (unlikely(m[pos] < whitespace_ids)); // if what we delete will result in a missing whitespace/token, // instead of deleting we switch the item to a whitespace or token. 
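          // m[pos - 1] and m[pos + 1] are read in this check, so pos has to
          // stay strictly inside the vector: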
- if (likely(!alternative_tokenize) && pos && pos < m_size && + if (likely(!alternative_tokenize) && pos && pos + 1 < m_size && id_to_token[m[pos - 1]].size() > 1 && id_to_token[m[pos + 1]].size() > 1) { @@ -300,7 +300,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, // if the data is already too small do not try to make it smaller // again this run. - max_rand = AUTOTOKENS_MUT_MAX - 2; + max_rand -= 4; } @@ -734,6 +734,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } if (prev < found) { // there are items between search start and find + sregex_token_iterator it{prev, found, regex_whitespace, -1}; vector tokenized{it, {}}; tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(), From 86d3c65559209ce12452e18daf96946222c19b46 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 15:59:57 +0100 Subject: [PATCH 20/77] nit --- custom_mutators/autotokens/autotokens.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 149ae430..f4b96c7b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -121,8 +121,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, switch (rand_below(afl_ptr, max_rand)) { /* CHANGE/MUTATE single item */ - case 0 ... 9: - { + case 0 ... 9: { pos = rand_below(afl_ptr, m_size); u32 cur_item = m[pos]; @@ -438,8 +437,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (likely(!debug)) { - if ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || - (only_fav && !afl_ptr->queue_cur->favored)) { + if (unlikely(!afl_ptr->custom_only) && + ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored))) { s = NULL; DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); From 628b4b60021a0d62a2eccddca4fe321c9d57c663 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 17:24:56 +0100 Subject: [PATCH 21/77] enhance examples --- custom_mutators/README.md | 10 ++++++++++ custom_mutators/examples/custom_send.c | 9 ++++++++- custom_mutators/examples/example.c | 4 +++- custom_mutators/examples/post_library_gif.so.c | 4 ++-- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/custom_mutators/README.md b/custom_mutators/README.md index 0289e150..8d01856f 100644 --- a/custom_mutators/README.md +++ b/custom_mutators/README.md @@ -11,6 +11,16 @@ The `./examples` folder contains examples for custom mutators in python and C. In `./rust`, you will find rust bindings, including a simple example in `./rust/example` and an example for structured fuzzing, based on lain, in`./rust/example_lain`. +## The AFL++ grammar agnostic grammar mutator + +In `./autotokens` you find a token-level fuzzer that does not need to know +anything about the grammar of an input as long as it is in ascii and allows +whitespace. +It is very fast and effective. + +If you are looking for an example of how to effectively create a custom +mutator take a look at this one. 
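For reference, the C interface such a custom mutator implements is small. The following is a minimal illustrative sketch — not part of these patches — with signatures as documented in `include/afl-fuzz.h` and used by the `custom_mutators/examples/` directory; like the other examples it assumes a build with `-I/path/to/AFLplusplus/include`:

```c
#include "afl-fuzz.h"   /* needs -I/path/to/AFLplusplus/include */
#include <stdlib.h>
#include <string.h>

typedef struct my_mutator {

  afl_state_t *afl;

} my_mutator_t;

/* Called once at startup. */
my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {

  (void)seed;
  my_mutator_t *data = calloc(1, sizeof(my_mutator_t));
  if (!data) { return NULL; }
  data->afl = afl;
  return data;

}

/* Called for every mutation. *out_buf stays under the mutator's own memory
   management and the returned length must be > 0 and <= max_size. */
size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
                       u8 **out_buf, u8 *add_buf, size_t add_buf_size,
                       size_t max_size) {

  (void)data;
  (void)add_buf;
  (void)add_buf_size;

  u8 *mutated = afl_realloc((void **)out_buf, buf_size);
  if (!mutated) { return 0; }

  memcpy(mutated, buf, buf_size);
  /* ... mutate the copy here, staying within max_size ... */

  return buf_size <= max_size ? buf_size : max_size;

}

/* Called once at shutdown. */
void afl_custom_deinit(my_mutator_t *data) {

  free(data);

}
```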
+ ## The AFL++ Grammar Mutator If you use git to clone AFL++, then the following will incorporate our diff --git a/custom_mutators/examples/custom_send.c b/custom_mutators/examples/custom_send.c index ffea927e..7de72819 100644 --- a/custom_mutators/examples/custom_send.c +++ b/custom_mutators/examples/custom_send.c @@ -1,7 +1,14 @@ +// +// This is an example on how to use afl_custom_send +// It writes each mutated data set to /tmp/foo +// You can modify this to send to IPC, shared memory, etc. +// // cc -O3 -fPIC -shared -g -o custom_send.so -I../../include custom_send.c // cd ../.. // afl-cc -o test-instr test-instr.c -// afl-fuzz -i in -o out -- ./test-instr -f /tmp/foo +// AFL_CUSTOM_MUTATOR_LIBRARY=custom_mutators/examples/custom_send.so \ +// afl-fuzz -i in -o out -- ./test-instr -f /tmp/foo +// #include "custom_mutator_helpers.h" diff --git a/custom_mutators/examples/example.c b/custom_mutators/examples/example.c index 3f299508..e680ec8e 100644 --- a/custom_mutators/examples/example.c +++ b/custom_mutators/examples/example.c @@ -6,7 +6,7 @@ Dominik Maier */ -// You need to use -I /path/to/AFLplusplus/include +// You need to use -I/path/to/AFLplusplus/include -I. #include "custom_mutator_helpers.h" #include @@ -118,6 +118,8 @@ size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, } + if (max_size > mutated_size) { mutated_size = max_size; } + *out_buf = mutated_out; return mutated_size; diff --git a/custom_mutators/examples/post_library_gif.so.c b/custom_mutators/examples/post_library_gif.so.c index 9cd224f4..3cb018a6 100644 --- a/custom_mutators/examples/post_library_gif.so.c +++ b/custom_mutators/examples/post_library_gif.so.c @@ -129,8 +129,8 @@ size_t afl_custom_post_process(post_state_t *data, unsigned char *in_buf, /* Allocate memory for new buffer, reusing previous allocation if possible. Note we have to use afl-fuzz's own realloc! - Note that you should only do this if you need to grow the buffer, - otherwise work with in_buf, and assign it to *out_buf instead. */ + We use afl_realloc because it is effective. + You can also work within in_buf, and assign it to *out_buf. 
*/ *out_buf = afl_realloc(out_buf, len); From 67cfe4f6d4a03c596a5c3e1aa97d64d79263746a Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 22:24:24 +0100 Subject: [PATCH 22/77] nits --- custom_mutators/autotokens/autotokens.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f4b96c7b..16ee8109 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -544,7 +544,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string input; input.resize(len); rewind(fp); - fread((void *)input.data(), input.size(), 1, fp); + + if (fread((void *)input.data(), 1, len, fp) != len) { + + s = NULL; + DEBUGF(stderr, "Too short read %s\n", len, filename); + return 0; + + } + fclose(fp); if (!afl_ptr->shm.cmplog_mode) { From bd2cb4cd1c2f07d5406875771cd41fb9a6e1f84d Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 20 Jan 2023 12:22:29 +0100 Subject: [PATCH 23/77] more default tokens --- custom_mutators/autotokens/autotokens.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 16ee8109..f9b5bd2e 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1105,6 +1105,12 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { id_to_token[current_id] = "\n\n\n\n"; ++current_id; whitespace_ids = current_id; + token_to_id["\""] = current_id; + id_to_token[current_id] = "\""; + ++current_id; + token_to_id["'"] = current_id; + id_to_token[current_id] = "'"; + ++current_id; } From 1b4e1d75b32c6024765ab27b36591ae97cb33f6b Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 25 Jan 2023 13:52:22 +0100 Subject: [PATCH 24/77] cmplog decision updates --- src/afl-fuzz-one.c | 16 ++++++++++------ utils/aflpp_driver/GNUmakefile | 23 ++++++++++++++--------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index bd482562..b25398c4 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -446,9 +446,12 @@ u8 fuzz_one_original(afl_state_t *afl) { ACTF( "Fuzzing test case #%u (%u total, %llu crashes saved, " - "perf_score=%0.0f, exec_us=%llu, hits=%u, map=%u, ascii=%u)...", + "perf_score=%0.0f, weight=%0.0f, favorite=%u, was_fuzzed=%u, " + "exec_us=%llu, hits=%u, map=%u, ascii=%u)...", afl->current_entry, afl->queued_items, afl->saved_crashes, - afl->queue_cur->perf_score, afl->queue_cur->exec_us, + afl->queue_cur->perf_score, afl->queue_cur->weight, + afl->queue_cur->favored, afl->queue_cur->was_fuzzed, + afl->queue_cur->exec_us, likely(afl->n_fuzz) ? 
afl->n_fuzz[afl->queue_cur->n_fuzz_entry] : 0, afl->queue_cur->bitmap_size, afl->queue_cur->is_ascii); fflush(stdout); @@ -561,10 +564,11 @@ u8 fuzz_one_original(afl_state_t *afl) { } else { - if (afl->cmplog_lvl == 3 || - (afl->cmplog_lvl == 2 && afl->queue_cur->tc_ref) || - afl->queue_cur->favored || - get_cur_time() - afl->last_find_time > 600000) { // 600 seconds + if (afl->queue_cur->favored || afl->cmplog_lvl == 3 || + (afl->cmplog_lvl == 2 && + (afl->queue_cur->tc_ref || + afl->fsrv.total_execs % afl->queued_items <= 10)) || + get_cur_time() - afl->last_find_time > 250000) { // 250 seconds if (input_to_state_stage(afl, in_buf, out_buf, len)) { diff --git a/utils/aflpp_driver/GNUmakefile b/utils/aflpp_driver/GNUmakefile index 234a1c31..b973f96a 100644 --- a/utils/aflpp_driver/GNUmakefile +++ b/utils/aflpp_driver/GNUmakefile @@ -8,9 +8,14 @@ ifeq "$(shell uname -s)" "Darwin" LDFLAGS += $(SDK_LD) endif +ifeq "" "$(LLVM_CONFIG)" + LLVM_CONFIG := llvm-config +endif LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) ifneq "" "$(LLVM_BINDIR)" - LLVM_BINDIR := $(LLVM_BINDIR)/ + ifeq "$(shell test -x $(LLVM_BINDIR)/clang && echo 1)" "1" + CC := $(LLVM_BINDIR)/clang + endif endif CFLAGS := -O3 -funroll-loops -g -fPIC @@ -18,31 +23,31 @@ CFLAGS := -O3 -funroll-loops -g -fPIC all: libAFLDriver.a libAFLQemuDriver.a aflpp_qemu_driver_hook.so aflpp_driver.o: aflpp_driver.c - -$(LLVM_BINDIR)clang -I. -I../../include $(CFLAGS) -c aflpp_driver.c + -$(CC) -I. -I../../include $(CFLAGS) -c aflpp_driver.c libAFLDriver.a: aflpp_driver.o @ar rc libAFLDriver.a aflpp_driver.o @cp -vf libAFLDriver.a ../../ debug: - $(LLVM_BINDIR)clang -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.o ../../src/afl-performance.c - $(LLVM_BINDIR)clang -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c - #$(LLVM_BINDIR)clang -S -emit-llvm -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.ll ../../src/afl-performance.c - #$(LLVM_BINDIR)clang -S -emit-llvm -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c + $(CC) -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.o ../../src/afl-performance.c + $(CC) -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c + #$(CC) -S -emit-llvm -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.ll ../../src/afl-performance.c + #$(CC) -S -emit-llvm -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c ar rc libAFLDriver.a afl-performance.o aflpp_driver.o aflpp_qemu_driver.o: aflpp_qemu_driver.c - -$(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c + -$(CC) $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c libAFLQemuDriver.a: aflpp_qemu_driver.o @-ar rc libAFLQemuDriver.a aflpp_qemu_driver.o @-cp -vf libAFLQemuDriver.a ../../ aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o - @-test -e aflpp_qemu_driver_hook.o && $(LLVM_BINDIR)clang $(LDFLAGS) -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so || echo "Note: Optional aflpp_qemu_driver_hook.so not built." + @-test -e aflpp_qemu_driver_hook.o && $(CC) $(LDFLAGS) -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so || echo "Note: Optional aflpp_qemu_driver_hook.so not built." aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c - @-test -e ../../qemu_mode/qemuafl/qemuafl/api.h && $(LLVM_BINDIR)clang $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c || echo "Note: Optional aflpp_qemu_driver_hook.o not built." 
+ @-test -e ../../qemu_mode/qemuafl/qemuafl/api.h && $(CC) $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c || echo "Note: Optional aflpp_qemu_driver_hook.o not built." test: debug #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c From 47f35d29ac53ed1cdb87f65591b62947a7965060 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 27 Jan 2023 14:32:18 +0100 Subject: [PATCH 25/77] fix --- custom_mutators/autotokens/autotokens.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f9b5bd2e..4a2cc08f 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -61,8 +61,10 @@ static unordered_map id_to_token; static string whitespace = AUTOTOKENS_WHITESPACE; static string output; static regex *regex_comment_custom; -static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", - regex::multiline | regex::optimize); +// multiline requires g++-11 libs :( +static regex regex_comment_star( + "/\\*([:print:]|\n)*?\\*/", + regex_constants::optimize /* | regex_constants::multiline */); static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); static regex regex_whitespace(R"([ \t]+)", regex::optimize); static vector *s; // the structure of the currently selected input @@ -548,7 +550,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (fread((void *)input.data(), 1, len, fp) != len) { s = NULL; - DEBUGF(stderr, "Too short read %s\n", len, filename); + DEBUGF(stderr, "Too short read %s\n", filename); return 0; } From b5d8d4c866137a8a6bd55225b0eaf723123c46c9 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 29 Jan 2023 10:07:33 +0100 Subject: [PATCH 26/77] comment --- custom_mutators/autotokens/autotokens.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 4a2cc08f..0a010f0b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1,3 +1,9 @@ +/* + token level fuzzing custom mutator for afl++ + (c) by Marc Heuse + License: Apache 2.0 +*/ + extern "C" { #include "afl-fuzz.h" From 91ccbf3f68ab9e6e4bc277f86c3efed666867132 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 1 Feb 2023 17:16:51 +0100 Subject: [PATCH 27/77] fix --- custom_mutators/autotokens/autotokens.cpp | 18 ++++++++---------- src/afl-fuzz-one.c | 5 +++-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 0a010f0b..548e1be9 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -451,7 +451,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, s = NULL; DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); - return 0; + return 1; } @@ -532,7 +532,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (!fp) { s = NULL; - return 0; + return 1; } // should not happen @@ -545,7 +545,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); - return 0; + return 1; } @@ -557,7 +557,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, s = NULL; DEBUGF(stderr, "Too short read %s\n", 
filename); - return 0; + return 1; } @@ -581,7 +581,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; DEBUGF(stderr, "Not text (%lu) %s\n", len, filename); - return 0; + return 1; } @@ -982,7 +982,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; DEBUGF(stderr, "too few tokens\n"); - return 0; + return 1; } @@ -1020,15 +1020,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "DONE! We have %lu tokens in the structure\n", structure->size()); - } - - else { + } else { if (entry->second == NULL) { DEBUGF(stderr, "Skipping %s\n", filename); s = NULL; - return 0; + return 1; } diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index b25398c4..2f016217 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -1988,7 +1988,8 @@ custom_mutator_stage: if (unlikely(!mutated_buf)) { - FATAL("Error in custom_fuzz. Size returned: %zu", mutated_size); + //FATAL("Error in custom_fuzz. Size returned: %zu", mutated_size); + break; } @@ -2040,7 +2041,7 @@ custom_mutator_stage: new_hit_cnt = afl->queued_items + afl->saved_crashes; afl->stage_finds[STAGE_CUSTOM_MUTATOR] += new_hit_cnt - orig_hit_cnt; - afl->stage_cycles[STAGE_CUSTOM_MUTATOR] += afl->stage_max; + afl->stage_cycles[STAGE_CUSTOM_MUTATOR] += afl->stage_cur; #ifdef INTROSPECTION afl->queue_cur->stats_mutated += afl->stage_max; #endif From 4946e9cc3a340efd9b08807ae5cb0a657e0214a9 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 2 Feb 2023 12:08:45 +0100 Subject: [PATCH 28/77] small fix to compiler rt --- instrumentation/afl-compiler-rt.o.c | 66 ++++++++--------------------- 1 file changed, 17 insertions(+), 49 deletions(-) diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index d6d6c38c..6ba19b5a 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -1534,6 +1534,16 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { if (start == stop || *start) return; + x = getenv("AFL_INST_RATIO"); + if (x) { inst_ratio = (u32)atoi(x); } + + if (!inst_ratio || inst_ratio > 100) { + + fprintf(stderr, "[-] ERROR: Invalid AFL_INST_RATIO (must be 1-100).\n"); + abort(); + + } + // If a dlopen of an instrumented library happens after the forkserver then // we have a problem as we cannot increase the coverage map anymore. if (__afl_already_initialized_forkserver) { @@ -1554,62 +1564,20 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { while (start < stop) { - *(start++) = offset; + if (likely(inst_ratio == 100) || R(100) < inst_ratio) + *start = offset; + else + *start = 0; // write to map[0] if (unlikely(++offset >= __afl_final_loc)) { offset = 4; } } } - } - - x = getenv("AFL_INST_RATIO"); - if (x) { inst_ratio = (u32)atoi(x); } - - if (!inst_ratio || inst_ratio > 100) { - - fprintf(stderr, "[-] ERROR: Invalid AFL_INST_RATIO (must be 1-100).\n"); - abort(); + return; // we are done for this special case } - /* instrumented code is loaded *after* our forkserver is up. this is a - problem. 
We cannot prevent collisions then :( */ - /* - if (__afl_already_initialized_forkserver && - __afl_final_loc + 1 + stop - start > __afl_map_size) { - - if (__afl_debug) { - - fprintf(stderr, "Warning: new instrumented code after the forkserver!\n"); - - } - - __afl_final_loc = 2; - - if (1 + stop - start > __afl_map_size) { - - *(start++) = ++__afl_final_loc; - - while (start < stop) { - - if (R(100) < inst_ratio) - *start = ++__afl_final_loc % __afl_map_size; - else - *start = 4; - - start++; - - } - - return; - - } - - } - - */ - /* Make sure that the first element in the range is always set - we use that to avoid duplicate calls (which can happen as an artifact of the underlying implementation in LLVM). */ @@ -1618,10 +1586,10 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { while (start < stop) { - if (R(100) < inst_ratio) + if (likely(inst_ratio == 100) || R(100) < inst_ratio) *start = ++__afl_final_loc; else - *start = 4; + *start = 0; // write to map[0] start++; From 25b4b32627a1ef1e65b328f90f3ad1fd25d8f906 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 2 Feb 2023 12:13:48 +0100 Subject: [PATCH 29/77] small fix to compiler rt --- instrumentation/afl-compiler-rt.o.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index 6ba19b5a..b1ce4427 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -1582,6 +1582,8 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { to avoid duplicate calls (which can happen as an artifact of the underlying implementation in LLVM). */ + if (__afl_final_loc < 3) __afl_final_loc = 3; // we skip the first 4 entries + *(start++) = ++__afl_final_loc; while (start < stop) { From df9ef84f5e042bdc1db764e83baa83cb30a80d31 Mon Sep 17 00:00:00 2001 From: Nikolay Shaplov Date: Fri, 3 Feb 2023 14:32:17 +0000 Subject: [PATCH 30/77] Explicitly print error code if sched_setaffinity fails --- src/afl-gotcpu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c index c5b8a27a..1762cfe2 100644 --- a/src/afl-gotcpu.c +++ b/src/afl-gotcpu.c @@ -214,7 +214,13 @@ int main(int argc, char **argv) { #if defined(__linux__) if (sched_setaffinity(0, sizeof(c), &c)) { - PFATAL("sched_setaffinity failed for cpu %d", i); + const char *error_code = "Unkown error code"; + if (errno == EFAULT) error_code = "EFAULT"; + if (errno == EINVAL) error_code = "EINVAL"; + if (errno == EPERM) error_code = "EPERM"; + if (errno == ESRCH) error_code = "ESRCH"; + + PFATAL("sched_setaffinity failed for cpu %d, error: %s", i, error_code); } From 53c19a807c701760af577cea1f44916d9133a971 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 4 Feb 2023 14:09:50 +0100 Subject: [PATCH 31/77] code indent --- instrumentation/split-compares-pass.so.cc | 102 ++++++++++++++-------- src/afl-gotcpu.c | 4 +- 2 files changed, 66 insertions(+), 40 deletions(-) diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc index dd7b09a6..8a07610c 100644 --- a/instrumentation/split-compares-pass.so.cc +++ b/instrumentation/split-compares-pass.so.cc @@ -1152,10 +1152,14 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { b_op1 = SelectInst::Create(isMzero_op1, ConstantInt::get(intType, PlusZero), bpre_op1); #if LLVM_MAJOR >= 16 - isMzero_op0->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); - 
isMzero_op1->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); - b_op0->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); - b_op1->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); + isMzero_op0->insertInto(nonan_bb, + BasicBlock::iterator(nonan_bb->getTerminator())); + isMzero_op1->insertInto(nonan_bb, + BasicBlock::iterator(nonan_bb->getTerminator())); + b_op0->insertInto(nonan_bb, + BasicBlock::iterator(nonan_bb->getTerminator())); + b_op1->insertInto(nonan_bb, + BasicBlock::iterator(nonan_bb->getTerminator())); #else nonan_bb->getInstList().insert( BasicBlock::iterator(nonan_bb->getTerminator()), isMzero_op0); @@ -1192,7 +1196,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { t_s0->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); s_s1->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); t_s1->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); - icmp_sign_bit->insertInto(nonan_bb, BasicBlock::iterator(nonan_bb->getTerminator())); + icmp_sign_bit->insertInto(nonan_bb, + BasicBlock::iterator(nonan_bb->getTerminator())); #else nonan_bb->getInstList().insert( BasicBlock::iterator(nonan_bb->getTerminator()), s_s0); @@ -1239,8 +1244,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::LShr, b_op1, ConstantInt::get(b_op1->getType(), shiftR_exponent)); #if LLVM_MAJOR >= 16 - s_e0->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); - s_e1->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); + s_e0->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); + s_e1->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); #else signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), s_e0); @@ -1251,15 +1258,16 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { t_e0 = new TruncInst(s_e0, IntExponentTy); t_e1 = new TruncInst(s_e1, IntExponentTy); #if LLVM_MAJOR >= 16 - t_e0->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); - t_e1->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); + t_e0->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); + t_e1->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); #else signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), t_e0); signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), t_e1); #endif - if (sizeInBits - precision < exTySizeBytes * 8) { @@ -1270,8 +1278,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::And, t_e1, ConstantInt::get(t_e1->getType(), mask_exponent)); #if LLVM_MAJOR >= 16 - m_e0->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); - m_e1->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); + m_e0->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); + m_e1->insertInto(signequal_bb, + BasicBlock::iterator(signequal_bb->getTerminator())); #else signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), m_e0); @@ -1312,7 +1322,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponents_equal = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, m_e0, m_e1); #if LLVM_MAJOR >= 16 - 
icmp_exponents_equal->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); + icmp_exponents_equal->insertInto( + signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); #else signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), @@ -1332,7 +1343,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, m_e0, m_e1); #if LLVM_MAJOR >= 16 - icmp_exponent->insertInto(signequal2_bb, BasicBlock::iterator(signequal2_bb->getTerminator())); + icmp_exponent->insertInto( + signequal2_bb, + BasicBlock::iterator(signequal2_bb->getTerminator())); #else signequal2_bb->getInstList().insert( BasicBlock::iterator(signequal2_bb->getTerminator()), @@ -1346,7 +1359,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponents_equal = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, m_e0, m_e1); #if LLVM_MAJOR >= 16 - icmp_exponents_equal->insertInto(signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); + icmp_exponents_equal->insertInto( + signequal_bb, BasicBlock::iterator(signequal_bb->getTerminator())); #else signequal_bb->getInstList().insert( BasicBlock::iterator(signequal_bb->getTerminator()), @@ -1366,7 +1380,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, m_e0, m_e1); #if LLVM_MAJOR >= 16 - icmp_exponent->insertInto(signequal2_bb, BasicBlock::iterator(signequal2_bb->getTerminator())); + icmp_exponent->insertInto( + signequal2_bb, + BasicBlock::iterator(signequal2_bb->getTerminator())); #else signequal2_bb->getInstList().insert( BasicBlock::iterator(signequal2_bb->getTerminator()), @@ -1381,7 +1397,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } #if LLVM_MAJOR >= 16 - icmp_exponent_result->insertInto(signequal2_bb, BasicBlock::iterator(signequal2_bb->getTerminator())); + icmp_exponent_result->insertInto( + signequal2_bb, BasicBlock::iterator(signequal2_bb->getTerminator())); #else signequal2_bb->getInstList().insert( BasicBlock::iterator(signequal2_bb->getTerminator()), @@ -1437,8 +1454,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::And, b_op1, ConstantInt::get(b_op1->getType(), mask_fraction)); #if LLVM_MAJOR >= 16 - m_f0->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); - m_f1->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); + m_f0->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); + m_f1->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); #else middle_bb->getInstList().insert( BasicBlock::iterator(middle_bb->getTerminator()), m_f0); @@ -1451,8 +1470,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { t_f0 = new TruncInst(m_f0, IntFractionTy); t_f1 = new TruncInst(m_f1, IntFractionTy); #if LLVM_MAJOR >= 16 - t_f0->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); - t_f1->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); + t_f0->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); + t_f1->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); #else middle_bb->getInstList().insert( BasicBlock::iterator(middle_bb->getTerminator()), t_f0); @@ -1474,8 +1495,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { t_f0 = new TruncInst(b_op0, IntFractionTy); t_f1 = new TruncInst(b_op1, 
IntFractionTy); #if LLVM_MAJOR >= 16 - t_f0->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); - t_f1->insertInto(middle_bb, BasicBlock::iterator(middle_bb->getTerminator())); + t_f0->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); + t_f1->insertInto(middle_bb, + BasicBlock::iterator(middle_bb->getTerminator())); #else middle_bb->getInstList().insert( BasicBlock::iterator(middle_bb->getTerminator()), t_f0); @@ -1503,7 +1526,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_fraction_result = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_f0, t_f1); #if LLVM_MAJOR >= 16 - icmp_fraction_result->insertInto(middle2_bb, BasicBlock::iterator(middle2_bb->getTerminator())); + icmp_fraction_result->insertInto( + middle2_bb, BasicBlock::iterator(middle2_bb->getTerminator())); #else middle2_bb->getInstList().insert( BasicBlock::iterator(middle2_bb->getTerminator()), @@ -1516,7 +1540,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_fraction_result = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_NE, t_f0, t_f1); #if LLVM_MAJOR >= 16 - icmp_fraction_result->insertInto(middle2_bb, BasicBlock::iterator(middle2_bb->getTerminator())); + icmp_fraction_result->insertInto( + middle2_bb, BasicBlock::iterator(middle2_bb->getTerminator())); #else middle2_bb->getInstList().insert( BasicBlock::iterator(middle2_bb->getTerminator()), @@ -1542,13 +1567,13 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { if (FcmpInst->getPredicate() == CmpInst::FCMP_OGT || FcmpInst->getPredicate() == CmpInst::FCMP_UGT) { - icmp_fraction_result = CmpInst::Create( - Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1); - icmp_fraction_result2 = CmpInst::Create( - Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1); + icmp_fraction_result = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1); + icmp_fraction_result2 = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1); #if LLVM_MAJOR >= 16 - icmp_fraction_result->insertInto(negative_bb, negative_bb->end()); - icmp_fraction_result2->insertInto(positive_bb, negative_bb->end()); + icmp_fraction_result->insertInto(negative_bb, negative_bb->end()); + icmp_fraction_result2->insertInto(positive_bb, negative_bb->end()); #else negative_bb->getInstList().push_back(icmp_fraction_result); positive_bb->getInstList().push_back(icmp_fraction_result2); @@ -1556,13 +1581,13 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } else { - icmp_fraction_result = CmpInst::Create( - Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1); - icmp_fraction_result2 = CmpInst::Create( - Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1); + icmp_fraction_result = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1); + icmp_fraction_result2 = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1); #if LLVM_MAJOR >= 16 - icmp_fraction_result->insertInto(negative_bb, negative_bb->end()); - icmp_fraction_result2->insertInto(positive_bb, negative_bb->end()); + icmp_fraction_result->insertInto(negative_bb, negative_bb->end()); + icmp_fraction_result2->insertInto(positive_bb, negative_bb->end()); #else negative_bb->getInstList().push_back(icmp_fraction_result); positive_bb->getInstList().push_back(icmp_fraction_result2); @@ -1581,7 +1606,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { PN2->addIncoming(icmp_fraction_result, negative_bb); PN2->addIncoming(icmp_fraction_result2, positive_bb); #if LLVM_MAJOR >= 16 - 
PN2->insertInto(middle2_bb, BasicBlock::iterator(middle2_bb->getTerminator())); + PN2->insertInto(middle2_bb, + BasicBlock::iterator(middle2_bb->getTerminator())); #else middle2_bb->getInstList().insert( BasicBlock::iterator(middle2_bb->getTerminator()), PN2); diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c index 1762cfe2..fd9e9f54 100644 --- a/src/afl-gotcpu.c +++ b/src/afl-gotcpu.c @@ -217,8 +217,8 @@ int main(int argc, char **argv) { const char *error_code = "Unkown error code"; if (errno == EFAULT) error_code = "EFAULT"; if (errno == EINVAL) error_code = "EINVAL"; - if (errno == EPERM) error_code = "EPERM"; - if (errno == ESRCH) error_code = "ESRCH"; + if (errno == EPERM) error_code = "EPERM"; + if (errno == ESRCH) error_code = "ESRCH"; PFATAL("sched_setaffinity failed for cpu %d, error: %s", i, error_code); From e1434bcfcd8c13de838559fd7b797d1a3cd5a672 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 4 Feb 2023 14:34:47 +0100 Subject: [PATCH 32/77] more autotoken options --- custom_mutators/autotokens/TODO | 17 +++++++++ custom_mutators/autotokens/autotokens.cpp | 45 ++++++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 2e39511c..3cae3060 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -4,3 +4,20 @@ create from thin air if no good seed after a cycle and dict large enough? (static u32 no_of_struct_inputs;) splicing -> check if whitespace/token is needed + +whitespace/token check only AFTER mutation + +analyse welche einen DICT haben, und welche davon rein ascii + +corpus analyse: + + libxml + - hardbuzz + - sqlite + - libpcap +min len, max len, % wenn 95/98/99/100 ascii + +funktion und env für menge an mutationen + +env für menge an per mutation run + +only add inital dictionary, not furher finds, e.g. cmplog diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 548e1be9..a0125851 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -28,6 +28,9 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 +#define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 +// 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog +#define AUTOTOKENS_LEARN_DICT 2 #ifndef AUTOTOKENS_SPLICE_DISABLE #define AUTOTOKENS_SPLICE_DISABLE 0 #endif @@ -53,6 +56,8 @@ static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; static int only_fav = AUTOTOKENS_ONLY_FAV; static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; +static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; +static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -94,6 +99,22 @@ u32 good_whitespace_or_singleval() { } +extern "C" u32 afl_custom_fuzz_count(void *data, const u8 *buf, + size_t buf_size) { + + if (s == NULL) return 0; + + u32 shift = unlikely(afl_ptr->custom_only) ? 
7 : 8; + u32 stage_max = (u32)((HAVOC_CYCLES * afl_ptr->queue_cur->perf_score) / + afl_ptr->havoc_div) >> + shift; + if (fuzz_count_shift) { stage_max >>= (u32)fuzz_count_shift; }; + DEBUGF(stderr, "fuzz count: %u\n", stage_max); + + return stage_max; + +} + extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u8 **out_buf, u8 *add_buf, size_t add_buf_size, size_t max_size) { @@ -441,6 +462,7 @@ u8 my_search_string(string::const_iterator cur, string::const_iterator ende, extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { + static int learn_state; (void)(data); if (likely(!debug)) { @@ -458,7 +480,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } // check if there are new dictionary entries and add them to the tokens - if (valid_structures) { + if (valid_structures && learn_state < learn_dictionary_tokens) { + + if (unlikely(!learn_state)) { learn_state = 1; } while (extras_cnt < afl_ptr->extras_cnt) { @@ -1053,6 +1077,25 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } + + if (getenv("AUTOTOKENS_LEARN_DICT")) { + + learn_dictionary_tokens = atoi(getenv("AUTOTOKENS_LEARN_DICT")); + if (learn_dictionary_tokens < 0 || learn_dictionary_tokens > 2) { + + learn_dictionary_tokens = 2; + + } + + } + + if (getenv("AUTOTOKENS_FUZZ_COUNT_SHIFT")) { + + fuzz_count_shift = atoi(getenv("AUTOTOKENS_FUZZ_COUNT_SHIFT")); + if (fuzz_count_shift < 0 || fuzz_count_shift > 16) { fuzz_count_shift = 0; } + + } + if (getenv("AUTOTOKENS_WHITESPACE")) { whitespace = getenv("AUTOTOKENS_WHITESPACE"); From ec87abda93d68f489f26ed2a2ae75b4f1e26d0bb Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 4 Feb 2023 14:37:28 +0100 Subject: [PATCH 33/77] readme --- custom_mutators/autotokens/README | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index f82dcd98..86e7c9b3 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -18,6 +18,12 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! `AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items `AUTOTOKENS_COMMENT` - what character or string starts a comment which will be removed. Default: `/* ... */` +`AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the number of fuzzing performed, shifting + the value by this number set, e.g. 1. +`AUTOTOKENS_LEARN_DICT` - learn from dictionaries? + 0 = none + 1 = only -x or autodict + 2 = -x, autodict and `CMPLOG` `AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation (experimental) `AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, From 90f61552f794fc0fae5dc2585f81f31d32db1e89 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 4 Feb 2023 15:39:03 +0100 Subject: [PATCH 34/77] changes --- custom_mutators/autotokens/TODO | 9 ++++----- custom_mutators/autotokens/autotokens.cpp | 12 ++++++++++++ include/config.h | 4 ++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 3cae3060..528dff1f 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,5 +1,3 @@ -cmplog: only add tokens that were found to fit? 
- create from thin air if no good seed after a cycle and dict large enough? (static u32 no_of_struct_inputs;) @@ -16,8 +14,9 @@ corpus analyse: - libpcap min len, max len, % wenn 95/98/99/100 ascii -funktion und env für menge an mutationen - env für menge an per mutation run -only add inital dictionary, not furher finds, e.g. cmplog +AFL_TXT_MAX_LEN 65535 +AFL_TXT_MIN_LEN 16 +AFL_TXT_MIN_PERCENT=99 + diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index a0125851..46a347f8 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -34,6 +34,9 @@ extern "C" { #ifndef AUTOTOKENS_SPLICE_DISABLE #define AUTOTOKENS_SPLICE_DISABLE 0 #endif +#ifndef AFL_TXT_MAX_LEN + #define AFL_TXT_MAX_LEN 65535 +#endif #if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN #error SPLICE_MIN must be lower than SIZE_MIN @@ -571,6 +574,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); return 1; + } else + if (len > AFL_TXT_MAX_LEN) { + + fclose(fp); + file_mapping[fn] = structure; // NULL ptr so we don't read the file again + s = NULL; + DEBUGF(stderr, "Too long (%lu) %s\n", len, filename); + return 1; + } string input; diff --git a/include/config.h b/include/config.h index f8a742f2..ed8b844c 100644 --- a/include/config.h +++ b/include/config.h @@ -489,12 +489,12 @@ /* Minimum length of a queue input to be evaluated for "is_ascii"? */ -#define AFL_TXT_MIN_LEN 12 +#define AFL_TXT_MIN_LEN 16 /* What is the minimum percentage of ascii characters present to be classifed as "is_ascii"? */ -#define AFL_TXT_MIN_PERCENT 95 +#define AFL_TXT_MIN_PERCENT 98 /* How often to perform ASCII mutations 0 = disable, 1-8 are good values */ From f99656e22bffb4bfac8e201ad973a1ea5a6abaa0 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 5 Feb 2023 13:15:06 +0100 Subject: [PATCH 35/77] create from thin air, max mutation --- custom_mutators/autotokens/autotokens.cpp | 97 ++++++++++++++++++----- 1 file changed, 78 insertions(+), 19 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 46a347f8..f1263600 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -24,10 +24,12 @@ extern "C" { #define AUTOTOKENS_ONLY_FAV 0 #define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 +#define AUTOTOKENS_CHANGE_MAX 64 #define AUTOTOKENS_WHITESPACE " " #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 +#define AUTOTOKENS_CREATE_FROM_THIN_AIR 1 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog #define AUTOTOKENS_LEARN_DICT 2 @@ -61,6 +63,7 @@ static int only_fav = AUTOTOKENS_ONLY_FAV; static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; +static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -83,7 +86,18 @@ static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); static regex regex_whitespace(R"([ \t]+)", regex::optimize); static vector *s; // the structure of the currently selected input -u32 good_whitespace_or_singleval() { +// FUNCTIONS + +/* This function is called once after everything is set up but before + any fuzzing attempt 
has been performed. + This is called in afl_custom_queue_get() */ +static void first_run(void *data) { + + (void)(data); + +} + +static u32 good_whitespace_or_singleval() { u32 i = rand_below(afl_ptr, current_id); if (id_to_token[i].size() == 1) { return i; } @@ -105,6 +119,8 @@ u32 good_whitespace_or_singleval() { extern "C" u32 afl_custom_fuzz_count(void *data, const u8 *buf, size_t buf_size) { + (void)(data); + if (s == NULL) return 0; u32 shift = unlikely(afl_ptr->custom_only) ? 7 : 8; @@ -135,9 +151,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 i, m_size = (u32)m.size(); u32 rounds = - MAX(AUTOTOKENS_CHANGE_MIN, - MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * - afl_ptr->havoc_div / 256)); + MIN(AUTOTOKENS_CHANGE_MAX, + MAX(AUTOTOKENS_CHANGE_MIN, + MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * + afl_ptr->havoc_div / 256))); // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); #if AUTOTOKENS_SPLICE_DISABLE == 1 @@ -379,9 +396,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, /* I get f*cking stack overflow using C++ regex with a regex of "\"[[:print:]]*?\"" if this matches a long string even with regex::optimize enabled :-( */ -u8 my_search_string(string::const_iterator cur, string::const_iterator ende, - string::const_iterator *match_begin, - string::const_iterator *match_end) { +static u8 my_search_string(string::const_iterator cur, + string::const_iterator ende, + string::const_iterator *match_begin, + string::const_iterator *match_end) { string::const_iterator start = cur, found_begin; u8 quote_type = 0; @@ -460,25 +478,30 @@ u8 my_search_string(string::const_iterator cur, string::const_iterator ende, } /* We are not using afl_custom_queue_new_entry() because not every corpus entry - will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ + will be necessarily fuzzed with this custom mutator. + So we use afl_custom_queue_get() instead. */ extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { - static int learn_state; + static int learn_state = 0; + static int is_first_run = 1; (void)(data); - if (likely(!debug)) { + if (unlikely(is_first_run)) { - if (unlikely(!afl_ptr->custom_only) && - ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || - (only_fav && !afl_ptr->queue_cur->favored))) { + is_first_run = 0; + first_run(data); - s = NULL; - DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); - return 1; + } - } + if (unlikely(!afl_ptr->custom_only) && !create_from_thin_air && + ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored))) { + + s = NULL; + DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); + return 1; } @@ -551,6 +574,42 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string fn = (char *)filename; auto entry = file_mapping.find(fn); + // if there is only one active queue item at start and it is very small + // the we create once a structure randomly. 
+ if (unlikely(create_from_thin_air)) { + + if (current_id > whitespace_ids + 6 && afl_ptr->active_items == 1 && + afl_ptr->queue_cur->len < AFL_TXT_MIN_LEN) { + + DEBUGF(stderr, "Creating an entry from thin air...\n"); + structure = new vector(); + u32 item, prev, cnt = current_id >> 1; + structure->reserve(cnt + 4); + for (u32 i = 0; i < cnt; i++) { + + item = rand_below(afl_ptr, current_id); + if (i && id_to_token[item].length() > 1 && + id_to_token[prev].length() > 1) { + + structure->push_back(good_whitespace_or_singleval()); + + } + + structure->push_back(item); + prev = item; + + } + + file_mapping[fn] = structure; + s = structure; + return 1; + + } + + create_from_thin_air = 0; + + } + if (entry == file_mapping.end()) { // this input file was not analyzed for tokens yet, so let's do it! @@ -574,8 +633,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); return 1; - } else - if (len > AFL_TXT_MAX_LEN) { + } else if (len > AFL_TXT_MAX_LEN) { fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again @@ -1088,6 +1146,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } + if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; } if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } if (getenv("AUTOTOKENS_LEARN_DICT")) { From e6120282556e4df79c01236849e5f6f225b8e428 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 5 Feb 2023 14:19:10 +0100 Subject: [PATCH 36/77] dict fix --- custom_mutators/autotokens/README | 3 +++ custom_mutators/autotokens/autotokens.cpp | 22 +++++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 86e7c9b3..d8613232 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -24,6 +24,9 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! 0 = none 1 = only -x or autodict 2 = -x, autodict and `CMPLOG` +`AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and + a dictionary loaded then create one initial + structure based on the dictionary. 
`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation (experimental) `AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f1263600..d3ae7e9c 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -29,7 +29,7 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 -#define AUTOTOKENS_CREATE_FROM_THIN_AIR 1 +#define AUTOTOKENS_CREATE_FROM_THIN_AIR 0 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog #define AUTOTOKENS_LEARN_DICT 2 @@ -506,14 +506,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } // check if there are new dictionary entries and add them to the tokens - if (valid_structures && learn_state < learn_dictionary_tokens) { + if (likely(valid_structures || create_from_thin_air) && + learn_state < learn_dictionary_tokens) { if (unlikely(!learn_state)) { learn_state = 1; } while (extras_cnt < afl_ptr->extras_cnt) { u32 ok = 1, l = afl_ptr->extras[extras_cnt].len; - u8 *ptr = afl_ptr->extras[extras_cnt].data; + u8 *buf, *ptr = afl_ptr->extras[extras_cnt].data; for (u32 i = 0; i < l; ++i) { @@ -528,14 +529,17 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (ok) { - token_to_id[(char *)ptr] = current_id; - id_to_token[current_id] = (char *)ptr; + buf = (u8 *)malloc(afl_ptr->extras[extras_cnt].len + 1); + memcpy(buf, afl_ptr->extras[extras_cnt].data, + afl_ptr->extras[extras_cnt].len); + buf[afl_ptr->extras[extras_cnt].len] = 0; + token_to_id[(char *)buf] = current_id; + id_to_token[current_id] = (char *)buf; ++current_id; } ++extras_cnt; - DEBUGF(stderr, "Added from dictionary: \"%s\"\n", ptr); } @@ -600,8 +604,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - file_mapping[fn] = structure; s = structure; + file_mapping[fn] = structure; + id_mapping[valid_structures] = structure; + ++valid_structures; + all_structure_items += structure->size(); + return 1; } From 8a2547073c500fcd637a7b276b7a38313bb70b5f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 6 Feb 2023 08:51:20 +0100 Subject: [PATCH 37/77] more options --- custom_mutators/autotokens/README | 2 ++ custom_mutators/autotokens/TODO | 4 +++- custom_mutators/autotokens/autotokens.cpp | 26 +++++++++++++++++++---- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index d8613232..e9c48662 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -24,6 +24,8 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! 0 = none 1 = only -x or autodict 2 = -x, autodict and `CMPLOG` +`AUTOTOKENS_CHANGE_MIN` - minimum number of mutations (1-256, default 8) +`AUTOTOKENS_CHANGE_MAX` - maximum number of mutations (1-4096, default 64) `AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and a dictionary loaded then create one initial structure based on the dictionary. 
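To make the effect of the two new bounds concrete, here is a simplified sketch (not taken from the patch) of how the per-call mutation budget is clamped; the actual code in `autotokens.cpp` additionally scales the middle term with the queue entry's `perf_score` and the havoc divisor:

```c
/* Simplified: derive the number of token mutations per fuzz call from the
   structure size and the AUTOTOKENS_CHANGE_MIN / AUTOTOKENS_CHANGE_MAX
   bounds (u32 as in the AFL++ headers). */
static u32 change_budget(u32 m_size, u32 change_min, u32 change_max) {

  u32 rounds = m_size >> 3;                            /* ~1/8 of the tokens */
  if (rounds < change_min) { rounds = change_min; }    /* at least _MIN      */
  if (rounds > change_max) { rounds = change_max; }    /* at most _MAX       */
  return rounds;

}
```

Under this simplification and the defaults (8 and 64), a 40-token structure gets 8 mutation rounds per call, while a 4000-token structure is capped at 64.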
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 528dff1f..496bfd45 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -9,7 +9,6 @@ analyse welche einen DICT haben, und welche davon rein ascii corpus analyse: + libxml - - hardbuzz - sqlite - libpcap min len, max len, % wenn 95/98/99/100 ascii @@ -20,3 +19,6 @@ AFL_TXT_MAX_LEN 65535 AFL_TXT_MIN_LEN 16 AFL_TXT_MIN_PERCENT=99 +-> KEIN FAV! + +change_min/_max werte diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index d3ae7e9c..ee35c68b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -32,7 +32,7 @@ extern "C" { #define AUTOTOKENS_CREATE_FROM_THIN_AIR 0 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog -#define AUTOTOKENS_LEARN_DICT 2 +#define AUTOTOKENS_LEARN_DICT 1 #ifndef AUTOTOKENS_SPLICE_DISABLE #define AUTOTOKENS_SPLICE_DISABLE 0 #endif @@ -64,6 +64,8 @@ static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR; +static int change_min = AUTOTOKENS_CHANGE_MIN; +static int change_max = AUTOTOKENS_CHANGE_MAX; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -151,8 +153,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 i, m_size = (u32)m.size(); u32 rounds = - MIN(AUTOTOKENS_CHANGE_MAX, - MAX(AUTOTOKENS_CHANGE_MIN, + MIN(change_max, + MAX(change_min, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256))); // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); @@ -1162,7 +1164,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { learn_dictionary_tokens = atoi(getenv("AUTOTOKENS_LEARN_DICT")); if (learn_dictionary_tokens < 0 || learn_dictionary_tokens > 2) { - learn_dictionary_tokens = 2; + learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; } @@ -1175,6 +1177,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } + if (getenv("AUTOTOKENS_CHANGE_MIN")) { + + change_min = atoi(getenv("AUTOTOKENS_CHANGE_MIN")); + if (change_min < 1 || change_min > 256) { change_min = AUTOTOKENS_CHANGE_MIN; } + + } + + if (getenv("AUTOTOKENS_CHANGE_MAX")) { + + change_max = atoi(getenv("AUTOTOKENS_CHANGE_MAX")); + if (change_max < 1 || change_max > 4096) { change_max = AUTOTOKENS_CHANGE_MAX; } + + } + + if (change_max < change_min) { change_max = change_min + 1; } + if (getenv("AUTOTOKENS_WHITESPACE")) { whitespace = getenv("AUTOTOKENS_WHITESPACE"); From ca063c92d20f4dee6ae9fd1d48dc531768b14ca5 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 6 Feb 2023 08:52:12 +0100 Subject: [PATCH 38/77] more options --- include/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/config.h b/include/config.h index ed8b844c..49d09174 100644 --- a/include/config.h +++ b/include/config.h @@ -494,7 +494,7 @@ /* What is the minimum percentage of ascii characters present to be classifed as "is_ascii"? 
*/ -#define AFL_TXT_MIN_PERCENT 98 +#define AFL_TXT_MIN_PERCENT 99 /* How often to perform ASCII mutations 0 = disable, 1-8 are good values */ From ca2e8a1bf65d6f5d33244c9c7971a21294dc932b Mon Sep 17 00:00:00 2001 From: Dawin Schmidt Date: Mon, 6 Feb 2023 08:38:20 -0500 Subject: [PATCH 39/77] Add Qemu deferred initialization example --- .../README.deferred_initialization_example.md | 201 ++++++++++++++++++ qemu_mode/README.md | 2 + 2 files changed, 203 insertions(+) create mode 100644 qemu_mode/README.deferred_initialization_example.md diff --git a/qemu_mode/README.deferred_initialization_example.md b/qemu_mode/README.deferred_initialization_example.md new file mode 100644 index 00000000..0ba04b79 --- /dev/null +++ b/qemu_mode/README.deferred_initialization_example.md @@ -0,0 +1,201 @@ +# Fuzz ARM32 Python Native Extensions in Binary-only Mode (LLVM fork-based) + +This is an example on how to fuzz Python native extensions in LLVM mode with deferred initialization on ARM32. + +We use Ubuntu x86_64 to run AFL++ and an Alpine ARMv7 Chroot to build the fuzzing target. + +Check [Resources](#resources) for the code used in this example. + +## Setup Alpine ARM Chroot on your x86_64 Linux Host + +### Use systemd-nspawn + +1. Install `qemu-user-binfmt`, `qemu-user-static` and `systemd-container` dependencies. +2. Restart the systemd-binfmt service: `systemctl restart systemd-binfmt.service` +3. Download an Alpine ARM RootFS from https://alpinelinux.org/downloads/ +4. Create a new `alpine_sysroot` folder and extract: `tar xfz alpine-minirootfs-3.17.1-armv7.tar.gz -C alpine_sysroot/` +5. Copy `qemu-arm-static` to Alpine's RootFS: `cp $(which qemu-arm-static) ./alpine/usr/bin/` +6. Chroot into the container: `sudo systemd-nspawn -D alpine/ --bind-ro=/etc/resolv.conf` +7. Install dependencies: `apk update && apk add build-base musl-dev clang15 python3 python3-dev py3-pip` +8. Exit the container with `exit` + +### Alternatively use Docker + +1. Install `qemu-user-binfmt` and `qemu-user-static` +2. Run Qemu container: ```$ docker run --rm --privileged multiarch/qemu-user-static --reset -p yes``` +3. Run Alpine container: ```$ docker run -it --rm arm32v7/alpine sh``` + +## Build AFL++ Qemu Mode with ARM Support + +First, build AFL++ as described [here](https://github.com/AFLplusplus/AFLplusplus/blob/dev/docs/INSTALL.md). 
Then, run the Qemu build script: + +```bash +cd qemu_mode && CPU_TARGET=arm ./build_qemu_support.sh +``` + +## Compile and Build the Fuzzing Project +Build the native extension and the fuzzing harness for ARM using the Alpine container (check [Resources](#resources) for the code): +```bash +ALPINE_ROOT= +FUZZ= +sudo systemd-nspawn -D $ALPINE_ROOT --bind=$FUZZ:/fuzz +CC=$(which clang) CFLAGS="-g" LDSHARED="clang -shared" python3 -m pip install /fuzz +clang $(python3-config --embed --cflags) $(python3-config --embed --ldflags) -o /fuzz/fuzz_harness.a /fuzz/fuzz_harness.c +exit +``` + +Manually trigger bug: +```bash +echo -n "FUZZ" | qemu-arm-static -L $ALPINE_ROOT $FUZZ/fuzz_harness.a +``` + +## Run AFL++ +Make sure to start the forkserver *after* loading all the shared objects by setting the `AFL_ENTRYPOINT` environment variable (see [here](https://aflplus.plus/docs/env_variables/#5-settings-for-afl-qemu-trace) for details): + +Choose an address just before the `while()` loop, for example: +```bash +qemu-arm-static -L $ALPINE_ROOT $ALPINE_ROOT/usr/bin/objdump -d $FUZZ/fuzz_harness.a | grep -A 1 "PyObject_GetAttrString" + +00000584 : + 584: e28fc600 add ip, pc, #0, 12 +-- + 7c8: ebffff6d bl 584 + 7cc: e58d0008 str r0, [sp, #8] +... +``` + +Check Qemu memory maps using the instructions from [here](https://aflplus.plus/docs/tutorials/libxml2_tutorial/): +>The binary is position independent and QEMU persistent needs the real addresses, not the offsets. Fortunately, QEMU loads PIE executables at a fixed address, 0x4000000000 for x86_64. +> +> We can check it using `AFL_QEMU_DEBUG_MAPS`. You don’t need this step if your binary is not PIE. + +Setup Python environment variables and run `afl-qemu-trace`: +```bash +PYTHONPATH=$ALPINE_ROOT/usr/lib/python3.10/ PYTHONHOME=$ALPINE_ROOT/usr/bin/ QEMU_LD_PREFIX=$ALPINE_ROOT AFL_QEMU_DEBUG_MAPS=1 afl-qemu-trace $FUZZ/fuzz_harness.a + +... +40000000-40001000 r-xp 00000000 103:03 8002276 fuzz_harness.a +40001000-4001f000 ---p 00000000 00:00 0 +4001f000-40020000 r--p 0000f000 103:03 8002276 fuzz_harness.a +40020000-40021000 rw-p 00010000 103:03 8002276 fuzz_harness.a +40021000-40022000 ---p 00000000 00:00 0 +40022000-40023000 rw-p 00000000 00:00 0 +``` + +Finally, setup Qemu environment variables... +```bash +export QEMU_SET_ENV=PYTHONPATH=$ALPINE_ROOT/usr/lib/python310.zip:$ALPINE_ROOT/usr/lib/python3.10:$ALPINE_ROOT/usr/lib/python3.10/lib-dynload:$ALPINE_ROOT/usr/lib/python3.10/site-packages,PYTHONHOME=$ALPINE_ROOT/usr/bin/ +export QEMU_LD_PREFIX=$ALPINE_ROOT +``` + +... and run AFL++: +```bash +mkdir -p $FUZZ/in && echo -n "FU" > $FUZZ/in/seed +AFL_ENTRYPOINT=0x400007cc afl-fuzz -i $FUZZ/in -o $FUZZ/out -Q -- $FUZZ/fuzz_harness.a +``` + +## Resources + +### setup.py + +```python +from distutils.core import setup, Extension + +module = Extension("memory", sources=["fuzz_target.c"]) + +setup( + name="memory", + version="1.0", + description='A simple "BOOM!" 
extension', + ext_modules=[module], +) +``` + +### fuzz_target.c + +```c +#define PY_SSIZE_T_CLEAN +#include + +#pragma clang optimize off + +static PyObject *corruption(PyObject* self, PyObject* args) { + char arr[3]; + Py_buffer name; + + if (!PyArg_ParseTuple(args, "y*", &name)) + return NULL; + + if (name.buf != NULL) { + if (strcmp(name.buf, "FUZZ") == 0) { + arr[0] = 'B'; + arr[1] = 'O'; + arr[2] = 'O'; + arr[3] = 'M'; + } + } + + PyBuffer_Release(&name); + Py_RETURN_NONE; +} + +static PyMethodDef MemoryMethods[] = { + {"corruption", corruption, METH_VARARGS, "BOOM!"}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef memory_module = { + PyModuleDef_HEAD_INIT, + "memory", + "BOOM!", + -1, + MemoryMethods +}; + +PyMODINIT_FUNC PyInit_memory(void) { + return PyModule_Create(&memory_module); +} +``` + +### fuzz_harness.c + +```c +#include + +#pragma clang optimize off + +int main(int argc, char **argv) { + unsigned char buf[1024000]; + ssize_t size; + + Py_Initialize(); + PyObject* name = PyUnicode_DecodeFSDefault("memory"); + PyObject* module = PyImport_Import(name); + Py_DECREF(name); + + if (module != NULL) { + PyObject* corruption_func = PyObject_GetAttrString(module, "corruption"); + + while ((size = read(0, buf, sizeof(buf))) > 0 ? 1 : 0) { + PyObject* arg = PyBytes_FromStringAndSize((char *)buf, size); + + if (arg != NULL) { + PyObject* res = PyObject_CallFunctionObjArgs(corruption_func, arg, NULL); + + if (res != NULL) { + Py_XDECREF(res); + } + + Py_DECREF(arg); + } + } + + Py_DECREF(corruption_func); + Py_DECREF(module); + } + + // Py_Finalize() leaks memory on certain Python versions (see https://bugs.python.org/issue1635741) + // Py_Finalize(); + return 0; +} +``` diff --git a/qemu_mode/README.md b/qemu_mode/README.md index 4ed2f298..92038737 100644 --- a/qemu_mode/README.md +++ b/qemu_mode/README.md @@ -66,6 +66,8 @@ allows to move the forkserver to a different part, e.g., just before the file is opened (e.g., way after command line parsing and config file loading, etc.) which can be a huge speed improvement. +For an example, see [README.deferred_initialization_example.md](README.deferred_initialization_example.md). 
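As a quick sanity check of the `AFL_ENTRYPOINT` value used in the example above: the address is simply the fixed QEMU load base reported by `AFL_QEMU_DEBUG_MAPS` plus the instruction offset picked from the `objdump` output. A minimal sketch, assuming the `0x40000000` base and the `0x7cc` offset shown in the outputs earlier:

```bash
# Derive the entry point from the two outputs above:
# load base of fuzz_harness (first AFL_QEMU_DEBUG_MAPS line) + objdump offset
QEMU_BASE=0x40000000   # 40000000-40001000 r-xp ... fuzz_harness
CALL_RET=0x7cc         # instruction right after the bl to PyObject_GetAttrString
printf 'AFL_ENTRYPOINT=0x%x\n' $((QEMU_BASE + CALL_RET))
# prints: AFL_ENTRYPOINT=0x400007cc
```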
+ ## 4) Persistent mode AFL++'s QEMU mode now supports also persistent mode for x86, x86_64, arm, and From 24e36212d507422bbbff78a514791d7f8d47301e Mon Sep 17 00:00:00 2001 From: Dawin Schmidt Date: Mon, 6 Feb 2023 09:04:33 -0500 Subject: [PATCH 40/77] Rename fuzzing harness --- .../README.deferred_initialization_example.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/qemu_mode/README.deferred_initialization_example.md b/qemu_mode/README.deferred_initialization_example.md index 0ba04b79..d940d6b5 100644 --- a/qemu_mode/README.deferred_initialization_example.md +++ b/qemu_mode/README.deferred_initialization_example.md @@ -40,13 +40,13 @@ ALPINE_ROOT= FUZZ= sudo systemd-nspawn -D $ALPINE_ROOT --bind=$FUZZ:/fuzz CC=$(which clang) CFLAGS="-g" LDSHARED="clang -shared" python3 -m pip install /fuzz -clang $(python3-config --embed --cflags) $(python3-config --embed --ldflags) -o /fuzz/fuzz_harness.a /fuzz/fuzz_harness.c +clang $(python3-config --embed --cflags) $(python3-config --embed --ldflags) -o /fuzz/fuzz_harness /fuzz/fuzz_harness.c exit ``` Manually trigger bug: ```bash -echo -n "FUZZ" | qemu-arm-static -L $ALPINE_ROOT $FUZZ/fuzz_harness.a +echo -n "FUZZ" | qemu-arm-static -L $ALPINE_ROOT $FUZZ/fuzz_harness ``` ## Run AFL++ @@ -54,7 +54,7 @@ Make sure to start the forkserver *after* loading all the shared objects by sett Choose an address just before the `while()` loop, for example: ```bash -qemu-arm-static -L $ALPINE_ROOT $ALPINE_ROOT/usr/bin/objdump -d $FUZZ/fuzz_harness.a | grep -A 1 "PyObject_GetAttrString" +qemu-arm-static -L $ALPINE_ROOT $ALPINE_ROOT/usr/bin/objdump -d $FUZZ/fuzz_harness | grep -A 1 "PyObject_GetAttrString" 00000584 : 584: e28fc600 add ip, pc, #0, 12 @@ -71,13 +71,13 @@ Check Qemu memory maps using the instructions from [here](https://aflplus.plus/d Setup Python environment variables and run `afl-qemu-trace`: ```bash -PYTHONPATH=$ALPINE_ROOT/usr/lib/python3.10/ PYTHONHOME=$ALPINE_ROOT/usr/bin/ QEMU_LD_PREFIX=$ALPINE_ROOT AFL_QEMU_DEBUG_MAPS=1 afl-qemu-trace $FUZZ/fuzz_harness.a +PYTHONPATH=$ALPINE_ROOT/usr/lib/python3.10/ PYTHONHOME=$ALPINE_ROOT/usr/bin/ QEMU_LD_PREFIX=$ALPINE_ROOT AFL_QEMU_DEBUG_MAPS=1 afl-qemu-trace $FUZZ/fuzz_harness ... -40000000-40001000 r-xp 00000000 103:03 8002276 fuzz_harness.a +40000000-40001000 r-xp 00000000 103:03 8002276 fuzz_harness 40001000-4001f000 ---p 00000000 00:00 0 -4001f000-40020000 r--p 0000f000 103:03 8002276 fuzz_harness.a -40020000-40021000 rw-p 00010000 103:03 8002276 fuzz_harness.a +4001f000-40020000 r--p 0000f000 103:03 8002276 fuzz_harness +40020000-40021000 rw-p 00010000 103:03 8002276 fuzz_harness 40021000-40022000 ---p 00000000 00:00 0 40022000-40023000 rw-p 00000000 00:00 0 ``` @@ -91,7 +91,7 @@ export QEMU_LD_PREFIX=$ALPINE_ROOT ... 
and run AFL++: ```bash mkdir -p $FUZZ/in && echo -n "FU" > $FUZZ/in/seed -AFL_ENTRYPOINT=0x400007cc afl-fuzz -i $FUZZ/in -o $FUZZ/out -Q -- $FUZZ/fuzz_harness.a +AFL_ENTRYPOINT=0x400007cc afl-fuzz -i $FUZZ/in -o $FUZZ/out -Q -- $FUZZ/fuzz_harness ``` ## Resources From dbfa23b40a6bdd1b8affc3920c68f11a6e63b231 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 6 Feb 2023 16:38:46 +0100 Subject: [PATCH 41/77] fixes --- instrumentation/afl-compiler-rt.o.c | 10 +++++++--- src/afl-fuzz-one.c | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index b1ce4427..9871d7f4 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -1518,9 +1518,13 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { _is_sancov = 1; - __afl_auto_first(); - __afl_auto_second(); - __afl_auto_early(); + if (!getenv("AFL_DUMP_MAP_SIZE")) { + + __afl_auto_first(); + __afl_auto_second(); + __afl_auto_early(); + + } if (__afl_debug) { diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 97855607..6367f597 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -5798,7 +5798,7 @@ void pso_updating(afl_state_t *afl) { u8 fuzz_one(afl_state_t *afl) { - int key_val_lv_1 = 0, key_val_lv_2 = 0; + int key_val_lv_1 = -1, key_val_lv_2 = -1; #ifdef _AFL_DOCUMENT_MUTATIONS @@ -5840,7 +5840,7 @@ u8 fuzz_one(afl_state_t *afl) { } - return (key_val_lv_1 | key_val_lv_2); + return (key_val_lv_1 == 0 || key_val_lv_2 == 0 ? 0 : 1 ); } From 6596284cc41484ec5062ca53109ec5bd7899e56f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 6 Feb 2023 17:59:17 +0100 Subject: [PATCH 42/77] endless loop fix --- src/afl-fuzz.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index b8114a7f..748c7acf 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -2210,8 +2210,8 @@ int main(int argc, char **argv_orig, char **envp) { cull_queue(afl); // ensure we have at least one seed that is not disabled. 
- u32 entry, valid_seeds = 0; - for (entry = 0; entry < afl->queued_items; ++entry) + u32 valid_seeds = 0; + for (u32 entry = 0; entry < afl->queued_items; ++entry) if (!afl->queue_buf[entry]->disabled) { ++valid_seeds; } if (!afl->pending_not_fuzzed || !valid_seeds) { @@ -2241,7 +2241,7 @@ int main(int argc, char **argv_orig, char **envp) { u64 max_ms = 0; - for (entry = 0; entry < afl->queued_items; ++entry) + for (u32 entry = 0; entry < afl->queued_items; ++entry) if (!afl->queue_buf[entry]->disabled) if (afl->queue_buf[entry]->exec_us > max_ms) max_ms = afl->queue_buf[entry]->exec_us; @@ -2285,7 +2285,7 @@ int main(int argc, char **argv_orig, char **envp) { #ifdef INTROSPECTION u32 prev_saved_crashes = 0, prev_saved_tmouts = 0; #endif - u32 prev_queued_items = 0, runs_in_current_cycle = (u32)-1; + u32 skip_count = 0, prev_queued_items = 0, runs_in_current_cycle = (u32)-1; u8 skipped_fuzz; #ifdef INTROSPECTION @@ -2547,8 +2547,57 @@ int main(int argc, char **argv_orig, char **envp) { } skipped_fuzz = fuzz_one(afl); + + if (unlikely(skipped_fuzz)) { + + ++skip_count; + + if (unlikely(skip_count > afl->active_items)) { + + if (afl->active_items > 1 && !afl->old_seed_selection) { + + u32 found = 0; + for (u32 i = 0; i < afl->queued_items; ++i) { + + if (likely(afl->queue_buf[i]->disabled && + !afl->queue_buf[i]->perf_score)) { + + ++found; + + } + + } + + if (found >= afl->active_items) { + + // all active items have a perf_score of 0 ... damn + for (u32 i = 0; i < afl->queued_items; ++i) { + + if (likely(afl->queue_buf[i]->disabled)) { + + afl->queue_buf[i]->perf_score = afl->queue_buf[i]->weight; + + } + + } + + } + + } + + skip_count = 0; + + } + + } else { + + skip_count = 0; + + } + #ifdef INTROSPECTION ++afl->queue_cur->stats_selected; + if (unlikely(skipped_fuzz)) { ++afl->queue_cur->stats_skipped; From 03e6d33a4044115c44afeb6c1ae735c0310018af Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 7 Feb 2023 15:27:31 +0100 Subject: [PATCH 43/77] fix perfscore 0 check --- src/afl-fuzz.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 748c7acf..8c2eb5b7 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -2559,8 +2559,8 @@ int main(int argc, char **argv_orig, char **envp) { u32 found = 0; for (u32 i = 0; i < afl->queued_items; ++i) { - if (likely(afl->queue_buf[i]->disabled && - !afl->queue_buf[i]->perf_score)) { + if (likely(!afl->queue_buf[i]->disabled && + afl->queue_buf[i]->perf_score == 0)) { ++found; @@ -2573,7 +2573,7 @@ int main(int argc, char **argv_orig, char **envp) { // all active items have a perf_score of 0 ... 
damn for (u32 i = 0; i < afl->queued_items; ++i) { - if (likely(afl->queue_buf[i]->disabled)) { + if (likely(!afl->queue_buf[i]->disabled)) { afl->queue_buf[i]->perf_score = afl->queue_buf[i]->weight; From ab26356bf73f2242555e6be72a004082fa22d402 Mon Sep 17 00:00:00 2001 From: Daniil Kutz Date: Tue, 7 Feb 2023 19:50:07 +0300 Subject: [PATCH 44/77] Increase fuzz_level for mopt_common_fuzzing Change performance score calculation for lin and quad power schedules --- src/afl-fuzz-one.c | 1 + src/afl-fuzz-queue.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 6367f597..76826945 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -5683,6 +5683,7 @@ pacemaker_fuzzing: } /* block */ + ++afl->queue_cur->fuzz_level; return ret_val; } diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c index e3faa392..ebfc252c 100644 --- a/src/afl-fuzz-queue.c +++ b/src/afl-fuzz-queue.c @@ -1007,10 +1007,16 @@ u32 calculate_score(afl_state_t *afl, struct queue_entry *q) { break; case LIN: + // Don't modify perf_score for unfuzzed seeds + if (!q->fuzz_level) break; + factor = q->fuzz_level / (afl->n_fuzz[q->n_fuzz_entry] + 1); break; case QUAD: + // Don't modify perf_score for unfuzzed seeds + if (!q->fuzz_level) break; + factor = q->fuzz_level * q->fuzz_level / (afl->n_fuzz[q->n_fuzz_entry] + 1); break; From 846e910e0c6d09808ea6f87b59e2cf79769979dc Mon Sep 17 00:00:00 2001 From: Daniil Kutz Date: Wed, 8 Feb 2023 13:50:03 +0300 Subject: [PATCH 45/77] Validate -M and -p power schedule options --- src/afl-fuzz.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 8c2eb5b7..de41600b 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -1297,6 +1297,12 @@ int main(int argc, char **argv_orig, char **envp) { } + if (afl->is_main_node == 1 && afl->schedule != FAST && afl->schedule != EXPLORE) { + + FATAL("-M is compatible only with fast and explore -p power schedules"); + + } + if (optind == argc || !afl->in_dir || !afl->out_dir || show_help) { usage(argv[0], show_help); From 05b1189a55b573a4021abed078dab098f4591ad6 Mon Sep 17 00:00:00 2001 From: Marcello Maugeri Date: Wed, 8 Feb 2023 15:53:49 +0100 Subject: [PATCH 46/77] Update afl-forkserver.c Fix typo --- src/afl-forkserver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c index 89d01460..5aa4c2ff 100644 --- a/src/afl-forkserver.c +++ b/src/afl-forkserver.c @@ -1370,7 +1370,7 @@ afl_fsrv_run_target(afl_forkserver_t *fsrv, u32 timeout, case Crash: case Asan: return FSRV_RUN_CRASH; - case Timout: + case Timeout: return FSRV_RUN_TMOUT; case InvalidWriteToPayload: /* ??? 
*/ From c86d06849b46865f126a522b7b4c8eb0f72c6ba1 Mon Sep 17 00:00:00 2001 From: Marcello Maugeri Date: Wed, 8 Feb 2023 15:54:27 +0100 Subject: [PATCH 47/77] Update forkserver.h Fix typo --- include/forkserver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/forkserver.h b/include/forkserver.h index 35bc1771..50898a08 100644 --- a/include/forkserver.h +++ b/include/forkserver.h @@ -43,7 +43,7 @@ typedef enum NyxReturnValue { Normal, Crash, Asan, - Timout, + Timeout, InvalidWriteToPayload, Error, IoError, From f2be73186e2e16c3992f92b65ae9ba598d6fff2f Mon Sep 17 00:00:00 2001 From: Yaakov Saxon Date: Thu, 9 Feb 2023 21:37:35 +0000 Subject: [PATCH 48/77] cmplog exec with target_path --- src/afl-fuzz-cmplog.c | 2 +- src/afl-fuzz.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/afl-fuzz-cmplog.c b/src/afl-fuzz-cmplog.c index 8967d4bc..2bf26d19 100644 --- a/src/afl-fuzz-cmplog.c +++ b/src/afl-fuzz-cmplog.c @@ -41,7 +41,7 @@ void cmplog_exec_child(afl_forkserver_t *fsrv, char **argv) { } - execv(argv[0], argv); + execv(fsrv->target_path, argv); } diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 8c2eb5b7..e7fd3dfe 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -2081,6 +2081,7 @@ int main(int argc, char **argv_orig, char **envp) { afl->cmplog_fsrv.qemu_mode = afl->fsrv.qemu_mode; afl->cmplog_fsrv.frida_mode = afl->fsrv.frida_mode; afl->cmplog_fsrv.cmplog_binary = afl->cmplog_binary; + afl->cmplog_fsrv.target_path = afl->fsrv.target_path; afl->cmplog_fsrv.init_child_func = cmplog_exec_child; if ((map_size <= DEFAULT_SHMEM_SIZE || From 673a0a3866783bf28e31d14fbd7a9009c7816ec3 Mon Sep 17 00:00:00 2001 From: Yaakov Saxon Date: Thu, 9 Feb 2023 22:02:47 +0000 Subject: [PATCH 49/77] add test for unprefixed path --- frida_mode/test/cmplog/GNUmakefile | 17 +++++++++++++++-- frida_mode/test/cmplog/Makefile | 3 +++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/frida_mode/test/cmplog/GNUmakefile b/frida_mode/test/cmplog/GNUmakefile index bcaff42d..fca52f82 100644 --- a/frida_mode/test/cmplog/GNUmakefile +++ b/frida_mode/test/cmplog/GNUmakefile @@ -2,8 +2,9 @@ PWD:=$(shell pwd)/ ROOT:=$(PWD)../../../ BUILD_DIR:=$(PWD)build/ +TEST_CMPLOG_BASENAME=compcovtest TEST_CMPLOG_SRC=$(PWD)cmplog.c -TEST_CMPLOG_OBJ=$(BUILD_DIR)compcovtest +TEST_CMPLOG_OBJ=$(BUILD_DIR)$(TEST_CMPLOG_BASENAME) TEST_BIN:=$(PWD)../../build/test @@ -13,7 +14,7 @@ CMP_LOG_INPUT:=$(TEST_DATA_DIR)in QEMU_OUT:=$(BUILD_DIR)qemu-out FRIDA_OUT:=$(BUILD_DIR)frida-out -.PHONY: all 32 clean qemu frida frida-nocmplog format +.PHONY: all 32 clean qemu frida frida-nocmplog frida-unprefixedpath format all: $(TEST_CMPLOG_OBJ) make -C $(ROOT)frida_mode/ @@ -64,6 +65,18 @@ frida-nocmplog: $(TEST_CMPLOG_OBJ) $(CMP_LOG_INPUT) -- \ $(TEST_CMPLOG_OBJ) @@ + +frida-unprefixedpath: $(TEST_CMPLOG_OBJ) $(CMP_LOG_INPUT) + PATH=$(BUILD_DIR) $(ROOT)afl-fuzz \ + -O \ + -i $(TEST_DATA_DIR) \ + -o $(FRIDA_OUT) \ + -c 0 \ + -l 3AT \ + -Z \ + -- \ + $(TEST_CMPLOG_BASENAME) @@ + debug: $(TEST_CMPLOG_OBJ) $(CMP_LOG_INPUT) gdb \ --ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \ diff --git a/frida_mode/test/cmplog/Makefile b/frida_mode/test/cmplog/Makefile index 7ca9a9a5..b84e9218 100644 --- a/frida_mode/test/cmplog/Makefile +++ b/frida_mode/test/cmplog/Makefile @@ -19,6 +19,9 @@ frida: frida-nocmplog: @gmake frida-nocmplog +frida-unprefixedpath: + @gmake frida-unprefixedpath + format: @gmake format From d3cdeabf9297ed2b5a5c06ce5b59980d41cdcb40 Mon Sep 17 00:00:00 2001 From: Yaakov Saxon Date: 
Thu, 9 Feb 2023 22:04:18 +0000 Subject: [PATCH 50/77] Add myself to contributors :) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index eeab7aa1..821b8cb7 100644 --- a/README.md +++ b/README.md @@ -228,6 +228,7 @@ Thank you! (For people sending pull requests - please add yourself to this list Thomas Rooijakkers David Carlier Ruben ten Hove Joey Jiao fuzzah @intrigus-lgtm + Yaakov Saxon ``` From 141c324eb935ddd25a9ea898bf94ed4f3afb7a79 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 12 Feb 2023 17:55:16 +0100 Subject: [PATCH 51/77] revert perfscore 0 fix attempt --- src/afl-fuzz.c | 56 ++++---------------------------------------------- 1 file changed, 4 insertions(+), 52 deletions(-) diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index e7fd3dfe..6bd81304 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -2211,8 +2211,8 @@ int main(int argc, char **argv_orig, char **envp) { cull_queue(afl); // ensure we have at least one seed that is not disabled. - u32 valid_seeds = 0; - for (u32 entry = 0; entry < afl->queued_items; ++entry) + u32 entry, valid_seeds = 0; + for (entry = 0; entry < afl->queued_items; ++entry) if (!afl->queue_buf[entry]->disabled) { ++valid_seeds; } if (!afl->pending_not_fuzzed || !valid_seeds) { @@ -2242,7 +2242,7 @@ int main(int argc, char **argv_orig, char **envp) { u64 max_ms = 0; - for (u32 entry = 0; entry < afl->queued_items; ++entry) + for (entry = 0; entry < afl->queued_items; ++entry) if (!afl->queue_buf[entry]->disabled) if (afl->queue_buf[entry]->exec_us > max_ms) max_ms = afl->queue_buf[entry]->exec_us; @@ -2286,7 +2286,7 @@ int main(int argc, char **argv_orig, char **envp) { #ifdef INTROSPECTION u32 prev_saved_crashes = 0, prev_saved_tmouts = 0; #endif - u32 skip_count = 0, prev_queued_items = 0, runs_in_current_cycle = (u32)-1; + u32 prev_queued_items = 0, runs_in_current_cycle = (u32)-1; u8 skipped_fuzz; #ifdef INTROSPECTION @@ -2548,54 +2548,6 @@ int main(int argc, char **argv_orig, char **envp) { } skipped_fuzz = fuzz_one(afl); - - if (unlikely(skipped_fuzz)) { - - ++skip_count; - - if (unlikely(skip_count > afl->active_items)) { - - if (afl->active_items > 1 && !afl->old_seed_selection) { - - u32 found = 0; - for (u32 i = 0; i < afl->queued_items; ++i) { - - if (likely(!afl->queue_buf[i]->disabled && - afl->queue_buf[i]->perf_score == 0)) { - - ++found; - - } - - } - - if (found >= afl->active_items) { - - // all active items have a perf_score of 0 ... 
damn - for (u32 i = 0; i < afl->queued_items; ++i) { - - if (likely(!afl->queue_buf[i]->disabled)) { - - afl->queue_buf[i]->perf_score = afl->queue_buf[i]->weight; - - } - - } - - } - - } - - skip_count = 0; - - } - - } else { - - skip_count = 0; - - } - #ifdef INTROSPECTION ++afl->queue_cur->stats_selected; From 7eaef449a1e92999c89df23ab474b3be3da595f8 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 08:14:04 +0100 Subject: [PATCH 52/77] remove ALTERNATIVE_TOKENIZE --- custom_mutators/autotokens/autotokens.cpp | 548 ++++++---------------- 1 file changed, 149 insertions(+), 399 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index ee35c68b..a027ac2b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -22,7 +22,6 @@ extern "C" { #define AUTOTOKENS_DEBUG 0 #define AUTOTOKENS_ONLY_FAV 0 -#define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_CHANGE_MAX 64 #define AUTOTOKENS_WHITESPACE " " @@ -60,7 +59,6 @@ typedef struct my_mutator { static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; static int only_fav = AUTOTOKENS_ONLY_FAV; -static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR; @@ -142,7 +140,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, (void)(data); - if (s == NULL) { + if (unlikely(s == NULL)) { *out_buf = NULL; return 0; @@ -183,9 +181,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } while (unlikely( new_item == cur_item || - (!alternative_tokenize && - ((whitespace_ids < new_item && whitespace_ids >= cur_item) || - (whitespace_ids >= new_item && whitespace_ids < cur_item))))); + ((whitespace_ids < new_item && whitespace_ids >= cur_item) || + (whitespace_ids >= new_item && whitespace_ids < cur_item)))); DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; @@ -200,37 +197,33 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, new_item = rand_below(afl_ptr, current_id); - } while (unlikely(!alternative_tokenize && new_item >= whitespace_ids)); + } while (unlikely(new_item >= whitespace_ids)); u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); - if (likely(!alternative_tokenize)) { + // if we insert an identifier or string we might need whitespace + if (id_to_token[new_item].size() > 1) { - // if we insert an identifier or string we might need whitespace - if (id_to_token[new_item].size() > 1) { + // need to insert before? - // need to insert before? + if (pos && m[pos - 1] >= whitespace_ids && + id_to_token[m[pos - 1]].size() > 1) { - if (pos && m[pos - 1] >= whitespace_ids && - id_to_token[m[pos - 1]].size() > 1) { + m.insert(m.begin() + pos, good_whitespace_or_singleval()); + ++m_size; - m.insert(m.begin() + pos, good_whitespace_or_singleval()); - ++m_size; + } - } + if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && + id_to_token[m[pos + 1]].size() > 1) { - if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && - id_to_token[m[pos + 1]].size() > 1) { + // need to insert after? - // need to insert after? 
- - m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); - ++m_size; - - } + m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); + ++m_size; } @@ -290,26 +283,22 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - if (likely(!alternative_tokenize)) { + // do we need a whitespace/token at the beginning? + if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && + id_to_token[m[dst_off]].size() > 1) { - // do we need a whitespace/token at the beginning? - if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && - id_to_token[m[dst_off]].size() > 1) { + m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); + ++m_size; - m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); - ++m_size; + } - } + // do we need a whitespace/token at the end? + if (dst_off + n < m_size && + id_to_token[m[dst_off + n - 1]].size() > 1 && + id_to_token[m[dst_off + n]].size() > 1) { - // do we need a whitespace/token at the end? - if (dst_off + n < m_size && - id_to_token[m[dst_off + n - 1]].size() > 1 && - id_to_token[m[dst_off + n]].size() > 1) { - - m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); - ++m_size; - - } + m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); + ++m_size; } @@ -332,8 +321,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, // if what we delete will result in a missing whitespace/token, // instead of deleting we switch the item to a whitespace or token. - if (likely(!alternative_tokenize) && pos && pos + 1 < m_size && - id_to_token[m[pos - 1]].size() > 1 && + if (pos && pos + 1 < m_size && id_to_token[m[pos - 1]].size() > 1 && id_to_token[m[pos + 1]].size() > 1) { m[pos] = good_whitespace_or_singleval(); @@ -362,17 +350,11 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - u32 m_size_1 = m_size - 1; output = ""; for (i = 0; i < m_size; ++i) { output += id_to_token[m[i]]; - if (unlikely(alternative_tokenize && i < m_size_1)) { - - output += whitespace; - - } } @@ -725,336 +707,107 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "START!\n"); - if (likely(!alternative_tokenize)) { + while (my_search_string(cur, ende, &match_begin, &match_end)) { - while (my_search_string(cur, ende, &match_begin, &match_end)) { + prev = cur; + found = match_begin; + cur = match_end; - prev = cur; - found = match_begin; - cur = match_end; + IFDEBUG { - IFDEBUG { + string foo(match_begin, match_end); + DEBUGF(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + foo.c_str(), prev - input.begin(), found - prev, + cur - input.begin()); - string foo(match_begin, match_end); - DEBUGF(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - foo.c_str(), prev - input.begin(), found - prev, - cur - input.begin()); + } - } + if (prev < found) { // there are items between search start and find + while (prev < found) { - if (prev < found) { // there are items between search start and find - while (prev < found) { + if (isspace(*prev)) { - if (isspace(*prev)) { + auto start = prev; + while (isspace(*prev)) { - auto start = prev; - while (isspace(*prev)) { - - ++prev; - - } - - tokens.push_back(std::string(start, prev)); - DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); - - } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { - - auto start = prev; - while (isalnum(*prev) || *prev == '$' || *prev == '_' || - *prev == '.' 
|| *prev == '/') { - - ++prev; - - } - - tokens.push_back(string(start, prev)); - DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); - - } else { - - tokens.push_back(string(prev, prev + 1)); - DEBUGF(stderr, "OTHER \"%c\"\n", *prev); ++prev; } - } - - } - - tokens.push_back(string(match_begin, match_end)); - DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str()); - - } - - DEBUGF(stderr, "AFTER all strings\n"); - - if (cur < ende) { - - while (cur < ende) { - - if (isspace(*cur)) { - - auto start = cur; - while (isspace(*cur)) { - - ++cur; - - } - - tokens.push_back(std::string(start, cur)); - DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens.push_back(std::string(start, prev)); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, tokens[tokens.size() - 1].c_str()); - } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { + } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { - auto start = cur; - while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' || - *cur == '/') { + auto start = prev; + while (isalnum(*prev) || *prev == '$' || *prev == '_' || + *prev == '.' || *prev == '/') { - ++cur; + ++prev; } - tokens.push_back(std::string(start, cur)); - DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens.push_back(string(start, prev)); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, tokens[tokens.size() - 1].c_str()); } else { - tokens.push_back(std::string(cur, cur + 1)); - DEBUGF(stderr, "OTHER \"%c\"\n", *cur); + tokens.push_back(string(prev, prev + 1)); + DEBUGF(stderr, "OTHER \"%c\"\n", *prev); + ++prev; + + } + + } + + } + + tokens.push_back(string(match_begin, match_end)); + DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str()); + + } + + DEBUGF(stderr, "AFTER all strings\n"); + + if (cur < ende) { + + while (cur < ende) { + + if (isspace(*cur)) { + + auto start = cur; + while (isspace(*cur)) { + ++cur; } - } + tokens.push_back(std::string(start, cur)); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); - } + } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { - } else { + auto start = cur; + while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' 
|| + *cur == '/') { - // alternative tokenize - while (my_search_string(cur, ende, &match_begin, &match_end)) { - - prev = cur; - found = match_begin; - cur = match_end; - IFDEBUG { - - string foo(match_begin, match_end); - DEBUGF(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - foo.c_str(), prev - input.begin(), found - prev, - cur - input.begin()); - - } - - if (prev < found) { // there are items between search start and find - - sregex_token_iterator it{prev, found, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { - - return s.size() == 0; - - }), - - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - - IFDEBUG { - - DEBUGF(stderr, "tokens1: %lu input size: %lu\n", tokenized.size(), - input.size()); - for (auto x : tokenized) { - - cerr << x << endl; - - } + ++cur; } - for (auto token : tokenized) { + tokens.push_back(std::string(start, cur)); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; + } else { - while (regex_search(c, e, m, regex_word)) { - - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { - - // there are items between search start and find - while (p < f) { - - IFDEBUG { - - string foo(p, p + 1); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(p, p + 1)); - ++p; - - } - - IFDEBUG { - - string foo(p, f); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - tokens.push_back(std::string(p, f)); - - } - - } - - DEBUGF(stderr, - "SUBstring \"%s\" found at start %lu offset %lu continue " - "at %lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); - tokens.push_back(m[0].str()); - - } - - if (c < e) { - - while (c < e) { - - IFDEBUG { - - string foo(c, c + 1); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(c, c + 1)); - ++c; - - } - - IFDEBUG { - - string foo(c, e); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(c, e)); - - } - - } - - } - - tokens.push_back(string(match_begin, match_end)); - - } - - if (cur < ende) { - - sregex_token_iterator it{cur, ende, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase( - std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { return s.size() == 0; }), - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - - IFDEBUG { - - DEBUGF(stderr, "tokens2: %lu input size: %lu\n", tokenized.size(), - input.size()); - for (auto x : tokenized) { - - cerr << x << endl; - - } - - } - - for (auto token : tokenized) { - - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; - - while (regex_search(c, e, m, regex_word)) { - - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { - - // there are items between search start and find - while (p < f) { - - IFDEBUG { - - string foo(p, p + 1); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(p, p + 1)); - ++p; - - } - - IFDEBUG { - - string foo(p, f); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(p, f)); - - } - - DEBUGF(stderr, - "SUB2string \"%s\" found at start %lu offset %lu continue " - "at %lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - 
c - token.begin()); - tokens.push_back(m[0].str()); - - } - - if (c < e) { - - while (c < e) { - - IFDEBUG { - - string foo(c, c + 1); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(c, c + 1)); - ++c; - - } - - IFDEBUG { - - string foo(c, e); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(c, e)); - - } + tokens.push_back(std::string(cur, cur + 1)); + DEBUGF(stderr, "OTHER \"%c\"\n", *cur); + ++cur; } @@ -1065,15 +818,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, IFDEBUG { DEBUGF(stderr, "DUMPING TOKENS:\n"); - u32 size_1 = tokens.size() - 1; for (u32 i = 0; i < tokens.size(); ++i) { DEBUGF(stderr, "%s", tokens[i].c_str()); - if (unlikely(alternative_tokenize && i < size_1)) { - - DEBUGF(stderr, "%s", whitespace.c_str()); - - } } @@ -1157,7 +904,6 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; } - if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } if (getenv("AUTOTOKENS_LEARN_DICT")) { @@ -1180,14 +926,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_CHANGE_MIN")) { change_min = atoi(getenv("AUTOTOKENS_CHANGE_MIN")); - if (change_min < 1 || change_min > 256) { change_min = AUTOTOKENS_CHANGE_MIN; } + if (change_min < 1 || change_min > 256) { + + change_min = AUTOTOKENS_CHANGE_MIN; + + } } if (getenv("AUTOTOKENS_CHANGE_MAX")) { change_max = atoi(getenv("AUTOTOKENS_CHANGE_MAX")); - if (change_max < 1 || change_max > 4096) { change_max = AUTOTOKENS_CHANGE_MAX; } + if (change_max < 1 || change_max > 4096) { + + change_max = AUTOTOKENS_CHANGE_MAX; + + } } @@ -1212,53 +966,49 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { // set common whitespace tokens // we deliberately do not put uncommon ones here to these will count as // identifier tokens. 
- if (!alternative_tokenize) { - - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t"] = current_id; - id_to_token[current_id] = "\t"; - ++current_id; - token_to_id["\n"] = current_id; - id_to_token[current_id] = "\n"; - ++current_id; - token_to_id["\r\n"] = current_id; - id_to_token[current_id] = "\r\n"; - ++current_id; - token_to_id[" \n"] = current_id; - id_to_token[current_id] = " \n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t"] = current_id; - id_to_token[current_id] = "\t\t"; - ++current_id; - token_to_id["\n\n"] = current_id; - id_to_token[current_id] = "\n\n"; - ++current_id; - token_to_id["\r\n\r\n"] = current_id; - id_to_token[current_id] = "\r\n\r\n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t\t\t"] = current_id; - id_to_token[current_id] = "\t\t\t\t"; - ++current_id; - token_to_id["\n\n\n\n"] = current_id; - id_to_token[current_id] = "\n\n\n\n"; - ++current_id; - whitespace_ids = current_id; - token_to_id["\""] = current_id; - id_to_token[current_id] = "\""; - ++current_id; - token_to_id["'"] = current_id; - id_to_token[current_id] = "'"; - ++current_id; - - } + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t"] = current_id; + id_to_token[current_id] = "\t"; + ++current_id; + token_to_id["\n"] = current_id; + id_to_token[current_id] = "\n"; + ++current_id; + token_to_id["\r\n"] = current_id; + id_to_token[current_id] = "\r\n"; + ++current_id; + token_to_id[" \n"] = current_id; + id_to_token[current_id] = " \n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t"] = current_id; + id_to_token[current_id] = "\t\t"; + ++current_id; + token_to_id["\n\n"] = current_id; + id_to_token[current_id] = "\n\n"; + ++current_id; + token_to_id["\r\n\r\n"] = current_id; + id_to_token[current_id] = "\r\n\r\n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t\t\t"] = current_id; + id_to_token[current_id] = "\t\t\t\t"; + ++current_id; + token_to_id["\n\n\n\n"] = current_id; + id_to_token[current_id] = "\n\n\n\n"; + ++current_id; + whitespace_ids = current_id; + token_to_id["\""] = current_id; + id_to_token[current_id] = "\""; + ++current_id; + token_to_id["'"] = current_id; + id_to_token[current_id] = "'"; + ++current_id; return data; From 240f6421d8240b4b4d4d5bd509c0c3277a083896 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 08:23:47 +0100 Subject: [PATCH 53/77] optimize performance --- custom_mutators/autotokens/autotokens.cpp | 80 +++++++---------------- 1 file changed, 23 insertions(+), 57 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index a027ac2b..ca738d0b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -204,31 +204,6 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, ++m_size; DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); - // if we insert an identifier or string we might need whitespace - if (id_to_token[new_item].size() > 1) { - - // need to insert before? 
- - if (pos && m[pos - 1] >= whitespace_ids && - id_to_token[m[pos - 1]].size() > 1) { - - m.insert(m.begin() + pos, good_whitespace_or_singleval()); - ++m_size; - - } - - if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && - id_to_token[m[pos + 1]].size() > 1) { - - // need to insert after? - - m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); - ++m_size; - - } - - } - break; } @@ -283,25 +258,6 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - // do we need a whitespace/token at the beginning? - if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && - id_to_token[m[dst_off]].size() > 1) { - - m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); - ++m_size; - - } - - // do we need a whitespace/token at the end? - if (dst_off + n < m_size && - id_to_token[m[dst_off + n - 1]].size() > 1 && - id_to_token[m[dst_off + n]].size() > 1) { - - m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); - ++m_size; - - } - break; } @@ -319,19 +275,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } while (unlikely(m[pos] < whitespace_ids)); - // if what we delete will result in a missing whitespace/token, - // instead of deleting we switch the item to a whitespace or token. - if (pos && pos + 1 < m_size && id_to_token[m[pos - 1]].size() > 1 && - id_to_token[m[pos + 1]].size() > 1) { - - m[pos] = good_whitespace_or_singleval(); - - } else { - - m.erase(m.begin() + pos); - --m_size; - - } + m.erase(m.begin() + pos); + --m_size; } else { @@ -350,10 +295,31 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + /* Now we create the output */ + output = ""; + u32 prev_size = 0; for (i = 0; i < m_size; ++i) { + if (likely(i + 1 < m_size)) { + + u32 this_size = id_to_token[m[i]].size(); + + /* The output we are generating might need repairing. + General rule: two items that have a size larger than 2 are strings + or identifizers and need a whitespace or an item of length 1 in + between. */ + if (unlikely(prev_size > 1 && this_size > 1)) { + + output += id_to_token[good_whitespace_or_singleval()]; + + } + + prev_size = this_size; + + } + output += id_to_token[m[i]]; } From 61439859cece05cd3e204af60bb5ff08556c490d Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 08:26:30 +0100 Subject: [PATCH 54/77] cleanup --- custom_mutators/autotokens/README | 4 ---- custom_mutators/autotokens/autotokens.cpp | 8 -------- 2 files changed, 12 deletions(-) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index e9c48662..904b5fa3 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -29,7 +29,3 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! `AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and a dictionary loaded then create one initial structure based on the dictionary. 
-`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation - (experimental) -`AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, - default is " " diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index ca738d0b..10afa2c2 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -24,7 +24,6 @@ extern "C" { #define AUTOTOKENS_ONLY_FAV 0 #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_CHANGE_MAX 64 -#define AUTOTOKENS_WHITESPACE " " #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 @@ -75,7 +74,6 @@ static unordered_map *> file_mapping; static unordered_map *> id_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -static string whitespace = AUTOTOKENS_WHITESPACE; static string output; static regex *regex_comment_custom; // multiline requires g++-11 libs :( @@ -913,12 +911,6 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (change_max < change_min) { change_max = change_min + 1; } - if (getenv("AUTOTOKENS_WHITESPACE")) { - - whitespace = getenv("AUTOTOKENS_WHITESPACE"); - - } - if (getenv("AUTOTOKENS_COMMENT")) { char buf[256]; From 54fa78d32ce6779117a656c72f5c630713e7033f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 09:52:57 +0100 Subject: [PATCH 55/77] autodisable and better performance --- custom_mutators/autotokens/Makefile | 12 +- custom_mutators/autotokens/TODO | 21 ---- custom_mutators/autotokens/autotokens.cpp | 143 +++++++++++++++++----- include/config.h | 4 + src/afl-fuzz-queue.c | 89 +++++++++----- 5 files changed, 179 insertions(+), 90 deletions(-) diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index ab1da4b6..6ee7d324 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,16 +1,22 @@ ifdef debug - CFLAGS += -fsanitize=address -Wall + CPPLAGS += -fsanitize=address + CXXFLAGS += -Wall + CC := clang CXX := clang++ endif ifdef DEBUG - CFLAGS += -fsanitize=address -Wall + CPPFLAGS += -fsanitize=address + CXXFLAGS += -Wall + CC := clang CXX := clang++ endif all: autotokens.so autotokens.so: autotokens.cpp - $(CXX) -g -O3 $(CFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o + $(CC) -D_STANDALONE_MODULE=1 -I../../include -g -O3 $(CPPFLAGS) -fPIC -c -o ./afl-fuzz-queue.o ../../src/afl-fuzz-queue.c + $(CC) -I../../include -g -O3 $(CPPFLAGS) -DBIN_PATH=\"dummy\" -Wno-pointer-sign -fPIC -c -o ./afl-common.o ../../src/afl-common.c + $(CXX) -Wno-deprecated -g -O3 $(CXXFLAGS) $(CPPFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ./afl-fuzz-queue.o ../../src/afl-performance.o ./afl-common.o clean: rm -f autotokens.so *~ core diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 496bfd45..2e99e147 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,24 +1,3 @@ -create from thin air if no good seed after a cycle and dict large enough? 
-(static u32 no_of_struct_inputs;) - -splicing -> check if whitespace/token is needed - -whitespace/token check only AFTER mutation - -analyse welche einen DICT haben, und welche davon rein ascii - -corpus analyse: - + libxml - - sqlite - - libpcap -min len, max len, % wenn 95/98/99/100 ascii - env für menge an per mutation run -AFL_TXT_MAX_LEN 65535 -AFL_TXT_MIN_LEN 16 -AFL_TXT_MIN_PERCENT=99 - --> KEIN FAV! - change_min/_max werte diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 10afa2c2..cda90a38 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -25,10 +25,12 @@ extern "C" { #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_CHANGE_MAX 64 #define AUTOTOKENS_SIZE_MIN 8 +#define AUTOTOKENS_SIZE_MAX 65535 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 #define AUTOTOKENS_CREATE_FROM_THIN_AIR 0 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 +#define AUTOTOKENS_AUTO_DISABLE 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog #define AUTOTOKENS_LEARN_DICT 1 #ifndef AUTOTOKENS_SPLICE_DISABLE @@ -56,6 +58,8 @@ typedef struct my_mutator { #define IFDEBUG if (unlikely(debug)) static afl_state *afl_ptr; +static int module_disabled = 0; +static int auto_disable = AUTOTOKENS_AUTO_DISABLE; static int debug = AUTOTOKENS_DEBUG; static int only_fav = AUTOTOKENS_ONLY_FAV; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; @@ -93,6 +97,99 @@ static void first_run(void *data) { (void)(data); + /* For auto-loading this module we check here if we can analyze from the + input if the inputs look like text inputs and disable the module if + not. */ + + if (afl_ptr->custom_only || !auto_disable) { return; } + + if (unlikely(afl_ptr->active_items == 1 && + afl_ptr->queue_cur->len < AFL_TXT_MIN_LEN)) { + + if (afl_ptr->extras_cnt > 8) { + + u32 valid = 0; + + while (extras_cnt < afl_ptr->extras_cnt) { + + u32 ok = 1, l = afl_ptr->extras[extras_cnt].len; + u8 *buf, *ptr = afl_ptr->extras[extras_cnt].data; + + for (u32 i = 0; i < l; ++i) { + + if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) { + + ok = 0; + break; + + } + + } + + if (ok) { + + buf = (u8 *)malloc(afl_ptr->extras[extras_cnt].len + 1); + memcpy(buf, afl_ptr->extras[extras_cnt].data, + afl_ptr->extras[extras_cnt].len); + buf[afl_ptr->extras[extras_cnt].len] = 0; + token_to_id[(char *)buf] = current_id; + id_to_token[current_id] = (char *)buf; + ++current_id; + ++valid; + + } + + ++extras_cnt; + + } + + if ((valid * 100) / afl_ptr->extras_cnt < 95) { module_disabled = 1; } + + } else { + + module_disabled = 1; + + } + + return; + + } + + u32 is_ascii = 0, valid = 0; + + for (u32 i = 0; i < afl_ptr->queued_items; ++i) { + + struct queue_entry *q; + + q = afl_ptr->queue_buf[i]; + + if (!q->disabled && q->len >= AUTOTOKENS_SIZE_MIN && + q->len <= AFL_TXT_MAX_LEN) { + + ++valid; + u8 *input = queue_testcase_get(afl_ptr, q); + + u32 valid_chars = 0; + for (u32 i = 0; i < q->len; ++i) { + + if (isascii((int)input[i]) || isprint((int)input[i])) { ++valid_chars; } + + } + + // we want at least 99% of text characters ... 
+ if (((q->len * AFL_TXT_MIN_PERCENT) / 100) <= valid_chars) { + + ++is_ascii; + q->is_ascii = 1; + + } + + } + + } + + if ((is_ascii * 100) / valid < 70) { module_disabled = 1; } + } static u32 good_whitespace_or_singleval() { @@ -441,21 +538,25 @@ extern "C" unsigned char afl_custom_queue_get(void *data, is_first_run = 0; first_run(data); + if (module_disabled) { WARNF("Autotokens custom module is disabled."); } + } - if (unlikely(!afl_ptr->custom_only) && !create_from_thin_air && - ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || - (only_fav && !afl_ptr->queue_cur->favored))) { + if (likely(module_disabled) || + (unlikely(!afl_ptr->custom_only) && !create_from_thin_air && + ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored)))) { s = NULL; - DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); + DEBUGF(stderr, + "cmplog not ascii or only_fav and not favorite or disabled\n"); return 1; } // check if there are new dictionary entries and add them to the tokens - if (likely(valid_structures || create_from_thin_air) && - learn_state < learn_dictionary_tokens) { + if (unlikely(learn_state < learn_dictionary_tokens) && + likely(valid_structures || create_from_thin_air)) { if (unlikely(!learn_state)) { learn_state = 1; } @@ -569,21 +670,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (entry == file_mapping.end()) { // this input file was not analyzed for tokens yet, so let's do it! - - FILE *fp = fopen((char *)filename, "rb"); - if (!fp) { - - s = NULL; - return 1; - - } // should not happen - - fseek(fp, 0, SEEK_END); - size_t len = (size_t)ftell(fp); + size_t len = afl_ptr->queue_cur->len; if (len < AFL_TXT_MIN_LEN) { - fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); @@ -591,7 +681,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } else if (len > AFL_TXT_MAX_LEN) { - fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; DEBUGF(stderr, "Too long (%lu) %s\n", len, filename); @@ -599,19 +688,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - string input; - input.resize(len); - rewind(fp); - - if (fread((void *)input.data(), 1, len, fp) != len) { - - s = NULL; - DEBUGF(stderr, "Too short read %s\n", filename); - return 1; - - } - - fclose(fp); + u8 *input_buf = queue_testcase_get(afl_ptr, afl_ptr->queue_cur); + string input((char *)input_buf, afl_ptr->queue_cur->len); if (!afl_ptr->shm.cmplog_mode) { @@ -866,6 +944,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } + if (getenv("AUTOTOKENS_AUTO_DISABLE")) { auto_disable = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; } diff --git a/include/config.h b/include/config.h index 49d09174..ad8b76a8 100644 --- a/include/config.h +++ b/include/config.h @@ -491,6 +491,10 @@ #define AFL_TXT_MIN_LEN 16 +/* Maximum length of a queue input to be evaluated for "is_ascii"? */ + +#define AFL_TXT_MAX_LEN 65535 + /* What is the minimum percentage of ascii characters present to be classifed as "is_ascii"? 
*/ diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c index e3faa392..3c8a3e46 100644 --- a/src/afl-fuzz-queue.c +++ b/src/afl-fuzz-queue.c @@ -27,6 +27,22 @@ #include #include +#ifdef _STANDALONE_MODULE +void minimize_bits(afl_state_t *afl, u8 *dst, u8 *src) { + + return; + +} + +void run_afl_custom_queue_new_entry(afl_state_t *afl, struct queue_entry *q, + u8 *a, u8 *b) { + + return; + +} + +#endif + /* select next queue entry based on alias algo - fast! */ inline u32 select_next_queue_entry(afl_state_t *afl) { @@ -78,8 +94,8 @@ void create_alias_table(afl_state_t *afl) { afl->alias_probability = (double *)afl_realloc( (void **)&afl->alias_probability, n * sizeof(double)); double *P = (double *)afl_realloc(AFL_BUF_PARAM(out), n * sizeof(double)); - int *S = (u32 *)afl_realloc(AFL_BUF_PARAM(out_scratch), n * sizeof(u32)); - int *L = (u32 *)afl_realloc(AFL_BUF_PARAM(in_scratch), n * sizeof(u32)); + int *S = (int *)afl_realloc(AFL_BUF_PARAM(out_scratch), n * sizeof(u32)); + int *L = (int *)afl_realloc(AFL_BUF_PARAM(in_scratch), n * sizeof(u32)); if (!P || !S || !L || !afl->alias_table || !afl->alias_probability) { @@ -247,11 +263,11 @@ void create_alias_table(afl_state_t *afl) { void mark_as_det_done(afl_state_t *afl, struct queue_entry *q) { - u8 fn[PATH_MAX]; - s32 fd; + char fn[PATH_MAX]; + s32 fd; snprintf(fn, PATH_MAX, "%s/queue/.state/deterministic_done/%s", afl->out_dir, - strrchr(q->fname, '/') + 1); + strrchr((char *)q->fname, '/') + 1); fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION); if (fd < 0) { PFATAL("Unable to create '%s'", fn); } @@ -266,10 +282,10 @@ void mark_as_det_done(afl_state_t *afl, struct queue_entry *q) { void mark_as_variable(afl_state_t *afl, struct queue_entry *q) { - u8 fn[PATH_MAX]; - u8 ldest[PATH_MAX]; + char fn[PATH_MAX]; + char ldest[PATH_MAX]; - u8 *fn_name = strrchr(q->fname, '/') + 1; + char *fn_name = strrchr((char *)q->fname, '/') + 1; sprintf(ldest, "../../%s", fn_name); sprintf(fn, "%s/queue/.state/variable_behavior/%s", afl->out_dir, fn_name); @@ -293,12 +309,12 @@ void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) { if (likely(state == q->fs_redundant)) { return; } - u8 fn[PATH_MAX]; + char fn[PATH_MAX]; q->fs_redundant = state; sprintf(fn, "%s/queue/.state/redundant_edges/%s", afl->out_dir, - strrchr(q->fname, '/') + 1); + strrchr((char *)q->fname, '/') + 1); if (state) { @@ -409,7 +425,7 @@ u8 check_if_text_buf(u8 *buf, u32 len) { static u8 check_if_text(afl_state_t *afl, struct queue_entry *q) { - if (q->len < AFL_TXT_MIN_LEN) return 0; + if (q->len < AFL_TXT_MIN_LEN || q->len < AFL_TXT_MAX_LEN) return 0; u8 *buf; int fd; @@ -417,8 +433,8 @@ static u8 check_if_text(afl_state_t *afl, struct queue_entry *q) { ssize_t comp; if (len >= MAX_FILE) len = MAX_FILE - 1; - if ((fd = open(q->fname, O_RDONLY)) < 0) return 0; - buf = afl_realloc(AFL_BUF_PARAM(in_scratch), len + 1); + if ((fd = open((char *)q->fname, O_RDONLY)) < 0) return 0; + buf = (u8 *)afl_realloc(AFL_BUF_PARAM(in_scratch), len + 1); comp = read(fd, buf, len); close(fd); if (comp != (ssize_t)len) return 0; @@ -520,7 +536,8 @@ static u8 check_if_text(afl_state_t *afl, struct queue_entry *q) { void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) { - struct queue_entry *q = ck_alloc(sizeof(struct queue_entry)); + struct queue_entry *q = + (struct queue_entry *)ck_alloc(sizeof(struct queue_entry)); q->fname = fname; q->len = len; @@ -554,7 +571,7 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) { 
afl->cycles_wo_finds = 0; - struct queue_entry **queue_buf = afl_realloc( + struct queue_entry **queue_buf = (struct queue_entry **)afl_realloc( AFL_BUF_PARAM(queue), afl->queued_items * sizeof(struct queue_entry *)); if (unlikely(!queue_buf)) { PFATAL("alloc"); } queue_buf[afl->queued_items - 1] = q; @@ -574,7 +591,11 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) { } /* only redqueen currently uses is_ascii */ - if (afl->shm.cmplog_mode) q->is_ascii = check_if_text(afl, q); + if (unlikely(afl->shm.cmplog_mode && !q->is_ascii)) { + + q->is_ascii = check_if_text(afl, q); + + } } @@ -704,7 +725,7 @@ void update_bitmap_score(afl_state_t *afl, struct queue_entry *q) { if (!q->trace_mini) { u32 len = (afl->fsrv.map_size >> 3); - q->trace_mini = ck_alloc(len); + q->trace_mini = (u8 *)ck_alloc(len); minimize_bits(afl, q->trace_mini, afl->fsrv.trace_bits); } @@ -1090,19 +1111,19 @@ inline void queue_testcase_retake(afl_state_t *afl, struct queue_entry *q, if (len != old_len) { afl->q_testcase_cache_size = afl->q_testcase_cache_size + len - old_len; - q->testcase_buf = realloc(q->testcase_buf, len); + q->testcase_buf = (u8 *)realloc(q->testcase_buf, len); if (unlikely(!q->testcase_buf)) { - PFATAL("Unable to malloc '%s' with len %u", q->fname, len); + PFATAL("Unable to malloc '%s' with len %u", (char *)q->fname, len); } } - int fd = open(q->fname, O_RDONLY); + int fd = open((char *)q->fname, O_RDONLY); - if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", q->fname); } + if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", (char *)q->fname); } ck_read(fd, q->testcase_buf, len, q->fname); close(fd); @@ -1122,7 +1143,7 @@ inline void queue_testcase_retake_mem(afl_state_t *afl, struct queue_entry *q, if (likely(len != old_len)) { - u8 *ptr = realloc(q->testcase_buf, len); + u8 *ptr = (u8 *)realloc(q->testcase_buf, len); if (likely(ptr)) { @@ -1154,23 +1175,23 @@ inline u8 *queue_testcase_get(afl_state_t *afl, struct queue_entry *q) { if (unlikely(q == afl->queue_cur)) { - buf = afl_realloc((void **)&afl->testcase_buf, len); + buf = (u8 *)afl_realloc((void **)&afl->testcase_buf, len); } else { - buf = afl_realloc((void **)&afl->splicecase_buf, len); + buf = (u8 *)afl_realloc((void **)&afl->splicecase_buf, len); } if (unlikely(!buf)) { - PFATAL("Unable to malloc '%s' with len %u", q->fname, len); + PFATAL("Unable to malloc '%s' with len %u", (char *)q->fname, len); } - int fd = open(q->fname, O_RDONLY); + int fd = open((char *)q->fname, O_RDONLY); - if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", q->fname); } + if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", (char *)q->fname); } ck_read(fd, buf, len, q->fname); close(fd); @@ -1214,7 +1235,7 @@ inline u8 *queue_testcase_get(afl_state_t *afl, struct queue_entry *q) { do_once = 1; // release unneeded memory - afl->q_testcase_cache = ck_realloc( + afl->q_testcase_cache = (struct queue_entry **)ck_realloc( afl->q_testcase_cache, (afl->q_testcase_max_cache_entries + 1) * sizeof(size_t)); @@ -1261,15 +1282,15 @@ inline u8 *queue_testcase_get(afl_state_t *afl, struct queue_entry *q) { /* Map the test case into memory. 
*/ - int fd = open(q->fname, O_RDONLY); + int fd = open((char *)q->fname, O_RDONLY); - if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", q->fname); } + if (unlikely(fd < 0)) { PFATAL("Unable to open '%s'", (char *)q->fname); } - q->testcase_buf = malloc(len); + q->testcase_buf = (u8 *)malloc(len); if (unlikely(!q->testcase_buf)) { - PFATAL("Unable to malloc '%s' with len %u", q->fname, len); + PFATAL("Unable to malloc '%s' with len %u", (char *)q->fname, len); } @@ -1332,11 +1353,11 @@ inline void queue_testcase_store_mem(afl_state_t *afl, struct queue_entry *q, /* Map the test case into memory. */ - q->testcase_buf = malloc(len); + q->testcase_buf = (u8 *)malloc(len); if (unlikely(!q->testcase_buf)) { - PFATAL("Unable to malloc '%s' with len %u", q->fname, len); + PFATAL("Unable to malloc '%s' with len %u", (char *)q->fname, len); } From 5a0100c6eece0d668c7040ec6e6ed3f59ef0d1ba Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 10:01:02 +0100 Subject: [PATCH 56/77] add to readme --- custom_mutators/autotokens/README | 2 ++ 1 file changed, 2 insertions(+) diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 904b5fa3..295cd736 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -20,6 +20,8 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! removed. Default: `/* ... */` `AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the number of fuzzing performed, shifting the value by this number set, e.g. 1. +`AUTOTOKENS_AUTO_DISABLE` - disable this module if the seeds are not ascii + (or no input and no (ascii) dictionary) `AUTOTOKENS_LEARN_DICT` - learn from dictionaries? 0 = none 1 = only -x or autodict From 80eabd6e8a30c2ffc0f084ab34df8b9d582419c3 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 11:34:14 +0100 Subject: [PATCH 57/77] AFL_LLVM_DICT2FILE_NO_MAIN support --- TODO.md | 3 +-- docs/Changelog.md | 1 + docs/env_variables.md | 3 +++ docs/fuzzing_in_depth.md | 4 +++- include/envs.h | 1 + instrumentation/README.llvm.md | 4 ++++ instrumentation/SanitizerCoverageLTO.so.cc | 11 ++++++++++- instrumentation/afl-llvm-dict2file.so.cc | 17 ++++++++++++++--- src/afl-cc.c | 5 ++++- 9 files changed, 41 insertions(+), 8 deletions(-) diff --git a/TODO.md b/TODO.md index 862224f0..187fa191 100644 --- a/TODO.md +++ b/TODO.md @@ -9,13 +9,12 @@ - afl-plot to support multiple plot_data - parallel builds for source-only targets - get rid of check_binary, replace with more forkserver communication - - first fuzzer should be a main automatically + - first fuzzer should be a main automatically? not sure. ## Maybe - forkserver tells afl-fuzz if cmplog is supported and if so enable it by default, with AFL_CMPLOG_NO=1 (?) set to skip? - - afl_custom_fuzz_splice_optin() - afl_custom_splice() - cmdline option from-to range for mutations diff --git a/docs/Changelog.md b/docs/Changelog.md index eee88a51..89c37912 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -10,6 +10,7 @@ - add CFI sanitizer variant to gcc targets - llvm 16 support (thanks to @devnexen!) 
- support llvm 15 native pcguard changes + - LTO autoken and llvm_mode: added AFL_LLVM_DICT2FILE_NO_MAIN support - better sanitizer default options support for all tools - unicorn_mode: updated and minor issues fixed - frida_mode: fix issue on MacOS diff --git a/docs/env_variables.md b/docs/env_variables.md index 0a57d190..61fb1e2b 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -129,6 +129,9 @@ subset of the settings discussed in section 1, with the exception of: write all constant string comparisons to this file to be used later with afl-fuzz' `-x` option. + - An option to `AFL_LLVM_DICT2FILE` is `AFL_LLVM_DICT2FILE_NO_MAIN=1` which + skill not parse `main()`. + - `TMPDIR` and `AFL_KEEP_ASSEMBLY`, since no temporary assembly files are created. diff --git a/docs/fuzzing_in_depth.md b/docs/fuzzing_in_depth.md index 87f31a58..efab0633 100644 --- a/docs/fuzzing_in_depth.md +++ b/docs/fuzzing_in_depth.md @@ -534,6 +534,8 @@ dictionaries/FORMAT.dict`. * With `afl-clang-fast`, you can set `AFL_LLVM_DICT2FILE=/full/path/to/new/file.dic` to automatically generate a dictionary during target compilation. + Adding `AFL_LLVM_DICT2FILE_NO_MAIN=1` to not parse main (usually command line + parameter parsing) is often a good idea too. * You also have the option to generate a dictionary yourself during an independent run of the target, see [utils/libtokencap/README.md](../utils/libtokencap/README.md). @@ -935,7 +937,7 @@ phase and start fuzzing at once. 3. Also randomize the afl-fuzz runtime options, e.g.: * 65% for `AFL_DISABLE_TRIM` * 50% for `AFL_KEEP_TIMEOUTS` - * 50% use a dictionary generated by `AFL_LLVM_DICT2FILE` + * 50% use a dictionary generated by `AFL_LLVM_DICT2FILE` + `AFL_LLVM_DICT2FILE_NO_MAIN=1` * 40% use MOpt (`-L 0`) * 40% for `AFL_EXPAND_HAVOC_NOW` * 20% for old queue processing (`-Z`) diff --git a/include/envs.h b/include/envs.h index 0770f94d..5018b0f8 100644 --- a/include/envs.h +++ b/include/envs.h @@ -133,6 +133,7 @@ static char *afl_environment_variables[] = { "AFL_LLVM_CTX", "AFL_LLVM_CTX_K", "AFL_LLVM_DICT2FILE", + "AFL_LLVM_DICT2FILE_NO_MAIN", "AFL_LLVM_DOCUMENT_IDS", "AFL_LLVM_INSTRIM_LOOPHEAD", "AFL_LLVM_INSTRUMENT", diff --git a/instrumentation/README.llvm.md b/instrumentation/README.llvm.md index 9da1b0f6..c0677474 100644 --- a/instrumentation/README.llvm.md +++ b/instrumentation/README.llvm.md @@ -167,6 +167,10 @@ Just specify `AFL_LLVM_DICT2FILE=/absolute/path/file.txt` and during compilation all constant string compare parameters will be written to this file to be used with afl-fuzz' `-x` option. +Adding `AFL_LLVM_DICT2FILE_NO_MAIN=1` will skip parsing `main()` which often +does command line parsing which has string comparisons that are not helpful +for fuzzing. + ## 6) AFL++ Context Sensitive Branch Coverage ### What is this? 
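
To make concrete what `AFL_LLVM_DICT2FILE_NO_MAIN` is meant to filter out, here is a small stand-alone sketch - not part of these patches; the target, its option strings and the paths in the build comment are invented for illustration. The string comparisons inside `main()` typically match command line flags, which make poor dictionary tokens, while the comparisons in the actual input parser are the ones worth writing to the `-x` file.

// illustration only: a hypothetical target, not AFL++ code
#include <cstddef>
#include <cstdio>
#include <cstring>

// Comparisons in here yield useful dictionary tokens for afl-fuzz -x.
static int parse_input(const char *buf, size_t len) {

  if (len >= 8 && memcmp(buf, "MAGIC_V1", 8) == 0) { return 1; }
  if (len >= 12 && strncmp(buf, "BEGIN_RECORD", 12) == 0) { return 2; }
  return 0;

}

int main(int argc, char **argv) {

  // Typical option parsing: with AFL_LLVM_DICT2FILE_NO_MAIN=1 these
  // comparisons are skipped, so "--help" and "--verbose" never end up
  // in the generated dictionary file.
  for (int i = 1; i < argc; i++) {

    if (strcmp(argv[i], "--help") == 0) { return 0; }
    if (strcmp(argv[i], "--verbose") == 0) { /* ... */ }

  }

  char buf[64] = {0};
  size_t n = fread(buf, 1, sizeof(buf) - 1, stdin);
  return parse_input(buf, n);

}

// Build/run sketch (paths are examples):
//   AFL_LLVM_DICT2FILE=/tmp/target.dict AFL_LLVM_DICT2FILE_NO_MAIN=1 \
//     afl-clang-fast++ -o target target.cpp
//   afl-fuzz -i in -o out -x /tmp/target.dict -- ./target
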
diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc index 231151f5..f82224ed 100644 --- a/instrumentation/SanitizerCoverageLTO.so.cc +++ b/instrumentation/SanitizerCoverageLTO.so.cc @@ -236,6 +236,7 @@ class ModuleSanitizerCoverageLTO // const SpecialCaseList * Allowlist; // const SpecialCaseList * Blocklist; uint32_t autodictionary = 1; + uint32_t autodictionary_no_main = 0; uint32_t inst = 0; uint32_t afl_global_id = 0; uint32_t unhandled = 0; @@ -411,7 +412,8 @@ bool ModuleSanitizerCoverageLTO::instrumentModule( /* Show a banner */ setvbuf(stdout, NULL, _IONBF, 0); - if (getenv("AFL_DEBUG")) debug = 1; + if (getenv("AFL_DEBUG")) { debug = 1; } + if (getenv("AFL_LLVM_DICT2FILE_NO_MAIN")) { autodictionary_no_main = 1; } if ((isatty(2) && !getenv("AFL_QUIET")) || debug) { @@ -503,6 +505,13 @@ bool ModuleSanitizerCoverageLTO::instrumentModule( if (!isInInstrumentList(&F, MNAME) || !F.size()) { continue; } + if (autodictionary_no_main && + (!F.getName().compare("main") || !F.getName().compare("_main"))) { + + continue; + + } + for (auto &BB : F) { for (auto &IN : BB) { diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc index bbbbe32c..97f1d47f 100644 --- a/instrumentation/afl-llvm-dict2file.so.cc +++ b/instrumentation/afl-llvm-dict2file.so.cc @@ -182,7 +182,7 @@ bool AFLdict2filePass::runOnModule(Module &M) { DenseMap valueMap; char *ptr; - int found = 0; + int found = 0, handle_main = 1; /* Show a banner */ setvbuf(stdout, NULL, _IONBF, 0); @@ -192,10 +192,14 @@ bool AFLdict2filePass::runOnModule(Module &M) { SAYF(cCYA "afl-llvm-dict2file" VERSION cRST " by Marc \"vanHauser\" Heuse \n"); - } else + } else { be_quiet = 1; + } + + if (getenv("AFL_LLVM_DICT2FILE_NO_MAIN")) { handle_main = 0; } + scanForDangerousFunctions(&M); ptr = getenv("AFL_LLVM_DICT2FILE"); @@ -210,7 +214,14 @@ bool AFLdict2filePass::runOnModule(Module &M) { for (auto &F : M) { - if (isIgnoreFunction(&F)) continue; + if (!handle_main && + (!F.getName().compare("main") || !F.getName().compare("_main"))) { + + continue; + + } + + if (isIgnoreFunction(&F)) { continue; } if (!isInInstrumentList(&F, MNAME) || !F.size()) { continue; } /* Some implementation notes. 
diff --git a/src/afl-cc.c b/src/afl-cc.c index 7c3682fb..7b059d40 100644 --- a/src/afl-cc.c +++ b/src/afl-cc.c @@ -2041,6 +2041,8 @@ int main(int argc, char **argv, char **envp) { " AFL_LLVM_DICT2FILE: generate an afl dictionary based on found " "comparisons\n" + " AFL_LLVM_DICT2FILE_NO_MAIN: skip parsing main() for the " + "dictionary\n" " AFL_LLVM_LAF_ALL: enables all LAF splits/transforms\n" " AFL_LLVM_LAF_SPLIT_COMPARES: enable cascaded comparisons\n" " AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n" @@ -2128,7 +2130,8 @@ int main(int argc, char **argv, char **envp) { "defaults.\n" "Recommended is afl-clang-lto with AFL_LLVM_CMPLOG or afl-clang-fast " "with\n" - "AFL_LLVM_CMPLOG and AFL_LLVM_DICT2FILE.\n\n"); + "AFL_LLVM_CMPLOG and " + "AFL_LLVM_DICT2FILE+AFL_LLVM_DICT2FILE_NO_MAIN.\n\n"); exit(1); From 8bc3fa1df286aac46a0a724f64e2e07010d2497e Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Mon, 13 Feb 2023 23:00:15 +0000 Subject: [PATCH 58/77] LLVM cmplog factoring custom Instruction iterator with added restriction --- instrumentation/afl-llvm-common.cc | 18 ++++++++++++++++++ instrumentation/afl-llvm-common.h | 2 ++ instrumentation/cmplog-instructions-pass.cc | 15 --------------- instrumentation/cmplog-switches-pass.cc | 15 --------------- 4 files changed, 20 insertions(+), 30 deletions(-) diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc index dc34d191..b50269fe 100644 --- a/instrumentation/afl-llvm-common.cc +++ b/instrumentation/afl-llvm-common.cc @@ -582,6 +582,24 @@ bool isInInstrumentList(llvm::Function *F, std::string Filename) { } +template +Iterator Unique(Iterator first, Iterator last) { + static_assert(std::is_trivially_copyable< + typename std::iterator_traits + >::value_type, "Invalid underlying type"); + + while (first != last) { + + Iterator next(first); + last = std::remove(++next, last, *first); + first = next; + + } + + return last; + +} + // Calculate the number of average collisions that would occur if all // location IDs would be assigned randomly (like normal afl/afl++). // This uses the "balls in bins" algorithm. 
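
The `Unique()` helper factored out above drops later duplicates while keeping the first occurrence of each value in its original position, unlike plain `std::unique`, which only collapses adjacent duplicates. A stand-alone sketch of one way such a helper is typically used, on toy data rather than the passes' instruction lists:

// stand-alone illustration of the order-preserving Unique() above
#include <algorithm>
#include <cassert>
#include <vector>

template <class Iterator>
Iterator Unique(Iterator first, Iterator last) {

  while (first != last) {

    Iterator next(first);
    last = std::remove(++next, last, *first);  // drop later copies of *first
    first = next;

  }

  return last;

}

int main() {

  std::vector<int> v = {3, 1, 3, 2, 1, 3};
  v.erase(Unique(v.begin(), v.end()), v.end());

  // first occurrences survive and keep their original order: {3, 1, 2}
  assert((v == std::vector<int>{3, 1, 2}));

  // std::unique would only merge *adjacent* duplicates, so it needs a
  // sorted range to deduplicate fully - at the cost of the ordering.
  return 0;

}
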
diff --git a/instrumentation/afl-llvm-common.h b/instrumentation/afl-llvm-common.h index 0112c325..8b8dc756 100644 --- a/instrumentation/afl-llvm-common.h +++ b/instrumentation/afl-llvm-common.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "llvm/Config/llvm-config.h" @@ -53,6 +54,7 @@ void initInstrumentList(); bool isInInstrumentList(llvm::Function *F, std::string Filename); unsigned long long int calculateCollisions(uint32_t edges); void scanForDangerousFunctions(llvm::Module *M); +template Iterator Unique(Iterator, Iterator); #ifndef IS_EXTERN #define IS_EXTERN diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc index bca1f927..c6fd7c56 100644 --- a/instrumentation/cmplog-instructions-pass.cc +++ b/instrumentation/cmplog-instructions-pass.cc @@ -138,21 +138,6 @@ llvmGetPassPluginInfo() { char CmpLogInstructions::ID = 0; #endif -template -Iterator Unique(Iterator first, Iterator last) { - - while (first != last) { - - Iterator next(first); - last = std::remove(++next, last, *first); - first = next; - - } - - return last; - -} - bool CmpLogInstructions::hookInstrs(Module &M) { std::vector icomps; diff --git a/instrumentation/cmplog-switches-pass.cc b/instrumentation/cmplog-switches-pass.cc index cd0ae76d..f4a9fbd7 100644 --- a/instrumentation/cmplog-switches-pass.cc +++ b/instrumentation/cmplog-switches-pass.cc @@ -131,21 +131,6 @@ llvmGetPassPluginInfo() { char CmplogSwitches::ID = 0; #endif -template -Iterator Unique(Iterator first, Iterator last) { - - while (first != last) { - - Iterator next(first); - last = std::remove(++next, last, *first); - first = next; - - } - - return last; - -} - bool CmplogSwitches::hookInstrs(Module &M) { std::vector switches; From a7c43484e1e3afe6d1db440927e72e0f103ba977 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 07:45:45 +0100 Subject: [PATCH 59/77] bettern custom mut warning --- src/afl-fuzz-mutators.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/afl-fuzz-mutators.c b/src/afl-fuzz-mutators.c index 22e5262e..f722374f 100644 --- a/src/afl-fuzz-mutators.c +++ b/src/afl-fuzz-mutators.c @@ -312,12 +312,18 @@ struct custom_mutator *load_custom_mutator(afl_state_t *afl, const char *fn) { if (notrim) { + if (mutator->afl_custom_init_trim || mutator->afl_custom_trim || + mutator->afl_custom_post_trim) { + + WARNF( + "Custom mutator does not implement all three trim APIs, standard " + "trimming will be used."); + + } + mutator->afl_custom_init_trim = NULL; mutator->afl_custom_trim = NULL; mutator->afl_custom_post_trim = NULL; - ACTF( - "Custom mutator does not implement all three trim APIs, standard " - "trimming will be used."); } From 668f5e1fa9c126bb8c751a6e4ef038ae60a442fa Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 09:32:32 +0100 Subject: [PATCH 60/77] debug output --- custom_mutators/autotokens/Makefile | 8 ++++++-- custom_mutators/autotokens/autotokens.cpp | 17 ++++++++++++++++- docs/Changelog.md | 1 + docs/env_variables.md | 2 ++ include/afl-fuzz.h | 2 +- include/envs.h | 1 + src/afl-fuzz-init.c | 2 +- src/afl-fuzz-one.c | 2 +- src/afl-fuzz-run.c | 2 +- src/afl-fuzz-state.c | 7 +++++++ 10 files changed, 37 insertions(+), 7 deletions(-) diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 6ee7d324..0daba17d 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -13,10 +13,14 @@ endif all: autotokens.so -autotokens.so: 
autotokens.cpp +afl-fuzz-queue.o: ../../src/afl-fuzz-queue.c $(CC) -D_STANDALONE_MODULE=1 -I../../include -g -O3 $(CPPFLAGS) -fPIC -c -o ./afl-fuzz-queue.o ../../src/afl-fuzz-queue.c + +afl-common.o: ../../src/afl-common.c $(CC) -I../../include -g -O3 $(CPPFLAGS) -DBIN_PATH=\"dummy\" -Wno-pointer-sign -fPIC -c -o ./afl-common.o ../../src/afl-common.c + +autotokens.so: afl-fuzz-queue.o afl-common.o autotokens.cpp $(CXX) -Wno-deprecated -g -O3 $(CXXFLAGS) $(CPPFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ./afl-fuzz-queue.o ../../src/afl-performance.o ./afl-common.o clean: - rm -f autotokens.so *~ core + rm -f autotokens.so *.o *~ core diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index cda90a38..043d9588 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -145,6 +145,9 @@ static void first_run(void *data) { if ((valid * 100) / afl_ptr->extras_cnt < 95) { module_disabled = 1; } + DEBUGF(stderr, "DICT: valid %u, total %u, %u < 95 == disable\n", valid, + afl_ptr->extras_cnt, (u32)((valid * 100) / afl_ptr->extras_cnt)); + } else { module_disabled = 1; @@ -190,6 +193,10 @@ static void first_run(void *data) { if ((is_ascii * 100) / valid < 70) { module_disabled = 1; } + DEBUGF(stderr, "seeds: total %u, valid %u, ascii %u, %u < 70 == disabled\n", + afl_ptr->active_items, valid, is_ascii, + (u32)((is_ascii * 100) / valid)); + } static u32 good_whitespace_or_singleval() { @@ -538,7 +545,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, is_first_run = 0; first_run(data); - if (module_disabled) { WARNF("Autotokens custom module is disabled."); } + if (module_disabled) { + + WARNF("Autotokens custom module is disabled."); + + } else if (auto_disable) { + + OKF("Autotokens custom module is enabled."); + + } } diff --git a/docs/Changelog.md b/docs/Changelog.md index 89c37912..5f253064 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -6,6 +6,7 @@ ### Version ++4.06a (dev) - afl-fuzz: - ensure temporary file descriptor is closed when not used + - added `AFL_NO_WARN_INSTABILITY` - afl-cc: - add CFI sanitizer variant to gcc targets - llvm 16 support (thanks to @devnexen!) diff --git a/docs/env_variables.md b/docs/env_variables.md index 61fb1e2b..7a574e59 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -482,6 +482,8 @@ checks or alter some of the more exotic semantics of the tool: - Setting `AFL_NO_STARTUP_CALIBRATION` will skip the initial calibration of all starting seeds, and start fuzzing at once. + - Setting `AFL_NO_WARN_INSTABILITY` will suppress instability warnings. + - In QEMU mode (-Q) and FRIDA mode (-O), `AFL_PATH` will be searched for afl-qemu-trace and afl-frida-trace.so. 
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 229bc025..9bf91faf 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -399,7 +399,7 @@ typedef struct afl_env_vars { afl_cycle_schedules, afl_expand_havoc, afl_statsd, afl_cmplog_only_new, afl_exit_on_seed_issues, afl_try_affinity, afl_ignore_problems, afl_keep_timeouts, afl_pizza_mode, afl_no_crash_readme, - afl_ignore_timeouts, afl_no_startup_calibration; + afl_ignore_timeouts, afl_no_startup_calibration, afl_no_warn_instability; u8 *afl_tmpdir, *afl_custom_mutator_library, *afl_python_module, *afl_path, *afl_hang_tmout, *afl_forksrv_init_tmout, *afl_preload, diff --git a/include/envs.h b/include/envs.h index 5018b0f8..56675eda 100644 --- a/include/envs.h +++ b/include/envs.h @@ -172,6 +172,7 @@ static char *afl_environment_variables[] = { "AFL_NO_UI", "AFL_NO_PYTHON", "AFL_NO_STARTUP_CALIBRATION", + "AFL_NO_WARN_INSTABILITY", "AFL_UNTRACER_FILE", "AFL_LLVM_USE_TRACE_PC", "AFL_MAP_SIZE", diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c index 1182bd41..c20965b4 100644 --- a/src/afl-fuzz-init.c +++ b/src/afl-fuzz-init.c @@ -1120,7 +1120,7 @@ void perform_dry_run(afl_state_t *afl) { } - if (q->var_behavior) { + if (unlikely(q->var_behavior && !afl->afl_env.afl_no_warn_instability)) { WARNF("Instrumentation output varies across runs."); diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 2f016217..e97db273 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -1988,7 +1988,7 @@ custom_mutator_stage: if (unlikely(!mutated_buf)) { - //FATAL("Error in custom_fuzz. Size returned: %zu", mutated_size); + // FATAL("Error in custom_fuzz. Size returned: %zu", mutated_size); break; } diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c index 7dd83150..f5425011 100644 --- a/src/afl-fuzz-run.c +++ b/src/afl-fuzz-run.c @@ -523,7 +523,7 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem, } - if (unlikely(!var_detected)) { + if (unlikely(!var_detected && !afl->afl_env.afl_no_warn_instability)) { // note: from_queue seems to only be set during initialization if (afl->afl_env.afl_no_ui || from_queue) { diff --git a/src/afl-fuzz-state.c b/src/afl-fuzz-state.c index 104b1e4b..6d8c8758 100644 --- a/src/afl-fuzz-state.c +++ b/src/afl-fuzz-state.c @@ -204,6 +204,13 @@ void read_afl_environment(afl_state_t *afl, char **envp) { afl->afl_env.afl_no_affinity = get_afl_env(afl_environment_variables[i]) ? 1 : 0; + } else if (!strncmp(env, "AFL_NO_WARN_INSTABILITY", + + afl_environment_variable_len)) { + + afl->afl_env.afl_no_warn_instability = + get_afl_env(afl_environment_variables[i]) ? 
1 : 0; + } else if (!strncmp(env, "AFL_TRY_AFFINITY", afl_environment_variable_len)) { From 2090f17a9bb9cc225c1d24e8b21ed0c993a2665f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:23:42 +0100 Subject: [PATCH 61/77] opt --- custom_mutators/autotokens/autotokens.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 043d9588..a2b2814f 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -143,9 +143,9 @@ static void first_run(void *data) { } - if ((valid * 100) / afl_ptr->extras_cnt < 95) { module_disabled = 1; } + if ((valid * 100) / afl_ptr->extras_cnt <= 70) { module_disabled = 1; } - DEBUGF(stderr, "DICT: valid %u, total %u, %u < 95 == disable\n", valid, + DEBUGF(stderr, "DICT: valid %u, total %u, %u <= 70 == disable\n", valid, afl_ptr->extras_cnt, (u32)((valid * 100) / afl_ptr->extras_cnt)); } else { @@ -191,9 +191,9 @@ static void first_run(void *data) { } - if ((is_ascii * 100) / valid < 70) { module_disabled = 1; } + if ((is_ascii * 100) / valid <= 70) { module_disabled = 1; } - DEBUGF(stderr, "seeds: total %u, valid %u, ascii %u, %u < 70 == disabled\n", + DEBUGF(stderr, "seeds: total %u, valid %u, ascii %u, %u <= 70 == disabled\n", afl_ptr->active_items, valid, is_ascii, (u32)((is_ascii * 100) / valid)); From 04356ecbbe2c6cb72d279081702a6044fcc3ae92 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:28:43 +0100 Subject: [PATCH 62/77] fix --- custom_mutators/autotokens/autotokens.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index a2b2814f..b1f1542e 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -145,8 +145,9 @@ static void first_run(void *data) { if ((valid * 100) / afl_ptr->extras_cnt <= 70) { module_disabled = 1; } - DEBUGF(stderr, "DICT: valid %u, total %u, %u <= 70 == disable\n", valid, - afl_ptr->extras_cnt, (u32)((valid * 100) / afl_ptr->extras_cnt)); + DEBUGF(stderr, "DICT: total %u, valid %u, %u <= 70 == disable\n", + afl_ptr->extras_cnt, valid, + (u32)((valid * 100) / afl_ptr->extras_cnt)); } else { From ae94499503596d1e7f45e1a93bc5f7148c6163b6 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:48:49 +0100 Subject: [PATCH 63/77] fix --- custom_mutators/autotokens/autotokens.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index b1f1542e..e6b9931d 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1078,6 +1078,8 @@ extern "C" void afl_custom_deinit(my_mutator_t *data) { /* we use this to print statistics at exit :-) needs to be stderr as stdout is filtered */ + if (module_disabled) { return; } + fprintf(stderr, "\n\nAutotoken mutator statistics:\n" " Number of all seen tokens: %u\n" From 7f2bafbb8b709720cd3703789071c08064e518bd Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:54:39 +0100 Subject: [PATCH 64/77] remove some debug --- custom_mutators/autotokens/autotokens.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index e6b9931d..22c78a60 100644 --- 
a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -287,7 +287,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, ((whitespace_ids < new_item && whitespace_ids >= cur_item) || (whitespace_ids >= new_item && whitespace_ids < cur_item)))); - DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); + // DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; break; @@ -305,7 +305,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; - DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); + // DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); break; @@ -334,7 +334,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, m.insert(m.begin() + dst_off, src->begin() + src_off, src->begin() + src_off + n); m_size += n; - DEBUGF(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + // DEBUGF(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); break; @@ -354,7 +354,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, copy(src->begin() + src_off, src->begin() + src_off + n, m.begin() + dst_off); - DEBUGF(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); + // DEBUGF(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); break; } @@ -432,6 +432,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, if (unlikely(mutated_size > max_size)) { mutated_size = max_size; } + /* IFDEBUG { DEBUGF(stderr, "MUTATED to %u bytes:\n", mutated_size); @@ -440,6 +441,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + */ + *out_buf = mutated_out; ++fuzz_count; return mutated_size; @@ -633,7 +636,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } ++a_extras_cnt; - DEBUGF(stderr, "Added from auto dictionary: \"%s\"\n", ptr); } @@ -751,8 +753,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, u32 tabs = count(input.begin(), input.end(), '\t'); u32 linefeeds = count(input.begin(), input.end(), '\n'); bool ends_with_linefeed = input[input.length() - 1] == '\n'; + DEBUGF(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, linefeeds, ends_with_linefeed); + all_spaces += spaces; all_tabs += tabs; all_lf += linefeeds; From 1faf6f67313e726c645ac3b9ecd2d8b5e65f605a Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 16 Feb 2023 07:47:36 +0100 Subject: [PATCH 65/77] fix --- custom_mutators/autotokens/autotokens.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 22c78a60..8135aba1 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -401,25 +401,28 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, /* Now we create the output */ output = ""; - u32 prev_size = 0; + u32 prev_size = 1, was_whitespace = 1; for (i = 0; i < m_size; ++i) { if (likely(i + 1 < m_size)) { u32 this_size = id_to_token[m[i]].size(); + u32 is_whitespace = m[i] < whitespace_ids; /* The output we are generating might need repairing. General rule: two items that have a size larger than 2 are strings or identifizers and need a whitespace or an item of length 1 in between. 
*/ - if (unlikely(prev_size > 1 && this_size > 1)) { + if (unlikely(!(prev_size == 1 || was_whitespace || this_size == 1 || + is_whitespace))) { output += id_to_token[good_whitespace_or_singleval()]; } prev_size = this_size; + was_whitespace = is_whitespace; } From 9da3a2ed4522d1a980ad7ddc7806f02833dd99fc Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 16 Feb 2023 13:11:11 +0100 Subject: [PATCH 66/77] fixes --- src/afl-fuzz-redqueen.c | 2 ++ src/afl-gotcpu.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 8da1df13..290be881 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -1624,6 +1624,8 @@ static void try_to_add_to_dictN(afl_state_t *afl, u128 v, u8 size) { } + if (cons_0 > 1 || cons_ff > 1) { return; } + } maybe_add_auto(afl, (u8 *)&v + off, size); diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c index fd9e9f54..8988fd54 100644 --- a/src/afl-gotcpu.c +++ b/src/afl-gotcpu.c @@ -92,7 +92,7 @@ static u32 measure_preemption(u32 target_ms) { volatile u32 v1, v2 = 0; u64 st_t, en_t, st_c, en_c, real_delta, slice_delta; - s32 loop_repeats = 0; + //s32 loop_repeats = 0; st_t = get_cur_time_us(); st_c = get_cpu_usage_us(); @@ -113,7 +113,7 @@ repeat_loop: if (en_t - st_t < target_ms * 1000) { - loop_repeats++; + //loop_repeats++; goto repeat_loop; } From ebaac23a514cd3950d4a6cb597bd921e13ab9baa Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 20 Feb 2023 11:42:40 +0100 Subject: [PATCH 67/77] clarify AFL_NO_STARTUP_CALIBRATION --- docs/env_variables.md | 3 ++- docs/fuzzing_in_depth.md | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/env_variables.md b/docs/env_variables.md index 22a5c386..646db3f2 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -474,7 +474,8 @@ checks or alter some of the more exotic semantics of the tool: output from afl-fuzz is redirected to a file or to a pipe. - Setting `AFL_NO_STARTUP_CALIBRATION` will skip the initial calibration - of all starting seeds, and start fuzzing at once. + of all starting seeds, and start fuzzing at once. Use with care, this + degrades the fuzzing performance! - In QEMU mode (-Q) and FRIDA mode (-O), `AFL_PATH` will be searched for afl-qemu-trace and afl-frida-trace.so. diff --git a/docs/fuzzing_in_depth.md b/docs/fuzzing_in_depth.md index 87f31a58..2a088201 100644 --- a/docs/fuzzing_in_depth.md +++ b/docs/fuzzing_in_depth.md @@ -628,7 +628,8 @@ If you have a large corpus, a corpus from a previous run or are fuzzing in a CI, then also set `export AFL_CMPLOG_ONLY_NEW=1` and `export AFL_FAST_CAL=1`. If the queue in the CI is huge and/or the execution time is slow then you can also add `AFL_NO_STARTUP_CALIBRATION=1` to skip the initial queue calibration -phase and start fuzzing at once. +phase and start fuzzing at once - but only do this if the calibration phase +would be too long for your fuzz run time. You can also use different fuzzers. If you are using AFL spinoffs or AFL conforming fuzzers, then just use the same -o directory and give it a unique @@ -914,7 +915,8 @@ normal fuzzing campaigns as these are much shorter runnings. If the queue in the CI is huge and/or the execution time is slow then you can also add `AFL_NO_STARTUP_CALIBRATION=1` to skip the initial queue calibration -phase and start fuzzing at once. +phase and start fuzzing at once. But only do that if the calibration time is +too long for your overall available fuzz run time. 1. 
Always: * LTO has a much longer compile time which is diametrical to short fuzzing - From b786558dea5fd5dca471a0e36a8b420ff6a65846 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 20 Feb 2023 15:43:54 +0100 Subject: [PATCH 68/77] Revert "LLVM cmplog factoring custom Instruction iterator with added restriction" This reverts commit 8bc3fa1df286aac46a0a724f64e2e07010d2497e. --- instrumentation/afl-llvm-common.cc | 18 ------------------ instrumentation/afl-llvm-common.h | 2 -- instrumentation/cmplog-instructions-pass.cc | 15 +++++++++++++++ instrumentation/cmplog-switches-pass.cc | 15 +++++++++++++++ 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc index b50269fe..dc34d191 100644 --- a/instrumentation/afl-llvm-common.cc +++ b/instrumentation/afl-llvm-common.cc @@ -582,24 +582,6 @@ bool isInInstrumentList(llvm::Function *F, std::string Filename) { } -template -Iterator Unique(Iterator first, Iterator last) { - static_assert(std::is_trivially_copyable< - typename std::iterator_traits - >::value_type, "Invalid underlying type"); - - while (first != last) { - - Iterator next(first); - last = std::remove(++next, last, *first); - first = next; - - } - - return last; - -} - // Calculate the number of average collisions that would occur if all // location IDs would be assigned randomly (like normal afl/afl++). // This uses the "balls in bins" algorithm. diff --git a/instrumentation/afl-llvm-common.h b/instrumentation/afl-llvm-common.h index 8b8dc756..0112c325 100644 --- a/instrumentation/afl-llvm-common.h +++ b/instrumentation/afl-llvm-common.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include "llvm/Config/llvm-config.h" @@ -54,7 +53,6 @@ void initInstrumentList(); bool isInInstrumentList(llvm::Function *F, std::string Filename); unsigned long long int calculateCollisions(uint32_t edges); void scanForDangerousFunctions(llvm::Module *M); -template Iterator Unique(Iterator, Iterator); #ifndef IS_EXTERN #define IS_EXTERN diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc index c6fd7c56..bca1f927 100644 --- a/instrumentation/cmplog-instructions-pass.cc +++ b/instrumentation/cmplog-instructions-pass.cc @@ -138,6 +138,21 @@ llvmGetPassPluginInfo() { char CmpLogInstructions::ID = 0; #endif +template +Iterator Unique(Iterator first, Iterator last) { + + while (first != last) { + + Iterator next(first); + last = std::remove(++next, last, *first); + first = next; + + } + + return last; + +} + bool CmpLogInstructions::hookInstrs(Module &M) { std::vector icomps; diff --git a/instrumentation/cmplog-switches-pass.cc b/instrumentation/cmplog-switches-pass.cc index f4a9fbd7..cd0ae76d 100644 --- a/instrumentation/cmplog-switches-pass.cc +++ b/instrumentation/cmplog-switches-pass.cc @@ -131,6 +131,21 @@ llvmGetPassPluginInfo() { char CmplogSwitches::ID = 0; #endif +template +Iterator Unique(Iterator first, Iterator last) { + + while (first != last) { + + Iterator next(first); + last = std::remove(++next, last, *first); + first = next; + + } + + return last; + +} + bool CmplogSwitches::hookInstrs(Module &M) { std::vector switches; From 91b7f1c9f2dc429b7d4beaafb7497203f456bcd3 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 21 Feb 2023 01:05:46 +0100 Subject: [PATCH 69/77] fix regression --- src/afl-fuzz-one.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 76826945..0f237126 100644 
--- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -5841,7 +5841,10 @@ u8 fuzz_one(afl_state_t *afl) { } - return (key_val_lv_1 == 0 || key_val_lv_2 == 0 ? 0 : 1 ); + if (key_val_lv_1 == -1) { key_val_lv_1 = 0; } + if (key_val_lv_2 == -1) { key_val_lv_2 = 0; } + + return (key_val_lv_1 | key_val_lv_2); } From 6f4b5ae0832774389b12c5a8cd3fb95821b438e5 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 21 Feb 2023 01:07:02 +0100 Subject: [PATCH 70/77] nit --- src/afl-fuzz-one.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 0f237126..cce3d7cf 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -5841,8 +5841,8 @@ u8 fuzz_one(afl_state_t *afl) { } - if (key_val_lv_1 == -1) { key_val_lv_1 = 0; } - if (key_val_lv_2 == -1) { key_val_lv_2 = 0; } + if (unlikely(key_val_lv_1 == -1)) { key_val_lv_1 = 0; } + if (likely(key_val_lv_2 == -1)) { key_val_lv_2 = 0; } return (key_val_lv_1 | key_val_lv_2); From 8a8e350f34fa4fe5eb862d1a71921be9b739e8bb Mon Sep 17 00:00:00 2001 From: lazymio Date: Wed, 22 Feb 2023 22:48:03 +0100 Subject: [PATCH 71/77] Also install libclang-rt-dev --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fd47a59f..59ce8778 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,7 +47,7 @@ RUN apt-get update && \ clang-${LLVM_VERSION} clang-tools-${LLVM_VERSION} libc++1-${LLVM_VERSION} \ libc++-${LLVM_VERSION}-dev libc++abi1-${LLVM_VERSION} libc++abi-${LLVM_VERSION}-dev \ libclang1-${LLVM_VERSION} libclang-${LLVM_VERSION}-dev \ - libclang-common-${LLVM_VERSION}-dev libclang-cpp${LLVM_VERSION} \ + libclang-common-${LLVM_VERSION}-dev libclang-rt-${LLVM_VERSION}-dev libclang-cpp${LLVM_VERSION} \ libclang-cpp${LLVM_VERSION}-dev liblld-${LLVM_VERSION} \ liblld-${LLVM_VERSION}-dev liblldb-${LLVM_VERSION} liblldb-${LLVM_VERSION}-dev \ libllvm${LLVM_VERSION} libomp-${LLVM_VERSION}-dev libomp5-${LLVM_VERSION} \ From 0c0a6c3bfabf0facaed33fae1aa5ad54a6a11b32 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 23 Feb 2023 11:22:40 +0100 Subject: [PATCH 72/77] regression fix --- include/config.h | 2 +- src/afl-forkserver.c | 2 +- src/afl-fuzz-cmplog.c | 8 ++++++-- src/afl-fuzz.c | 3 ++- src/afl-gotcpu.c | 4 ++-- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/config.h b/include/config.h index ad8b76a8..e46f515a 100644 --- a/include/config.h +++ b/include/config.h @@ -489,7 +489,7 @@ /* Minimum length of a queue input to be evaluated for "is_ascii"? */ -#define AFL_TXT_MIN_LEN 16 +#define AFL_TXT_MIN_LEN 12 /* Maximum length of a queue input to be evaluated for "is_ascii"? 
*/ diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c index 5aa4c2ff..50dc7a26 100644 --- a/src/afl-forkserver.c +++ b/src/afl-forkserver.c @@ -59,7 +59,7 @@ static list_t fsrv_list = {.element_prealloc_count = 0}; static void fsrv_exec_child(afl_forkserver_t *fsrv, char **argv) { - if (fsrv->qemu_mode || fsrv->cs_mode) { + if (fsrv->qemu_mode || fsrv->frida_mode || fsrv->cs_mode) { setenv("AFL_DISABLE_LLVM_INSTRUMENTATION", "1", 0); diff --git a/src/afl-fuzz-cmplog.c b/src/afl-fuzz-cmplog.c index 2bf26d19..229aef09 100644 --- a/src/afl-fuzz-cmplog.c +++ b/src/afl-fuzz-cmplog.c @@ -33,11 +33,15 @@ void cmplog_exec_child(afl_forkserver_t *fsrv, char **argv) { setenv("___AFL_EINS_ZWEI_POLIZEI___", "1", 1); - if (fsrv->qemu_mode) { setenv("AFL_DISABLE_LLVM_INSTRUMENTATION", "1", 0); } + if (fsrv->qemu_mode || fsrv->frida_mode || fsrv->cs_mode) { + + setenv("AFL_DISABLE_LLVM_INSTRUMENTATION", "1", 0); + + } if (!fsrv->qemu_mode && !fsrv->frida_mode && argv[0] != fsrv->cmplog_binary) { - argv[0] = fsrv->cmplog_binary; + fsrv->target_path = argv[0] = fsrv->cmplog_binary; } diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index ea467401..4914ce0b 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -1298,7 +1298,8 @@ int main(int argc, char **argv_orig, char **envp) { } - if (afl->is_main_node == 1 && afl->schedule != FAST && afl->schedule != EXPLORE) { + if (afl->is_main_node == 1 && afl->schedule != FAST && + afl->schedule != EXPLORE) { FATAL("-M is compatible only with fast and explore -p power schedules"); diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c index 8988fd54..4f851099 100644 --- a/src/afl-gotcpu.c +++ b/src/afl-gotcpu.c @@ -92,7 +92,7 @@ static u32 measure_preemption(u32 target_ms) { volatile u32 v1, v2 = 0; u64 st_t, en_t, st_c, en_c, real_delta, slice_delta; - //s32 loop_repeats = 0; + // s32 loop_repeats = 0; st_t = get_cur_time_us(); st_c = get_cpu_usage_us(); @@ -113,7 +113,7 @@ repeat_loop: if (en_t - st_t < target_ms * 1000) { - //loop_repeats++; + // loop_repeats++; goto repeat_loop; } From eeccb2da69d7e6f32ee74c431e7c5053e8379dff Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 23 Feb 2023 11:45:26 +0100 Subject: [PATCH 73/77] nits --- docs/Changelog.md | 1 + qemu_mode/qemuafl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/Changelog.md b/docs/Changelog.md index 5f253064..8f71fd83 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -11,6 +11,7 @@ - add CFI sanitizer variant to gcc targets - llvm 16 support (thanks to @devnexen!) 
- support llvm 15 native pcguard changes + - new custom module: autotoken, grammar free fuzzer for text inputs - LTO autoken and llvm_mode: added AFL_LLVM_DICT2FILE_NO_MAIN support - better sanitizer default options support for all tools - unicorn_mode: updated and minor issues fixed diff --git a/qemu_mode/qemuafl b/qemu_mode/qemuafl index a120c3fe..a8af9cbd 160000 --- a/qemu_mode/qemuafl +++ b/qemu_mode/qemuafl @@ -1 +1 @@ -Subproject commit a120c3feb573d4cade292cdeb7c1f6b1ce109efe +Subproject commit a8af9cbde71e333ce72a46f15e655d0b82ed0939 From ffdb5ec9b1d92e9feb226d83c78d057cb613eeb0 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 23 Feb 2023 14:32:54 +0100 Subject: [PATCH 74/77] improve cmplog ci --- test/test-llvm.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test-llvm.sh b/test/test-llvm.sh index ce64d76c..52be04ef 100755 --- a/test/test-llvm.sh +++ b/test/test-llvm.sh @@ -257,12 +257,13 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { } rm -f test-compcov test.out instrumentlist.txt AFL_LLVM_CMPLOG=1 ../afl-clang-fast -o test-cmplog test-cmplog.c > /dev/null 2>&1 + ../afl-clang-fast -o test-c test-cmplog.c > /dev/null 2>&1 test -e test-cmplog && { $ECHO "$GREY[*] running afl-fuzz for llvm_mode cmplog, this will take approx 10 seconds" { mkdir -p in echo 00000000000000000000000000000000 > in/in - AFL_BENCH_UNTIL_CRASH=1 ../afl-fuzz -m none -V60 -i in -o out -c./test-cmplog -- ./test-cmplog >>errors 2>&1 + AFL_BENCH_UNTIL_CRASH=1 ../afl-fuzz -m none -V60 -i in -o out -c./test-cmplog -- ./test-c >>errors 2>&1 } >>errors 2>&1 test -n "$( ls out/default/crashes/id:000000* out/default/hangs/id:000000* 2>/dev/null )" & { $ECHO "$GREEN[+] afl-fuzz is working correctly with llvm_mode cmplog" @@ -277,7 +278,7 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { $ECHO "$YELLOW[-] we cannot test llvm_mode cmplog because it is not present" INCOMPLETE=1 } - rm -rf errors test-cmplog in core.* + rm -rf errors test-cmplog test-c in core.* ../afl-clang-fast -o test-persistent ../utils/persistent_mode/persistent_demo.c > /dev/null 2>&1 test -e test-persistent && { echo foo | AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -q -r ./test-persistent && { From add2eb42c0f0e2b590fcb17427e5fce29c2fdd54 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 23 Feb 2023 15:26:41 +0100 Subject: [PATCH 75/77] nits --- .gitignore | 133 +++++++++++++++--------------- custom_mutators/autotokens/README | 7 +- custom_mutators/autotokens/TODO | 3 - 3 files changed, 72 insertions(+), 71 deletions(-) delete mode 100644 custom_mutators/autotokens/TODO diff --git a/.gitignore b/.gitignore index 45d8676c..c01750e1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,104 +1,107 @@ -.test -.test2 -.sync_tmp -.vscode +!coresight_mode +!coresight_mode/coresight-trace +*.dSYM *.o +*.o.tmp +*.pyc *.so *.swp -*.pyc -*.dSYM -as -a.out -ld -in -out -core* -compile_commands.json +.sync_tmp +.test +.test2 +.vscode afl-analyze +afl-analyze.8 afl-as +afl-as.8 +afl-c++ +afl-c++.8 +afl-cc +afl-cc.8 afl-clang afl-clang++ afl-clang-fast afl-clang-fast++ -afl-clang-lto -afl-clang-lto++ -afl-fuzz -afl-g++ -afl-gcc -afl-gcc-fast -afl-g++-fast -afl-gotcpu -afl-ld -afl-ld-lto -afl-cs-proxy -afl-qemu-trace -afl-showmap -afl-tmin -afl-analyze.8 -afl-as.8 afl-clang-fast++.8 afl-clang-fast.8 -afl-clang-lto.8 +afl-clang-lto +afl-clang-lto++ afl-clang-lto++.8 +afl-clang-lto.8 afl-cmin.8 afl-cmin.bash.8 +afl-cs-proxy +afl-frida-trace.so +afl-fuzz afl-fuzz.8 -afl-c++.8 -afl-cc.8 
-afl-gcc.8 +afl-g++ afl-g++.8 +afl-gcc +afl-gcc.8 +afl-gcc-fast afl-gcc-fast.8 +afl-g++-fast afl-g++-fast.8 +afl-gotcpu afl-gotcpu.8 -afl-plot.8 -afl-showmap.8 -afl-system-config.8 -afl-tmin.8 -afl-whatsup.8 -afl-persistent-config.8 -afl-c++ -afl-cc +afl-ld +afl-ld-lto afl-lto afl-lto++ afl-lto++.8 afl-lto.8 +afl-persistent-config.8 +afl-plot.8 +afl-qemu-trace +afl-showmap +afl-showmap.8 +afl-system-config.8 +afl-tmin +afl-tmin.8 +afl-whatsup.8 +a.out +as +compile_commands.json +core* +examples/afl_frida/afl-frida +examples/afl_frida/frida-gum-example.c +examples/afl_frida/frida-gum.h +examples/afl_frida/libtestinstr.so +examples/afl_network_proxy/afl-network-client +examples/afl_network_proxy/afl-network-server +examples/aflpp_driver/libAFLDriver.a +examples/aflpp_driver/libAFLQemuDriver.a +gmon.out +in +ld +libAFLDriver.a +libAFLQemuDriver.a +out qemu_mode/libcompcov/compcovtest qemu_mode/qemu-* qemu_mode/qemuafl -unicorn_mode/samples/*/\.test-* -unicorn_mode/samples/*/output/ -test/unittests/unit_maybe_alloc -test/unittests/unit_preallocable -test/unittests/unit_list -test/unittests/unit_rand -test/unittests/unit_hash -examples/afl_network_proxy/afl-network-server -examples/afl_network_proxy/afl-network-client -examples/afl_frida/afl-frida -examples/afl_frida/libtestinstr.so -examples/afl_frida/frida-gum-example.c -examples/afl_frida/frida-gum.h -examples/aflpp_driver/libAFLDriver.a -examples/aflpp_driver/libAFLQemuDriver.a -libAFLDriver.a -libAFLQemuDriver.a test/.afl_performance test-instr test/output +test/test-c +test/test-cmplog +test/test-compcov test/test-instr.ts test/test-persistent -gmon.out -afl-frida-trace.so +test/unittests/unit_hash +test/unittests/unit_list +test/unittests/unit_maybe_alloc +test/unittests/unit_preallocable +test/unittests/unit_rand +unicorn_mode/samples/*/output/ +unicorn_mode/samples/*/\.test-* utils/afl_network_proxy/afl-network-client utils/afl_network_proxy/afl-network-server -utils/plot_ui/afl-plot-ui -*.o.tmp utils/afl_proxy/afl-proxy utils/optimin/build utils/optimin/optimin utils/persistent_mode/persistent_demo utils/persistent_mode/persistent_demo_new utils/persistent_mode/test-instr -!coresight_mode -!coresight_mode/coresight-trace -vuln_prog \ No newline at end of file +utils/plot_ui/afl-plot-ui +vuln_prog diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 295cd736..cca168fd 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -1,8 +1,9 @@ -# autotokens +# Autotokens This implements an improved autotoken grammar fuzzing idea presented in [Token-Level Fuzzing][https://www.usenix.org/system/files/sec21-salls.pdf]. -It is a grammar fuzzer without actually knowing the grammar. +It is a grammar fuzzer without actually knowing the grammar, but only works +with text based inputs. It is recommended to run with together in an instance with `CMPLOG`. @@ -19,7 +20,7 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! `AUTOTOKENS_COMMENT` - what character or string starts a comment which will be removed. Default: `/* ... */` `AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the number of fuzzing performed, shifting - the value by this number set, e.g. 1. + the value by this number, e.g. 1. `AUTOTOKENS_AUTO_DISABLE` - disable this module if the seeds are not ascii (or no input and no (ascii) dictionary) `AUTOTOKENS_LEARN_DICT` - learn from dictionaries? 
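
As a rough illustration of the token-level approach this README describes (a toy sketch, not the module's actual implementation; the input string and the regular expression are invented): the input is split into word and punctuation tokens, one token is overwritten with another token that was already seen, and everything is glued back together with naive whitespace repair so the result stays plain text.

// toy token-level mutation, illustration only
#include <iostream>
#include <regex>
#include <string>
#include <vector>

int main() {

  std::string input = "if (len == 1) { return len; }";

  // split into identifier-like tokens and runs of punctuation
  std::regex               tok("[A-Za-z0-9_]+|[^A-Za-z0-9_\\s]+");
  std::vector<std::string> tokens;
  for (std::sregex_iterator it(input.begin(), input.end(), tok), end;
       it != end; ++it) {

    tokens.push_back(it->str());

  }

  // a CHANGE-style mutation: overwrite one token with a known one
  tokens[3] = tokens[0];  // the "==" slot now holds "if"

  // naive whitespace repair while reassembling the output
  std::string out;
  for (std::size_t i = 0; i < tokens.size(); ++i) {

    if (i) { out += " "; }
    out += tokens[i];

  }

  std::cout << out << "\n";  // if ( len if 1 ) { return len ; }
  return 0;

}
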
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO deleted file mode 100644 index 2e99e147..00000000 --- a/custom_mutators/autotokens/TODO +++ /dev/null @@ -1,3 +0,0 @@ -env für menge an per mutation run - -change_min/_max werte From 2bea77e28a969fcb62921862bef61cd751d7b9d5 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 23 Feb 2023 17:09:06 +0100 Subject: [PATCH 76/77] fix custom python splice optout --- src/afl-fuzz-python.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/afl-fuzz-python.c b/src/afl-fuzz-python.c index 69c305f7..2799268b 100644 --- a/src/afl-fuzz-python.c +++ b/src/afl-fuzz-python.c @@ -231,8 +231,12 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) { PyObject_GetAttrString(py_module, "describe"); py_functions[PY_FUNC_FUZZ_COUNT] = PyObject_GetAttrString(py_module, "fuzz_count"); - if (!py_functions[PY_FUNC_FUZZ]) + if (!py_functions[PY_FUNC_FUZZ]) { + WARNF("fuzz function not found in python module"); + + } + py_functions[PY_FUNC_POST_PROCESS] = PyObject_GetAttrString(py_module, "post_process"); py_functions[PY_FUNC_INIT_TRIM] = @@ -250,6 +254,7 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) { PyObject_GetAttrString(py_module, "fuzz_send"); py_functions[PY_FUNC_SPLICE_OPTOUT] = PyObject_GetAttrString(py_module, "splice_optout"); + if (py_functions[PY_FUNC_SPLICE_OPTOUT]) { afl->custom_splice_optout = 1; } py_functions[PY_FUNC_QUEUE_NEW_ENTRY] = PyObject_GetAttrString(py_module, "queue_new_entry"); py_functions[PY_FUNC_INTROSPECTION] = From 65d4d10762a14e2dab6b89962b0ccf0cbdc8de2e Mon Sep 17 00:00:00 2001 From: "Dongjia \"toka\" Zhang" Date: Fri, 24 Feb 2023 23:14:40 +0900 Subject: [PATCH 77/77] Update afl-fuzz-redqueen.c --- src/afl-fuzz-redqueen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 8da1df13..07736537 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -1035,7 +1035,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, } else { - diff = 0; + o_diff = 0; }
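
Both the Python loader fix above and the earlier trim warning added to afl-fuzz-mutators.c deal with optional custom mutator callbacks that are looked up by name when the module is loaded, and the trim callbacks only take effect as a complete trio. Below is a skeletal C++ custom mutator showing that trio; the callback names and signatures follow the documented custom mutator API but should be double-checked against docs/custom_mutators.md, and the trimming strategy (halve the input a few times) is invented purely for illustration, so treat this as a sketch rather than a usable module.

// skeletal custom mutator: illustration of the "all three trim callbacks
// or none" rule; the halving strategy is made up for this example
extern "C" {
#include "afl-fuzz.h"
}

#include <cstdint>
#include <cstdlib>

typedef struct trim_state {

  unsigned char *buf;
  size_t         size;       // current candidate length
  size_t         last_good;  // last length that kept the same behavior
  int32_t        step;       // current trim iteration

} trim_state_t;

extern "C" void *afl_custom_init(afl_state_t *afl, unsigned int seed) {

  (void)afl;
  (void)seed;
  return calloc(1, sizeof(trim_state_t));

}

// announce how many trim iterations we want for this input
extern "C" int32_t afl_custom_init_trim(void *data, unsigned char *buf,
                                        size_t buf_size) {

  trim_state_t *t = (trim_state_t *)data;
  t->buf = buf;
  t->size = t->last_good = buf_size;
  t->step = 0;
  return 4;

}

// produce the next candidate: keep a prefix of half the current length
extern "C" size_t afl_custom_trim(void *data, unsigned char **out_buf) {

  trim_state_t *t = (trim_state_t *)data;
  t->size = t->size > 1 ? t->size / 2 : 1;
  *out_buf = t->buf;
  return t->size;

}

// success says whether the shorter candidate behaved the same;
// return the index of the next trim iteration
extern "C" int32_t afl_custom_post_trim(void *data, unsigned char success) {

  trim_state_t *t = (trim_state_t *)data;
  if (success) { t->last_good = t->size; } else { t->size = t->last_good; }
  return ++t->step;

}

extern "C" void afl_custom_deinit(void *data) {

  free(data);

}
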