create from thin air, max mutation

This commit is contained in:
vanhauser-thc 2023-02-05 13:15:06 +01:00
parent 90f61552f7
commit f99656e22b

View File

@ -24,10 +24,12 @@ extern "C" {
#define AUTOTOKENS_ONLY_FAV 0 #define AUTOTOKENS_ONLY_FAV 0
#define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0
#define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_CHANGE_MIN 8
#define AUTOTOKENS_CHANGE_MAX 64
#define AUTOTOKENS_WHITESPACE " " #define AUTOTOKENS_WHITESPACE " "
#define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SIZE_MIN 8
#define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MIN 4
#define AUTOTOKENS_SPLICE_MAX 64 #define AUTOTOKENS_SPLICE_MAX 64
#define AUTOTOKENS_CREATE_FROM_THIN_AIR 1
#define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0
// 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog
#define AUTOTOKENS_LEARN_DICT 2 #define AUTOTOKENS_LEARN_DICT 2
@ -61,6 +63,7 @@ static int only_fav = AUTOTOKENS_ONLY_FAV;
static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE;
static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT;
static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT;
static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR;
static u32 current_id; static u32 current_id;
static u32 valid_structures; static u32 valid_structures;
static u32 whitespace_ids; static u32 whitespace_ids;
@ -83,7 +86,18 @@ static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize);
static regex regex_whitespace(R"([ \t]+)", regex::optimize); static regex regex_whitespace(R"([ \t]+)", regex::optimize);
static vector<u32> *s; // the structure of the currently selected input static vector<u32> *s; // the structure of the currently selected input
u32 good_whitespace_or_singleval() { // FUNCTIONS
/* One-time setup hook.
   afl_custom_queue_get() invokes this on its first call (guarded by its
   is_first_run flag), i.e. after setup is complete but before any fuzzing
   attempt has been made. It currently performs no work.

   data: opaque mutator state pointer handed through by the caller; unused. */
static void first_run(void *data) {

  (void)data;  // parameter intentionally unused

}
static u32 good_whitespace_or_singleval() {
u32 i = rand_below(afl_ptr, current_id); u32 i = rand_below(afl_ptr, current_id);
if (id_to_token[i].size() == 1) { return i; } if (id_to_token[i].size() == 1) { return i; }
@ -105,6 +119,8 @@ u32 good_whitespace_or_singleval() {
extern "C" u32 afl_custom_fuzz_count(void *data, const u8 *buf, extern "C" u32 afl_custom_fuzz_count(void *data, const u8 *buf,
size_t buf_size) { size_t buf_size) {
(void)(data);
if (s == NULL) return 0; if (s == NULL) return 0;
u32 shift = unlikely(afl_ptr->custom_only) ? 7 : 8; u32 shift = unlikely(afl_ptr->custom_only) ? 7 : 8;
@ -135,9 +151,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
u32 i, m_size = (u32)m.size(); u32 i, m_size = (u32)m.size();
u32 rounds = u32 rounds =
MAX(AUTOTOKENS_CHANGE_MIN, MIN(AUTOTOKENS_CHANGE_MAX,
MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * MAX(AUTOTOKENS_CHANGE_MIN,
afl_ptr->havoc_div / 256)); MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score *
afl_ptr->havoc_div / 256)));
// DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds);
#if AUTOTOKENS_SPLICE_DISABLE == 1 #if AUTOTOKENS_SPLICE_DISABLE == 1
@ -379,9 +396,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
/* I get f*cking stack overflow using C++ regex with a regex of /* I get f*cking stack overflow using C++ regex with a regex of
"\"[[:print:]]*?\"" if this matches a long string even with regex::optimize "\"[[:print:]]*?\"" if this matches a long string even with regex::optimize
enabled :-( */ enabled :-( */
u8 my_search_string(string::const_iterator cur, string::const_iterator ende, static u8 my_search_string(string::const_iterator cur,
string::const_iterator *match_begin, string::const_iterator ende,
string::const_iterator *match_end) { string::const_iterator *match_begin,
string::const_iterator *match_end) {
string::const_iterator start = cur, found_begin; string::const_iterator start = cur, found_begin;
u8 quote_type = 0; u8 quote_type = 0;
@ -460,25 +478,30 @@ u8 my_search_string(string::const_iterator cur, string::const_iterator ende,
} }
/* We are not using afl_custom_queue_new_entry() because not every corpus entry /* We are not using afl_custom_queue_new_entry() because not every corpus entry
will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ will be necessarily fuzzed with this custom mutator.
So we use afl_custom_queue_get() instead. */
extern "C" unsigned char afl_custom_queue_get(void *data, extern "C" unsigned char afl_custom_queue_get(void *data,
const unsigned char *filename) { const unsigned char *filename) {
static int learn_state; static int learn_state = 0;
static int is_first_run = 1;
(void)(data); (void)(data);
if (likely(!debug)) { if (unlikely(is_first_run)) {
if (unlikely(!afl_ptr->custom_only) && is_first_run = 0;
((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || first_run(data);
(only_fav && !afl_ptr->queue_cur->favored))) {
s = NULL; }
DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n");
return 1;
} if (unlikely(!afl_ptr->custom_only) && !create_from_thin_air &&
((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) ||
(only_fav && !afl_ptr->queue_cur->favored))) {
s = NULL;
DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n");
return 1;
} }
@ -551,6 +574,42 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
string fn = (char *)filename; string fn = (char *)filename;
auto entry = file_mapping.find(fn); auto entry = file_mapping.find(fn);
// If there is only one active queue item at start and it is very small,
// then we randomly create a structure once.
if (unlikely(create_from_thin_air)) {
if (current_id > whitespace_ids + 6 && afl_ptr->active_items == 1 &&
afl_ptr->queue_cur->len < AFL_TXT_MIN_LEN) {
DEBUGF(stderr, "Creating an entry from thin air...\n");
structure = new vector<u32>();
u32 item, prev, cnt = current_id >> 1;
structure->reserve(cnt + 4);
for (u32 i = 0; i < cnt; i++) {
item = rand_below(afl_ptr, current_id);
if (i && id_to_token[item].length() > 1 &&
id_to_token[prev].length() > 1) {
structure->push_back(good_whitespace_or_singleval());
}
structure->push_back(item);
prev = item;
}
file_mapping[fn] = structure;
s = structure;
return 1;
}
create_from_thin_air = 0;
}
if (entry == file_mapping.end()) { if (entry == file_mapping.end()) {
// this input file was not analyzed for tokens yet, so let's do it! // this input file was not analyzed for tokens yet, so let's do it!
@ -574,8 +633,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); DEBUGF(stderr, "Too short (%lu) %s\n", len, filename);
return 1; return 1;
} else } else if (len > AFL_TXT_MAX_LEN) {
if (len > AFL_TXT_MAX_LEN) {
fclose(fp); fclose(fp);
file_mapping[fn] = structure; // NULL ptr so we don't read the file again file_mapping[fn] = structure; // NULL ptr so we don't read the file again
@ -1088,6 +1146,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; }
if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; }
if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; }
if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; }
if (getenv("AUTOTOKENS_LEARN_DICT")) { if (getenv("AUTOTOKENS_LEARN_DICT")) {