better text mutation

This commit is contained in:
van Hauser
2020-07-24 13:26:07 +02:00
parent 3e04dbd5a1
commit 30c0991543
6 changed files with 258 additions and 237 deletions

View File

@ -334,7 +334,7 @@ typedef struct afl_env_vars {
afl_dumb_forksrv, afl_import_first, afl_custom_mutator_only, afl_no_ui,
afl_force_ui, afl_i_dont_care_about_missing_crashes, afl_bench_just_one,
afl_bench_until_crash, afl_debug_child_output, afl_autoresume,
afl_cal_fast, afl_cycle_schedules;
afl_cal_fast, afl_cycle_schedules, afl_expand_havoc;
u8 *afl_tmpdir, *afl_custom_mutator_library, *afl_python_module, *afl_path,
*afl_hang_tmout, *afl_skip_crashes, *afl_preload;

View File

@ -403,15 +403,14 @@
/* Text mutations */
/* What is the minimum length of a queue input to be evaluated for "is_ascii"?
+ */
/* Minimum length of a queue input to be evaluated for "is_ascii"? */
#define AFL_TXT_MIN_LEN 12
/* What is the minimum percentage of ascii characters present to be classified
as "is_ascii"? */
#define AFL_TXT_MIN_PERCENT 95
#define AFL_TXT_MIN_PERCENT 94
/* How often to perform ASCII mutations: 0 = disable, 1-8 are good values */
@ -423,7 +422,7 @@
/* Maximum mutations on a string */
#define AFL_TXT_STRING_MAX_MUTATIONS 8
#define AFL_TXT_STRING_MAX_MUTATIONS 6
#endif /* ! _HAVE_CONFIG_H */

View File

@ -559,13 +559,25 @@ static u32 string_replace(u8 **out_buf, s32 *temp_len, u32 pos, u8 *from,
/* Returns 1 if a mutant was generated and placed in out_buf, 0 if none
* generated. */
/* Pool of whitespace and punctuation bytes from which the special character
   transform in text_mutation() draws both its "from" and its "to" character.
   The blank is listed twice on purpose, so it occupies two of the 36 slots. */
static const uint8_t text_mutation_special_chars[] = {

    /* whitespace */
    '\t', '\n', '\r', ' ',

    /* punctuation below the digits */
    '!', '"', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',

    /* punctuation between the digits and the upper case letters */
    ':', ';', '<', '=', '>', '?', '@',

    /* punctuation between the upper and the lower case letters */
    '[', '\\', ']', '^', '_', '`',

    /* punctuation above the lower case letters */
    '{', '|', '}', '~',

    /* second entry for the blank */
    ' '

};
static int text_mutation(afl_state_t *afl, u8 **out_buf, s32 *orig_temp_len) {
if (*orig_temp_len < AFL_TXT_MIN_LEN) { return 0; }
s32 temp_len;
u32 pos, yes = 0,
mutations = rand_below(afl, AFL_TXT_STRING_MAX_MUTATIONS) + 1;
u8 *new_buf = ck_maybe_grow(BUF_PARAMS(out_scratch),
*orig_temp_len + AFL_TXT_STRING_MAX_MUTATIONS +1);
mutations = rand_below(afl, AFL_TXT_STRING_MAX_MUTATIONS) + 16;
u8 *new_buf =
ck_maybe_grow(BUF_PARAMS(out_scratch),
*orig_temp_len + AFL_TXT_STRING_MAX_MUTATIONS + 16);
u8 fromc[2] = {0, 0}, toc[2] = {0, 0};
temp_len = *orig_temp_len;
memcpy(new_buf, *out_buf, temp_len);
new_buf[temp_len] = 0;
@ -575,9 +587,12 @@ static int text_mutation(afl_state_t *afl, u8 **out_buf, s32 *orig_temp_len) {
if (temp_len < AFL_TXT_MIN_LEN) { return 0; }
pos = rand_below(afl, temp_len - 1);
int choice = rand_below(afl, 80);
int choice = rand_below(afl, 100);
switch (choice) {
/* 50% -> fixed replacements */
case 0: /* Semantic statement deletion */
yes += string_replace(&new_buf, &temp_len, pos, "\n", "#");
break;
@ -624,191 +639,130 @@ static int text_mutation(afl_state_t *afl, u8 **out_buf, s32 *orig_temp_len) {
yes += string_replace(&new_buf, &temp_len, pos, "+", "-");
break;
case 15:
yes += string_replace(&new_buf, &temp_len, pos, "+", "*");
break;
case 16:
yes += string_replace(&new_buf, &temp_len, pos, "+", "/");
break;
case 17:
yes += string_replace(&new_buf, &temp_len, pos, "+", "%");
break;
case 18:
yes += string_replace(&new_buf, &temp_len, pos, "*", "-");
break;
case 19:
yes += string_replace(&new_buf, &temp_len, pos, "*", "+");
break;
case 20:
yes += string_replace(&new_buf, &temp_len, pos, "*", "/");
break;
case 21:
yes += string_replace(&new_buf, &temp_len, pos, "*", "%");
break;
case 22:
yes += string_replace(&new_buf, &temp_len, pos, "-", "+");
break;
case 23:
yes += string_replace(&new_buf, &temp_len, pos, "-", "*");
break;
case 24:
yes += string_replace(&new_buf, &temp_len, pos, "-", "/");
break;
case 25:
yes += string_replace(&new_buf, &temp_len, pos, "-", "%");
break;
case 26:
yes += string_replace(&new_buf, &temp_len, pos, "/", "-");
break;
case 27:
yes += string_replace(&new_buf, &temp_len, pos, "/", "*");
break;
case 28:
yes += string_replace(&new_buf, &temp_len, pos, "/", "+");
break;
case 29:
yes += string_replace(&new_buf, &temp_len, pos, "/", "%");
break;
case 30:
yes += string_replace(&new_buf, &temp_len, pos, "%", "-");
break;
case 31:
yes += string_replace(&new_buf, &temp_len, pos, "%", "*");
break;
case 32:
yes += string_replace(&new_buf, &temp_len, pos, "%", "/");
break;
case 33:
yes += string_replace(&new_buf, &temp_len, pos, "%", "+");
break;
case 34:
yes += string_replace(&new_buf, &temp_len, pos, " ", "|");
break;
case 35:
yes += string_replace(&new_buf, &temp_len, pos, " ", "$");
break;
case 36:
case 16:
yes += string_replace(&new_buf, &temp_len, pos, "0", "1");
break;
case 37:
case 17:
yes += string_replace(&new_buf, &temp_len, pos, "1", "0");
break;
case 38:
yes += string_replace(&new_buf, &temp_len, pos, " ", "`");
break;
case 39:
yes += string_replace(&new_buf, &temp_len, pos, " ", "\"");
break;
case 40:
yes += string_replace(&new_buf, &temp_len, pos, ";", " ");
break;
case 41:
case 18:
yes += string_replace(&new_buf, &temp_len, pos, "&&", "||");
break;
case 42:
case 19:
yes += string_replace(&new_buf, &temp_len, pos, "||", "&&");
break;
case 43:
case 20:
yes += string_replace(&new_buf, &temp_len, pos, "!", "");
break;
case 44:
case 21:
yes += string_replace(&new_buf, &temp_len, pos, "==", "=");
break;
case 45:
case 22:
yes += string_replace(&new_buf, &temp_len, pos, "=", "==");
break;
case 23:
yes += string_replace(&new_buf, &temp_len, pos, "--", "");
break;
case 46:
case 24:
yes += string_replace(&new_buf, &temp_len, pos, "<<", "<");
break;
case 47:
case 25:
yes += string_replace(&new_buf, &temp_len, pos, ">>", ">");
break;
case 48:
case 26:
yes += string_replace(&new_buf, &temp_len, pos, "<", "<<");
break;
case 49:
case 27:
yes += string_replace(&new_buf, &temp_len, pos, ">", ">>");
break;
case 50:
yes += string_replace(&new_buf, &temp_len, pos, "\"", "'");
break;
case 51:
case 28:
yes += string_replace(&new_buf, &temp_len, pos, "'", "\"");
break;
case 52:
yes += string_replace(&new_buf, &temp_len, pos, "(", "\"");
case 29:
yes += string_replace(&new_buf, &temp_len, pos, "\"", "'");
break;
case 53: /* Remove a semicolon delimited statement after a semicolon */
case 30: /* Remove a semicolon delimited statement after a semicolon */
yes += delim_replace(&new_buf, &temp_len, pos, ";", ";", ";");
break;
case 54: /* Remove a semicolon delimited statement after a left curly
case 31: /* Remove a semicolon delimited statement after a left curly
brace */
yes += delim_replace(&new_buf, &temp_len, pos, "}", ";", "}");
break;
case 55: /* Remove a curly brace construct */
case 32: /* Remove a curly brace construct */
yes += delim_replace(&new_buf, &temp_len, pos, "{", "}", "");
break;
case 56: /* Replace a curly brace construct with an empty one */
case 33: /* Replace a curly brace construct with an empty one */
yes += delim_replace(&new_buf, &temp_len, pos, "{", "}", "{}");
break;
case 57:
case 34:
yes += delim_swap(&new_buf, &temp_len, pos, ";", ";", ";");
break;
case 58:
case 35:
yes += delim_swap(&new_buf, &temp_len, pos, "}", ";", ";");
break;
case 59: /* Swap comma delimited things case 1 */
case 36: /* Swap comma delimited things case 1 */
yes += delim_swap(&new_buf, &temp_len, pos, "(", ",", ")");
break;
case 60: /* Swap comma delimited things case 2 */
case 37: /* Swap comma delimited things case 2 */
yes += delim_swap(&new_buf, &temp_len, pos, "(", ",", ",");
break;
case 61: /* Swap comma delimited things case 3 */
case 38: /* Swap comma delimited things case 3 */
yes += delim_swap(&new_buf, &temp_len, pos, ",", ",", ",");
break;
case 62: /* Swap comma delimited things case 4 */
case 39: /* Swap comma delimited things case 4 */
yes += delim_swap(&new_buf, &temp_len, pos, ",", ",", ")");
break;
case 63: /* Just delete a line */
case 40: /* Just delete a line */
yes += delim_replace(&new_buf, &temp_len, pos, "\n", "\n", "");
break;
case 64: /* Delete something like "const" case 1 */
case 41: /* Delete something like "const" case 1 */
yes += delim_replace(&new_buf, &temp_len, pos, " ", " ", "");
break;
case 65: /* Delete something like "const" case 2 */
case 42: /* Delete something like "const" case 2 */
yes += delim_replace(&new_buf, &temp_len, pos, "\n", " ", "");
break;
case 66: /* Delete something like "const" case 3 */
case 43: /* Delete something like "const" case 3 */
yes += delim_replace(&new_buf, &temp_len, pos, "(", " ", "");
break;
case 67: /* Swap space delimited things case 1 */
case 44: /* Swap space delimited things case 1 */
yes += delim_swap(&new_buf, &temp_len, pos, " ", " ", " ");
break;
case 68: /* Swap space delimited things case 2 */
case 45: /* Swap space delimited things case 2 */
yes += delim_swap(&new_buf, &temp_len, pos, " ", " ", ")");
break;
case 69: /* Swap space delimited things case 3 */
case 46: /* Swap space delimited things case 3 */
yes += delim_swap(&new_buf, &temp_len, pos, "(", " ", " ");
break;
case 70: /* Swap space delimited things case 4 */
case 47: /* Swap space delimited things case 4 */
yes += delim_swap(&new_buf, &temp_len, pos, "(", " ", ")");
break;
case 71: /* Duplicate a single line of code */
case 48: /* Duplicate a single line of code */
yes += delim_replace(&new_buf, &temp_len, pos, "\n", "\n", NULL);
break;
case 72: /* Duplicate a construct (most often, a non-nested for loop */
case 49: /* Duplicate a construct (most often, a non-nested for loop */
yes += delim_replace(&new_buf, &temp_len, pos, "\n", "}", NULL);
break;
default: {
/* 10% is transforming ascii numbers */
if (choice < 60) {
for (u32 j = pos; j < temp_len; ++j) {
if (isdigit(new_buf[j])) {
new_buf[temp_len] = 0; // should be safe thanks to the initial grow
new_buf[temp_len] =
0; // should be safe thanks to the initial grow
u8 * endptr;
unsigned long long num = strtoull(new_buf +j, (char**)&endptr, 0);
unsigned long long num =
strtoull(new_buf + j, (char **)&endptr, 0);
switch (rand_below(afl, 8)) {
case 0:
num = rand_below(afl, INT_MAX);
break;
@ -833,6 +787,7 @@ static int text_mutation(afl_state_t *afl, u8 **out_buf, s32 *orig_temp_len) {
case 7:
num = ~num;
break;
}
const char *fmt = "%llu";
@ -842,22 +797,76 @@ static int text_mutation(afl_state_t *afl, u8 **out_buf, s32 *orig_temp_len) {
size_t num_len = snprintf(NULL, 0, fmt, num);
size_t old_len = endptr - (new_buf + j);
if (num_len < old_len) {
memmove(new_buf +j +num_len, new_buf +j +old_len, temp_len - (j + old_len));
memmove(new_buf + j + num_len, new_buf + j + old_len,
temp_len - (j + old_len));
snprintf(new_buf + j, num_len, fmt, num);
temp_len -= old_len - num_len;
} else if (num_len == old_len) {
snprintf(new_buf + j, num_len, fmt, num);
} else {
new_buf = ck_maybe_grow(BUF_PARAMS(out_scratch), temp_len + (num_len - old_len) + AFL_TXT_STRING_MAX_MUTATIONS +1);
memmove(new_buf +j +num_len, new_buf +j +old_len, temp_len - (j + old_len));
new_buf = ck_maybe_grow(BUF_PARAMS(out_scratch),
temp_len + (num_len - old_len) +
AFL_TXT_STRING_MAX_MUTATIONS + 1);
memmove(new_buf + j + num_len, new_buf + j + old_len,
temp_len - (j + old_len));
snprintf(new_buf + j, num_len, fmt, num);
temp_len += num_len - old_len;
}
yes += 1;
break;
}
}
} else if (choice < 90) {
/* 30% is special character transform */
fromc[0] = text_mutation_special_chars[rand_below(
afl, sizeof(text_mutation_special_chars))];
do {
toc[0] = text_mutation_special_chars[rand_below(
afl, sizeof(text_mutation_special_chars))];
} while (toc[0] == fromc[0]);
yes += string_replace(&new_buf, &temp_len, pos, fromc, toc);
break;
} else {
/* 10% is random text character transform */
u32 iter, cnt, loc, prev_loc = temp_len;
if (temp_len > 32) {
cnt = 1 + rand_below(afl, 5);
} else {
cnt = rand_below(afl, 2);
}
for (iter = 0; iter <= cnt; iter++) {
while ((loc = rand_below(afl, temp_len)) == prev_loc)
;
new_buf[loc] = 32 + rand_below(afl, 'z' - ' ' + 1);
prev_loc = loc;
}
}
}

View File

@ -264,6 +264,7 @@ static u8 its_fuzz(afl_state_t *afl, u8 *buf, u32 len, u8 *status) {
}
static long long strntoll(const char *str, size_t sz, char **end, int base) {
char buf[64];
long long ret;
const char *beg = str;
@ -272,22 +273,24 @@ static long long strntoll(const char *str, size_t sz, char **end, int base) {
;
if (!sz || sz >= sizeof(buf)) {
if (end)
*end = (char *)str;
if (end) *end = (char *)str;
return 0;
}
memcpy(buf, beg, sz);
buf[sz] = '\0';
ret = strtoll(buf, end, base);
if (ret == LLONG_MIN || ret == LLONG_MAX)
return ret;
if (end)
*end = (char *)beg + (*end - buf);
if (ret == LLONG_MIN || ret == LLONG_MAX) return ret;
if (end) *end = (char *)beg + (*end - buf);
return ret;
}
static unsigned long long strntoull(const char *str, size_t sz, char **end, int base) {
static unsigned long long strntoull(const char *str, size_t sz, char **end,
int base) {
char buf[64];
unsigned long long ret;
const char * beg = str;
@ -296,17 +299,18 @@ static unsigned long long strntoull(const char *str, size_t sz, char **end, int
;
if (!sz || sz >= sizeof(buf)) {
if (end)
*end = (char *)str;
if (end) *end = (char *)str;
return 0;
}
memcpy(buf, beg, sz);
buf[sz] = '\0';
ret = strtoull(buf, end, base);
if (end)
*end = (char *)beg + (*end - buf);
if (end) *end = (char *)beg + (*end - buf);
return ret;
}
#define BUF_PARAMS(name) (void **)&afl->name##_buf, &afl->name##_size
@ -337,10 +341,12 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
endptr = buf_8;
num = strntoll(buf_8, len - idx, (char **)&endptr, 0);
if (endptr == buf_8) {
unum = strntoull(buf_8, len - idx, (char **)&endptr, 0);
if (endptr == buf_8)
use_unum = 1;
if (endptr == buf_8) use_unum = 1;
} else
use_num = 1;
}

View File

@ -300,6 +300,13 @@ void read_afl_environment(afl_state_t *afl, char **envp) {
afl->cycle_schedules = afl->afl_env.afl_cycle_schedules =
get_afl_env(afl_environment_variables[i]) ? 1 : 0;
} else if (!strncmp(env, "AFL_EXPAND_HAVOC_NOW",
afl_environment_variable_len)) {
afl->expand_havoc = afl->afl_env.afl_expand_havoc =
get_afl_env(afl_environment_variables[i]) ? 1 : 0;
} else if (!strncmp(env, "AFL_CAL_FAST",
afl_environment_variable_len)) {