mirror of
https://github.com/AFLplusplus/AFLplusplus.git
synced 2025-06-07 15:51:32 +00:00
521 lines
15 KiB
Bash
Executable File
521 lines
15 KiB
Bash
Executable File
#!/usr/bin/env sh
# Shell wrapper: sets up the environment and hands the awk program in the
# heredoc below to awk on stdin, forwarding all command-line arguments.

export AFL_QUIET=1
export ASAN_OPTIONS=detect_leaks=0

# Directory this script lives in. "$0" is quoted so that an installation path
# containing whitespace is not word-split before dirname sees it.
THISPATH=$(dirname "$0")

# Put the script's directory first on PATH for everything started from here.
export PATH="${THISPATH}:$PATH"

# ${@+"$@"} expands to nothing (rather than one empty word) when no arguments
# were given; "--" keeps awk from interpreting the script's own options.
awk -f - -- ${@+"$@"} <<'EOF'
|
|
#!/usr/bin/awk -f
|
|
|
|
# awk script to minimize a test corpus of input files
|
|
#
|
|
# based on afl-cmin bash script written by Michal Zalewski
|
|
# rewritten by Heiko Eißfeldt (hexcoder-)
|
|
# tested with:
|
|
# gnu awk (x86 Linux)
|
|
# bsd awk (x86 *BSD)
|
|
# mawk (arm32 raspbian)
|
|
#
|
|
# uses getopt.awk package from Arnold Robbins
|
|
#
|
|
# external tools used by this script:
|
|
# test
|
|
# grep
|
|
# rm
|
|
# mkdir
|
|
# ln
|
|
# cp
|
|
# pwd
|
|
# type
|
|
# cd
|
|
# find
|
|
# stat
|
|
# sort
|
|
# cut
|
|
# and afl-showmap from this project :-)
|
|
|
|
# getopt.awk --- Do C library getopt(3) function in awk
|
|
|
|
# External variables:
|
|
# Optind -- index in ARGV of first nonoption argument
|
|
# Optarg -- string value of argument to current option
|
|
# Opterr -- if nonzero, print our own diagnostic
|
|
# Optopt -- current option letter
|
|
|
|
# Returns:
|
|
# -1 at end of options
|
|
# "?" for unrecognized option
|
|
# <c> a character representing the current option
|
|
|
|
# Private Data:
|
|
# _opti -- index in multiflag option, e.g., -abc
|
|
|
|
function getopt(argc, argv, options, thisopt, i)
{
  # Parse the next option letter from argv[Optind].
  #   argc     - number of arguments (not consulted by the body)
  #   argv     - argument array, indexed like ARGV
  #   options  - option letters; a letter followed by ":" takes an argument
  #   thisopt, i - local scratch variables
  # Reads and updates the globals Optind, Optarg, Optopt and _opti.
  # Returns -1 when option processing ends, "?" for a letter that is not in
  # options, otherwise the option letter itself.

  # Without an option specification there is nothing to recognise.
  if (length(options) == 0)
    return -1

  # A lone "--" terminates the options and is itself consumed.
  if (argv[Optind] == "--") {
    Optind++
    _opti = 0
    return -1
  }

  # Anything that does not look like "-x" is the first non-option argument;
  # Optind is left pointing at it.
  if (argv[Optind] !~ /^-[^:\t ]/) {
    _opti = 0
    return -1
  }

  # _opti == 0 means a fresh argument: the first letter sits at position 2,
  # right after the leading "-".
  if (_opti == 0)
    _opti = 2

  thisopt = substr(argv[Optind], _opti, 1)
  Optopt = thisopt
  i = index(options, thisopt)

  # Unknown letter: optionally complain, step past it, report "?".
  if (i == 0) {
    if (Opterr)
      printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
    if (_opti >= length(argv[Optind])) {
      Optind++
      _opti = 0
    } else {
      _opti++
    }
    return "?"
  }

  if (substr(options, i + 1, 1) != ":") {
    # Plain flag, no argument.
    Optarg = ""
  } else {
    # The argument is either the remainder of this word ("-ofile") or the
    # whole next word ("-o file"); ++Optind only runs in the second case.
    Optarg = (length(substr(argv[Optind], _opti + 1)) > 0) \
      ? substr(argv[Optind], _opti + 1) \
      : argv[++Optind]
    _opti = 0
  }

  # Advance to the next word once this one is used up, otherwise to the next
  # letter inside a bundled group such as "-abc".
  if (_opti == 0 || _opti >= length(argv[Optind])) {
    Optind++
    _opti = 0
  } else {
    _opti++
  }
  return thisopt
}
|
|
|
|
# Print the help text on standard output and terminate with exit status 1.
#
# The -m line splices in the global mem_limit; when usage() runs before that
# variable has been assigned, the parentheses show an empty value.
#
# Every string of the message except the last must end in a "\" line
# continuation: a missing one ends the print statement early and the strings
# after it are silently dropped from the output.
function usage() {
  print \
"afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \
"\n" \
"Required parameters:\n" \
" -i dir - input directory with starting corpus\n" \
" -o dir - output directory for minimized files\n" \
"\n" \
"Execution control settings:\n" \
" -f file - location read by the fuzzed program (stdin)\n" \
" -m megs - memory limit for child process ("mem_limit" MB)\n" \
" -t msec - run time limit for child process (none)\n" \
" -Q - use binary-only instrumentation (QEMU mode)\n" \
" -U - use unicorn-based instrumentation (unicorn mode)\n" \
"\n" \
"Minimization settings:\n" \
" -C - keep crashing inputs, reject everything else\n" \
" -e - solve for edge coverage only, ignore hit counts\n" \
"\n" \
"For additional tips, please consult README.md\n" \
"\n" \
"Environment variables used:\n" \
"AFL_ALLOW_TMP: allow unsafe use of input/output directories under {/var}/tmp\n" \
"AFL_CRASH_EXITCODE: optional child exit code to be interpreted as crash\n" \
"AFL_FORKSRV_INIT_TMOUT: time the fuzzer waits for the target to come up, initially\n" \
"AFL_KEEP_TRACES: leave the temporary <out_dir>/.traces directory\n" \
"AFL_KILL_SIGNAL: Signal ID delivered to child processes on timeout, etc. (default: SIGKILL)\n" \
"AFL_PATH: path for the afl-showmap binary if not found anywhere else\n" \
"AFL_SKIP_BIN_CHECK: skip check for target binary\n"
  exit 1
}
|
|
|
|
# Return 1 if binarypath names a regular file that is executable, else 0.
#
# The check is delegated to the shell's test utility via system(). The path
# is wrapped in single quotes (embedded single quotes are rewritten as '\'')
# so that whitespace or shell metacharacters in it reach test as one literal
# word. awk passes scalars by value, so the caller's variable is untouched.
function exists_and_is_executable(binarypath) {
  gsub(/'/, "'\\''", binarypath)
  # Two separate tests joined by && instead of the obsolescent "-a" operator.
  return 0 == system("test -f '" binarypath "' && test -x '" binarypath "'")
}
|
|
|
|
# Main program. Everything runs in BEGIN; no input records are processed.
#
# Outline:
#   1. parse options and collect the target command line
#   2. sanity-check directories, limits, the target binary and afl-showmap
#   3. list the input files, smallest first
#   4. optionally do one test run of afl-showmap on the smallest file
#   5. run afl-showmap over the whole input directory to produce trace files
#   6. for every tuple remember the first (smallest) file containing it, then
#      walk the tuples from rare to frequent and copy each chosen file once
BEGIN {
  print "corpus minimization tool for afl++ (awk version)\n"

  # defaults
  extra_par = ""
  AFL_CMIN_CRASHES_ONLY = ""

  # process options
  Opterr = 1    # default is to diagnose
  Optind = 1    # skip ARGV[0]
  while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCQU?")) != -1) {
    if (_go_c == "i") {
      if (!Optarg) usage()
      if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      in_dir = Optarg
      continue
    } else
    if (_go_c == "o") {
      if (!Optarg) usage()
      if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      out_dir = Optarg
      continue
    } else
    if (_go_c == "f") {
      if (!Optarg) usage()
      if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      stdin_file = Optarg
      continue
    } else
    if (_go_c == "m") {
      if (!Optarg) usage()
      if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      mem_limit = Optarg
      mem_limit_given = 1
      continue
    } else
    if (_go_c == "t") {
      if (!Optarg) usage()
      if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      timeout = Optarg
      continue
    } else
    if (_go_c == "C") {
      # passed to afl-showmap as an environment assignment prefix
      AFL_CMIN_CRASHES_ONLY = "AFL_CMIN_CRASHES_ONLY=1 "
      continue
    } else
    if (_go_c == "e") {
      extra_par = extra_par " -e"
      continue
    } else
    if (_go_c == "Q") {
      if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -Q"
      qemu_mode = 1
      continue
    } else
    if (_go_c == "U") {
      if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -U"
      unicorn_mode = 1
      continue
    } else
    if (_go_c == "?") {
      # getopt already printed its diagnostic for an unknown letter
      exit 1
    } else
      usage()
  } # while options

  if (!mem_limit) mem_limit = "none"
  if (!timeout) timeout = "none"

  # get program args: prog_args[0] is the target itself, prog_args_string
  # holds only the arguments that follow it
  i = 0
  prog_args_string = ""
  for (; Optind < ARGC; Optind++) {
    prog_args[i++] = ARGV[Optind]
    if (i > 1)
      prog_args_string = prog_args_string" "ARGV[Optind]
  }

  # sanity checks
  if (!prog_args[0] || !in_dir || !out_dir) usage()

  target_bin = prog_args[0]

  # Do a sanity check to discourage the use of /tmp, since we can't really
  # handle this safely from an awk script.

  if (!ENVIRON["AFL_ALLOW_TMP"]) {
    dirlist[0] = in_dir
    dirlist[1] = target_bin
    dirlist[2] = out_dir
    dirlist[3] = stdin_file
    "pwd" | getline dirlist[4] # current directory
    for (dirind in dirlist) {
      dir = dirlist[dirind]

      if (dir ~ /^(\/var)?\/tmp/) {
        print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr"
        exit 1
      }
    }
    delete dirlist
  }

  # If @@ is specified, but there's no -f, let's come up with a temporary input
  # file name.

  trace_dir = out_dir "/.traces"

  if (!stdin_file) {
    found_atat = 0
    for (prog_args_ind in prog_args) {
      if ("@@" == prog_args[prog_args_ind]) {
        found_atat = 1
        break
      }
    }
    if (found_atat) {
      stdin_file = trace_dir "/.cur_input"
    }
  }

  # Check for obvious errors.

  if (mem_limit && mem_limit != "none" && mem_limit < 5) {
    print "[-] Error: dangerously low memory limit." > "/dev/stderr"
    exit 1
  }

  if (timeout && timeout != "none" && timeout < 10) {
    print "[-] Error: dangerously low timeout." > "/dev/stderr"
    exit 1
  }

  # If the target is not an executable file as given, try to resolve it
  # through the shell's command lookup.
  if (target_bin && !exists_and_is_executable(target_bin)) {

    "command -v "target_bin" 2>/dev/null" | getline tnew
    if (!tnew || !exists_and_is_executable(tnew)) {
      print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr"
      exit 1
    }
    target_bin = tnew
  }

  # The instrumentation check greps the binary for the __AFL_SHM_ID marker;
  # it is skipped in QEMU/unicorn mode and when AFL_SKIP_BIN_CHECK is set.
  if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !unicorn_mode) {
    if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) {
      print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr"
      exit 1
    }
  }

  if (0 != system( "test -d "in_dir )) {
    print "[-] Error: directory '"in_dir"' not found." > "/dev/stderr"
    exit 1
  }

  # When the input directory has a queue/ subdirectory, use that instead.
  if (0 == system( "test -d "in_dir"/queue" )) {
    in_dir = in_dir "/queue"
  }

  # Remove leftovers of an earlier run, then refuse a non-empty output dir.
  system("rm -rf "trace_dir" 2>/dev/null");
  system("rm "out_dir"/id[:_]* 2>/dev/null")

  "ls "out_dir"/* 2>/dev/null | wc -l" | getline noofentries
  if (0 == system( "test -d "out_dir" -a "noofentries" -gt 0" )) {
    print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr"
    exit 1
  }

  if (0 != system("mkdir -p -m 0700 "trace_dir)) {
    print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr"
    exit 1
  }

  if (stdin_file) {
    # truncate input file
    printf "" > stdin_file
    close( stdin_file )
  }

  # First we look in PATH
  if (0 == system("command -v afl-showmap >/dev/null 2>&1")) {
    "command -v afl-showmap 2>/dev/null" | getline showmap
  } else {
    # then we look in the current directory
    if (0 == system("test -x ./afl-showmap")) {
      showmap = "./afl-showmap"
    } else {
      # and finally in AFL_PATH
      if (ENVIRON["AFL_PATH"]) {
        showmap = ENVIRON["AFL_PATH"] "/afl-showmap"
      }
    }
  }

  if (!showmap || 0 != system("test -x "showmap )) {
    print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr"
    exit 1
  }

  # get list of input filenames sorted by size
  # yuck, GNU stat is option-incompatible with BSD stat;
  # we use a heuristic to differentiate between
  # GNU stat and other stats
  "stat --version 2>/dev/null" | getline statversion
  if (statversion ~ /GNU coreutils/) {
    stat_format = "-c '%s %n'" # GNU
  } else {
    stat_format = "-f '%z %N'" # *BSD, MacOS
  }
  cmdline = "ls "in_dir" | (cd "in_dir" && xargs stat "stat_format" 2>/dev/null) | sort -k1n -k2r"
  i = 0
  # getline yields -1 on error, which is "true" in a bare condition and would
  # loop forever; only a return value > 0 means a record was actually read.
  while ((cmdline | getline) > 0) {
    # strip the leading "<size> " (and an optional "./") to keep the name only
    sub(/^[0-9]+ (\.\/)?/,"",$0)
    infilesSmallToBig[i++] = $0
  }
  close(cmdline)
  in_count = i

  first_file = infilesSmallToBig[0]

  # Make sure that we're not dealing with a directory.
  # (With an empty list first_file is "", so this tests in_dir itself.)

  if (0 == system("test -d "in_dir"/"first_file)) {
    print "[-] Error: The input directory is empty or contains subdirectories - please fix." > "/dev/stderr"
    exit 1
  }

  # Check for the more efficient way to copy files: hard-link if a link into
  # the trace directory can be created, otherwise fall back to cp.
  if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) {
    cp_tool = "ln"
  } else {
    cp_tool = "cp"
  }

  if (!ENVIRON["AFL_SKIP_BIN_CHECK"]) {
    # Make sure that we can actually get anything out of afl-showmap before we
    # waste too much time.

    print "[*] Testing the target binary..."

    if (!stdin_file) {
      # feed the smallest input on stdin
      system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
    } else {
      # place the smallest input where the target reads it from
      system("cp "in_dir"/"first_file" "stdin_file)
      system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
    }

    # one line of the test trace counts as one tuple
    first_count = 0

    runtest = trace_dir"/.run_test"
    while ((getline < runtest) > 0) {
      ++first_count
    }
    close(runtest)

    if (first_count) {
      print "[+] OK, "first_count" tuples recorded."
    } else {
      print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr"
      if (!ENVIRON["AFL_KEEP_TRACES"]) {
        system("rm -rf "trace_dir" 2>/dev/null")
      }
      exit 1
    }
  }

  # Let's roll!

  #############################
  # STEP 1: Collecting traces #
  #############################

  print "[*] Obtaining traces for "in_count" input files in '"in_dir"'."

  # afl-showmap is started once with -i in_dir and -o trace_dir; the loops
  # below expect one trace file per input, named like the input file.
  if (!stdin_file) {
    print " Processing "in_count" files (forkserver mode)..."
    retval = system( AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string)
  } else {
    print " Processing "in_count" files (forkserver mode)..."
    # NOTE(review): unlike the test run above, stdin_file is not passed to
    # afl-showmap here - confirm that this is intended.
    retval = system( AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string" </dev/null")
  }

  # a non-zero exit status is only tolerated when -C was given
  if (retval && !AFL_CMIN_CRASHES_ONLY) {
    print "[!] Exit code "retval" != 0 received from afl-showmap, terminating..."

    if (!ENVIRON["AFL_KEEP_TRACES"]) {
      system("rm -rf "trace_dir" 2>/dev/null")
      system("rmdir "out_dir)
    }
    exit retval
  }

  #######################################################
  # STEP 2: register smallest input file for each tuple #
  # STEP 3: copy that file (at most once)               #
  #######################################################

  print "[*] Processing traces for input files in '"in_dir"'."

  cur = 0
  out_count = 0
  tuple_count = 0

  # Files are visited smallest first, so the first file seen for a tuple is
  # the smallest one containing it.
  while (cur < in_count) {
    fn = infilesSmallToBig[cur]
    ++cur
    printf "\r Processing file "cur"/"in_count
    # create path for the trace file from afl-showmap
    tracefile_path = trace_dir"/"fn
    # gather all keys, and count them
    while ((getline line < tracefile_path) > 0) {
      key = line
      if (!(key in key_count)) {
        ++tuple_count
      }
      ++key_count[key]
      if (! (key in best_file)) {
        # this is the best file for this key
        best_file[key] = fn
      }
    }
    close(tracefile_path)
  }
  print ""

  # sort keys by how many files contain them, written as "<count> <key>"
  sortedKeys = trace_dir"/.all_uniq"
  sortKeysCmd = "sort -k1n > "sortedKeys
  for (key in key_count) {
    printf "%7d %s\n",key_count[key],key | sortKeysCmd
  }
  close(sortKeysCmd)

  # iterate over keys from rare to frequent and
  # copy best file
  while ((getline < sortedKeys) > 0) {

    # split; the key is the last field, after the right-aligned count
    nrFields = split($0, field, / +/)
    key = field[nrFields]

    ++tcnt;
    printf "\r Processing tuple "tcnt"/"tuple_count" with count "key_count[key]"..."
    # already covered by a file chosen for an earlier tuple
    if (key in keyAlreadyKnown) {
      continue
    }

    fn = best_file[key]
    # gather all tuples from the best file for this key
    tracedfn = trace_dir"/"fn
    while ((getline < tracedfn) > 0) {
      keyAlreadyKnown[$0] = ""
    }
    close(tracedfn)

    # copy file unless already done
    if (! (fn in file_already_copied)) {
      system(cp_tool" "in_dir"/"fn" "out_dir"/"fn)
      file_already_copied[fn] = ""
      ++out_count
    }
  }
  close(sortedKeys)
  print ""
  print "[+] Found "tuple_count" unique tuples across "in_count" files."

  if (out_count == 1) {
    print "[!] WARNING: All test cases had the same traces, check syntax!"
  }
  print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'."

  if (!ENVIRON["AFL_KEEP_TRACES"]) {
    system("rm -rf "trace_dir" 2>/dev/null")
  }

  exit 0
}
|
|
EOF
|