Merge branch 'master' into CmpLog

This commit is contained in:
Andrea Fioraldi
2020-01-30 22:52:27 +01:00
committed by GitHub
70 changed files with 2617 additions and 838 deletions

View File

@ -9,6 +9,9 @@ RUN apt-get update && apt-get install -y \
clang \
clang-9 \
flex \
git \
python3.7 \
python3.7-dev \
gcc-9 \
gcc-9-plugin-dev \
gcc-9-multilib \
@ -23,10 +26,12 @@ RUN apt-get update && apt-get install -y \
ca-certificates \
libpixman-1-dev \
&& rm -rf /var/lib/apt/lists/*
ARG CC=gcc-9
ARG CXX=g++-9
ARG LLVM_CONFIG=llvm-config-9
COPY . /app
RUN cd /app && make clean && make distrib && \
make install && cd .. && rm -rf /app
WORKDIR /work
RUN git clone https://github.com/vanhauser-thc/AFLplusplus
RUN cd AFLplusplus && make clean && make distrib && \
make install && cd .. && rm -rf AFLplusplus

View File

@ -29,7 +29,7 @@ VERSION = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2)
# PROGS intentionally omit afl-as, which gets installed elsewhere.
PROGS = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
SH_PROGS = afl-plot afl-cmin afl-whatsup afl-system-config
SH_PROGS = afl-plot afl-cmin afl-cmin.bash afl-whatsup afl-system-config
MANPAGES=$(foreach p, $(PROGS) $(SH_PROGS), $(p).8)
ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
@ -48,6 +48,14 @@ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .te
CFLAGS_OPT = -march=native
endif
ifneq "$(shell uname -m)" "x86_64"
ifneq "$(shell uname -m)" "i386"
ifneq "$(shell uname -m)" "amd64"
AFL_NO_X86=1
endif
endif
endif
CFLAGS ?= -O3 -funroll-loops $(CFLAGS_OPT)
CFLAGS += -Wall -g -Wno-pointer-sign -I include/ \
-DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \
@ -55,17 +63,17 @@ CFLAGS += -Wall -g -Wno-pointer-sign -I include/ \
AFL_FUZZ_FILES = $(wildcard src/afl-fuzz*.c)
ifneq "($filter %3.7m, $(shell python3.7m-config --includes 2>/dev/null)" ""
ifneq "$(filter %3.7m, $(shell python3.7m-config --includes 2>/dev/null))" ""
PYTHON_INCLUDE ?= $(shell python3.7m-config --includes)
PYTHON_LIB ?= $(shell python3.7m-config --ldflags)
PYTHON_VERSION = 3.7m
else
ifneq "($filter %3.7, $(shell python3.7-config --includes) 2> /dev/null" ""
ifneq "$(filter %3.7, $(shell python3.7-config --includes 2>/dev/null))" ""
PYTHON_INCLUDE ?= $(shell python3.7-config --includes)
PYTHON_LIB ?= $(shell python3.7-config --ldflags)
PYTHON_VERSION = 3.7
else
ifneq "($filter %2.7, $(shell python2.7-config --includes) 2> /dev/null" ""
ifneq "$(filter %2.7, $(shell python2.7-config --includes 2>/dev/null))" ""
PYTHON_INCLUDE ?= $(shell python2.7-config --includes)
PYTHON_LIB ?= $(shell python2.7-config --ldflags)
PYTHON_VERSION = 2.7
@ -77,14 +85,14 @@ PYTHON_INCLUDE ?= $(shell test -e /usr/include/python3.7m && echo /usr/include/p
PYTHON_INCLUDE ?= $(shell test -e /usr/include/python3.7 && echo /usr/include/python3.7)
PYTHON_INCLUDE ?= $(shell test -e /usr/include/python2.7 && echo /usr/include/python2.7)
ifneq "($filter %3.7m, $(PYTHON_INCLUDE))" ""
ifneq "$(filter %3.7m, $(PYTHON_INCLUDE))" ""
PYTHON_VERSION ?= 3.7m
PYTHON_LIB ?= -lpython3.7m
else
ifneq "($filter %3.7, $(PYTHON_INCLUDE))" ""
ifneq "$(filter %3.7, $(PYTHON_INCLUDE))" ""
PYTHON_VERSION ?= 3.7
else
ifneq "($filter %2.7, $(PYTHON_INCLUDE))" ""
ifneq "$(filter %2.7, $(PYTHON_INCLUDE))" ""
PYTHON_VERSION ?= 2.7
PYTHON_LIB ?= -lpython2.7
else

View File

@ -27,7 +27,7 @@
get any feature improvements since November 2017.
Among other changes afl++ has a more performant llvm_mode, supports
llvm up to version 10, QEMU 3.1, more speed and crashfixes for QEMU,
llvm up to version 11, QEMU 3.1, more speed and crashfixes for QEMU,
better *BSD and Android support and much, much more.
Additionally the following features and patches have been integrated:
@ -204,7 +204,7 @@ superior to blind fuzzing or coverage-only tools.
PLEASE NOTE: llvm_mode compilation with afl-clang-fast/afl-clang-fast++
instead of afl-gcc/afl-g++ is much faster and has a few cool features.
See llvm_mode/ - however few code does not compile with llvm.
We support llvm versions 3.8.0 to 10.
We support llvm versions 3.8.0 to 11.
When source code is available, instrumentation can be injected by a companion
tool that works as a drop-in replacement for gcc or clang in any standard build
@ -227,7 +227,7 @@ For C++ programs, you'd would also want to set `CXX=/path/to/afl/afl-g++`.
The clang wrappers (afl-clang and afl-clang++) can be used in the same way;
clang users may also opt to leverage a higher-performance instrumentation mode,
as described in [llvm_mode/README.md](llvm_mode/README.md).
Clang/LLVM has a much better performance and works with LLVM version 3.8.0 to 10.
Clang/LLVM has a much better performance and works with LLVM version 3.8.0 to 11.
Using the LAF Intel performance enhancements are also recommended, see
[llvm_mode/README.laf-intel.md](llvm_mode/README.laf-intel.md)
@ -272,7 +272,7 @@ $ ./build_qemu_support.sh
For additional instructions and caveats, see [qemu_mode/README.md](qemu_mode/README.md).
The mode is approximately 2-5x slower than compile-time instrumentation, is
less conductive to parallelization, and may have some other quirks.
less conducive to parallelization, and may have some other quirks.
If [afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) works for
your binary, then you can use afl-fuzz normally and it will have twice

858
afl-cmin
View File

@ -1,470 +1,464 @@
#!/usr/bin/env bash
#!/usr/bin/env sh
THISPATH=`dirname ${0}`
export PATH=${THISPATH}:$PATH
awk -f - -- ${@+"$@"} <<'EOF'
#!/usr/bin/awk -f
# awk script to minimize a test corpus of input files
#
# american fuzzy lop++ - corpus minimization tool
# ---------------------------------------------
# based on afl-cmin bash script written by Michal Zalewski
# rewritten by Heiko Eißfeldt (hexcoder-)
# tested with:
# gnu awk (x86 Linux)
# bsd awk (x86 *BSD)
# mawk (arm32 raspbian)
#
# Originally written by Michal Zalewski
#
# Copyright 2014, 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This tool tries to find the smallest subset of files in the input directory
# that still trigger the full range of instrumentation data points seen in
# the starting corpus. This has two uses:
#
# - Screening large corpora of input files before using them as a seed for
# afl-fuzz. The tool will remove functionally redundant files and likely
# leave you with a much smaller set.
#
# (In this case, you probably also want to consider running afl-tmin on
# the individual files later on to reduce their size.)
#
# - Minimizing the corpus generated organically by afl-fuzz, perhaps when
# planning to feed it to more resource-intensive tools. The tool achieves
# this by removing all entries that used to trigger unique behaviors in the
# past, but have been made obsolete by later finds.
#
# Note that the tool doesn't modify the files themselves. For that, you want
# afl-tmin.
#
# This script must use bash because other shells may have hardcoded limits on
# array sizes.
# uses getopt.awk package from Arnold Robbins
#
# external tools used by this script:
# test
# grep
# rm
# mkdir
# ln
# cp
# pwd
# which
# cd
# find
# stat
# sort
# cut
# and afl-showmap from this project :-)
echo "corpus minimization tool for afl-fuzz by Michal Zalewski"
echo
# getopt.awk --- Do C library getopt(3) function in awk
#########
# SETUP #
#########
# External variables:
# Optind -- index in ARGV of first nonoption argument
# Optarg -- string value of argument to current option
# Opterr -- if nonzero, print our own diagnostic
# Optopt -- current option letter
# Process command-line options...
# Returns:
# -1 at end of options
# "?" for unrecognized option
# <c> a character representing the current option
MEM_LIMIT=200
TIMEOUT=none
# Private Data:
# _opti -- index in multiflag option, e.g., -abc
unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE
function getopt(argc, argv, options, thisopt, i)
{
if (length(options) == 0) # no options given
return -1
while getopts "+i:o:f:m:t:eQUCh" opt; do
if (argv[Optind] == "--") { # all done
Optind++
_opti = 0
return -1
} else if (argv[Optind] !~ /^-[^:\t ]/) {
_opti = 0
return -1
}
if (_opti == 0)
_opti = 2
thisopt = substr(argv[Optind], _opti, 1)
Optopt = thisopt
i = index(options, thisopt)
if (i == 0) {
if (Opterr)
printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) {
Optind++
_opti = 0
} else
_opti++
return "?"
}
if (substr(options, i + 1, 1) == ":") {
# get option argument
if (length(substr(argv[Optind], _opti + 1)) > 0)
Optarg = substr(argv[Optind], _opti + 1)
else
Optarg = argv[++Optind]
_opti = 0
} else
Optarg = ""
if (_opti == 0 || _opti >= length(argv[Optind])) {
Optind++
_opti = 0
} else
_opti++
return thisopt
}
case "$opt" in
function usage() {
print \
"Usage: afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \
"\n" \
"Required parameters:\n" \
"\n" \
" -i dir - input directory with starting corpus\n" \
" -o dir - output directory for minimized files\n" \
"\n" \
"Execution control settings:\n" \
"\n" \
" -f file - location read by the fuzzed program (stdin)\n" \
" -m megs - memory limit for child process ("mem_limit" MB)\n" \
" -t msec - run time limit for child process (none)\n" \
" -Q - use binary-only instrumentation (QEMU mode)\n" \
" -U - use unicorn-based instrumentation (unicorn mode)\n" \
"\n" \
"Minimization settings:\n" \
" -C - keep crashing inputs, reject everything else\n" \
" -e - solve for edge coverage only, ignore hit counts\n" \
"\n" \
"For additional tips, please consult docs/README.md\n" \
"\n" \
> "/dev/stderr"
exit 1
}
"h")
;;
function exists_and_is_executable(binarypath) {
return 0 == system("test -f "binarypath" -a -x "binarypath)
}
"i")
IN_DIR="$OPTARG"
;;
BEGIN {
print "corpus minimization tool for afl++ (awk version)\n"
"o")
OUT_DIR="$OPTARG"
;;
"f")
STDIN_FILE="$OPTARG"
;;
"m")
MEM_LIMIT="$OPTARG"
MEM_LIMIT_GIVEN=1
;;
"t")
TIMEOUT="$OPTARG"
;;
"e")
EXTRA_PAR="$EXTRA_PAR -e"
;;
"C")
export AFL_CMIN_CRASHES_ONLY=1
;;
"Q")
EXTRA_PAR="$EXTRA_PAR -Q"
test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
QEMU_MODE=1
;;
"U")
EXTRA_PAR="$EXTRA_PAR -U"
test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
UNICORN_MODE=1
;;
"?")
exit 1
;;
# defaults
extra_par = ""
# process options
Opterr = 1 # default is to diagnose
Optind = 1 # skip ARGV[0]
while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCQU?")) != -1) {
if (_go_c == "i") {
if (!Optarg) usage()
if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
in_dir = Optarg
continue
} else
if (_go_c == "o") {
if (!Optarg) usage()
if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
out_dir = Optarg
continue
} else
if (_go_c == "f") {
if (!Optarg) usage()
if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
stdin_file = Optarg
continue
} else
if (_go_c == "m") {
if (!Optarg) usage()
if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
mem_limit = Optarg
mem_limit_given = 1
continue
} else
if (_go_c == "t") {
if (!Optarg) usage()
if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
timeout = Optarg
continue
} else
if (_go_c == "C") {
ENVIRON["AFL_CMIN_CRASHES_ONLY"] = 1
continue
} else
if (_go_c == "e") {
extra_par = extra_par " -e"
continue
} else
if (_go_c == "Q") {
if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
extra_par = extra_par " -Q"
if ( !mem_limit_given ) mem_limit = "250"
qemu_mode = 1
continue
} else
if (_go_c == "U") {
if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
extra_par = extra_par " -U"
if ( !mem_limit_given ) mem_limit = "250"
unicorn_mode = 1
continue
} else
if (_go_c == "?") {
exit 1
} else
usage()
} # while options
esac
if (!mem_limit) mem_limit = 200
if (!timeout) timeout = "none"
done
# get program args
i = 0
prog_args_string = ""
for (; Optind < ARGC; Optind++) {
prog_args[i++] = ARGV[Optind]
if (i > 1)
prog_args_string = prog_args_string" "ARGV[Optind]
}
shift $((OPTIND-1))
# sanity checks
if (!prog_args[0] || !in_dir || !out_dir) usage()
TARGET_BIN="$1"
target_bin = prog_args[0]
if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then
# Do a sanity check to discourage the use of /tmp, since we can't really
# handle this safely from an awk script.
cat 1>&2 <<_EOF_
Usage: $0 [ options ] -- /path/to/target_app [ ... ]
if (!ENVIRON["AFL_ALLOW_TMP"]) {
dirlist[0] = in_dir
dirlist[1] = target_bin
dirlist[2] = out_dir
dirlist[3] = stdin_file
"pwd" | getline dirlist[4] # current directory
for (dirind in dirlist) {
dir = dirlist[dirind]
Required parameters:
if (dir ~ /^(\/var)?\/tmp/) {
print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr"
exit 1
}
}
delete dirlist
}
-i dir - input directory with the starting corpus
-o dir - output directory for minimized files
# If @@ is specified, but there's no -f, let's come up with a temporary input
# file name.
Execution control settings:
trace_dir = out_dir "/.traces"
-f file - location read by the fuzzed program (stdin)
-m megs - memory limit for child process ($MEM_LIMIT MB)
-t msec - run time limit for child process (none)
-Q - use binary-only instrumentation (QEMU mode)
-U - use unicorn-based instrumentation (Unicorn mode)
if (!stdin_file) {
found_atat = 0
for (prog_args_ind in prog_args) {
if ("@@" == prog_args[prog_args_ind]) {
found_atat = 1
break
}
}
if (found_atat) {
stdin_file = trace_dir "/.cur_input"
}
}
Minimization settings:
# Check for obvious errors.
-C - keep crashing inputs, reject everything else
-e - solve for edge coverage only, ignore hit counts
For additional tips, please consult docs/README.
_EOF_
exit 1
fi
# Do a sanity check to discourage the use of /tmp, since we can't really
# handle this safely from a shell script.
if [ "$AFL_ALLOW_TMP" = "" ]; then
echo "$IN_DIR" | grep -qE '^(/var)?/tmp/'
T1="$?"
echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/'
T2="$?"
echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/'
T3="$?"
echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/'
T4="$?"
echo "$PWD" | grep -qE '^(/var)?/tmp/'
T5="$?"
if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then
echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2
if (mem_limit && mem_limit != "none" && mem_limit < 5) {
print "[-] Error: dangerously low memory limit." > "/dev/stderr"
exit 1
fi
}
fi
# If @@ is specified, but there's no -f, let's come up with a temporary input
# file name.
TRACE_DIR="$OUT_DIR/.traces"
if [ "$STDIN_FILE" = "" ]; then
if echo "$*" | grep -qF '@@'; then
STDIN_FILE="$TRACE_DIR/.cur_input"
fi
fi
# Check for obvious errors.
if [ ! "$MEM_LIMIT" = "none" ]; then
if [ "$MEM_LIMIT" -lt "5" ]; then
echo "[-] Error: dangerously low memory limit." 1>&2
if (timeout && timeout != "none" && timeout < 10) {
print "[-] Error: dangerously low timeout." > "/dev/stderr"
exit 1
fi
}
fi
if (target_bin && !exists_and_is_executable(target_bin)) {
if [ ! "$TIMEOUT" = "none" ]; then
"which "target_bin" 2>/dev/null" | getline tnew
if (!tnew || !exists_and_is_executable(tnew)) {
print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr"
exit 1
}
target_bin = tnew
}
if [ "$TIMEOUT" -lt "10" ]; then
echo "[-] Error: dangerously low timeout." 1>&2
if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !unicorn_mode) {
if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) {
print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr"
exit 1
}
}
if (0 != system( "test -d "in_dir )) {
print "[-] Error: directory '"in_dir"' not found." > "/dev/stderr"
exit 1
fi
}
fi
if (0 == system( "test -d "in_dir"/queue" )) {
in_dir = in_dir "/queue"
}
if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then
system("rm -rf "trace_dir" 2>/dev/null");
system("rm "out_dir"/id[:_]* 2>/dev/null")
TNEW="`which "$TARGET_BIN" 2>/dev/null`"
if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then
echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2
if (0 == system( "test -d "out_dir" -a -e "out_dir"/*" )) {
print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr"
exit 1
fi
}
TARGET_BIN="$TNEW"
fi
if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then
if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2
# Check for the more efficient way to copy files...
if (0 != system("mkdir -p -m 0700 "trace_dir)) {
print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr"
exit 1
fi
fi
if [ ! -d "$IN_DIR" ]; then
echo "[-] Error: directory '$IN_DIR' not found." 1>&2
exit 1
fi
test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue"
find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null
rm -rf "$TRACE_DIR" 2>/dev/null
rmdir "$OUT_DIR" 2>/dev/null
if [ -d "$OUT_DIR" ]; then
echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2
exit 1
fi
mkdir -m 700 -p "$TRACE_DIR" || exit 1
if [ ! "$STDIN_FILE" = "" ]; then
rm -f "$STDIN_FILE" || exit 1
touch "$STDIN_FILE" || exit 1
fi
if [ "$AFL_PATH" = "" ]; then
SHOWMAP="${0%/afl-cmin}/afl-showmap"
else
SHOWMAP="$AFL_PATH/afl-showmap"
fi
if [ ! -x "$SHOWMAP" ]; then
echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2
rm -rf "$TRACE_DIR"
exit 1
fi
IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`))
if [ "$IN_COUNT" = "0" ]; then
echo "[+] Hmm, no inputs in the target directory. Nothing to be done."
rm -rf "$TRACE_DIR"
exit 1
fi
FIRST_FILE=`ls "$IN_DIR" | head -1`
# Make sure that we're not dealing with a directory.
if [ -d "$IN_DIR/$FIRST_FILE" ]; then
echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2
rm -rf "$TRACE_DIR"
exit 1
fi
# Check for the more efficient way to copy files...
if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then
CP_TOOL=ln
else
CP_TOOL=cp
fi
# Make sure that we can actually get anything out of afl-showmap before we
# waste too much time.
echo "[*] Testing the target binary..."
if [ "$STDIN_FILE" = "" ]; then
AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE"
else
cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE"
AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
fi
FIRST_COUNT=$((`grep -c . "$TRACE_DIR/.run_test"`))
if [ "$FIRST_COUNT" -gt "0" ]; then
echo "[+] OK, $FIRST_COUNT tuples recorded."
else
echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2
test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
exit 1
fi
# Let's roll!
#############################
# STEP 1: COLLECTING TRACES #
#############################
echo "[*] Obtaining traces for input files in '$IN_DIR'..."
(
CUR=0
if [ "$STDIN_FILE" = "" ]; then
ls "$IN_DIR" | while read -r fn; do
CUR=$((CUR+1))
printf "\\r Processing file $CUR/$IN_COUNT... "
"$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn"
done
else
ls "$IN_DIR" | while read -r fn; do
CUR=$((CUR+1))
printf "\\r Processing file $CUR/$IN_COUNT... "
cp "$IN_DIR/$fn" "$STDIN_FILE"
"$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
done
fi
)
echo
##########################
# STEP 2: SORTING TUPLES #
##########################
# With this out of the way, we sort all tuples by popularity across all
# datasets. The reasoning here is that we won't be able to avoid the files
# that trigger unique tuples anyway, so we will want to start with them and
# see what's left.
echo "[*] Sorting trace sets (this may take a while)..."
ls "$IN_DIR" | sed "s#^#$TRACE_DIR/#" | tr '\n' '\0' | xargs -0 -n 1 cat | \
sort | uniq -c | sort -k 1,1 -n >"$TRACE_DIR/.all_uniq"
TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`))
echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files."
#####################################
# STEP 3: SELECTING CANDIDATE FILES #
#####################################
# The next step is to find the best candidate for each tuple. The "best"
# part is understood simply as the smallest input that includes a particular
# tuple in its trace. Empirical evidence suggests that this produces smaller
# datasets than more involved algorithms that could be still pulled off in
# a shell script.
echo "[*] Finding best candidates for each tuple..."
CUR=0
ls -rS "$IN_DIR" | while read -r fn; do
CUR=$((CUR+1))
printf "\\r Processing file $CUR/$IN_COUNT... "
sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list"
done
echo
##############################
# STEP 4: LOADING CANDIDATES #
##############################
# At this point, we have a file of tuple-file pairs, sorted by file size
# in ascending order (as a consequence of ls -rS). By doing sort keyed
# only by tuple (-k 1,1) and configured to output only the first line for
# every key (-s -u), we end up with the smallest file for each tuple.
echo "[*] Sorting candidate list (be patient)..."
sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \
sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script"
if [ ! -s "$TRACE_DIR/.candidate_script" ]; then
echo "[-] Error: no traces obtained from test cases, check syntax!" 1>&2
test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
exit 1
fi
# The sed command converted the sorted list to a shell script that populates
# BEST_FILE[tuple]="fname". Let's load that!
. "$TRACE_DIR/.candidate_script"
##########################
# STEP 5: WRITING OUTPUT #
##########################
# The final trick is to grab the top pick for each tuple, unless said tuple is
# already set due to the inclusion of an earlier candidate; and then put all
# tuples associated with the newly-added file to the "already have" list. The
# loop works from least popular tuples and toward the most common ones.
echo "[*] Processing candidates and writing output files..."
CUR=0
touch "$TRACE_DIR/.already_have"
while read -r cnt tuple; do
CUR=$((CUR+1))
printf "\\r Processing tuple $CUR/$TUPLE_COUNT... "
# If we already have this tuple, skip it.
grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue
FN=${BEST_FILE[tuple]}
$CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN"
if [ "$((CUR % 5))" = "0" ]; then
sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp"
mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have"
else
cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have"
fi
done <"$TRACE_DIR/.all_uniq"
echo
OUT_COUNT=`ls -- "$OUT_DIR" | wc -l`
if [ "$OUT_COUNT" = "1" ]; then
echo "[!] WARNING: All test cases had the same traces, check syntax!"
fi
echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'."
echo
test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
exit 0
}
if (stdin_file) {
# truncate input file
printf "" > stdin_file
close( stdin_file )
}
if (!ENVIRON["AFL_PATH"]) {
if (0 == system("test -f afl-cmin")) {
showmap = "./afl-showmap"
} else {
"which afl-showmap 2>/dev/null" | getline showmap
}
} else {
showmap = ENVIRON["AFL_PATH"] "/afl-showmap"
}
if (!showmap || 0 != system("test -x "showmap )) {
print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr"
exit 1
}
# get list of input filenames sorted by size
i = 0
# yuck, gnu stat is option incompatible to bsd stat
# we use a heuristic to differentiate between
# GNU stat and other stats
"stat --version 2>/dev/null" | getline statversion
if (statversion ~ /GNU coreutils/) {
stat_format = "-c '%s %n'" # GNU
} else {
stat_format = "-f '%z %N'" # *BSD, MacOS
}
cmdline = "cd "in_dir" && find . \\( ! -name . -a -type d -prune \\) -o -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-"
while (cmdline | getline) {
infilesSmallToBig[i++] = $0
}
in_count = i
first_file = infilesSmallToBig[0]
# Make sure that we're not dealing with a directory.
if (0 == system("test -d "in_dir"/"first_file)) {
print "[-] Error: The input directory contains subdirectories - please fix." > "/dev/stderr"
exit 1
}
if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) {
cp_tool = "ln"
} else {
cp_tool = "cp"
}
# Make sure that we can actually get anything out of afl-showmap before we
# waste too much time.
print "[*] Testing the target binary..."
if (!stdin_file) {
system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
} else {
system("cp "in_dir"/"first_file" "stdin_file)
system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
}
first_count = 0
runtest = trace_dir"/.run_test"
while ((getline < runtest) > 0) {
++first_count
}
if (first_count) {
print "[+] OK, "first_count" tuples recorded."
} else {
print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr"
if (!ENVIRON["AFL_KEEP_TRACES"]) {
system("rm -rf "trace_dir" 2>/dev/null")
}
exit 1
}
# Let's roll!
#############################
# STEP 1: Collecting traces #
#############################
print "[*] Obtaining traces for "in_count" input files in '"in_dir"'."
cur = 0;
if (!stdin_file) {
while (cur < in_count) {
fn = infilesSmallToBig[cur]
++cur;
printf "\r Processing file "cur"/"in_count
system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"fn"\"")
}
} else {
while (cur < in_count) {
fn = infilesSmallToBig[cur]
++cur
printf "\r Processing file "cur"/"in_count
system("cp "in_dir"/"fn" "stdin_file)
system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
}
}
print ""
#######################################################
# STEP 2: register smallest input file for each tuple #
# STEP 3: copy that file (at most once) #
#######################################################
print "[*] Processing traces for input files in '"in_dir"'."
cur = 0
out_count = 0
tuple_count = 0
while (cur < in_count) {
fn = infilesSmallToBig[cur]
++cur
printf "\r Processing file "cur"/"in_count
# create path for the trace file from afl-showmap
tracefile_path = trace_dir"/"fn
# gather all keys, and count them
while ((getline line < tracefile_path) > 0) {
key = line
if (!(key in key_count)) {
++tuple_count
}
++key_count[key]
if (! (key in best_file)) {
# this is the best file for this key
best_file[key] = fn
# copy file unless already done
if (! (fn in file_already_copied)) {
system(cp_tool" "in_dir"/"fn" "out_dir"/"fn)
file_already_copied[fn] = ""
++out_count
}
}
}
close(tracefile_path)
}
print ""
print "[+] Found "tuple_count" unique tuples across "in_count" files."
if (out_count == 1) {
print "[!] WARNING: All test cases had the same traces, check syntax!"
}
print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'."
if (!ENVIRON["AFL_KEEP_TRACES"]) {
system("rm -rf "trace_dir" 2>/dev/null")
}
exit 0
}
EOF

470
afl-cmin.bash Executable file
View File

@ -0,0 +1,470 @@
#!/usr/bin/env bash
#
# american fuzzy lop++ - corpus minimization tool
# ---------------------------------------------
#
# Originally written by Michal Zalewski
#
# Copyright 2014, 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This tool tries to find the smallest subset of files in the input directory
# that still trigger the full range of instrumentation data points seen in
# the starting corpus. This has two uses:
#
# - Screening large corpora of input files before using them as a seed for
# afl-fuzz. The tool will remove functionally redundant files and likely
# leave you with a much smaller set.
#
# (In this case, you probably also want to consider running afl-tmin on
# the individual files later on to reduce their size.)
#
# - Minimizing the corpus generated organically by afl-fuzz, perhaps when
# planning to feed it to more resource-intensive tools. The tool achieves
# this by removing all entries that used to trigger unique behaviors in the
# past, but have been made obsolete by later finds.
#
# Note that the tool doesn't modify the files themselves. For that, you want
# afl-tmin.
#
# This script must use bash because other shells may have hardcoded limits on
# array sizes.
#
echo "corpus minimization tool for afl-fuzz by Michal Zalewski"
echo
#########
# SETUP #
#########
# Process command-line options...
MEM_LIMIT=200
TIMEOUT=none
unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE
while getopts "+i:o:f:m:t:eQUCh" opt; do
case "$opt" in
"h")
;;
"i")
IN_DIR="$OPTARG"
;;
"o")
OUT_DIR="$OPTARG"
;;
"f")
STDIN_FILE="$OPTARG"
;;
"m")
MEM_LIMIT="$OPTARG"
MEM_LIMIT_GIVEN=1
;;
"t")
TIMEOUT="$OPTARG"
;;
"e")
EXTRA_PAR="$EXTRA_PAR -e"
;;
"C")
export AFL_CMIN_CRASHES_ONLY=1
;;
"Q")
EXTRA_PAR="$EXTRA_PAR -Q"
test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
QEMU_MODE=1
;;
"U")
EXTRA_PAR="$EXTRA_PAR -U"
test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
UNICORN_MODE=1
;;
"?")
exit 1
;;
esac
done
shift $((OPTIND-1))
TARGET_BIN="$1"
if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then
cat 1>&2 <<_EOF_
Usage: $0 [ options ] -- /path/to/target_app [ ... ]
Required parameters:
-i dir - input directory with the starting corpus
-o dir - output directory for minimized files
Execution control settings:
-f file - location read by the fuzzed program (stdin)
-m megs - memory limit for child process ($MEM_LIMIT MB)
-t msec - run time limit for child process (none)
-Q - use binary-only instrumentation (QEMU mode)
-U - use unicorn-based instrumentation (Unicorn mode)
Minimization settings:
-C - keep crashing inputs, reject everything else
-e - solve for edge coverage only, ignore hit counts
For additional tips, please consult docs/README.
_EOF_
exit 1
fi
# Do a sanity check to discourage the use of /tmp, since we can't really
# handle this safely from a shell script.
if [ "$AFL_ALLOW_TMP" = "" ]; then
echo "$IN_DIR" | grep -qE '^(/var)?/tmp/'
T1="$?"
echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/'
T2="$?"
echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/'
T3="$?"
echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/'
T4="$?"
echo "$PWD" | grep -qE '^(/var)?/tmp/'
T5="$?"
if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then
echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2
exit 1
fi
fi
# If @@ is specified, but there's no -f, let's come up with a temporary input
# file name.
TRACE_DIR="$OUT_DIR/.traces"
if [ "$STDIN_FILE" = "" ]; then
if echo "$*" | grep -qF '@@'; then
STDIN_FILE="$TRACE_DIR/.cur_input"
fi
fi
# Check for obvious errors.
if [ ! "$MEM_LIMIT" = "none" ]; then
if [ "$MEM_LIMIT" -lt "5" ]; then
echo "[-] Error: dangerously low memory limit." 1>&2
exit 1
fi
fi
if [ ! "$TIMEOUT" = "none" ]; then
if [ "$TIMEOUT" -lt "10" ]; then
echo "[-] Error: dangerously low timeout." 1>&2
exit 1
fi
fi
if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then
TNEW="`which "$TARGET_BIN" 2>/dev/null`"
if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then
echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2
exit 1
fi
TARGET_BIN="$TNEW"
fi
if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then
if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2
exit 1
fi
fi
if [ ! -d "$IN_DIR" ]; then
echo "[-] Error: directory '$IN_DIR' not found." 1>&2
exit 1
fi
test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue"
find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null
rm -rf "$TRACE_DIR" 2>/dev/null
rmdir "$OUT_DIR" 2>/dev/null
if [ -d "$OUT_DIR" ]; then
echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2
exit 1
fi
mkdir -m 700 -p "$TRACE_DIR" || exit 1
if [ ! "$STDIN_FILE" = "" ]; then
rm -f "$STDIN_FILE" || exit 1
touch "$STDIN_FILE" || exit 1
fi
if [ "$AFL_PATH" = "" ]; then
SHOWMAP="${0%/afl-cmin}/afl-showmap"
else
SHOWMAP="$AFL_PATH/afl-showmap"
fi
if [ ! -x "$SHOWMAP" ]; then
echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2
rm -rf "$TRACE_DIR"
exit 1
fi
IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`))
if [ "$IN_COUNT" = "0" ]; then
echo "[+] Hmm, no inputs in the target directory. Nothing to be done."
rm -rf "$TRACE_DIR"
exit 1
fi
FIRST_FILE=`ls "$IN_DIR" | head -1`
# Make sure that we're not dealing with a directory.
if [ -d "$IN_DIR/$FIRST_FILE" ]; then
echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2
rm -rf "$TRACE_DIR"
exit 1
fi
# Check for the more efficient way to copy files...
if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then
CP_TOOL=ln
else
CP_TOOL=cp
fi
# Make sure that we can actually get anything out of afl-showmap before we
# waste too much time.
echo "[*] Testing the target binary..."
if [ "$STDIN_FILE" = "" ]; then
AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE"
else
cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE"
AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
fi
FIRST_COUNT=$((`grep -c . "$TRACE_DIR/.run_test"`))
if [ "$FIRST_COUNT" -gt "0" ]; then
echo "[+] OK, $FIRST_COUNT tuples recorded."
else
echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2
test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
exit 1
fi
# Let's roll!
#############################
# STEP 1: COLLECTING TRACES #
#############################
echo "[*] Obtaining traces for input files in '$IN_DIR'..."
(
CUR=0
if [ "$STDIN_FILE" = "" ]; then
ls "$IN_DIR" | while read -r fn; do
CUR=$((CUR+1))
printf "\\r Processing file $CUR/$IN_COUNT... "
"$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn"
done
else
ls "$IN_DIR" | while read -r fn; do
CUR=$((CUR+1))
printf "\\r Processing file $CUR/$IN_COUNT... "
cp "$IN_DIR/$fn" "$STDIN_FILE"
"$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
done
fi
)
echo
##########################
# STEP 2: SORTING TUPLES #
##########################
# With this out of the way, we sort all tuples by popularity across all
# datasets. The reasoning here is that we won't be able to avoid the files
# that trigger unique tuples anyway, so we will want to start with them and
# see what's left.
echo "[*] Sorting trace sets (this may take a while)..."
ls "$IN_DIR" | sed "s#^#$TRACE_DIR/#" | tr '\n' '\0' | xargs -0 -n 1 cat | \
sort | uniq -c | sort -k 1,1 -n >"$TRACE_DIR/.all_uniq"
TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`))
echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files."
#####################################
# STEP 3: SELECTING CANDIDATE FILES #
#####################################
# The next step is to find the best candidate for each tuple. The "best"
# part is understood simply as the smallest input that includes a particular
# tuple in its trace. Empirical evidence suggests that this produces smaller
# datasets than more involved algorithms that could be still pulled off in
# a shell script.
echo "[*] Finding best candidates for each tuple..."
CUR=0
ls -rS "$IN_DIR" | while read -r fn; do
CUR=$((CUR+1))
printf "\\r Processing file $CUR/$IN_COUNT... "
sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list"
done
echo
##############################
# STEP 4: LOADING CANDIDATES #
##############################
# At this point, we have a file of tuple-file pairs, sorted by file size
# in ascending order (as a consequence of ls -rS). By doing sort keyed
# only by tuple (-k 1,1) and configured to output only the first line for
# every key (-s -u), we end up with the smallest file for each tuple.
echo "[*] Sorting candidate list (be patient)..."
sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \
sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script"
if [ ! -s "$TRACE_DIR/.candidate_script" ]; then
echo "[-] Error: no traces obtained from test cases, check syntax!" 1>&2
test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
exit 1
fi
# The sed command converted the sorted list to a shell script that populates
# BEST_FILE[tuple]="fname". Let's load that!
. "$TRACE_DIR/.candidate_script"
##########################
# STEP 5: WRITING OUTPUT #
##########################
# The final trick is to grab the top pick for each tuple, unless said tuple is
# already set due to the inclusion of an earlier candidate; and then put all
# tuples associated with the newly-added file to the "already have" list. The
# loop works from least popular tuples and toward the most common ones.
echo "[*] Processing candidates and writing output files..."
CUR=0
touch "$TRACE_DIR/.already_have"
while read -r cnt tuple; do
CUR=$((CUR+1))
printf "\\r Processing tuple $CUR/$TUPLE_COUNT... "
# If we already have this tuple, skip it.
grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue
FN=${BEST_FILE[tuple]}
$CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN"
if [ "$((CUR % 5))" = "0" ]; then
sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp"
mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have"
else
cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have"
fi
done <"$TRACE_DIR/.all_uniq"
echo
OUT_COUNT=`ls -- "$OUT_DIR" | wc -l`
if [ "$OUT_COUNT" = "1" ]; then
echo "[!] WARNING: All test cases had the same traces, check syntax!"
fi
echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'."
echo
test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
exit 0

View File

@ -1,6 +1,6 @@
#!/bin/sh
test "$1" = "-h" && {
echo afl-system-config by Marc Heuse
echo 'afl-system-config by Marc Heuse <mh@mh-sec.de>'
echo
echo $0
echo
@ -12,55 +12,72 @@ test "$1" = "-h" && {
exit 1
}
DONE=
PLATFORM=`uname -s`
echo This reconfigures the system to have a better fuzzing performance
echo This reconfigures the system to have a better fuzzing performance.
if [ '!' "$EUID" = 0 ] && [ '!' `id -u` = 0 ] ; then
echo Error you need to be root to run this
exit 1
echo "Warning: you need to be root to run this!"
# we do not exit as other mechanisms exist that allows to do this than
# being root. let the errors speak for themselves.
fi
if [ "$PLATFORM" = "Linux" ] ; then
sysctl -w kernel.core_pattern=core
sysctl -w kernel.randomize_va_space=0
sysctl -w kernel.sched_child_runs_first=1
sysctl -w kernel.sched_autogroup_enabled=1
sysctl -w kernel.sched_migration_cost_ns=50000000
sysctl -w kernel.sched_latency_ns=250000000
echo never > /sys/kernel/mm/transparent_hugepage/enabled
test -e /sys/devices/system/cpu/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/scaling_governor
test -e /sys/devices/system/cpu/cpufreq/policy0/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/policy*/scaling_governor
test -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
test -e /sys/devices/system/cpu/intel_pstate/no_turbo && echo 0 > /sys/devices/system/cpu/intel_pstate/no_turbo
test -e /sys/devices/system/cpu/cpufreq/boost && echo 1 > /sys/devices/system/cpu/cpufreq/boost
echo
echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this:
echo '/etc/default/grub:GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"'
{
sysctl -w kernel.core_pattern=core
sysctl -w kernel.randomize_va_space=0
sysctl -w kernel.sched_child_runs_first=1
sysctl -w kernel.sched_autogroup_enabled=1
sysctl -w kernel.sched_migration_cost_ns=50000000
sysctl -w kernel.sched_latency_ns=250000000
echo never > /sys/kernel/mm/transparent_hugepage/enabled
test -e /sys/devices/system/cpu/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/scaling_governor
test -e /sys/devices/system/cpu/cpufreq/policy0/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/policy*/scaling_governor
test -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
test -e /sys/devices/system/cpu/intel_pstate/no_turbo && echo 0 > /sys/devices/system/cpu/intel_pstate/no_turbo
test -e /sys/devices/system/cpu/cpufreq/boost && echo 1 > /sys/devices/system/cpu/cpufreq/boost
} > /dev/null
echo Settings applied.
dmesg | egrep -q 'nospectre_v2|spectre_v2=off' || {
echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this:
echo ' /etc/default/grub:GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"'
}
DONE=1
fi
if [ "$PLATFORM" = "FreeBSD" ] ; then
sysctl kern.elf32.aslr.enable=0
sysctl kern.elf64.aslr.enable=0
echo
echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this:
echo 'sysctl hw.ibrs_disable=1'
echo
echo 'Setting kern.pmap.pg_ps_enabled=0 into /boot/loader.conf might be helpful too.'
{
sysctl kern.elf32.aslr.enable=0
sysctl kern.elf64.aslr.enable=0
} > /dev/null
echo Settings applied.
echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this:
echo ' sysctl hw.ibrs_disable=1'
echo 'Setting kern.pmap.pg_ps_enabled=0 into /boot/loader.conf might be helpful too.'
DONE=1
fi
if [ "$PLATFORM" = "OpenBSD" ] ; then
echo
echo 'System security features cannot be disabled on OpenBSD.'
echo
echo 'System security features cannot be disabled on OpenBSD.'
DONE=1
fi
if [ "$PLATFORM" = "NetBSD" ] ; then
echo
echo It is recommended to enable unprivileged users to set cpu affinity
echo to be able to use afl-gotcpu meaningfully.
/sbin/sysctl -w security.models.extensions.user_set_cpu_affinity=1
{
#echo It is recommended to enable unprivileged users to set cpu affinity
#echo to be able to use afl-gotcpu meaningfully.
/sbin/sysctl -w security.models.extensions.user_set_cpu_affinity=1
} > /dev/null
echo Settings applied.
DONE=1
fi
if [ "$PLATFORM" = "Darwin" ] ; then
if [ $(launchctl list 2>/dev/null | grep -q '\.ReportCrash$') ] ; then
echo We unload the default crash reporter here
SL=/System/Library; PL=com.apple.ReportCrash
launchctl unload -w ${SL}/LaunchAgents/${PL}.plist
sudo launchctl unload -w ${SL}/LaunchDaemons/${PL}.Root.plist
echo We unload the default crash reporter here
SL=/System/Library; PL=com.apple.ReportCrash
launchctl unload -w ${SL}/LaunchAgents/${PL}.plist
sudo launchctl unload -w ${SL}/LaunchDaemons/${PL}.Root.plist
echo Settings applied.
else
echo Nothing to do.
fi
DONE=1
fi
echo
echo Also use AFL_TMPDIR to use a tmpfs for the input file
test -z "$DONE" && echo Error: Unknown platform: $PLATFORM
test -z "$AFL_TMPDIR" && echo Also use AFL_TMPDIR and point it to a tmpfs for the input file caching

View File

@ -21,9 +21,15 @@ Version ++2.60d (develop):
- afl-fuzz:
- now prints the real python version support compiled in
- set stronger performance compile options and little tweaks
- afl-clang-fast now shows in the help output for which llvm version it
was compiled for
- added blacklisted function check in llvm_mode
- Android: prefer bigcores when selecting a CPU
- afl-clang-fast:
- show in the help output for which llvm version it was compiled for
- now does not need to be recompiled between trace-pc and pass
instrumentation. compile normally and set AFL_LLVM_USE_TRACE_PC :)
- llvm 11 is supported
- afl-cmin is now a sh script (invoking awk) instead of bash for portability
the original script is still present as afl-cmin.bash
- added blacklist and whitelisting function check in all modules of llvm_mode
- added fix from Debian project to compile libdislocator and libtokencap

161
docs/binaryonly_fuzzing.md Normal file
View File

@ -0,0 +1,161 @@
# Fuzzing binary-only programs with afl++
afl++, libfuzzer and others are great if you have the source code, and
it allows for very fast and coverage guided fuzzing.
However, if there is only the binary program and no source code available,
then standard `afl-fuzz -n` (dumb mode) is not effective.
The following is a description of how these binaries can be fuzzed with afl++
!!!!!
TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then
use afl -Q qemu_mode, or better: use both in parallel.
!!!!!
## QEMU
Qemu is the "native" solution to the program.
It is available in the ./qemu_mode/ directory and once compiled it can
be accessed by the afl-fuzz -Q command line option.
The speed decrease is at about 50%.
It is the easiest to use alternative and even works for cross-platform binaries.
Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz)
which now has a qemu_mode, but its performance is just 1.5%!
As it is included in afl++ this needs no URL.
## WINE+QEMU
Wine mode can run Win32 PE binaries with the QEMU instrumentation.
It needs Wine, python3 and the pefile python package installed.
As it is included in afl++ this needs no URL.
## UNICORN
Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
In contrast to QEMU, Unicorn does not offer a full system or even userland
emulation. Runtime environment and/or loaders have to be written from scratch,
if needed. On top, block chaining has been removed. This means the speed boost
introduced in the patched QEMU Mode of afl++ cannot simply be ported over to
Unicorn. For further information, check out ./unicorn_mode.txt.
As it is included in afl++ this needs no URL.
## DYNINST
Dyninst is a binary instrumentation framework similar to Pintool and
Dynamorio (see far below). However whereas Pintool and Dynamorio work at
runtime, dyninst instruments the target at load time, and then let it run -
or save the binary with the changes.
This is great for some things, e.g. fuzzing, and not so effective for others,
e.g. malware analysis.
So what we can do with dyninst is taking every basic block, and put afl's
instrumention code in there - and then save the binary.
Afterwards we can just fuzz the newly saved target binary with afl-fuzz.
Sounds great? It is. The issue though - it is a non-trivial problem to
insert instructions, which change addresses in the process space, so that
everything is still working afterwards. Hence more often than not binaries
crash when they are run.
The speed decrease is about 15-35%, depending on the optimization options
used with afl-dyninst.
So if Dyninst works, it is the best option available. Otherwise it just
doesn't work well.
[https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst)
## INTEL-PT
If you have a newer Intel CPU, you can make use of Intels processor trace.
The big issue with Intel's PT is the small buffer size and the complex
encoding of the debug information collected through PT.
This makes the decoding very CPU intensive and hence slow.
As a result, the overall speed decrease is about 70-90% (depending on
the implementation and other factors).
There are two afl intel-pt implementations:
1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt)
=> this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel.
2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer)
=> this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must
be used. This one is faster than the other.
Note that there is also honggfuzz: https://github.com/google/honggfuzz
But its IPT performance is just 6%!
## CORESIGHT
Coresight is ARM's answer to Intel's PT.
There is no implementation so far which handle coresight and getting
it working on an ARM Linux is very difficult due to custom kernel building
on embedded systems is difficult. And finding one that has coresight in
the ARM chip is difficult too.
My guess is that it is slower than Qemu, but faster than Intel PT.
If anyone finds any coresight implementation for afl please ping me: vh@thc.org
## FRIDA
Frida is a dynamic instrumentation engine like Pintool, Dyninst and Dynamorio.
What is special is that it is written Python, and scripted with Javascript.
It is mostly used to reverse binaries on mobile phones however can be used
everywhere.
There is a WIP fuzzer available at [https://github.com/andreafioraldi/frida-fuzzer](https://github.com/andreafioraldi/frida-fuzzer)
## PIN & DYNAMORIO
Pintool and Dynamorio are dynamic instrumentation engines, and they can be
used for getting basic block information at runtime.
Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows
whereas Dynamorio is additionally available for ARM and AARCH64.
Dynamorio is also 10x faster than Pintool.
The big issue with Dynamorio (and therefore Pintool too) is speed.
Dynamorio has a speed decrease of 98-99%
Pintool has a speed decrease of 99.5%
Hence Dynamorio is the option to go for if everything fails, and Pintool
only if Dynamorio fails too.
Dynamorio solutions:
* [https://github.com/vanhauser-thc/afl-dynamorio](https://github.com/vanhauser-thc/afl-dynamorio)
* [https://github.com/mxmssh/drAFL](https://github.com/mxmssh/drAFL)
* [https://github.com/googleprojectzero/winafl/](https://github.com/googleprojectzero/winafl/) <= very good but windows only
Pintool solutions:
* [https://github.com/vanhauser-thc/afl-pin](https://github.com/vanhauser-thc/afl-pin)
* [https://github.com/mothran/aflpin](https://github.com/mothran/aflpin)
* [https://github.com/spinpx/afl_pin_mode](https://github.com/spinpx/afl_pin_mode) <= only old Pintool version supported
## Non-AFL solutions
There are many binary-only fuzzing frameworks.
Some are great for CTFs but don't work with large binaries, others are very
slow but have good path discovery, some are very hard to set-up ...
* QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym)
* Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore)
* S2E: [https://github.com/S2E](https://github.com/S2E)
* ... please send me any missing that are good
## Closing words
That's it! News, corrections, updates? Send an email to vh@thc.org

View File

@ -1,144 +0,0 @@
Fuzzing binary-only programs with afl++
=======================================
afl++, libfuzzer and others are great if you have the source code, and
it allows for very fast and coverage guided fuzzing.
However, if there is only the binary program and not source code available,
then standard afl++ (dumb mode) is not effective.
The following is a description of how these can be fuzzed with afl++
!!!!!
TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then
use afl -Q qemu_mode, or better: use both in parallel.
!!!!!
QEMU
----
Qemu is the "native" solution to the program.
It is available in the ./qemu_mode/ directory and once compiled it can
be accessed by the afl-fuzz -Q command line option.
The speed decrease is at about 50%
It is the easiest to use alternative and even works for cross-platform binaries.
As it is included in afl++ this needs no URL.
WINE+QEMU
---------
Wine mode can run Win32 PE with the QEMU instrumentation.
It needs Wine, python3 and the pefile python package installed.
UNICORN
-------
Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
In contrast to QEMU, Unicorn does not offer a full system or even userland emulation.
Runtime environment and/or loaders have to be written from scratch, if needed.
On top, block chaining has been removed. This means the speed boost introduced in
to the patched QEMU Mode of afl++ cannot simply be ported over to Unicorn.
For further information, check out ./unicorn_mode.txt.
DYNINST
-------
Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio
(see far below). However whereas Pintool and Dynamorio work at runtime, dyninst
instruments the target at load time, and then let it run.
This is great for some things, e.g. fuzzing, and not so effective for others,
e.g. malware analysis.
So what we can do with dyninst is taking every basic block, and put afl's
instrumention code in there - and then save the binary.
Afterwards we can just fuzz the newly saved target binary with afl-fuzz.
Sounds great? It is. The issue though - it is a non-trivial problem to
insert instructions, which change addresses in the process space, so
everything is still working afterwards. Hence more often than not binaries
crash when they are run (because of instrumentation).
The speed decrease is about 15-35%, depending on the optimization options
used with afl-dyninst.
So if dyninst works, it is the best option available. Otherwise it just doesn't
work well.
https://github.com/vanhauser-thc/afl-dyninst
INTEL-PT
--------
If you have a newer Intel CPU, you can make use of Intels processor trace.
The big issue with Intel's PT is the small buffer size and the complex
encoding of the debug information collected through PT.
This makes the decoding very CPU intensive and hence slow.
As a result, the overall speed decrease is about 70-90% (depending on
the implementation and other factors).
There are two afl intel-pt implementations:
1. https://github.com/junxzm1990/afl-pt
=> this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel.
2. https://github.com/hunter-ht-2018/ptfuzzer
=> this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must
be used. This one is faster than the other.
CORESIGHT
---------
Coresight is ARM's answer to Intel's PT.
There is no implementation so far which handle coresight and getting
it working on an ARM Linux is very difficult due to custom kernel building
on embedded systems is difficult. And finding one that has coresight in
the ARM chip is difficult too.
My guess is that it is slower than Qemu, but faster than Intel PT.
If anyone finds any coresight implementation for afl please ping me:
vh@thc.org
PIN & DYNAMORIO
---------------
Pintool and Dynamorio are dynamic instrumentation engines, and they can be
used for getting basic block information at runtime.
Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows
whereas Dynamorio is additionally available for ARM and AARCH64.
Dynamorio is also 10x faster than Pintool.
The big issue with Dynamorio (and therefore Pintool too) is speed.
Dynamorio has a speed decrease of 98-99%
Pintool has a speed decrease of 99.5%
Hence Dynamorio is the option to go for if everything fails, and Pintool
only if Dynamorio fails too.
Dynamorio solutions:
https://github.com/vanhauser-thc/afl-dynamorio
https://github.com/mxmssh/drAFL
https://github.com/googleprojectzero/winafl/ <= very good but windows only
Pintool solutions:
https://github.com/vanhauser-thc/afl-pin
https://github.com/mothran/aflpin
https://github.com/spinpx/afl_pin_mode <= only old Pintool version supported
Non-AFL solutions
-----------------
There are many binary-only fuzzing frameworks. Some are great for CTFs but don't
work with large binaries, others are very slow but have good path discovery,
some are very hard to set-up ...
QSYM: https://github.com/sslab-gatech/qsym
Manticore: https://github.com/trailofbits/manticore
S2E: https://github.com/S2E
<please send me any missing that are good>
That's it!
News, corrections, updates?
Email vh@thc.org

View File

@ -29,6 +29,9 @@ Here's a quick overview of the stuff you can find in this directory:
- post_library - an example of how to build postprocessors for AFL.
- socket_fuzzing - a LD_PRELOAD library 'redirects' a socket to stdin
for fuzzing access with afl++
Note that the minimize_corpus.sh tool has graduated from the experimental/
directory and is now available as ../afl-cmin. The LLVM mode has likewise
graduated to ../llvm_mode/*.

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>
@ -67,7 +67,8 @@
#else
#define MEM_LIMIT 50
#endif /* ^!WORD_SIZE_64 */
#else
#else /* NetBSD's kernel needs more space for stack, see discussion for issue \
#165 */
#define MEM_LIMIT 200
#endif
/* Default memory limit when running in QEMU mode (MB): */

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -6,7 +6,7 @@
Forkserver design by Jann Horn <jannhorn@googlemail.com>
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -6,7 +6,7 @@
Forkserver design by Jann Horn <jannhorn@googlemail.com>
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -397,6 +397,29 @@ void* aligned_alloc(size_t align, size_t len) {
}
/* specific BSD api mainly checking possible overflow for the size */
void* reallocarray(void* ptr, size_t elem_len, size_t elem_cnt) {
const size_t elem_lim = 1UL << (sizeof(size_t) * 4);
const size_t elem_tot = elem_len * elem_cnt;
void* ret = NULL;
if ((elem_len >= elem_lim || elem_cnt >= elem_lim) && elem_len > 0 &&
elem_cnt > (SIZE_MAX / elem_len)) {
DEBUGF("reallocarray size overflow (%zu)", elem_tot);
} else {
ret = realloc(ptr, elem_tot);
}
return ret;
}
__attribute__((constructor)) void __dislocator_init(void) {
u8* tmp = (u8*)getenv("AFL_LD_LIMIT_MB");

View File

@ -3,10 +3,23 @@
#include <stdarg.h>
#include <unistd.h>
#include "llvm/Config/llvm-config.h"
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5
typedef long double max_align_t;
#endif
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#if LLVM_VERSION_MAJOR > 3 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/DebugInfo.h"
#else
#include "llvm/Support/CFG.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/DebugInfo.h"
#endif
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
@ -16,9 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include <unordered_set>
#include <random>
#include <list>
@ -97,7 +108,7 @@ struct InsTrim : public ModulePass {
// ripped from aflgo
static bool isBlacklisted(const Function *F) {
static const SmallVector<std::string, 4> Blacklist = {
static const char *Blacklist[] = {
"asan.",
"llvm.",
@ -144,19 +155,6 @@ struct InsTrim : public ModulePass {
// this is our default
MarkSetOpt = true;
/* // I dont think this makes sense to port into LLVMInsTrim
char* inst_ratio_str = getenv("AFL_INST_RATIO");
unsigned int inst_ratio = 100;
if (inst_ratio_str) {
if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio ||
inst_ratio > 100) FATAL("Bad value of AFL_INST_RATIO (must be between 1
and 100)");
}
*/
LLVMContext &C = M.getContext();
IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
@ -186,6 +184,8 @@ struct InsTrim : public ModulePass {
StringRef instFilename;
unsigned int instLine = 0;
#if LLVM_VERSION_MAJOR >= 4 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
for (auto &BB : F) {
BasicBlock::iterator IP = BB.getFirstInsertionPt();
@ -240,6 +240,48 @@ struct InsTrim : public ModulePass {
}
#else
for (auto &BB : F) {
BasicBlock::iterator IP = BB.getFirstInsertionPt();
IRBuilder<> IRB(&(*IP));
if (Loc.isUnknown()) Loc = IP->getDebugLoc();
}
if (!Loc.isUnknown()) {
DILocation cDILoc(Loc.getAsMDNode(C));
instLine = cDILoc.getLineNumber();
instFilename = cDILoc.getFilename();
/* Continue only if we know where we actually are */
if (!instFilename.str().empty()) {
for (std::list<std::string>::iterator it = myWhitelist.begin();
it != myWhitelist.end(); ++it) {
if (instFilename.str().length() >= it->length()) {
if (instFilename.str().compare(
instFilename.str().length() - it->length(),
it->length(), *it) == 0) {
instrumentBlock = true;
break;
}
}
}
}
}
#endif
/* Either we couldn't figure out our location or the location is
* not whitelisted, so we skip instrumentation. */
if (!instrumentBlock) {
@ -432,28 +474,19 @@ struct InsTrim : public ModulePass {
IRB.CreateStore(Incr, MapPtrIdx)
->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
/* Set prev_loc to cur_loc >> 1 */
/*
StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, L >> 1),
OldPrev); Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C,
None));
*/
total_instr++;
}
}
OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n" /*", ratio
%u%%)."*/
,
total_instr, total_rs, total_hs,
OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", total_instr,
total_rs, total_hs,
getenv("AFL_HARDEN")
? "hardened"
: ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN"))
? "ASAN/MSAN"
: "non-hardened") /*, inst_ratio*/);
: "non-hardened"));
return false;
}

View File

@ -29,14 +29,14 @@ ifeq "$(shell uname)" "OpenBSD"
LLVM_CONFIG ?= $(BIN_PATH)/llvm-config
HAS_OPT = $(shell test -x $(BIN_PATH)/opt && echo 0 || echo 1)
ifeq "$(HAS_OPT)" "1"
$(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 10) -> e.g. "pkg_add llvm-7.0.1p9")
$(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 11) -> e.g. "pkg_add llvm-7.0.1p9")
endif
else
LLVM_CONFIG ?= llvm-config
endif
LLVMVER = $(shell $(LLVM_CONFIG) --version 2>/dev/null )
LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[1-9]' && echo 1 || echo 0 )
LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 )
LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 )
LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//')
LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null)
@ -48,7 +48,7 @@ ifeq "$(LLVMVER)" ""
endif
ifeq "$(LLVM_UNSUPPORTED)" "1"
$(warning llvm_mode only supports llvm versions 3.8.0 up to 10)
$(warning llvm_mode only supports llvm versions 3.8.0 up to 11)
endif
ifeq "$(LLVM_MAJOR)" "9"
@ -201,7 +201,7 @@ endif
ln -sf afl-clang-fast ../afl-clang-fast++
../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps
$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL)
-$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL)
../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps
$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL)

View File

@ -3,11 +3,22 @@
#include <queue>
#include <set>
#include <vector>
#include "llvm/Config/llvm-config.h"
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5
typedef long double max_align_t;
#endif
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#if LLVM_VERSION_MAJOR > 3 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
#include "llvm/IR/CFG.h"
#else
#include "llvm/Support/CFG.h"
#endif
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@ -65,16 +76,11 @@ void buildCFG(Function *F) {
}
// uint32_t FakeID = 0;
for (auto S = F->begin(), E = F->end(); S != E; ++S) {
BasicBlock *BB = &*S;
uint32_t MyID = LMap[BB];
// if (succ_begin(BB) == succ_end(BB)) {
// Succs[MyID].push_back(FakeID);
// Marked.insert(MyID);
//}
for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
Succs[MyID].push_back(LMap[*I]);
@ -113,7 +119,7 @@ void DFStree(size_t now_id) {
}
void turnCFGintoDAG(Function *F) {
void turnCFGintoDAG() {
tSuccs = Succs;
tag.resize(Blocks.size());
@ -176,7 +182,7 @@ void DFS(uint32_t now) {
}
void DominatorTree(Function *F) {
void DominatorTree() {
if (Blocks.empty()) return;
uint32_t s = start_point;
@ -390,7 +396,7 @@ void MarkSubGraph(uint32_t ss, uint32_t tt) {
}
void MarkVertice(Function *F) {
void MarkVertice() {
uint32_t s = start_point;
@ -411,8 +417,6 @@ void MarkVertice(Function *F) {
timeStamp = 0;
uint32_t t = 0;
// MarkSubGraph(s, t);
// return;
while (s != t) {
@ -432,9 +436,9 @@ std::pair<std::vector<BasicBlock *>, std::vector<BasicBlock *> > markNodes(
reset();
labelEachBlock(F);
buildCFG(F);
turnCFGintoDAG(F);
DominatorTree::DominatorTree(F);
MarkVertice(F);
turnCFGintoDAG();
DominatorTree::DominatorTree();
MarkVertice();
std::vector<BasicBlock *> Result, ResultAbove;
for (uint32_t x : Markabove) {

View File

@ -5,7 +5,7 @@
## 1) Introduction
! llvm_mode works with llvm versions 3.8.0 up to 10 !
! llvm_mode works with llvm versions 3.8.0 up to 11 !
The code in this directory allows you to instrument programs for AFL using
true compiler-level instrumentation, instead of the more crude
@ -198,24 +198,23 @@ PS. Because there are task switches still involved, the mode isn't as fast as
faster than the normal fork() model, and compared to in-process fuzzing,
should be a lot more robust.
## 8) Bonus feature #3: new 'trace-pc-guard' mode
## 8) Bonus feature #3: 'trace-pc-guard' mode
Recent versions of LLVM are shipping with a built-in execution tracing feature
LLVM is shipping with a built-in execution tracing feature
that provides AFL with the necessary tracing data without the need to
post-process the assembly or install any compiler plugins. See:
http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards
If you have a sufficiently recent compiler and want to give it a try, build
afl-clang-fast this way:
If you have not an outdated compiler and want to give it a try, build
targets this way:
```
AFL_TRACE_PC=1 make clean all
libtarget-1.0 $ AFL_LLVM_USE_TRACE_PC=1 make
```
Note that this mode is currently about 20% slower than "vanilla" afl-clang-fast,
Note that this mode is about 20% slower than "vanilla" afl-clang-fast,
and about 5-10% slower than afl-clang. This is likely because the
instrumentation is not inlined, and instead involves a function call. On systems
that support it, compiling your target with -flto should help.
instrumentation is not inlined, and instead involves a function call.
On systems that support it, compiling your target with -flto can help
a bit.

View File

@ -212,13 +212,24 @@ static void edit_params(u32 argc, char** argv) {
// "-fsanitize-coverage=trace-cmp,trace-div,trace-gep";
// cc_params[cc_par_cnt++] = "-sanitizer-coverage-block-threshold=0";
#else
cc_params[cc_par_cnt++] = "-Xclang";
cc_params[cc_par_cnt++] = "-load";
cc_params[cc_par_cnt++] = "-Xclang";
if (getenv("AFL_LLVM_INSTRIM") != NULL || getenv("INSTRIM_LIB") != NULL)
cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path);
else
cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path);
if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") ||
getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) {
cc_params[cc_par_cnt++] =
"-fsanitize-coverage=trace-pc-guard"; // edge coverage by default
} else {
cc_params[cc_par_cnt++] = "-Xclang";
cc_params[cc_par_cnt++] = "-load";
cc_params[cc_par_cnt++] = "-Xclang";
if (getenv("AFL_LLVM_INSTRIM") != NULL || getenv("INSTRIM_LIB") != NULL)
cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path);
else
cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path);
}
#endif /* ^USE_TRACE_PC */
}
@ -292,8 +303,10 @@ static void edit_params(u32 argc, char** argv) {
#ifdef USE_TRACE_PC
if (getenv("AFL_INST_RATIO"))
FATAL("AFL_INST_RATIO not available at compile time with 'trace-pc'.");
if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") ||
getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC"))
if (getenv("AFL_INST_RATIO"))
FATAL("AFL_INST_RATIO not available at compile time with 'trace-pc'.");
#endif /* USE_TRACE_PC */
@ -474,6 +487,8 @@ int main(int argc, char** argv) {
#ifdef USE_TRACE_PC
SAYF(cCYA "afl-clang-fast" VERSION cRST
" [tpcg] by <lszekeres@google.com>\n");
#warning \
"You do not need to specifically compile with USE_TRACE_PC anymore, setting the environment variable AFL_LLVM_USE_TRACE_PC is enough."
#else
SAYF(cCYA "afl-clang-fast" VERSION cRST " by <lszekeres@google.com>\n");
#endif /* ^USE_TRACE_PC */

View File

@ -37,14 +37,26 @@
#include <fstream>
#include <sys/time.h>
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Config/llvm-config.h"
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5
typedef long double max_align_t;
#endif
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#if LLVM_VERSION_MAJOR > 3 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/CFG.h"
#else
#include "llvm/DebugInfo.h"
#include "llvm/Support/CFG.h"
#endif
using namespace llvm;
@ -78,7 +90,7 @@ class AFLCoverage : public ModulePass {
// ripped from aflgo
static bool isBlacklisted(const Function *F) {
static const SmallVector<std::string, 4> Blacklist = {
static const char *Blacklist[] = {
"asan.",
"llvm.",
@ -197,6 +209,8 @@ bool AFLCoverage::runOnModule(Module &M) {
* For now, just instrument the block if we are not able
* to determine our location. */
DebugLoc Loc = IP->getDebugLoc();
#if LLVM_VERSION_MAJOR >= 4 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
if (Loc) {
DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
@ -249,6 +263,47 @@ bool AFLCoverage::runOnModule(Module &M) {
}
#else
if (!Loc.isUnknown()) {
DILocation cDILoc(Loc.getAsMDNode(C));
unsigned int instLine = cDILoc.getLineNumber();
StringRef instFilename = cDILoc.getFilename();
(void)instLine;
/* Continue only if we know where we actually are */
if (!instFilename.str().empty()) {
for (std::list<std::string>::iterator it = myWhitelist.begin();
it != myWhitelist.end(); ++it) {
/* We don't check for filename equality here because
* filenames might actually be full paths. Instead we
* check that the actual filename ends in the filename
* specified in the list. */
if (instFilename.str().length() >= it->length()) {
if (instFilename.str().compare(
instFilename.str().length() - it->length(),
it->length(), *it) == 0) {
instrumentBlock = true;
break;
}
}
}
}
}
#endif
/* Either we couldn't figure out our location or the location is
* not whitelisted, so we skip instrumentation. */
if (!instrumentBlock) continue;
@ -273,13 +328,19 @@ bool AFLCoverage::runOnModule(Module &M) {
// result: a little more speed and less map pollution
int more_than_one = -1;
// fprintf(stderr, "BB %u: ", cur_loc);
for (BasicBlock *Pred : predecessors(&BB)) {
for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB); PI != E;
++PI) {
BasicBlock *Pred = *PI;
int count = 0;
if (more_than_one == -1) more_than_one = 0;
// fprintf(stderr, " %p=>", Pred);
for (BasicBlock *Succ : successors(Pred)) {
for (succ_iterator SI = succ_begin(Pred), E = succ_end(Pred); SI != E;
++SI) {
BasicBlock *Succ = *SI;
// if (count > 0)
// fprintf(stderr, "|");

View File

@ -18,6 +18,12 @@
#include <stdlib.h>
#include <unistd.h>
#include <list>
#include <string>
#include <fstream>
#include <sys/time.h>
#include "llvm/Config/llvm-config.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
@ -26,10 +32,19 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ValueTracking.h"
#if LLVM_VERSION_MAJOR > 3 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
#include "llvm/IR/Verifier.h"
#include "llvm/IR/DebugInfo.h"
#else
#include "llvm/Analysis/Verifier.h"
#include "llvm/DebugInfo.h"
#define nullptr 0
#endif
#include <set>
using namespace llvm;
@ -42,6 +57,23 @@ class CompareTransform : public ModulePass {
static char ID;
CompareTransform() : ModulePass(ID) {
char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST");
if (instWhiteListFilename) {
std::string line;
std::ifstream fileStream;
fileStream.open(instWhiteListFilename);
if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST");
getline(fileStream, line);
while (fileStream) {
myWhitelist.push_back(line);
getline(fileStream, line);
}
}
}
bool runOnModule(Module &M) override;
@ -57,6 +89,9 @@ class CompareTransform : public ModulePass {
}
protected:
std::list<std::string> myWhitelist;
private:
bool transformCmps(Module &M, const bool processStrcmp,
const bool processMemcmp, const bool processStrncmp,
@ -89,7 +124,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
c = M.getOrInsertFunction("tolower", Int32Ty, Int32Ty
#if LLVM_VERSION_MAJOR < 5
,
nullptr
NULL
#endif
);
#if LLVM_VERSION_MAJOR < 9
@ -104,6 +139,117 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
for (auto &BB : F) {
if (!myWhitelist.empty()) {
BasicBlock::iterator IP = BB.getFirstInsertionPt();
bool instrumentBlock = false;
/* Get the current location using debug information.
* For now, just instrument the block if we are not able
* to determine our location. */
DebugLoc Loc = IP->getDebugLoc();
#if LLVM_VERSION_MAJOR >= 4 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
if (Loc) {
DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
unsigned int instLine = cDILoc->getLine();
StringRef instFilename = cDILoc->getFilename();
if (instFilename.str().empty()) {
/* If the original location is empty, try using the inlined location
*/
DILocation *oDILoc = cDILoc->getInlinedAt();
if (oDILoc) {
instFilename = oDILoc->getFilename();
instLine = oDILoc->getLine();
}
}
(void)instLine;
/* Continue only if we know where we actually are */
if (!instFilename.str().empty()) {
for (std::list<std::string>::iterator it = myWhitelist.begin();
it != myWhitelist.end(); ++it) {
/* We don't check for filename equality here because
* filenames might actually be full paths. Instead we
* check that the actual filename ends in the filename
* specified in the list. */
if (instFilename.str().length() >= it->length()) {
if (instFilename.str().compare(
instFilename.str().length() - it->length(),
it->length(), *it) == 0) {
instrumentBlock = true;
break;
}
}
}
}
}
#else
if (!Loc.isUnknown()) {
DILocation cDILoc(Loc.getAsMDNode(C));
unsigned int instLine = cDILoc.getLineNumber();
StringRef instFilename = cDILoc.getFilename();
(void)instLine;
/* Continue only if we know where we actually are */
if (!instFilename.str().empty()) {
for (std::list<std::string>::iterator it = myWhitelist.begin();
it != myWhitelist.end(); ++it) {
/* We don't check for filename equality here because
* filenames might actually be full paths. Instead we
* check that the actual filename ends in the filename
* specified in the list. */
if (instFilename.str().length() >= it->length()) {
if (instFilename.str().compare(
instFilename.str().length() - it->length(),
it->length(), *it) == 0) {
instrumentBlock = true;
break;
}
}
}
}
}
#endif
/* Either we couldn't figure out our location or the location is
* not whitelisted, so we skip instrumentation. */
if (!instrumentBlock) continue;
}
for (auto &IN : BB) {
CallInst *callInst = nullptr;

View File

@ -15,15 +15,34 @@
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <list>
#include <string>
#include <fstream>
#include <sys/time.h>
#include "llvm/Config/llvm-config.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/IRBuilder.h"
#if LLVM_VERSION_MAJOR > 3 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
#include "llvm/IR/Verifier.h"
#include "llvm/IR/DebugInfo.h"
#else
#include "llvm/Analysis/Verifier.h"
#include "llvm/DebugInfo.h"
#define nullptr 0
#endif
using namespace llvm;
@ -35,6 +54,41 @@ class SplitComparesTransform : public ModulePass {
static char ID;
SplitComparesTransform() : ModulePass(ID) {
char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST");
if (instWhiteListFilename) {
std::string line;
std::ifstream fileStream;
fileStream.open(instWhiteListFilename);
if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST");
getline(fileStream, line);
while (fileStream) {
myWhitelist.push_back(line);
getline(fileStream, line);
}
}
}
static bool isBlacklisted(const Function *F) {
static const char *Blacklist[] = {
"asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign."
};
for (auto const &BlacklistFunc : Blacklist) {
if (F->getName().startswith(BlacklistFunc)) { return true; }
}
return false;
}
bool runOnModule(Module &M) override;
@ -49,6 +103,9 @@ class SplitComparesTransform : public ModulePass {
}
protected:
std::list<std::string> myWhitelist;
private:
int enableFPSplit;
@ -77,8 +134,121 @@ bool SplitComparesTransform::simplifyCompares(Module &M) {
* all integer comparisons with >= and <= predicates to the icomps vector */
for (auto &F : M) {
if (isBlacklisted(&F)) continue;
for (auto &BB : F) {
if (!myWhitelist.empty()) {
bool instrumentBlock = false;
BasicBlock::iterator IP = BB.getFirstInsertionPt();
/* Get the current location using debug information.
* For now, just instrument the block if we are not able
* to determine our location. */
DebugLoc Loc = IP->getDebugLoc();
#if LLVM_VERSION_MAJOR >= 4 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
if (Loc) {
DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
unsigned int instLine = cDILoc->getLine();
StringRef instFilename = cDILoc->getFilename();
if (instFilename.str().empty()) {
/* If the original location is empty, try using the inlined location
*/
DILocation *oDILoc = cDILoc->getInlinedAt();
if (oDILoc) {
instFilename = oDILoc->getFilename();
instLine = oDILoc->getLine();
}
}
(void)instLine;
/* Continue only if we know where we actually are */
if (!instFilename.str().empty()) {
for (std::list<std::string>::iterator it = myWhitelist.begin();
it != myWhitelist.end(); ++it) {
/* We don't check for filename equality here because
* filenames might actually be full paths. Instead we
* check that the actual filename ends in the filename
* specified in the list. */
if (instFilename.str().length() >= it->length()) {
if (instFilename.str().compare(
instFilename.str().length() - it->length(),
it->length(), *it) == 0) {
instrumentBlock = true;
break;
}
}
}
}
}
#else
if (!Loc.isUnknown()) {
DILocation cDILoc(Loc.getAsMDNode(C));
unsigned int instLine = cDILoc.getLineNumber();
StringRef instFilename = cDILoc.getFilename();
(void)instLine;
/* Continue only if we know where we actually are */
if (!instFilename.str().empty()) {
for (std::list<std::string>::iterator it = myWhitelist.begin();
it != myWhitelist.end(); ++it) {
/* We don't check for filename equality here because
* filenames might actually be full paths. Instead we
* check that the actual filename ends in the filename
* specified in the list. */
if (instFilename.str().length() >= it->length()) {
if (instFilename.str().compare(
instFilename.str().length() - it->length(),
it->length(), *it) == 0) {
instrumentBlock = true;
break;
}
}
}
}
}
#endif
/* Either we couldn't figure out our location or the location is
* not whitelisted, so we skip instrumentation. */
if (!instrumentBlock) continue;
}
for (auto &IN : BB) {
CmpInst *selectcmpInst = nullptr;
@ -165,7 +335,8 @@ bool SplitComparesTransform::simplifyCompares(Module &M) {
* block bb it is now at the position where the old IcmpInst was */
Instruction *icmp_np;
icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1);
bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_np);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
icmp_np);
/* create a new basic block which holds the new EQ icmp */
Instruction *icmp_eq;
@ -230,7 +401,8 @@ bool SplitComparesTransform::simplifyCompares(Module &M) {
* block bb it is now at the position where the old IcmpInst was */
Instruction *fcmp_np;
fcmp_np = CmpInst::Create(Instruction::FCmp, new_pred, op0, op1);
bb->getInstList().insert(bb->getTerminator()->getIterator(), fcmp_np);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
fcmp_np);
/* create a new basic block which holds the new EQ fcmp */
Instruction *fcmp_eq;
@ -351,20 +523,21 @@ bool SplitComparesTransform::simplifyIntSignedness(Module &M) {
s_op0 = BinaryOperator::Create(Instruction::LShr, op0,
ConstantInt::get(IntType, bitw - 1));
bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op0);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0);
t_op0 = new TruncInst(s_op0, Int1Ty);
bb->getInstList().insert(bb->getTerminator()->getIterator(), t_op0);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op0);
s_op1 = BinaryOperator::Create(Instruction::LShr, op1,
ConstantInt::get(IntType, bitw - 1));
bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op1);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1);
t_op1 = new TruncInst(s_op1, Int1Ty);
bb->getInstList().insert(bb->getTerminator()->getIterator(), t_op1);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op1);
/* compare of the sign bits */
icmp_sign_bit =
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_op0, t_op1);
bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_sign_bit);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
icmp_sign_bit);
/* create a new basic block which is executed if the signedness bit is
* different */
@ -439,6 +612,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
LLVMContext &C = M.getContext();
#if LLVM_VERSION_MAJOR > 3 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
const DataLayout &dl = M.getDataLayout();
/* define unions with floating point and (sign, exponent, mantissa) triples
@ -453,6 +628,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
}
#endif
std::vector<CmpInst *> fcomps;
/* get all EQ, NE, GT, and LT fcmps. if the other two
@ -551,11 +728,11 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
Instruction *b_op0, *b_op1;
b_op0 = CastInst::Create(Instruction::BitCast, op0,
IntegerType::get(C, op_size));
bb->getInstList().insert(bb->getTerminator()->getIterator(), b_op0);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op0);
b_op1 = CastInst::Create(Instruction::BitCast, op1,
IntegerType::get(C, op_size));
bb->getInstList().insert(bb->getTerminator()->getIterator(), b_op1);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op1);
/* isolate signs of value of floating point type */
@ -566,21 +743,22 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
s_s0 =
BinaryOperator::Create(Instruction::LShr, b_op0,
ConstantInt::get(b_op0->getType(), op_size - 1));
bb->getInstList().insert(bb->getTerminator()->getIterator(), s_s0);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s0);
t_s0 = new TruncInst(s_s0, Int1Ty);
bb->getInstList().insert(bb->getTerminator()->getIterator(), t_s0);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s0);
s_s1 =
BinaryOperator::Create(Instruction::LShr, b_op1,
ConstantInt::get(b_op1->getType(), op_size - 1));
bb->getInstList().insert(bb->getTerminator()->getIterator(), s_s1);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s1);
t_s1 = new TruncInst(s_s1, Int1Ty);
bb->getInstList().insert(bb->getTerminator()->getIterator(), t_s1);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s1);
/* compare of the sign bits */
icmp_sign_bit =
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_s0, t_s1);
bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_sign_bit);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
icmp_sign_bit);
/* create a new basic block which is executed if the signedness bits are
* equal */
@ -612,16 +790,16 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
Instruction::LShr, b_op1,
ConstantInt::get(b_op1->getType(), shiftR_exponent));
signequal_bb->getInstList().insert(
signequal_bb->getTerminator()->getIterator(), s_e0);
BasicBlock::iterator(signequal_bb->getTerminator()), s_e0);
signequal_bb->getInstList().insert(
signequal_bb->getTerminator()->getIterator(), s_e1);
BasicBlock::iterator(signequal_bb->getTerminator()), s_e1);
t_e0 = new TruncInst(s_e0, IntExponentTy);
t_e1 = new TruncInst(s_e1, IntExponentTy);
signequal_bb->getInstList().insert(
signequal_bb->getTerminator()->getIterator(), t_e0);
BasicBlock::iterator(signequal_bb->getTerminator()), t_e0);
signequal_bb->getInstList().insert(
signequal_bb->getTerminator()->getIterator(), t_e1);
BasicBlock::iterator(signequal_bb->getTerminator()), t_e1);
if (sizeInBits - precision < exTySizeBytes * 8) {
@ -632,9 +810,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
Instruction::And, t_e1,
ConstantInt::get(t_e1->getType(), mask_exponent));
signequal_bb->getInstList().insert(
signequal_bb->getTerminator()->getIterator(), m_e0);
BasicBlock::iterator(signequal_bb->getTerminator()), m_e0);
signequal_bb->getInstList().insert(
signequal_bb->getTerminator()->getIterator(), m_e1);
BasicBlock::iterator(signequal_bb->getTerminator()), m_e1);
} else {
@ -662,7 +840,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
icmp_exponent =
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, m_e0, m_e1);
signequal_bb->getInstList().insert(
signequal_bb->getTerminator()->getIterator(), icmp_exponent);
BasicBlock::iterator(signequal_bb->getTerminator()), icmp_exponent);
icmp_exponent_result =
BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0);
break;
@ -671,7 +849,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
icmp_exponent =
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, m_e0, m_e1);
signequal_bb->getInstList().insert(
signequal_bb->getTerminator()->getIterator(), icmp_exponent);
BasicBlock::iterator(signequal_bb->getTerminator()), icmp_exponent);
icmp_exponent_result =
BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0);
break;
@ -680,7 +858,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
}
signequal_bb->getInstList().insert(
signequal_bb->getTerminator()->getIterator(), icmp_exponent_result);
BasicBlock::iterator(signequal_bb->getTerminator()),
icmp_exponent_result);
{
@ -704,19 +883,19 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
m_f1 = BinaryOperator::Create(
Instruction::And, b_op1,
ConstantInt::get(b_op1->getType(), mask_fraction));
middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(),
m_f0);
middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(),
m_f1);
middle_bb->getInstList().insert(
BasicBlock::iterator(middle_bb->getTerminator()), m_f0);
middle_bb->getInstList().insert(
BasicBlock::iterator(middle_bb->getTerminator()), m_f1);
if (needTrunc) {
t_f0 = new TruncInst(m_f0, IntFractionTy);
t_f1 = new TruncInst(m_f1, IntFractionTy);
middle_bb->getInstList().insert(
middle_bb->getTerminator()->getIterator(), t_f0);
BasicBlock::iterator(middle_bb->getTerminator()), t_f0);
middle_bb->getInstList().insert(
middle_bb->getTerminator()->getIterator(), t_f1);
BasicBlock::iterator(middle_bb->getTerminator()), t_f1);
} else {
@ -732,9 +911,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
t_f0 = new TruncInst(b_op0, IntFractionTy);
t_f1 = new TruncInst(b_op1, IntFractionTy);
middle_bb->getInstList().insert(
middle_bb->getTerminator()->getIterator(), t_f0);
BasicBlock::iterator(middle_bb->getTerminator()), t_f0);
middle_bb->getInstList().insert(
middle_bb->getTerminator()->getIterator(), t_f1);
BasicBlock::iterator(middle_bb->getTerminator()), t_f1);
} else {
@ -764,7 +943,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
icmp_fraction =
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1);
middle_bb->getInstList().insert(
middle_bb->getTerminator()->getIterator(), icmp_fraction);
BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction);
icmp_fraction_result =
BinaryOperator::Create(Instruction::Xor, icmp_fraction, t_s0);
break;
@ -773,7 +952,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
icmp_fraction =
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1);
middle_bb->getInstList().insert(
middle_bb->getTerminator()->getIterator(), icmp_fraction);
BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction);
icmp_fraction_result =
BinaryOperator::Create(Instruction::Xor, icmp_fraction, t_s0);
break;
@ -781,8 +960,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
}
middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(),
icmp_fraction_result);
middle_bb->getInstList().insert(
BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction_result);
PHINode *PN = PHINode::Create(Int1Ty, 3, "");
@ -919,18 +1098,21 @@ size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned bitw) {
s_op0 = BinaryOperator::Create(Instruction::LShr, op0,
ConstantInt::get(OldIntType, bitw / 2));
bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op0);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0);
op0_high = new TruncInst(s_op0, NewIntType);
bb->getInstList().insert(bb->getTerminator()->getIterator(), op0_high);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
op0_high);
s_op1 = BinaryOperator::Create(Instruction::LShr, op1,
ConstantInt::get(OldIntType, bitw / 2));
bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op1);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1);
op1_high = new TruncInst(s_op1, NewIntType);
bb->getInstList().insert(bb->getTerminator()->getIterator(), op1_high);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
op1_high);
icmp_high = CmpInst::Create(Instruction::ICmp, pred, op0_high, op1_high);
bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_high);
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
icmp_high);
/* now we have to destinguish between == != and > < */
if (pred == CmpInst::ICMP_EQ || pred == CmpInst::ICMP_NE) {
@ -1076,13 +1258,19 @@ bool SplitComparesTransform::runOnModule(Module &M) {
<< "bit: " << splitIntCompares(M, bitw) << " splitted\n";
bitw >>= 1;
#if LLVM_VERSION_MAJOR > 3 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
[[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */
#endif
case 32:
errs() << "Split-integer-compare-pass " << bitw
<< "bit: " << splitIntCompares(M, bitw) << " splitted\n";
bitw >>= 1;
#if LLVM_VERSION_MAJOR > 3 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
[[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */
#endif
case 16:
errs() << "Split-integer-compare-pass " << bitw
<< "bit: " << splitIntCompares(M, bitw) << " splitted\n";

View File

@ -18,6 +18,13 @@
#include <stdlib.h>
#include <unistd.h>
#include <list>
#include <string>
#include <fstream>
#include <sys/time.h>
#include "llvm/Config/llvm-config.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
@ -26,10 +33,20 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#if LLVM_VERSION_MAJOR > 3 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
#include "llvm/IR/Verifier.h"
#include "llvm/IR/DebugInfo.h"
#else
#include "llvm/Analysis/Verifier.h"
#include "llvm/DebugInfo.h"
#define nullptr 0
#endif
#include <set>
using namespace llvm;
@ -42,6 +59,41 @@ class SplitSwitchesTransform : public ModulePass {
static char ID;
SplitSwitchesTransform() : ModulePass(ID) {
char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST");
if (instWhiteListFilename) {
std::string line;
std::ifstream fileStream;
fileStream.open(instWhiteListFilename);
if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST");
getline(fileStream, line);
while (fileStream) {
myWhitelist.push_back(line);
getline(fileStream, line);
}
}
}
static bool isBlacklisted(const Function *F) {
static const char *Blacklist[] = {
"asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign."
};
for (auto const &BlacklistFunc : Blacklist) {
if (F->getName().startswith(BlacklistFunc)) { return true; }
}
return false;
}
bool runOnModule(Module &M) override;
@ -71,6 +123,9 @@ class SplitSwitchesTransform : public ModulePass {
typedef std::vector<CaseExpr> CaseVector;
protected:
std::list<std::string> myWhitelist;
private:
bool splitSwitches(Module &M);
bool transformCmps(Module &M, const bool processStrcmp,
@ -96,7 +151,7 @@ BasicBlock *SplitSwitchesTransform::switchConvert(
IntegerType * ByteType = IntegerType::get(OrigBlock->getContext(), 8);
unsigned BytesInValue = bytesChecked.size();
std::vector<uint8_t> setSizes;
std::vector<std::set<uint8_t>> byteSets(BytesInValue, std::set<uint8_t>());
std::vector<std::set<uint8_t> > byteSets(BytesInValue, std::set<uint8_t>());
assert(ValTypeBitWidth >= 8 && ValTypeBitWidth <= 64);
@ -169,8 +224,25 @@ BasicBlock *SplitSwitchesTransform::switchConvert(
NewNode->getInstList().push_back(Comp);
bytesChecked[smallestIndex] = true;
if (std::all_of(bytesChecked.begin(), bytesChecked.end(),
[](bool b) { return b; })) {
bool allBytesAreChecked = true;
for (std::vector<bool>::iterator BCI = bytesChecked.begin(),
E = bytesChecked.end();
BCI != E; ++BCI) {
if (!*BCI) {
allBytesAreChecked = false;
break;
}
}
// if (std::all_of(bytesChecked.begin(), bytesChecked.end(),
// [](bool b) { return b; })) {
if (allBytesAreChecked) {
assert(Cases.size() == 1);
BranchInst::Create(Cases[0].BB, NewDefault, Comp, NewNode);
@ -262,16 +334,132 @@ BasicBlock *SplitSwitchesTransform::switchConvert(
bool SplitSwitchesTransform::splitSwitches(Module &M) {
#if (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7)
LLVMContext &C = M.getContext();
#endif
std::vector<SwitchInst *> switches;
/* iterate over all functions, bbs and instruction and add
* all switches to switches vector for later processing */
for (auto &F : M) {
if (isBlacklisted(&F)) continue;
for (auto &BB : F) {
SwitchInst *switchInst = nullptr;
if (!myWhitelist.empty()) {
bool instrumentBlock = false;
BasicBlock::iterator IP = BB.getFirstInsertionPt();
/* Get the current location using debug information.
* For now, just instrument the block if we are not able
* to determine our location. */
DebugLoc Loc = IP->getDebugLoc();
#if LLVM_VERSION_MAJOR >= 4 || \
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
if (Loc) {
DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
unsigned int instLine = cDILoc->getLine();
StringRef instFilename = cDILoc->getFilename();
if (instFilename.str().empty()) {
/* If the original location is empty, try using the inlined location
*/
DILocation *oDILoc = cDILoc->getInlinedAt();
if (oDILoc) {
instFilename = oDILoc->getFilename();
instLine = oDILoc->getLine();
}
}
(void)instLine;
/* Continue only if we know where we actually are */
if (!instFilename.str().empty()) {
for (std::list<std::string>::iterator it = myWhitelist.begin();
it != myWhitelist.end(); ++it) {
/* We don't check for filename equality here because
* filenames might actually be full paths. Instead we
* check that the actual filename ends in the filename
* specified in the list. */
if (instFilename.str().length() >= it->length()) {
if (instFilename.str().compare(
instFilename.str().length() - it->length(),
it->length(), *it) == 0) {
instrumentBlock = true;
break;
}
}
}
}
}
#else
if (!Loc.isUnknown()) {
DILocation cDILoc(Loc.getAsMDNode(C));
unsigned int instLine = cDILoc.getLineNumber();
StringRef instFilename = cDILoc.getFilename();
(void)instLine;
/* Continue only if we know where we actually are */
if (!instFilename.str().empty()) {
for (std::list<std::string>::iterator it = myWhitelist.begin();
it != myWhitelist.end(); ++it) {
/* We don't check for filename equality here because
* filenames might actually be full paths. Instead we
* check that the actual filename ends in the filename
* specified in the list. */
if (instFilename.str().length() >= it->length()) {
if (instFilename.str().compare(
instFilename.str().length() - it->length(),
it->length(), *it) == 0) {
instrumentBlock = true;
break;
}
}
}
}
}
#endif
/* Either we couldn't figure out our location or the location is
* not whitelisted, so we skip instrumentation. */
if (!instrumentBlock) continue;
}
if ((switchInst = dyn_cast<SwitchInst>(BB.getTerminator()))) {
if (switchInst->getNumCases() < 1) continue;
@ -313,8 +501,7 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) {
* if the default block is set as an unreachable we avoid creating one
* because will never be a valid target.*/
BasicBlock *NewDefault = nullptr;
NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
NewDefault->insertInto(F, Default);
NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault", F, Default);
BranchInst::Create(Default, NewDefault);
/* Prepare cases vector. */

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -6,7 +6,7 @@
Forkserver design by Jann Horn <jannhorn@googlemail.com>
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>
@ -184,11 +184,21 @@ void bind_to_free_cpu(void) {
"For this platform we do not have free CPU binding code yet. If possible, please supply a PR to https://github.com/vanhauser-thc/AFLplusplus"
#endif
for (i = 0; i < cpu_core_count; ++i)
if (!cpu_used[i]) break;
size_t cpu_start = 0;
try:
#ifndef __ANDROID__
for (i = cpu_start; i < cpu_core_count; i++)
if (!cpu_used[i]) break;
if (i == cpu_core_count) {
#else
for (i = cpu_core_count - cpu_start - 1; i > -1; i--)
if (!cpu_used[i]) break;
if (i == -1) {
#endif
SAYF("\n" cLRD "[-] " cRST
"Uh-oh, looks like all %d CPU cores on your system are allocated to\n"
" other instances of afl-fuzz (or similar CPU-locked tasks). "
@ -197,12 +207,11 @@ void bind_to_free_cpu(void) {
"you are\n"
" absolutely sure, you can set AFL_NO_AFFINITY and try again.\n",
cpu_core_count);
FATAL("No more free CPU cores");
}
OKF("Found a free CPU core, binding to #%u.", i);
OKF("Found a free CPU core, try binding to #%u.", i);
cpu_aff = i;
@ -212,22 +221,31 @@ void bind_to_free_cpu(void) {
#elif defined(__NetBSD__)
c = cpuset_create();
if (c == NULL) PFATAL("cpuset_create failed");
cpuset_set(i, c);
#endif
#if defined(__linux__)
if (sched_setaffinity(0, sizeof(c), &c)) PFATAL("sched_setaffinity failed");
if (sched_setaffinity(0, sizeof(c), &c)) {
if (cpu_start == cpu_core_count)
PFATAL("sched_setaffinity failed for CPU %d, exit", i);
WARNF("sched_setaffinity failed to CPU %d, trying next CPU", i);
cpu_start++;
goto try
;
}
#elif defined(__FreeBSD__) || defined(__DragonFly__)
if (pthread_setaffinity_np(pthread_self(), sizeof(c), &c))
PFATAL("pthread_setaffinity failed");
#elif defined(__NetBSD__)
if (pthread_setaffinity_np(pthread_self(), cpuset_size(c), c))
PFATAL("pthread_setaffinity failed");
if (pthread_setaffinity_np(pthread_self(), cpuset_size(c), c))
PFATAL("pthread_setaffinity failed");
cpuset_destroy(c);
cpuset_destroy(c);
#else
// this will need something for other platforms
// this will need something for other platforms
#endif
}
@ -1940,17 +1958,17 @@ void check_binary(u8* fname) {
}
if ((qemu_mode || unicorn_mode) &&
if ((qemu_mode) &&
memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) {
SAYF("\n" cLRD "[-] " cRST
"This program appears to be instrumented with afl-gcc, but is being "
"run in\n"
" QEMU or Unicorn mode (-Q or -U). This is probably not what you "
" QEMU mode (-Q). This is probably not what you "
"want -\n"
" this setup will be slow and offer no practical benefits.\n");
FATAL("Instrumentation found in -Q or -U mode");
FATAL("Instrumentation found in -Q mode");
}

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -4,7 +4,7 @@
Originally written by Michal Zalewski
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -6,7 +6,7 @@
Forkserver design by Jann Horn <jannhorn@googlemail.com>
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -6,7 +6,7 @@
Forkserver design by Jann Horn <jannhorn@googlemail.com>
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

View File

@ -6,7 +6,7 @@
Forkserver design by Jann Horn <jannhorn@googlemail.com>
Now maintained by by Marc Heuse <mh@mh-sec.de>,
Now maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
Andrea Fioraldi <andreafioraldi@gmail.com>

25
test/test-unsigaction.c Normal file
View File

@ -0,0 +1,25 @@
#include <signal.h> /* sigemptyset(), sigaction(), kill(), SIGUSR1 */
#include <stdlib.h> /* exit() */
#include <unistd.h> /* getpid() */
#include <errno.h> /* errno */
#include <stdio.h> /* fprintf() */
static void mysig_handler(int sig)
{
exit(2);
}
int main()
{
/* setup sig handler */
struct sigaction sa;
sa.sa_handler = mysig_handler;
sigemptyset(&sa.sa_mask);
sa.sa_flags = 0;
if (sigaction(SIGCHLD, &sa, NULL)) {
fprintf(stderr, "could not set signal handler %d, aborted\n", errno);
exit(1);
}
kill(getpid(), SIGCHLD);
return 0;
}

View File

@ -150,13 +150,13 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && {
}
echo 000000000000000000000000 > in/in2
mkdir -p in2
../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null 2>&1
../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null
CNT=`ls in2/ | wc -l`
case "$CNT" in
1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;;
*) $ECHO "$RED[!] afl-cmin did not correctly minimize testcase numbers"
CODE=1
;;
*1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;;
*) $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases"
CODE=1
;;
esac
../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1
SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'`
@ -176,14 +176,16 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && {
$ECHO "$YELLOW[-] not an intel platform, cannot test afl-gcc"
}
$ECHO "$BLUE[*] Testing: llvm_mode"
$ECHO "$BLUE[*] Testing: llvm_mode, afl-showmap, afl-fuzz, afl-cmin and afl-tmin"
test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
# on FreeBSD need to set AFL_CC
if which clang >/dev/null; then
export AFL_CC=`which clang`
else
export AFL_CC=`$LLVM_CONFIG --bindir`/clang
fi
test `uname -s` = 'FreeBSD' && {
if which clang >/dev/null; then
export AFL_CC=`which clang`
else
export AFL_CC=`$LLVM_CONFIG --bindir`/clang
fi
}
../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1
AFL_HARDEN=1 ../afl-clang-fast -o test-compcov.harden test-compcov.c > /dev/null 2>&1
test -e test-instr.plain && {
@ -251,6 +253,26 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
$ECHO "$RED[!] afl-fuzz is not working correctly with llvm_mode"
CODE=1
}
test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" || {
echo 000000000000000000000000 > in/in2
mkdir -p in2
../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null
CNT=`ls in2/ | wc -l`
case "$CNT" in
*1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;;
*) $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases"
CODE=1
;;
esac
../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1
SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'`
test "$SIZE" = 1 && $ECHO "$GREEN[+] afl-tmin correctly minimized the testcase"
test "$SIZE" = 1 || {
$ECHO "$RED[!] afl-tmin did incorrectly minimize the testcase to $SIZE"
CODE=1
}
rm -rf in2
}
rm -rf in out errors
}
rm -f test-instr.plain
@ -334,7 +356,7 @@ test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && {
$ECHO "$GREEN[+] gcc_plugin run reported $TUPLES instrumented locations which is fine"
} || {
$ECHO "$RED[!] gcc_plugin instrumentation produces a weird number of instrumented locations: $TUPLES"
$ECHO "$YELLOW[-] the gcc_plugin instrumentation issue is not flagged as an error because travis builds would all fail otherwise :-("
$ECHO "$YELLOW[-] this is a known issue in gcc, not afl++. It is not flagged as an error because travis builds would all fail otherwise :-("
#CODE=1
}
}
@ -457,6 +479,15 @@ test -e ../libdislocator.so && {
}
rm -f test-compcov
test -e ../libradamsa.so && {
# on FreeBSD need to set AFL_CC
test `uname -s` = 'FreeBSD' && {
if which clang >/dev/null; then
export AFL_CC=`which clang`
else
export AFL_CC=`$LLVM_CONFIG --bindir`/clang
fi
}
test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1
test -e test-instr.plain || ../afl-gcc-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1
test -e test-instr.plain || ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1
@ -560,8 +591,64 @@ test -e ../afl-qemu-trace && {
CODE=1
exit 1
}
$ECHO "$YELLOW[-] we need a test case for qemu_mode unsigaction library"
rm -rf in out errors
test -e ../qemu_mode/unsigaction/unsigaction32.so && {
${AFL_CC} -o test-unsigaction32 -m32 test-unsigaction.c >> errors 2>&1 && {
./test-unsigaction32
RETVAL_NORMAL32=$?
LD_PRELOAD=../qemu_mode/unsigaction/unsigaction32.so ./test-unsigaction32
RETVAL_LIBUNSIGACTION32=$?
test $RETVAL_NORMAL32 = "2" -a $RETVAL_LIBUNSIGACTION32 = "0" && {
$ECHO "$GREEN[+] qemu_mode unsigaction library (32 bit) ignores signals"
} || {
test $RETVAL_NORMAL32 != "2" && {
$ECHO "$RED[!] cannot trigger signal in test program (32 bit)"
}
test $RETVAL_LIBUNSIGACTION32 != "0" && {
$ECHO "$RED[!] signal in test program (32 bit) is not ignored with unsigaction"
}
CODE=1
}
} || {
echo CUT------------------------------------------------------------------CUT
cat errors
echo CUT------------------------------------------------------------------CUT
$ECHO "$RED[!] cannot compile test program (32 bit) for unsigaction library"
CODE=1
}
} || {
$ECHO "$YELLOW[-] we cannot test qemu_mode unsigaction library (32 bit) because it is not present"
INCOMPLETE=1
}
test -e ../qemu_mode/unsigaction/unsigaction64.so && {
${AFL_CC} -o test-unsigaction64 -m64 test-unsigaction.c >> errors 2>&1 && {
./test-unsigaction64
RETVAL_NORMAL64=$?
LD_PRELOAD=../qemu_mode/unsigaction/unsigaction64.so ./test-unsigaction64
RETVAL_LIBUNSIGACTION64=$?
test $RETVAL_NORMAL64 = "2" -a $RETVAL_LIBUNSIGACTION64 = "0" && {
$ECHO "$GREEN[+] qemu_mode unsigaction library (64 bit) ignores signals"
} || {
test $RETVAL_NORMAL64 != "2" && {
$ECHO "$RED[!] cannot trigger signal in test program (64 bit)"
}
test $RETVAL_LIBUNSIGACTION64 != "0" && {
$ECHO "$RED[!] signal in test program (64 bit) is not ignored with unsigaction"
}
CODE=1
}
} || {
echo CUT------------------------------------------------------------------CUT
cat errors
echo CUT------------------------------------------------------------------CUT
$ECHO "$RED[!] cannot compile test program (64 bit) for unsigaction library"
CODE=1
}
} || {
$ECHO "$YELLOW[-] we cannot test qemu_mode unsigaction library (64 bit) because it is not present"
INCOMPLETE=1
}
rm -rf errors test-unsigaction32 test-unsigaction64
}
} || {
$ECHO "$RED[!] gcc compilation of test targets failed - what is going on??"

Binary file not shown.

View File

@ -28,35 +28,56 @@
#include <unicorn/unicorn.h>
// Path to the file containing the binary to emulate
#define BINARY_FILE ("simple_target_x86_64")
#define BINARY_FILE ("persistent_target_x86_64")
// Memory map for the code to be tested
// Arbitrary address where code to test will be loaded
#define BASE_ADDRESS (0x100000)
#define CODE_ADDRESS (0x101119)
#define END_ADDRESS (0x1011d7)
static const int64_t BASE_ADDRESS = 0x100000;
static const int64_t CODE_ADDRESS = 0x101139;
static const int64_t END_ADDRESS = 0x10120d;
// Address of the stack (Some random address again)
#define STACK_ADDRESS (((int64_t) 0x01) << 58)
static const int64_t STACK_ADDRESS = (((int64_t) 0x01) << 58);
// Size of the stack (arbitrarily chosen, just make it big enough)
#define STACK_SIZE (0x10000)
static const int64_t STACK_SIZE = 0x10000;
// Location where the input will be placed (make sure the emulated program knows this somehow, too ;) )
#define INPUT_LOCATION (0x10000)
static const int64_t INPUT_LOCATION = 0x10000;
// Inside the location, we have an ofset in our special case
#define INPUT_OFFSET (0x16)
static const int64_t INPUT_OFFSET = 0x16;
// Maximum allowable size of mutated data from AFL
#define INPUT_SIZE_MAX (0x10000)
static const int64_t INPUT_SIZE_MAX = 0x10000;
// Alignment for unicorn mappings (seems to be needed)
#define ALIGNMENT ((uint64_t) 0x1000)
static const int64_t ALIGNMENT = 0x1000;
// In our special case, we emulate main(), so argc is needed.
static const uint64_t EMULATED_ARGC = 2;
// The return from our fake strlen
static size_t current_input_len = 0;
static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size);
}
static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data)
{
static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size);
}
/*
The sample uses strlen, since we don't have a loader or libc, we'll fake it.
We know the strlen will return the lenght of argv[1] that we just planted.
It will be a lot faster than an actual strlen for this specific purpose.
*/
static void hook_strlen(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
//Hook
//116b: e8 c0 fe ff ff call 1030 <strlen@plt>
// We place the return at RAX
//printf("Strlen hook at addr 0x%lx (size: 0x%x), result: %ld\n", address, size, current_input_len);
uc_reg_write(uc, UC_X86_REG_RAX, &current_input_len);
// We skip the actual call by updating RIP
uint64_t next_addr = address + size;
uc_reg_write(uc, UC_X86_REG_RIP, &next_addr);
}
/* Unicorn page needs to be 0x1000 aligned, apparently */
static uint64_t pad(uint64_t size) {
if (size % ALIGNMENT == 0) return size;
@ -99,11 +120,25 @@ static bool place_input_callback(
void *data
){
// printf("Placing input with len %ld to %x\n", input_len, DATA_ADDRESS);
if (input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) {
// Test input too long, ignore this testcase
if (input_len < 1 || input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) {
// Test input too short or too long, ignore this testcase
return false;
}
// For persistent mode, we have to set up stack and memory each time.
uc_reg_write(uc, UC_X86_REG_RIP, &CODE_ADDRESS); // Set the instruction pointer back
// Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly)
uc_reg_write(uc, UC_X86_REG_RSI, &INPUT_LOCATION); // argv
uc_reg_write(uc, UC_X86_REG_RDI, &EMULATED_ARGC); // argc == 2
// We need a valid c string, make sure it never goes out of bounds.
input[input_len-1] = '\0';
// Write the testcase to unicorn.
uc_mem_write(uc, INPUT_LOCATION + INPUT_OFFSET, input, input_len);
// store input_len for the faux strlen hook
current_input_len = input_len;
return true;
}
@ -187,11 +222,6 @@ int main(int argc, char **argv, char **envp) {
uc_mem_write(uc, 0x10008, "\x16\x00\x01", 3); // little endian of 0x10016, see above
// Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly)
uint64_t input_location = INPUT_LOCATION;
uc_reg_write(uc, UC_X86_REG_RSI, &input_location); // argv
uint64_t emulated_argc = 2;
uc_reg_write(uc, UC_X86_REG_RDI, &emulated_argc); // argc == 2
// If we want tracing output, set the callbacks here
if (tracing) {
@ -200,6 +230,11 @@ int main(int argc, char **argv, char **envp) {
uc_hook_add(uc, &hooks[1], UC_HOOK_CODE, hook_code, NULL, BASE_ADDRESS, BASE_ADDRESS + len - 1);
}
// Add our strlen hook (for this specific testcase only)
int strlen_hook_pos = BASE_ADDRESS + 0x116b;
uc_hook strlen_hook;
uc_hook_add(uc, &strlen_hook, UC_HOOK_CODE, hook_strlen, NULL, strlen_hook_pos, strlen_hook_pos);
printf("Starting to fuzz :)\n");
fflush(stdout);
@ -211,9 +246,9 @@ int main(int argc, char **argv, char **envp) {
&end_address, // Where to exit (this is an array)
1, // Count of end addresses
NULL, // Optional calback to run after each exec
false,
1, // For persistent mode: How many rounds to run
NULL
false, // true, if the optional callback should be run also for non-crashes
100, // For persistent mode: How many rounds to run
NULL // additional data pointer
);
switch(afl_ret) {
case UC_AFL_RET_ERROR:

View File

@ -0,0 +1,39 @@
/*
* Sample target file to test afl-unicorn fuzzing capabilities.
* This is a very trivial example that will crash pretty easily
* in several different exciting ways.
*
* Input is assumed to come from a buffer located at DATA_ADDRESS
* (0x00300000), so make sure that your Unicorn emulation of this
* puts user data there.
*
* Written by Nathan Voss <njvoss99@gmail.com>
* Adapted by Lukas Seidel <seidel.1@campus.tu-berlin.de>
*/
#include <stdint.h>
#include <string.h>
int main(int argc, char** argv) {
if (argc < 2) return -1;
char *data_buf = argv[1];
uint64_t data_len = strlen(data_buf);
if (data_len < 20) return -2;
for (; data_len --> 0 ;) {
if (data_len >= 18) continue;
if (data_len > 2 && data_len < 18) {
((char *)data_len)[(uint64_t)data_buf] = data_buf[data_len + 1];
} else if (data_buf[9] == 0x90 && data_buf[10] != 0x00 && data_buf[11] == 0x90) {
// Cause a crash if data[10] is not zero, but [9] and [11] are zero
unsigned char invalid_read = *(unsigned char *) 0x00000000;
}
}
if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) {
// Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2]
unsigned char invalid_read = *(unsigned char *) 0x00000000;
}
return 0;
}

Binary file not shown.

View File

@ -0,0 +1,3 @@
harness
harness-debug
out

View File

@ -0,0 +1,24 @@
# C Sample
This shows a simple persistent harness for unicornafl in C
In contrast to the normal c harness, this harness manually resets the unicorn state on each new input.
Thanks to this, we can rerun the testcase in unicorn multiple times, without the need to fork again.
## Compiling sample.c
The target can be built using the `make` command.
Just make sure you have built unicorn support first:
```bash
cd /path/to/afl/unicorn_mode
./build_unicorn_support.sh
```
## Compiling persistent_target.c
You don't need to compile persistent_target.c since a X86_64 binary version is
pre-built and shipped in this sample folder. This file documents how the binary
was built in case you want to rebuild it or recompile it for any reason.
The pre-built binary (persistent_target_x86_64.bin) was built using -g -O0 in gcc.
We then load the binary we execute the main function directly.

View File

@ -0,0 +1,42 @@
# UnicornAFL Usage
# Original Unicorn Example Makefile by Nguyen Anh Quynh <aquynh@gmail.com>, 2015
# Adapted for AFL++ by domenukk <domenukk@gmail.com>, 2020
UNAME_S := $(shell uname -s)
LIBDIR = ../../unicornafl
BIN_EXT =
AR_EXT = a
# Verbose output?
V ?= 0
CFLAGS += -Wall -Werror -I../../unicornafl/include
LDFLAGS += -L$(LIBDIR) -lpthread -lm
ifeq ($(UNAME_S), Linux)
LDFLAGS += -lrt
endif
ifneq ($(CROSS),)
CC = $(CROSS)gcc
endif
.PHONY: all clean
all: harness
clean:
rm -rf *.o harness harness-debug
harness.o: harness.c ../../unicornafl/include/unicorn/*.h
${CC} ${CFLAGS} -O3 -c $<
harness-debug.o: harness.c ../../unicornafl/include/unicorn/*.h
${CC} ${CFLAGS} -g -c $< -o $@
harness: harness.o
${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o $@
debug: harness-debug.o
${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o harness-debug

View File

@ -0,0 +1,269 @@
/*
Persistent test harness for AFL++'s unicornafl c mode.
This loads the persistent_target.bin binary (precompiled as X86_64 code) into
Unicorn's memory map for emulation, places the specified input into
the argv buffer (handed in as first parameter), and executes 'main()'.
Any crashes during emulation will automatically be handled by the afl-fuzz() function.
Run under AFL as follows:
$ cd <afl_path>/unicorn_mode/samples/persistent/
$ make
$ ../../../afl-fuzz -m none -i sample_inputs -o out -- ./harness @@
(Re)run a simgle input with block tracing using:
$ ./harness -t [inputfile]
*/
// This is not your everyday Unicorn.
#define UNICORN_AFL
#include <string.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdbool.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unicorn/unicorn.h>
// Path to the file containing the binary to emulate
#define BINARY_FILE ("persistent_target_x86_64")
// Memory map for the code to be tested
// Arbitrary address where code to test will be loaded
static const int64_t BASE_ADDRESS = 0x100000;
static const int64_t CODE_ADDRESS = 0x101139;
static const int64_t END_ADDRESS = 0x10120d;
// Address of the stack (Some random address again)
static const int64_t STACK_ADDRESS = (((int64_t) 0x01) << 58);
// Size of the stack (arbitrarily chosen, just make it big enough)
static const int64_t STACK_SIZE = 0x10000;
// Location where the input will be placed (make sure the emulated program knows this somehow, too ;) )
static const int64_t INPUT_LOCATION = 0x10000;
// Inside the location, we have an ofset in our special case
static const int64_t INPUT_OFFSET = 0x16;
// Maximum allowable size of mutated data from AFL
static const int64_t INPUT_SIZE_MAX = 0x10000;
// Alignment for unicorn mappings (seems to be needed)
static const int64_t ALIGNMENT = 0x1000;
// In our special case, we emulate main(), so argc is needed.
static const uint64_t EMULATED_ARGC = 2;
// The return from our fake strlen
static size_t current_input_len = 0;
static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size);
}
static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size);
}
/*
The sample uses strlen, since we don't have a loader or libc, we'll fake it.
We know the strlen will return the lenght of argv[1] that we just planted.
It will be a lot faster than an actual strlen for this specific purpose.
*/
static void hook_strlen(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
//Hook
//116b: e8 c0 fe ff ff call 1030 <strlen@plt>
// We place the return at RAX
uc_reg_write(uc, UC_X86_REG_RAX, &current_input_len);
// We skip the actual call by updating RIP
//printf("Strlen hook at addr 0x%lx (size: 0x%x), result: %ld\n", address, size, current_input_len);
uint64_t next_addr = address + size;
uc_reg_write(uc, UC_X86_REG_RIP, &next_addr);
}
/* Unicorn page needs to be 0x1000 aligned, apparently */
static uint64_t pad(uint64_t size) {
if (size % ALIGNMENT == 0) return size;
return ((size / ALIGNMENT) + 1) * ALIGNMENT;
}
/* returns the filesize in bytes, -1 or error. */
static off_t afl_mmap_file(char *filename, char **buf_ptr) {
off_t ret = -1;
int fd = open(filename, O_RDONLY);
struct stat st = {0};
if (fstat(fd, &st)) goto exit;
off_t in_len = st.st_size;
if (in_len == -1) {
/* This can only ever happen on 32 bit if the file is exactly 4gb. */
fprintf(stderr, "Filesize of %s too large", filename);
goto exit;
}
*buf_ptr = mmap(0, in_len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (*buf_ptr != MAP_FAILED) ret = in_len;
exit:
close(fd);
return ret;
}
/* Place the input at the right spot inside unicorn */
static bool place_input_callback(
uc_engine *uc,
char *input,
size_t input_len,
uint32_t persistent_round,
void *data
){
// printf("Placing input with len %ld to %x\n", input_len, DATA_ADDRESS);
if (input_len < 1 || input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) {
// Test input too short or too long, ignore this testcase
return false;
}
// For persistent mode, we have to set up stack and memory each time.
uc_reg_write(uc, UC_X86_REG_RIP, &CODE_ADDRESS); // Set the instruction pointer back
// Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly)
uc_reg_write(uc, UC_X86_REG_RSI, &INPUT_LOCATION); // argv
uc_reg_write(uc, UC_X86_REG_RDI, &EMULATED_ARGC); // argc == 2
// We need a valid c string, make sure it never goes out of bounds.
input[input_len-1] = '\0';
// Write the testcase to unicorn.
uc_mem_write(uc, INPUT_LOCATION + INPUT_OFFSET, input, input_len);
// store input_len for the faux strlen hook
current_input_len = input_len;
return true;
}
static void mem_map_checked(uc_engine *uc, uint64_t addr, size_t size, uint32_t mode) {
size = pad(size);
//printf("SIZE %lx, align: %lx\n", size, ALIGNMENT);
uc_err err = uc_mem_map(uc, addr, size, mode);
if (err != UC_ERR_OK) {
printf("Error mapping %ld bytes at 0x%lx: %s (mode: %d)\n", size, addr, uc_strerror(err), mode);
exit(1);
}
}
int main(int argc, char **argv, char **envp) {
if (argc == 1) {
printf("Test harness for simple_target.bin. Usage: harness [-t] <inputfile>\n");
exit(1);
}
bool tracing = false;
char *filename = argv[1];
if (argc > 2 && !strcmp(argv[1], "-t")) {
tracing = true;
filename = argv[2];
}
uc_engine *uc;
uc_err err;
uc_hook hooks[2];
char *file_contents;
// Initialize emulator in X86_64 mode
err = uc_open(UC_ARCH_X86, UC_MODE_64, &uc);
if (err) {
printf("Failed on uc_open() with error returned: %u (%s)\n",
err, uc_strerror(err));
return -1;
}
printf("Loading data input from %s\n", BINARY_FILE);
off_t len = afl_mmap_file(BINARY_FILE, &file_contents);
if (len < 0) {
perror("Could not read binary to emulate");
return -2;
}
if (len == 0) {
fprintf(stderr, "File at '%s' is empty\n", BINARY_FILE);
return -3;
}
// Map memory.
mem_map_checked(uc, BASE_ADDRESS, len, UC_PROT_ALL);
printf("Len: %lx", len);
fflush(stdout);
// write machine code to be emulated to memory
if (uc_mem_write(uc, BASE_ADDRESS, file_contents, len) != UC_ERR_OK) {
printf("Error writing to CODE");
}
// Release copied contents
munmap(file_contents, len);
// Set the program counter to the start of the code
uint64_t start_address = CODE_ADDRESS; // address of entry point of main()
uint64_t end_address = END_ADDRESS; // Address of last instruction in main()
uc_reg_write(uc, UC_X86_REG_RIP, &start_address); // address of entry point of main()
// Setup the Stack
mem_map_checked(uc, STACK_ADDRESS - STACK_SIZE, STACK_SIZE, UC_PROT_READ | UC_PROT_WRITE);
uint64_t stack_val = STACK_ADDRESS;
printf("%ld", stack_val);
uc_reg_write(uc, UC_X86_REG_RSP, &stack_val);
// reserve some space for our input data
mem_map_checked(uc, INPUT_LOCATION, INPUT_SIZE_MAX, UC_PROT_READ);
// build a "dummy" argv with lenth 2 at 0x10000:
// 0x10000 argv[0] NULL
// 0x10008 argv[1] (char *)0x10016 --. points to the next offset.
// 0x10016 argv[1][0], ... <-^ contains the acutal input data. (INPUT_LOCATION + INPUT_OFFSET)
uc_mem_write(uc, 0x10008, "\x16\x00\x01", 3); // little endian of 0x10016, see above
// If we want tracing output, set the callbacks here
if (tracing) {
// tracing all basic blocks with customized callback
uc_hook_add(uc, &hooks[0], UC_HOOK_BLOCK, hook_block, NULL, 1, 0);
uc_hook_add(uc, &hooks[1], UC_HOOK_CODE, hook_code, NULL, BASE_ADDRESS, BASE_ADDRESS + len - 1);
}
// Add our strlen hook (for this specific testcase only)
int strlen_hook_pos = BASE_ADDRESS + 0x116b;
uc_hook strlen_hook;
uc_hook_add(uc, &strlen_hook, UC_HOOK_CODE, hook_strlen, NULL, strlen_hook_pos, strlen_hook_pos);
printf("Starting to fuzz :)\n");
fflush(stdout);
// let's gooo
uc_afl_ret afl_ret = uc_afl_fuzz(
uc, // The unicorn instance we prepared
filename, // Filename of the input to process. In AFL this is usually the '@@' placeholder, outside it's any input file.
place_input_callback, // Callback that places the input (automatically loaded from the file at filename) in the unicorninstance
&end_address, // Where to exit (this is an array)
1, // Count of end addresses
NULL, // Optional calback to run after each exec
false, // true, if the optional callback should be run also for non-crashes
1000, // For persistent mode: How many rounds to run
NULL // additional data pointer
);
switch(afl_ret) {
case UC_AFL_RET_ERROR:
printf("Error starting to fuzz");
return -3;
break;
case UC_AFL_RET_NO_AFL:
printf("No AFL attached - We are done with a single run.");
break;
default:
break;
}
return 0;
}

View File

@ -0,0 +1,39 @@
/*
* Sample target file to test afl-unicorn fuzzing capabilities.
* This is a very trivial example that will crash pretty easily
* in several different exciting ways.
*
* Input is assumed to come from a buffer located at DATA_ADDRESS
* (0x00300000), so make sure that your Unicorn emulation of this
* puts user data there.
*
* Written by Nathan Voss <njvoss99@gmail.com>
* Adapted by Lukas Seidel <seidel.1@campus.tu-berlin.de>
*/
#include <stdint.h>
#include <string.h>
int main(int argc, char** argv) {
if (argc < 2) return -1;
char *data_buf = argv[1];
uint64_t data_len = strlen(data_buf);
if (data_len < 20) return -2;
for (; data_len --> 0 ;) {
if (data_len >= 18) continue;
if (data_len > 2 && data_len < 18) {
((char *)data_len)[(uint64_t)data_buf] = data_buf[data_len + 1];
} else if (data_buf[9] == 0x90 && data_buf[10] != 0x00 && data_buf[11] == 0x90) {
// Cause a crash if data[10] is not zero, but [9] and [11] are zero
unsigned char invalid_read = *(unsigned char *) 0x00000000;
}
}
if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) {
// Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2]
unsigned char invalid_read = *(unsigned char *) 0x00000000;
}
return 0;
}

View File

@ -0,0 +1,18 @@
#!/bin/sh
[ -z "${UNAME}" ] && UNAME=$(uname)
DIR=`dirname $0`
if [ "$UNAME" = Darwin ]; then
export DYLD_LIBRARY_PATH=../../unicorn
else
export LD_LIBRARY_PATH=../../unicorn
fi
if [ ! test -e $DIR/harness]; then
echo "[!] harness not found in $DIR"
exit 1
fi

View File

@ -0,0 +1 @@
abcd

View File

@ -0,0 +1 @@


View File

@ -0,0 +1 @@


View File

@ -0,0 +1 @@


View File

@ -19,12 +19,11 @@ int main(int argc, char** argv) {
char *data_buf = argv[1];
if len(data_buf < 20) {
if (data_buf[20] != 0) {
// Cause an 'invalid read' crash if data[0..3] == '\x01\x02\x03\x04'
unsigned char invalid_read = *(unsigned char *) 0x00000000;
printf("Not crashing");
} else if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) {
// Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2]
unsigned char invalid_read = *(unsigned char *) 0x00000000;
printf("Also not crashing with databuf[0] == %c", data_buf[0])
} else if (data_buf[9] == 0x00 && data_buf[10] != 0x00 && data_buf[11] == 0x00) {
// Cause a crash if data[10] is not zero, but [9] and [11] are zero
unsigned char invalid_read = *(unsigned char *) 0x00000000;

Binary file not shown.