LTO llvm11 (#302)

* new LTO mode for llvm 11

* remove unneeded afl-ld and env vars
This commit is contained in:
van Hauser 2020-04-09 10:36:28 +02:00 committed by Dominik Maier
parent 0c2e998f69
commit c1395bb543
6 changed files with 60 additions and 1004 deletions

View File

@ -45,7 +45,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
easier: DEFAULT, CFG (INSTRIM), LTO, CTX, NGRAM-x (x=2-16)
- made USE_TRACE_PC compile obsolete
- LTO collision free instrumentation added in llvm_mode with afl-clang-lto -
note that this mode is amazing, but quite some targets won't compile
this mode is amazing but requires you to build llvm 11 yourself
- Added llvm_mode NGRAM prev_loc coverage by Adrean Herrera
(https://github.com/adrianherrera/afl-ngram-pass/), activate by setting
AFL_LLVM_INSTRUMENT=NGRAM-<value> or AFL_LLVM_NGRAM_SIZE=<value>

View File

@ -58,19 +58,20 @@ endif
ifeq "$(LLVM_MAJOR)" "9"
$(info [+] llvm_mode detected llvm 9, enabling neverZero implementation)
$(info [+] llvm_mode detected llvm 9, enabling afl-clang-lto LTO implementation)
LLVM_LTO = 1
endif
ifeq "$(LLVM_NEW_API)" "1"
$(info [+] llvm_mode detected llvm 10+, enabling neverZero implementation and c++14)
$(info [+] llvm_mode detected llvm 9, enabling afl-clang-lto LTO implementation)
LLVM_STDCXX = c++14
endif
ifeq "$(LLVM_MAJOR)" "11"
$(info [+] llvm_mode detected llvm 11, enabling afl-clang-lto LTO implementation)
LLVM_LTO = 1
endif
ifeq "$(LLVM_LTO)" "0"
$(info [+] llvm_mode detected llvm < 9, afl-clang-lto LTO will not be build.)
$(info [+] llvm_mode detected llvm < 11, afl-clang-lto LTO will not be build.)
endif
ifeq "$(LLVM_APPLE)" "1"
@ -125,14 +126,18 @@ else
endif
endif
ifneq "$(AFL_CLANG_FLTO)" ""
ifeq "$(AFL_REAL_LD)" ""
AFL_REAL_LD = $(shell readlink /bin/ld 2>/dev/null)
ifeq "$(AFL_REAL_LD)" ""
AFL_REAL_LD = $(shell readlink /usr/bin/ld 2>/dev/null)
ifeq "$(LLVM_LTO)" "1"
ifneq "$(AFL_CLANG_FLTO)" ""
ifeq "$(AFL_REAL_LD)" ""
# Use the ld.lld that ships next to the detected llvm binaries as the real
# linker. If it cannot be resolved, tell the user and switch LTO mode off.
# Note: the GNU make function is $(warning ...); "$(warn ...)" is not a
# function and would silently expand to nothing.
ifneq "$(shell readlink $(LLVM_BINDIR)/ld.lld 2>&1)" ""
AFL_REAL_LD = $(LLVM_BINDIR)/ld.lld
else
$(warning ld.lld not found, can not enable LTO mode)
LLVM_LTO = 0
endif
endif
endif
endif
endif
AFL_CLANG_FUSELD=
ifneq "$(AFL_CLANG_FLTO)" ""
@ -257,10 +262,11 @@ ifeq "$(LLVM_LTO)" "1"
$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
ln -sf afl-ld ../ld
@rm -f .test-instr
@-export AFL_QUIET=1 AFL_PATH=.. PATH="..:$(PATH)" ; ../afl-clang-lto -Wl,--afl -o .test-instr ../test-instr.c && echo "[+] afl-clang-lto and afl-ld seem to work fine :)" || echo "[!] WARNING: clang seems to have a hardcoded "'/bin/ld'" - check README.lto"
@-export AFL_QUIET=1 AFL_PATH=.. PATH="..:$(PATH)" ; ../afl-clang-lto -o .test-instr ../test-instr.c && echo "[+] afl-clang-lto seems to work fine :)" || echo "[!] WARNING: clang seems to have a hardcoded "'/bin/ld'" - check README.lto"
@rm -f .test-instr
endif
endif
../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps
-$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL)
@ -275,9 +281,9 @@ ifeq "$(LLVM_LTO)" "1"
$(CXX) $(CLANG_CFL) -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL)
endif
../afl-llvm-lto-instrumentation.so: afl-llvm-lto-instrumentation.so.cc MarkNodes.cc
../afl-llvm-lto-instrumentation.so: afl-llvm-lto-instrumentation.so.cc
ifeq "$(LLVM_LTO)" "1"
$(CXX) $(CLANG_CFL) -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL)
$(CXX) $(CLANG_CFL) -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL)
endif
# laf
@ -323,7 +329,7 @@ all_done: test_build
install: all
install -d -m 755 $${DESTDIR}$(BIN_PATH) $${DESTDIR}$(HELPER_PATH) $${DESTDIR}$(DOC_PATH) $${DESTDIR}$(MISC_PATH)
if [ -f ../afl-clang-fast -a -f ../libLLVMInsTrim.so -a -f ../afl-llvm-rt.o ]; then set -e; install -m 755 ../afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 ../libLLVMInsTrim.so ../afl-llvm-pass.so ../afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi
if [ -f ../afl-clang-lto -a -f ../afl-ld ]; then set -e; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto++; install -m 755 ../afl-ld $${DESTDIR}$(HELPER_PATH); ln -sf afl-ld $${DESTDIR}$(HELPER_PATH)/ld; install -m 755 ../afl-llvm-lto-instrumentation.so $${DESTDIR}$(HELPER_PATH); install -m 755 ../afl-llvm-lto-whitelist.so $${DESTDIR}$(HELPER_PATH); fi
if [ -f ../afl-clang-lto ]; then set -e; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto++; install -m 755 ../afl-llvm-lto-instrumentation.so $${DESTDIR}$(HELPER_PATH); install -m 755 ../afl-llvm-lto-whitelist.so $${DESTDIR}$(HELPER_PATH); fi
if [ -f ../afl-llvm-rt-32.o ]; then set -e; install -m 755 ../afl-llvm-rt-32.o $${DESTDIR}$(HELPER_PATH); fi
if [ -f ../afl-llvm-rt-64.o ]; then set -e; install -m 755 ../afl-llvm-rt-64.o $${DESTDIR}$(HELPER_PATH); fi
if [ -f ../compare-transform-pass.so ]; then set -e; install -m 755 ../compare-transform-pass.so $${DESTDIR}$(HELPER_PATH); fi

View File

@ -2,16 +2,15 @@
## TLDR;
1. This compile mode is very fickle: if it works it is amazing, if it fails
- well, use afl-clang-fast
This version requires a current llvm 11 compiled from the github master.
2. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better
1. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better
coverage than anything else that is out there in the AFL world
3. You can use it together with llvm_mode: laf-intel and whitelisting
2. You can use it together with llvm_mode: laf-intel and whitelisting
features and can be combined with cmplog/Redqueen
4. It only works with llvm 9 (and likely 10+ but is not tested there yet)
3. It only works with llvm 11 (current github master state)
## Introduction and problem description
@ -63,6 +62,26 @@ afl-llvm-lto++2.62d by Marc "vanHauser" Heuse <mh@mh-sec.de>
[+] Linker was successful
```
## Building llvm 11
```
$ sudo apt install binutils-dev
$ git clone https://github.com/llvm/llvm-project
$ cd llvm-project
$ mkdir build
$ cd build
$ cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/
$ make
$ export PATH=`pwd`/bin:$PATH
$ export LLVM_CONFIG=`pwd`/bin/llvm-config
$ cd /path/to/AFLplusplus/
$ make
$ cd llvm_mode
$ make
$ cd ..
$ make install
```
## How to use afl-clang-lto
Just use afl-clang-lto like you did afl-clang-fast or afl-gcc.
@ -94,129 +113,12 @@ AR=llvm-ar RANLIB=llvm-ranlib CC=afl-clang-lto CXX=afl-clang-lto++ ./configure -
```
and on some targets you have to set AR=/RANLIB= even for make, as the configure script does not save it ...
### "linking globals named '...': symbol multiply defined" error
The target program is using multiple global variables or functions with the
same name. This is a common error when compiling a project with LTO, and
the fix is `-Wl,--allow-multiple-definition` - however llvm-link which we
need to link all llvm IR LTO files does not support this - yet (hopefully).
Hence if you see this error either you have to remove the duplicate global
variable (think `#ifdef` ...) or you are out of luck. :-(
### "expected top-level entity" + binary ouput error
This happens if multiple .a archives are to be linked and they contain the
same object filenames, the first in LTO form, the other in ELF form.
This can not be fixed programmatically, but can be fixed by hand.
You can try to delete the file from either archive
(`llvm-ar d <archive>.a <file>.o`) or performing the llvm-linking, optimizing
and instrumentation by hand (see below).
### "undefined reference to ..."
This *can* be the opposite situation of the "expected top-level entity" error -
the library with the ELF file is before the LTO library.
However it can also be a bug in the program - try to compile it normally. If
that fails too, then it is a bug in the program.
Solutions: You can try to delete the file from either archive, e.g.
(`llvm-ar d <archive>.a <file>.o`) or performing the llvm-linking, optimizing
and instrumentation by hand (see below).
### "File format not recognized"
This happens if the build system has fixed LDFLAGS, CPPFLAGS, CXXFLAGS and/or
CFLAGS. Ensure that they all contain the `-flto` flag that afl-clang-lto was
compiled with (you can see that by typing `afl-clang-lto -h` and inspecting
the last line of the help output) and add them otherwise
### clang is hardcoded to /bin/ld
Some clang packages have 'ld' hardcoded to /bin/ld. This is an issue as this
prevents "our" afl-ld being called.
-fuse-ld=/path/to/afl-ld should be set through makefile magic in llvm_mode -
if it is supported - however if this fails you can try:
```
LDFLAGS=-fuse-ld=</path/to/afl-ld>
```
As workaround attempt #2 you will have to switch /bin/ld:
```
mv /bin/ld /bin/ld.orig
cp afl-ld /bin/ld
```
This can result in two problems though:
!1!
When compiling afl-ld, the build process looks at where the /bin/ld link
is going to. So when the workaround was applied and a recompiling afl-ld
is performed then the link is gone and the new afl-ld clueless where
the real ld is.
In this case set AFL_REAL_LD=/bin/ld.orig
!2!
When you install an updated gcc/clang/... package, your OS might restore
the ld link.
### Performing the steps by hand
It is possible to perform all the steps afl-ld does by hand to work around
issues in the target.
1. Recompile with AFL_DEBUG=1 and collect the afl-clang-lto command that fails
e.g.: `AFL_DEBUG=1 make 2>&1 | grep afl-clang-lto | tail -n 1`
2. run this command prepended with AFL_DEBUG=1 and collect the afl-ld command
parameters, e.g. `AFL_DEBUG=1 afl-clang-lto[++] .... | grep /afl/ld`
3. for every .a archive you want to instrument unpack it into a separate
directory, e.g.
`mkdir archive1.dir ; cd archive1.dir ; llvm-ar x ../<archive>.a`
4. run `file archive*.dir/*.o` and make two lists, one containing all ELF files
and one containing all LLVM IR bitcode files.
You do the same for all .o files of the ../afl/ld command options
5. Create a single bitcode file by using llvm-link, e.g.
`llvm-link -o all-bitcode.bc <list of all LLVM IR .o files>`
If this fails it is game over - or you modify the source code
6. Run the optimizer on the new bitcode file:
`opt -O3 --polly -o all-optimized.bc all-bitcode.bc`
7. Instrument the optimized bitcode file:
`opt --load=$AFL_PATH/afl-llvm-lto-instrumentation.so --disable-opt --afl-lto all-optimized.bc -o all-instrumented.bc`
8. If the parameter `--allow-multiple-definition` is not in the list, add it
as first command line option.
9. Link everything together.
a) You use the afl-ld command and instead of e.g. `/usr/local/lib/afl/ld`
you replace that with `ld`, the real linker.
b) Every .a archive you instrumented files from you remove the <archive>.a
or -l<archive> from the command
c) If you have entries in your ELF files list (see step 4), you put them to
the command line - put them in the same order!
d) put the all-instrumented.bc before the first library or .o file
e) run the command and hope it compiles, if it doesn't you have to analyze
what the issue is and fix that in the appropriate step above.
Yes, this is long and complicated. That is why there is afl-ld doing this, and
that is why this can easily fail and not all the different ways in which it
*can* fail can be handled ...
### compiling programs still fail
afl-clang-lto is still work in progress.
Complex targets are still likely not to compile and this needs to be fixed.
Please report issues at:
[https://github.com/AFLplusplus/AFLplusplus/issues/226](https://github.com/AFLplusplus/AFLplusplus/issues/226)
Known issues:
* ffmpeg
* bogofilter
* libjpeg-turbo-1.3.1
## Upcoming Work
1. Currently the LTO whitelist feature does not allow to not instrument main, start and init functions
@ -225,15 +127,6 @@ Known issues:
Result: faster fork in the target and faster map analysis in afl-fuzz
=> more speed :-)
## Tested and working targets
* libpng-1.2.53
* libxml2-2.9.2
* tiff-4.0.4
* unrar-nonfree-5.6.6
* exiv 0.27
* jpeg-6b
## History
This was originally envisioned by hexcoder- in Summer 2019, however we saw no
@ -252,11 +145,17 @@ very difficult with a program that has so many paths and therefore so many
dependencies. A lot of strategies were implemented - and failed.
And then sat solvers were tried, but with over 10.000 variables that turned
out to be a dead-end too.
The final idea to solve this came from domenukk who proposed to insert a block
into an edge and then just use incremental counters ... and this worked!
After some trials and errors to implement this vanhauser-thc found out that
there is actually an llvm function for this: SplitEdge() :-)
Still more problems came up though as this only works without bugs from
llvm 9 onwards, and with high optimization the link optimization ruins
the instrumented control flow graph.
As long as there are no larger changes in llvm this all should work well now ...
This is all now fixed with llvm 11. The llvm's own linker is now able to
load passes and this bypasses all problems we had.
Happy end :)

View File

@ -269,12 +269,6 @@ static void edit_params(u32 argc, char **argv, char **envp) {
if (instrument_mode == INSTRUMENT_LTO) {
char *old_path = getenv("PATH");
char *new_path = alloc_printf("%s:%s", AFL_PATH, old_path);
setenv("PATH", new_path, 1);
setenv("AFL_LD", "1", 1);
if (getenv("AFL_LLVM_WHITELIST") != NULL) {
cc_params[cc_par_cnt++] = "-Xclang";
@ -285,13 +279,10 @@ static void edit_params(u32 argc, char **argv, char **envp) {
}
#ifdef AFL_CLANG_FUSELD
cc_params[cc_par_cnt++] = alloc_printf("-fuse-ld=%s/afl-ld", AFL_PATH);
#endif
cc_params[cc_par_cnt++] = "-B";
cc_params[cc_par_cnt++] = AFL_PATH;
cc_params[cc_par_cnt++] = alloc_printf("-fuse-ld=%s", AFL_REAL_LD);
cc_params[cc_par_cnt++] = "-Wl,--allow-multiple-definition";
cc_params[cc_par_cnt++] = alloc_printf(
"-Wl,-mllvm=-load=%s/afl-llvm-lto-instrumentation.so", obj_path);
cc_params[cc_par_cnt++] = lto_flag;
} else {
@ -738,9 +729,7 @@ int main(int argc, char **argv, char **envp) {
"bb\n"
"AFL_LLVM_LTO_DONTWRITEID: don't write the highest ID used to a "
"global var\n"
"AFL_REAL_LD: use this linker instead of the compiled in path\n"
"AFL_LD_PASSTHROUGH: do not perform instrumentation (for configure "
"scripts)\n"
"AFL_REAL_LD: use this lld linker instead of the compiled in path\n"
"\nafl-clang-lto was built with linker target \"%s\" and LTO flags "
"\"%s\"\n"
"If anything fails - be sure to read README.lto.md!\n",

View File

@ -1,839 +0,0 @@
/*
american fuzzy lop++ - wrapper for GNU ld
-----------------------------------------
Written by Marc Heuse <mh@mh-sec.de> for afl++
Maintained by Marc Heuse <mh@mh-sec.de>,
Heiko Eißfeldt <heiko.eissfeldt@hexco.de>
Andrea Fioraldi <andreafioraldi@gmail.com>
Dominik Maier <domenukk@gmail.com>
Copyright 2019-2020 AFLplusplus Project. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
The sole purpose of this wrapper is to preprocess clang LTO files before
linking by ld and perform the instrumentation on the whole program.
*/
#define AFL_MAIN
#include "config.h"
#include "types.h"
#include "debug.h"
#include "alloc-inl.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <dirent.h>
#define MAX_PARAM_COUNT 4096
static u8 **ld_params, /* Parameters passed to the real 'ld' */
**link_params, /* Parameters passed to 'llvm-link' */
**opt_params, /* Parameters passed to 'opt' opt */
**inst_params; /* Parameters passed to 'opt' inst */
static u8 *input_file; /* Originally specified input file */
static u8 *final_file, /* Instrumented file for the real 'ld' */
*linked_file, /* file where we link all files */
*modified_file; /* file that was optimized before instr */
static u8 *afl_path = AFL_PATH;
static u8 *real_ld = AFL_REAL_LD;
static u8 cwd[4096];
static u8 *tmp_dir;
static u8 *ar_dir;
static u8 ar_dir_cnt;
static u8 *libdirs[254];
static u8 libdir_cnt;
static u8 be_quiet, /* Quiet mode (no stderr output) */
debug, /* AFL_DEBUG */
passthrough, /* AFL_LD_PASSTHROUGH - no link+optimize*/
we_link, /* we have bc/ll -> link + optimize */
just_version; /* Just show version? */
static u32 ld_param_cnt = 1, /* Number of params to 'ld' */
link_param_cnt = 1, /* Number of params to 'llvm-link' */
opt_param_cnt = 1, /* Number of params to 'opt' opt */
inst_param_cnt = 1; /* Number of params to 'opt' instr */
/* Empty the directory `path` (our AR unpack directory) and remove it.
   Every entry except "." and ".." is unlink()ed; a failing unlink() is fatal.
   Returns 0 when the directory could not be opened or rmdir() succeeded,
   and 1 when rmdir() failed. */
static u8 wipe_directory(u8 *path) {

  DIR *dir = opendir(path);
  if (!dir) return 0;

  struct dirent *entry;

  while ((entry = readdir(dir))) {

    /* never touch the self and parent links */
    if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue;

    u8 *target = alloc_printf("%s/%s", path, entry->d_name);
    if (unlink(target)) PFATAL("Unable to delete '%s'", target);
    ck_free(target);

  }

  closedir(dir);

  return rmdir(path) != 0;

}
/* atexit() hook: delete the temporary link/optimize/instrument files and the
   AR unpack directory, unless AFL_KEEP_ASSEMBLY is set in the environment.
   Each pointer is reset to NULL after its cleanup so a second invocation
   does nothing. */
static void at_exit_handler(void) {

  if (getenv("AFL_KEEP_ASSEMBLY")) return;

  if (linked_file) {

    unlink(linked_file);
    linked_file = NULL;

  }

  if (modified_file) {

    unlink(modified_file);
    modified_file = NULL;

  }

  if (final_file) {

    unlink(final_file);
    final_file = NULL;

  }

  if (ar_dir != NULL) {

    wipe_directory(ar_dir);
    ar_dir = NULL;

  }

}
/* Check whether `file` is an existing, readable LLVM IR file.
   The first four bytes are inspected: "; Mo" is accepted as textual IR
   (.ll - presumably the start of the "; ModuleID" header line) and the byte
   sequence 'B' 'C' 0xc0 0xde is accepted as bitcode (.bc).
   Returns 1 for such a file and 0 otherwise (including when the file cannot
   be opened or is shorter than four bytes). */
int is_llvm_file(const char *file) {

  int     fd;
  u8      buf[5];
  ssize_t got;

  if ((fd = open(file, O_RDONLY)) < 0) {

    if (debug) SAYF(cMGN "[D] " cRST "File %s not found\n", file);
    return 0;

  }

  got = read(fd, buf, 4);

  /* close before evaluating the result so a short read does not leak the
     descriptor */
  close(fd);

  if (got != 4) return 0;

  buf[sizeof(buf) - 1] = 0;

  if (strncmp((char *)buf, "; Mo", 4) == 0) return 1;

  if (buf[0] == 'B' && buf[1] == 'C' && buf[2] == 0xc0 && buf[3] == 0xde)
    return 1;

  return 0;

}
/* Return the current working directory.
   The result lives in the file-level `cwd` buffer, which every call
   overwrites, so this is not thread safe. If getcwd() fails an empty
   string is returned instead. */
u8 *getthecwd() {

  static u8 fail[] = "";

  return getcwd(cwd, sizeof(cwd)) ? cwd : fail;

}
/* Tell whether `ar_file` already occurs in the first `ld_param_cnt` entries
   of `params`. NULL entries are skipped. Returns 1 on a match, 0 otherwise. */
int is_duplicate(u8 **params, u32 ld_param_cnt, u8 *ar_file) {

  u8 **cur = params;
  u8 **end = params + ld_param_cnt;

  while (cur != end) {

    if (*cur != NULL && strcmp(*cur, ar_file) == 0) return 1;
    ++cur;

  }

  return 0;

}
/* Abort when the 'ld' or 'llvm-link' argument vectors cannot take one more
   entry plus the terminating NULL (both hold MAX_PARAM_COUNT pointers). */
static void ensure_param_space(void) {

  if (ld_param_cnt >= MAX_PARAM_COUNT - 1 ||
      link_param_cnt >= MAX_PARAM_COUNT - 1)
    FATAL(
        "Too many command line parameters because of unpacking .a archives, "
        "this would need to be done by hand ... sorry! :-(");

}

/* Examine the linker command line and build the argument vectors for the
   real 'ld' (ld_params), 'llvm-link' (link_params), the optimizing 'opt'
   run (opt_params) and the instrumenting 'opt' run (inst_params).

   - Names of the three temporary files are generated in the temp directory.
   - Every -L path is remembered so that -l<name> options which resolve to a
     .a archive (no .so present) can be detected.
   - .a archives are unpacked with llvm-ar into a temporary directory; LLVM
     IR members go to llvm-link, everything else goes to the real linker.
   - Plain arguments that are LLVM IR files go to llvm-link; the first such
     file inserts `final_file` into the 'ld' command line to keep the order.
   - "-version" short-circuits to a pass-through, "--afl" exits with 0.

   All four vectors are NULL terminated on return. */
static void edit_params(int argc, char **argv) {

  u32 i, have_lto = 0, libdir_index;
  u8  libdir_file[4096];

  if (tmp_dir == NULL) {

    tmp_dir = getenv("TMPDIR");
    if (!tmp_dir) tmp_dir = getenv("TEMP");
    if (!tmp_dir) tmp_dir = getenv("TMP");
    if (!tmp_dir) tmp_dir = "/tmp";

  }

  linked_file =
      alloc_printf("%s/.afl-%u-%u-1.ll", tmp_dir, getpid(), (u32)time(NULL));
  modified_file =
      alloc_printf("%s/.afl-%u-%u-2.bc", tmp_dir, getpid(), (u32)time(NULL));
  final_file =
      alloc_printf("%s/.afl-%u-%u-3.bc", tmp_dir, getpid(), (u32)time(NULL));

  ld_params = ck_alloc(MAX_PARAM_COUNT * sizeof(u8 *));
  link_params = ck_alloc(MAX_PARAM_COUNT * sizeof(u8 *));
  inst_params = ck_alloc(12 * sizeof(u8 *));
  opt_params = ck_alloc(12 * sizeof(u8 *));

  ld_params[0] = (u8 *)real_ld;
  ld_params[ld_param_cnt++] = "--allow-multiple-definition";

  link_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "llvm-link");
  link_params[link_param_cnt++] = "-S";  // we create the linked file as .ll
  link_params[link_param_cnt++] = "-o";
  link_params[link_param_cnt++] = linked_file;

  opt_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "opt");
  if (getenv("AFL_DONT_OPTIMIZE") == NULL)
    opt_params[opt_param_cnt++] = "-O3";
  else
    opt_params[opt_param_cnt++] = "-O0";
  // opt_params[opt_param_cnt++] = "-S"; // only when debugging
  opt_params[opt_param_cnt++] = linked_file;    // input: .ll file
  opt_params[opt_param_cnt++] = "-o";
  opt_params[opt_param_cnt++] = modified_file;  // output: .bc file

  inst_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "opt");
  inst_params[inst_param_cnt++] =
      alloc_printf("--load=%s/afl-llvm-lto-instrumentation.so", afl_path);
  // inst_params[inst_param_cnt++] = "-S"; // only when debugging
  inst_params[inst_param_cnt++] = "--disable-opt";
  inst_params[inst_param_cnt++] = "--afl-lto";
  inst_params[inst_param_cnt++] = modified_file;  // input: .bc file
  inst_params[inst_param_cnt++] = "-o";
  inst_params[inst_param_cnt++] = final_file;     // output: .bc file

  // first we must collect all library search paths
  for (i = 1; i < argc; i++)
    if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == 'L') {

      // libdirs is a fixed size array, refuse to write past its end
      if (libdir_cnt >= sizeof(libdirs) / sizeof(libdirs[0]))
        FATAL("Too many -L library search paths (maximum is %u)",
              (u32)(sizeof(libdirs) / sizeof(libdirs[0])));

      libdirs[libdir_cnt++] = argv[i] + 2;

    }

  // then we inspect all options to the target linker
  for (i = 1; i < argc; i++) {

    ensure_param_space();

    if (strncmp(argv[i], "-flto", 5) == 0) have_lto = 1;

    if (!strcmp(argv[i], "-version")) {

      just_version = 1;
      ld_params[1] = argv[i];
      ld_params[2] = NULL;
      final_file = input_file;
      return;

    }

    if (strcmp(argv[i], "--afl") == 0) {

      if (!be_quiet) OKF("afl++ test command line flag detected, exiting.");
      exit(0);

    }

    // if a -l library is linked and no .so is found but an .a archive is there
    // then the archive will be used. So we have to emulate this and check
    // if an archive will be used and if yes we will instrument it too
    libdir_file[0] = 0;
    libdir_index = libdir_cnt;  // == libdir_cnt means "no archive found"
    if (strncmp(argv[i], "-l", 2) == 0 && libdir_cnt > 0 &&
        strncmp(argv[i], "-lgcc", 5) != 0) {

      u8 found = 0;

      for (uint32_t j = 0; j < libdir_cnt && !found; j++) {

        snprintf(libdir_file, sizeof(libdir_file), "%s/lib%s%s", libdirs[j],
                 argv[i] + 2, ".so");
        if (access(libdir_file, R_OK) != 0) {  // no .so found?

          snprintf(libdir_file, sizeof(libdir_file), "%s/lib%s%s", libdirs[j],
                   argv[i] + 2, ".a");
          if (access(libdir_file, R_OK) == 0) {  // but .a found?

            libdir_index = j;
            found = 1;
            if (debug) SAYF(cMGN "[D] " cRST "Found %s\n", libdir_file);

          }

        } else {

          found = 1;
          if (debug) SAYF(cMGN "[D] " cRST "Found %s\n", libdir_file);

        }

      }

    }

    // is the parameter an .a AR archive? If so, unpack and check its files
    if (libdir_index < libdir_cnt ||
        (argv[i][0] != '-' && strlen(argv[i]) > 2 &&
         argv[i][strlen(argv[i]) - 1] == 'a' &&
         argv[i][strlen(argv[i]) - 2] == '.')) {

      // This gets a bit odd. I encountered several .a files being linked and
      // where the same "foo.o" was in both .a archives. llvm-link does not
      // like this so we have to work around that ...

      u8             this_wd[4096], *this_ar;
      u8             ar_params_cnt = 4;
      u8 *           ar_params[ar_params_cnt];
      u8 *           file = argv[i];
      s32            pid, status;
      DIR *          arx;
      struct dirent *dir_ent;

      if (libdir_index < libdir_cnt) file = libdir_file;

      if (ar_dir_cnt == 0) {  // first archive, we setup up the basics

        ar_dir = alloc_printf("%s/.afl-%u-%u.dir", tmp_dir, getpid(),
                              (u32)time(NULL));
        if (mkdir(ar_dir, 0700) != 0)
          FATAL("can not create temporary directory %s", ar_dir);

      }

      if (getcwd(this_wd, sizeof(this_wd)) == NULL)
        FATAL("can not get the current working directory");
      if (chdir(ar_dir) != 0)
        FATAL("can not chdir to temporary directory %s", ar_dir);

      // llvm-ar runs inside ar_dir, so the archive needs an absolute path
      if (file[0] == '/')
        this_ar = file;
      else
        this_ar = alloc_printf("%s/%s", this_wd, file);

      ar_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "llvm-ar");
      ar_params[1] = "x";
      ar_params[2] = this_ar;
      ar_params[3] = NULL;

      if (!be_quiet) OKF("Running ar unpacker on %s into %s", this_ar, ar_dir);

      if (debug) {

        SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
        // stop at the terminator, passing NULL to %s is undefined
        for (uint32_t j = 0; ar_params[j] != NULL; j++)
          SAYF(" \"%s\"", ar_params[j]);
        SAYF("\n");

      }

      if (!(pid = fork())) {

        execvp(ar_params[0], (char **)ar_params);
        FATAL("Oops, failed to execute '%s'", ar_params[0]);

      }

      if (pid < 0) FATAL("fork() failed");
      if (waitpid(pid, &status, 0) <= 0) FATAL("waitpid() failed");

      // an llvm-ar that was killed by a signal is a failure as well
      if (!WIFEXITED(status)) exit(1);
      if (WEXITSTATUS(status) != 0) exit(WEXITSTATUS(status));

      if (chdir(this_wd) != 0)
        FATAL("can not chdir back to our working directory %s", this_wd);

      if (!(arx = opendir(ar_dir))) FATAL("can not open directory %s", ar_dir);

      while ((dir_ent = readdir(arx)) != NULL) {

        // every member can add one entry to each vector, so check per member
        ensure_param_space();

        u8 *   ar_file = alloc_printf("%s/%s", ar_dir, dir_ent->d_name);
        size_t name_len = strlen(dir_ent->d_name);

        // the length check keeps the [name_len - 2] access inside the name
        if (name_len >= 2 && dir_ent->d_name[name_len - 1] == 'o' &&
            dir_ent->d_name[name_len - 2] == '.') {

          if (passthrough || is_llvm_file(ar_file) == 0) {

            if (is_duplicate(ld_params, ld_param_cnt, ar_file) == 0) {

              ld_params[ld_param_cnt++] = ar_file;
              if (debug)
                SAYF(cMGN "[D] " cRST "not a LTO link file: %s\n", ar_file);

            }

          } else {

            if (is_duplicate(link_params, link_param_cnt, ar_file) == 0) {

              if (we_link == 0) {  // we have to honor order ...

                ld_params[ld_param_cnt++] = final_file;
                we_link = 1;

              }

              link_params[link_param_cnt++] = ar_file;
              if (debug) SAYF(cMGN "[D] " cRST "is a link file: %s\n", ar_file);

            }

          }

        } else

            if (dir_ent->d_name[0] != '.' && !be_quiet)
          WARNF("Unusual file found in ar archive %s: %s", argv[i], ar_file);

      }

      closedir(arx);
      ar_dir_cnt++;

      continue;

    }

    if (passthrough || argv[i][0] == '-' || is_llvm_file(argv[i]) == 0) {

      // -O3 breaks the CFG and the instrumentation, so we downgrade to O2
      // which is as we want things. Lets hope this is not too different
      // in the various llvm versions!
      if (strncmp(argv[i], "-plugin-opt=O", 13) == 0 &&
          !getenv("AFL_DONT_OPTIMIZE"))
        ld_params[ld_param_cnt++] = "-plugin-opt=O2";
      else
        ld_params[ld_param_cnt++] = argv[i];

    } else {

      if (we_link == 0) {  // we have to honor order ...

        ld_params[ld_param_cnt++] = final_file;
        we_link = 1;

      }

      link_params[link_param_cnt++] = argv[i];

    }

  }

  // if (have_lto == 0) ld_params[ld_param_cnt++] = AFL_CLANG_FLTO; // maybe we
  // should not ...
  ld_params[ld_param_cnt] = NULL;
  link_params[link_param_cnt] = NULL;
  opt_params[opt_param_cnt] = NULL;
  inst_params[inst_param_cnt] = NULL;

}
/* Remove the afl++ helper directory from PATH so that the real linker does
   not find "our" ld again.

   The first PATH entry is always dropped (it is the one that was prepended
   for the LTO run). From the remaining entries every one that is exactly
   AFL_PATH is removed as well - unless AFL_PATH is "/bin" or "/usr/bin" or
   is at most one character long, in which case the rest of PATH is kept
   untouched. Does nothing when PATH is not set. */
void clean_path() {

  char *path = getenv("PATH");
  char *newpath;

  if (path == NULL) return;  // nothing to clean

  if (debug)
    SAYF(cMGN "[D]" cRST " old PATH=%s, AFL_PATH=%s\n", path, AFL_PATH);

  // skip the first entry and the separator(s) that follow it
  while (*path != 0 && *path != ':')
    path++;
  while (*path == ':')
    path++;

  const size_t pathlen = strlen(path);
  const size_t afl_pathlen = strlen(AFL_PATH);

  // system directories must stay in PATH even if afl++ was installed there
  const int strip_afl = afl_pathlen > 1 && strcmp(AFL_PATH, "/bin") != 0 &&
                        strcmp(AFL_PATH, "/usr/bin") != 0;

  // the result is never longer than the input, build it in a private copy
  // because `path` points into the environment that setenv() replaces
  newpath = malloc(pathlen + 1);
  if (newpath == NULL) FATAL("out of memory while cleaning PATH");

  size_t      out = 0;
  const char *cur = path;

  while (*cur != 0) {

    const char * sep = strchr(cur, ':');
    const size_t len = sep ? (size_t)(sep - cur) : strlen(cur);

    // only a complete entry equal to AFL_PATH is removed, never a substring
    const int is_afl =
        strip_afl && len == afl_pathlen && strncmp(cur, AFL_PATH, len) == 0;

    if (!is_afl) {

      if (out > 0) newpath[out++] = ':';
      memcpy(newpath + out, cur, len);
      out += len;

    }

    if (sep == NULL) break;
    cur = sep + 1;

  }

  newpath[out] = 0;

  setenv("PATH", newpath, 1);

  if (debug) SAYF(cMGN "[D]" cRST " new PATH=%s\n", getenv("PATH"));

  free(newpath);

}
/* Translate a waitpid() status into an exit code. A child that did not exit
   normally (e.g. it was killed by a signal) counts as failure (1). */
static int child_exit_code(int status) {

  return WIFEXITED(status) ? WEXITSTATUS(status) : 1;

}

/* Main entry point.

   Without AFL_LD in the environment the real linker is exec'ed with the
   unmodified arguments. Otherwise the command line is split by
   edit_params() and - if LLVM IR input was found - llvm-link, 'opt'
   (optimize) and 'opt' (instrument) are run one after the other before the
   real linker is started on the result. Temporary files are removed
   afterwards unless AFL_KEEP_ASSEMBLY is set. The process exits with the
   exit code of the real linker. */
int main(int argc, char **argv) {

  s32     pid, i;
  int     status;
  u8 *    ptr, exe[4096], exe2[4096], proc[32], val[2] = " ";
  int     have_afl_ld_caller = 0;
  ssize_t exe_len, exe2_len;

  if (isatty(2) && !getenv("AFL_QUIET") && !getenv("AFL_DEBUG")) {

    if (getenv("AFL_LD") != NULL)
      SAYF(cCYA "afl-ld" VERSION cRST
                " by Marc \"vanHauser\" Heuse <mh@mh-sec.de> (level %d)\n",
           have_afl_ld_caller);

  } else

    be_quiet = 1;

  if (getenv("AFL_DEBUG") != NULL) debug = 1;
  if (getenv("AFL_PATH") != NULL) afl_path = getenv("AFL_PATH");
  if (getenv("AFL_LD_PASSTHROUGH") != NULL) passthrough = 1;
  if (getenv("AFL_REAL_LD") != NULL) real_ld = getenv("AFL_REAL_LD");

  if (real_ld == NULL || strlen(real_ld) < 2) real_ld = "/bin/ld";
  if (real_ld != NULL && real_ld[0] != '/')
    real_ld = alloc_printf("/bin/%s", real_ld);

  // AFL_LD_CALLER counts how deep afl-ld is nested, to detect self-calls
  if ((ptr = getenv("AFL_LD_CALLER")) != NULL) have_afl_ld_caller = atoi(ptr);
  val[0] = 0x31 + have_afl_ld_caller;
  setenv("AFL_LD_CALLER", val, 1);

  if (debug) {

    SAYF(cMGN "[D] " cRST
              "AFL_LD=%s, set AFL_LD_CALLER=%s, have_afl_ld_caller=%d, "
              "real_ld=%s\n",
         getenv("AFL_LD"), val, have_afl_ld_caller, real_ld);
    SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
    for (i = 0; i < argc; i++)
      SAYF(" \"%s\"", argv[i]);
    SAYF("\n");

  }

  // refuse to run if the configured "real" linker is this very binary.
  // readlink() does not NUL terminate, so terminate at the returned length.
  sprintf(proc, "/proc/%d/exe", getpid());
  exe_len = readlink(proc, exe, sizeof(exe) - 1);
  if (exe_len > 0) {

    exe[exe_len] = 0;

    exe2_len = readlink(real_ld, exe2, sizeof(exe2) - 1);
    if (exe2_len < 1) exe2_len = 0;  // not a symlink: compare against ""
    exe2[exe2_len] = 0;

    if (strcmp(exe, real_ld) == 0 || strcmp(exe, exe2) == 0)
      PFATAL(cLRD "[!] " cRST
                  "Error: real 'ld' path points to afl-ld, set AFL_REAL_LD to "
                  "the real 'ld' program!");

  }

  if (have_afl_ld_caller > 1)
    PFATAL(cLRD "[!] " cRST
                "Error: afl-ld calls itself in a loop, set AFL_REAL_LD to the "
                "real 'ld' program!");

  if (argc < 2) {

    SAYF(
        "\n"
        "This is a helper application for afl-fuzz. It is a wrapper around GNU "
        "'ld',\n"
        "executed by the toolchain whenever using "
        "afl-clang-lto/afl-clang-lto++.\n"
        "You probably don't want to run this program directly.\n\n"

        "Environment variables:\n"
        "  AFL_LD_PASSTHROUGH   do not link+optimize == no instrumentation\n"
        "  AFL_REAL_LD          point to the real ld if necessary\n"

        "\nafl-ld was compiled with the fixed real 'ld' path of %s and the "
        "clang "
        "bin path of %s\n\n",
        real_ld, LLVM_BINDIR);

    exit(1);

  }

  if (getenv("AFL_LD") == NULL) {

    /* if someone install clang/ld into the same directory as afl++ then
       they are out of luck ... */

    if (have_afl_ld_caller == 1) { clean_path(); }

    if (real_ld != NULL && strlen(real_ld) > 1) execvp(real_ld, argv);
    execvp("ld", argv);  // fallback
    PFATAL("Oops, failed to execute 'ld' - check your PATH");

  }

  atexit(at_exit_handler);  // ensure to wipe temp files if things fail

  edit_params(argc, argv);  // here most of the magic happens :-)

  if (debug)
    SAYF(cMGN "[D] " cRST
              "param counts: ar:%u lib:%u ld:%u link:%u opt:%u instr:%u\n",
         ar_dir_cnt, libdir_cnt, ld_param_cnt, link_param_cnt, opt_param_cnt,
         inst_param_cnt);

  if (!just_version) {

    if (we_link == 0) {

      if (!getenv("AFL_QUIET"))
        WARNF("No LTO input file found, cannot instrument!");

    } else {

      /* first we link all files */
      if (!be_quiet) OKF("Running bitcode linker, creating %s", linked_file);

      if (debug) {

        SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
        for (i = 0; i < link_param_cnt; i++)
          SAYF(" \"%s\"", link_params[i]);
        SAYF("\n");

      }

      if (!(pid = fork())) {

        execvp(link_params[0], (char **)link_params);
        FATAL("Oops, failed to execute '%s'", link_params[0]);

      }

      if (pid < 0) PFATAL("fork() failed");
      if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
      if (child_exit_code(status) != 0) {

        SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD
             "\n[-] PROGRAM ABORT : " cRST);
        SAYF(
            "llvm-link failed! Probable causes:\n\n"
            " #1  If the error is \"linking globals named '...': symbol "
            "multiply defined\"\n"
            "     then there is nothing we can do - llvm-link is missing an "
            "important feature\n\n"
            " #2  If the error is \"expected top-level entity\" and then "
            "binary output, this\n"
            "     is because the same file is present in different .a archives "
            "in different\n"
            "     formats. This can be fixed by manual doing the steps afl-ld "
            "is doing but\n"
            "     programmatically - sorry!\n\n");
        exit(child_exit_code(status));

      }

      /* then we perform an optimization on the collected objects files */
      if (!be_quiet)
        OKF("Performing optimization via opt, creating %s", modified_file);
      if (debug) {

        SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
        for (i = 0; i < opt_param_cnt; i++)
          SAYF(" \"%s\"", opt_params[i]);
        SAYF("\n");

      }

      if (!(pid = fork())) {

        execvp(opt_params[0], (char **)opt_params);
        FATAL("Oops, failed to execute '%s'", opt_params[0]);

      }

      if (pid < 0) PFATAL("fork() failed");
      if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
      if (child_exit_code(status) != 0) exit(child_exit_code(status));

      /* then we run the instrumentation through the optimizer */
      if (!be_quiet)
        OKF("Performing instrumentation via opt, creating %s", final_file);
      if (debug) {

        SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
        for (i = 0; i < inst_param_cnt; i++)
          SAYF(" \"%s\"", inst_params[i]);
        SAYF("\n");

      }

      if (!(pid = fork())) {

        execvp(inst_params[0], (char **)inst_params);
        FATAL("Oops, failed to execute '%s'", inst_params[0]);

      }

      if (pid < 0) PFATAL("fork() failed");
      if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
      if (child_exit_code(status) != 0) exit(child_exit_code(status));

    }

    /* next step - run the linker! :-) */

  }

  if (!be_quiet) OKF("Running real linker %s", real_ld);
  if (debug) {

    SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
    for (i = 0; i < ld_param_cnt; i++)
      SAYF(" \"%s\"", ld_params[i]);
    SAYF("\n");

  }

  if (!(pid = fork())) {

    // the real linker must neither find nor recognise afl-ld again
    clean_path();

    unsetenv("AFL_LD");

    if (strlen(real_ld) > 1) execvp(real_ld, (char **)ld_params);
    execvp("ld", (char **)ld_params);  // fallback
    FATAL("Oops, failed to execute 'ld' - check your PATH");

  }

  if (pid < 0) PFATAL("fork() failed");

  if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
  if (debug) SAYF(cMGN "[D] " cRST "linker result: %d\n", status);

  if (!just_version) {

    if (!getenv("AFL_KEEP_ASSEMBLY")) {

      // same cleanup as on abnormal exit: temp files + ar unpack directory
      at_exit_handler();

    } else {

      if (!be_quiet) {

        SAYF(
            "[!] afl-ld: keeping link file %s, optimized bitcode %s and "
            "instrumented bitcode %s",
            linked_file, modified_file, final_file);
        if (ar_dir_cnt > 0 && ar_dir)
          SAYF(" and ar archive unpack directory %s", ar_dir);
        SAYF("\n");

      }

    }

    if (status == 0) {

      if (!be_quiet) OKF("Linker was successful");

    } else {

      SAYF(cLRD "[-] " cRST
                "Linker failed, please investigate and send a bug report. Most "
                "likely an 'ld' option is incompatible with %s. Try "
                "AFL_KEEP_ASSEMBLY=1 and AFL_DEBUG=1 for replaying.\n",
           AFL_CLANG_FLTO);

    }

  }

  exit(child_exit_code(status));

}

View File

@ -378,7 +378,8 @@ bool AFLLTOPass::runOnModule(Module &M) {
M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc", 0,
GlobalVariable::GeneralDynamicTLSModel, 0, false);
ConstantInt *const_loc = ConstantInt::get(Int32Ty, afl_global_id);
AFLFinalLoc->setAlignment(4);
MaybeAlign Align = MaybeAlign(4);
AFLFinalLoc->setAlignment(Align);
AFLFinalLoc->setInitializer(const_loc);
}
@ -423,5 +424,5 @@ static RegisterPass<AFLLTOPass> X("afl-lto", "afl++ LTO instrumentation pass",
false, false);
static RegisterStandardPasses RegisterAFLLTOPass(
PassManagerBuilder::EP_OptimizerLast, registerAFLLTOPass);
PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerAFLLTOPass);