mirror of
https://github.com/AFLplusplus/AFLplusplus.git
synced 2025-06-11 09:41:35 +00:00
Merge branch 'dev' into docs_edit_readme_frida_mode_qemu_mode
This commit is contained in:
commit
da13111117
@ -16,6 +16,8 @@ env NO_ARCH_OPT 1
|
||||
RUN apt-get update && \
|
||||
apt-get -y install --no-install-suggests --no-install-recommends \
|
||||
automake \
|
||||
cmake \
|
||||
meson \
|
||||
ninja-build \
|
||||
bison flex \
|
||||
build-essential \
|
||||
|
@ -308,7 +308,7 @@ ifeq "$(TEST_MMAP)" "1"
|
||||
endif
|
||||
|
||||
PROGS_ALWAYS = ./afl-cc ./afl-compiler-rt.o ./afl-compiler-rt-32.o ./afl-compiler-rt-64.o
|
||||
PROGS = $(PROGS_ALWAYS) ./afl-llvm-pass.so ./SanitizerCoveragePCGUARD.so ./split-compares-pass.so ./split-switches-pass.so ./cmplog-routines-pass.so ./cmplog-instructions-pass.so ./cmplog-switches-pass.so ./afl-llvm-dict2file.so ./compare-transform-pass.so ./afl-ld-lto ./afl-llvm-lto-instrumentlist.so ./afl-llvm-lto-instrumentation.so ./SanitizerCoverageLTO.so
|
||||
PROGS = $(PROGS_ALWAYS) ./afl-llvm-pass.so ./SanitizerCoveragePCGUARD.so ./split-compares-pass.so ./split-switches-pass.so ./cmplog-routines-pass.so ./cmplog-instructions-pass.so ./cmplog-switches-pass.so ./afl-llvm-dict2file.so ./compare-transform-pass.so ./afl-ld-lto ./afl-llvm-lto-instrumentlist.so ./SanitizerCoverageLTO.so
|
||||
|
||||
# If prerequisites are not given, warn, do not build anything, and exit with code 0
|
||||
ifeq "$(LLVMVER)" ""
|
||||
@ -408,11 +408,6 @@ ifeq "$(LLVM_LTO)" "1"
|
||||
endif
|
||||
|
||||
./SanitizerCoverageLTO.so: instrumentation/SanitizerCoverageLTO.so.cc
|
||||
ifeq "$(LLVM_LTO)" "1"
|
||||
$(CXX) $(CLANG_CPPFL) -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) instrumentation/afl-llvm-common.o
|
||||
endif
|
||||
|
||||
./afl-llvm-lto-instrumentation.so: instrumentation/afl-llvm-lto-instrumentation.so.cc instrumentation/afl-llvm-common.o
|
||||
ifeq "$(LLVM_LTO)" "1"
|
||||
$(CXX) $(CLANG_CPPFL) -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) instrumentation/afl-llvm-common.o
|
||||
$(CLANG_BIN) $(CFLAGS_SAFE) $(CPPFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -fPIC -c instrumentation/afl-llvm-rt-lto.o.c -o ./afl-llvm-rt-lto.o
|
||||
@ -480,7 +475,7 @@ install: all
|
||||
@if [ -f ./afl-cc ]; then set -e; install -m 755 ./afl-cc $${DESTDIR}$(BIN_PATH); ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-c++; fi
|
||||
@rm -f $${DESTDIR}$(HELPER_PATH)/afl-llvm-rt*.o $${DESTDIR}$(HELPER_PATH)/afl-gcc-rt*.o
|
||||
@if [ -f ./afl-compiler-rt.o ]; then set -e; install -m 755 ./afl-compiler-rt.o $${DESTDIR}$(HELPER_PATH); ln -sf afl-compiler-rt.o $${DESTDIR}$(HELPER_PATH)/afl-llvm-rt.o ;fi
|
||||
@if [ -f ./afl-lto ]; then set -e; ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-lto; ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-lto++; ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-clang-lto; ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-clang-lto++; install -m 755 ./afl-llvm-lto-instrumentation.so ./afl-llvm-rt-lto*.o ./afl-llvm-lto-instrumentlist.so $${DESTDIR}$(HELPER_PATH); fi
|
||||
@if [ -f ./afl-lto ]; then set -e; ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-lto; ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-lto++; ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-clang-lto; ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-clang-lto++; install -m 755 ./afl-llvm-rt-lto*.o ./afl-llvm-lto-instrumentlist.so $${DESTDIR}$(HELPER_PATH); fi
|
||||
@if [ -f ./afl-ld-lto ]; then set -e; install -m 755 ./afl-ld-lto $${DESTDIR}$(BIN_PATH); fi
|
||||
@if [ -f ./afl-compiler-rt-32.o ]; then set -e; install -m 755 ./afl-compiler-rt-32.o $${DESTDIR}$(HELPER_PATH); ln -sf afl-compiler-rt-32.o $${DESTDIR}$(HELPER_PATH)/afl-llvm-rt-32.o ;fi
|
||||
@if [ -f ./afl-compiler-rt-64.o ]; then set -e; install -m 755 ./afl-compiler-rt-64.o $${DESTDIR}$(HELPER_PATH); ln -sf afl-compiler-rt-64.o $${DESTDIR}$(HELPER_PATH)/afl-llvm-rt-64.o ; fi
|
||||
|
119
README.md
119
README.md
@ -6,7 +6,7 @@ Release version: [3.14c](https://github.com/AFLplusplus/AFLplusplus/releases)
|
||||
|
||||
GitHub version: 3.15a
|
||||
|
||||
Repository:
|
||||
Repository:
|
||||
[https://github.com/AFLplusplus/AFLplusplus](https://github.com/AFLplusplus/AFLplusplus)
|
||||
|
||||
AFL++ is maintained by:
|
||||
@ -18,33 +18,33 @@ AFL++ is maintained by:
|
||||
|
||||
Originally developed by Michał "lcamtuf" Zalewski.
|
||||
|
||||
AFL++ is a superior fork to Google's AFL - more speed, more and better
|
||||
AFL++ is a superior fork to Google's AFL - more speed, more and better
|
||||
mutations, more and better instrumentation, custom module support, etc.
|
||||
|
||||
You are free to copy, modify, and distribute AFL++ with attribution under the
|
||||
You are free to copy, modify, and distribute AFL++ with attribution under the
|
||||
terms of the Apache-2.0 License. See the [LICENSE](LICENSE) for details.
|
||||
|
||||
## Getting started
|
||||
|
||||
Here is some information to get you started:
|
||||
|
||||
* For releases, please see the
|
||||
[Releases](https://github.com/AFLplusplus/AFLplusplus/releases) tab and
|
||||
[branches](docs/branches.md). Also take a look at the list of
|
||||
* For releases, please see the
|
||||
[Releases tab](https://github.com/AFLplusplus/AFLplusplus/releases) and
|
||||
[branches](#branches). Also take a look at the list of
|
||||
[important changes in AFL++](docs/important_changes.md).
|
||||
* If you want to use AFL++ for your academic work, check the
|
||||
* If you want to use AFL++ for your academic work, check the
|
||||
[papers page](https://aflplus.plus/papers/) on the website.
|
||||
* To cite our work, look at the [Cite](#cite) section.
|
||||
* For comparisons, use the fuzzbench `aflplusplus` setup, or use
|
||||
`afl-clang-fast` with `AFL_LLVM_CMPLOG=1`. You can find the `aflplusplus`
|
||||
default configuration on Google's
|
||||
* For comparisons, use the fuzzbench `aflplusplus` setup, or use
|
||||
`afl-clang-fast` with `AFL_LLVM_CMPLOG=1`. You can find the `aflplusplus`
|
||||
default configuration on Google's
|
||||
[fuzzbench](https://github.com/google/fuzzbench/tree/master/fuzzers/aflplusplus).
|
||||
* To get you started with tutorials, go to
|
||||
* To get you started with tutorials, go to
|
||||
[docs/tutorials.md](docs/tutorials.md).
|
||||
|
||||
## Building and installing AFL++
|
||||
|
||||
To have AFL++ easily available with everything compiled, pull the image
|
||||
To have AFL++ easily available with everything compiled, pull the image
|
||||
directly from the Docker Hub:
|
||||
|
||||
```shell
|
||||
@ -52,95 +52,110 @@ docker pull aflplusplus/aflplusplus
|
||||
docker run -ti -v /location/of/your/target:/src aflplusplus/aflplusplus
|
||||
```
|
||||
|
||||
This image is automatically generated when a push to the stable repo happens
|
||||
(see [docs/branches.md](docs/branches.md)). You will find your target source
|
||||
This image is automatically generated when a push to the stable repo happens
|
||||
(see [branches](#branches)). You will find your target source
|
||||
code in `/src` in the container.
|
||||
|
||||
To build AFL++ yourself, continue at [docs/INSTALL.md](docs/INSTALL.md).
|
||||
|
||||
## Quick start: Fuzzing with AFL++
|
||||
|
||||
*NOTE: Before you start, please read about the [common sense risks of
|
||||
fuzzing](docs/common_sense_risks.md).*
|
||||
*NOTE: Before you start, please read about the
|
||||
[common sense risks of fuzzing](docs/fuzzing_in_depth.md#0-common-sense-risks).*
|
||||
|
||||
This is a quick start for fuzzing targets with the source code available. To
|
||||
This is a quick start for fuzzing targets with the source code available. To
|
||||
read about the process in detail, see
|
||||
[docs/fuzzing_expert.md](docs/fuzzing_expert.md).
|
||||
[docs/fuzzing_in_depth.md](docs/fuzzing_in_depth.md).
|
||||
|
||||
To learn about fuzzing other targets, see:
|
||||
* Binary-only targets:
|
||||
* Binary-only targets:
|
||||
[docs/fuzzing_binary-only_targets.md](docs/fuzzing_binary-only_targets.md)
|
||||
* Network services:
|
||||
* Network services:
|
||||
[docs/best_practices.md#fuzzing-a-network-service](docs/best_practices.md#fuzzing-a-network-service)
|
||||
* GUI programs:
|
||||
* GUI programs:
|
||||
[docs/best_practices.md#fuzzing-a-gui-program](docs/best_practices.md#fuzzing-a-gui-program)
|
||||
|
||||
Step-by-step quick start:
|
||||
|
||||
1. Compile the program or library to be fuzzed using `afl-cc`. A common way to
|
||||
1. Compile the program or library to be fuzzed using `afl-cc`. A common way to
|
||||
do this would be:
|
||||
|
||||
CC=/path/to/afl-cc CXX=/path/to/afl-c++ ./configure --disable-shared
|
||||
make clean all
|
||||
```
|
||||
CC=/path/to/afl-cc CXX=/path/to/afl-c++ ./configure --disable-shared
|
||||
make clean all
|
||||
```
|
||||
|
||||
2. Get a small but valid input file that makes sense to the program. When
|
||||
fuzzing verbose syntax (SQL, HTTP, etc), create a dictionary as described in
|
||||
2. Get a small but valid input file that makes sense to the program. When
|
||||
fuzzing verbose syntax (SQL, HTTP, etc.), create a dictionary as described in
|
||||
[dictionaries/README.md](dictionaries/README.md), too.
|
||||
|
||||
3. If the program reads from stdin, run `afl-fuzz` like so:
|
||||
|
||||
```
|
||||
```
|
||||
./afl-fuzz -i seeds_dir -o output_dir -- \
|
||||
/path/to/tested/program [...program's cmdline...]
|
||||
```
|
||||
/path/to/tested/program [...program's cmdline...]
|
||||
```
|
||||
|
||||
To add a dictionary, add `-x /path/to/dictionary.txt` to afl-fuzz.
|
||||
|
||||
If the program takes input from a file, you can put `@@` in the program's
|
||||
If the program takes input from a file, you can put `@@` in the program's
|
||||
command line; AFL will put an auto-generated file name in there for you.
|
||||
|
||||
4. Investigate anything shown in red in the fuzzer UI by promptly consulting
|
||||
[docs/status_screen.md](docs/status_screen.md).
|
||||
4. Investigate anything shown in red in the fuzzer UI by promptly consulting
|
||||
[docs/afl-fuzz_approach.md#understanding-the-status-screen](docs/afl-fuzz_approach.md#understanding-the-status-screen).
|
||||
|
||||
5. You will find the discovered crashes and hangs in the subdirectories `crashes/` and
|
||||
`hangs/` in the `-o output_dir` directory. You can replay the crashes by
|
||||
feeding them to the target, e.g.: `cat output_dir/crashes/id:000000,* |
|
||||
/path/to/tested/program [...program's cmdline...]` You can generate cores or
|
||||
use gdb directly to follow up the crashes.
|
||||
5. You will find the discovered crashes and hangs in the subdirectories `crashes/` and
|
||||
`hangs/` in the `-o output_dir` directory. You can replay the crashes by
|
||||
feeding them to the target, e.g.:
|
||||
|
||||
```
|
||||
cat output_dir/crashes/id:000000,* | /path/to/tested/program [...program's cmdline...]
|
||||
```
|
||||
|
||||
You can generate cores or use gdb directly to follow up the crashes.
|
||||
|
||||
## Contact
|
||||
|
||||
Questions? Concerns? Bug reports?
|
||||
|
||||
* The contributors can be reached via
|
||||
* The contributors can be reached via
|
||||
[https://github.com/AFLplusplus/AFLplusplus](https://github.com/AFLplusplus/AFLplusplus).
|
||||
* Take a look at our [FAQ](docs/FAQ.md). If you find an interesting or
|
||||
important question missing, submit it via
|
||||
* Take a look at our [FAQ](docs/FAQ.md). If you find an interesting or
|
||||
important question missing, submit it via
|
||||
[https://github.com/AFLplusplus/AFLplusplus/discussions](https://github.com/AFLplusplus/AFLplusplus/discussions).
|
||||
* There is a mailing list for the AFL/AFL++ project
|
||||
([browse archive](https://groups.google.com/group/afl-users)). To compare
|
||||
notes with other users or to get notified about major new features, send an
|
||||
* There is a mailing list for the AFL/AFL++ project
|
||||
([browse archive](https://groups.google.com/group/afl-users)). To compare
|
||||
notes with other users or to get notified about major new features, send an
|
||||
email to <afl-users+subscribe@googlegroups.com>.
|
||||
* Or join the [Awesome Fuzzing](https://discord.gg/gCraWct) Discord server.
|
||||
|
||||
## Branches
|
||||
|
||||
The following branches exist:
|
||||
|
||||
* [release](https://github.com/AFLplusplus/AFLplusplus/tree/release): the latest release
|
||||
* [stable/trunk](https://github.com/AFLplusplus/AFLplusplus/): stable state of AFL++ - it is synced from dev from time to time when we are satisfied with its stability
|
||||
* [dev](https://github.com/AFLplusplus/AFLplusplus/tree/dev): development state of AFL++ - bleeding edge and you might catch a checkout which does not compile or has a bug. *We only accept PRs in dev!!*
|
||||
* (any other): experimental branches to work on specific features or testing new functionality or changes.
|
||||
|
||||
## Help wanted
|
||||
|
||||
We have several [ideas](docs/ideas.md) we would like to see in AFL++ to make it
|
||||
even better. However, we already work on so many things that we do not have the
|
||||
We have several [ideas](docs/ideas.md) we would like to see in AFL++ to make it
|
||||
even better. However, we already work on so many things that we do not have the
|
||||
time for all the big ideas.
|
||||
|
||||
This can be your way to support and contribute to AFL++ - extend it to do
|
||||
This can be your way to support and contribute to AFL++ - extend it to do
|
||||
something cool.
|
||||
|
||||
For everyone who wants to contribute (and send pull requests), please read our
|
||||
For everyone who wants to contribute (and send pull requests), please read our
|
||||
[contributing guidelines](CONTRIBUTING.md) before you submit.
|
||||
|
||||
## Special thanks
|
||||
|
||||
Many of the improvements to the original AFL and AFL++ wouldn't be possible
|
||||
Many of the improvements to the original AFL and AFL++ wouldn't be possible
|
||||
without feedback, bug reports, or patches from our contributors.
|
||||
|
||||
Thank you! (For people sending pull requests - please add yourself to this list
|
||||
Thank you! (For people sending pull requests - please add yourself to this list
|
||||
:-)
|
||||
|
||||
<details>
|
||||
@ -200,8 +215,8 @@ Thank you! (For people sending pull requests - please add yourself to this list
|
||||
|
||||
## Cite
|
||||
|
||||
If you use AFL++ in scientific work, consider citing
|
||||
[our paper](https://www.usenix.org/conference/woot20/presentation/fioraldi)
|
||||
If you use AFL++ in scientific work, consider citing
|
||||
[our paper](https://www.usenix.org/conference/woot20/presentation/fioraldi)
|
||||
presented at WOOT'20:
|
||||
|
||||
Andrea Fioraldi, Dominik Maier, Heiko Eißfeldt, and Marc Heuse. “AFL++: Combining incremental steps of fuzzing research”. In 14th USENIX Workshop on Offensive Technologies (WOOT 20). USENIX Association, Aug. 2020.
|
||||
@ -221,4 +236,4 @@ presented at WOOT'20:
|
||||
}
|
||||
```
|
||||
|
||||
</details>
|
||||
</details>
|
@ -83,7 +83,8 @@ If you find an interesting or important question missing, submit it via
|
||||
|
||||
However, if there is only the binary program and no source code available, then the standard non-instrumented mode is not effective.
|
||||
|
||||
To learn how these binaries can be fuzzed, read [binaryonly_fuzzing.md](binaryonly_fuzzing.md).
|
||||
To learn how these binaries can be fuzzed, read
|
||||
[fuzzing_binary-only_targets.md](fuzzing_binary-only_targets.md).
|
||||
</p></details>
|
||||
|
||||
<details>
|
||||
@ -143,7 +144,7 @@ If you find an interesting or important question missing, submit it via
|
||||
Target: x86_64-unknown-linux-gnu
|
||||
Thread model: posix
|
||||
InstalledDir: /prg/tmp/llvm-project/build/bin
|
||||
clang-13: note: diagnostic msg:
|
||||
clang-13: note: diagnostic msg:
|
||||
********************
|
||||
```
|
||||
|
||||
|
@ -1,37 +1,541 @@
|
||||
# The afl-fuzz approach
|
||||
|
||||
American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple
|
||||
but rock-solid instrumentation-guided genetic algorithm. It uses a modified
|
||||
form of edge coverage to effortlessly pick up subtle, local-scale changes to
|
||||
program control flow.
|
||||
AFL++ is a brute-force fuzzer coupled with an exceedingly simple but rock-solid
|
||||
instrumentation-guided genetic algorithm. It uses a modified form of edge
|
||||
coverage to effortlessly pick up subtle, local-scale changes to program control
|
||||
flow.
|
||||
|
||||
Simplifying a bit, the overall algorithm can be summed up as:
|
||||
|
||||
1) Load user-supplied initial test cases into the queue,
|
||||
1) Load user-supplied initial test cases into the queue.
|
||||
|
||||
2) Take the next input file from the queue,
|
||||
2) Take the next input file from the queue.
|
||||
|
||||
3) Attempt to trim the test case to the smallest size that doesn't alter
|
||||
the measured behavior of the program,
|
||||
3) Attempt to trim the test case to the smallest size that doesn't alter the
|
||||
measured behavior of the program.
|
||||
|
||||
4) Repeatedly mutate the file using a balanced and well-researched variety
|
||||
of traditional fuzzing strategies,
|
||||
4) Repeatedly mutate the file using a balanced and well-researched variety of
|
||||
traditional fuzzing strategies.
|
||||
|
||||
5) If any of the generated mutations resulted in a new state transition
|
||||
recorded by the instrumentation, add mutated output as a new entry in the
|
||||
queue.
|
||||
5) If any of the generated mutations resulted in a new state transition recorded
|
||||
by the instrumentation, add mutated output as a new entry in the queue.
|
||||
|
||||
6) Go to 2.
|
||||
6) Go to 2.
|
||||
|
||||
The discovered test cases are also periodically culled to eliminate ones that
|
||||
have been obsoleted by newer, higher-coverage finds; and undergo several other
|
||||
instrumentation-driven effort minimization steps.
|
||||
|
||||
As a side result of the fuzzing process, the tool creates a small,
|
||||
self-contained corpus of interesting test cases. These are extremely useful
|
||||
for seeding other, labor- or resource-intensive testing regimes - for example,
|
||||
for stress-testing browsers, office applications, graphics suites, or
|
||||
closed-source tools.
|
||||
self-contained corpus of interesting test cases. These are extremely useful for
|
||||
seeding other, labor- or resource-intensive testing regimes - for example, for
|
||||
stress-testing browsers, office applications, graphics suites, or closed-source
|
||||
tools.
|
||||
|
||||
The fuzzer is thoroughly tested to deliver out-of-the-box performance far
|
||||
superior to blind fuzzing or coverage-only tools.
|
||||
superior to blind fuzzing or coverage-only tools.
|
||||
|
||||
## Understanding the status screen
|
||||
|
||||
This chapter provides an overview of the status screen - plus tips for
|
||||
troubleshooting any warnings and red text shown in the UI.
|
||||
|
||||
For the general instruction manual, see [README.md](../README.md).
|
||||
|
||||
### A note about colors
|
||||
|
||||
The status screen and error messages use colors to keep things readable and
|
||||
attract your attention to the most important details. For example, red almost
|
||||
always means "consult this doc" :-)
|
||||
|
||||
Unfortunately, the UI will only render correctly if your terminal is using
|
||||
a traditional un*x palette (white text on black background) or something close to
|
||||
that.
|
||||
|
||||
If you are using inverse video, you may want to change your settings, say:
|
||||
|
||||
- For GNOME Terminal, go to `Edit > Profile` preferences, select the "colors"
|
||||
tab, and from the list of built-in schemes, choose "white on black".
|
||||
- For the macOS Terminal app, open a new window using the "Pro" scheme via the
|
||||
`Shell > New Window` menu (or make "Pro" your default).
|
||||
|
||||
Alternatively, if you really like your current colors, you can edit config.h to
|
||||
comment out USE_COLORS, then do `make clean all`.
|
||||
|
||||
We are not aware of any other simple way to make this work without causing other
|
||||
side effects - sorry about that.
|
||||
|
||||
With that out of the way, let's talk about what's actually on the screen...
|
||||
|
||||
### The status bar
|
||||
|
||||
```
|
||||
american fuzzy lop ++3.01a (default) [fast] {0}
|
||||
```
|
||||
|
||||
The top line shows you which mode afl-fuzz is running in (normal: "american
|
||||
fuzzy lop", crash exploration mode: "peruvian rabbit mode") and the version of
|
||||
AFL++. Next to the version is the banner, which, if not set with -T by hand,
|
||||
will either show the binary name being fuzzed, or the -M/-S main/secondary name
|
||||
for parallel fuzzing. Second to last is the power schedule mode being run
|
||||
(default: fast). Finally, the last item is the CPU id.
|
||||
|
||||
### Process timing
|
||||
|
||||
```
|
||||
+----------------------------------------------------+
|
||||
| run time : 0 days, 8 hrs, 32 min, 43 sec |
|
||||
| last new path : 0 days, 0 hrs, 6 min, 40 sec |
|
||||
| last uniq crash : none seen yet |
|
||||
| last uniq hang : 0 days, 1 hrs, 24 min, 32 sec |
|
||||
+----------------------------------------------------+
|
||||
```
|
||||
|
||||
This section is fairly self-explanatory: it tells you how long the fuzzer has
|
||||
been running and how much time has elapsed since its most recent finds. This is
|
||||
broken down into "paths" (a shorthand for test cases that trigger new execution
|
||||
patterns), crashes, and hangs.
|
||||
|
||||
When it comes to timing: there is no hard rule, but most fuzzing jobs should be
|
||||
expected to run for days or weeks; in fact, for a moderately complex project,
|
||||
the first pass will probably take a day or so. Every now and then, some jobs
|
||||
will be allowed to run for months.
|
||||
|
||||
There's one important thing to watch out for: if the tool is not finding new
|
||||
paths within several minutes of starting, you're probably not invoking the
|
||||
target binary correctly and it never gets to parse the input files we're
|
||||
throwing at it; other possible explanations are that the default memory limit
|
||||
(`-m`) is too restrictive and the program exits after failing to allocate a
|
||||
buffer very early on; or that the input files are patently invalid and always
|
||||
fail a basic header check.
|
||||
|
||||
If there are no new paths showing up for a while, you will eventually see a big
|
||||
red warning in this section, too :-)
|
||||
|
||||
### Overall results
|
||||
|
||||
```
|
||||
+-----------------------+
|
||||
| cycles done : 0 |
|
||||
| total paths : 2095 |
|
||||
| uniq crashes : 0 |
|
||||
| uniq hangs : 19 |
|
||||
+-----------------------+
|
||||
```
|
||||
|
||||
The first field in this section gives you the count of queue passes done so far -
|
||||
that is, the number of times the fuzzer went over all the interesting test
|
||||
cases discovered so far, fuzzed them, and looped back to the very beginning.
|
||||
Every fuzzing session should be allowed to complete at least one cycle; and
|
||||
ideally, should run much longer than that.
|
||||
|
||||
As noted earlier, the first pass can take a day or longer, so sit back and
|
||||
relax.
|
||||
|
||||
To help make the call on when to hit `Ctrl-C`, the cycle counter is color-coded.
|
||||
It is shown in magenta during the first pass, progresses to yellow if new finds
|
||||
are still being made in subsequent rounds, then blue when that ends - and
|
||||
finally, turns green after the fuzzer hasn't been seeing any action for a longer
|
||||
while.
|
||||
|
||||
The remaining fields in this part of the screen should be pretty obvious:
|
||||
there's the number of test cases ("paths") discovered so far, and the number of
|
||||
unique faults. The test cases, crashes, and hangs can be explored in real-time
|
||||
by browsing the output directory, see
|
||||
[#interpreting-output](#interpreting-output).
|
||||
|
||||
### Cycle progress
|
||||
|
||||
```
|
||||
+-------------------------------------+
|
||||
| now processing : 1296 (61.86%) |
|
||||
| paths timed out : 0 (0.00%) |
|
||||
+-------------------------------------+
|
||||
```
|
||||
|
||||
This box tells you how far along the fuzzer is with the current queue cycle: it
|
||||
shows the ID of the test case it is currently working on, plus the number of
|
||||
inputs it decided to ditch because they were persistently timing out.
|
||||
|
||||
The "*" suffix sometimes shown in the first line means that the currently
|
||||
processed path is not "favored" (a property discussed later on).
|
||||
|
||||
### Map coverage
|
||||
|
||||
```
|
||||
+--------------------------------------+
|
||||
| map density : 10.15% / 29.07% |
|
||||
| count coverage : 4.03 bits/tuple |
|
||||
+--------------------------------------+
|
||||
```
|
||||
|
||||
This section provides some trivia about the coverage observed by the
|
||||
instrumentation embedded in the target binary.
|
||||
|
||||
The first line in the box tells you how many branch tuples we have already hit,
|
||||
in proportion to how much the bitmap can hold. The number on the left describes
|
||||
the current input; the one on the right is the value for the entire input
|
||||
corpus.
|
||||
|
||||
Be wary of extremes:
|
||||
|
||||
- Absolute numbers below 200 or so suggest one of three things: that the program
|
||||
is extremely simple; that it is not instrumented properly (e.g., due to being
|
||||
linked against a non-instrumented copy of the target library); or that it is
|
||||
bailing out prematurely on your input test cases. The fuzzer will try to mark
|
||||
this in pink, just to make you aware.
|
||||
- Percentages over 70% may very rarely happen with very complex programs that
|
||||
make heavy use of template-generated code. Because high bitmap density makes
|
||||
it harder for the fuzzer to reliably discern new program states, we recommend
|
||||
recompiling the binary with `AFL_INST_RATIO=10` or so and trying again (see
|
||||
[env_variables.md](env_variables.md)). The fuzzer will flag high percentages
|
||||
in red. Chances are, you will never see that unless you're fuzzing extremely
|
||||
hairy software (say, v8, perl, ffmpeg).
|
||||
|
||||
The other line deals with the variability in tuple hit counts seen in the
|
||||
binary. In essence, if every taken branch is always taken a fixed number of
|
||||
times for all the inputs we have tried, this will read `1.00`. As we manage to
|
||||
trigger other hit counts for every branch, the needle will start to move toward
|
||||
`8.00` (every bit in the 8-bit map hit), but will probably never reach that
|
||||
extreme.
|
||||
|
||||
Together, the values can be useful for comparing the coverage of several
|
||||
different fuzzing jobs that rely on the same instrumented binary.
|
||||
|
||||
### Stage progress
|
||||
|
||||
```
|
||||
+-------------------------------------+
|
||||
| now trying : interest 32/8 |
|
||||
| stage execs : 3996/34.4k (11.62%) |
|
||||
| total execs : 27.4M |
|
||||
| exec speed : 891.7/sec |
|
||||
+-------------------------------------+
|
||||
```
|
||||
|
||||
This part gives you an in-depth peek at what the fuzzer is actually doing right
|
||||
now. It tells you about the current stage, which can be any of:
|
||||
|
||||
- calibration - a pre-fuzzing stage where the execution path is examined to
|
||||
detect anomalies, establish baseline execution speed, and so on. Executed very
|
||||
briefly whenever a new find is being made.
|
||||
- trim L/S - another pre-fuzzing stage where the test case is trimmed to the
|
||||
shortest form that still produces the same execution path. The length (L) and
|
||||
stepover (S) are chosen in general relationship to file size.
|
||||
- bitflip L/S - deterministic bit flips. There are L bits toggled at any given
|
||||
time, walking the input file with S-bit increments. The current L/S variants
|
||||
are: `1/1`, `2/1`, `4/1`, `8/8`, `16/8`, `32/8`.
|
||||
- arith L/8 - deterministic arithmetics. The fuzzer tries to subtract or add
|
||||
small integers to 8-, 16-, and 32-bit values. The stepover is always 8 bits.
|
||||
- interest L/8 - deterministic value overwrite. The fuzzer has a list of known
|
||||
"interesting" 8-, 16-, and 32-bit values to try. The stepover is 8 bits.
|
||||
- extras - deterministic injection of dictionary terms. This can be shown as
|
||||
"user" or "auto", depending on whether the fuzzer is using a user-supplied
|
||||
dictionary (`-x`) or an auto-created one. You will also see "over" or
|
||||
"insert", depending on whether the dictionary words overwrite existing data or
|
||||
are inserted by offsetting the remaining data to accommodate their length.
|
||||
- havoc - a sort-of-fixed-length cycle with stacked random tweaks. The
|
||||
operations attempted during this stage include bit flips, overwrites with
|
||||
random and "interesting" integers, block deletion, block duplication, plus
|
||||
assorted dictionary-related operations (if a dictionary is supplied in the
|
||||
first place).
|
||||
- splice - a last-resort strategy that kicks in after the first full queue cycle
|
||||
with no new paths. It is equivalent to 'havoc', except that it first splices
|
||||
together two random inputs from the queue at some arbitrarily selected
|
||||
midpoint.
|
||||
- sync - a stage used only when `-M` or `-S` is set (see
|
||||
[parallel_fuzzing.md](parallel_fuzzing.md)). No real fuzzing is involved, but
|
||||
the tool scans the output from other fuzzers and imports test cases as
|
||||
necessary. The first time this is done, it may take several minutes or so.
|
||||
|
||||
The remaining fields should be fairly self-evident: there's the exec count
|
||||
progress indicator for the current stage, a global exec counter, and a benchmark
|
||||
for the current program execution speed. This may fluctuate from one test case
|
||||
to another, but the benchmark should be ideally over 500 execs/sec most of the
|
||||
time - and if it stays below 100, the job will probably take very long.
|
||||
|
||||
The fuzzer will explicitly warn you about slow targets, too. If this happens,
|
||||
see the [perf_tips.md](perf_tips.md) file included with the fuzzer for ideas on
|
||||
how to speed things up.
|
||||
|
||||
### Findings in depth
|
||||
|
||||
```
|
||||
+--------------------------------------+
|
||||
| favored paths : 879 (41.96%) |
|
||||
| new edges on : 423 (20.19%) |
|
||||
| total crashes : 0 (0 unique) |
|
||||
| total tmouts : 24 (19 unique) |
|
||||
+--------------------------------------+
|
||||
```
|
||||
|
||||
This gives you several metrics that are of interest mostly to complete nerds.
|
||||
The section includes the number of paths that the fuzzer likes the most based on
|
||||
a minimization algorithm baked into the code (these will get considerably more
|
||||
air time), and the number of test cases that actually resulted in better edge
|
||||
coverage (versus just pushing the branch hit counters up). There are also
|
||||
additional, more detailed counters for crashes and timeouts.
|
||||
|
||||
Note that the timeout counter is somewhat different from the hang counter; this
|
||||
one includes all test cases that exceeded the timeout, even if they did not
|
||||
exceed it by a margin sufficient to be classified as hangs.
|
||||
|
||||
### Fuzzing strategy yields
|
||||
|
||||
```
|
||||
+-----------------------------------------------------+
|
||||
| bit flips : 57/289k, 18/289k, 18/288k |
|
||||
| byte flips : 0/36.2k, 4/35.7k, 7/34.6k |
|
||||
| arithmetics : 53/2.54M, 0/537k, 0/55.2k |
|
||||
| known ints : 8/322k, 12/1.32M, 10/1.70M |
|
||||
| dictionary : 9/52k, 1/53k, 1/24k |
|
||||
|havoc/splice : 1903/20.0M, 0/0 |
|
||||
|py/custom/rq : unused, 53/2.54M, unused |
|
||||
| trim/eff : 20.31%/9201, 17.05% |
|
||||
+-----------------------------------------------------+
|
||||
```
|
||||
|
||||
This is just another nerd-targeted section keeping track of how many paths we
|
||||
have netted, in proportion to the number of execs attempted, for each of the
|
||||
fuzzing strategies discussed earlier on. This serves to convincingly validate
|
||||
assumptions about the usefulness of the various approaches taken by afl-fuzz.
|
||||
|
||||
The trim strategy stats in this section are a bit different than the rest. The
|
||||
first number in this line shows the ratio of bytes removed from the input files;
|
||||
the second one corresponds to the number of execs needed to achieve this goal.
|
||||
Finally, the third number shows the proportion of bytes that, although not
|
||||
possible to remove, were deemed to have no effect and were excluded from some of
|
||||
the more expensive deterministic fuzzing steps.
|
||||
|
||||
Note that when deterministic mutation mode is off (which is the default because
|
||||
it is not very efficient) the first five lines display "disabled (default,
|
||||
enable with -D)".
|
||||
|
||||
Only what is activated will have its counter shown.
|
||||
|
||||
### Path geometry
|
||||
|
||||
```
|
||||
+---------------------+
|
||||
| levels : 5 |
|
||||
| pending : 1570 |
|
||||
| pend fav : 583 |
|
||||
| own finds : 0 |
|
||||
| imported : 0 |
|
||||
| stability : 100.00% |
|
||||
+---------------------+
|
||||
```
|
||||
|
||||
The first field in this section tracks the path depth reached through the guided
|
||||
fuzzing process. In essence: the initial test cases supplied by the user are
|
||||
considered "level 1". The test cases that can be derived from that through
|
||||
traditional fuzzing are considered "level 2"; the ones derived by using these as
|
||||
inputs to subsequent fuzzing rounds are "level 3"; and so forth. The maximum
|
||||
depth is therefore a rough proxy for how much value you're getting out of the
|
||||
instrumentation-guided approach taken by afl-fuzz.
|
||||
|
||||
The next field shows you the number of inputs that have not gone through any
|
||||
fuzzing yet. The same stat is also given for "favored" entries that the fuzzer
|
||||
really wants to get to in this queue cycle (the non-favored entries may have to
|
||||
wait a couple of cycles to get their chance).
|
||||
|
||||
Next, we have the number of new paths found during this fuzzing session and
|
||||
imported from other fuzzer instances when doing parallelized fuzzing; and the
|
||||
extent to which identical inputs appear to sometimes produce variable behavior
|
||||
in the tested binary.
|
||||
|
||||
That last bit is actually fairly interesting: it measures the consistency of
|
||||
observed traces. If a program always behaves the same for the same input data,
|
||||
it will earn a score of 100%. When the value is lower but still shown in purple,
|
||||
the fuzzing process is unlikely to be negatively affected. If it goes into red,
|
||||
you may be in trouble, since AFL will have difficulty discerning between
|
||||
meaningful and "phantom" effects of tweaking the input file.
|
||||
|
||||
Now, most targets will just get a 100% score, but when you see lower figures,
|
||||
there are several things to look at:
|
||||
|
||||
- The use of uninitialized memory in conjunction with some intrinsic sources of
|
||||
entropy in the tested binary. Harmless to AFL, but could be indicative of a
|
||||
security bug.
|
||||
- Attempts to manipulate persistent resources, such as left over temporary files
|
||||
or shared memory objects. This is usually harmless, but you may want to
|
||||
double-check to make sure the program isn't bailing out prematurely. Running
|
||||
out of disk space, SHM handles, or other global resources can trigger this,
|
||||
too.
|
||||
- Hitting some functionality that is actually designed to behave randomly.
|
||||
Generally harmless. For example, when fuzzing sqlite, an input like `select
|
||||
random();` will trigger a variable execution path.
|
||||
- Multiple threads executing at once in semi-random order. This is harmless when
|
||||
the 'stability' metric stays over 90% or so, but can become an issue if not.
|
||||
Here's what to try:
|
||||
* Use afl-clang-fast from [instrumentation](../instrumentation/) - it uses a
|
||||
thread-local tracking model that is less prone to concurrency issues,
|
||||
* See if the target can be compiled or run without threads. Common
|
||||
`./configure` options include `--without-threads`, `--disable-pthreads`, or
|
||||
`--disable-openmp`.
|
||||
* Replace pthreads with GNU Pth (https://www.gnu.org/software/pth/), which
|
||||
allows you to use a deterministic scheduler.
|
||||
- In persistent mode, minor drops in the "stability" metric can be normal,
|
||||
because not all the code behaves identically when re-entered; but major dips
|
||||
may signify that the code within `__AFL_LOOP()` is not behaving correctly on
|
||||
subsequent iterations (e.g., due to incomplete clean-up or reinitialization of
|
||||
the state) and that most of the fuzzing effort goes to waste.
|
||||
|
||||
The paths where variable behavior is detected are marked with a matching entry
|
||||
in the `<out_dir>/queue/.state/variable_behavior/` directory, so you can look
|
||||
them up easily.
|
||||
|
||||
### CPU load
|
||||
|
||||
```
|
||||
[cpu: 25%]
|
||||
```
|
||||
|
||||
This tiny widget shows the apparent CPU utilization on the local system. It is
|
||||
calculated by taking the number of processes in the "runnable" state, and then
|
||||
comparing it to the number of logical cores on the system.
|
||||
|
||||
If the value is shown in green, you are using fewer CPU cores than available on
|
||||
your system and can probably parallelize to improve performance; for tips on how
|
||||
to do that, see [parallel_fuzzing.md](parallel_fuzzing.md).
|
||||
|
||||
If the value is shown in red, your CPU is *possibly* oversubscribed, and running
|
||||
additional fuzzers may not give you any benefits.
|
||||
|
||||
Of course, this benchmark is very simplistic; it tells you how many processes
|
||||
are ready to run, but not how resource-hungry they may be. It also doesn't
|
||||
distinguish between physical cores, logical cores, and virtualized CPUs; the
|
||||
performance characteristics of each of these will differ quite a bit.
|
||||
|
||||
If you want a more accurate measurement, you can run the `afl-gotcpu` utility
|
||||
from the command line.
|
||||
|
||||
## Interpreting output
|
||||
|
||||
See [#understanding-the-status-screen](#understanding-the-status-screen) for
|
||||
information on how to interpret the displayed stats and monitor the health of
|
||||
the process. Be sure to consult this file especially if any UI elements are
|
||||
highlighted in red.
|
||||
|
||||
The fuzzing process will continue until you press Ctrl-C. At a minimum, you want
|
||||
to allow the fuzzer to complete one queue cycle, which may take anywhere from a
|
||||
couple of hours to a week or so.
|
||||
|
||||
There are three subdirectories created within the output directory and updated
|
||||
in real-time:
|
||||
|
||||
- queue/ - test cases for every distinctive execution path, plus all the
|
||||
starting files given by the user. This is the synthesized corpus
|
||||
mentioned in section 2.
|
||||
|
||||
Before using this corpus for any other purposes, you can shrink
|
||||
it to a smaller size using the afl-cmin tool. The tool will find
|
||||
a smaller subset of files offering equivalent edge coverage.
|
||||
|
||||
- crashes/ - unique test cases that cause the tested program to receive a fatal
|
||||
signal (e.g., SIGSEGV, SIGILL, SIGABRT). The entries are grouped by
|
||||
the received signal.
|
||||
|
||||
- hangs/ - unique test cases that cause the tested program to time out. The
|
||||
default time limit before something is classified as a hang is the
|
||||
larger of 1 second and the value of the -t parameter. The value can
|
||||
be fine-tuned by setting AFL_HANG_TMOUT, but this is rarely
|
||||
necessary.
|
||||
|
||||
Crashes and hangs are considered "unique" if the associated execution paths
|
||||
involve any state transitions not seen in previously-recorded faults. If a
|
||||
single bug can be reached in multiple ways, there will be some count inflation
|
||||
early in the process, but this should quickly taper off.
|
||||
|
||||
The file names for crashes and hangs are correlated with the parent, non-faulting
|
||||
queue entries. This should help with debugging.
|
||||
|
||||
## Visualizing
|
||||
|
||||
If you have gnuplot installed, you can also generate some pretty graphs for any
|
||||
active fuzzing task using afl-plot. For an example of what this looks like, see
|
||||
[https://lcamtuf.coredump.cx/afl/plot/](https://lcamtuf.coredump.cx/afl/plot/).
|
||||
|
||||
You can also manually build and install afl-plot-ui, which is a helper utility
|
||||
for showing the graphs generated by afl-plot in a graphical window using GTK.
|
||||
You can build and install it as follows:
|
||||
|
||||
```shell
|
||||
sudo apt install libgtk-3-0 libgtk-3-dev pkg-config
|
||||
cd utils/plot_ui
|
||||
make
|
||||
cd ../../
|
||||
sudo make install
|
||||
```
|
||||
|
||||
|
||||
### Addendum: status and plot files
|
||||
|
||||
For unattended operation, some of the key status screen information can also be
|
||||
found in a machine-readable format in the fuzzer_stats file in the output
|
||||
directory. This includes:
|
||||
|
||||
- `start_time` - unix time indicating the start time of afl-fuzz
|
||||
- `last_update` - unix time corresponding to the last update of this file
|
||||
- `run_time` - run time in seconds to the last update of this file
|
||||
- `fuzzer_pid` - PID of the fuzzer process
|
||||
- `cycles_done` - queue cycles completed so far
|
||||
- `cycles_wo_finds` - number of cycles without any new paths found
|
||||
- `execs_done` - number of execve() calls attempted
|
||||
- `execs_per_sec` - overall number of execs per second
|
||||
- `paths_total` - total number of entries in the queue
|
||||
- `paths_favored` - number of queue entries that are favored
|
||||
- `paths_found` - number of entries discovered through local fuzzing
|
||||
- `paths_imported` - number of entries imported from other instances
|
||||
- `max_depth` - number of levels in the generated data set
|
||||
- `cur_path` - currently processed entry number
|
||||
- `pending_favs` - number of favored entries still waiting to be fuzzed
|
||||
- `pending_total` - number of all entries waiting to be fuzzed
|
||||
- `variable_paths` - number of test cases showing variable behavior
|
||||
- `stability` - percentage of bitmap bytes that behave consistently
|
||||
- `bitmap_cvg` - percentage of edge coverage found in the map so far
|
||||
- `unique_crashes` - number of unique crashes recorded
|
||||
- `unique_hangs` - number of unique hangs encountered
|
||||
- `last_path` - seconds since the last path was found
|
||||
- `last_crash` - seconds since the last crash was found
|
||||
- `last_hang` - seconds since the last hang was found
|
||||
- `execs_since_crash` - execs since the last crash was found
|
||||
- `exec_timeout` - the -t command line value
|
||||
- `slowest_exec_ms` - real time of the slowest execution in ms
|
||||
- `peak_rss_mb` - max rss usage reached during fuzzing in MB
|
||||
- `edges_found` - how many edges have been found
|
||||
- `var_byte_count` - how many edges are non-deterministic
|
||||
- `afl_banner` - banner text (e.g. the target name)
|
||||
- `afl_version` - the version of AFL used
|
||||
- `target_mode` - default, persistent, qemu, unicorn, non-instrumented
|
||||
- `command_line` - full command line used for the fuzzing session
|
||||
|
||||
Most of these map directly to the UI elements discussed earlier on.
|
||||
|
||||
On top of that, you can also find an entry called `plot_data`, containing a
|
||||
plottable history for most of these fields. If you have gnuplot installed, you
|
||||
can turn this into a nice progress report with the included `afl-plot` tool.
|
||||
|
||||
### Addendum: automatically sending metrics with StatsD
|
||||
|
||||
In a CI environment or when running multiple fuzzers, it can be tedious to log
|
||||
into each of them or deploy scripts to read the fuzzer statistics. Using
|
||||
`AFL_STATSD` (and the other related environment variables `AFL_STATSD_HOST`,
|
||||
`AFL_STATSD_PORT`, `AFL_STATSD_TAGS_FLAVOR`) you can automatically send metrics
|
||||
to your favorite StatsD server. Depending on your StatsD server, you will be
|
||||
able to monitor, trigger alerts, or perform actions based on these metrics (e.g.,
|
||||
alert on slow exec/s for a new build, threshold of crashes, time since last
|
||||
crash > X, etc).
|
||||
|
||||
The selected metrics are a subset of all the metrics found in the status and in
|
||||
the plot file. The list is the following: `cycle_done`, `cycles_wo_finds`,
|
||||
`execs_done`, `execs_per_sec`, `paths_total`, `paths_favored`, `paths_found`,
|
||||
`paths_imported`, `max_depth`, `cur_path`, `pending_favs`, `pending_total`,
|
||||
`variable_paths`, `unique_crashes`, `unique_hangs`, `total_crashes`,
|
||||
`slowest_exec_ms`, `edges_found`, `var_byte_count`, `havoc_expansion`. Their
|
||||
definitions can be found in the addendum above.
|
||||
|
||||
When using multiple fuzzer instances with StatsD, it is *strongly* recommended
|
||||
to set up the flavor (AFL_STATSD_TAGS_FLAVOR) to match your StatsD server. This
|
||||
will allow you to see individual fuzzer performance, detect bad ones, see the
|
||||
progress of each strategy...
|
@ -4,20 +4,26 @@
|
||||
|
||||
### Targets
|
||||
|
||||
* [Fuzzing a binary-only target](#fuzzing-a-binary-only-target)
|
||||
* [Fuzzing a GUI program](#fuzzing-a-gui-program)
|
||||
* [Fuzzing a network service](#fuzzing-a-network-service)
|
||||
* [Fuzzing a target with source code available](#fuzzing-a-target-with-source-code-available)
|
||||
* [Fuzzing a binary-only target](#fuzzing-a-binary-only-target)
|
||||
* [Fuzzing a GUI program](#fuzzing-a-gui-program)
|
||||
* [Fuzzing a network service](#fuzzing-a-network-service)
|
||||
|
||||
### Improvements
|
||||
|
||||
* [Improving speed](#improving-speed)
|
||||
* [Improving stability](#improving-stability)
|
||||
* [Improving speed](#improving-speed)
|
||||
* [Improving stability](#improving-stability)
|
||||
|
||||
## Targets
|
||||
|
||||
### Fuzzing a target with source code available
|
||||
|
||||
To learn how to fuzz a target if source code is available, see [fuzzing_in_depth.md](fuzzing_in_depth.md).
|
||||
|
||||
### Fuzzing a binary-only target
|
||||
|
||||
For a comprehensive guide, see [binaryonly_fuzzing.md](binaryonly_fuzzing.md).
|
||||
For a comprehensive guide, see
|
||||
[fuzzing_binary-only_targets.md](fuzzing_binary-only_targets.md).
|
||||
|
||||
### Fuzzing a GUI program
|
||||
|
||||
@ -48,7 +54,7 @@ to emulate the network. This is also much faster than the real network would be.
|
||||
See [utils/socket_fuzzing/](../utils/socket_fuzzing/).
|
||||
|
||||
There is an outdated AFL++ branch that implements networking if you are
|
||||
desperate though: [https://github.com/AFLplusplus/AFLplusplus/tree/networking](https://github.com/AFLplusplus/AFLplusplus/tree/networking) -
|
||||
desperate though: [https://github.com/AFLplusplus/AFLplusplus/tree/networking](https://github.com/AFLplusplus/AFLplusplus/tree/networking) -
|
||||
however a better option is AFLnet ([https://github.com/aflnet/aflnet](https://github.com/aflnet/aflnet))
|
||||
which allows you to define network state with different types of data packets.
|
||||
|
||||
@ -58,11 +64,11 @@ which allows you to define network state with different type of data packets.
|
||||
|
||||
1. Use [llvm_mode](../instrumentation/README.llvm.md): afl-clang-lto (llvm >= 11) or afl-clang-fast (llvm >= 9 recommended).
|
||||
2. Use [persistent mode](../instrumentation/README.persistent_mode.md) (x2-x20 speed increase).
|
||||
3. Use the [AFL++ snapshot module](https://github.com/AFLplusplus/AFL-Snapshot-LKM) (x2 speed increase).
|
||||
3. Instrument just what you are interested in, see [instrumentation/README.instrument_list.md](../instrumentation/README.instrument_list.md).
|
||||
4. If you do not use shmem persistent mode, use `AFL_TMPDIR` to put the input file directory on a tempfs location, see [env_variables.md](env_variables.md).
|
||||
5. Improve Linux kernel performance: modify `/etc/default/grub`, set `GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"`; then `update-grub` and `reboot` (warning: makes the system less secure).
|
||||
6. Running on an `ext2` filesystem with `noatime` mount option will be a bit faster than on any other journaling filesystem.
|
||||
7. Use your cores! [fuzzing_expert.md:b) Using multiple cores](fuzzing_expert.md#b-using-multiple-cores).
|
||||
7. Use your cores ([fuzzing_in_depth.md:3c) Using multiple cores](fuzzing_in_depth.md#c-using-multiple-cores))!
|
||||
|
||||
### Improving stability
|
||||
|
||||
|
@ -1,23 +0,0 @@
|
||||
# Going beyond crashes
|
||||
|
||||
Fuzzing is a wonderful and underutilized technique for discovering non-crashing
|
||||
design and implementation errors, too. Quite a few interesting bugs have been
|
||||
found by modifying the target programs to call abort() when say:
|
||||
|
||||
- Two bignum libraries produce different outputs when given the same
|
||||
fuzzer-generated input,
|
||||
|
||||
- An image library produces different outputs when asked to decode the same
|
||||
input image several times in a row,
|
||||
|
||||
- A serialization / deserialization library fails to produce stable outputs
|
||||
when iteratively serializing and deserializing fuzzer-supplied data,
|
||||
|
||||
- A compression library produces an output inconsistent with the input file
|
||||
when asked to compress and then decompress a particular blob.
|
||||
|
||||
Implementing these or similar sanity checks usually takes very little time;
|
||||
if you are the maintainer of a particular package, you can make this code
|
||||
conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also
|
||||
shared with libfuzzer and honggfuzz) or `#ifdef __AFL_COMPILER` (this one is
|
||||
just for AFL).
|
@ -1,225 +0,0 @@
|
||||
# Fuzzing binary-only programs with AFL++
|
||||
|
||||
AFL++, libfuzzer and others are great if you have the source code, and
|
||||
it allows for very fast and coverage guided fuzzing.
|
||||
|
||||
However, if there is only the binary program and no source code available,
|
||||
then standard `afl-fuzz -n` (non-instrumented mode) is not effective.
|
||||
|
||||
The following is a description of how these binaries can be fuzzed with AFL++.
|
||||
|
||||
|
||||
## TL;DR:
|
||||
|
||||
qemu_mode in persistent mode is the fastest - if the stability is
|
||||
high enough. Otherwise try retrowrite, afl-dyninst and if these
|
||||
fail too then try standard qemu_mode with AFL_ENTRYPOINT to where you need it.
|
||||
|
||||
If your target is a library use utils/afl_frida/.
|
||||
|
||||
If your target is non-linux then use unicorn_mode/.
|
||||
|
||||
|
||||
## QEMU
|
||||
|
||||
QEMU is the "native" solution to the problem.
|
||||
It is available in the ./qemu_mode/ directory and once compiled it can
|
||||
be accessed by the afl-fuzz -Q command line option.
|
||||
It is the easiest to use alternative and even works for cross-platform binaries.
|
||||
|
||||
The speed decrease is at about 50%.
|
||||
However various options exist to increase the speed:
|
||||
- using AFL_ENTRYPOINT to move the forkserver entry to a later basic block in
|
||||
the binary (+5-10% speed)
|
||||
- using persistent mode [qemu_mode/README.persistent.md](../qemu_mode/README.persistent.md)
|
||||
this will result in 150-300% overall speed increase - so 3-8x the original
|
||||
qemu_mode speed!
|
||||
- using AFL_CODE_START/AFL_CODE_END to only instrument specific parts
|
||||
|
||||
Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz)
|
||||
which now has a qemu_mode, but its performance is just 1.5% ...
|
||||
|
||||
As it is included in AFL++ this needs no URL.
|
||||
|
||||
If you like to code a customized fuzzer without much work, we highly
|
||||
recommend checking out our sister project libafl which will support QEMU
|
||||
too:
|
||||
[https://github.com/AFLplusplus/LibAFL](https://github.com/AFLplusplus/LibAFL)
|
||||
|
||||
|
||||
## AFL FRIDA
|
||||
|
||||
In frida_mode you can fuzz binary-only targets easily like with QEMU,
|
||||
with the advantage that frida_mode also works on MacOS (both intel and M1).
|
||||
|
||||
If you want to fuzz a binary-only library then you can fuzz it with
|
||||
frida-gum via utils/afl_frida/, you will have to write a harness to
|
||||
call the target function in the library, use afl-frida.c as a template.
|
||||
|
||||
Both come with AFL++ so this needs no URL.
|
||||
|
||||
You can also perform remote fuzzing with frida, e.g. if you want to fuzz
|
||||
on iPhone or Android devices, for this you can use
|
||||
[https://github.com/ttdennis/fpicker/](https://github.com/ttdennis/fpicker/)
|
||||
as an intermediate that uses AFL++ for fuzzing.
|
||||
|
||||
If you like to code a customized fuzzer without much work, we highly
|
||||
recommend checking out our sister project libafl which supports Frida too:
|
||||
[https://github.com/AFLplusplus/LibAFL](https://github.com/AFLplusplus/LibAFL)
|
||||
Working examples already exist :-)
|
||||
|
||||
|
||||
## WINE+QEMU
|
||||
|
||||
Wine mode can run Win32 PE binaries with the QEMU instrumentation.
|
||||
It needs Wine, python3 and the pefile python package installed.
|
||||
|
||||
As it is included in AFL++ this needs no URL.
|
||||
|
||||
|
||||
## UNICORN
|
||||
|
||||
Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
|
||||
In contrast to QEMU, Unicorn does not offer a full system or even userland
|
||||
emulation. Runtime environment and/or loaders have to be written from scratch,
|
||||
if needed. On top, block chaining has been removed. This means the speed boost
|
||||
introduced in the patched QEMU Mode of AFL++ cannot simply be ported over to
|
||||
Unicorn. For further information, check out [unicorn_mode/README.md](../unicorn_mode/README.md).
|
||||
|
||||
As it is included in AFL++ this needs no URL.
|
||||
|
||||
|
||||
## AFL UNTRACER
|
||||
|
||||
If you want to fuzz a binary-only shared library then you can fuzz it with
|
||||
utils/afl_untracer/, use afl-untracer.c as a template.
|
||||
It is slower than AFL FRIDA (see above).
|
||||
|
||||
|
||||
## ZAFL
|
||||
ZAFL is a static rewriting platform supporting x86-64 C/C++, stripped/unstripped,
|
||||
and PIE/non-PIE binaries. Beyond conventional instrumentation, ZAFL's API enables
|
||||
transformation passes (e.g., laf-Intel, context sensitivity, InsTrim, etc.).
|
||||
|
||||
Its baseline instrumentation speed typically averages 90-95% of afl-clang-fast's.
|
||||
|
||||
[https://git.zephyr-software.com/opensrc/zafl](https://git.zephyr-software.com/opensrc/zafl)
|
||||
|
||||
|
||||
## DYNINST
|
||||
|
||||
Dyninst is a binary instrumentation framework similar to Pintool and
|
||||
Dynamorio (see far below). However whereas Pintool and Dynamorio work at
|
||||
runtime, dyninst instruments the target at load time, and then lets it run -
|
||||
or save the binary with the changes.
|
||||
This is great for some things, e.g. fuzzing, and not so effective for others,
|
||||
e.g. malware analysis.
|
||||
|
||||
So what we can do with dyninst is take every basic block, and put afl's
|
||||
instrumentation code in there - and then save the binary.
|
||||
Afterwards we can just fuzz the newly saved target binary with afl-fuzz.
|
||||
Sounds great? It is. The issue though - it is a non-trivial problem to
|
||||
insert instructions, which change addresses in the process space, so that
|
||||
everything is still working afterwards. Hence more often than not binaries
|
||||
crash when they are run.
|
||||
|
||||
The speed decrease is about 15-35%, depending on the optimization options
|
||||
used with afl-dyninst.
|
||||
|
||||
[https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst)
|
||||
|
||||
|
||||
## RETROWRITE
|
||||
|
||||
If you have an x86/x86_64 binary that still has its symbols, is compiled
|
||||
with position independent code (PIC/PIE) and does not use most of the C++
|
||||
features then the retrowrite solution might be for you.
|
||||
It decompiles to ASM files which can then be instrumented with afl-gcc.
|
||||
|
||||
It is at about 80-85% performance.
|
||||
|
||||
[https://github.com/HexHive/retrowrite](https://github.com/HexHive/retrowrite)
|
||||
|
||||
|
||||
## MCSEMA
|
||||
|
||||
Theoretically you can also decompile to llvm IR with mcsema, and then
|
||||
use llvm_mode to instrument the binary.
|
||||
Good luck with that.
|
||||
|
||||
[https://github.com/lifting-bits/mcsema](https://github.com/lifting-bits/mcsema)
|
||||
|
||||
|
||||
## INTEL-PT
|
||||
|
||||
If you have a newer Intel CPU, you can make use of Intel's Processor Trace.
|
||||
The big issue with Intel's PT is the small buffer size and the complex
|
||||
encoding of the debug information collected through PT.
|
||||
This makes the decoding very CPU intensive and hence slow.
|
||||
As a result, the overall speed decrease is about 70-90% (depending on
|
||||
the implementation and other factors).
|
||||
|
||||
There are two AFL intel-pt implementations:
|
||||
|
||||
1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt)
|
||||
=> this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel.
|
||||
|
||||
2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer)
|
||||
=> this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must
|
||||
be used. This one is faster than the other.
|
||||
|
||||
Note that there is also honggfuzz: https://github.com/google/honggfuzz
|
||||
But its IPT performance is just 6%!
|
||||
|
||||
|
||||
## CORESIGHT
|
||||
|
||||
Coresight is ARM's answer to Intel's PT.
|
||||
With afl++ v3.15 there is a coresight tracer implementation available in
|
||||
`coresight_mode/` which is faster than QEMU; however, it cannot run in parallel.
|
||||
Currently only one process can be traced, it is WIP.
|
||||
|
||||
|
||||
## PIN & DYNAMORIO
|
||||
|
||||
Pintool and Dynamorio are dynamic instrumentation engines, and they can be
|
||||
used for getting basic block information at runtime.
|
||||
Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows,
|
||||
whereas Dynamorio is additionally available for ARM and AARCH64.
|
||||
Dynamorio is also 10x faster than Pintool.
|
||||
|
||||
The big issue with Dynamorio (and therefore Pintool too) is speed.
|
||||
Dynamorio has a speed decrease of 98-99%
|
||||
Pintool has a speed decrease of 99.5%
|
||||
|
||||
Hence Dynamorio is the option to go for if everything else fails, and Pintool
|
||||
only if Dynamorio fails too.
|
||||
|
||||
Dynamorio solutions:
|
||||
* [https://github.com/vanhauser-thc/afl-dynamorio](https://github.com/vanhauser-thc/afl-dynamorio)
|
||||
* [https://github.com/mxmssh/drAFL](https://github.com/mxmssh/drAFL)
|
||||
* [https://github.com/googleprojectzero/winafl/](https://github.com/googleprojectzero/winafl/) <= very good but windows only
|
||||
|
||||
Pintool solutions:
|
||||
* [https://github.com/vanhauser-thc/afl-pin](https://github.com/vanhauser-thc/afl-pin)
|
||||
* [https://github.com/mothran/aflpin](https://github.com/mothran/aflpin)
|
||||
* [https://github.com/spinpx/afl_pin_mode](https://github.com/spinpx/afl_pin_mode) <= only old Pintool version supported
|
||||
|
||||
|
||||
## Non-AFL solutions
|
||||
|
||||
There are many binary-only fuzzing frameworks.
|
||||
Some are great for CTFs but don't work with large binaries, others are very
|
||||
slow but have good path discovery, some are very hard to set up ...
|
||||
|
||||
* QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym)
|
||||
* Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore)
|
||||
* S2E: [https://github.com/S2E](https://github.com/S2E)
|
||||
* Tinyinst: [https://github.com/googleprojectzero/TinyInst](https://github.com/googleprojectzero/TinyInst) (Mac/Windows only)
|
||||
* Jackalope: [https://github.com/googleprojectzero/Jackalope](https://github.com/googleprojectzero/Jackalope)
|
||||
* ... please send me any missing that are good
|
||||
|
||||
|
||||
## Closing words
|
||||
|
||||
That's it! News, corrections, updates? Send an email to vh@thc.org
|
@ -1,11 +0,0 @@
|
||||
# Branches
|
||||
|
||||
The following branches exist:
|
||||
|
||||
* [release](https://github.com/AFLplusplus/AFLplusplus/tree/release): the latest release
|
||||
* [stable/trunk](https://github.com/AFLplusplus/AFLplusplus/): stable state of AFL++ - it is synced from dev from time to time when we are satisfied with its stability
|
||||
* [dev](https://github.com/AFLplusplus/AFLplusplus/tree/dev): development state of AFL++ - bleeding edge and you might catch a checkout which does not compile or has a bug. *We only accept PRs in dev!!*
|
||||
* (any other): experimental branches to work on specific features or testing new functionality or changes.
|
||||
|
||||
For releases, please see the [Releases](https://github.com/AFLplusplus/AFLplusplus/releases) tab.
|
||||
Also take a look at the list of [important changes in AFL++](important_changes.md).
|
@ -1,19 +0,0 @@
|
||||
# Choosing initial test cases
|
||||
|
||||
To operate correctly, the fuzzer requires at least one starting file that
|
||||
contains a good example of the input data normally expected by the targeted
|
||||
application. There are two basic rules:
|
||||
|
||||
- Keep the files small. Under 1 kB is ideal, although not strictly necessary.
|
||||
For a discussion of why size matters, see [perf_tips.md](perf_tips.md).
|
||||
|
||||
- Use multiple test cases only if they are functionally different from
|
||||
each other. There is no point in using fifty different vacation photos
|
||||
to fuzz an image library.
|
||||
|
||||
You can find many good examples of starting files in the testcases/ subdirectory
|
||||
that comes with this tool.
|
||||
|
||||
PS. If a large corpus of data is available for screening, you may want to use
|
||||
the afl-cmin utility to identify a subset of functionally distinct files that
|
||||
exercise different code paths in the target binary.
|
@ -1,29 +0,0 @@
|
||||
# CI Fuzzing
|
||||
|
||||
Some notes on CI Fuzzing - this fuzzing is different from normal fuzzing campaigns as these runs are much shorter.
|
||||
|
||||
1. Always:
|
||||
* LTO has a much longer compile time which is at odds with short fuzzing - hence use afl-clang-fast instead.
|
||||
* If you compile with CMPLOG then you can save fuzzing time and reuse that compiled target for both the -c option and the main fuzz target.
|
||||
This will impact the speed by ~15% though.
|
||||
* `AFL_FAST_CAL` - Enable fast calibration, this halves the time the saturated corpus needs to be loaded.
|
||||
* `AFL_CMPLOG_ONLY_NEW` - only perform cmplog on newly found paths, not the initial corpus as this very likely has been done for them already.
|
||||
* Keep the generated corpus, use afl-cmin and reuse it every time!
|
||||
|
||||
2. Additionally randomize the AFL++ compilation options, e.g.
|
||||
* 40% for `AFL_LLVM_CMPLOG`
|
||||
* 10% for `AFL_LLVM_LAF_ALL`
|
||||
|
||||
3. Also randomize the afl-fuzz runtime options, e.g.
|
||||
* 65% for `AFL_DISABLE_TRIM`
|
||||
* 50% use a dictionary generated by `AFL_LLVM_DICT2FILE`
|
||||
* 40% use MOpt (`-L 0`)
|
||||
* 40% for `AFL_EXPAND_HAVOC_NOW`
|
||||
* 20% for old queue processing (`-Z`)
|
||||
* for CMPLOG targets, 60% for `-l 2`, 40% for `-l 3`
|
||||
|
||||
4. Do *not* run any `-M` modes, just running `-S` modes is better for CI fuzzing.
|
||||
`-M` enables old queue handling etc. which is good for a fuzzing campaign but not good for short CI runs.
|
||||
|
||||
What this can look like can be seen, e.g., in AFL++'s setup in Google's [oss-fuzz](https://github.com/google/oss-fuzz/blob/master/infra/base-images/base-builder/compile_afl)
|
||||
and [clusterfuzz](https://github.com/google/clusterfuzz/blob/master/src/clusterfuzz/_internal/bot/fuzzers/afl/launcher.py).
|
@ -1,36 +0,0 @@
|
||||
# Common sense risks
|
||||
|
||||
Please keep in mind that, similarly to many other computationally-intensive
|
||||
tasks, fuzzing may put a strain on your hardware and on the OS. In particular:
|
||||
|
||||
- Your CPU will run hot and will need adequate cooling. In most cases, if
|
||||
cooling is insufficient or stops working properly, CPU speeds will be
|
||||
automatically throttled. That said, especially when fuzzing on less
|
||||
suitable hardware (laptops, smartphones, etc), it's not entirely impossible
|
||||
for something to blow up.
|
||||
|
||||
- Targeted programs may end up erratically grabbing gigabytes of memory or
|
||||
filling up disk space with junk files. AFL++ tries to enforce basic memory
|
||||
limits, but can't prevent each and every possible mishap. The bottom line
|
||||
is that you shouldn't be fuzzing on systems where the prospect of data loss
|
||||
is not an acceptable risk.
|
||||
|
||||
- Fuzzing involves billions of reads and writes to the filesystem. On modern
|
||||
systems, this will be usually heavily cached, resulting in fairly modest
|
||||
"physical" I/O - but there are many factors that may alter this equation.
|
||||
It is your responsibility to monitor for potential trouble; with very heavy
|
||||
I/O, the lifespan of many HDDs and SSDs may be reduced.
|
||||
|
||||
A good way to monitor disk I/O on Linux is the 'iostat' command:
|
||||
|
||||
```shell
|
||||
$ iostat -d 3 -x -k [...optional disk ID...]
|
||||
```
|
||||
|
||||
Using the `AFL_TMPDIR` environment variable and a RAM-disk you can have the
|
||||
heavy writing done in RAM to prevent the aforementioned wear and tear. For
|
||||
example the following line will run a Docker container with all this preset:
|
||||
|
||||
```shell
|
||||
# docker run -ti --mount type=tmpfs,destination=/ramdisk -e AFL_TMPDIR=/ramdisk aflplusplus/aflplusplus
|
||||
```
|
@ -127,9 +127,9 @@ def deinit(): # optional for Python
|
||||
|
||||
- `describe` (optional):
|
||||
|
||||
When this function is called, it shall describe the current testcase,
|
||||
When this function is called, it shall describe the current test case,
|
||||
generated by the last mutation. This will be called, for example,
|
||||
to name the written testcase file after a crash occurred.
|
||||
to name the written test case file after a crash occurred.
|
||||
Using it can help to reproduce crashing mutations.
|
||||
|
||||
- `havoc_mutation` and `havoc_mutation_probability` (optional):
|
||||
@ -224,7 +224,7 @@ Optionally, the following environment variables are supported:
|
||||
|
||||
- `AFL_CUSTOM_MUTATOR_ONLY`
|
||||
|
||||
Disable all other mutation stages. This can prevent broken testcases
|
||||
Disable all other mutation stages. This can prevent broken test cases
|
||||
(those that your Python module can't work with anymore) to fill up your
|
||||
queue. Best combined with a custom trimming routine (see below) because
|
||||
trimming can cause the same test breakage like havoc and splice.
|
||||
|
124
docs/docs2.md
Normal file
124
docs/docs2.md
Normal file
@ -0,0 +1,124 @@
|
||||
# Restructure AFL++'s documentation - Case Study
|
||||
|
||||
## Problem statement
|
||||
|
||||
AFL++ inherited its documentation from the original Google AFL project.
|
||||
Since then it has been massively improved - feature and performance wise -
|
||||
and although the documentation has likewise been continued it has grown out
|
||||
of proportion.
|
||||
The documentation is done by non-natives to the English language, plus
|
||||
none of us has a writer background.
|
||||
|
||||
We see questions on AFL++ usage on mailing lists (e.g. afl-users), discord
|
||||
channels, web forums and as issues in our repository.
|
||||
Most of them could be answered if people would read through all the
|
||||
documentation.
|
||||
|
||||
This only increases as AFL++ has been on the top of Google's fuzzbench
|
||||
statistics (which measures the performance of fuzzers) and has been
|
||||
integrated in Google's oss-fuzz and clusterfuzz - and is in many Unix
|
||||
packaging repositories, e.g. Debian, FreeBSD, etc.
|
||||
|
||||
AFL++ had 44 (!) documentation files with 13k total lines of content.
|
||||
This was way too much.
|
||||
|
||||
## Proposal abstract
|
||||
|
||||
AFL++'s documentation needs a complete overhaul, both on a
|
||||
organisation/structural level as well as the content.
|
||||
|
||||
Overall the following actions have to be performed:
|
||||
* Create a better structure of documentation so it is easier to find the
|
||||
information that is being looked for, combining and/or splitting up the
|
||||
existing documents as needed.
|
||||
* Rewrite some documentation to remove duplication. Some information is
|
||||
present several times in the documentation. These should be removed to
|
||||
where needed so that we have as little bloat as possible.
|
||||
* The documents have been written and modified by a lot of different people,
|
||||
most of them non-native English speakers. Hence an overall review where
|
||||
parts should be rewritten has to be performed and then the rewrite done.
|
||||
* Create a cheat-sheet for a very short best-setup build and run of AFL++
|
||||
* Pictures explain more than 1000 words. We need at least 4 images that
|
||||
explain the workflow with AFL++:
|
||||
- the build workflow
|
||||
- the fuzzing workflow
|
||||
- the fuzzing campaign management workflow
|
||||
- the overall workflow that is an overview of the above
|
||||
- maybe more? where the technical writer deems it necessary for
|
||||
understanding.
|
||||
|
||||
Requirements:
|
||||
* Documentation has to be in Markdown format
|
||||
* Images have to be either in SVG or PNG format.
|
||||
* All documentation should be (moved) in(to) docs/
|
||||
|
||||
## Project description
|
||||
|
||||
We created our proposal by discussing in the team what the issues are and
|
||||
what was needed to fix it.
|
||||
This resulted in the [project proposal](https://github.com/AFLplusplus/AFLplusplus/blob/stable/docs/docs.md).
|
||||
|
||||
We did not want to be selected by a writer but select a writer ourselves, so
|
||||
we combed through the list and reviewed every single one of them.
|
||||
We were not looking for coders writing technical documentation, but rather
|
||||
someone who is an experienced writer and has documented experience with
|
||||
structuring documentation.
|
||||
Few fit that profile and we sent out messages to 6 people.
|
||||
We finally decided on Jana because she had a strong background in technical
|
||||
documentation and structuring information.
|
||||
She had no technical experience in fuzzing whatsoever, but we saw that as
|
||||
a plus - of course this made the whole process longer to explain details,
|
||||
but overall ensured that the documentation can be read by (mostly) everyone.
|
||||
|
||||
We communicated via video calls every few weeks and she kept a public kanban
|
||||
board about her todos, additionally we used a Signal channel.
|
||||
Her changes were imported via PRs where we discussed details.
|
||||
|
||||
The project was off to a good start, but then Jana got pregnant with serious
|
||||
side effects that made working impossible for her for a longer time, hence
|
||||
the schedule was thrown back.
|
||||
She offered to rescind the payment and we select a new writer, but we saw
|
||||
little opportunity in that, as that would mean a new selection of a writer,
|
||||
someone else with a different vision on how the result should look like so
|
||||
basically a full restart of the project and a large impact on our own time.
|
||||
So we agreed on - after discussion with the Google GSoD team - that she
|
||||
continues the project after the GSoD completion deadline as best as she can.
|
||||
|
||||
End of November she took one week off from work and fully dedicated her time
|
||||
for the documentation which brought the project a big step forward.
|
||||
|
||||
Originally the project should have been ended begin of October, but now - at
|
||||
nearing the end of November, we are at about 85% completion, with the end
|
||||
being expected around mid of December.
|
||||
|
||||
## Metrics
|
||||
|
||||
We merged most of the changes in our development branch and are getting
|
||||
close to a state where the user documentation part is completed and we
|
||||
can create a new release. Only then the new documentation is actually visible
|
||||
to users. Therefore no metrics could be collected so far.
|
||||
|
||||
We plan on a user-assisted QA review end of November/begin of December.
|
||||
|
||||
The documentation was reviewed by a few test users so far however who gave
|
||||
it a thumbs up.
|
||||
|
||||
## Summary
|
||||
|
||||
The GSoD project itself is great. It helps to get the documentation back in
|
||||
line.
|
||||
It was and is a larger time investment from our side, but we expected that.
|
||||
When the project is done, the documentation will be more accessible by users
|
||||
and also need less maintenance by us.
|
||||
There is still follow-up work to be done by us afterwards (web site for the
|
||||
docs, etc.).
|
||||
|
||||
Not sure what we would do differently next time. I think we prepared best as
|
||||
possible and reacted best as possible to the unexpected.
|
||||
|
||||
Recommendations for other organizations who would like to participate in GSoD:
|
||||
- expect the process to take a larger part of your time. The writer needs
|
||||
your full support.
|
||||
- have someone dedicated from the dev/org side to support, educate and
|
||||
supervise the writer
|
||||
- set clear goals and expectations
|
@ -143,7 +143,7 @@ Available options:
|
||||
- CLANG - outdated clang instrumentation
|
||||
- CLASSIC - classic AFL (map[cur_loc ^ prev_loc >> 1]++) (default)
|
||||
|
||||
You can also specify CTX and/or NGRAM, seperate the options with a comma ","
|
||||
You can also specify CTX and/or NGRAM, separate the options with a comma ","
|
||||
then, e.g.: `AFL_LLVM_INSTRUMENT=CLASSIC,CTX,NGRAM-4`
|
||||
|
||||
Note: It is actually not a good idea to use both CTX and NGRAM. :)
|
||||
@ -171,7 +171,7 @@ config.h to at least 18 and maybe up to 20 for this as otherwise too many map
|
||||
collisions occur.
|
||||
|
||||
For more information, see
|
||||
[instrumentation/README.ctx.md](../instrumentation/README.ctx.md).
|
||||
[instrumentation/README.llvm.md#6) AFL++ Context Sensitive Branch Coverage](../instrumentation/README.llvm.md#6-afl-context-sensitive-branch-coverage).
|
||||
|
||||
#### INSTRUMENT LIST (selectively instrument files and functions)
|
||||
|
||||
@ -247,7 +247,7 @@ in config.h to at least 18 and maybe up to 20 for this as otherwise too many map
|
||||
collisions occur.
|
||||
|
||||
For more information, see
|
||||
[instrumentation/README.ngram.md](../instrumentation/README.ngram.md).
|
||||
[instrumentation/README.llvm.md#7) AFL++ N-Gram Branch Coverage](../instrumentation/README.llvm.md#7-afl-n-gram-branch-coverage).
|
||||
|
||||
#### NOT_ZERO
|
||||
|
||||
@ -261,9 +261,6 @@ For more information, see
|
||||
If the target performs only a few loops, then this will give a small
|
||||
performance boost.
|
||||
|
||||
For more information, see
|
||||
[instrumentation/README.neverzero.md](../instrumentation/README.neverzero.md).
|
||||
|
||||
#### Thread safe instrumentation counters (in all modes)
|
||||
|
||||
Setting `AFL_LLVM_THREADSAFE_INST` will inject code that implements thread safe
|
||||
@ -306,8 +303,9 @@ checks or alter some of the more exotic semantics of the tool:
|
||||
exit soon after the first crash is found.
|
||||
|
||||
- `AFL_CMPLOG_ONLY_NEW` will only perform the expensive cmplog feature for
|
||||
newly found testcases and not for testcases that are loaded on startup (`-i
|
||||
in`). This is an important feature to set when resuming a fuzzing session.
|
||||
newly found test cases and not for test cases that are loaded on startup
|
||||
(`-i in`). This is an important feature to set when resuming a fuzzing
|
||||
session.
|
||||
|
||||
- Setting `AFL_CRASH_EXITCODE` sets the exit code AFL treats as crash. For
|
||||
example, if `AFL_CRASH_EXITCODE='-1'` is set, each input resulting in a `-1`
|
||||
@ -447,8 +445,8 @@ checks or alter some of the more exotic semantics of the tool:
|
||||
|
||||
- If you are using persistent mode (you should, see
|
||||
[instrumentation/README.persistent_mode.md](../instrumentation/README.persistent_mode.md)),
|
||||
some targets keep inherent state due which a detected crash testcase does
|
||||
not crash the target again when the testcase is given. To be able to still
|
||||
some targets keep inherent state due to which a detected crash test case does
|
||||
not crash the target again when the test case is given. To be able to still
|
||||
re-trigger these crashes, you can use the `AFL_PERSISTENT_RECORD` variable
|
||||
with a value of how many previous fuzz cases to keep prior to a crash. If set to
|
||||
e.g. 10, then the 9 previous inputs are written to out/default/crashes as
|
||||
@ -526,23 +524,23 @@ checks or alter some of the more exotic semantics of the tool:
|
||||
The QEMU wrapper used to instrument binary-only code supports several settings:
|
||||
|
||||
- Setting `AFL_COMPCOV_LEVEL` enables the CompareCoverage tracing of all cmp
|
||||
and sub in x86 and x86_64 and memory comparions functions (e.g. strcmp,
|
||||
and sub in x86 and x86_64 and memory comparison functions (e.g., strcmp,
|
||||
memcmp, ...) when libcompcov is preloaded using `AFL_PRELOAD`. More info at
|
||||
[qemu_mode/libcompcov/README.md](../qemu_mode/libcompcov/README.md).
|
||||
|
||||
There are two levels at the moment, `AFL_COMPCOV_LEVEL=1` that instruments
|
||||
only comparisons with immediate values / read-only memory and
|
||||
`AFL_COMPCOV_LEVEL=2` that instruments all the comparions. Level 2 is more
|
||||
`AFL_COMPCOV_LEVEL=2` that instruments all the comparisons. Level 2 is more
|
||||
accurate but may need a larger shared memory.
|
||||
|
||||
- `AFL_DEBUG` will print the found entrypoint for the binary to stderr. Use
|
||||
this if you are unsure if the entrypoint might be wrong - but use it
|
||||
- `AFL_DEBUG` will print the found entry point for the binary to stderr. Use
|
||||
this if you are unsure if the entry point might be wrong - but use it
|
||||
directly, e.g. `afl-qemu-trace ./program`.
|
||||
|
||||
- `AFL_ENTRYPOINT` allows you to specify a specific entrypoint into the binary
|
||||
(this can be very good for the performance!). The entrypoint is specified as
|
||||
hex address, e.g. `0x4004110`. Note that the address must be the address of
|
||||
a basic block.
|
||||
- `AFL_ENTRYPOINT` allows you to specify a specific entry point into the
|
||||
binary (this can be very good for the performance!). The entry point is
|
||||
specified as hex address, e.g. `0x4004110`. Note that the address must be
|
||||
the address of a basic block.
|
||||
|
||||
- Setting `AFL_INST_LIBS` causes the translator to also instrument the code
|
||||
inside any dynamically linked libraries (notably including glibc).
|
||||
@ -581,7 +579,92 @@ The QEMU wrapper used to instrument binary-only code supports several settings:
|
||||
emulation" variables (e.g., `QEMU_STACK_SIZE`), but there should be no
|
||||
reason to touch them.
|
||||
|
||||
## 6) Settings for afl-cmin
|
||||
## 7) Settings for afl-frida-trace
|
||||
|
||||
The FRIDA wrapper used to instrument binary-only code supports many of the same
|
||||
options as `afl-qemu-trace`, but also has a number of additional advanced
|
||||
options. These are listed in brief below (see [here](../frida_mode/README.md)
|
||||
for more details). These settings are provided for compatibility with QEMU mode,
|
||||
the preferred way to configure FRIDA mode is through its
|
||||
[scripting](../frida_mode/Scripting.md) support.
|
||||
|
||||
* `AFL_FRIDA_DEBUG_MAPS` - See `AFL_QEMU_DEBUG_MAPS`
|
||||
* `AFL_FRIDA_DRIVER_NO_HOOK` - See `AFL_QEMU_DRIVER_NO_HOOK`. When using the
|
||||
QEMU driver to provide a `main` loop for a user provided
|
||||
`LLVMFuzzerTestOneInput`, this option configures the driver to read input from
|
||||
`stdin` rather than using in-memory test cases.
|
||||
* `AFL_FRIDA_EXCLUDE_RANGES` - See `AFL_QEMU_EXCLUDE_RANGES`
|
||||
* `AFL_FRIDA_INST_COVERAGE_FILE` - File to write DynamoRio format coverage
|
||||
information (e.g. to be loaded within IDA lighthouse).
|
||||
* `AFL_FRIDA_INST_DEBUG_FILE` - File to write raw assembly of original blocks
|
||||
and their instrumented counterparts during block compilation.
|
||||
* `AFL_FRIDA_INST_JIT` - Enable the instrumentation of Just-In-Time compiled
|
||||
code. Code is considered to be JIT if the executable segment is not backed by a
|
||||
file.
|
||||
* `AFL_FRIDA_INST_NO_OPTIMIZE` - Don't use optimized inline assembly coverage
|
||||
instrumentation (the default where available). Required to use
|
||||
`AFL_FRIDA_INST_TRACE`.
|
||||
* `AFL_FRIDA_INST_NO_BACKPATCH` - Disable backpatching. At the end of executing
|
||||
each block, control will return to FRIDA to identify the next block to execute.
|
||||
* `AFL_FRIDA_INST_NO_PREFETCH` - Disable prefetching. By default the child will
|
||||
report instrumented blocks back to the parent so that it can also instrument
|
||||
them and they can be inherited by the next child on fork, implies
|
||||
`AFL_FRIDA_INST_NO_PREFETCH_BACKPATCH`.
|
||||
* `AFL_FRIDA_INST_NO_PREFETCH_BACKPATCH` - Disable prefetching of stalker
|
||||
backpatching information. By default the child will report applied backpatches
|
||||
to the parent so that they can be applied and then be inherited by the next
|
||||
child on fork.
|
||||
* `AFL_FRIDA_INST_RANGES` - See `AFL_QEMU_INST_RANGES`
|
||||
* `AFL_FRIDA_INST_SEED` - Sets the initial seed for the hash function used to
|
||||
generate block (and hence edge) IDs. Setting this to a constant value may be
|
||||
useful for debugging purposes, e.g. investigating unstable edges.
|
||||
* `AFL_FRIDA_INST_TRACE` - Log to stdout the address of executed blocks,
|
||||
implies `AFL_FRIDA_INST_NO_OPTIMIZE`.
|
||||
* `AFL_FRIDA_INST_TRACE_UNIQUE` - As per `AFL_FRIDA_INST_TRACE`, but each edge
|
||||
is logged only once, requires `AFL_FRIDA_INST_NO_OPTIMIZE`.
|
||||
* `AFL_FRIDA_INST_UNSTABLE_COVERAGE_FILE` - File to write DynamoRio format
|
||||
coverage information for unstable edges (e.g. to be loaded within IDA
|
||||
lighthouse).
|
||||
* `AFL_FRIDA_JS_SCRIPT` - Set the script to be loaded by the FRIDA scripting
|
||||
engine. See [here](../frida_mode/Scripting.md) for details.
|
||||
* `AFL_FRIDA_OUTPUT_STDOUT` - Redirect the standard output of the target
|
||||
application to the named file (supersedes the setting of `AFL_DEBUG_CHILD`)
|
||||
* `AFL_FRIDA_OUTPUT_STDERR` - Redirect the standard error of the target
|
||||
application to the named file (supersedes the setting of `AFL_DEBUG_CHILD`)
|
||||
* `AFL_FRIDA_PERSISTENT_ADDR` - See `AFL_QEMU_PERSISTENT_ADDR`
|
||||
* `AFL_FRIDA_PERSISTENT_CNT` - See `AFL_QEMU_PERSISTENT_CNT`
|
||||
* `AFL_FRIDA_PERSISTENT_DEBUG` - Insert a Breakpoint into the instrumented code
|
||||
at `AFL_FRIDA_PERSISTENT_HOOK` and `AFL_FRIDA_PERSISTENT_RET` to allow the user
|
||||
to detect issues in the persistent loop using a debugger.
|
||||
* `AFL_FRIDA_PERSISTENT_HOOK` - See `AFL_QEMU_PERSISTENT_HOOK`
|
||||
* `AFL_FRIDA_PERSISTENT_RET` - See `AFL_QEMU_PERSISTENT_RET`
|
||||
* `AFL_FRIDA_SECCOMP_FILE` - Write a log of any syscalls made by the target to
|
||||
the specified file.
|
||||
* `AFL_FRIDA_STALKER_ADJACENT_BLOCKS` - Configure the number of adjacent blocks
|
||||
to fetch when generating instrumented code. By fetching blocks in the same
|
||||
order they appear in the original program, rather than the order of execution
|
||||
should help reduce locality and adjacency. This includes allowing us to vector
|
||||
between adjacent blocks using a NOP slide rather than an immediate branch.
|
||||
* `AFL_FRIDA_STALKER_IC_ENTRIES` - Configure the number of inline cache entries
|
||||
stored along-side branch instructions which provide a cache to avoid having to
|
||||
call back into FRIDA to find the next block. Default is 32.
|
||||
* `AFL_FRIDA_STATS_FILE` - Write statistics information about the code being
|
||||
instrumented to the given file name. The statistics are written only for the
|
||||
child process when a new block is instrumented (when the
|
||||
`AFL_FRIDA_STATS_INTERVAL` has expired). Note that simply because a new path is
|
||||
found does not mean a new block needs to be compiled. It could simply be that
|
||||
the existing blocks instrumented have been executed in a different order.
|
||||
* `AFL_FRIDA_STATS_INTERVAL` - The maximum frequency to output statistics
|
||||
information. Stats will be written whenever they are updated if the given
|
||||
interval has elapsed since last time they were written.
|
||||
* `AFL_FRIDA_TRACEABLE` - Set the child process to be traceable by any process
|
||||
to aid debugging and overcome the restrictions imposed by YAMA. Supported on
|
||||
Linux only. Permits a non-root user to use `gcore` or similar to collect a core
|
||||
dump of the instrumented target. Note that in order to capture the core dump you
|
||||
must set a sufficient timeout (using `-t`) to avoid `afl-fuzz` killing the
|
||||
process whilst it is being dumped.
|
||||
|
||||
## 8) Settings for afl-cmin
|
||||
|
||||
The corpus minimization script offers very little customization:
|
||||
|
||||
@ -599,7 +682,7 @@ The corpus minimization script offers very little customization:
|
||||
- `AFL_PRINT_FILENAMES` prints each filename to stdout, as it gets processed.
|
||||
This can help when embedding `afl-cmin` or `afl-showmap` in other scripts.
|
||||
|
||||
## 7) Settings for afl-tmin
|
||||
## 9) Settings for afl-tmin
|
||||
|
||||
Virtually nothing to play with. Well, in QEMU mode (`-Q`), `AFL_PATH` will be
|
||||
searched for afl-qemu-trace. In addition to this, `TMPDIR` may be used if a
|
||||
@ -610,12 +693,12 @@ to match when minimizing crashes. This will make minimization less useful, but
|
||||
may prevent the tool from "jumping" from one crashing condition to another in
|
||||
very buggy software. You probably want to combine it with the `-e` flag.
|
||||
|
||||
## 8) Settings for afl-analyze
|
||||
## 10) Settings for afl-analyze
|
||||
|
||||
You can set `AFL_ANALYZE_HEX` to get file offsets printed as hexadecimal instead
|
||||
of decimal.
|
||||
|
||||
## 9) Settings for libdislocator
|
||||
## 11) Settings for libdislocator
|
||||
|
||||
The library honors these environment variables:
|
||||
|
||||
@ -637,12 +720,12 @@ The library honors these environment variables:
|
||||
- `AFL_LD_VERBOSE` causes the library to output some diagnostic messages that
|
||||
may be useful for pinpointing the cause of any observed issues.
|
||||
|
||||
## 10) Settings for libtokencap
|
||||
## 11) Settings for libtokencap
|
||||
|
||||
This library accepts `AFL_TOKEN_FILE` to indicate the location to which the
|
||||
discovered tokens should be written.
|
||||
|
||||
## 11) Third-party variables set by afl-fuzz & other tools
|
||||
## 12) Third-party variables set by afl-fuzz & other tools
|
||||
|
||||
Several variables are not directly interpreted by afl-fuzz, but are set to
|
||||
optimal values if not already present in the environment:
|
||||
@ -687,4 +770,4 @@ optimal values if not already present in the environment:
|
||||
|
||||
- By default, `LD_BIND_NOW` is set to speed up fuzzing by forcing the linker
|
||||
to do all the work before the fork server kicks in. You can override this by
|
||||
setting `LD_BIND_LAZY` beforehand, but it is almost certainly pointless.
|
||||
setting `LD_BIND_LAZY` beforehand, but it is almost certainly pointless.
|
||||
|
@ -1,49 +1,61 @@
|
||||
# Important features of AFL++
|
||||
|
||||
AFL++ supports llvm from 3.8 up to version 12, very fast binary fuzzing with QEMU 5.1
|
||||
with laf-intel and redqueen, frida mode, unicorn mode, gcc plugin, full *BSD,
|
||||
Mac OS, Solaris and Android support and much, much, much more.
|
||||
AFL++ supports llvm from 3.8 up to version 12, very fast binary fuzzing with
|
||||
QEMU 5.1 with laf-intel and redqueen, frida mode, unicorn mode, gcc plugin, full
|
||||
*BSD, Mac OS, Solaris and Android support and much, much, much more.
|
||||
|
||||
| Feature/Instrumentation | afl-gcc | llvm | gcc_plugin | frida_mode(9) | qemu_mode(10) |unicorn_mode(10) |coresight_mode(11)|
|
||||
| -------------------------|:-------:|:---------:|:----------:|:----------------:|:----------------:|:----------------:|:----------------:|
|
||||
| Threadsafe counters | | x(3) | | | | | |
|
||||
| NeverZero | x86[_64]| x(1) | x | x | x | x | |
|
||||
| Persistent Mode | | x | x | x86[_64]/arm64 | x86[_64]/arm[64] | x | |
|
||||
| LAF-Intel / CompCov | | x | | | x86[_64]/arm[64] | x86[_64]/arm[64] | |
|
||||
| CmpLog | | x | | x86[_64]/arm64 | x86[_64]/arm[64] | | |
|
||||
| Selective Instrumentation| | x | x | x | x | | |
|
||||
| Non-Colliding Coverage | | x(4) | | | (x)(5) | | |
|
||||
| Ngram prev_loc Coverage | | x(6) | | | | | |
|
||||
| Context Coverage | | x(6) | | | | | |
|
||||
| Auto Dictionary | | x(7) | | | | | |
|
||||
| Snapshot LKM Support | | (x)(8) | (x)(8) | | (x)(5) | | |
|
||||
| Shared Memory Testcases | | x | x | x86[_64]/arm64 | x | x | |
|
||||
| Feature/Instrumentation | afl-gcc | llvm | gcc_plugin | frida_mode(9) | qemu_mode(10) |unicorn_mode(10) |coresight_mode(11)|
|
||||
| -------------------------|:-------:|:---------:|:----------:|:----------------:|:----------------:|:----------------:|:----------------:|
|
||||
| Threadsafe counters | | x(3) | | | | | |
|
||||
| NeverZero | x86[_64]| x(1) | x | x | x | x | |
|
||||
| Persistent Mode | | x | x | x86[_64]/arm64 | x86[_64]/arm[64] | x | |
|
||||
| LAF-Intel / CompCov | | x | | | x86[_64]/arm[64] | x86[_64]/arm[64] | |
|
||||
| CmpLog | | x | | x86[_64]/arm64 | x86[_64]/arm[64] | | |
|
||||
| Selective Instrumentation| | x | x | x | x | | |
|
||||
| Non-Colliding Coverage | | x(4) | | | (x)(5) | | |
|
||||
| Ngram prev_loc Coverage | | x(6) | | | | | |
|
||||
| Context Coverage | | x(6) | | | | | |
|
||||
| Auto Dictionary | | x(7) | | | | | |
|
||||
| Snapshot LKM Support | | (x)(8) | (x)(8) | | (x)(5) | | |
|
||||
| Shared Memory Test cases | | x | x | x86[_64]/arm64 | x | x | |
|
||||
|
||||
1. default for LLVM >= 9.0, env var for older version due an efficiency bug in previous llvm versions
|
||||
2. GCC creates non-performant code, hence it is disabled in gcc_plugin
|
||||
3. with `AFL_LLVM_THREADSAFE_INST`, disables NeverZero
|
||||
4. with pcguard mode and LTO mode for LLVM 11 and newer
|
||||
5. upcoming, development in the branch
|
||||
6. not compatible with LTO instrumentation and needs at least LLVM v4.1
|
||||
7. automatic in LTO mode with LLVM 11 and newer, an extra pass for all LLVM versions that write to a file to use with afl-fuzz' `-x`
|
||||
8. the snapshot LKM is currently unmaintained due to too many kernel changes coming too fast :-(
|
||||
9. frida mode is supported on Linux and MacOS for Intel and ARM
|
||||
10. QEMU/Unicorn is only supported on Linux
|
||||
11. Coresight mode is only available on AARCH64 Linux with a CPU with Coresight extension
|
||||
1. default for LLVM >= 9.0, env var for older version due to an efficiency bug in
|
||||
previous llvm versions
|
||||
2. GCC creates non-performant code, hence it is disabled in gcc_plugin
|
||||
3. with `AFL_LLVM_THREADSAFE_INST`, disables NeverZero
|
||||
4. with pcguard mode and LTO mode for LLVM 11 and newer
|
||||
5. upcoming, development in the branch
|
||||
6. not compatible with LTO instrumentation and needs at least LLVM v4.1
|
||||
7. automatic in LTO mode with LLVM 11 and newer, an extra pass for all LLVM
|
||||
versions that write to a file to use with afl-fuzz' `-x`
|
||||
8. the snapshot LKM is currently unmaintained due to too many kernel changes
|
||||
coming too fast :-(
|
||||
9. frida mode is supported on Linux and MacOS for Intel and ARM
|
||||
10. QEMU/Unicorn is only supported on Linux
|
||||
11. Coresight mode is only available on AARCH64 Linux with a CPU with Coresight
|
||||
extension
|
||||
|
||||
Among others, the following features and patches have been integrated:
|
||||
Among others, the following features and patches have been integrated:
|
||||
|
||||
* NeverZero patch for afl-gcc, instrumentation, qemu_mode and unicorn_mode which prevents a wrapping map value to zero, increases coverage
|
||||
* Persistent mode, deferred forkserver and in-memory fuzzing for qemu_mode
|
||||
* Unicorn mode which allows fuzzing of binaries from completely different platforms (integration provided by domenukk)
|
||||
* The new CmpLog instrumentation for LLVM and QEMU inspired by [Redqueen](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf)
|
||||
* Win32 PE binary-only fuzzing with QEMU and Wine
|
||||
* AFLfast's power schedules by Marcel Böhme: [https://github.com/mboehme/aflfast](https://github.com/mboehme/aflfast)
|
||||
* The MOpt mutator: [https://github.com/puppet-meteor/MOpt-AFL](https://github.com/puppet-meteor/MOpt-AFL)
|
||||
* LLVM mode Ngram coverage by Adrian Herrera [https://github.com/adrianherrera/afl-ngram-pass](https://github.com/adrianherrera/afl-ngram-pass)
|
||||
* LAF-Intel/CompCov support for instrumentation, qemu_mode and unicorn_mode (with enhanced capabilities)
|
||||
* Radamsa and honggfuzz mutators (as custom mutators).
|
||||
* QBDI mode to fuzz android native libraries via Quarkslab's [QBDI](https://github.com/QBDI/QBDI) framework
|
||||
* Frida and ptrace mode to fuzz binary-only libraries, etc.
|
||||
* NeverZero patch for afl-gcc, instrumentation, qemu_mode and unicorn_mode which
|
||||
prevents a wrapping map value to zero, increases coverage
|
||||
* Persistent mode, deferred forkserver and in-memory fuzzing for qemu_mode
|
||||
* Unicorn mode which allows fuzzing of binaries from completely different
|
||||
platforms (integration provided by domenukk)
|
||||
* The new CmpLog instrumentation for LLVM and QEMU inspired by
|
||||
[Redqueen](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf)
|
||||
* Win32 PE binary-only fuzzing with QEMU and Wine
|
||||
* AFLfast's power schedules by Marcel Böhme:
|
||||
[https://github.com/mboehme/aflfast](https://github.com/mboehme/aflfast)
|
||||
* The MOpt mutator:
|
||||
[https://github.com/puppet-meteor/MOpt-AFL](https://github.com/puppet-meteor/MOpt-AFL)
|
||||
* LLVM mode Ngram coverage by Adrian Herrera
|
||||
[https://github.com/adrianherrera/afl-ngram-pass](https://github.com/adrianherrera/afl-ngram-pass)
|
||||
* LAF-Intel/CompCov support for instrumentation, qemu_mode and unicorn_mode
|
||||
(with enhanced capabilities)
|
||||
* Radamsa and honggfuzz mutators (as custom mutators).
|
||||
* QBDI mode to fuzz android native libraries via Quarkslab's
|
||||
[QBDI](https://github.com/QBDI/QBDI) framework
|
||||
* Frida and ptrace mode to fuzz binary-only libraries, etc.
|
||||
|
||||
So all in all this is the best-of AFL that is out there :-)
|
||||
So all in all this is the best-of AFL that is out there :-)
|
@ -1,83 +1,293 @@
|
||||
# Fuzzing binary-only targets
|
||||
|
||||
When source code is *NOT* available, AFL++ offers various support for fast,
|
||||
on-the-fly instrumentation of black-box binaries.
|
||||
AFL++, libfuzzer, and other fuzzers are great if you have the source code of the
|
||||
target. This allows for very fast and coverage guided fuzzing.
|
||||
|
||||
If you do not have to use Unicorn the following setup is recommended to use
|
||||
qemu_mode:
|
||||
* run 1 afl-fuzz -Q instance with CMPLOG (`-c 0` + `AFL_COMPCOV_LEVEL=2`)
|
||||
* run 1 afl-fuzz -Q instance with QASAN (`AFL_USE_QASAN=1`)
|
||||
* run 1 afl-fuzz -Q instance with LAF (`AFL_PRELOAD=libcmpcov.so` + `AFL_COMPCOV_LEVEL=2`)
|
||||
Alternatively you can use frida_mode, just switch `-Q` with `-O` and remove the
|
||||
LAF instance.
|
||||
However, if there is only the binary program and no source code available, then
|
||||
standard `afl-fuzz -n` (non-instrumented mode) is not effective.
|
||||
|
||||
Then run as many instances as you have cores left with either -Q mode or - better -
|
||||
use a binary rewriter like afl-dyninst, retrowrite, zafl, etc.
|
||||
For fast, on-the-fly instrumentation of black-box binaries, AFL++ still offers
|
||||
various support. The following is a description of how these binaries can be
|
||||
fuzzed with AFL++.
|
||||
|
||||
For Qemu and Frida mode, check out the persistent mode, it gives a huge speed
|
||||
improvement if it is possible to use.
|
||||
## TL;DR:
|
||||
|
||||
### QEMU
|
||||
Qemu_mode in persistent mode is the fastest - if the stability is high enough.
|
||||
Otherwise, try RetroWrite, Dyninst, and if these fail, too, then try standard
|
||||
qemu_mode with AFL_ENTRYPOINT to where you need it.
|
||||
|
||||
For linux programs and its libraries this is accomplished with a version of
|
||||
QEMU running in the lesser-known "user space emulation" mode.
|
||||
QEMU is a project separate from AFL, but you can conveniently build the
|
||||
feature by doing:
|
||||
If your target is a library, then use frida_mode.
|
||||
|
||||
If your target is non-linux, then use unicorn_mode.
|
||||
|
||||
## Fuzzing binary-only targets with AFL++
|
||||
### Qemu_mode
|
||||
|
||||
Qemu_mode is the "native" solution to the program. It is available in the
|
||||
./qemu_mode/ directory and, once compiled, it can be accessed by the afl-fuzz -Q
|
||||
command line option. It is the easiest to use alternative and even works for
|
||||
cross-platform binaries.
|
||||
|
||||
For Linux programs and their libraries, this is accomplished with a version of
|
||||
QEMU running in the lesser-known "user space emulation" mode. QEMU is a project
|
||||
separate from AFL++, but you can conveniently build the feature by doing:
|
||||
|
||||
```shell
|
||||
cd qemu_mode
|
||||
./build_qemu_support.sh
|
||||
```
|
||||
|
||||
For additional instructions and caveats, see [qemu_mode/README.md](../qemu_mode/README.md).
|
||||
If possible you should use the persistent mode, see [qemu_mode/README.persistent.md](../qemu_mode/README.persistent.md).
|
||||
The mode is approximately 2-5x slower than compile-time instrumentation, and is
|
||||
less conducive to parallelization.
|
||||
The following setup to use qemu_mode is recommended:
|
||||
* run 1 afl-fuzz -Q instance with CMPLOG (`-c 0` + `AFL_COMPCOV_LEVEL=2`)
|
||||
* run 1 afl-fuzz -Q instance with QASAN (`AFL_USE_QASAN=1`)
|
||||
* run 1 afl-fuzz -Q instance with LAF (`AFL_PRELOAD=libcmpcov.so` +
|
||||
`AFL_COMPCOV_LEVEL=2`), alternatively you can use frida_mode, just switch `-Q`
|
||||
with `-O` and remove the LAF instance
|
||||
|
||||
If [afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) works for
|
||||
your binary, then you can use afl-fuzz normally and it will have twice
|
||||
the speed compared to qemu_mode (but slower than qemu persistent mode).
|
||||
Note that several other binary rewriters exist, all with their advantages and
|
||||
caveats.
|
||||
Then run as many instances as you have cores left with either -Q mode or - even
|
||||
better - use a binary rewriter like Dyninst, RetroWrite, ZAFL, etc.
|
||||
|
||||
### Frida
|
||||
If [afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) works for your
|
||||
binary, then you can use afl-fuzz normally and it will have twice the speed
|
||||
compared to qemu_mode (but slower than qemu persistent mode). Note that several
|
||||
other binary rewriters exist, all with their advantages and caveats.
|
||||
|
||||
Frida mode is sometimes faster and sometimes slower than Qemu mode.
|
||||
It is also newer, lacks COMPCOV, but supports MacOS.
|
||||
The speed decrease of qemu_mode is at about 50%. However, various options exist
|
||||
to increase the speed:
|
||||
- using AFL_ENTRYPOINT to move the forkserver entry to a later basic block in
|
||||
the binary (+5-10% speed)
|
||||
- using persistent mode
|
||||
[qemu_mode/README.persistent.md](../qemu_mode/README.persistent.md) this will
|
||||
result in a 150-300% overall speed increase - so 3-8x the original qemu_mode
|
||||
speed!
|
||||
- using AFL_CODE_START/AFL_CODE_END to only instrument specific parts
|
||||
|
||||
For additional instructions and caveats, see
|
||||
[qemu_mode/README.md](../qemu_mode/README.md). If possible, you should use the
|
||||
persistent mode, see
|
||||
[qemu_mode/README.persistent.md](../qemu_mode/README.persistent.md). The mode is
|
||||
approximately 2-5x slower than compile-time instrumentation, and is less
|
||||
conducive to parallelization.
|
||||
|
||||
Note that there is also honggfuzz:
|
||||
[https://github.com/google/honggfuzz](https://github.com/google/honggfuzz) which
|
||||
now has a qemu_mode, but its performance is just 1.5% ...
|
||||
|
||||
If you like to code a customized fuzzer without much work, we highly recommend
|
||||
to check out our sister project libafl which supports QEMU, too:
|
||||
[https://github.com/AFLplusplus/LibAFL](https://github.com/AFLplusplus/LibAFL)
|
||||
|
||||
### WINE+QEMU
|
||||
|
||||
Wine mode can run Win32 PE binaries with the QEMU instrumentation. It needs
|
||||
Wine, python3, and the pefile python package installed.
|
||||
|
||||
It is included in AFL++.
|
||||
|
||||
For more information, see [qemu_mode/README.wine.md](../qemu_mode/README.wine.md).
|
||||
|
||||
### Frida_mode
|
||||
|
||||
In frida_mode, you can fuzz binary-only targets as easily as with QEMU.
|
||||
Frida_mode is sometimes faster and sometimes slower than Qemu_mode. It is also
|
||||
newer, lacks COMPCOV, and has the advantage that it works on MacOS (both intel
|
||||
and M1).
|
||||
|
||||
To build frida_mode:
|
||||
|
||||
```shell
|
||||
cd frida_mode
|
||||
make
|
||||
```
|
||||
|
||||
For additional instructions and caveats, see [frida_mode/README.md](../frida_mode/README.md).
|
||||
If possible, you should use the persistent mode, see [frida_mode/README.md](../frida_mode/README.md).
|
||||
The mode is approximately 2-5x slower than compile-time instrumentation, and is
|
||||
less conducive to parallelization.
|
||||
For additional instructions and caveats, see
|
||||
[frida_mode/README.md](../frida_mode/README.md).
|
||||
|
||||
If possible, you should use the persistent mode, see
|
||||
[frida_mode/README.md](../frida_mode/README.md). The mode is approximately 2-5x
|
||||
slower than compile-time instrumentation, and is less conducive to
|
||||
parallelization. But for binary-only fuzzing, it gives a huge speed improvement
|
||||
if it is possible to use.
|
||||
|
||||
If you want to fuzz a binary-only library, then you can fuzz it with frida-gum
|
||||
via frida_mode/. You will have to write a harness to call the target function in
|
||||
the library, use afl-frida.c as a template.
|
||||
|
||||
You can also perform remote fuzzing with frida, e.g. if you want to fuzz on
|
||||
iPhone or Android devices, for this you can use
|
||||
[https://github.com/ttdennis/fpicker/](https://github.com/ttdennis/fpicker/) as
|
||||
an intermediate that uses AFL++ for fuzzing.
|
||||
|
||||
If you like to code a customized fuzzer without much work, we highly recommend
|
||||
to check out our sister project libafl which supports Frida, too:
|
||||
[https://github.com/AFLplusplus/LibAFL](https://github.com/AFLplusplus/LibAFL).
|
||||
Working examples already exist :-)
|
||||
|
||||
### Unicorn
|
||||
|
||||
For non-Linux binaries you can use AFL++'s unicorn mode which can emulate
|
||||
anything you want - for the price of speed and user written scripts.
|
||||
See [unicorn_mode/README.md](../unicorn_mode/README.md).
|
||||
Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. In
|
||||
contrast to QEMU, Unicorn does not offer a full system or even userland
|
||||
emulation. Runtime environment and/or loaders have to be written from scratch,
|
||||
if needed. On top, block chaining has been removed. This means the speed boost
|
||||
introduced in the patched QEMU Mode of AFL++ cannot simply be ported over to
|
||||
Unicorn.
|
||||
|
||||
For non-Linux binaries, you can use AFL++'s unicorn_mode which can emulate
|
||||
anything you want - for the price of speed and user written scripts.
|
||||
|
||||
To build unicorn_mode:
|
||||
|
||||
It can be easily built by:
|
||||
```shell
|
||||
cd unicorn_mode
|
||||
./build_unicorn_support.sh
|
||||
```
|
||||
|
||||
For further information, check out
|
||||
[unicorn_mode/README.md](../unicorn_mode/README.md).
|
||||
|
||||
### Shared libraries
|
||||
|
||||
If the goal is to fuzz a dynamic library then there are two options available.
|
||||
For both you need to write a small harness that loads and calls the library.
|
||||
Then you fuzz this with either frida_mode or qemu_mode, and either use
|
||||
If the goal is to fuzz a dynamic library, then there are two options available.
|
||||
For both, you need to write a small harness that loads and calls the library.
|
||||
Then you fuzz this with either frida_mode or qemu_mode and either use
|
||||
`AFL_INST_LIBS=1` or `AFL_QEMU/FRIDA_INST_RANGES`.
|
||||
|
||||
Another, less precise and slower option is using ptrace with debugger interrupt
|
||||
instrumentation: [utils/afl_untracer/README.md](../utils/afl_untracer/README.md).
|
||||
Another, less precise and slower option is to fuzz it with utils/afl_untracer/
|
||||
and use afl-untracer.c as a template. It is slower than frida_mode.
|
||||
|
||||
### More
|
||||
For more information, see
|
||||
[utils/afl_untracer/README.md](../utils/afl_untracer/README.md).
|
||||
|
||||
A more comprehensive description of these and other options can be found in
|
||||
[binaryonly_fuzzing.md](binaryonly_fuzzing.md).
|
||||
### Coresight
|
||||
|
||||
Coresight is ARM's answer to Intel's PT. With AFL++ v3.15, there is a coresight
|
||||
tracer implementation available in `coresight_mode/` which is faster than QEMU,
|
||||
however, cannot run in parallel. Currently, only one process can be traced, it
|
||||
is WIP.
|
||||
|
||||
For more information, see
|
||||
[coresight_mode/README.md](../coresight_mode/README.md).
|
||||
|
||||
## Binary rewriters
|
||||
|
||||
Binary rewriters are an alternative solution. They are faster than the solutions native to AFL++ but don't always work.
|
||||
|
||||
### ZAFL
|
||||
ZAFL is a static rewriting platform supporting x86-64 C/C++,
|
||||
stripped/unstripped, and PIE/non-PIE binaries. Beyond conventional
|
||||
instrumentation, ZAFL's API enables transformation passes (e.g., laf-Intel,
|
||||
context sensitivity, InsTrim, etc.).
|
||||
|
||||
Its baseline instrumentation speed typically averages 90-95% of
|
||||
afl-clang-fast's.
|
||||
|
||||
[https://git.zephyr-software.com/opensrc/zafl](https://git.zephyr-software.com/opensrc/zafl)
|
||||
|
||||
### RetroWrite
|
||||
|
||||
If you have an x86/x86_64 binary that still has its symbols, is compiled with
|
||||
position independent code (PIC/PIE), and does not use most of the C++ features,
|
||||
then the RetroWrite solution might be for you. It decompiles to ASM files which
|
||||
can then be instrumented with afl-gcc.
|
||||
|
||||
It is at about 80-85% performance.
|
||||
|
||||
[https://github.com/HexHive/retrowrite](https://github.com/HexHive/retrowrite)
|
||||
|
||||
### Dyninst
|
||||
|
||||
Dyninst is a binary instrumentation framework similar to Pintool and DynamoRIO.
|
||||
However, whereas Pintool and DynamoRIO work at runtime, Dyninst instruments the
|
||||
target at load time and then lets it run - or saves the binary with the changes.
|
||||
This is great for some things, e.g. fuzzing, and not so effective for others,
|
||||
e.g. malware analysis.
|
||||
|
||||
So, what we can do with Dyninst is take every basic block and put AFL++'s
|
||||
instrumentation code in there - and then save the binary. Afterwards, we can
|
||||
just fuzz the newly saved target binary with afl-fuzz. Sounds great? It is. The
|
||||
issue though - it is a non-trivial problem to insert instructions, which change
|
||||
addresses in the process space, so that everything is still working afterwards.
|
||||
Hence, more often than not binaries crash when they are run.
|
||||
|
||||
The speed decrease is about 15-35%, depending on the optimization options used
|
||||
with afl-dyninst.
|
||||
|
||||
[https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst)
|
||||
|
||||
### Mcsema
|
||||
|
||||
Theoretically, you can also decompile to llvm IR with mcsema, and then use
|
||||
llvm_mode to instrument the binary. Good luck with that.
|
||||
|
||||
[https://github.com/lifting-bits/mcsema](https://github.com/lifting-bits/mcsema)
|
||||
|
||||
## Binary tracers
|
||||
|
||||
### Pintool & DynamoRIO
|
||||
|
||||
Pintool and DynamoRIO are dynamic instrumentation engines. They can be used for
|
||||
getting basic block information at runtime. Pintool is only available for Intel
|
||||
x32/x64 on Linux, Mac OS, and Windows, whereas DynamoRIO is additionally
|
||||
available for ARM and AARCH64. DynamoRIO is also 10x faster than Pintool.
|
||||
|
||||
The big issue with DynamoRIO (and therefore Pintool, too) is speed. DynamoRIO
|
||||
has a speed decrease of 98-99%, Pintool has a speed decrease of 99.5%.
|
||||
|
||||
Hence, DynamoRIO is the option to go for if everything else fails and Pintool
|
||||
only if DynamoRIO fails, too.
|
||||
|
||||
DynamoRIO solutions:
|
||||
* [https://github.com/vanhauser-thc/afl-dynamorio](https://github.com/vanhauser-thc/afl-dynamorio)
|
||||
* [https://github.com/mxmssh/drAFL](https://github.com/mxmssh/drAFL)
|
||||
* [https://github.com/googleprojectzero/winafl/](https://github.com/googleprojectzero/winafl/)
|
||||
<= very good but windows only
|
||||
|
||||
Pintool solutions:
|
||||
* [https://github.com/vanhauser-thc/afl-pin](https://github.com/vanhauser-thc/afl-pin)
|
||||
* [https://github.com/mothran/aflpin](https://github.com/mothran/aflpin)
|
||||
* [https://github.com/spinpx/afl_pin_mode](https://github.com/spinpx/afl_pin_mode)
|
||||
<= only old Pintool version supported
|
||||
|
||||
### Intel PT
|
||||
|
||||
If you have a newer Intel CPU, you can make use of Intel's processor trace. The
|
||||
big issue with Intel's PT is the small buffer size and the complex encoding of
|
||||
the debug information collected through PT. This makes the decoding very CPU
|
||||
intensive and hence slow. As a result, the overall speed decrease is about
|
||||
70-90% (depending on the implementation and other factors).
|
||||
|
||||
There are two AFL intel-pt implementations:
|
||||
|
||||
1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt)
|
||||
=> This needs Ubuntu 14.04.05 without any updates and the 4.4 kernel.
|
||||
|
||||
2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer)
|
||||
=> This needs a 4.14 or 4.15 kernel. The "nopti" kernel boot option must be
|
||||
used. This one is faster than the other.
|
||||
|
||||
Note that there is also honggfuzz:
|
||||
[https://github.com/google/honggfuzz](https://github.com/google/honggfuzz). But
|
||||
its IPT performance is just 6%!
|
||||
|
||||
## Non-AFL++ solutions
|
||||
|
||||
There are many binary-only fuzzing frameworks. Some are great for CTFs but don't
|
||||
work with large binaries, others are very slow but have good path discovery,
|
||||
some are very hard to set up...
|
||||
|
||||
|
||||
* Jackalope:
|
||||
[https://github.com/googleprojectzero/Jackalope](https://github.com/googleprojectzero/Jackalope)
|
||||
* Manticore:
|
||||
[https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore)
|
||||
* QSYM:
|
||||
[https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym)
|
||||
* S2E: [https://github.com/S2E](https://github.com/S2E)
|
||||
* TinyInst:
|
||||
[https://github.com/googleprojectzero/TinyInst](https://github.com/googleprojectzero/TinyInst)
|
||||
(Mac/Windows only)
|
||||
* ... please send me any missing that are good
|
||||
|
||||
## Closing words
|
||||
|
||||
That's it! News, corrections, updates? Send an email to vh@thc.org.
|
@ -1,630 +0,0 @@
|
||||
# Fuzzing with AFL++
|
||||
|
||||
The following describes how to fuzz with a target if source code is available.
|
||||
If you have a binary-only target, please skip to [Instrumenting binary-only apps](#instrumenting-binary-only-apps).
|
||||
|
||||
Fuzzing source code is a three-step process.
|
||||
|
||||
1. Compile the target with a special compiler that prepares the target to be
|
||||
fuzzed efficiently. This step is called "instrumenting a target".
|
||||
2. Prepare the fuzzing by selecting and optimizing the input corpus for the
|
||||
target.
|
||||
3. Perform the fuzzing of the target by randomly mutating input and assessing
|
||||
if a generated input was processed in a new path in the target binary.
|
||||
|
||||
### 1. Instrumenting the target
|
||||
|
||||
#### a) Selecting the best AFL++ compiler for instrumenting the target
|
||||
|
||||
AFL++ comes with a central compiler `afl-cc` that incorporates various different
|
||||
kinds of compiler targets and instrumentation options.
|
||||
The following evaluation flow will help you to select the best possible.
|
||||
|
||||
It is highly recommended to have the newest llvm version possible installed,
|
||||
anything below 9 is not recommended.
|
||||
|
||||
```
|
||||
+--------------------------------+
|
||||
| clang/clang++ 11+ is available | --> use LTO mode (afl-clang-lto/afl-clang-lto++)
|
||||
+--------------------------------+ see [instrumentation/README.lto.md](instrumentation/README.lto.md)
|
||||
|
|
||||
| if not, or if the target fails with LTO afl-clang-lto/++
|
||||
|
|
||||
v
|
||||
+---------------------------------+
|
||||
| clang/clang++ 3.8+ is available | --> use LLVM mode (afl-clang-fast/afl-clang-fast++)
|
||||
+---------------------------------+ see [instrumentation/README.llvm.md](instrumentation/README.llvm.md)
|
||||
|
|
||||
| if not, or if the target fails with LLVM afl-clang-fast/++
|
||||
|
|
||||
v
|
||||
+--------------------------------+
|
||||
| gcc 5+ is available | -> use GCC_PLUGIN mode (afl-gcc-fast/afl-g++-fast)
|
||||
+--------------------------------+ see [instrumentation/README.gcc_plugin.md](instrumentation/README.gcc_plugin.md) and
|
||||
[instrumentation/README.instrument_list.md](instrumentation/README.instrument_list.md)
|
||||
|
|
||||
| if not, or if you do not have a gcc with plugin support
|
||||
|
|
||||
v
|
||||
use GCC mode (afl-gcc/afl-g++) (or afl-clang/afl-clang++ for clang)
|
||||
```
|
||||
|
||||
Clickable README links for the chosen compiler:
|
||||
|
||||
* [LTO mode - afl-clang-lto](../instrumentation/README.lto.md)
|
||||
* [LLVM mode - afl-clang-fast](../instrumentation/README.llvm.md)
|
||||
* [GCC_PLUGIN mode - afl-gcc-fast](../instrumentation/README.gcc_plugin.md)
|
||||
* GCC/CLANG modes (afl-gcc/afl-clang) have no README as they have no features of their own
|
||||
|
||||
You can select the mode for the afl-cc compiler by:
|
||||
1. use a symlink to afl-cc: afl-gcc, afl-g++, afl-clang, afl-clang++,
|
||||
afl-clang-fast, afl-clang-fast++, afl-clang-lto, afl-clang-lto++,
|
||||
afl-gcc-fast, afl-g++-fast (recommended!)
|
||||
2. using the environment variable AFL_CC_COMPILER with MODE
|
||||
3. passing --afl-MODE command line options to the compiler via CFLAGS/CXXFLAGS/CPPFLAGS
|
||||
|
||||
MODE can be one of: LTO (afl-clang-lto*), LLVM (afl-clang-fast*), GCC_PLUGIN
|
||||
(afl-g*-fast) or GCC (afl-gcc/afl-g++) or CLANG(afl-clang/afl-clang++).
|
||||
|
||||
Because no AFL specific command-line options are accepted (beside the
|
||||
--afl-MODE command), the compile-time tools make fairly broad use of environment
|
||||
variables, which can be listed with `afl-cc -hh` or by reading [env_variables.md](env_variables.md).
|
||||
|
||||
#### b) Selecting instrumentation options
|
||||
|
||||
The following options are available when you instrument with LLVM or LTO mode (afl-clang-fast/afl-clang-lto):
|
||||
|
||||
* Splitting integer, string, float and switch comparisons so AFL++ can more easily
|
||||
solve these. This is an important option if you do not have a very good
|
||||
and large input corpus. This technique is called laf-intel or COMPCOV.
|
||||
To use this set the following environment variable before compiling the
|
||||
target: `export AFL_LLVM_LAF_ALL=1`
|
||||
You can read more about this in [instrumentation/README.laf-intel.md](../instrumentation/README.laf-intel.md)
|
||||
* A different technique (and usually a better one than laf-intel) is to
|
||||
instrument the target so that any compare values in the target are sent to
|
||||
AFL++ which then tries to put these values into the fuzzing data at different
|
||||
locations. This technique is very fast and good - if the target does not
|
||||
transform input data before comparison. Therefore this technique is called
|
||||
`input to state` or `redqueen`.
|
||||
If you want to use this technique, then you have to compile the target
|
||||
twice, once specifically with/for this mode by setting `AFL_LLVM_CMPLOG=1`,
|
||||
and pass this binary to afl-fuzz via the `-c` parameter.
|
||||
Note that you can compile also just a cmplog binary and use that for both
|
||||
however there will be a performance penalty.
|
||||
You can read more about this in [instrumentation/README.cmplog.md](../instrumentation/README.cmplog.md)
|
||||
|
||||
If you use LTO, LLVM or GCC_PLUGIN mode (afl-clang-fast/afl-clang-lto/afl-gcc-fast)
|
||||
you have the option to selectively only instrument parts of the target that you
|
||||
are interested in:
|
||||
|
||||
* To instrument only those parts of the target that you are interested in
|
||||
create a file with all the filenames of the source code that should be
|
||||
instrumented.
|
||||
For afl-clang-lto and afl-gcc-fast - or afl-clang-fast if a mode other than
|
||||
DEFAULT/PCGUARD is used or you have llvm > 10.0.0 - just put one
|
||||
filename or function per line (no directory information necessary for
|
||||
filenames), and either set `export AFL_LLVM_ALLOWLIST=allowlist.txt` **or**
|
||||
`export AFL_LLVM_DENYLIST=denylist.txt` - depending on if you want per
|
||||
default to instrument unless noted (DENYLIST) or not perform instrumentation
|
||||
unless requested (ALLOWLIST).
|
||||
**NOTE:** During optimization functions might be inlined and then would not match!
|
||||
See [instrumentation/README.instrument_list.md](../instrumentation/README.instrument_list.md)
|
||||
|
||||
There are many more options and modes available however these are most of the
|
||||
time less effective. See:
|
||||
* [instrumentation/README.ctx.md](../instrumentation/README.ctx.md)
|
||||
* [instrumentation/README.ngram.md](../instrumentation/README.ngram.md)
|
||||
|
||||
AFL++ performs "never zero" counting in its bitmap. You can read more about this
|
||||
here:
|
||||
* [instrumentation/README.neverzero.md](../instrumentation/README.neverzero.md)
|
||||
|
||||
#### c) Sanitizers
|
||||
|
||||
It is possible to use sanitizers when instrumenting targets for fuzzing,
|
||||
which allows you to find bugs that would not necessarily result in a crash.
|
||||
|
||||
Note that sanitizers have a huge impact on CPU (= less executions per second)
|
||||
and RAM usage. Also you should only run one afl-fuzz instance per sanitizer type.
|
||||
This is enough because a use-after-free bug will be picked up, e.g. by
|
||||
ASAN (address sanitizer) anyway when syncing to other fuzzing instances,
|
||||
so not all fuzzing instances need to be instrumented with ASAN.
|
||||
|
||||
The following sanitizers have built-in support in AFL++:
|
||||
* ASAN = Address SANitizer, finds memory corruption vulnerabilities like
|
||||
use-after-free, NULL pointer dereference, buffer overruns, etc.
|
||||
Enabled with `export AFL_USE_ASAN=1` before compiling.
|
||||
* MSAN = Memory SANitizer, finds read access to uninitialized memory, e.g.
|
||||
a local variable that is defined and read before it is even set.
|
||||
Enabled with `export AFL_USE_MSAN=1` before compiling.
|
||||
* UBSAN = Undefined Behaviour SANitizer, finds instances where - by the
|
||||
C and C++ standards - undefined behaviour happens, e.g. adding two
|
||||
signed integers together where the result is larger than a signed integer
|
||||
can hold.
|
||||
Enabled with `export AFL_USE_UBSAN=1` before compiling.
|
||||
* CFISAN = Control Flow Integrity SANitizer, finds instances where the
|
||||
control flow is found to be illegal. Originally this was rather to
|
||||
prevent return oriented programming exploit chains from functioning,
|
||||
in fuzzing this is mostly reduced to detecting type confusion
|
||||
vulnerabilities - which is however one of the most important and dangerous
|
||||
C++ memory corruption classes!
|
||||
Enabled with `export AFL_USE_CFISAN=1` before compiling.
|
||||
* TSAN = Thread SANitizer, finds thread race conditions.
|
||||
Enabled with `export AFL_USE_TSAN=1` before compiling.
|
||||
* LSAN = Leak SANitizer, finds memory leaks in a program. This is not really
|
||||
a security issue, but for developers this can be very valuable.
|
||||
Note that unlike the other sanitizers above this needs
|
||||
`__AFL_LEAK_CHECK();` added to all areas of the target source code where you
|
||||
find a leak check necessary!
|
||||
Enabled with `export AFL_USE_LSAN=1` before compiling.
|
||||
|
||||
It is possible to further modify the behaviour of the sanitizers at run-time
|
||||
by setting `ASAN_OPTIONS=...`, `LSAN_OPTIONS` etc. - the available parameters
|
||||
can be looked up in the sanitizer documentation of llvm/clang.
|
||||
afl-fuzz however requires some specific parameters important for fuzzing to be
|
||||
set. If you want to set your own, it might bail and report what it is missing.
|
||||
|
||||
Note that some sanitizers cannot be used together, e.g. ASAN and MSAN, and
|
||||
others often cannot work together because of target weirdness, e.g. ASAN and
|
||||
CFISAN. You might need to experiment which sanitizers you can combine in a
|
||||
target (which means more instances can be run without a sanitized target,
|
||||
which is more effective).
|
||||
|
||||
#### d) Modify the target
|
||||
|
||||
If the target has features that make fuzzing more difficult, e.g.
|
||||
checksums, HMAC, etc. then modify the source code so that checks for these
|
||||
values are removed.
|
||||
This can even be done safely for source code used in operational products
|
||||
by eliminating these checks within these AFL specific blocks:
|
||||
|
||||
```
|
||||
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
// say that the checksum or HMAC was fine - or whatever is required
|
||||
// to eliminate the need for the fuzzer to guess the right checksum
|
||||
return 0;
|
||||
#endif
|
||||
```
|
||||
|
||||
All AFL++ compilers will set this preprocessor definition automatically.
|
||||
|
||||
#### e) Instrument the target
|
||||
|
||||
In this step the target source code is compiled so that it can be fuzzed.
|
||||
|
||||
Basically you have to tell the target build system that the selected AFL++
|
||||
compiler is used. Also - if possible - you should always configure the
|
||||
build system such that the target is compiled statically and not dynamically.
|
||||
How to do this is described below.
|
||||
|
||||
The #1 rule when instrumenting a target is: avoid instrumenting shared
|
||||
libraries at all cost. You would need to set LD_LIBRARY_PATH to point to
|
||||
these, you could accidentally type "make install" and install them system wide -
|
||||
so don't. Really don't.
|
||||
**Always compile libraries you want to have instrumented as static and link
|
||||
these to the target program!**
|
||||
|
||||
Then build the target. (Usually with `make`)
|
||||
|
||||
**NOTES**
|
||||
|
||||
1. sometimes configure and build systems are fickle and do not like
|
||||
stderr output (and think this means a test failure) - which is something
|
||||
AFL++ likes to do to show statistics. It is recommended to disable AFL++
|
||||
instrumentation reporting via `export AFL_QUIET=1`.
|
||||
|
||||
2. sometimes configure and build systems error on warnings - these should be
|
||||
disabled (e.g. `--disable-werror` for some configure scripts).
|
||||
|
||||
3. in case the configure/build system complains about AFL++'s compiler and
|
||||
aborts then set `export AFL_NOOPT=1` which will then just behave like the
|
||||
real compiler. This option has to be unset again before building the target!
|
||||
|
||||
##### configure
|
||||
|
||||
For `configure` build systems this is usually done by:
|
||||
`CC=afl-clang-fast CXX=afl-clang-fast++ ./configure --disable-shared`
|
||||
|
||||
Note that if you are using the (better) afl-clang-lto compiler you also have to
|
||||
set AR to llvm-ar[-VERSION] and RANLIB to llvm-ranlib[-VERSION] - as is
|
||||
described in [instrumentation/README.lto.md](../instrumentation/README.lto.md).
|
||||
|
||||
##### cmake
|
||||
|
||||
For `cmake` build systems this is usually done by:
|
||||
`mkdir build; cd build; cmake -DCMAKE_C_COMPILER=afl-cc -DCMAKE_CXX_COMPILER=afl-c++ ..`
|
||||
|
||||
Note that if you are using the (better) afl-clang-lto compiler you also have to
|
||||
set AR to llvm-ar[-VERSION] and RANLIB to llvm-ranlib[-VERSION] - as is
|
||||
described in [instrumentation/README.lto.md](../instrumentation/README.lto.md).
|
||||
|
||||
##### meson
|
||||
|
||||
For meson you have to set the AFL++ compiler with the very first command!
|
||||
`CC=afl-cc CXX=afl-c++ meson`
|
||||
|
||||
##### other build systems or if configure/cmake didn't work
|
||||
|
||||
Sometimes cmake and configure do not pick up the AFL++ compiler, or the
|
||||
ranlib/ar that is needed - because this was just not foreseen by the developer
|
||||
of the target. Or they have non-standard options. Figure out if there is a
|
||||
non-standard way to set this, otherwise set up the build normally and edit the
|
||||
generated build environment afterwards manually to point it to the right compiler
|
||||
(and/or ranlib and ar).
|
||||
|
||||
#### f) Better instrumentation
|
||||
|
||||
If you just fuzz a target program as-is you are wasting a great opportunity for
|
||||
much more fuzzing speed.
|
||||
|
||||
This variant requires the usage of afl-clang-lto, afl-clang-fast or afl-gcc-fast.
|
||||
|
||||
It is the so-called `persistent mode`, which is much, much faster but
|
||||
requires that you code a source file that is specifically calling the target
|
||||
functions that you want to fuzz, plus a few specific AFL++ functions around
|
||||
it. See [instrumentation/README.persistent_mode.md](../instrumentation/README.persistent_mode.md) for details.
|
||||
|
||||
Basically if you do not fuzz a target in persistent mode then you are just
|
||||
doing it for a hobby and not professionally :-).
|
||||
|
||||
#### g) libfuzzer fuzzer harnesses with LLVMFuzzerTestOneInput()
|
||||
|
||||
libfuzzer `LLVMFuzzerTestOneInput()` harnesses are the de facto standard
|
||||
for fuzzing, and they can be used with AFL++ (and honggfuzz) as well!
|
||||
Compiling them is as simple as:
|
||||
```
|
||||
afl-clang-fast++ -fsanitize=fuzzer -o harness harness.cpp targetlib.a
|
||||
```
|
||||
You can even use advanced libfuzzer features like `FuzzedDataProvider`,
|
||||
`LLVMFuzzerMutate()` etc. and they will work!
|
||||
|
||||
The generated binary is fuzzed with afl-fuzz like any other fuzz target.
|
||||
|
||||
Bonus: the target is already optimized for fuzzing due to persistent mode and
|
||||
shared-memory testcases and hence gives you the fastest speed possible.
|
||||
|
||||
For more information see [utils/aflpp_driver/README.md](../utils/aflpp_driver/README.md)
|
||||
|
||||
### 2. Preparing the fuzzing campaign
|
||||
|
||||
As you fuzz the target with mutated input, having as diverse inputs for the
|
||||
target as possible improves the efficiency a lot.
|
||||
|
||||
#### a) Collect inputs
|
||||
|
||||
Try to gather valid inputs for the target from wherever you can. E.g. if it is
|
||||
the PNG picture format try to find as many png files as possible, e.g. from
|
||||
reported bugs, test suites, random downloads from the internet, unit test
|
||||
case data - from all kinds of PNG software.
|
||||
|
||||
If the input format is not known, you can also modify a target program to write
|
||||
normal data it receives and processes to a file and use these.
|
||||
|
||||
#### b) Making the input corpus unique
|
||||
|
||||
Use the AFL++ tool `afl-cmin` to remove inputs from the corpus that do not
|
||||
produce a new path in the target.
|
||||
|
||||
Put all files from step a) into one directory, e.g. INPUTS.
|
||||
|
||||
If the target program is to be called by fuzzing as `bin/target -d INPUTFILE`
|
||||
then run afl-cmin like this:
|
||||
`afl-cmin -i INPUTS -o INPUTS_UNIQUE -- bin/target -d @@`
|
||||
Note that the INPUTFILE argument that the target program would read from has to be set as `@@`.
|
||||
|
||||
If the target reads from stdin instead, just omit the `@@` as this is the
|
||||
default.
|
||||
|
||||
This step is highly recommended!
|
||||
|
||||
#### c) Minimizing all corpus files
|
||||
|
||||
The shorter the input files that still traverse the same path
|
||||
within the target, the better the fuzzing will be. This minimization
|
||||
is done with `afl-tmin` however it is a long process as this has to
|
||||
be done for every file:
|
||||
|
||||
```
|
||||
mkdir input
|
||||
cd INPUTS_UNIQUE
|
||||
for i in *; do
|
||||
afl-tmin -i "$i" -o "../input/$i" -- bin/target -d @@
|
||||
done
|
||||
```
|
||||
|
||||
This step can also be parallelized, e.g. with `parallel`.
|
||||
Note that this step is rather optional though.
|
||||
|
||||
#### Done!
|
||||
|
||||
The INPUTS_UNIQUE/ directory from step b) - or even better the directory input/
|
||||
if you minimized the corpus in step c) - is the resulting input corpus directory
|
||||
to be used in fuzzing! :-)
|
||||
|
||||
### 3. Fuzzing the target
|
||||
|
||||
In this final step we fuzz the target.
|
||||
There are not that many important options to run the target - unless you want
|
||||
to use many CPU cores/threads for the fuzzing, which will make the fuzzing much
|
||||
more useful.
|
||||
|
||||
If you just use one CPU for fuzzing, then you are fuzzing just for fun and not
|
||||
seriously :-)
|
||||
|
||||
#### a) Running afl-fuzz
|
||||
|
||||
Before you do even a test run of afl-fuzz execute `sudo afl-system-config` (on
|
||||
the host if you execute afl-fuzz in a docker container). This reconfigures the
|
||||
system for optimal speed - which afl-fuzz checks and bails otherwise.
|
||||
Set `export AFL_SKIP_CPUFREQ=1` for afl-fuzz to skip this check if you cannot
|
||||
run afl-system-config with root privileges on the host for whatever reason.
|
||||
|
||||
Note there is also `sudo afl-persistent-config` which sets additional permanent
|
||||
boot options for a much better fuzzing performance.
|
||||
|
||||
Note that both scripts improve your fuzzing performance but also decrease your
|
||||
system protection against attacks! So set strong firewall rules and only
|
||||
expose SSH as a network service if you use these (which is highly recommended).
|
||||
|
||||
If you have an input corpus from step 2 then specify this directory with the `-i`
|
||||
option. Otherwise create a new directory and create a file with any content
|
||||
as test data in there.
|
||||
|
||||
If you do not want anything special, the defaults are already usually best,
|
||||
hence all you need is to specify the seed input directory with the result of
|
||||
step [2a. Collect inputs](#a-collect-inputs):
|
||||
`afl-fuzz -i input -o output -- bin/target -d @@`
|
||||
Note that the directory specified with -o will be created if it does not exist.
|
||||
|
||||
It can be valuable to run afl-fuzz in a screen or tmux shell so you can log off,
|
||||
or afl-fuzz is not aborted if you are running it in a remote ssh session where
|
||||
the connection fails in between.
|
||||
Only do that though once you have verified that your fuzzing setup works!
|
||||
Simply run it like `screen -dmS afl-main -- afl-fuzz -M main-$HOSTNAME -i ...`
|
||||
and it will start away in a screen session. To enter this session simply type
|
||||
`screen -r afl-main`. You see - it makes sense to name the screen session
|
||||
the same as the afl-fuzz -M/-S naming :-)
|
||||
For more information on screen or tmux please check their documentation.
|
||||
|
||||
If you need to stop and re-start the fuzzing, use the same command line options
|
||||
(or even change them by selecting a different power schedule or another
|
||||
mutation mode!) and switch the input directory with a dash (`-`):
|
||||
`afl-fuzz -i - -o output -- bin/target -d @@`
|
||||
|
||||
Memory limits are not enforced by afl-fuzz by default and the system may run
|
||||
out of memory. You can decrease the memory with the `-m` option, the value is
|
||||
in MB. If this is too small for the target, you can usually see this by
|
||||
afl-fuzz bailing with the message that it could not connect to the forkserver.
|
||||
|
||||
Adding a dictionary is helpful. See the directory [dictionaries/](../dictionaries/) if
|
||||
something is already included for your data format, and tell afl-fuzz to load
|
||||
that dictionary by adding `-x dictionaries/FORMAT.dict`. With afl-clang-lto
|
||||
you have an autodictionary generation for which you need to do nothing except
|
||||
to use afl-clang-lto as the compiler. You also have the option to generate
|
||||
a dictionary yourself, see [utils/libtokencap/README.md](../utils/libtokencap/README.md).
|
||||
|
||||
afl-fuzz has a variety of options that help to work around target quirks like
|
||||
specific locations for the input file (`-f`), performing deterministic
|
||||
fuzzing (`-D`) and many more. Check out `afl-fuzz -h`.
|
||||
|
||||
We highly recommend that you set a memory limit for running the target with `-m`
|
||||
which defines the maximum memory in MB. This prevents a potential
|
||||
out-of-memory problem for your system plus helps you detect missing `malloc()`
|
||||
failure handling in the target.
|
||||
Play around with various -m values until you find one that safely works for all
|
||||
your input seeds (if you have good ones) and then double or quadruple that.
|
||||
|
||||
By default afl-fuzz never stops fuzzing. To terminate AFL++ simply press Control-C
|
||||
or send a signal SIGINT. You can limit the number of executions or approximate runtime
|
||||
in seconds with options also.
|
||||
|
||||
When you start afl-fuzz you will see a user interface that shows what the status
|
||||
is:
|
||||

|
||||
|
||||
All labels are explained in [status_screen.md](status_screen.md).
|
||||
|
||||
#### b) Using multiple cores
|
||||
|
||||
If you want to seriously fuzz then use as many cores/threads as possible to
|
||||
fuzz your target.
|
||||
|
||||
On the same machine - due to the design of how AFL++ works - there is a maximum
|
||||
number of CPU cores/threads that are useful, use more and the overall performance
|
||||
degrades instead. This value depends on the target, and the limit is between 32
|
||||
and 64 cores per machine.
|
||||
|
||||
If you have the RAM, it is highly recommended to run the instances with a caching
|
||||
of the testcases. Depending on the average testcase size (and those found
|
||||
during fuzzing) and their number, a value between 50-500MB is recommended.
|
||||
You can set the cache size (in MB) by setting the environment variable `AFL_TESTCACHE_SIZE`.
|
||||
|
||||
There should be one main fuzzer (`-M main-$HOSTNAME` option) and as many secondary
|
||||
fuzzers (e.g. `-S variant1`) as you have cores that you use.
|
||||
Every -M/-S entry needs a unique name (that can be whatever), however the same
|
||||
-o output directory location has to be used for all instances.
|
||||
|
||||
For every secondary fuzzer there should be a variation, e.g.:
|
||||
* one should fuzz the target that was compiled differently: with sanitizers
|
||||
activated (`export AFL_USE_ASAN=1 ; export AFL_USE_UBSAN=1 ;
|
||||
export AFL_USE_CFISAN=1`)
|
||||
* one or two should fuzz the target with CMPLOG/redqueen (see above), at
|
||||
least one cmplog instance should follow transformations (`-l AT`)
|
||||
* one to three fuzzers should fuzz a target compiled with laf-intel/COMPCOV
|
||||
(see above). Important note: If you run more than one laf-intel/COMPCOV
|
||||
fuzzer and you want them to share their intermediate results, the main
|
||||
fuzzer (`-M`) must be one of them! (Although this is not really
|
||||
recommended.)
|
||||
|
||||
All other secondaries should be used like this:
|
||||
* A quarter to a third with the MOpt mutator enabled: `-L 0`
|
||||
* run with a different power schedule, recommended are:
|
||||
`fast (default), explore, coe, lin, quad, exploit and rare`
|
||||
which you can set with e.g. `-p explore`
|
||||
* a few instances should use the old queue cycling with `-Z`
|
||||
|
||||
Also it is recommended to set `export AFL_IMPORT_FIRST=1` to load testcases
|
||||
from other fuzzers in the campaign first.
|
||||
|
||||
If you have a large corpus, a corpus from a previous run or are fuzzing in
|
||||
a CI, then also set `export AFL_CMPLOG_ONLY_NEW=1` and `export AFL_FAST_CAL=1`.
|
||||
|
||||
You can also use different fuzzers.
|
||||
If you are using AFL spinoffs or AFL conforming fuzzers, then just use the
|
||||
same -o directory and give it a unique `-S` name.
|
||||
Examples are:
|
||||
* [Fuzzolic](https://github.com/season-lab/fuzzolic)
|
||||
* [symcc](https://github.com/eurecom-s3/symcc/)
|
||||
* [Eclipser](https://github.com/SoftSec-KAIST/Eclipser/)
|
||||
* [AFLsmart](https://github.com/aflsmart/aflsmart)
|
||||
* [FairFuzz](https://github.com/carolemieux/afl-rb)
|
||||
* [Neuzz](https://github.com/Dongdongshe/neuzz)
|
||||
* [Angora](https://github.com/AngoraFuzzer/Angora)
|
||||
|
||||
A long list can be found at [https://github.com/Microsvuln/Awesome-AFL](https://github.com/Microsvuln/Awesome-AFL)
|
||||
|
||||
However you can also sync AFL++ with honggfuzz, libfuzzer with `-entropic=1`, etc.
|
||||
Just show the main fuzzer (-M) with the `-F` option where the queue/work
|
||||
directory of a different fuzzer is, e.g. `-F /src/target/honggfuzz`.
|
||||
Using honggfuzz (with `-n 1` or `-n 2`) and libfuzzer in parallel is highly
|
||||
recommended!
|
||||
|
||||
#### c) Using multiple machines for fuzzing
|
||||
|
||||
Maybe you have more than one machine you want to fuzz the same target on.
|
||||
Simply start the `afl-fuzz` (and perhaps libfuzzer, honggfuzz, ...)
|
||||
orchestra as you like, just ensure that you have one and only one `-M`
|
||||
instance per server, and that its name is unique, hence the recommendation
|
||||
for `-M main-$HOSTNAME`.
|
||||
|
||||
Now there are three strategies on how you can sync between the servers:
|
||||
* never: sounds weird, but this makes every server an island and has the
|
||||
chance that each follows different paths into the target. You can make
|
||||
this even more interesting by even giving different seeds to each server.
|
||||
* regularly (~4h): this ensures that all fuzzing campaigns on the servers
|
||||
"see" the same thing. It is like fuzzing on a huge server.
|
||||
* in intervals of 1/10th of the overall expected runtime of the fuzzing you
|
||||
sync. This tries a bit to combine both: have some individuality of the
|
||||
paths each campaign on a server explores, on the other hand if one
|
||||
gets stuck where another found progress this is handed over making it
|
||||
unstuck.
|
||||
|
||||
The syncing process itself is very simple.
|
||||
As the `-M main-$HOSTNAME` instance syncs to all `-S` secondaries as well
|
||||
as to other fuzzers, you have to copy only this directory to the other
|
||||
machines.
|
||||
|
||||
Let's say all servers have the `-o out` directory in /target/foo/out, and
|
||||
you created a file `servers.txt` which contains the hostnames of all
|
||||
participating servers, plus you have an ssh key deployed to all of them,
|
||||
then run:
|
||||
```bash
|
||||
for FROM in `cat servers.txt`; do
|
||||
for TO in `cat servers.txt`; do
|
||||
rsync -rlpogtz --rsh=ssh $FROM:/target/foo/out/main-$FROM $TO:target/foo/out/
|
||||
done
|
||||
done
|
||||
```
|
||||
You can run this manually, per cron job - as you need it.
|
||||
There is a more complex and configurable script in `utils/distributed_fuzzing`.
|
||||
|
||||
#### d) The status of the fuzz campaign
|
||||
|
||||
AFL++ comes with the `afl-whatsup` script to show the status of the fuzzing
|
||||
campaign.
|
||||
|
||||
Just supply the directory that afl-fuzz is given with the -o option and
|
||||
you will see a detailed status of every fuzzer in that campaign plus
|
||||
a summary.
|
||||
|
||||
To have only the summary use the `-s` switch e.g.: `afl-whatsup -s out/`
|
||||
|
||||
If you have multiple servers then use the command after a sync, or you have
|
||||
to execute this script per server.
|
||||
|
||||
Another tool to inspect the current state and history of a specific instance
|
||||
is afl-plot, which generates an index.html file and graphs that show how
|
||||
the fuzzing instance is performing.
|
||||
The syntax is `afl-plot instance_dir web_dir`, e.g. `afl-plot out/default /srv/www/htdocs/plot`
|
||||
|
||||
#### e) Stopping fuzzing, restarting fuzzing, adding new seeds
|
||||
|
||||
To stop an afl-fuzz run, simply press Control-C.
|
||||
|
||||
To restart an afl-fuzz run, just reuse the same command line but replace the
|
||||
`-i directory` with `-i -` or set `AFL_AUTORESUME=1`.
|
||||
|
||||
If you want to add new seeds to a fuzzing campaign you can run a temporary
|
||||
fuzzing instance, e.g. when your main fuzzer is using `-o out` and the new
|
||||
seeds are in `newseeds/` directory:
|
||||
```
|
||||
AFL_BENCH_JUST_ONE=1 AFL_FAST_CAL=1 afl-fuzz -i newseeds -o out -S newseeds -- ./target
|
||||
```
|
||||
|
||||
#### f) Checking the coverage of the fuzzing
|
||||
|
||||
The `paths found` value is a bad indicator for checking how good the coverage is.
|
||||
|
||||
A better indicator - if you use default llvm instrumentation with at least
|
||||
version 9 - is to use `afl-showmap` with the collect coverage option `-C` on
|
||||
the output directory:
|
||||
```
|
||||
$ afl-showmap -C -i out -o /dev/null -- ./target -params @@
|
||||
...
|
||||
[*] Using SHARED MEMORY FUZZING feature.
|
||||
[*] Target map size: 9960
|
||||
[+] Processed 7849 input files.
|
||||
[+] Captured 4331 tuples (highest value 255, total values 67130596) in '/dev/null'.
|
||||
[+] A coverage of 4331 edges were achieved out of 9960 existing (43.48%) with 7849 input files.
|
||||
```
|
||||
It is even better to check out the exact lines of code that have been reached -
|
||||
and which have not been found so far.
|
||||
|
||||
An "easy" helper script for this is [https://github.com/vanhauser-thc/afl-cov](https://github.com/vanhauser-thc/afl-cov),
|
||||
just follow the README of that separate project.
|
||||
|
||||
If you see that an important area or a feature has not been covered so far then
|
||||
try to find an input that is able to reach that and start a new secondary in
|
||||
that fuzzing campaign with that seed as input, let it run for a few minutes,
|
||||
then terminate it. The main node will pick it up and make it available to the
|
||||
other secondary nodes over time. Set `export AFL_NO_AFFINITY=1` or
|
||||
`export AFL_TRY_AFFINITY=1` if you have no free core.
|
||||
|
||||
Note that in nearly all cases you can never reach full coverage. A lot of
|
||||
functionality is usually dependent on exclusive options that would need individual
|
||||
fuzzing campaigns each with one of these options set. E.g. if you fuzz a library to
|
||||
convert image formats and your target is the png to tiff API then you will not
|
||||
touch any of the other library APIs and features.
|
||||
|
||||
#### g) How long to fuzz a target?
|
||||
|
||||
This is a difficult question.
|
||||
Basically if no new path is found for a long time (e.g. for a day or a week)
|
||||
then you can expect that your fuzzing won't be fruitful anymore.
|
||||
However often this just means that you should switch out secondaries for
|
||||
others, e.g. custom mutator modules, sync to very different fuzzers, etc.
|
||||
|
||||
Keep the queue/ directory (for future fuzzings of the same or similar targets)
|
||||
and use them to seed other good fuzzers like libfuzzer with the -entropic
|
||||
switch or honggfuzz.
|
||||
|
||||
#### h) Improve the speed!
|
||||
|
||||
* Use [persistent mode](../instrumentation/README.persistent_mode.md) (x2-x20 speed increase)
|
||||
* If you do not use shmem persistent mode, use `AFL_TMPDIR` to put the input file on a tmpfs location, see [env_variables.md](env_variables.md)
|
||||
* Linux: Improve kernel performance: modify `/etc/default/grub`, set `GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"`; then `update-grub` and `reboot` (warning: makes the system more insecure) - you can also just run `sudo afl-persistent-config`
|
||||
* Linux: Running on an `ext2` filesystem with `noatime` mount option will be a bit faster than on any journaling filesystem
|
||||
* Use your cores! [b) Using multiple cores](#b-using-multiple-cores)
|
||||
* Run `sudo afl-system-config` before starting the first afl-fuzz instance after a reboot
|
||||
|
||||
### The End
|
||||
|
||||
Check out the [FAQ](FAQ.md) if it maybe answers your question (that
|
||||
you might not even have known you had ;-) ).
|
||||
|
||||
This is basically all you need to know to professionally run fuzzing campaigns.
|
||||
If you want to know more, the tons of texts in [docs/](./) will have you covered.
|
||||
|
||||
Note that there are also a lot of tools out there that help fuzzing with AFL++
|
||||
(some might be deprecated or unsupported), see [tools.md](tools.md).
|
853
docs/fuzzing_in_depth.md
Normal file
853
docs/fuzzing_in_depth.md
Normal file
@ -0,0 +1,853 @@
|
||||
# Fuzzing with AFL++
|
||||
|
||||
The following describes how to fuzz with a target if source code is available.
|
||||
If you have a binary-only target, please go to
|
||||
[fuzzing_binary-only_targets.md](fuzzing_binary-only_targets.md).
|
||||
|
||||
Fuzzing source code is a three-step process:
|
||||
|
||||
1. Compile the target with a special compiler that prepares the target to be
|
||||
fuzzed efficiently. This step is called "instrumenting a target".
|
||||
2. Prepare the fuzzing by selecting and optimizing the input corpus for the
|
||||
target.
|
||||
3. Perform the fuzzing of the target by randomly mutating input and assessing if
|
||||
a generated input was processed in a new path in the target binary.
|
||||
|
||||
## 0. Common sense risks
|
||||
|
||||
Please keep in mind that, similarly to many other computationally-intensive
|
||||
tasks, fuzzing may put a strain on your hardware and on the OS. In particular:
|
||||
|
||||
- Your CPU will run hot and will need adequate cooling. In most cases, if
|
||||
cooling is insufficient or stops working properly, CPU speeds will be
|
||||
automatically throttled. That said, especially when fuzzing on less suitable
|
||||
hardware (laptops, smartphones, etc.), it's not entirely impossible for
|
||||
something to blow up.
|
||||
|
||||
- Targeted programs may end up erratically grabbing gigabytes of memory or
|
||||
filling up disk space with junk files. AFL++ tries to enforce basic memory
|
||||
limits, but can't prevent each and every possible mishap. The bottom line is
|
||||
that you shouldn't be fuzzing on systems where the prospect of data loss is
|
||||
not an acceptable risk.
|
||||
|
||||
- Fuzzing involves billions of reads and writes to the filesystem. On modern
|
||||
systems, this will usually be heavily cached, resulting in fairly modest
|
||||
"physical" I/O - but there are many factors that may alter this equation. It
|
||||
is your responsibility to monitor for potential trouble; with very heavy I/O,
|
||||
the lifespan of many HDDs and SSDs may be reduced.
|
||||
|
||||
A good way to monitor disk I/O on Linux is the `iostat` command:
|
||||
|
||||
```shell
|
||||
$ iostat -d 3 -x -k [...optional disk ID...]
|
||||
```
|
||||
|
||||
Using the `AFL_TMPDIR` environment variable and a RAM-disk, you can have the
|
||||
heavy writing done in RAM to prevent the aforementioned wear and tear. For
|
||||
example, the following line will run a Docker container with all this preset:
|
||||
|
||||
```shell
|
||||
# docker run -ti --mount type=tmpfs,destination=/ramdisk -e AFL_TMPDIR=/ramdisk aflplusplus/aflplusplus
|
||||
```
|
||||
|
||||
## 1. Instrumenting the target
|
||||
|
||||
### a) Selecting the best AFL++ compiler for instrumenting the target
|
||||
|
||||
AFL++ comes with a central compiler `afl-cc` that incorporates various different
|
||||
kinds of compiler targets and instrumentation options. The following
|
||||
evaluation flow will help you to select the best possible.
|
||||
|
||||
It is highly recommended to have the newest llvm version possible installed,
|
||||
anything below 9 is not recommended.
|
||||
|
||||
```
|
||||
+--------------------------------+
|
||||
| clang/clang++ 11+ is available | --> use LTO mode (afl-clang-lto/afl-clang-lto++)
|
||||
+--------------------------------+ see [instrumentation/README.lto.md](instrumentation/README.lto.md)
|
||||
|
|
||||
| if not, or if the target fails with LTO afl-clang-lto/++
|
||||
|
|
||||
v
|
||||
+---------------------------------+
|
||||
| clang/clang++ 3.8+ is available | --> use LLVM mode (afl-clang-fast/afl-clang-fast++)
|
||||
+---------------------------------+ see [instrumentation/README.llvm.md](instrumentation/README.llvm.md)
|
||||
|
|
||||
| if not, or if the target fails with LLVM afl-clang-fast/++
|
||||
|
|
||||
v
|
||||
+--------------------------------+
|
||||
| gcc 5+ is available | -> use GCC_PLUGIN mode (afl-gcc-fast/afl-g++-fast)
|
||||
+--------------------------------+ see [instrumentation/README.gcc_plugin.md](instrumentation/README.gcc_plugin.md) and
|
||||
[instrumentation/README.instrument_list.md](instrumentation/README.instrument_list.md)
|
||||
|
|
||||
| if not, or if you do not have a gcc with plugin support
|
||||
|
|
||||
v
|
||||
use GCC mode (afl-gcc/afl-g++) (or afl-clang/afl-clang++ for clang)
|
||||
```
|
||||
|
||||
Clickable README links for the chosen compiler:
|
||||
|
||||
* [LTO mode - afl-clang-lto](../instrumentation/README.lto.md)
|
||||
* [LLVM mode - afl-clang-fast](../instrumentation/README.llvm.md)
|
||||
* [GCC_PLUGIN mode - afl-gcc-fast](../instrumentation/README.gcc_plugin.md)
|
||||
* GCC/CLANG modes (afl-gcc/afl-clang) have no README as they have no unique
|
||||
features
|
||||
|
||||
You can select the mode for the afl-cc compiler by:
|
||||
1. using a symlink to afl-cc: afl-gcc, afl-g++, afl-clang, afl-clang++,
|
||||
afl-clang-fast, afl-clang-fast++, afl-clang-lto, afl-clang-lto++,
|
||||
afl-gcc-fast, afl-g++-fast (recommended!)
|
||||
2. using the environment variable AFL_CC_COMPILER with MODE
|
||||
3. passing --afl-MODE command line options to the compiler via
|
||||
CFLAGS/CXXFLAGS/CPPFLAGS
|
||||
|
||||
MODE can be one of: LTO (afl-clang-lto*), LLVM (afl-clang-fast*), GCC_PLUGIN
|
||||
(afl-g*-fast) or GCC (afl-gcc/afl-g++) or CLANG(afl-clang/afl-clang++).
|
||||
|
||||
Because no AFL specific command-line options are accepted (besides the --afl-MODE
|
||||
command), the compile-time tools make fairly broad use of environment variables,
|
||||
which can be listed with `afl-cc -hh` or by reading
|
||||
[env_variables.md](env_variables.md).
|
||||
|
||||
### b) Selecting instrumentation options
|
||||
|
||||
The following options are available when you instrument with LLVM or LTO mode
|
||||
(afl-clang-fast/afl-clang-lto):
|
||||
|
||||
* Splitting integer, string, float and switch comparisons so AFL++ can more easily
|
||||
solve these. This is an important option if you do not have a very good and
|
||||
large input corpus. This technique is called laf-intel or COMPCOV. To use this
|
||||
set the following environment variable before compiling the target: `export
|
||||
AFL_LLVM_LAF_ALL=1` You can read more about this in
|
||||
[instrumentation/README.laf-intel.md](../instrumentation/README.laf-intel.md).
|
||||
* A different technique (and usually a better one than laf-intel) is to
|
||||
instrument the target so that any compare values in the target are sent to
|
||||
AFL++ which then tries to put these values into the fuzzing data at different
|
||||
locations. This technique is very fast and good - if the target does not
|
||||
transform input data before comparison. Therefore this technique is called
|
||||
`input to state` or `redqueen`. If you want to use this technique, then you
|
||||
have to compile the target twice, once specifically with/for this mode by
|
||||
setting `AFL_LLVM_CMPLOG=1`, and pass this binary to afl-fuzz via the `-c`
|
||||
parameter. Note that you can compile also just a cmplog binary and use that
|
||||
for both however there will be a performance penalty. You can read more about
|
||||
this in
|
||||
[instrumentation/README.cmplog.md](../instrumentation/README.cmplog.md).
|
||||
|
||||
If you use LTO, LLVM or GCC_PLUGIN mode
|
||||
(afl-clang-fast/afl-clang-lto/afl-gcc-fast) you have the option to selectively
|
||||
only instrument parts of the target that you are interested in:
|
||||
|
||||
* To instrument only those parts of the target that you are interested in create
|
||||
a file with all the filenames of the source code that should be instrumented.
|
||||
For afl-clang-lto and afl-gcc-fast - or afl-clang-fast if a mode other than
|
||||
DEFAULT/PCGUARD is used or you have llvm > 10.0.0 - just put one filename or
|
||||
function per line (no directory information necessary for filenames), and
|
||||
either set `export AFL_LLVM_ALLOWLIST=allowlist.txt` **or** `export
|
||||
AFL_LLVM_DENYLIST=denylist.txt` - depending on if you want per default to
|
||||
instrument unless noted (DENYLIST) or not perform instrumentation unless
|
||||
requested (ALLOWLIST). **NOTE:** During optimization functions might be
|
||||
inlined and then would not match! See
|
||||
[instrumentation/README.instrument_list.md](../instrumentation/README.instrument_list.md).
|
||||
|
||||
There are many more options and modes available however these are most of the
|
||||
time less effective. See:
|
||||
* [instrumentation/README.ctx.md](../instrumentation/README.ctx.md)
|
||||
* [instrumentation/README.ngram.md](../instrumentation/README.ngram.md)
|
||||
|
||||
AFL++ performs "never zero" counting in its bitmap. You can read more about this
|
||||
here:
|
||||
* [instrumentation/README.neverzero.md](../instrumentation/README.neverzero.md)
|
||||
|
||||
### c) Selecting sanitizers
|
||||
|
||||
It is possible to use sanitizers when instrumenting targets for fuzzing, which
|
||||
allows you to find bugs that would not necessarily result in a crash.
|
||||
|
||||
Note that sanitizers have a huge impact on CPU (= fewer executions per second)
|
||||
and RAM usage. Also you should only run one afl-fuzz instance per sanitizer
|
||||
type. This is enough because a use-after-free bug will be picked up, e.g. by
|
||||
ASAN (address sanitizer) anyway when syncing to other fuzzing instances, so not
|
||||
all fuzzing instances need to be instrumented with ASAN.
|
||||
|
||||
The following sanitizers have built-in support in AFL++:
|
||||
* ASAN = Address SANitizer, finds memory corruption vulnerabilities like
|
||||
use-after-free, NULL pointer dereference, buffer overruns, etc. Enabled with
|
||||
`export AFL_USE_ASAN=1` before compiling.
|
||||
* MSAN = Memory SANitizer, finds read access to uninitialized memory, e.g. a
|
||||
local variable that is defined and read before it is even set. Enabled with
|
||||
`export AFL_USE_MSAN=1` before compiling.
|
||||
* UBSAN = Undefined Behaviour SANitizer, finds instances where - by the C and
|
||||
C++ standards - undefined behaviour happens, e.g. adding two signed integers
|
||||
together where the result is larger than a signed integer can hold. Enabled
|
||||
with `export AFL_USE_UBSAN=1` before compiling.
|
||||
* CFISAN = Control Flow Integrity SANitizer, finds instances where the control
|
||||
flow is found to be illegal. Originally this was rather to prevent return
|
||||
oriented programming exploit chains from functioning, in fuzzing this is
|
||||
mostly reduced to detecting type confusion vulnerabilities - which is,
|
||||
however, one of the most important and dangerous C++ memory corruption
|
||||
classes! Enabled with `export AFL_USE_CFISAN=1` before compiling.
|
||||
* TSAN = Thread SANitizer, finds thread race conditions. Enabled with `export
|
||||
AFL_USE_TSAN=1` before compiling.
|
||||
* LSAN = Leak SANitizer, finds memory leaks in a program. This is not really a
|
||||
security issue, but for developers this can be very valuable. Note that unlike
|
||||
the other sanitizers above this needs `__AFL_LEAK_CHECK();` added to all areas
|
||||
of the target source code where you find a leak check necessary! Enabled with
|
||||
`export AFL_USE_LSAN=1` before compiling.
|
||||
|
||||
It is possible to further modify the behaviour of the sanitizers at run-time by
|
||||
setting `ASAN_OPTIONS=...`, `LSAN_OPTIONS` etc. - the available parameters can
|
||||
be looked up in the sanitizer documentation of llvm/clang. afl-fuzz, however,
|
||||
requires some specific parameters important for fuzzing to be set. If you want
|
||||
to set your own, it might bail and report what it is missing.
|
||||
|
||||
Note that some sanitizers cannot be used together, e.g. ASAN and MSAN, and
|
||||
others often cannot work together because of target weirdness, e.g. ASAN and
|
||||
CFISAN. You might need to experiment which sanitizers you can combine in a
|
||||
target (which means more instances can be run without a sanitized target, which
|
||||
is more effective).
|
||||
|
||||
### d) Modifying the target
|
||||
|
||||
If the target has features that make fuzzing more difficult, e.g. checksums,
|
||||
HMAC, etc. then modify the source code so that checks for these values are
|
||||
removed. This can even be done safely for source code used in operational
|
||||
products by eliminating these checks within these AFL specific blocks:
|
||||
|
||||
```
|
||||
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
// say that the checksum or HMAC was fine - or whatever is required
|
||||
// to eliminate the need for the fuzzer to guess the right checksum
|
||||
return 0;
|
||||
#endif
|
||||
```
|
||||
|
||||
All AFL++ compilers will set this preprocessor definition automatically.
|
||||
|
||||
### e) Instrumenting the target
|
||||
|
||||
In this step the target source code is compiled so that it can be fuzzed.
|
||||
|
||||
Basically you have to tell the target build system that the selected AFL++
|
||||
compiler is used. Also - if possible - you should always configure the build
|
||||
system such that the target is compiled statically and not dynamically. How to
|
||||
do this is described below.
|
||||
|
||||
The #1 rule when instrumenting a target is: avoid instrumenting shared libraries
|
||||
at all cost. You would need to set LD_LIBRARY_PATH to point to these, you could
|
||||
accidentally type "make install" and install them system wide - so don't. Really
|
||||
don't. **Always compile libraries you want to have instrumented as static and
|
||||
link these to the target program!**
|
||||
|
||||
Then build the target. (Usually with `make`)
|
||||
|
||||
**NOTES**
|
||||
|
||||
1. sometimes configure and build systems are fickle and do not like stderr
|
||||
output (and think this means a test failure) - which is something AFL++ likes
|
||||
to do to show statistics. It is recommended to disable AFL++ instrumentation
|
||||
reporting via `export AFL_QUIET=1`.
|
||||
|
||||
2. sometimes configure and build systems error on warnings - these should be
|
||||
disabled (e.g. `--disable-werror` for some configure scripts).
|
||||
|
||||
3. in case the configure/build system complains about AFL++'s compiler and
|
||||
aborts then set `export AFL_NOOPT=1` which will then just behave like the
|
||||
real compiler. This option has to be unset again before building the target!
|
||||
|
||||
#### configure
|
||||
|
||||
For `configure` build systems this is usually done by:
|
||||
`CC=afl-clang-fast CXX=afl-clang-fast++ ./configure --disable-shared`
|
||||
|
||||
Note that if you are using the (better) afl-clang-lto compiler you also have to
|
||||
set AR to llvm-ar[-VERSION] and RANLIB to llvm-ranlib[-VERSION] - as is
|
||||
described in [instrumentation/README.lto.md](../instrumentation/README.lto.md).
|
||||
|
||||
#### cmake
|
||||
|
||||
For `cmake` build systems this is usually done by:
|
||||
`mkdir build; cd build; cmake -DCMAKE_C_COMPILER=afl-cc -DCMAKE_CXX_COMPILER=afl-c++ ..`
|
||||
|
||||
Note that if you are using the (better) afl-clang-lto compiler you also have to
|
||||
set AR to llvm-ar[-VERSION] and RANLIB to llvm-ranlib[-VERSION] - as is
|
||||
described in [instrumentation/README.lto.md](../instrumentation/README.lto.md).
|
||||
|
||||
#### meson
|
||||
|
||||
For meson you have to set the AFL++ compiler with the very first command!
|
||||
`CC=afl-cc CXX=afl-c++ meson`
|
||||
|
||||
#### other build systems or if configure/cmake didn't work
|
||||
|
||||
Sometimes cmake and configure do not pick up the AFL++ compiler, or the
|
||||
ranlib/ar that is needed - because this was just not foreseen by the developer
|
||||
of the target. Or they have non-standard options. Figure out if there is a
|
||||
non-standard way to set this, otherwise set up the build normally and edit the
|
||||
generated build environment afterwards manually to point it to the right
|
||||
compiler (and/or ranlib and ar).
|
||||
|
||||
### f) Better instrumentation
|
||||
|
||||
If you just fuzz a target program as-is you are wasting a great opportunity for
|
||||
much more fuzzing speed.
|
||||
|
||||
This variant requires the usage of afl-clang-lto, afl-clang-fast or
|
||||
afl-gcc-fast.
|
||||
|
||||
It is the so-called `persistent mode`, which is much, much faster but requires
|
||||
that you code a source file that is specifically calling the target functions
|
||||
that you want to fuzz, plus a few specific AFL++ functions around it. See
|
||||
[instrumentation/README.persistent_mode.md](../instrumentation/README.persistent_mode.md)
|
||||
for details.
|
||||
|
||||
Basically if you do not fuzz a target in persistent mode then you are just doing
|
||||
it for a hobby and not professionally :-).
|
||||
|
||||
### g) libfuzzer fuzzer harnesses with LLVMFuzzerTestOneInput()
|
||||
|
||||
libfuzzer `LLVMFuzzerTestOneInput()` harnesses are the de facto standard
|
||||
for fuzzing, and they can be used with AFL++ (and honggfuzz) as well!
|
||||
|
||||
Compiling them is as simple as:
|
||||
|
||||
```
|
||||
afl-clang-fast++ -fsanitize=fuzzer -o harness harness.cpp targetlib.a
|
||||
```
|
||||
|
||||
You can even use advanced libfuzzer features like `FuzzedDataProvider`,
|
||||
`LLVMFuzzerMutate()` etc. and they will work!
|
||||
|
||||
The generated binary is fuzzed with afl-fuzz like any other fuzz target.
|
||||
|
||||
Bonus: the target is already optimized for fuzzing due to persistent mode and
|
||||
shared-memory test cases and hence gives you the fastest speed possible.
|
||||
|
||||
For more information, see
|
||||
[utils/aflpp_driver/README.md](../utils/aflpp_driver/README.md).
|
||||
|
||||
## 2. Preparing the fuzzing campaign
|
||||
|
||||
As you fuzz the target with mutated input, having as diverse inputs for the
|
||||
target as possible improves the efficiency a lot.
|
||||
|
||||
### a) Collecting inputs
|
||||
|
||||
To operate correctly, the fuzzer requires one or more starting files that
|
||||
contain a good example of the input data normally expected by the targeted
|
||||
application.
|
||||
|
||||
Try to gather valid inputs for the target from wherever you can. E.g., if it is
|
||||
the PNG picture format, try to find as many PNG files as possible, e.g., from
|
||||
reported bugs, test suites, random downloads from the internet, unit test case
|
||||
data - from all kinds of PNG software.
|
||||
|
||||
If the input format is not known, you can also modify a target program to write
|
||||
normal data it receives and processes to a file and use these.
|
||||
|
||||
You can find many good examples of starting files in the
|
||||
[testcases/](../testcases) subdirectory that comes with this tool.
|
||||
|
||||
### b) Making the input corpus unique
|
||||
|
||||
Use the AFL++ tool `afl-cmin` to remove inputs from the corpus that do not
|
||||
produce a new path in the target.
|
||||
|
||||
Put all files from step a) into one directory, e.g. INPUTS.
|
||||
|
||||
If the target program is to be called by fuzzing as `bin/target -d INPUTFILE`
|
||||
then run afl-cmin like this:
|
||||
`afl-cmin -i INPUTS -o INPUTS_UNIQUE -- bin/target -d @@`
|
||||
Note that the INPUTFILE argument that the target program would read from has to be set as `@@`.
|
||||
|
||||
If the target reads from stdin instead, just omit the `@@` as this is the
|
||||
default.
|
||||
|
||||
This step is highly recommended!
|
||||
|
||||
### c) Minimizing all corpus files
|
||||
|
||||
The shorter the input files that still traverse the same path within the target,
|
||||
the better the fuzzing will be. This minimization is done with `afl-tmin`
|
||||
however it is a long process as this has to be done for every file:
|
||||
|
||||
```
|
||||
mkdir input
|
||||
cd INPUTS_UNIQUE
|
||||
for i in *; do
|
||||
afl-tmin -i "$i" -o "../input/$i" -- bin/target -d @@
|
||||
done
|
||||
```
|
||||
|
||||
This step can also be parallelized, e.g. with `parallel`. Note that this step is
|
||||
rather optional though.
|
||||
|
||||
### Done!
|
||||
|
||||
The INPUTS_UNIQUE/ directory from step b) - or even better the directory input/
|
||||
if you minimized the corpus in step c) - is the resulting input corpus directory
|
||||
to be used in fuzzing! :-)
|
||||
|
||||
## 3. Fuzzing the target
|
||||
|
||||
In this final step we fuzz the target. There are not that many important options
|
||||
to run the target - unless you want to use many CPU cores/threads for the
|
||||
fuzzing, which will make the fuzzing much more useful.
|
||||
|
||||
If you just use one CPU for fuzzing, then you are fuzzing just for fun and not
|
||||
seriously :-)
|
||||
|
||||
### a) Running afl-fuzz
|
||||
|
||||
Before you do even a test run of afl-fuzz execute `sudo afl-system-config` (on
|
||||
the host if you execute afl-fuzz in a docker container). This reconfigures the
|
||||
system for optimal speed - which afl-fuzz checks and bails otherwise. Set
|
||||
`export AFL_SKIP_CPUFREQ=1` for afl-fuzz to skip this check if you cannot run
|
||||
afl-system-config with root privileges on the host for whatever reason.
|
||||
|
||||
Note there is also `sudo afl-persistent-config` which sets additional permanent
|
||||
boot options for a much better fuzzing performance.
|
||||
|
||||
Note that both scripts improve your fuzzing performance but also decrease your
|
||||
system protection against attacks! So set strong firewall rules and only expose
|
||||
SSH as a network service if you use these (which is highly recommended).
|
||||
|
||||
If you have an input corpus from step 2 then specify this directory with the
|
||||
`-i` option. Otherwise create a new directory and create a file with any content
|
||||
as test data in there.
|
||||
|
||||
If you do not want anything special, the defaults are already usually best,
|
||||
hence all you need is to specify the seed input directory with the result of
|
||||
step [2a) Collecting inputs](#a-collecting-inputs):
|
||||
`afl-fuzz -i input -o output -- bin/target -d @@`
|
||||
Note that the directory specified with -o will be created if it does not exist.
|
||||
|
||||
It can be valuable to run afl-fuzz in a screen or tmux shell so you can log off,
|
||||
or afl-fuzz is not aborted if you are running it in a remote ssh session where
|
||||
the connection fails in between.
|
||||
Only do that though once you have verified that your fuzzing setup works!
|
||||
Simply run it like `screen -dmS afl-main -- afl-fuzz -M main-$HOSTNAME -i ...`
|
||||
and it will start away in a screen session. To enter this session simply type
|
||||
`screen -r afl-main`. You see - it makes sense to name the screen session
|
||||
the same as the afl-fuzz -M/-S naming :-)
|
||||
For more information on screen or tmux please check their documentation.
|
||||
|
||||
If you need to stop and re-start the fuzzing, use the same command line options
|
||||
(or even change them by selecting a different power schedule or another mutation
|
||||
mode!) and switch the input directory with a dash (`-`):
|
||||
`afl-fuzz -i - -o output -- bin/target -d @@`
|
||||
|
||||
Adding a dictionary is helpful. See the directory
|
||||
[dictionaries/](../dictionaries/) if something is already included for your data
|
||||
format, and tell afl-fuzz to load that dictionary by adding `-x
|
||||
dictionaries/FORMAT.dict`. With afl-clang-lto you have an autodictionary
|
||||
generation for which you need to do nothing except to use afl-clang-lto as the
|
||||
compiler. You also have the option to generate a dictionary yourself, see
|
||||
[utils/libtokencap/README.md](../utils/libtokencap/README.md).
|
||||
|
||||
afl-fuzz has a variety of options that help to work around target quirks like
|
||||
specific locations for the input file (`-f`), performing deterministic fuzzing
|
||||
(`-D`) and many more. Check out `afl-fuzz -h`.
|
||||
|
||||
We highly recommend that you set a memory limit for running the target with `-m`
|
||||
which defines the maximum memory in MB. This prevents a potential out-of-memory
|
||||
problem for your system plus helps you detect missing `malloc()` failure
|
||||
handling in the target. Play around with various -m values until you find one
|
||||
that safely works for all your input seeds (if you have good ones) and then
|
||||
double or quadruple that.
|
||||
|
||||
By default afl-fuzz never stops fuzzing. To terminate AFL++ simply press
|
||||
Control-C or send a signal SIGINT. You can limit the number of executions or
|
||||
approximate runtime in seconds with options also.
|
||||
|
||||
When you start afl-fuzz you will see a user interface that shows what the status
|
||||
is:
|
||||

|
||||
|
||||
All labels are explained in [status_screen.md](status_screen.md).
|
||||
|
||||
### b) Keeping memory use and timeouts in check
|
||||
|
||||
Memory limits are not enforced by afl-fuzz by default and the system may run out
|
||||
of memory. You can decrease the memory with the `-m` option, the value is in MB.
|
||||
If this is too small for the target, you can usually see this by afl-fuzz
|
||||
bailing with the message that it could not connect to the forkserver.
|
||||
|
||||
Consider setting low values for `-m` and `-t`.
|
||||
|
||||
For programs that are nominally very fast, but get sluggish for some inputs, you
|
||||
can also try setting `-t` values that are more punishing than what `afl-fuzz`
|
||||
dares to use on its own. On fast and idle machines, going down to `-t 5` may be
|
||||
a viable plan.
|
||||
|
||||
The `-m` parameter is worth looking at, too. Some programs can end up spending a
|
||||
fair amount of time allocating and initializing megabytes of memory when
|
||||
presented with pathological inputs. Low `-m` values can make them give up sooner
|
||||
and not waste CPU time.
|
||||
|
||||
### c) Using multiple cores
|
||||
|
||||
If you want to seriously fuzz then use as many cores/threads as possible to fuzz
|
||||
your target.
|
||||
|
||||
On the same machine - due to the design of how AFL++ works - there is a maximum
|
||||
number of CPU cores/threads that are useful, use more and the overall
|
||||
performance degrades instead. This value depends on the target, and the limit is
|
||||
between 32 and 64 cores per machine.
|
||||
|
||||
If you have the RAM, it is highly recommended to run the instances with a caching
|
||||
of the test cases. Depending on the average test case size (and those found
|
||||
during fuzzing) and their number, a value between 50-500MB is recommended. You
|
||||
can set the cache size (in MB) by setting the environment variable
|
||||
`AFL_TESTCACHE_SIZE`.
|
||||
|
||||
There should be one main fuzzer (`-M main-$HOSTNAME` option) and as many
|
||||
secondary fuzzers (e.g. `-S variant1`) as you have cores that you use. Every
|
||||
-M/-S entry needs a unique name (that can be whatever), however, the same -o
|
||||
output directory location has to be used for all instances.
|
||||
|
||||
For every secondary fuzzer there should be a variation, e.g.:
|
||||
* one should fuzz the target that was compiled differently: with sanitizers
|
||||
activated (`export AFL_USE_ASAN=1 ; export AFL_USE_UBSAN=1 ; export
|
||||
AFL_USE_CFISAN=1`)
|
||||
* one or two should fuzz the target with CMPLOG/redqueen (see above), at least
|
||||
one cmplog instance should follow transformations (`-l AT`)
|
||||
* one to three fuzzers should fuzz a target compiled with laf-intel/COMPCOV (see
|
||||
above). Important note: If you run more than one laf-intel/COMPCOV fuzzer and
|
||||
you want them to share their intermediate results, the main fuzzer (`-M`) must
|
||||
be one of them! (Although this is not really recommended.)
|
||||
|
||||
All other secondaries should be used like this:
|
||||
* a quarter to a third with the MOpt mutator enabled: `-L 0`
|
||||
* run with a different power schedule, recommended are:
|
||||
`fast (default), explore, coe, lin, quad, exploit and rare` which you can set
|
||||
with e.g. `-p explore`
|
||||
* a few instances should use the old queue cycling with `-Z`
|
||||
|
||||
Also, it is recommended to set `export AFL_IMPORT_FIRST=1` to load test cases
|
||||
from other fuzzers in the campaign first.
|
||||
|
||||
If you have a large corpus, a corpus from a previous run or are fuzzing in
|
||||
a CI, then also set `export AFL_CMPLOG_ONLY_NEW=1` and `export AFL_FAST_CAL=1`.
|
||||
|
||||
You can also use different fuzzers. If you are using AFL spinoffs or AFL
|
||||
conforming fuzzers, then just use the same -o directory and give it a unique
|
||||
`-S` name. Examples are:
|
||||
* [Fuzzolic](https://github.com/season-lab/fuzzolic)
|
||||
* [symcc](https://github.com/eurecom-s3/symcc/)
|
||||
* [Eclipser](https://github.com/SoftSec-KAIST/Eclipser/)
|
||||
* [AFLsmart](https://github.com/aflsmart/aflsmart)
|
||||
* [FairFuzz](https://github.com/carolemieux/afl-rb)
|
||||
* [Neuzz](https://github.com/Dongdongshe/neuzz)
|
||||
* [Angora](https://github.com/AngoraFuzzer/Angora)
|
||||
|
||||
A long list can be found at
|
||||
[https://github.com/Microsvuln/Awesome-AFL](https://github.com/Microsvuln/Awesome-AFL).
|
||||
|
||||
However, you can also sync AFL++ with honggfuzz, libfuzzer with `-entropic=1`,
|
||||
etc. Just show the main fuzzer (-M) with the `-F` option where the queue/work
|
||||
directory of a different fuzzer is, e.g. `-F /src/target/honggfuzz`. Using
|
||||
honggfuzz (with `-n 1` or `-n 2`) and libfuzzer in parallel is highly
|
||||
recommended!
|
||||
|
||||
### d) Using multiple machines for fuzzing
|
||||
|
||||
Maybe you have more than one machine you want to fuzz the same target on.
|
||||
Simply start the `afl-fuzz` (and perhaps libfuzzer, honggfuzz, ...)
|
||||
orchestra as you like, just ensure that you have one and only one `-M`
|
||||
instance per server, and that its name is unique, hence the recommendation
|
||||
for `-M main-$HOSTNAME`.
|
||||
|
||||
Now there are three strategies on how you can sync between the servers:
|
||||
* never: sounds weird, but this makes every server an island and has the chance
|
||||
that each follows different paths into the target. You can make this even more
|
||||
interesting by even giving different seeds to each server.
|
||||
* regularly (~4h): this ensures that all fuzzing campaigns on the servers "see"
|
||||
the same thing. It is like fuzzing on a huge server.
|
||||
* in intervals of 1/10th of the overall expected runtime of the fuzzing you
|
||||
sync. This tries a bit to combine both: have some individuality of the paths
|
||||
each campaign on a server explores, on the other hand if one gets stuck where
|
||||
another found progress this is handed over making it unstuck.
|
||||
|
||||
The syncing process itself is very simple. As the `-M main-$HOSTNAME` instance
|
||||
syncs to all `-S` secondaries as well as to other fuzzers, you have to copy only
|
||||
this directory to the other machines.
|
||||
|
||||
Let's say all servers have the `-o out` directory in /target/foo/out, and you
|
||||
created a file `servers.txt` which contains the hostnames of all participating
|
||||
servers, plus you have an ssh key deployed to all of them, then run:
|
||||
|
||||
```bash
|
||||
for FROM in `cat servers.txt`; do
|
||||
for TO in `cat servers.txt`; do
|
||||
rsync -rlpogtz --rsh=ssh $FROM:/target/foo/out/main-$FROM $TO:/target/foo/out/
|
||||
done
|
||||
done
|
||||
```
|
||||
|
||||
You can run this manually, per cron job - as you need it. There is a more
|
||||
complex and configurable script in `utils/distributed_fuzzing`.
|
||||
|
||||
### e) The status of the fuzz campaign
|
||||
|
||||
AFL++ comes with the `afl-whatsup` script to show the status of the fuzzing
|
||||
campaign.
|
||||
|
||||
Just supply the directory that afl-fuzz is given with the `-o` option and you
|
||||
will see a detailed status of every fuzzer in that campaign plus a summary.
|
||||
|
||||
To have only the summary, use the `-s` switch, e.g., `afl-whatsup -s out/`.
|
||||
|
||||
If you have multiple servers, then use the command after a sync or you have to
|
||||
execute this script per server.
|
||||
|
||||
Another tool to inspect the current state and history of a specific instance is
|
||||
afl-plot, which generates an index.html file and graphs that show how the
|
||||
fuzzing instance is performing. The syntax is `afl-plot instance_dir web_dir`,
|
||||
e.g., `afl-plot out/default /srv/www/htdocs/plot`.
|
||||
|
||||
### f) Stopping fuzzing, restarting fuzzing, adding new seeds
|
||||
|
||||
To stop an afl-fuzz run, simply press Control-C.
|
||||
|
||||
To restart an afl-fuzz run, just reuse the same command line but replace the `-i
|
||||
directory` with `-i -` or set `AFL_AUTORESUME=1`.
|
||||
|
||||
If you want to add new seeds to a fuzzing campaign you can run a temporary
|
||||
fuzzing instance, e.g. when your main fuzzer is using `-o out` and the new seeds
|
||||
are in `newseeds/` directory:
|
||||
|
||||
```
|
||||
AFL_BENCH_JUST_ONE=1 AFL_FAST_CAL=1 afl-fuzz -i newseeds -o out -S newseeds -- ./target
|
||||
```
|
||||
|
||||
### g) Checking the coverage of the fuzzing
|
||||
|
||||
The `paths found` value is a bad indicator for checking how good the coverage
|
||||
is.
|
||||
|
||||
A better indicator - if you use default llvm instrumentation with at least
|
||||
version 9 - is to use `afl-showmap` with the collect coverage option `-C` on the
|
||||
output directory:
|
||||
|
||||
```
|
||||
$ afl-showmap -C -i out -o /dev/null -- ./target -params @@
|
||||
...
|
||||
[*] Using SHARED MEMORY FUZZING feature.
|
||||
[*] Target map size: 9960
|
||||
[+] Processed 7849 input files.
|
||||
[+] Captured 4331 tuples (highest value 255, total values 67130596) in '/dev/nul
|
||||
l'.
|
||||
[+] A coverage of 4331 edges were achieved out of 9960 existing (43.48%) with 7849 input files.
|
||||
```
|
||||
|
||||
It is even better to check out the exact lines of code that have been reached -
|
||||
and which have not been found so far.
|
||||
|
||||
An "easy" helper script for this is
|
||||
[https://github.com/vanhauser-thc/afl-cov](https://github.com/vanhauser-thc/afl-cov),
|
||||
just follow the README of that separate project.
|
||||
|
||||
If you see that an important area or a feature has not been covered so far then
|
||||
try to find an input that is able to reach that and start a new secondary in
|
||||
that fuzzing campaign with that seed as input, let it run for a few minutes,
|
||||
then terminate it. The main node will pick it up and make it available to the
|
||||
other secondary nodes over time. Set `export AFL_NO_AFFINITY=1` or `export
|
||||
AFL_TRY_AFFINITY=1` if you have no free core.
|
||||
|
||||
Note that in nearly all cases you can never reach full coverage. A lot of
|
||||
functionality is usually dependent on exclusive options that would need
|
||||
individual fuzzing campaigns each with one of these options set. E.g., if you
|
||||
fuzz a library to convert image formats and your target is the png to tiff API
|
||||
then you will not touch any of the other library APIs and features.
|
||||
|
||||
### h) How long to fuzz a target?
|
||||
|
||||
This is a difficult question. Basically if no new path is found for a long time
|
||||
(e.g. for a day or a week) then you can expect that your fuzzing won't be
|
||||
fruitful anymore. However, often this just means that you should switch out
|
||||
secondaries for others, e.g. custom mutator modules, sync to very different
|
||||
fuzzers, etc.
|
||||
|
||||
Keep the queue/ directory (for future fuzzings of the same or similar targets)
|
||||
and use them to seed other good fuzzers like libfuzzer with the -entropic switch
|
||||
or honggfuzz.
|
||||
|
||||
### i) Improve the speed!
|
||||
|
||||
* Use [persistent mode](../instrumentation/README.persistent_mode.md) (x2-x20
|
||||
speed increase)
|
||||
* If you do not use shmem persistent mode, use `AFL_TMPDIR` to point the input
|
||||
file on a tmpfs location, see [env_variables.md](env_variables.md)
|
||||
* Linux: Improve kernel performance: modify `/etc/default/grub`, set
|
||||
`GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off
|
||||
mitigations=off no_stf_barrier noibpb noibrs nopcid nopti
|
||||
nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off
|
||||
spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"`; then
|
||||
`update-grub` and `reboot` (warning: makes the system more insecure) - you can
|
||||
also just run `sudo afl-persistent-config`
|
||||
* Linux: Running on an `ext2` filesystem with `noatime` mount option will be a
|
||||
bit faster than on any journaling filesystem
|
||||
* Use your cores! [3c) Using multiple cores](#c-using-multiple-cores)
|
||||
* Run `sudo afl-system-config` before starting the first afl-fuzz instance after
|
||||
a reboot
|
||||
|
||||
### j) Going beyond crashes
|
||||
|
||||
Fuzzing is a wonderful and underutilized technique for discovering non-crashing
|
||||
design and implementation errors, too. Quite a few interesting bugs have been
|
||||
found by modifying the target programs to call `abort()` when say:
|
||||
|
||||
- Two bignum libraries produce different outputs when given the same
|
||||
fuzzer-generated input.
|
||||
|
||||
- An image library produces different outputs when asked to decode the same
|
||||
input image several times in a row.
|
||||
|
||||
- A serialization/deserialization library fails to produce stable outputs when
|
||||
iteratively serializing and deserializing fuzzer-supplied data.
|
||||
|
||||
- A compression library produces an output inconsistent with the input file when
|
||||
asked to compress and then decompress a particular blob.
|
||||
|
||||
Implementing these or similar sanity checks usually takes very little time; if
|
||||
you are the maintainer of a particular package, you can make this code
|
||||
conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also
|
||||
shared with libfuzzer and honggfuzz) or `#ifdef __AFL_COMPILER` (this one is
|
||||
just for AFL++).
|
||||
|
||||
### k) Known limitations & areas for improvement
|
||||
|
||||
Here are some of the most important caveats for AFL++:
|
||||
|
||||
- AFL++ detects faults by checking for the first spawned process dying due to a
|
||||
signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for
|
||||
these signals may need to have the relevant code commented out. In the same
|
||||
vein, faults in child processes spawned by the fuzzed target may evade
|
||||
detection unless you manually add some code to catch that.
|
||||
|
||||
- As with any other brute-force tool, the fuzzer offers limited coverage if
|
||||
encryption, checksums, cryptographic signatures, or compression are used to
|
||||
wholly wrap the actual data format to be tested.
|
||||
|
||||
To work around this, you can comment out the relevant checks (see
|
||||
utils/libpng_no_checksum/ for inspiration); if this is not possible, you can
|
||||
also write a postprocessor, one of the hooks of custom mutators. See
|
||||
[custom_mutators.md](custom_mutators.md) on how to use
|
||||
`AFL_CUSTOM_MUTATOR_LIBRARY`.
|
||||
|
||||
- There are some unfortunate trade-offs with ASAN and 64-bit binaries. This
|
||||
isn't due to any specific fault of afl-fuzz.
|
||||
|
||||
- There is no direct support for fuzzing network services, background daemons,
|
||||
or interactive apps that require UI interaction to work. You may need to make
|
||||
simple code changes to make them behave in a more traditional way. Preeny may
|
||||
offer a relatively simple option, too - see:
|
||||
[https://github.com/zardus/preeny](https://github.com/zardus/preeny)
|
||||
|
||||
Some useful tips for modifying network-based services can be also found at:
|
||||
[https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop](https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop)
|
||||
|
||||
- Occasionally, sentient machines rise against their creators. If this happens
|
||||
to you, please consult
|
||||
[https://lcamtuf.coredump.cx/prep/](https://lcamtuf.coredump.cx/prep/).
|
||||
|
||||
Beyond this, see [INSTALL.md](INSTALL.md) for platform-specific tips.
|
||||
|
||||
## 4. Triaging crashes
|
||||
|
||||
The coverage-based grouping of crashes usually produces a small data set that
|
||||
can be quickly triaged manually or with a very simple GDB or Valgrind script.
|
||||
Every crash is also traceable to its parent non-crashing test case in the queue,
|
||||
making it easier to diagnose faults.
|
||||
|
||||
Having said that, it's important to acknowledge that some fuzzing crashes can be
|
||||
difficult to quickly evaluate for exploitability without a lot of debugging and
|
||||
code analysis work. To assist with this task, afl-fuzz supports a very unique
|
||||
"crash exploration" mode enabled with the -C flag.
|
||||
|
||||
In this mode, the fuzzer takes one or more crashing test cases as the input and
|
||||
uses its feedback-driven fuzzing strategies to very quickly enumerate all code
|
||||
paths that can be reached in the program while keeping it in the crashing state.
|
||||
|
||||
Mutations that do not result in a crash are rejected; so are any changes that do
|
||||
not affect the execution path.
|
||||
|
||||
The output is a small corpus of files that can be very rapidly examined to see
|
||||
what degree of control the attacker has over the faulting address, or whether it
|
||||
is possible to get past an initial out-of-bounds read - and see what lies
|
||||
beneath.
|
||||
|
||||
Oh, one more thing: for test case minimization, give afl-tmin a try. The tool
|
||||
can be operated in a very simple way:
|
||||
|
||||
```shell
|
||||
./afl-tmin -i test_case -o minimized_result -- /path/to/program [...]
|
||||
```
|
||||
|
||||
The tool works with crashing and non-crashing test cases alike. In the crash
|
||||
mode, it will happily accept instrumented and non-instrumented binaries. In the
|
||||
non-crashing mode, the minimizer relies on standard AFL++ instrumentation to
|
||||
make the file simpler without altering the execution path.
|
||||
|
||||
The minimizer accepts the -m, -t, -f and @@ syntax in a manner compatible with
|
||||
afl-fuzz.
|
||||
|
||||
Another tool in AFL++ is the afl-analyze tool. It takes an input file, attempts
|
||||
to sequentially flip bytes, and observes the behavior of the tested program. It
|
||||
then color-codes the input based on which sections appear to be critical, and
|
||||
which are not; while not bulletproof, it can often offer quick insights into
|
||||
complex file formats.
|
||||
|
||||
|
||||
## 5. CI fuzzing
|
||||
|
||||
Some notes on CI fuzzing - this fuzzing is different to normal fuzzing campaigns
|
||||
as these runs are much shorter.
|
||||
|
||||
1. Always:
|
||||
* LTO has a much longer compile time, which is at odds with short fuzzing runs -
|
||||
hence use afl-clang-fast instead.
|
||||
* If you compile with CMPLOG, then you can save fuzzing time and reuse that
|
||||
compiled target for both the `-c` option and the main fuzz target. This
|
||||
will impact the speed by ~15% though.
|
||||
* `AFL_FAST_CAL` - Enable fast calibration, this halves the time the
|
||||
saturated corpus needs to be loaded.
|
||||
* `AFL_CMPLOG_ONLY_NEW` - only perform cmplog on newly found paths, not the
|
||||
initial corpus as this very likely has been done for them already.
|
||||
* Keep the generated corpus, use afl-cmin and reuse it every time!
|
||||
|
||||
2. Additionally randomize the AFL++ compilation options, e.g.:
|
||||
* 40% for `AFL_LLVM_CMPLOG`
|
||||
* 10% for `AFL_LLVM_LAF_ALL`
|
||||
|
||||
3. Also randomize the afl-fuzz runtime options, e.g.:
|
||||
* 65% for `AFL_DISABLE_TRIM`
|
||||
* 50% use a dictionary generated by `AFL_LLVM_DICT2FILE`
|
||||
* 40% use MOpt (`-L 0`)
|
||||
* 40% for `AFL_EXPAND_HAVOC_NOW`
|
||||
* 20% for old queue processing (`-Z`)
|
||||
* for CMPLOG targets, 60% for `-l 2`, 40% for `-l 3`
|
||||
|
||||
4. Do *not* run any `-M` modes, just running `-S` modes is better for CI
|
||||
fuzzing. `-M` enables old queue handling etc. which is good for a fuzzing
|
||||
campaign but not good for short CI runs.
|
||||
|
||||
What this can look like can, e.g., be seen in AFL++'s setup in Google's
|
||||
[oss-fuzz](https://github.com/google/oss-fuzz/blob/master/infra/base-images/base-builder/compile_afl)
|
||||
and
|
||||
[clusterfuzz](https://github.com/google/clusterfuzz/blob/master/src/clusterfuzz/_internal/bot/fuzzers/afl/launcher.py).
|
||||
|
||||
## The End
|
||||
|
||||
Check out the [FAQ](FAQ.md) if it maybe answers your question (that you might
|
||||
not even have known you had ;-) ).
|
||||
|
||||
This is basically all you need to know to professionally run fuzzing campaigns.
|
||||
If you want to know more, the tons of texts in [docs/](./) will have you
|
||||
covered.
|
||||
|
||||
Note that there are also a lot of tools out there that help fuzzing with AFL++
|
||||
(some might be deprecated or unsupported), see
|
||||
[third_party_tools.md](third_party_tools.md).
|
@ -36,7 +36,7 @@ behaviours and defaults:
|
||||
shared libraries, etc. Additionally QEMU 5.1 supports more CPU targets so
|
||||
this is really worth it.
|
||||
* When instrumenting targets, afl-cc will not supersede optimizations anymore
|
||||
if any were given. This allows to fuzz targets build regularly like those
|
||||
if any were given. This allows to fuzz targets build regularly like those
|
||||
for debug or release versions.
|
||||
* afl-fuzz:
|
||||
* if neither -M or -S is specified, `-S default` is assumed, so more
|
||||
@ -47,7 +47,7 @@ behaviours and defaults:
|
||||
* -m none is now default, set memory limits (in MB) with e.g. -m 250
|
||||
* deterministic fuzzing is now disabled by default (unless using -M) and
|
||||
can be enabled with -D
|
||||
* a caching of testcases can now be performed and can be modified by
|
||||
* a caching of test cases can now be performed and can be modified by
|
||||
editing config.h for TESTCASE_CACHE or by specifying the env variable
|
||||
`AFL_TESTCACHE_SIZE` (in MB). Good values are between 50-500 (default: 50).
|
||||
* -M mains do not perform trimming
|
||||
|
@ -1,71 +0,0 @@
|
||||
# Interpreting output
|
||||
|
||||
See the [status_screen.md](status_screen.md) file for information on
|
||||
how to interpret the displayed stats and monitor the health of the process. Be
|
||||
sure to consult this file especially if any UI elements are highlighted in red.
|
||||
|
||||
The fuzzing process will continue until you press Ctrl-C. At a minimum, you want
|
||||
to allow the fuzzer to complete one queue cycle, which may take anywhere from a
|
||||
couple of hours to a week or so.
|
||||
|
||||
There are three subdirectories created within the output directory and updated
|
||||
in real-time:
|
||||
|
||||
- queue/ - test cases for every distinctive execution path, plus all the
|
||||
starting files given by the user. This is the synthesized corpus
|
||||
mentioned in section 2.
|
||||
|
||||
Before using this corpus for any other purposes, you can shrink
|
||||
it to a smaller size using the afl-cmin tool. The tool will find
|
||||
a smaller subset of files offering equivalent edge coverage.
|
||||
|
||||
- crashes/ - unique test cases that cause the tested program to receive a
|
||||
fatal signal (e.g., SIGSEGV, SIGILL, SIGABRT). The entries are
|
||||
grouped by the received signal.
|
||||
|
||||
- hangs/ - unique test cases that cause the tested program to time out. The
|
||||
default time limit before something is classified as a hang is
|
||||
the larger of 1 second and the value of the -t parameter.
|
||||
The value can be fine-tuned by setting AFL_HANG_TMOUT, but this
|
||||
is rarely necessary.
|
||||
|
||||
Crashes and hangs are considered "unique" if the associated execution paths
|
||||
involve any state transitions not seen in previously-recorded faults. If a
|
||||
single bug can be reached in multiple ways, there will be some count inflation
|
||||
early in the process, but this should quickly taper off.
|
||||
|
||||
The file names for crashes and hangs are correlated with the parent, non-faulting
|
||||
queue entries. This should help with debugging.
|
||||
|
||||
When you can't reproduce a crash found by afl-fuzz, the most likely cause is
|
||||
that you are not setting the same memory limit as used by the tool. Try:
|
||||
|
||||
```shell
|
||||
LIMIT_MB=50
|
||||
( ulimit -Sv $[LIMIT_MB << 10]; /path/to/tested_binary ... )
|
||||
```
|
||||
|
||||
Change LIMIT_MB to match the -m parameter passed to afl-fuzz. On OpenBSD,
|
||||
also change -Sv to -Sd.
|
||||
|
||||
Any existing output directory can be also used to resume aborted jobs; try:
|
||||
|
||||
```shell
|
||||
./afl-fuzz -i- -o existing_output_dir [...etc...]
|
||||
```
|
||||
|
||||
If you have gnuplot installed, you can also generate some pretty graphs for any
|
||||
active fuzzing task using afl-plot. For an example of what this looks like,
|
||||
see [https://lcamtuf.coredump.cx/afl/plot/](https://lcamtuf.coredump.cx/afl/plot/).
|
||||
|
||||
You can also manually build and install afl-plot-ui, which is a helper utility
|
||||
for showing the graphs generated by afl-plot in a graphical window using GTK.
|
||||
You can build and install it as follows:
|
||||
|
||||
```shell
|
||||
sudo apt install libgtk-3-0 libgtk-3-dev pkg-config
|
||||
cd utils/plot_ui
|
||||
make
|
||||
cd ../../
|
||||
sudo make install
|
||||
```
|
@ -1,36 +0,0 @@
|
||||
# Known limitations & areas for improvement
|
||||
|
||||
Here are some of the most important caveats for AFL:
|
||||
|
||||
- AFL++ detects faults by checking for the first spawned process dying due to
|
||||
a signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for
|
||||
these signals may need to have the relevant code commented out. In the same
|
||||
vein, faults in child processes spawned by the fuzzed target may evade
|
||||
detection unless you manually add some code to catch that.
|
||||
|
||||
- As with any other brute-force tool, the fuzzer offers limited coverage if
|
||||
encryption, checksums, cryptographic signatures, or compression are used to
|
||||
wholly wrap the actual data format to be tested.
|
||||
|
||||
To work around this, you can comment out the relevant checks (see
|
||||
utils/libpng_no_checksum/ for inspiration); if this is not possible,
|
||||
you can also write a postprocessor, one of the hooks of custom mutators.
|
||||
See [custom_mutators.md](custom_mutators.md) on how to use
|
||||
`AFL_CUSTOM_MUTATOR_LIBRARY`.
|
||||
|
||||
- There are some unfortunate trade-offs with ASAN and 64-bit binaries. This
|
||||
isn't due to any specific fault of afl-fuzz.
|
||||
|
||||
- There is no direct support for fuzzing network services, background
|
||||
daemons, or interactive apps that require UI interaction to work. You may
|
||||
need to make simple code changes to make them behave in a more traditional
|
||||
way. Preeny may offer a relatively simple option, too - see:
|
||||
[https://github.com/zardus/preeny](https://github.com/zardus/preeny)
|
||||
|
||||
Some useful tips for modifying network-based services can be also found at:
|
||||
[https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop](https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop)
|
||||
|
||||
- Occasionally, sentient machines rise against their creators. If this
|
||||
happens to you, please consult [https://lcamtuf.coredump.cx/prep/](https://lcamtuf.coredump.cx/prep/).
|
||||
|
||||
Beyond this, see [INSTALL.md](INSTALL.md) for platform-specific tips.
|
@ -1,258 +0,0 @@
|
||||
# Tips for parallel fuzzing
|
||||
|
||||
This document talks about synchronizing afl-fuzz jobs on a single machine
|
||||
or across a fleet of systems. See README.md for the general instruction manual.
|
||||
|
||||
Note that this document is rather outdated. Please refer to the main document
|
||||
section on multiple core usage [fuzzing_expert.md#Using multiple cores](fuzzing_expert.md#b-using-multiple-cores)
|
||||
for up-to-date strategies!
|
||||
|
||||
## 1) Introduction
|
||||
|
||||
Every copy of afl-fuzz will take up one CPU core. This means that on an
|
||||
n-core system, you can almost always run around n concurrent fuzzing jobs with
|
||||
virtually no performance hit (you can use the afl-gotcpu tool to make sure).
|
||||
|
||||
In fact, if you rely on just a single job on a multi-core system, you will
|
||||
be underutilizing the hardware. So, parallelization is always the right way to
|
||||
go.
|
||||
|
||||
When targeting multiple unrelated binaries or using the tool in
|
||||
"non-instrumented" (-n) mode, it is perfectly fine to just start up several
|
||||
fully separate instances of afl-fuzz. The picture gets more complicated when
|
||||
you want to have multiple fuzzers hammering a common target: if a hard-to-hit
|
||||
but interesting test case is synthesized by one fuzzer, the remaining instances
|
||||
will not be able to use that input to guide their work.
|
||||
|
||||
To help with this problem, afl-fuzz offers a simple way to synchronize test
|
||||
cases on the fly.
|
||||
|
||||
It is a good idea to use different power schedules if you run several instances
|
||||
in parallel (`-p` option).
|
||||
|
||||
Alternatively running other AFL spinoffs in parallel can be of value,
|
||||
e.g. Angora (https://github.com/AngoraFuzzer/Angora/)
|
||||
|
||||
## 2) Single-system parallelization
|
||||
|
||||
If you wish to parallelize a single job across multiple cores on a local
|
||||
system, simply create a new, empty output directory ("sync dir") that will be
|
||||
shared by all the instances of afl-fuzz; and then come up with a naming scheme
|
||||
for every instance - say, "fuzzer01", "fuzzer02", etc.
|
||||
|
||||
Run the first one ("main node", -M) like this:
|
||||
|
||||
```
|
||||
./afl-fuzz -i testcase_dir -o sync_dir -M fuzzer01 [...other stuff...]
|
||||
```
|
||||
|
||||
...and then, start up secondary (-S) instances like this:
|
||||
|
||||
```
|
||||
./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer02 [...other stuff...]
|
||||
./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer03 [...other stuff...]
|
||||
```
|
||||
|
||||
Each fuzzer will keep its state in a separate subdirectory, like so:
|
||||
|
||||
/path/to/sync_dir/fuzzer01/
|
||||
|
||||
Each instance will also periodically rescan the top-level sync directory
|
||||
for any test cases found by other fuzzers - and will incorporate them into
|
||||
its own fuzzing when they are deemed interesting enough.
|
||||
For performance reasons only -M main node syncs the queue with everyone, the
|
||||
-S secondary nodes will only sync from the main node.
|
||||
|
||||
The difference between the -M and -S modes is that the main instance will
|
||||
still perform deterministic checks; while the secondary instances will
|
||||
proceed straight to random tweaks.
|
||||
|
||||
Note that you must always have one -M main instance!
|
||||
Running multiple -M instances is wasteful!
|
||||
|
||||
You can also monitor the progress of your jobs from the command line with the
|
||||
provided afl-whatsup tool. When the instances are no longer finding new paths,
|
||||
it's probably time to stop.
|
||||
|
||||
WARNING: Exercise caution when explicitly specifying the -f option. Each fuzzer
|
||||
must use a separate temporary file; otherwise, things will go south. One safe
|
||||
example may be:
|
||||
|
||||
```
|
||||
./afl-fuzz [...] -S fuzzer10 -f file10.txt ./fuzzed/binary @@
|
||||
./afl-fuzz [...] -S fuzzer11 -f file11.txt ./fuzzed/binary @@
|
||||
./afl-fuzz [...] -S fuzzer12 -f file12.txt ./fuzzed/binary @@
|
||||
```
|
||||
|
||||
This is not a concern if you use @@ without -f and let afl-fuzz come up with the
|
||||
file name.
|
||||
|
||||
## 3) Multiple -M mains
|
||||
|
||||
|
||||
There is support for parallelizing the deterministic checks.
|
||||
This is only needed where
|
||||
|
||||
1. many new paths are found fast over a long time and it looks unlikely that
|
||||
main node will ever catch up, and
|
||||
2. deterministic fuzzing is actively helping path discovery (you can see this
|
||||
in the main node for the first four lines in the "fuzzing strategy yields"
|
||||
section. If the ratio `found/attempts` is high, then it is effective. It
|
||||
most commonly isn't.)
|
||||
|
||||
Only if both are true is it beneficial to have more than one main.
|
||||
You can leverage this by creating -M instances like so:
|
||||
|
||||
```
|
||||
./afl-fuzz -i testcase_dir -o sync_dir -M mainA:1/3 [...]
|
||||
./afl-fuzz -i testcase_dir -o sync_dir -M mainB:2/3 [...]
|
||||
./afl-fuzz -i testcase_dir -o sync_dir -M mainC:3/3 [...]
|
||||
```
|
||||
|
||||
... where the first value after ':' is the sequential ID of a particular main
|
||||
instance (starting at 1), and the second value is the total number of fuzzers to
|
||||
distribute the deterministic fuzzing across. Note that if you boot up fewer
|
||||
fuzzers than indicated by the second number passed to -M, you may end up with
|
||||
poor coverage.
|
||||
|
||||
## 4) Syncing with non-AFL fuzzers or independent instances
|
||||
|
||||
A -M main node can be told with the `-F other_fuzzer_queue_directory` option
|
||||
to sync results from other fuzzers, e.g. libfuzzer or honggfuzz.
|
||||
|
||||
Only the specified directory will be synced into afl, not subdirectories.
|
||||
The specified directory does not need to exist yet at the start of afl.
|
||||
|
||||
The `-F` option can be passed to the main node several times.
|
||||
|
||||
## 5) Multi-system parallelization
|
||||
|
||||
The basic operating principle for multi-system parallelization is similar to
|
||||
the mechanism explained in section 2. The key difference is that you need to
|
||||
write a simple script that performs two actions:
|
||||
|
||||
- Uses SSH with authorized_keys to connect to every machine and retrieve
|
||||
a tar archive of the /path/to/sync_dir/<main_node(s)> directory local to
|
||||
the machine.
|
||||
It is best to use a naming scheme that includes the host name and its being
|
||||
a main node (e.g. main1, main2) in the fuzzer ID, so that you can do
|
||||
something like:
|
||||
|
||||
```sh
|
||||
for host in `cat HOSTLIST`; do
|
||||
ssh user@$host "tar -czf - sync/${host}_main*/" > $host.tgz
|
||||
done
|
||||
```
|
||||
|
||||
- Distributes and unpacks these files on all the remaining machines, e.g.:
|
||||
|
||||
```sh
|
||||
for srchost in `cat HOSTLIST`; do
|
||||
for dsthost in `cat HOSTLIST`; do
|
||||
test "$srchost" = "$dsthost" && continue
|
||||
ssh user@$srchost 'tar -kxzf -' < $dsthost.tgz
|
||||
done
|
||||
done
|
||||
```
|
||||
|
||||
There is an example of such a script in utils/distributed_fuzzing/.
|
||||
|
||||
There are other (older) more featured, experimental tools:
|
||||
* https://github.com/richo/roving
|
||||
* https://github.com/MartijnB/disfuzz-afl
|
||||
|
||||
However these do not support syncing just main nodes (yet).
|
||||
|
||||
When developing custom test case sync code, there are several optimizations
|
||||
to keep in mind:
|
||||
|
||||
- The synchronization does not have to happen very often; running the
|
||||
task every 60 minutes or even less often at later fuzzing stages is
|
||||
fine
|
||||
|
||||
- There is no need to synchronize crashes/ or hangs/; you only need to
|
||||
copy over queue/* (and ideally, also fuzzer_stats).
|
||||
|
||||
- It is not necessary (and not advisable!) to overwrite existing files;
|
||||
the -k option in tar is a good way to avoid that.
|
||||
|
||||
- There is no need to fetch directories for fuzzers that are not running
|
||||
locally on a particular machine, and were simply copied over onto that
|
||||
system during earlier runs.
|
||||
|
||||
- For large fleets, you will want to consolidate tarballs for each host,
|
||||
as this will let you use n SSH connections for sync, rather than n*(n-1).
|
||||
|
||||
You may also want to implement staged synchronization. For example, you
|
||||
could have 10 groups of systems, with group 1 pushing test cases only
|
||||
to group 2; group 2 pushing them only to group 3; and so on, with group
|
||||
10 eventually feeding back to group 1.
|
||||
|
||||
This arrangement would allow interesting test cases to propagate across
|
||||
the fleet without having to copy every fuzzer queue to every single host.
|
||||
|
||||
- You do not want a "main" instance of afl-fuzz on every system; you should
|
||||
run them all with -S, and just designate a single process somewhere within
|
||||
the fleet to run with -M.
|
||||
|
||||
- Syncing is only necessary for the main nodes on a system. It is possible
|
||||
to run main-less with only secondaries. However then you need to find out
|
||||
which secondary took over the temporary role to be the main node. Look for
|
||||
the `is_main_node` file in the fuzzer directories, e.g., `sync-dir/hostname-*/is_main_node`.
|
||||
|
||||
It is *not* advisable to skip the synchronization script and run the fuzzers
|
||||
directly on a network filesystem; unexpected latency and unkillable processes
|
||||
in I/O wait state can mess things up.
|
||||
|
||||
## 6) Remote monitoring and data collection
|
||||
|
||||
You can use screen, nohup, tmux, or something equivalent to run remote
|
||||
instances of afl-fuzz. If you redirect the program's output to a file, it will
|
||||
automatically switch from a fancy UI to more limited status reports. There is
|
||||
also basic machine-readable information which is always written to the
|
||||
fuzzer_stats file in the output directory. Locally, that information can be
|
||||
interpreted with afl-whatsup.
|
||||
|
||||
In principle, you can use the status screen of the main (-M) instance to
|
||||
monitor the overall fuzzing progress and decide when to stop. In this
|
||||
mode, the most important signal is just that no new paths are being found
|
||||
for a longer while. If you do not have a main instance, just pick any
|
||||
single secondary instance to watch and go by that.
|
||||
|
||||
You can also rely on that instance's output directory to collect the
|
||||
synthesized corpus that covers all the noteworthy paths discovered anywhere
|
||||
within the fleet. Secondary (-S) instances do not require any special
|
||||
monitoring, other than just making sure that they are up.
|
||||
|
||||
Keep in mind that crashing inputs are *not* automatically propagated to the
|
||||
main instance, so you may still want to monitor for crashes fleet-wide
|
||||
from within your synchronization or health checking scripts (see afl-whatsup).
|
||||
|
||||
## 7) Asymmetric setups
|
||||
|
||||
It is perhaps worth noting that all of the following is permitted:
|
||||
|
||||
- Running afl-fuzz in conjunction with other guided tools that can extend
|
||||
coverage (e.g., via concolic execution). Third-party tools simply need to
|
||||
follow the protocol described above for pulling new test cases from
|
||||
out_dir/<fuzzer_id>/queue/* and writing their own finds to sequentially
|
||||
numbered id:nnnnnn files in out_dir/<ext_tool_id>/queue/*.
|
||||
|
||||
- Running some of the synchronized fuzzers with different (but related)
|
||||
target binaries. For example, simultaneously stress-testing several
|
||||
different JPEG parsers (say, IJG jpeg and libjpeg-turbo) while sharing
|
||||
the discovered test cases can have synergistic effects and improve the
|
||||
overall coverage.
|
||||
|
||||
(In this case, running one -M instance per target is necessary.)
|
||||
|
||||
- Having some of the fuzzers invoke the binary in different ways.
|
||||
For example, 'djpeg' supports several DCT modes, configurable with
|
||||
a command-line flag, while 'dwebp' supports incremental and one-shot
|
||||
decoding. In some scenarios, going after multiple distinct modes and then
|
||||
pooling test cases will improve coverage.
|
||||
|
||||
- Much less convincingly, running the synchronized fuzzers with different
|
||||
starting test cases (e.g., progressive and standard JPEG) or dictionaries.
|
||||
The synchronization mechanism ensures that the test sets will get fairly
|
||||
homogeneous over time, but it introduces some initial variability.
|
@ -1,209 +0,0 @@
|
||||
## Tips for performance optimization
|
||||
|
||||
This file provides tips for troubleshooting slow or wasteful fuzzing jobs.
|
||||
See README.md for the general instruction manual.
|
||||
|
||||
## 1. Keep your test cases small
|
||||
|
||||
This is probably the single most important step to take! Large test cases do
|
||||
not merely take more time and memory to be parsed by the tested binary, but
|
||||
also make the fuzzing process dramatically less efficient in several other
|
||||
ways.
|
||||
|
||||
To illustrate, let's say that you're randomly flipping bits in a file, one bit
|
||||
at a time. Let's assume that if you flip bit #47, you will hit a security bug;
|
||||
flipping any other bit just results in an invalid document.
|
||||
|
||||
Now, if your starting test case is 100 bytes long, you will have a 71% chance of
|
||||
triggering the bug within the first 1,000 execs - not bad! But if the test case
|
||||
is 1 kB long, the probability that we will randomly hit the right pattern in
|
||||
the same timeframe goes down to 11%. And if it has 10 kB of non-essential
|
||||
cruft, the odds plunge to 1%.
|
||||
|
||||
On top of that, with larger inputs, the binary may now be running 5-10 times
|
||||
slower than before - so the overall drop in fuzzing efficiency may be easily
|
||||
as high as 500x or so.
|
||||
|
||||
In practice, this means that you shouldn't fuzz image parsers with your
|
||||
vacation photos. Generate a tiny 16x16 picture instead, and run it through
|
||||
`jpegtran` or `pngcrunch` for good measure. The same goes for most other types
|
||||
of documents.
|
||||
|
||||
There's plenty of small starting test cases in ../testcases/ - try them out
|
||||
or submit new ones!
|
||||
|
||||
If you want to start with a larger, third-party corpus, run `afl-cmin` with an
|
||||
aggressive timeout on that data set first.
|
||||
|
||||
## 2. Use a simpler target
|
||||
|
||||
Consider using a simpler target binary in your fuzzing work. For example, for
|
||||
image formats, bundled utilities such as `djpeg`, `readpng`, or `gifhisto` are
|
||||
considerably (10-20x) faster than the convert tool from ImageMagick - all while exercising roughly the same library-level image parsing code.
|
||||
|
||||
Even if you don't have a lightweight harness for a particular target, remember
|
||||
that you can always use another, related library to generate a corpus that will
|
||||
be then manually fed to a more resource-hungry program later on.
|
||||
|
||||
Also note that reading the fuzzing input via stdin is faster than reading from
|
||||
a file.
|
||||
|
||||
## 3. Use LLVM persistent instrumentation
|
||||
|
||||
The LLVM mode offers a "persistent", in-process fuzzing mode that can
|
||||
work well for certain types of self-contained libraries, and for fast targets,
|
||||
can offer performance gains up to 5-10x; and a "deferred fork server" mode
|
||||
that can offer huge benefits for programs with high startup overhead. Both
|
||||
modes require you to edit the source code of the fuzzed program, but the
|
||||
changes often amount to just strategically placing a single line or two.
|
||||
|
||||
If there are important data comparisons performed (e.g. `strcmp(ptr, MAGIC_HDR)`)
|
||||
then using laf-intel (see instrumentation/README.laf-intel.md) will help `afl-fuzz` a lot
|
||||
to get to the important parts in the code.
|
||||
|
||||
If you are only interested in specific parts of the code being fuzzed, you can
|
||||
instrument only the files that are actually relevant. This improves the speed and
|
||||
accuracy of afl. See instrumentation/README.instrument_list.md
|
||||
|
||||
## 4. Profile and optimize the binary
|
||||
|
||||
Check for any parameters or settings that obviously improve performance. For
|
||||
example, the djpeg utility that comes with IJG jpeg and libjpeg-turbo can be
|
||||
called with:
|
||||
|
||||
```bash
|
||||
-dct fast -nosmooth -onepass -dither none -scale 1/4
|
||||
```
|
||||
|
||||
...and that will speed things up. There is a corresponding drop in the quality
|
||||
of decoded images, but it's probably not something you care about.
|
||||
|
||||
In some programs, it is possible to disable output altogether, or at least use
|
||||
an output format that is computationally inexpensive. For example, with image
|
||||
transcoding tools, converting to a BMP file will be a lot faster than to PNG.
|
||||
|
||||
With some laid-back parsers, enabling "strict" mode (i.e., bailing out after
|
||||
first error) may result in smaller files and improved run time without
|
||||
sacrificing coverage; for example, for sqlite, you may want to specify -bail.
|
||||
|
||||
If the program is still too slow, you can use `strace -tt` or an equivalent
|
||||
profiling tool to see if the targeted binary is doing anything silly.
|
||||
Sometimes, you can speed things up simply by specifying `/dev/null` as the
|
||||
config file, or disabling some compile-time features that aren't really needed
|
||||
for the job (try `./configure --help`). One of the notoriously resource-consuming
|
||||
things would be calling other utilities via `exec*()`, `popen()`, `system()`, or
|
||||
equivalent calls; for example, tar can invoke external decompression tools
|
||||
when it decides that the input file is a compressed archive.
|
||||
|
||||
Some programs may also intentionally call `sleep()`, `usleep()`, or `nanosleep()`;
|
||||
vim is a good example of that. Other programs may attempt `fsync()` and so on.
|
||||
There are third-party libraries that make it easy to get rid of such code,
|
||||
e.g.:
|
||||
|
||||
https://launchpad.net/libeatmydata
|
||||
|
||||
In programs that are slow due to unavoidable initialization overhead, you may
|
||||
want to try the LLVM deferred forkserver mode (see README.llvm.md),
|
||||
which can give you speed gains up to 10x, as mentioned above.
|
||||
|
||||
Last but not least, if you are using ASAN and the performance is unacceptable,
|
||||
consider turning it off for now, and manually examining the generated corpus
|
||||
with an ASAN-enabled binary later on.
|
||||
|
||||
## 5. Instrument just what you need
|
||||
|
||||
Instrument just the libraries you actually want to stress-test right now, one
|
||||
at a time. Let the program use system-wide, non-instrumented libraries for
|
||||
any functionality you don't actually want to fuzz. For example, in most
|
||||
cases, it doesn't make sense to instrument `libgmp` just because you're testing a
|
||||
crypto app that relies on it for bignum math.
|
||||
|
||||
Beware of programs that come with oddball third-party libraries bundled with
|
||||
their source code (Spidermonkey is a good example of this). Check `./configure`
|
||||
options to use non-instrumented system-wide copies instead.
|
||||
|
||||
## 6. Parallelize your fuzzers
|
||||
|
||||
The fuzzer is designed to need ~1 core per job. This means that on a, say,
|
||||
4-core system, you can easily run four parallel fuzzing jobs with relatively
|
||||
little performance hit. For tips on how to do that, see parallel_fuzzing.md.
|
||||
|
||||
The `afl-gotcpu` utility can help you understand if you still have idle CPU
|
||||
capacity on your system. (It won't tell you about memory bandwidth, cache
|
||||
misses, or similar factors, but they are less likely to be a concern.)
|
||||
|
||||
## 7. Keep memory use and timeouts in check
|
||||
|
||||
Consider setting low values for `-m` and `-t`.
|
||||
|
||||
For programs that are nominally very fast, but get sluggish for some inputs,
|
||||
you can also try setting `-t` values that are more punishing than what `afl-fuzz`
|
||||
dares to use on its own. On fast and idle machines, going down to `-t 5` may be
|
||||
a viable plan.
|
||||
|
||||
The `-m` parameter is worth looking at, too. Some programs can end up spending
|
||||
a fair amount of time allocating and initializing megabytes of memory when
|
||||
presented with pathological inputs. Low `-m` values can make them give up sooner
|
||||
and not waste CPU time.
|
||||
|
||||
## 8. Check OS configuration
|
||||
|
||||
There are several OS-level factors that may affect fuzzing speed:
|
||||
|
||||
- If you have no risk of power loss then run your fuzzing on a tmpfs
|
||||
partition. This increases the performance noticeably.
|
||||
Alternatively you can use `AFL_TMPDIR` to point to a tmpfs location to
|
||||
just write the input file to a tmpfs.
|
||||
- High system load. Use idle machines where possible. Kill any non-essential
|
||||
CPU hogs (idle browser windows, media players, complex screensavers, etc).
|
||||
- Network filesystems, either used for fuzzer input / output, or accessed by
|
||||
the fuzzed binary to read configuration files (pay special attention to the
|
||||
home directory - many programs search it for dot-files).
|
||||
- Disable all the spectre, meltdown etc. security countermeasures in the
|
||||
kernel if your machine is properly separated:
|
||||
|
||||
```
|
||||
ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off
|
||||
no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable
|
||||
nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off
|
||||
spectre_v2=off stf_barrier=off
|
||||
```
|
||||
In most Linux distributions you can put this into a `/etc/default/grub`
|
||||
variable.
|
||||
You can use `sudo afl-persistent-config` to set these options for you.
|
||||
|
||||
The following changes are made when executing `afl-system-config`:
|
||||
|
||||
- On-demand CPU scaling. The Linux `ondemand` governor performs its analysis
|
||||
on a particular schedule and is known to underestimate the needs of
|
||||
short-lived processes spawned by `afl-fuzz` (or any other fuzzer). On Linux,
|
||||
this can be fixed with:
|
||||
|
||||
``` bash
|
||||
cd /sys/devices/system/cpu
|
||||
echo performance | tee cpu*/cpufreq/scaling_governor
|
||||
```
|
||||
|
||||
On other systems, the impact of CPU scaling will be different; when fuzzing,
|
||||
use OS-specific tools to find out if all cores are running at full speed.
|
||||
- Transparent huge pages. Some allocators, such as `jemalloc`, can incur a
|
||||
heavy fuzzing penalty when transparent huge pages (THP) are enabled in the
|
||||
kernel. You can disable this via:
|
||||
|
||||
```bash
|
||||
echo never > /sys/kernel/mm/transparent_hugepage/enabled
|
||||
```
|
||||
|
||||
- Suboptimal scheduling strategies. The significance of this will vary from
|
||||
one target to another, but on Linux, you may want to make sure that the
|
||||
following options are set:
|
||||
|
||||
```bash
|
||||
echo 1 >/proc/sys/kernel/sched_child_runs_first
|
||||
echo 1 >/proc/sys/kernel/sched_autogroup_enabled
|
||||
```
|
||||
|
||||
Setting a different scheduling policy for the fuzzer process - say
|
||||
`SCHED_RR` - can usually speed things up, too, but needs to be done with
|
||||
care.
|
||||
|
@ -1,319 +0,0 @@
|
||||
# Sister projects
|
||||
|
||||
This doc lists some of the projects that are inspired by, derived from,
|
||||
designed for, or meant to integrate with AFL. See README.md for the general
|
||||
instruction manual.
|
||||
|
||||
!!!
|
||||
!!! This list is outdated and needs an update, missing: e.g. Angora, FairFuzz
|
||||
!!!
|
||||
|
||||
## Support for other languages / environments:
|
||||
|
||||
### Python AFL (Jakub Wilk)
|
||||
|
||||
Allows fuzz-testing of Python programs. Uses custom instrumentation and its
|
||||
own forkserver.
|
||||
|
||||
https://jwilk.net/software/python-afl
|
||||
|
||||
### Go-fuzz (Dmitry Vyukov)
|
||||
|
||||
AFL-inspired guided fuzzing approach for Go targets:
|
||||
|
||||
https://github.com/dvyukov/go-fuzz
|
||||
|
||||
### afl.rs (Keegan McAllister)
|
||||
|
||||
Allows Rust features to be easily fuzzed with AFL (using the LLVM mode).
|
||||
|
||||
https://github.com/kmcallister/afl.rs
|
||||
|
||||
### OCaml support (KC Sivaramakrishnan)
|
||||
|
||||
Adds AFL-compatible instrumentation to OCaml programs.
|
||||
|
||||
https://github.com/ocamllabs/opam-repo-dev/pull/23
|
||||
https://canopy.mirage.io/Posts/Fuzzing
|
||||
|
||||
### AFL for GCJ Java and other GCC frontends (-)
|
||||
|
||||
GCC Java programs are actually supported out of the box - simply rename
|
||||
afl-gcc to afl-gcj. Unfortunately, by default, unhandled exceptions in GCJ do
|
||||
not result in abort() being called, so you will need to manually add a
|
||||
top-level exception handler that exits with SIGABRT or something equivalent.
|
||||
|
||||
Other GCC-supported languages should be fairly easy to get working, but may
|
||||
face similar problems. See https://gcc.gnu.org/frontends.html for a list of
|
||||
options.
|
||||
|
||||
## AFL-style in-process fuzzer for LLVM (Kostya Serebryany)
|
||||
|
||||
Provides an evolutionary instrumentation-guided fuzzing harness that allows
|
||||
some programs to be fuzzed without the fork / execve overhead. (Similar
|
||||
functionality is now available as the "persistent" feature described in
|
||||
[the llvm_mode readme](../instrumentation/README.llvm.md))
|
||||
|
||||
https://llvm.org/docs/LibFuzzer.html
|
||||
|
||||
## TriforceAFL (Tim Newsham and Jesse Hertz)
|
||||
|
||||
Leverages QEMU full system emulation mode to allow AFL to target operating
|
||||
systems and other alien worlds:
|
||||
|
||||
https://www.nccgroup.trust/us/about-us/newsroom-and-events/blog/2016/june/project-triforce-run-afl-on-everything/
|
||||
|
||||
## WinAFL (Ivan Fratric)
|
||||
|
||||
As the name implies, allows you to fuzz Windows binaries (using DynamoRio).
|
||||
|
||||
https://github.com/ivanfratric/winafl
|
||||
|
||||
Another Windows alternative may be:
|
||||
|
||||
https://github.com/carlosgprado/BrundleFuzz/
|
||||
|
||||
## Network fuzzing
|
||||
|
||||
### Preeny (Yan Shoshitaishvili)
|
||||
|
||||
Provides a fairly simple way to convince dynamically linked network-centric
|
||||
programs to read from a file or not fork. Not AFL-specific, but described as
|
||||
useful by many users. Some assembly required.
|
||||
|
||||
https://github.com/zardus/preeny
|
||||
|
||||
## Distributed fuzzing and related automation
|
||||
|
||||
### roving (Richo Healey)
|
||||
|
||||
A client-server architecture for effortlessly orchestrating AFL runs across
|
||||
a fleet of machines. You don't want to use this on systems that face the
|
||||
Internet or live in other untrusted environments.
|
||||
|
||||
https://github.com/richo/roving
|
||||
|
||||
### Distfuzz-AFL (Martijn Bogaard)
|
||||
|
||||
Simplifies the management of afl-fuzz instances on remote machines. The
|
||||
author notes that the current implementation isn't secure and should not
|
||||
be exposed on the Internet.
|
||||
|
||||
https://github.com/MartijnB/disfuzz-afl
|
||||
|
||||
### AFLDFF (quantumvm)
|
||||
|
||||
A nice GUI for managing AFL jobs.
|
||||
|
||||
https://github.com/quantumvm/AFLDFF
|
||||
|
||||
### afl-launch (Ben Nagy)
|
||||
|
||||
Batch AFL launcher utility with a simple CLI.
|
||||
|
||||
https://github.com/bnagy/afl-launch
|
||||
|
||||
### AFL Utils (rc0r)
|
||||
|
||||
Simplifies the triage of discovered crashes, start parallel instances, etc.
|
||||
|
||||
https://github.com/rc0r/afl-utils
|
||||
|
||||
### AFL crash analyzer (floyd)
|
||||
|
||||
Another crash triage tool:
|
||||
|
||||
https://github.com/floyd-fuh/afl-crash-analyzer
|
||||
|
||||
### afl-extras (fekir)
|
||||
|
||||
Collect data, parallel afl-tmin, startup scripts.
|
||||
|
||||
https://github.com/fekir/afl-extras
|
||||
|
||||
### afl-fuzzing-scripts (Tobias Ospelt)
|
||||
|
||||
Simplifies starting up multiple parallel AFL jobs.
|
||||
|
||||
https://github.com/floyd-fuh/afl-fuzzing-scripts/
|
||||
|
||||
### afl-sid (Jacek Wielemborek)
|
||||
|
||||
Allows users to more conveniently build and deploy AFL via Docker.
|
||||
|
||||
https://github.com/d33tah/afl-sid
|
||||
|
||||
Another Docker-related project:
|
||||
|
||||
https://github.com/ozzyjohnson/docker-afl
|
||||
|
||||
### afl-monitor (Paul S. Ziegler)
|
||||
|
||||
Provides more detailed and versatile statistics about your running AFL jobs.
|
||||
|
||||
https://github.com/reflare/afl-monitor
|
||||
|
||||
### FEXM (Security in Telecommunications)
|
||||
|
||||
Fully automated fuzzing framework, based on AFL
|
||||
|
||||
https://github.com/fgsect/fexm
|
||||
|
||||
## Crash triage, coverage analysis, and other companion tools:
|
||||
|
||||
### afl-crash-analyzer (Tobias Ospelt)
|
||||
|
||||
Makes it easier to navigate and annotate crashing test cases.
|
||||
|
||||
https://github.com/floyd-fuh/afl-crash-analyzer/
|
||||
|
||||
### Crashwalk (Ben Nagy)
|
||||
|
||||
AFL-aware tool to annotate and sort through crashing test cases.
|
||||
|
||||
https://github.com/bnagy/crashwalk
|
||||
|
||||
### afl-cov (Michael Rash)
|
||||
|
||||
Produces human-readable coverage data based on the output queue of afl-fuzz.
|
||||
|
||||
https://github.com/mrash/afl-cov
|
||||
|
||||
### afl-sancov (Bhargava Shastry)
|
||||
|
||||
Similar to afl-cov, but uses clang sanitizer instrumentation.
|
||||
|
||||
https://github.com/bshastry/afl-sancov
|
||||
|
||||
### RecidiVM (Jakub Wilk)
|
||||
|
||||
Makes it easy to estimate memory usage limits when fuzzing with ASAN or MSAN.
|
||||
|
||||
https://jwilk.net/software/recidivm
|
||||
|
||||
### aflize (Jacek Wielemborek)
|
||||
|
||||
Automatically build AFL-enabled versions of Debian packages.
|
||||
|
||||
https://github.com/d33tah/aflize
|
||||
|
||||
### afl-ddmin-mod (Markus Teufelberger)
|
||||
|
||||
A variant of afl-tmin that uses a more sophisticated (but slower)
|
||||
minimization algorithm.
|
||||
|
||||
https://github.com/MarkusTeufelberger/afl-ddmin-mod
|
||||
|
||||
### afl-kit (Kuang-che Wu)
|
||||
|
||||
Replacements for afl-cmin and afl-tmin with additional features, such
|
||||
as the ability to filter crashes based on stderr patterns.
|
||||
|
||||
https://github.com/kcwu/afl-kit
|
||||
|
||||
## Narrow-purpose or experimental:
|
||||
|
||||
### Cygwin support (Ali Rizvi-Santiago)
|
||||
|
||||
Pretty self-explanatory. As per the author, this "mostly" ports AFL to
|
||||
Windows. Field reports welcome!
|
||||
|
||||
https://github.com/arizvisa/afl-cygwin
|
||||
|
||||
### Pause and resume scripts (Ben Nagy)
|
||||
|
||||
Simple automation to suspend and resume groups of fuzzing jobs.
|
||||
|
||||
https://github.com/bnagy/afl-trivia
|
||||
|
||||
### Static binary-only instrumentation (Aleksandar Nikolich)
|
||||
|
||||
Allows black-box binaries to be instrumented statically (i.e., by modifying
|
||||
the binary ahead of time, rather than translating it on the fly). Author
|
||||
reports better performance compared to QEMU, but occasional translation
|
||||
errors with stripped binaries.
|
||||
|
||||
https://github.com/vanhauser-thc/afl-dyninst
|
||||
|
||||
### AFL PIN (Parker Thompson)
|
||||
|
||||
Early-stage Intel PIN instrumentation support (from before we settled on
|
||||
faster-running QEMU).
|
||||
|
||||
https://github.com/mothran/aflpin
|
||||
|
||||
### AFL-style instrumentation in llvm (Kostya Serebryany)
|
||||
|
||||
Allows AFL-equivalent instrumentation to be injected at compiler level.
|
||||
This is currently not supported by AFL as-is, but may be useful in other
|
||||
projects.
|
||||
|
||||
https://code.google.com/p/address-sanitizer/wiki/AsanCoverage#Coverage_counters
|
||||
|
||||
### AFL JS (Han Choongwoo)
|
||||
|
||||
One-off optimizations to speed up the fuzzing of JavaScriptCore (now likely
|
||||
superseded by LLVM deferred forkserver init - see README.llvm.md).
|
||||
|
||||
https://github.com/tunz/afl-fuzz-js
|
||||
|
||||
### AFL harness for fwknop (Michael Rash)
|
||||
|
||||
An example of a fairly involved integration with AFL.
|
||||
|
||||
https://github.com/mrash/fwknop/tree/master/test/afl
|
||||
|
||||
### Building harnesses for DNS servers (Jonathan Foote, Ron Bowes)
|
||||
|
||||
Two articles outlining the general principles and showing some example code.
|
||||
|
||||
https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop
|
||||
https://goo.gl/j9EgFf
|
||||
|
||||
### Fuzzer shell for SQLite (Richard Hipp)
|
||||
|
||||
A simple SQL shell designed specifically for fuzzing the underlying library.
|
||||
|
||||
https://www.sqlite.org/src/artifact/9e7e273da2030371
|
||||
|
||||
### Support for Python mutation modules (Christian Holler)
|
||||
|
||||
now integrated in AFL++, originally from here
|
||||
https://github.com/choller/afl/blob/master/docs/mozilla/python_modules.txt
|
||||
|
||||
### Support for selective instrumentation (Christian Holler)
|
||||
|
||||
now integrated in AFL++, originally from here
|
||||
https://github.com/choller/afl/blob/master/docs/mozilla/partial_instrumentation.txt
|
||||
|
||||
### Syzkaller (Dmitry Vyukov)
|
||||
|
||||
A similar guided approach as applied to fuzzing syscalls:
|
||||
|
||||
https://github.com/google/syzkaller/wiki/Found-Bugs
|
||||
https://github.com/dvyukov/linux/commit/33787098ffaaa83b8a7ccf519913ac5fd6125931
|
||||
https://events.linuxfoundation.org/sites/events/files/slides/AFL%20filesystem%20fuzzing%2C%20Vault%202016_0.pdf
|
||||
|
||||
|
||||
### Kernel Snapshot Fuzzing using Unicornafl (Security in Telecommunications)
|
||||
|
||||
https://github.com/fgsect/unicorefuzz
|
||||
|
||||
### Android support (ele7enxxh)
|
||||
|
||||
Based on a somewhat dated version of AFL:
|
||||
|
||||
https://github.com/ele7enxxh/android-afl
|
||||
|
||||
### CGI wrapper (floyd)
|
||||
|
||||
Facilitates the testing of CGI scripts.
|
||||
|
||||
https://github.com/floyd-fuh/afl-cgi-wrapper
|
||||
|
||||
### Fuzzing difficulty estimation (Marcel Boehme)
|
||||
|
||||
A fork of AFL that tries to quantify the likelihood of finding additional
|
||||
paths or crashes at any point in a fuzzing job.
|
||||
|
||||
https://github.com/mboehme/pythia
|
@ -1,444 +0,0 @@
|
||||
# Understanding the status screen
|
||||
|
||||
This document provides an overview of the status screen - plus tips for
|
||||
troubleshooting any warnings and red text shown in the UI. See README.md for
|
||||
the general instruction manual.
|
||||
|
||||
## A note about colors
|
||||
|
||||
The status screen and error messages use colors to keep things readable and
|
||||
attract your attention to the most important details. For example, red almost
|
||||
always means "consult this doc" :-)
|
||||
|
||||
Unfortunately, the UI will render correctly only if your terminal is using
|
||||
a traditional un*x palette (white text on black background) or something close
|
||||
to that.
|
||||
|
||||
If you are using inverse video, you may want to change your settings, say:
|
||||
|
||||
- For GNOME Terminal, go to `Edit > Profile` preferences, select the "colors" tab, and from the list of built-in schemes, choose "white on black".
|
||||
- For the MacOS X Terminal app, open a new window using the "Pro" scheme via the `Shell > New Window` menu (or make "Pro" your default).
|
||||
|
||||
Alternatively, if you really like your current colors, you can edit config.h
|
||||
to comment out USE_COLORS, then do `make clean all`.
|
||||
|
||||
I'm not aware of any other simple way to make this work without causing
|
||||
other side effects - sorry about that.
|
||||
|
||||
With that out of the way, let's talk about what's actually on the screen...
|
||||
|
||||
### The status bar
|
||||
|
||||
```
|
||||
american fuzzy lop ++3.01a (default) [fast] {0}
|
||||
```
|
||||
|
||||
The top line shows you which mode afl-fuzz is running in
|
||||
(normal: "american fuzzy lop", crash exploration mode: "peruvian rabbit mode")
|
||||
and the version of AFL++.
|
||||
Next to the version is the banner, which, if not set with -T by hand, will
|
||||
either show the binary name being fuzzed, or the -M/-S main/secondary name for
|
||||
parallel fuzzing.
|
||||
Second to last is the power schedule mode being run (default: fast).
|
||||
Finally, the last item is the CPU id.
|
||||
|
||||
### Process timing
|
||||
|
||||
```
|
||||
+----------------------------------------------------+
|
||||
| run time : 0 days, 8 hrs, 32 min, 43 sec |
|
||||
| last new path : 0 days, 0 hrs, 6 min, 40 sec |
|
||||
| last uniq crash : none seen yet |
|
||||
| last uniq hang : 0 days, 1 hrs, 24 min, 32 sec |
|
||||
+----------------------------------------------------+
|
||||
```
|
||||
|
||||
This section is fairly self-explanatory: it tells you how long the fuzzer has
|
||||
been running and how much time has elapsed since its most recent finds. This is
|
||||
broken down into "paths" (a shorthand for test cases that trigger new execution
|
||||
patterns), crashes, and hangs.
|
||||
|
||||
When it comes to timing: there is no hard rule, but most fuzzing jobs should be
|
||||
expected to run for days or weeks; in fact, for a moderately complex project, the
|
||||
first pass will probably take a day or so. Every now and then, some jobs
|
||||
will be allowed to run for months.
|
||||
|
||||
There's one important thing to watch out for: if the tool is not finding new
|
||||
paths within several minutes of starting, you're probably not invoking the
|
||||
target binary correctly and it never gets to parse the input files we're
|
||||
throwing at it; other possible explanations are that the default memory limit
|
||||
(`-m`) is too restrictive, and the program exits after failing to allocate a
|
||||
buffer very early on; or that the input files are patently invalid and always
|
||||
fail a basic header check.
|
||||
|
||||
If there are no new paths showing up for a while, you will eventually see a big
|
||||
red warning in this section, too :-)
|
||||
|
||||
### Overall results
|
||||
|
||||
```
|
||||
+-----------------------+
|
||||
| cycles done : 0 |
|
||||
| total paths : 2095 |
|
||||
| uniq crashes : 0 |
|
||||
| uniq hangs : 19 |
|
||||
+-----------------------+
|
||||
```
|
||||
|
||||
The first field in this section gives you the count of queue passes done so far - that is, the number of times the fuzzer went over all the interesting test
|
||||
cases discovered so far, fuzzed them, and looped back to the very beginning.
|
||||
Every fuzzing session should be allowed to complete at least one cycle; and
|
||||
ideally, should run much longer than that.
|
||||
|
||||
As noted earlier, the first pass can take a day or longer, so sit back and
|
||||
relax.
|
||||
|
||||
To help make the call on when to hit `Ctrl-C`, the cycle counter is color-coded.
|
||||
It is shown in magenta during the first pass, progresses to yellow if new finds
|
||||
are still being made in subsequent rounds, then blue when that ends - and
|
||||
finally, turns green after the fuzzer hasn't been seeing any action for a
|
||||
longer while.
|
||||
|
||||
The remaining fields in this part of the screen should be pretty obvious:
|
||||
there's the number of test cases ("paths") discovered so far, and the number of
|
||||
unique faults. The test cases, crashes, and hangs can be explored in real-time
|
||||
by browsing the output directory, as discussed in README.md.
|
||||
|
||||
### Cycle progress
|
||||
|
||||
```
|
||||
+-------------------------------------+
|
||||
| now processing : 1296 (61.86%) |
|
||||
| paths timed out : 0 (0.00%) |
|
||||
+-------------------------------------+
|
||||
```
|
||||
|
||||
This box tells you how far along the fuzzer is with the current queue cycle: it
|
||||
shows the ID of the test case it is currently working on, plus the number of
|
||||
inputs it decided to ditch because they were persistently timing out.
|
||||
|
||||
The "*" suffix sometimes shown in the first line means that the currently
|
||||
processed path is not "favored" (a property discussed later on).
|
||||
|
||||
### Map coverage
|
||||
|
||||
```
|
||||
+--------------------------------------+
|
||||
| map density : 10.15% / 29.07% |
|
||||
| count coverage : 4.03 bits/tuple |
|
||||
+--------------------------------------+
|
||||
```
|
||||
|
||||
The section provides some trivia about the coverage observed by the
|
||||
instrumentation embedded in the target binary.
|
||||
|
||||
The first line in the box tells you how many branch tuples we have already
|
||||
hit, in proportion to how much the bitmap can hold. The number on the left
|
||||
describes the current input; the one on the right is the value for the entire
|
||||
input corpus.
|
||||
|
||||
Be wary of extremes:
|
||||
|
||||
- Absolute numbers below 200 or so suggest one of three things: that the
|
||||
program is extremely simple; that it is not instrumented properly (e.g.,
|
||||
due to being linked against a non-instrumented copy of the target
|
||||
library); or that it is bailing out prematurely on your input test cases.
|
||||
The fuzzer will try to mark this in pink, just to make you aware.
|
||||
- Percentages over 70% may very rarely happen with very complex programs
|
||||
that make heavy use of template-generated code.
|
||||
Because high bitmap density makes it harder for the fuzzer to reliably
|
||||
discern new program states, I recommend recompiling the binary with
|
||||
`AFL_INST_RATIO=10` or so and trying again (see env_variables.md).
|
||||
The fuzzer will flag high percentages in red. Chances are, you will never
|
||||
see that unless you're fuzzing extremely hairy software (say, v8, perl,
|
||||
ffmpeg).
|
||||
|
||||
The other line deals with the variability in tuple hit counts seen in the
|
||||
binary. In essence, if every taken branch is always taken a fixed number of
|
||||
times for all the inputs we have tried, this will read `1.00`. As we manage
|
||||
to trigger other hit counts for every branch, the needle will start to move
|
||||
toward `8.00` (every bit in the 8-bit map hit), but will probably never
|
||||
reach that extreme.
|
||||
|
||||
Together, the values can be useful for comparing the coverage of several
|
||||
different fuzzing jobs that rely on the same instrumented binary.
|
||||
|
||||
### Stage progress
|
||||
|
||||
```
|
||||
+-------------------------------------+
|
||||
| now trying : interest 32/8 |
|
||||
| stage execs : 3996/34.4k (11.62%) |
|
||||
| total execs : 27.4M |
|
||||
| exec speed : 891.7/sec |
|
||||
+-------------------------------------+
|
||||
```
|
||||
|
||||
This part gives you an in-depth peek at what the fuzzer is actually doing right
|
||||
now. It tells you about the current stage, which can be any of:
|
||||
|
||||
- calibration - a pre-fuzzing stage where the execution path is examined
|
||||
to detect anomalies, establish baseline execution speed, and so on. Executed
|
||||
very briefly whenever a new find is being made.
|
||||
- trim L/S - another pre-fuzzing stage where the test case is trimmed to the
|
||||
shortest form that still produces the same execution path. The length (L)
|
||||
and stepover (S) are chosen in general relationship to file size.
|
||||
- bitflip L/S - deterministic bit flips. There are L bits toggled at any given
|
||||
time, walking the input file with S-bit increments. The current L/S variants
|
||||
are: `1/1`, `2/1`, `4/1`, `8/8`, `16/8`, `32/8`.
|
||||
- arith L/8 - deterministic arithmetics. The fuzzer tries to subtract or add
|
||||
small integers to 8-, 16-, and 32-bit values. The stepover is always 8 bits.
|
||||
- interest L/8 - deterministic value overwrite. The fuzzer has a list of known
|
||||
"interesting" 8-, 16-, and 32-bit values to try. The stepover is 8 bits.
|
||||
- extras - deterministic injection of dictionary terms. This can be shown as
|
||||
"user" or "auto", depending on whether the fuzzer is using a user-supplied
|
||||
dictionary (`-x`) or an auto-created one. You will also see "over" or "insert",
|
||||
depending on whether the dictionary words overwrite existing data or are
|
||||
inserted by offsetting the remaining data to accommodate their length.
|
||||
- havoc - a sort-of-fixed-length cycle with stacked random tweaks. The
|
||||
operations attempted during this stage include bit flips, overwrites with
|
||||
random and "interesting" integers, block deletion, block duplication, plus
|
||||
assorted dictionary-related operations (if a dictionary is supplied in the
|
||||
first place).
|
||||
- splice - a last-resort strategy that kicks in after the first full queue
|
||||
cycle with no new paths. It is equivalent to 'havoc', except that it first
|
||||
splices together two random inputs from the queue at some arbitrarily
|
||||
selected midpoint.
|
||||
- sync - a stage used only when `-M` or `-S` is set (see parallel_fuzzing.md).
|
||||
No real fuzzing is involved, but the tool scans the output from other
|
||||
fuzzers and imports test cases as necessary. The first time this is done,
|
||||
it may take several minutes or so.
|
||||
|
||||
The remaining fields should be fairly self-evident: there's the exec count
|
||||
progress indicator for the current stage, a global exec counter, and a
|
||||
benchmark for the current program execution speed. This may fluctuate from
|
||||
one test case to another, but the benchmark should be ideally over 500 execs/sec
|
||||
most of the time - and if it stays below 100, the job will probably take very
|
||||
long.
|
||||
|
||||
The fuzzer will explicitly warn you about slow targets, too. If this happens,
|
||||
see the [perf_tips.md](perf_tips.md) file included with the fuzzer for ideas on how to speed
|
||||
things up.
|
||||
|
||||
### Findings in depth
|
||||
|
||||
```
|
||||
+--------------------------------------+
|
||||
| favored paths : 879 (41.96%) |
|
||||
| new edges on : 423 (20.19%) |
|
||||
| total crashes : 0 (0 unique) |
|
||||
| total tmouts : 24 (19 unique) |
|
||||
+--------------------------------------+
|
||||
```
|
||||
|
||||
This gives you several metrics that are of interest mostly to complete nerds.
|
||||
The section includes the number of paths that the fuzzer likes the most based
|
||||
on a minimization algorithm baked into the code (these will get considerably
|
||||
more air time), and the number of test cases that actually resulted in better
|
||||
edge coverage (versus just pushing the branch hit counters up). There are also
|
||||
additional, more detailed counters for crashes and timeouts.
|
||||
|
||||
Note that the timeout counter is somewhat different from the hang counter; this
|
||||
one includes all test cases that exceeded the timeout, even if they did not
|
||||
exceed it by a margin sufficient to be classified as hangs.
|
||||
|
||||
### Fuzzing strategy yields
|
||||
|
||||
```
|
||||
+-----------------------------------------------------+
|
||||
| bit flips : 57/289k, 18/289k, 18/288k |
|
||||
| byte flips : 0/36.2k, 4/35.7k, 7/34.6k |
|
||||
| arithmetics : 53/2.54M, 0/537k, 0/55.2k |
|
||||
| known ints : 8/322k, 12/1.32M, 10/1.70M |
|
||||
| dictionary : 9/52k, 1/53k, 1/24k |
|
||||
|havoc/splice : 1903/20.0M, 0/0 |
|
||||
|py/custom/rq : unused, 53/2.54M, unused |
|
||||
| trim/eff : 20.31%/9201, 17.05% |
|
||||
+-----------------------------------------------------+
|
||||
```
|
||||
|
||||
This is just another nerd-targeted section keeping track of how many paths we
|
||||
have netted, in proportion to the number of execs attempted, for each of the
|
||||
fuzzing strategies discussed earlier on. This serves to convincingly validate
|
||||
assumptions about the usefulness of the various approaches taken by afl-fuzz.
|
||||
|
||||
The trim strategy stats in this section are a bit different than the rest.
|
||||
The first number in this line shows the ratio of bytes removed from the input
|
||||
files; the second one corresponds to the number of execs needed to achieve this
|
||||
goal. Finally, the third number shows the proportion of bytes that, although
|
||||
not possible to remove, were deemed to have no effect and were excluded from
|
||||
some of the more expensive deterministic fuzzing steps.
|
||||
|
||||
Note that when deterministic mutation mode is off (which is the default
|
||||
because it is not very efficient) the first five lines display
|
||||
"disabled (default, enable with -D)".
|
||||
|
||||
Only what is activated will have its counter shown.
|
||||
|
||||
### Path geometry
|
||||
|
||||
```
|
||||
+---------------------+
|
||||
| levels : 5 |
|
||||
| pending : 1570 |
|
||||
| pend fav : 583 |
|
||||
| own finds : 0 |
|
||||
| imported : 0 |
|
||||
| stability : 100.00% |
|
||||
+---------------------+
|
||||
```
|
||||
|
||||
The first field in this section tracks the path depth reached through the
|
||||
guided fuzzing process. In essence: the initial test cases supplied by the
|
||||
user are considered "level 1". The test cases that can be derived from that
|
||||
through traditional fuzzing are considered "level 2"; the ones derived by
|
||||
using these as inputs to subsequent fuzzing rounds are "level 3"; and so forth.
|
||||
The maximum depth is therefore a rough proxy for how much value you're getting
|
||||
out of the instrumentation-guided approach taken by afl-fuzz.
|
||||
|
||||
The next field shows you the number of inputs that have not gone through any
|
||||
fuzzing yet. The same stat is also given for "favored" entries that the fuzzer
|
||||
really wants to get to in this queue cycle (the non-favored entries may have to
|
||||
wait a couple of cycles to get their chance).
|
||||
|
||||
Next, we have the number of new paths found during this fuzzing session and
|
||||
imported from other fuzzer instances when doing parallelized fuzzing; and the
|
||||
extent to which identical inputs appear to sometimes produce variable behavior
|
||||
in the tested binary.
|
||||
|
||||
That last bit is actually fairly interesting: it measures the consistency of
|
||||
observed traces. If a program always behaves the same for the same input data,
|
||||
it will earn a score of 100%. When the value is lower but still shown in purple,
|
||||
the fuzzing process is unlikely to be negatively affected. If it goes into red,
|
||||
you may be in trouble, since AFL will have difficulty discerning between
|
||||
meaningful and "phantom" effects of tweaking the input file.
|
||||
|
||||
Now, most targets will just get a 100% score, but when you see lower figures,
|
||||
there are several things to look at:
|
||||
|
||||
- The use of uninitialized memory in conjunction with some intrinsic sources
|
||||
of entropy in the tested binary. Harmless to AFL, but could be indicative
|
||||
of a security bug.
|
||||
- Attempts to manipulate persistent resources, such as left over temporary
|
||||
files or shared memory objects. This is usually harmless, but you may want
|
||||
to double-check to make sure the program isn't bailing out prematurely.
|
||||
Running out of disk space, SHM handles, or other global resources can
|
||||
trigger this, too.
|
||||
- Hitting some functionality that is actually designed to behave randomly.
|
||||
Generally harmless. For example, when fuzzing sqlite, an input like
|
||||
`select random();` will trigger a variable execution path.
|
||||
- Multiple threads executing at once in semi-random order. This is harmless
|
||||
when the 'stability' metric stays over 90% or so, but can become an issue
|
||||
if not. Here's what to try:
|
||||
* Use afl-clang-fast from [instrumentation](../instrumentation/) - it uses a thread-local tracking
|
||||
model that is less prone to concurrency issues,
|
||||
* See if the target can be compiled or run without threads. Common
|
||||
`./configure` options include `--without-threads`, `--disable-pthreads`, or
|
||||
`--disable-openmp`.
|
||||
* Replace pthreads with GNU Pth (https://www.gnu.org/software/pth/), which
|
||||
allows you to use a deterministic scheduler.
|
||||
- In persistent mode, minor drops in the "stability" metric can be normal,
|
||||
because not all the code behaves identically when re-entered; but major
|
||||
dips may signify that the code within `__AFL_LOOP()` is not behaving
|
||||
correctly on subsequent iterations (e.g., due to incomplete clean-up or
|
||||
reinitialization of the state) and that most of the fuzzing effort goes
|
||||
to waste.
|
||||
|
||||
The paths where variable behavior is detected are marked with a matching entry
|
||||
in the `<out_dir>/queue/.state/variable_behavior/` directory, so you can look
|
||||
them up easily.
|
||||
|
||||
### CPU load
|
||||
|
||||
```
|
||||
[cpu: 25%]
|
||||
```
|
||||
|
||||
This tiny widget shows the apparent CPU utilization on the local system. It is
|
||||
calculated by taking the number of processes in the "runnable" state, and then
|
||||
comparing it to the number of logical cores on the system.
|
||||
|
||||
If the value is shown in green, you are using fewer CPU cores than available on
|
||||
your system and can probably parallelize to improve performance; for tips on
|
||||
how to do that, see parallel_fuzzing.md.
|
||||
|
||||
If the value is shown in red, your CPU is *possibly* oversubscribed, and
|
||||
running additional fuzzers may not give you any benefits.
|
||||
|
||||
Of course, this benchmark is very simplistic; it tells you how many processes
|
||||
are ready to run, but not how resource-hungry they may be. It also doesn't
|
||||
distinguish between physical cores, logical cores, and virtualized CPUs; the
|
||||
performance characteristics of each of these will differ quite a bit.
|
||||
|
||||
If you want a more accurate measurement, you can run the `afl-gotcpu` utility from the command line.
|
||||
|
||||
### Addendum: status and plot files
|
||||
|
||||
For unattended operation, some of the key status screen information can be also
|
||||
found in a machine-readable format in the fuzzer_stats file in the output
|
||||
directory. This includes:
|
||||
|
||||
- `start_time` - unix time indicating the start time of afl-fuzz
|
||||
- `last_update` - unix time corresponding to the last update of this file
|
||||
- `run_time` - run time in seconds to the last update of this file
|
||||
- `fuzzer_pid` - PID of the fuzzer process
|
||||
- `cycles_done` - queue cycles completed so far
|
||||
- `cycles_wo_finds` - number of cycles without any new paths found
|
||||
- `execs_done` - number of execve() calls attempted
|
||||
- `execs_per_sec` - overall number of execs per second
|
||||
- `paths_total` - total number of entries in the queue
|
||||
- `paths_favored` - number of queue entries that are favored
|
||||
- `paths_found` - number of entries discovered through local fuzzing
|
||||
- `paths_imported` - number of entries imported from other instances
|
||||
- `max_depth` - number of levels in the generated data set
|
||||
- `cur_path` - currently processed entry number
|
||||
- `pending_favs` - number of favored entries still waiting to be fuzzed
|
||||
- `pending_total` - number of all entries waiting to be fuzzed
|
||||
- `variable_paths` - number of test cases showing variable behavior
|
||||
- `stability` - percentage of bitmap bytes that behave consistently
|
||||
- `bitmap_cvg` - percentage of edge coverage found in the map so far
|
||||
- `unique_crashes` - number of unique crashes recorded
|
||||
- `unique_hangs` - number of unique hangs encountered
|
||||
- `last_path` - seconds since the last path was found
|
||||
- `last_crash` - seconds since the last crash was found
|
||||
- `last_hang` - seconds since the last hang was found
|
||||
- `execs_since_crash` - execs since the last crash was found
|
||||
- `exec_timeout` - the -t command line value
|
||||
- `slowest_exec_ms` - real time of the slowest execution in ms
|
||||
- `peak_rss_mb` - max rss usage reached during fuzzing in MB
|
||||
- `edges_found` - how many edges have been found
|
||||
- `var_byte_count` - how many edges are non-deterministic
|
||||
- `afl_banner` - banner text (e.g. the target name)
|
||||
- `afl_version` - the version of AFL used
|
||||
- `target_mode` - default, persistent, qemu, unicorn, non-instrumented
|
||||
- `command_line` - full command line used for the fuzzing session
|
||||
|
||||
Most of these map directly to the UI elements discussed earlier on.
|
||||
|
||||
On top of that, you can also find an entry called `plot_data`, containing a
|
||||
plottable history for most of these fields. If you have gnuplot installed, you
|
||||
can turn this into a nice progress report with the included `afl-plot` tool.
|
||||
|
||||
|
||||
### Addendum: Automatically send metrics with StatsD
|
||||
|
||||
In a CI environment or when running multiple fuzzers, it can be tedious to
|
||||
log into each of them or deploy scripts to read the fuzzer statistics.
|
||||
Using `AFL_STATSD` (and the other related environment variables `AFL_STATSD_HOST`,
|
||||
`AFL_STATSD_PORT`, `AFL_STATSD_TAGS_FLAVOR`) you can automatically send metrics
|
||||
to your favorite StatsD server. Depending on your StatsD server you will be able
|
||||
to monitor, trigger alerts or perform actions based on these metrics (e.g., alert on
|
||||
slow exec/s for a new build, threshold of crashes, time since last crash > X, etc).
|
||||
|
||||
The selected metrics are a subset of all the metrics found in the status and in
|
||||
the plot file. The list is the following: `cycle_done`, `cycles_wo_finds`,
|
||||
`execs_done`, `execs_per_sec`, `paths_total`, `paths_favored`, `paths_found`,
|
||||
`paths_imported`, `max_depth`, `cur_path`, `pending_favs`, `pending_total`,
|
||||
`variable_paths`, `unique_crashes`, `unique_hangs`, `total_crashes`,
|
||||
`slowest_exec_ms`, `edges_found`, `var_byte_count`, `havoc_expansion`.
|
||||
Their definitions can be found in the addendum above.
|
||||
|
||||
When using multiple fuzzer instances with StatsD it is *strongly* recommended to set up
|
||||
the flavor (AFL_STATSD_TAGS_FLAVOR) to match your StatsD server. This will allow you
|
||||
to see individual fuzzer performance, detect bad ones, see the progress of each
|
||||
strategy...
|
@ -1,550 +0,0 @@
|
||||
# Technical "whitepaper" for afl-fuzz
|
||||
|
||||
|
||||
NOTE: this document is mostly outdated!
|
||||
|
||||
|
||||
This document provides a quick overview of the guts of American Fuzzy Lop.
|
||||
See README.md for the general instruction manual; and for a discussion of
|
||||
motivations and design goals behind AFL, see historical_notes.md.
|
||||
|
||||
## 0. Design statement
|
||||
|
||||
American Fuzzy Lop does its best not to focus on any singular principle of
|
||||
operation and not be a proof-of-concept for any specific theory. The tool can
|
||||
be thought of as a collection of hacks that have been tested in practice,
|
||||
found to be surprisingly effective, and have been implemented in the simplest,
|
||||
most robust way I could think of at the time.
|
||||
|
||||
Many of the resulting features are made possible thanks to the availability of
|
||||
lightweight instrumentation that served as a foundation for the tool, but this
|
||||
mechanism should be thought of merely as a means to an end. The only true
|
||||
governing principles are speed, reliability, and ease of use.
|
||||
|
||||
## 1. Coverage measurements
|
||||
|
||||
The instrumentation injected into compiled programs captures branch (edge)
|
||||
coverage, along with coarse branch-taken hit counts. The code injected at
|
||||
branch points is essentially equivalent to:
|
||||
|
||||
```c
|
||||
cur_location = <COMPILE_TIME_RANDOM>;
|
||||
shared_mem[cur_location ^ prev_location]++;
|
||||
prev_location = cur_location >> 1;
|
||||
```
|
||||
|
||||
The `cur_location` value is generated randomly to simplify the process of
|
||||
linking complex projects and keep the XOR output distributed uniformly.
|
||||
|
||||
The `shared_mem[]` array is a 64 kB SHM region passed to the instrumented binary
|
||||
by the caller. Every byte set in the output map can be thought of as a hit for
|
||||
a particular (`branch_src`, `branch_dst`) tuple in the instrumented code.
|
||||
|
||||
The size of the map is chosen so that collisions are sporadic with almost all
|
||||
of the intended targets, which usually sport between 2k and 10k discoverable
|
||||
branch points:
|
||||
|
||||
```
|
||||
Branch cnt | Colliding tuples | Example targets
|
||||
------------+------------------+-----------------
|
||||
1,000 | 0.75% | giflib, lzo
|
||||
2,000 | 1.5% | zlib, tar, xz
|
||||
5,000 | 3.5% | libpng, libwebp
|
||||
10,000 | 7% | libxml
|
||||
20,000 | 14% | sqlite
|
||||
50,000 | 30% | -
|
||||
```
|
||||
|
||||
At the same time, its size is small enough to allow the map to be analyzed
|
||||
in a matter of microseconds on the receiving end, and to effortlessly fit
|
||||
within L2 cache.
|
||||
|
||||
This form of coverage provides considerably more insight into the execution
|
||||
path of the program than simple block coverage. In particular, it trivially
|
||||
distinguishes between the following execution traces:
|
||||
|
||||
```
|
||||
A -> B -> C -> D -> E (tuples: AB, BC, CD, DE)
|
||||
A -> B -> D -> C -> E (tuples: AB, BD, DC, CE)
|
||||
```
|
||||
|
||||
This aids the discovery of subtle fault conditions in the underlying code,
|
||||
because security vulnerabilities are more often associated with unexpected
|
||||
or incorrect state transitions than with merely reaching a new basic block.
|
||||
|
||||
The reason for the shift operation in the last line of the pseudocode shown
|
||||
earlier in this section is to preserve the directionality of tuples (without
|
||||
this, A ^ B would be indistinguishable from B ^ A) and to retain the identity
|
||||
of tight loops (otherwise, A ^ A would be obviously equal to B ^ B).
|
||||
|
||||
The absence of simple saturating arithmetic opcodes on Intel CPUs means that
|
||||
the hit counters can sometimes wrap around to zero. Since this is a fairly
|
||||
unlikely and localized event, it's seen as an acceptable performance trade-off.
|
||||
|
||||
## 2. Detecting new behaviors
|
||||
|
||||
The fuzzer maintains a global map of tuples seen in previous executions; this
|
||||
data can be rapidly compared with individual traces and updated in just a couple
|
||||
of dword- or qword-wide instructions and a simple loop.
|
||||
|
||||
When a mutated input produces an execution trace containing new tuples, the
|
||||
corresponding input file is preserved and routed for additional processing
|
||||
later on (see section #3). Inputs that do not trigger new local-scale state
|
||||
transitions in the execution trace (i.e., produce no new tuples) are discarded,
|
||||
even if their overall control flow sequence is unique.
|
||||
|
||||
This approach allows for a very fine-grained and long-term exploration of
|
||||
program state while not having to perform any computationally intensive and
|
||||
fragile global comparisons of complex execution traces, and while avoiding the
|
||||
scourge of path explosion.
|
||||
|
||||
To illustrate the properties of the algorithm, consider that the second trace
|
||||
shown below would be considered substantially new because of the presence of
|
||||
new tuples (CA, AE):
|
||||
|
||||
```
|
||||
#1: A -> B -> C -> D -> E
|
||||
#2: A -> B -> C -> A -> E
|
||||
```
|
||||
|
||||
At the same time, with #2 processed, the following pattern will not be seen
|
||||
as unique, despite having a markedly different overall execution path:
|
||||
|
||||
```
|
||||
#3: A -> B -> C -> A -> B -> C -> A -> B -> C -> D -> E
|
||||
```
|
||||
|
||||
In addition to detecting new tuples, the fuzzer also considers coarse tuple
|
||||
hit counts. These are divided into several buckets:
|
||||
|
||||
```
|
||||
1, 2, 3, 4-7, 8-15, 16-31, 32-127, 128+
|
||||
```
|
||||
|
||||
To some extent, the number of buckets is an implementation artifact: it allows
|
||||
an in-place mapping of an 8-bit counter generated by the instrumentation to
|
||||
an 8-position bitmap relied on by the fuzzer executable to keep track of the
|
||||
already-seen execution counts for each tuple.
|
||||
|
||||
Changes within the range of a single bucket are ignored; transition from one
|
||||
bucket to another is flagged as an interesting change in program control flow,
|
||||
and is routed to the evolutionary process outlined in the section below.
|
||||
|
||||
The hit count behavior provides a way to distinguish between potentially
|
||||
interesting control flow changes, such as a block of code being executed
|
||||
twice when it was normally hit only once. At the same time, it is fairly
|
||||
insensitive to empirically less notable changes, such as a loop going from
|
||||
47 cycles to 48. The counters also provide some degree of "accidental"
|
||||
immunity against tuple collisions in dense trace maps.
|
||||
|
||||
The execution is policed fairly heavily through memory and execution time
|
||||
limits; by default, the timeout is set at 5x the initially-calibrated
|
||||
execution speed, rounded up to 20 ms. The aggressive timeouts are meant to
|
||||
prevent dramatic fuzzer performance degradation by descending into tarpits
|
||||
that, say, improve coverage by 1% while being 100x slower; we pragmatically
|
||||
reject them and hope that the fuzzer will find a less expensive way to reach
|
||||
the same code. Empirical testing strongly suggests that more generous time
|
||||
limits are not worth the cost.
|
||||
|
||||
## 3. Evolving the input queue
|
||||
|
||||
Mutated test cases that produced new state transitions within the program are
|
||||
added to the input queue and used as a starting point for future rounds of
|
||||
fuzzing. They supplement, but do not automatically replace, existing finds.
|
||||
|
||||
In contrast to more greedy genetic algorithms, this approach allows the tool
|
||||
to progressively explore various disjoint and possibly mutually incompatible
|
||||
features of the underlying data format, as shown in this image:
|
||||
|
||||

|
||||
|
||||
Several practical examples of the results of this algorithm are discussed
|
||||
here:
|
||||
|
||||
https://lcamtuf.blogspot.com/2014/11/pulling-jpegs-out-of-thin-air.html
|
||||
https://lcamtuf.blogspot.com/2014/11/afl-fuzz-nobody-expects-cdata-sections.html
|
||||
|
||||
The synthetic corpus produced by this process is essentially a compact
|
||||
collection of "hmm, this does something new!" input files, and can be used to
|
||||
seed any other testing processes down the line (for example, to manually
|
||||
stress-test resource-intensive desktop apps).
|
||||
|
||||
With this approach, the queue for most targets grows to somewhere between 1k
|
||||
and 10k entries; approximately 10-30% of this is attributable to the discovery
|
||||
of new tuples, and the remainder is associated with changes in hit counts.
|
||||
|
||||
The following table compares the relative ability to discover file syntax and
|
||||
explore program states when using several different approaches to guided
|
||||
fuzzing. The instrumented target was GNU patch 2.7.3 compiled with `-O3` and
|
||||
seeded with a dummy text file; the session consisted of a single pass over the
|
||||
input queue with afl-fuzz:
|
||||
|
||||
```
|
||||
Fuzzer guidance | Blocks | Edges | Edge hit | Highest-coverage
|
||||
strategy used | reached | reached | cnt var | test case generated
|
||||
------------------+---------+---------+----------+---------------------------
|
||||
(Initial file) | 156 | 163 | 1.00 | (none)
|
||||
| | | |
|
||||
Blind fuzzing S | 182 | 205 | 2.23 | First 2 B of RCS diff
|
||||
Blind fuzzing L | 228 | 265 | 2.23 | First 4 B of -c mode diff
|
||||
Block coverage | 855 | 1,130 | 1.57 | Almost-valid RCS diff
|
||||
Edge coverage | 1,452 | 2,070 | 2.18 | One-chunk -c mode diff
|
||||
AFL model | 1,765 | 2,597 | 4.99 | Four-chunk -c mode diff
|
||||
```
|
||||
|
||||
The first entry for blind fuzzing ("S") corresponds to executing just a single
|
||||
round of testing; the second set of figures ("L") shows the fuzzer running in a
|
||||
loop for a number of execution cycles comparable with that of the instrumented
|
||||
runs, which required more time to fully process the growing queue.
|
||||
|
||||
Roughly similar results have been obtained in a separate experiment where the
|
||||
fuzzer was modified to compile out all the random fuzzing stages and leave just
|
||||
a series of rudimentary, sequential operations such as walking bit flips.
|
||||
Because this mode would be incapable of altering the size of the input file,
|
||||
the sessions were seeded with a valid unified diff:
|
||||
|
||||
```
|
||||
Queue extension | Blocks | Edges | Edge hit | Number of unique
|
||||
strategy used | reached | reached | cnt var | crashes found
|
||||
------------------+---------+---------+----------+------------------
|
||||
(Initial file) | 624 | 717 | 1.00 | -
|
||||
| | | |
|
||||
Blind fuzzing | 1,101 | 1,409 | 1.60 | 0
|
||||
Block coverage | 1,255 | 1,649 | 1.48 | 0
|
||||
Edge coverage | 1,259 | 1,734 | 1.72 | 0
|
||||
AFL model | 1,452 | 2,040 | 3.16 | 1
|
||||
```
|
||||
|
||||
As noted earlier on, some of the prior work on genetic fuzzing relied on
|
||||
maintaining a single test case and evolving it to maximize coverage. At least
|
||||
in the tests described above, this "greedy" approach appears to confer no
|
||||
substantial benefits over blind fuzzing strategies.
|
||||
|
||||
## 4. Culling the corpus
|
||||
|
||||
The progressive state exploration approach outlined above means that some of
|
||||
the test cases synthesized later on in the game may have edge coverage that
|
||||
is a strict superset of the coverage provided by their ancestors.
|
||||
|
||||
To optimize the fuzzing effort, AFL periodically re-evaluates the queue using a
|
||||
fast algorithm that selects a smaller subset of test cases that still cover
|
||||
every tuple seen so far, and whose characteristics make them particularly
|
||||
favorable to the tool.
|
||||
|
||||
The algorithm works by assigning every queue entry a score proportional to its
|
||||
execution latency and file size; and then selecting lowest-scoring candidates
|
||||
for each tuple.
|
||||
|
||||
The tuples are then processed sequentially using a simple workflow:
|
||||
|
||||
1) Find next tuple not yet in the temporary working set,
|
||||
2) Locate the winning queue entry for this tuple,
|
||||
3) Register *all* tuples present in that entry's trace in the working set,
|
||||
4) Go to #1 if there are any missing tuples in the set.
|
||||
|
||||
The generated corpus of "favored" entries is usually 5-10x smaller than the
|
||||
starting data set. Non-favored entries are not discarded, but they are skipped
|
||||
with varying probabilities when encountered in the queue:
|
||||
|
||||
- If there are new, yet-to-be-fuzzed favorites present in the queue, 99%
|
||||
of non-favored entries will be skipped to get to the favored ones.
|
||||
- If there are no new favorites:
|
||||
* If the current non-favored entry was fuzzed before, it will be skipped
|
||||
95% of the time.
|
||||
* If it hasn't gone through any fuzzing rounds yet, the odds of skipping
|
||||
drop down to 75%.
|
||||
|
||||
Based on empirical testing, this provides a reasonable balance between queue
|
||||
cycling speed and test case diversity.
|
||||
|
||||
Slightly more sophisticated but much slower culling can be performed on input
|
||||
or output corpora with `afl-cmin`. This tool permanently discards the redundant
|
||||
entries and produces a smaller corpus suitable for use with `afl-fuzz` or
|
||||
external tools.
|
||||
|
||||
## 5. Trimming input files
|
||||
|
||||
File size has a dramatic impact on fuzzing performance, both because large
|
||||
files make the target binary slower, and because they reduce the likelihood
|
||||
that a mutation would touch important format control structures, rather than
|
||||
redundant data blocks. This is discussed in more detail in perf_tips.md.
|
||||
|
||||
The possibility that the user will provide a low-quality starting corpus aside,
|
||||
some types of mutations can have the effect of iteratively increasing the size
|
||||
of the generated files, so it is important to counter this trend.
|
||||
|
||||
Luckily, the instrumentation feedback provides a simple way to automatically
|
||||
trim down input files while ensuring that the changes made to the files have no
|
||||
impact on the execution path.
|
||||
|
||||
The built-in trimmer in afl-fuzz attempts to sequentially remove blocks of data
|
||||
with variable length and stepover; any deletion that doesn't affect the checksum
|
||||
of the trace map is committed to disk. The trimmer is not designed to be
|
||||
particularly thorough; instead, it tries to strike a balance between precision
|
||||
and the number of `execve()` calls spent on the process, selecting the block size
|
||||
and stepover to match. The average per-file gains are around 5-20%.
|
||||
|
||||
The standalone `afl-tmin` tool uses a more exhaustive, iterative algorithm, and
|
||||
also attempts to perform alphabet normalization on the trimmed files. The
|
||||
operation of `afl-tmin` is as follows.
|
||||
|
||||
First, the tool automatically selects the operating mode. If the initial input
|
||||
crashes the target binary, afl-tmin will run in non-instrumented mode, simply
|
||||
keeping any tweaks that produce a simpler file but still crash the target.
|
||||
The same mode is used for hangs, if `-H` (hang mode) is specified.
|
||||
If the target is non-crashing, the tool uses an instrumented mode and keeps only
|
||||
the tweaks that produce exactly the same execution path.
|
||||
|
||||
The actual minimization algorithm is:
|
||||
|
||||
1) Attempt to zero large blocks of data with large stepovers. Empirically,
|
||||
this is shown to reduce the number of execs by preempting finer-grained
|
||||
efforts later on.
|
||||
2) Perform a block deletion pass with decreasing block sizes and stepovers,
|
||||
binary-search-style.
|
||||
3) Perform alphabet normalization by counting unique characters and trying
|
||||
to bulk-replace each with a zero value.
|
||||
4) As a last resort, perform byte-by-byte normalization on non-zero bytes.
|
||||
|
||||
Instead of zeroing with a 0x00 byte, `afl-tmin` uses the ASCII digit '0'. This
|
||||
is done because such a modification is much less likely to interfere with
|
||||
text parsing, so it is more likely to result in successful minimization of
|
||||
text files.
|
||||
|
||||
The algorithm used here is less involved than some other test case
|
||||
minimization approaches proposed in academic work, but requires far fewer
|
||||
executions and tends to produce comparable results in most real-world
|
||||
applications.
|
||||
|
||||
## 6. Fuzzing strategies
|
||||
|
||||
The feedback provided by the instrumentation makes it easy to understand the
|
||||
value of various fuzzing strategies and optimize their parameters so that they
|
||||
work equally well across a wide range of file types. The strategies used by
|
||||
afl-fuzz are generally format-agnostic and are discussed in more detail here:
|
||||
|
||||
https://lcamtuf.blogspot.com/2014/08/binary-fuzzing-strategies-what-works.html
|
||||
|
||||
It is somewhat notable that especially early on, most of the work done by
|
||||
`afl-fuzz` is actually highly deterministic, and progresses to random stacked
|
||||
modifications and test case splicing only at a later stage. The deterministic
|
||||
strategies include:
|
||||
|
||||
- Sequential bit flips with varying lengths and stepovers,
|
||||
- Sequential addition and subtraction of small integers,
|
||||
- Sequential insertion of known interesting integers (`0`, `1`, `INT_MAX`, etc),
|
||||
|
||||
The purpose of opening with deterministic steps is related to their tendency to
|
||||
produce compact test cases and small diffs between the non-crashing and crashing
|
||||
inputs.
|
||||
|
||||
With deterministic fuzzing out of the way, the non-deterministic steps include
|
||||
stacked bit flips, insertions, deletions, arithmetics, and splicing of different
|
||||
test cases.
|
||||
|
||||
The relative yields and `execve()` costs of all these strategies have been
|
||||
investigated and are discussed in the aforementioned blog post.
|
||||
|
||||
For the reasons discussed in historical_notes.md (chiefly, performance,
|
||||
simplicity, and reliability), AFL generally does not try to reason about the
|
||||
relationship between specific mutations and program states; the fuzzing steps
|
||||
are nominally blind, and are guided only by the evolutionary design of the
|
||||
input queue.
|
||||
|
||||
That said, there is one (trivial) exception to this rule: when a new queue
|
||||
entry goes through the initial set of deterministic fuzzing steps, and tweaks to
|
||||
some regions in the file are observed to have no effect on the checksum of the
|
||||
execution path, they may be excluded from the remaining phases of
|
||||
deterministic fuzzing - and the fuzzer may proceed straight to random tweaks.
|
||||
Especially for verbose, human-readable data formats, this can reduce the number
|
||||
of execs by 10-40% or so without an appreciable drop in coverage. In extreme
|
||||
cases, such as normally block-aligned tar archives, the gains can be as high as
|
||||
90%.
|
||||
|
||||
Because the underlying "effector maps" are local to every queue entry and remain
|
||||
in force only during deterministic stages that do not alter the size or the
|
||||
general layout of the underlying file, this mechanism appears to work very
|
||||
reliably and proved to be simple to implement.
|
||||
|
||||
## 7. Dictionaries
|
||||
|
||||
The feedback provided by the instrumentation makes it easy to automatically
|
||||
identify syntax tokens in some types of input files, and to detect that certain
|
||||
combinations of predefined or auto-detected dictionary terms constitute a
|
||||
valid grammar for the tested parser.
|
||||
|
||||
A discussion of how these features are implemented within afl-fuzz can be found
|
||||
here:
|
||||
|
||||
https://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html
|
||||
|
||||
In essence, when basic, typically easily-obtained syntax tokens are combined
|
||||
together in a purely random manner, the instrumentation and the evolutionary
|
||||
design of the queue together provide a feedback mechanism to differentiate
|
||||
between meaningless mutations and ones that trigger new behaviors in the
|
||||
instrumented code - and to incrementally build more complex syntax on top of
|
||||
this discovery.
|
||||
|
||||
The dictionaries have been shown to enable the fuzzer to rapidly reconstruct
|
||||
the grammar of highly verbose and complex languages such as JavaScript, SQL,
|
||||
or XML; several examples of generated SQL statements are given in the blog
|
||||
post mentioned above.
|
||||
|
||||
Interestingly, the AFL instrumentation also allows the fuzzer to automatically
|
||||
isolate syntax tokens already present in an input file. It can do so by looking
|
||||
for runs of bytes that, when flipped, produce a consistent change to the
|
||||
program's execution path; this is suggestive of an underlying atomic comparison
|
||||
to a predefined value baked into the code. The fuzzer relies on this signal
|
||||
to build compact "auto dictionaries" that are then used in conjunction with
|
||||
other fuzzing strategies.
|
||||
|
||||
## 8. De-duping crashes
|
||||
|
||||
De-duplication of crashes is one of the more important problems for any
|
||||
competent fuzzing tool. Many of the naive approaches run into problems; in
|
||||
particular, looking just at the faulting address may lead to completely
|
||||
unrelated issues being clustered together if the fault happens in a common
|
||||
library function (say, `strcmp`, `strcpy`); while checksumming call stack
|
||||
backtraces can lead to extreme crash count inflation if the fault can be
|
||||
reached through a number of different, possibly recursive code paths.
|
||||
|
||||
The solution implemented in `afl-fuzz` considers a crash unique if either of two
|
||||
conditions are met:
|
||||
|
||||
- The crash trace includes a tuple not seen in any of the previous crashes,
|
||||
- The crash trace is missing a tuple that was always present in earlier
|
||||
faults.
|
||||
|
||||
The approach is vulnerable to some path count inflation early on, but exhibits
|
||||
a very strong self-limiting effect, similar to the execution path analysis
|
||||
logic that is the cornerstone of `afl-fuzz`.
|
||||
|
||||
## 9. Investigating crashes
|
||||
|
||||
The exploitability of many types of crashes can be ambiguous; afl-fuzz tries
|
||||
to address this by providing a crash exploration mode where a known-faulting
|
||||
test case is fuzzed in a manner very similar to the normal operation of the
|
||||
fuzzer, but with a constraint that causes any non-crashing mutations to be
|
||||
thrown away.
|
||||
|
||||
A detailed discussion of the value of this approach can be found here:
|
||||
|
||||
https://lcamtuf.blogspot.com/2014/11/afl-fuzz-crash-exploration-mode.html
|
||||
|
||||
The method uses instrumentation feedback to explore the state of the crashing
|
||||
program to get past the ambiguous faulting condition and then isolate the
|
||||
newly-found inputs for human review.
|
||||
|
||||
On the subject of crashes, it is worth noting that in contrast to normal
|
||||
queue entries, crashing inputs are *not* trimmed; they are kept exactly as
|
||||
discovered to make it easier to compare them to the parent, non-crashing entry
|
||||
in the queue. That said, `afl-tmin` can be used to shrink them at will.
|
||||
|
||||
## 10. The fork server
|
||||
|
||||
To improve performance, `afl-fuzz` uses a "fork server", where the fuzzed process
|
||||
goes through `execve()`, linking, and libc initialization only once, and is then
|
||||
cloned from a stopped process image by leveraging copy-on-write. The
|
||||
implementation is described in more detail here:
|
||||
|
||||
https://lcamtuf.blogspot.com/2014/10/fuzzing-binaries-without-execve.html
|
||||
|
||||
The fork server is an integral aspect of the injected instrumentation and
|
||||
simply stops at the first instrumented function to await commands from
|
||||
`afl-fuzz`.
|
||||
|
||||
With fast targets, the fork server can offer considerable performance gains,
|
||||
usually between 1.5x and 2x. It is also possible to:
|
||||
|
||||
- Use the fork server in manual ("deferred") mode, skipping over larger,
|
||||
user-selected chunks of initialization code. It requires very modest
|
||||
code changes to the targeted program, and with some targets, can
|
||||
produce 10x+ performance gains.
|
||||
- Enable "persistent" mode, where a single process is used to try out
|
||||
multiple inputs, greatly limiting the overhead of repetitive `fork()`
|
||||
calls. This generally requires some code changes to the targeted program,
|
||||
but can improve the performance of fast targets by a factor of 5 or more - approximating the benefits of in-process fuzzing jobs while still
|
||||
maintaining very robust isolation between the fuzzer process and the
|
||||
targeted binary.
|
||||
|
||||
## 11. Parallelization
|
||||
|
||||
The parallelization mechanism relies on periodically examining the queues
|
||||
produced by independently-running instances on other CPU cores or on remote
|
||||
machines, and then selectively pulling in the test cases that, when tried
|
||||
out locally, produce behaviors not yet seen by the fuzzer at hand.
|
||||
|
||||
This allows for extreme flexibility in fuzzer setup, including running synced
|
||||
instances against different parsers of a common data format, often with
|
||||
synergistic effects.
|
||||
|
||||
For more information about this design, see parallel_fuzzing.md.
|
||||
|
||||
## 12. Binary-only instrumentation
|
||||
|
||||
Instrumentation of black-box, binary-only targets is accomplished with the
|
||||
help of a separately-built version of QEMU in "user emulation" mode. This also
|
||||
allows the execution of cross-architecture code - say, ARM binaries on x86.
|
||||
|
||||
QEMU uses basic blocks as translation units; the instrumentation is implemented
|
||||
on top of this and uses a model roughly analogous to the compile-time hooks:
|
||||
|
||||
```c
|
||||
if (block_address > elf_text_start && block_address < elf_text_end) {
|
||||
|
||||
cur_location = (block_address >> 4) ^ (block_address << 8);
|
||||
shared_mem[cur_location ^ prev_location]++;
|
||||
prev_location = cur_location >> 1;
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
The shift-and-XOR-based scrambling in the second line is used to mask the
|
||||
effects of instruction alignment.
|
||||
|
||||
The start-up of binary translators such as QEMU, DynamoRIO, and PIN is fairly
|
||||
slow; to counter this, the QEMU mode leverages a fork server similar to that
|
||||
used for compiler-instrumented code, effectively spawning copies of an
|
||||
already-initialized process paused at `_start`.
|
||||
|
||||
First-time translation of a new basic block also incurs substantial latency. To
|
||||
eliminate this problem, the AFL fork server is extended by providing a channel
|
||||
between the running emulator and the parent process. The channel is used
|
||||
to notify the parent about the addresses of any newly-encountered blocks and to
|
||||
add them to the translation cache that will be replicated for future child
|
||||
processes.
|
||||
|
||||
As a result of these two optimizations, the overhead of the QEMU mode is
|
||||
roughly 2-5x, compared to 100x+ for PIN.
|
||||
|
||||
## 13. The `afl-analyze` tool
|
||||
|
||||
The file format analyzer is a simple extension of the minimization algorithm
|
||||
discussed earlier on; instead of attempting to remove no-op blocks, the tool
|
||||
performs a series of walking byte flips and then annotates runs of bytes
|
||||
in the input file.
|
||||
|
||||
It uses the following classification scheme:
|
||||
|
||||
- "No-op blocks" - segments where bit flips cause no apparent changes to
|
||||
control flow. Common examples may be comment sections, pixel data within
|
||||
a bitmap file, etc.
|
||||
- "Superficial content" - segments where some, but not all, bitflips
|
||||
produce some control flow changes. Examples may include strings in rich
|
||||
documents (e.g., XML, RTF).
|
||||
- "Critical stream" - a sequence of bytes where all bit flips alter control
|
||||
flow in different but correlated ways. This may be compressed data,
|
||||
non-atomically compared keywords or magic values, etc.
|
||||
- "Suspected length field" - small, atomic integer that, when touched in
|
||||
any way, causes a consistent change to program control flow, suggestive
|
||||
of a failed length check.
|
||||
- "Suspected cksum or magic int" - an integer that behaves similarly to a
|
||||
length field, but has a numerical value that makes the length explanation
|
||||
unlikely. This is suggestive of a checksum or other "magic" integer.
|
||||
- "Suspected checksummed block" - a long block of data where any change
|
||||
always triggers the same new execution path. Likely caused by failing
|
||||
a checksum or a similar integrity check before any subsequent parsing
|
||||
takes place.
|
||||
- "Magic value section" - a generic token where changes cause the type
|
||||
of binary behavior outlined earlier, but that doesn't meet any of the
|
||||
other criteria. May be an atomically compared keyword or so.
|
57
docs/third_party_tools.md
Normal file
57
docs/third_party_tools.md
Normal file
@ -0,0 +1,57 @@
|
||||
# Tools that help fuzzing with AFL++
|
||||
|
||||
Speeding up fuzzing:
|
||||
* [libfiowrapper](https://github.com/marekzmyslowski/libfiowrapper) - if the
|
||||
function you want to fuzz requires loading a file, this allows using the
|
||||
shared memory test case feature :-) - recommended.
|
||||
|
||||
Minimization of test cases:
|
||||
* [afl-pytmin](https://github.com/ilsani/afl-pytmin) - a wrapper for afl-tmin
|
||||
that tries to speed up the process of minimization of a single test case by
|
||||
using many CPU cores.
|
||||
* [afl-ddmin-mod](https://github.com/MarkusTeufelberger/afl-ddmin-mod) - a
|
||||
variation of afl-tmin based on the ddmin algorithm.
|
||||
* [halfempty](https://github.com/googleprojectzero/halfempty) - is a fast
|
||||
utility for minimizing test cases by Tavis Ormandy based on parallelization.
|
||||
|
||||
Distributed execution:
|
||||
* [disfuzz-afl](https://github.com/MartijnB/disfuzz-afl) - distributed fuzzing
|
||||
for AFL.
|
||||
* [AFLDFF](https://github.com/quantumvm/AFLDFF) - AFL distributed fuzzing
|
||||
framework.
|
||||
* [afl-launch](https://github.com/bnagy/afl-launch) - a tool for the execution
|
||||
of many AFL instances.
|
||||
* [afl-mothership](https://github.com/afl-mothership/afl-mothership) -
|
||||
management and execution of many synchronized AFL fuzzers on AWS cloud.
|
||||
* [afl-in-the-cloud](https://github.com/abhisek/afl-in-the-cloud) - another
|
||||
script for running AFL in AWS.
|
||||
|
||||
Deployment, management, monitoring, reporting:
|
||||
* [afl-utils](https://gitlab.com/rc0r/afl-utils) - a set of utilities for
|
||||
automatic processing/analysis of crashes and reducing the number of test
|
||||
cases.
|
||||
* [afl-other-arch](https://github.com/shellphish/afl-other-arch) - is a set of
|
||||
patches and scripts for easily adding support for various non-x86
|
||||
architectures for AFL.
|
||||
* [afl-trivia](https://github.com/bnagy/afl-trivia) - a few small scripts to
|
||||
simplify the management of AFL.
|
||||
* [afl-monitor](https://github.com/reflare/afl-monitor) - a script for
|
||||
monitoring AFL.
|
||||
* [afl-manager](https://github.com/zx1340/afl-manager) - a web server on Python
|
||||
for managing multi-afl.
|
||||
* [afl-remote](https://github.com/block8437/afl-remote) - a web server for the
|
||||
remote management of AFL instances.
|
||||
* [afl-extras](https://github.com/fekir/afl-extras) - shell scripts to
|
||||
parallelize afl-tmin, startup, and data collection.
|
||||
|
||||
Crash processing:
|
||||
* [afl-crash-analyzer](https://github.com/floyd-fuh/afl-crash-analyzer) -
|
||||
another crash analyzer for AFL.
|
||||
* [fuzzer-utils](https://github.com/ThePatrickStar/fuzzer-utils) - a set of
|
||||
scripts for the analysis of results.
|
||||
* [atriage](https://github.com/Ayrx/atriage) - a simple triage tool.
|
||||
* [afl-kit](https://github.com/kcwu/afl-kit) - afl-cmin on Python.
|
||||
* [AFLize](https://github.com/d33tah/aflize) - a tool that automatically
|
||||
generates builds of debian packages suitable for AFL.
|
||||
* [afl-fid](https://github.com/FoRTE-Research/afl-fid) - a set of tools for
|
||||
working with input data.
|
@ -1,33 +0,0 @@
|
||||
# Tools that help fuzzing with AFL++
|
||||
|
||||
Speeding up fuzzing:
|
||||
* [libfiowrapper](https://github.com/marekzmyslowski/libfiowrapper) - if the function you want to fuzz requires loading a file, this allows using the shared memory testcase feature :-) - recommended.
|
||||
|
||||
Minimization of test cases:
|
||||
* [afl-pytmin](https://github.com/ilsani/afl-pytmin) - a wrapper for afl-tmin that tries to speed up the process of minimization of a single test case by using many CPU cores.
|
||||
* [afl-ddmin-mod](https://github.com/MarkusTeufelberger/afl-ddmin-mod) - a variation of afl-tmin based on the ddmin algorithm.
|
||||
* [halfempty](https://github.com/googleprojectzero/halfempty) - is a fast utility for minimizing test cases by Tavis Ormandy based on parallelization.
|
||||
|
||||
Distributed execution:
|
||||
* [disfuzz-afl](https://github.com/MartijnB/disfuzz-afl) - distributed fuzzing for AFL.
|
||||
* [AFLDFF](https://github.com/quantumvm/AFLDFF) - AFL distributed fuzzing framework.
|
||||
* [afl-launch](https://github.com/bnagy/afl-launch) - a tool for the execution of many AFL instances.
|
||||
* [afl-mothership](https://github.com/afl-mothership/afl-mothership) - management and execution of many synchronized AFL fuzzers on AWS cloud.
|
||||
* [afl-in-the-cloud](https://github.com/abhisek/afl-in-the-cloud) - another script for running AFL in AWS.
|
||||
|
||||
Deployment, management, monitoring, reporting
|
||||
* [afl-utils](https://gitlab.com/rc0r/afl-utils) - a set of utilities for automatic processing/analysis of crashes and reducing the number of test cases.
|
||||
* [afl-other-arch](https://github.com/shellphish/afl-other-arch) - is a set of patches and scripts for easily adding support for various non-x86 architectures for AFL.
|
||||
* [afl-trivia](https://github.com/bnagy/afl-trivia) - a few small scripts to simplify the management of AFL.
|
||||
* [afl-monitor](https://github.com/reflare/afl-monitor) - a script for monitoring AFL.
|
||||
* [afl-manager](https://github.com/zx1340/afl-manager) - a web server on Python for managing multi-afl.
|
||||
* [afl-remote](https://github.com/block8437/afl-remote) - a web server for the remote management of AFL instances.
|
||||
* [afl-extras](https://github.com/fekir/afl-extras) - shell scripts to parallelize afl-tmin, startup, and data collection.
|
||||
|
||||
Crash processing
|
||||
* [afl-crash-analyzer](https://github.com/floyd-fuh/afl-crash-analyzer) - another crash analyzer for AFL.
|
||||
* [fuzzer-utils](https://github.com/ThePatrickStar/fuzzer-utils) - a set of scripts for the analysis of results.
|
||||
* [atriage](https://github.com/Ayrx/atriage) - a simple triage tool.
|
||||
* [afl-kit](https://github.com/kcwu/afl-kit) - afl-cmin on Python.
|
||||
* [AFLize](https://github.com/d33tah/aflize) - a tool that automatically generates builds of debian packages suitable for AFL.
|
||||
* [afl-fid](https://github.com/FoRTE-Research/afl-fid) - a set of tools for working with input data.
|
@ -1,46 +0,0 @@
|
||||
# Triaging crashes
|
||||
|
||||
The coverage-based grouping of crashes usually produces a small data set that
|
||||
can be quickly triaged manually or with a very simple GDB or Valgrind script.
|
||||
Every crash is also traceable to its parent non-crashing test case in the
|
||||
queue, making it easier to diagnose faults.
|
||||
|
||||
Having said that, it's important to acknowledge that some fuzzing crashes can be
|
||||
difficult to quickly evaluate for exploitability without a lot of debugging and
|
||||
code analysis work. To assist with this task, afl-fuzz supports a unique
|
||||
"crash exploration" mode enabled with the -C flag.
|
||||
|
||||
In this mode, the fuzzer takes one or more crashing test cases as the input
|
||||
and uses its feedback-driven fuzzing strategies to very quickly enumerate all
|
||||
code paths that can be reached in the program while keeping it in the
|
||||
crashing state.
|
||||
|
||||
Mutations that do not result in a crash are rejected; so are any changes that
|
||||
do not affect the execution path.
|
||||
|
||||
The output is a small corpus of files that can be very rapidly examined to see
|
||||
what degree of control the attacker has over the faulting address, or whether
|
||||
it is possible to get past an initial out-of-bounds read - and see what lies
|
||||
beneath.
|
||||
|
||||
Oh, one more thing: for test case minimization, give afl-tmin a try. The tool
|
||||
can be operated in a very simple way:
|
||||
|
||||
```shell
|
||||
./afl-tmin -i test_case -o minimized_result -- /path/to/program [...]
|
||||
```
|
||||
|
||||
The tool works with crashing and non-crashing test cases alike. In the crash
|
||||
mode, it will happily accept instrumented and non-instrumented binaries. In the
|
||||
non-crashing mode, the minimizer relies on standard AFL++ instrumentation to make
|
||||
the file simpler without altering the execution path.
|
||||
|
||||
The minimizer accepts the -m, -t, -f and @@ syntax in a manner compatible with
|
||||
afl-fuzz.
|
||||
|
||||
Another tool in AFL++ is the afl-analyze tool. It takes an input
|
||||
file, attempts to sequentially flip bytes, and observes the behavior of the
|
||||
tested program. It then color-codes the input based on which sections appear to
|
||||
be critical, and which are not; while not bulletproof, it can often offer quick
|
||||
insights into complex file formats. More info about its operation can be found
|
||||
near the end of [technical_details.md](technical_details.md).
|
@ -1,6 +1,6 @@
|
||||
# Tutorials
|
||||
|
||||
Here are some good writeups to show how to effectively use AFL++:
|
||||
Here are some good write-ups to show how to effectively use AFL++:
|
||||
|
||||
* [https://aflplus.plus/docs/tutorials/libxml2_tutorial/](https://aflplus.plus/docs/tutorials/libxml2_tutorial/)
|
||||
* [https://bananamafia.dev/post/gb-fuzz/](https://bananamafia.dev/post/gb-fuzz/)
|
||||
@ -18,9 +18,13 @@ training, then we can highly recommend the following:
|
||||
If you are interested in fuzzing structured data (where you define what the
|
||||
structure is), these links have you covered:
|
||||
|
||||
* Superion for AFL++: [https://github.com/adrian-rt/superion-mutator](https://github.com/adrian-rt/superion-mutator)
|
||||
* libprotobuf for AFL++: [https://github.com/P1umer/AFLplusplus-protobuf-mutator](https://github.com/P1umer/AFLplusplus-protobuf-mutator)
|
||||
* libprotobuf raw: [https://github.com/bruce30262/libprotobuf-mutator_fuzzing_learning/tree/master/4_libprotobuf_aflpp_custom_mutator](https://github.com/bruce30262/libprotobuf-mutator_fuzzing_learning/tree/master/4_libprotobuf_aflpp_custom_mutator)
|
||||
* libprotobuf for old AFL++ API: [https://github.com/thebabush/afl-libprotobuf-mutator](https://github.com/thebabush/afl-libprotobuf-mutator)
|
||||
* Superion for AFL++:
|
||||
[https://github.com/adrian-rt/superion-mutator](https://github.com/adrian-rt/superion-mutator)
|
||||
* libprotobuf for AFL++:
|
||||
[https://github.com/P1umer/AFLplusplus-protobuf-mutator](https://github.com/P1umer/AFLplusplus-protobuf-mutator)
|
||||
* libprotobuf raw:
|
||||
[https://github.com/bruce30262/libprotobuf-mutator_fuzzing_learning/tree/master/4_libprotobuf_aflpp_custom_mutator](https://github.com/bruce30262/libprotobuf-mutator_fuzzing_learning/tree/master/4_libprotobuf_aflpp_custom_mutator)
|
||||
* libprotobuf for old AFL++ API:
|
||||
[https://github.com/thebabush/afl-libprotobuf-mutator](https://github.com/thebabush/afl-libprotobuf-mutator)
|
||||
|
||||
If you find other good ones, please send them to us :-)
|
@ -141,31 +141,33 @@ instances run CMPLOG mode and instrumentation of the binary is less frequent
|
||||
(only on CMP, SUB and CALL instructions) performance is not quite so critical.
|
||||
|
||||
## Advanced configuration options
|
||||
|
||||
* `AFL_FRIDA_DRIVER_NO_HOOK` - See `AFL_QEMU_DRIVER_NO_HOOK`. When using the
|
||||
QEMU driver to provide a `main` loop for a user provided
|
||||
`LLVMFuzzerTestOneInput`, this option configures the driver to read input from
|
||||
`stdin` rather than using in-memory test cases.
|
||||
* `AFL_FRIDA_INST_COVERAGE_FILE` - File to write DynamoRIO format coverage
|
||||
information (e.g., to be loaded within IDA lighthouse).
|
||||
* `AFL_FRIDA_INST_DEBUG_FILE` - File to write raw assembly of original blocks
|
||||
and their instrumented counterparts during block compilation.
|
||||
|
||||
```
|
||||
***
|
||||
```
|
||||
***
|
||||
|
||||
Creating block for 0x7ffff7953313:
|
||||
0x7ffff7953313 mov qword ptr [rax], 0
|
||||
0x7ffff795331a add rsp, 8
|
||||
0x7ffff795331e ret
|
||||
Creating block for 0x7ffff7953313:
|
||||
0x7ffff7953313 mov qword ptr [rax], 0
|
||||
0x7ffff795331a add rsp, 8
|
||||
0x7ffff795331e ret
|
||||
|
||||
Generated block 0x7ffff75e98e2
|
||||
0x7ffff75e98e2 mov qword ptr [rax], 0
|
||||
0x7ffff75e98e9 add rsp, 8
|
||||
0x7ffff75e98ed lea rsp, [rsp - 0x80]
|
||||
0x7ffff75e98f5 push rcx
|
||||
0x7ffff75e98f6 movabs rcx, 0x7ffff795331e
|
||||
0x7ffff75e9900 jmp 0x7ffff75e9384
|
||||
Generated block 0x7ffff75e98e2
|
||||
0x7ffff75e98e2 mov qword ptr [rax], 0
|
||||
0x7ffff75e98e9 add rsp, 8
|
||||
0x7ffff75e98ed lea rsp, [rsp - 0x80]
|
||||
0x7ffff75e98f5 push rcx
|
||||
0x7ffff75e98f6 movabs rcx, 0x7ffff795331e
|
||||
0x7ffff75e9900 jmp 0x7ffff75e9384
|
||||
|
||||
|
||||
***
|
||||
```
|
||||
***
|
||||
```
|
||||
|
||||
* `AFL_FRIDA_INST_JIT` - Enable the instrumentation of Just-In-Time compiled
|
||||
code. Code is considered to be JIT if the executable segment is not backed by
|
||||
@ -194,6 +196,8 @@ Generated block 0x7ffff75e98e2
|
||||
* `AFL_FRIDA_INST_UNSTABLE_COVERAGE_FILE` - File to write DynamoRIO format
|
||||
coverage information for unstable edges (e.g., to be loaded within IDA
|
||||
lighthouse).
|
||||
* `AFL_FRIDA_JS_SCRIPT` - Set the script to be loaded by the FRIDA scripting
|
||||
engine. See [Scripting.md](Scripting.md) for details.
|
||||
* `AFL_FRIDA_OUTPUT_STDOUT` - Redirect the standard output of the target
|
||||
application to the named file (supersedes the setting of `AFL_DEBUG_CHILD`).
|
||||
* `AFL_FRIDA_OUTPUT_STDERR` - Redirect the standard error of the target
|
||||
|
@ -8,9 +8,8 @@
|
||||
#define UNUSED_PARAMETER(x) (void)(x)
|
||||
#define IGNORED_RETURN(x) (void)!(x)
|
||||
|
||||
guint64 util_read_address(char *key);
|
||||
|
||||
guint64 util_read_num(char *key);
|
||||
guint64 util_read_address(char *key, guint64 default_value);
|
||||
guint64 util_read_num(char *key, guint64 default_value);
|
||||
gboolean util_output_enabled(void);
|
||||
gsize util_rotate(gsize val, gsize shift, gsize size);
|
||||
gsize util_log2(gsize val);
|
||||
|
@ -62,7 +62,7 @@ void entry_on_fork(void) {
|
||||
|
||||
void entry_config(void) {
|
||||
|
||||
entry_point = util_read_address("AFL_ENTRYPOINT");
|
||||
entry_point = util_read_address("AFL_ENTRYPOINT", 0);
|
||||
if (getenv("AFL_FRIDA_TRACEABLE") != NULL) { traceable = TRUE; }
|
||||
|
||||
}
|
||||
|
@ -246,7 +246,7 @@ void instrument_config(void) {
|
||||
instrument_tracing = (getenv("AFL_FRIDA_INST_TRACE") != NULL);
|
||||
instrument_unique = (getenv("AFL_FRIDA_INST_TRACE_UNIQUE") != NULL);
|
||||
instrument_use_fixed_seed = (getenv("AFL_FRIDA_INST_SEED") != NULL);
|
||||
instrument_fixed_seed = util_read_num("AFL_FRIDA_INST_SEED");
|
||||
instrument_fixed_seed = util_read_num("AFL_FRIDA_INST_SEED", 0);
|
||||
instrument_coverage_unstable_filename =
|
||||
(getenv("AFL_FRIDA_INST_UNSTABLE_COVERAGE_FILE"));
|
||||
|
||||
|
@ -22,9 +22,9 @@ gboolean persistent_debug = FALSE;
|
||||
void persistent_config(void) {
|
||||
|
||||
hook_name = getenv("AFL_FRIDA_PERSISTENT_HOOK");
|
||||
persistent_start = util_read_address("AFL_FRIDA_PERSISTENT_ADDR");
|
||||
persistent_count = util_read_num("AFL_FRIDA_PERSISTENT_CNT");
|
||||
persistent_ret = util_read_address("AFL_FRIDA_PERSISTENT_RET");
|
||||
persistent_start = util_read_address("AFL_FRIDA_PERSISTENT_ADDR", 0);
|
||||
persistent_count = util_read_num("AFL_FRIDA_PERSISTENT_CNT", 0);
|
||||
persistent_ret = util_read_address("AFL_FRIDA_PERSISTENT_RET", 0);
|
||||
|
||||
if (getenv("AFL_FRIDA_PERSISTENT_DEBUG") != NULL) { persistent_debug = TRUE; }
|
||||
|
||||
|
@ -10,13 +10,13 @@
|
||||
|
||||
int seccomp_event_create(void) {
|
||||
|
||||
#ifdef SYS_eventfd
|
||||
#ifdef SYS_eventfd
|
||||
int fd = syscall(SYS_eventfd, 0, 0);
|
||||
#else
|
||||
# ifdef SYS_eventfd2
|
||||
#else
|
||||
#ifdef SYS_eventfd2
|
||||
int fd = syscall(SYS_eventfd2, 0, 0);
|
||||
# endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
if (fd < 0) { FFATAL("seccomp_event_create"); }
|
||||
return fd;
|
||||
|
||||
|
@ -72,13 +72,13 @@ static struct sock_filter filter[] = {
|
||||
|
||||
/* Allow us to make anonymous maps */
|
||||
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))),
|
||||
#ifdef __NR_mmap
|
||||
#ifdef __NR_mmap
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mmap, 0, 3),
|
||||
#else
|
||||
# ifdef __NR_mmap2
|
||||
#else
|
||||
#ifdef __NR_mmap2
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mmap2, 0, 3),
|
||||
# endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
|
||||
(offsetof(struct seccomp_data, args[4]))),
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, -1, 0, 1),
|
||||
|
@ -61,9 +61,10 @@ void stalker_config(void) {
|
||||
|
||||
backpatch_enable = (getenv("AFL_FRIDA_INST_NO_BACKPATCH") == NULL);
|
||||
|
||||
stalker_ic_entries = util_read_num("AFL_FRIDA_STALKER_ADJACENT_BLOCKS");
|
||||
stalker_ic_entries = util_read_num("AFL_FRIDA_STALKER_IC_ENTRIES", 32);
|
||||
|
||||
stalker_adjacent_blocks = util_read_num("AFL_FRIDA_STALKER_IC_ENTRIES");
|
||||
stalker_adjacent_blocks =
|
||||
util_read_num("AFL_FRIDA_STALKER_ADJACENT_BLOCKS", 32);
|
||||
|
||||
observer = g_object_new(GUM_TYPE_AFL_STALKER_OBSERVER, NULL);
|
||||
|
||||
@ -98,33 +99,32 @@ void stalker_init(void) {
|
||||
FOKF("Stalker - adjacent_blocks [%u]", stalker_adjacent_blocks);
|
||||
|
||||
#if !(defined(__x86_64__) || defined(__i386__))
|
||||
if (stalker_ic_entries != 0) {
|
||||
if (getenv("AFL_FRIDA_STALKER_IC_ENTRIES") != NULL) {
|
||||
|
||||
FFATAL("AFL_FRIDA_STALKER_IC_ENTRIES not supported");
|
||||
|
||||
}
|
||||
|
||||
if (stalker_adjacent_blocks != 0) {
|
||||
if (getenv("AFL_FRIDA_STALKER_ADJACENT_BLOCKS") != NULL) {
|
||||
|
||||
FFATAL("AFL_FRIDA_STALKER_ADJACENT_BLOCKS not supported");
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
if (stalker_ic_entries == 0) { stalker_ic_entries = 32; }
|
||||
|
||||
if (instrument_coverage_filename == NULL) {
|
||||
if (instrument_coverage_filename != NULL) {
|
||||
|
||||
if (stalker_adjacent_blocks == 0) { stalker_adjacent_blocks = 32; }
|
||||
|
||||
} else {
|
||||
|
||||
if (stalker_adjacent_blocks != 0) {
|
||||
if (getenv("AFL_FRIDA_STALKER_ADJACENT_BLOCKS") != NULL) {
|
||||
|
||||
FFATAL(
|
||||
"AFL_FRIDA_STALKER_ADJACENT_BLOCKS and AFL_FRIDA_INST_COVERAGE_FILE "
|
||||
"are incompatible");
|
||||
|
||||
} else {
|
||||
|
||||
stalker_adjacent_blocks = 0;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -323,7 +323,7 @@ static void stats_observer_init(GumStalkerObserver *observer) {
|
||||
void stats_config(void) {
|
||||
|
||||
stats_filename = getenv("AFL_FRIDA_STATS_FILE");
|
||||
stats_interval = util_read_num("AFL_FRIDA_STATS_INTERVAL");
|
||||
stats_interval = util_read_num("AFL_FRIDA_STATS_INTERVAL", 10);
|
||||
|
||||
}
|
||||
|
||||
@ -332,7 +332,8 @@ void stats_init(void) {
|
||||
FOKF("Stats - file [%s]", stats_filename);
|
||||
FOKF("Stats - interval [%" G_GINT64_MODIFIER "u]", stats_interval);
|
||||
|
||||
if (stats_interval != 0 && stats_filename == NULL) {
|
||||
if (getenv("AFL_FRIDA_STATS_INTERVAL") != NULL &&
|
||||
getenv("AFL_FRIDA_STATS_FILE") == NULL) {
|
||||
|
||||
FFATAL(
|
||||
"AFL_FRIDA_STATS_FILE must be specified if "
|
||||
@ -340,7 +341,6 @@ void stats_init(void) {
|
||||
|
||||
}
|
||||
|
||||
if (stats_interval == 0) { stats_interval = 10; }
|
||||
stats_interval_us = stats_interval * MICRO_TO_SEC;
|
||||
|
||||
if (stats_filename == NULL) { return; }
|
||||
|
@ -1,10 +1,11 @@
|
||||
#include "util.h"
|
||||
|
||||
guint64 util_read_address(char *key) {
|
||||
guint64 util_read_address(char *key, guint64 default_value) {
|
||||
|
||||
char *value_str = getenv(key);
|
||||
char *end_ptr;
|
||||
|
||||
if (value_str == NULL) { return 0; }
|
||||
if (value_str == NULL) { return default_value; }
|
||||
|
||||
if (!g_str_has_prefix(value_str, "0x")) {
|
||||
|
||||
@ -25,8 +26,17 @@ guint64 util_read_address(char *key) {
|
||||
|
||||
}
|
||||
|
||||
guint64 value = g_ascii_strtoull(value_str2, NULL, 16);
|
||||
if (value == 0) {
|
||||
errno = 0;
|
||||
|
||||
guint64 value = g_ascii_strtoull(value_str2, &end_ptr, 16);
|
||||
|
||||
if (errno != 0) {
|
||||
|
||||
FATAL("Error (%d) during conversion: %s", errno, value_str);
|
||||
|
||||
}
|
||||
|
||||
if (value == 0 && end_ptr == value_str2) {
|
||||
|
||||
FATAL("Invalid address failed hex conversion: %s=%s\n", key, value_str2);
|
||||
|
||||
@ -36,11 +46,12 @@ guint64 util_read_address(char *key) {
|
||||
|
||||
}
|
||||
|
||||
guint64 util_read_num(char *key) {
|
||||
guint64 util_read_num(char *key, guint64 default_value) {
|
||||
|
||||
char *value_str = getenv(key);
|
||||
char *end_ptr;
|
||||
|
||||
if (value_str == NULL) { return 0; }
|
||||
if (value_str == NULL) { return default_value; }
|
||||
|
||||
for (char *c = value_str; *c != '\0'; c++) {
|
||||
|
||||
@ -53,8 +64,17 @@ guint64 util_read_num(char *key) {
|
||||
|
||||
}
|
||||
|
||||
errno = 0;
|
||||
|
||||
guint64 value = g_ascii_strtoull(value_str, NULL, 10);
|
||||
if (value == 0) {
|
||||
|
||||
if (errno != 0) {
|
||||
|
||||
FATAL("Error (%d) during conversion: %s", errno, value_str);
|
||||
|
||||
}
|
||||
|
||||
if (value == 0 && end_ptr == value_str) {
|
||||
|
||||
FATAL("Invalid address failed numeric conversion: %s=%s\n", key, value_str);
|
||||
|
||||
|
114
frida_mode/test/bloaty/GNUmakefile
Normal file
114
frida_mode/test/bloaty/GNUmakefile
Normal file
@ -0,0 +1,114 @@
|
||||
PWD:=$(shell pwd)/
|
||||
ROOT:=$(PWD)../../../
|
||||
BUILD_DIR:=$(PWD)build/
|
||||
|
||||
AFLPP_FRIDA_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/frida_hook.so
|
||||
AFLPP_QEMU_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/qemu_hook.so
|
||||
|
||||
# LIBFUZZER_LIB:=/usr/lib/llvm-12/lib/libFuzzer.a
|
||||
|
||||
BLOATY_GIT_REPO:=https://github.com/google/bloaty.git
|
||||
BLOATY_DIR:=$(BUILD_DIR)bloaty/
|
||||
TEST_BIN:=$(BLOATY_DIR)fuzz_target
|
||||
|
||||
ifeq "$(shell uname)" "Darwin"
|
||||
TEST_BIN_LDFLAGS:=-undefined dynamic_lookup -Wl,-no_pie
|
||||
endif
|
||||
|
||||
TEST_DATA_DIR:=$(BUILD_DIR)in/
|
||||
TEST_DATA_SRC:=$(BLOATY_DIR)tests/testdata/fuzz_corpus/
|
||||
DUMMY_DATA_FILE:=$(BUILD_DIR)default_seed
|
||||
|
||||
FRIDA_OUT:=$(BUILD_DIR)frida-out
|
||||
QEMU_OUT:=$(BUILD_DIR)qemu-out
|
||||
|
||||
ifndef ARCH
|
||||
|
||||
ARCH=$(shell uname -m)
|
||||
ifeq "$(ARCH)" "aarch64"
|
||||
ARCH:=arm64
|
||||
endif
|
||||
|
||||
ifeq "$(ARCH)" "i686"
|
||||
ARCH:=x86
|
||||
endif
|
||||
endif
|
||||
|
||||
GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh
|
||||
|
||||
AFL_QEMU_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x4000000000)
|
||||
|
||||
# Select the base address passed to $(GET_SYMBOL_ADDR) when resolving
# LLVMFuzzerTestOneInput for FRIDA mode, per architecture.
#
# The assignments are deliberately recursive (=): the $(shell ...) lookup is
# expanded when a recipe references the variable, not while this file is
# being parsed.
#
# ARCH is normalised earlier in this file (aarch64 -> arm64), so the ARM64
# branch has to match the normalised name. "aarch64" is still accepted for
# callers that pass ARCH=aarch64 on the command line, which bypasses the
# normalisation.
ifneq "$(filter arm64 aarch64,$(ARCH))" ""
  AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000aaaaaaaaa000)
endif

ifeq "$(ARCH)" "x86_64"
  AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000555555554000)
endif

ifeq "$(ARCH)" "x86"
  AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x56555000)
endif
|
||||
|
||||
# Every command-style target of this file; none of them names a real file.
.PHONY: all 32 clean frida qemu hook
|
||||
|
||||
# Build the bloaty fuzz target, then build FRIDA mode itself.
# $(MAKE) (not a literal `make`) keeps -j/-n and the jobserver working in the
# sub-make.
all: $(TEST_BIN)
	$(MAKE) -C $(ROOT)frida_mode/
|
||||
|
||||
# Re-run the `all` target with 32-bit compiler/linker flags and ARCH=x86.
# $(MAKE) (not a literal `make`) keeps -j/-n and the jobserver working in the
# sub-make.
32:
	CXXFLAGS="-m32" LDFLAGS="-m32" ARCH="x86" $(MAKE) all
|
||||
|
||||
$(BUILD_DIR):
|
||||
mkdir -p $@
|
||||
|
||||
########## BLOATY #######
|
||||
|
||||
$(BLOATY_DIR): | $(BUILD_DIR)
|
||||
git clone --depth 1 $(BLOATY_GIT_REPO) $@
|
||||
|
||||
# Configure and build bloaty's fuzz target in-tree with clang.
# The checkout is an order-only prerequisite: it must exist first, but the
# directory's timestamp (which changes as the build writes into it) must not
# mark the binary as out of date.
$(TEST_BIN): | $(BLOATY_DIR)
	cd $(BLOATY_DIR) && CC=clang CXX=clang++ CCC=clang++ LIB_FUZZING_ENGINE="-fsanitize=fuzzer" cmake -G Ninja -DBUILD_TESTING=false $(BLOATY_DIR)
	cd $(BLOATY_DIR) && CC=clang CXX=clang++ CCC=clang++ ninja -j $(shell nproc)
|
||||
|
||||
########## DUMMY #######
|
||||
|
||||
$(TEST_DATA_DIR): | $(BLOATY_DIR) $(BUILD_DIR)
|
||||
cp -av $(TEST_DATA_SRC) $@
|
||||
|
||||
$(DUMMY_DATA_FILE): | $(TEST_DATA_DIR)
|
||||
dd if=/dev/zero bs=1048576 count=1 of=$@
|
||||
|
||||
###### TEST DATA #######
|
||||
|
||||
clean:
|
||||
rm -rf $(BUILD_DIR)
|
||||
|
||||
# Run afl-fuzz in FRIDA mode (-O) against the bloaty fuzz target with -V 30,
# in persistent mode through the FRIDA hook object.
# The seed corpus directory passed to -i is an order-only prerequisite: it
# only has to exist before afl-fuzz starts.
frida: $(TEST_BIN) $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) $(DUMMY_DATA_FILE) | $(TEST_DATA_DIR)
	AFL_FRIDA_PERSISTENT_CNT=1000000 \
	AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_FRIDA_DRIVER_HOOK_OBJ) \
	AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \
	AFL_ENTRYPOINT=$(AFL_FRIDA_PERSISTENT_ADDR) \
	$(ROOT)afl-fuzz \
		-i $(TEST_DATA_DIR) \
		-o $(FRIDA_OUT) \
		-m none \
		-d \
		-O \
		-V 30 \
		-- \
			$(TEST_BIN) $(DUMMY_DATA_FILE)
|
||||
|
||||
# Run afl-fuzz in QEMU mode (-Q) against the bloaty fuzz target with -V 30,
# in persistent mode through the QEMU hook object.
# The seed corpus directory passed to -i is an order-only prerequisite: it
# only has to exist before afl-fuzz starts.
qemu: $(TEST_BIN) $(AFLPP_QEMU_DRIVER_HOOK_OBJ) $(DUMMY_DATA_FILE) | $(TEST_DATA_DIR)
	AFL_QEMU_PERSISTENT_CNT=1000000 \
	AFL_QEMU_PERSISTENT_HOOK=$(AFLPP_QEMU_DRIVER_HOOK_OBJ) \
	AFL_QEMU_PERSISTENT_ADDR=$(AFL_QEMU_PERSISTENT_ADDR) \
	AFL_ENTRYPOINT=$(AFL_QEMU_PERSISTENT_ADDR) \
	$(ROOT)afl-fuzz \
		-i $(TEST_DATA_DIR) \
		-o $(QEMU_OUT) \
		-m none \
		-d \
		-Q \
		-V 30 \
		-- \
			$(TEST_BIN) $(DUMMY_DATA_FILE)
|
13
frida_mode/test/bloaty/Makefile
Normal file
13
frida_mode/test/bloaty/Makefile
Normal file
@ -0,0 +1,13 @@
|
||||
# Wrapper for non-GNU make: each target is forwarded to gmake, which picks up
# the GNUmakefile that sits next to this file.
.PHONY: all 32 clean frida qemu

all:
	@echo trying to use GNU make...
	@gmake all || echo please install GNUmake

32:
	@echo trying to use GNU make...
	@gmake 32 || echo please install GNUmake

clean:
	@gmake clean

frida:
	@gmake frida

# Forward the QEMU-mode run as well; the GNUmakefile defines a qemu target.
qemu:
	@gmake qemu
|
36
frida_mode/test/bloaty/get_symbol_addr.py
Executable file
36
frida_mode/test/bloaty/get_symbol_addr.py
Executable file
@ -0,0 +1,36 @@
|
||||
#!/usr/bin/python3
|
||||
import argparse
|
||||
from elftools.elf.elffile import ELFFile
|
||||
|
||||
def process_file(file, symbol, base):
    """Print the address of ``symbol`` from ELF ``file``, offset by ``base``.

    The symbol's ``st_value`` is read from the ``.symtab`` section, added to
    ``base`` and printed as a zero-padded 16-digit hexadecimal number.

    Args:
        file: path of the ELF file to inspect.
        symbol: name of the symbol to look up.
        base: integer added to the symbol's value.

    Returns:
        0 on success; 1 (after printing a message) when ``.symtab`` is
        missing or the name does not resolve to exactly one symbol.
    """
    with open(file, 'rb') as f:
        elf = ELFFile(f)

        # get_section_by_name() yields None when the section does not exist,
        # e.g. for a stripped binary.
        symtab = elf.get_section_by_name('.symtab')
        if symtab is None:
            print("Failed to find .symtab in %s" % file)
            return 1

        # get_symbol_by_name() yields None rather than an empty list when
        # nothing matches, so normalise before taking len().
        matches = symtab.get_symbol_by_name(symbol) or []
        if len(matches) != 1:
            print("Failed to find %s" % symbol)
            return 1

        symbol_addr = base + matches[0]['st_value']
        print("0x%016x" % symbol_addr)
        return 0
|
||||
|
||||
def hex_value(x):
    """Parse ``x`` as a base-16 integer (used as the argparse ``type`` of --base)."""
    parsed = int(x, 16)
    return parsed
|
||||
|
||||
def main():
    """Parse the command line and print the requested symbol address.

    All three options (-f/--file, -s/--symbol, -b/--base) are required.
    Returns the status code produced by process_file().
    """
    cli = argparse.ArgumentParser(description='Process some integers.')

    # (short flag, long flag, dest, converter, help text) for each option.
    options = (
        ('-f', '--file', 'file', str, 'elf file name'),
        ('-s', '--symbol', 'symbol', str, 'symbol name'),
        ('-b', '--base', 'base', hex_value, 'elf base address'),
    )
    for short_flag, long_flag, dest, converter, help_text in options:
        cli.add_argument(short_flag, long_flag, dest=dest, type=converter,
                         help=help_text, required=True)

    opts = cli.parse_args()
    return process_file(opts.file, opts.symbol, opts.base)
|
||||
|
||||
if __name__ == "__main__":
|
||||
ret = main()
|
||||
exit(ret)
|
@ -1,11 +1,12 @@
|
||||
# CmpLog instrumentation
|
||||
|
||||
The CmpLog instrumentation enables logging of comparison operands in a
|
||||
shared memory.
|
||||
The CmpLog instrumentation enables logging of comparison operands in shared
|
||||
memory.
|
||||
|
||||
These values can be used by various mutators built on top of it.
|
||||
At the moment we support the RedQueen mutator (input-2-state instructions only),
|
||||
for details see [the RedQueen paper](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf).
|
||||
These values can be used by various mutators built on top of it. At the moment,
|
||||
we support the RedQueen mutator (input-2-state instructions only), for details
|
||||
see
|
||||
[the RedQueen paper](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf).
|
||||
|
||||
## Build
|
||||
|
||||
@ -14,7 +15,8 @@ program.
|
||||
|
||||
The first version is built using the regular AFL++ instrumentation.
|
||||
|
||||
The second one, the CmpLog binary, is built with setting AFL_LLVM_CMPLOG during the compilation.
|
||||
The second one, the CmpLog binary, is built by setting AFL_LLVM_CMPLOG during
|
||||
the compilation.
|
||||
|
||||
For example:
|
||||
|
||||
@ -32,8 +34,8 @@ unset AFL_LLVM_CMPLOG
|
||||
|
||||
## Use
|
||||
|
||||
AFL++ has the new `-c` option that needs to be used to specify the CmpLog binary (the second
|
||||
build).
|
||||
AFL++ has the new `-c` option that needs to be used to specify the CmpLog binary
|
||||
(the second build).
|
||||
|
||||
For example:
|
||||
|
||||
@ -41,4 +43,4 @@ For example:
|
||||
afl-fuzz -i input -o output -c ./program.cmplog -m none -- ./program.afl @@
|
||||
```
|
||||
|
||||
Be sure to use `-m none` because CmpLog can map a lot of pages.
|
||||
Be sure to use `-m none` because CmpLog can map a lot of pages.
|
@ -1,38 +0,0 @@
|
||||
# AFL Context Sensitive Branch Coverage
|
||||
|
||||
## What is this?
|
||||
|
||||
This is an LLVM-based implementation of the context sensitive branch coverage.
|
||||
|
||||
Basically, every function gets its own ID and, every time an edge is logged,
|
||||
all the IDs in the callstack are hashed and combined with the edge transition
|
||||
hash to augment the classic edge coverage with the information about the
|
||||
calling context.
|
||||
|
||||
So if both function A and function B call a function C, the coverage
|
||||
collected in C will be different.
|
||||
|
||||
In math the coverage is collected as follows:
|
||||
`map[current_location_ID ^ previous_location_ID >> 1 ^ hash_callstack_IDs] += 1`
|
||||
|
||||
The callstack hash is produced by XOR-ing the function IDs to avoid explosion with
|
||||
recursive functions.
|
||||
|
||||
## Usage
|
||||
|
||||
Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable.
|
||||
|
||||
It is highly recommended to increase the MAP_SIZE_POW2 definition in
|
||||
config.h to at least 18 and maybe up to 20 for this as otherwise too
|
||||
many map collisions occur.
|
||||
|
||||
## Caller Branch Coverage
|
||||
|
||||
If the context sensitive coverage introduces too many collisions and becomes
|
||||
detrimental, the user can choose to augment edge coverage with just the
|
||||
called function ID, instead of the entire callstack hash.
|
||||
|
||||
In math the coverage is collected as follows:
|
||||
`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1`
|
||||
|
||||
Set the `AFL_LLVM_INSTRUMENT=CALLER` or `AFL_LLVM_CALLER=1` environment variable.
|
@ -1,64 +1,68 @@
|
||||
# GCC-based instrumentation for afl-fuzz
|
||||
|
||||
See [../README.md](../README.md) for the general instruction manual.
|
||||
See [README.llvm.md](README.llvm.md) for the LLVM-based instrumentation.
|
||||
For the general instruction manual, see [../README.md](../README.md).
|
||||
For the LLVM-based instrumentation, see [README.llvm.md](README.llvm.md).
|
||||
|
||||
This document describes how to build and use `afl-gcc-fast` and `afl-g++-fast`,
|
||||
which instrument the target with the help of gcc plugins.
|
||||
|
||||
TLDR:
|
||||
* check the version of your gcc compiler: `gcc --version`
|
||||
* `apt-get install gcc-VERSION-plugin-dev` or similar to install headers for gcc plugins
|
||||
* `gcc` and `g++` must match the gcc-VERSION you installed headers for. You can set `AFL_CC`/`AFL_CXX`
|
||||
to point to these!
|
||||
* `make`
|
||||
* just use `afl-gcc-fast`/`afl-g++-fast` normally like you would do with `afl-clang-fast`
|
||||
TL;DR:
|
||||
* Check the version of your gcc compiler: `gcc --version`
|
||||
* `apt-get install gcc-VERSION-plugin-dev` or similar to install headers for gcc
|
||||
plugins.
|
||||
* `gcc` and `g++` must match the gcc-VERSION you installed headers for. You can
|
||||
set `AFL_CC`/`AFL_CXX` to point to these!
|
||||
* `make`
|
||||
* Just use `afl-gcc-fast`/`afl-g++-fast` normally like you would do with
|
||||
`afl-clang-fast`.
|
||||
|
||||
## 1) Introduction
|
||||
|
||||
The code in this directory allows to instrument programs for AFL using
|
||||
true compiler-level instrumentation, instead of the more crude
|
||||
assembly-level rewriting approach taken by afl-gcc and afl-clang. This has
|
||||
several interesting properties:
|
||||
The code in this directory allows you to instrument programs for AFL++ using true
|
||||
compiler-level instrumentation, instead of the more crude assembly-level
|
||||
rewriting approach taken by afl-gcc and afl-clang. This has several interesting
|
||||
properties:
|
||||
|
||||
- The compiler can make many optimizations that are hard to pull off when
|
||||
manually inserting assembly. As a result, some slow, CPU-bound programs will
|
||||
run up to around faster.
|
||||
- The compiler can make many optimizations that are hard to pull off when
|
||||
manually inserting assembly. As a result, some slow, CPU-bound programs will
|
||||
run up to around 2x faster.
|
||||
|
||||
The gains are less pronounced for fast binaries, where the speed is limited
|
||||
chiefly by the cost of creating new processes. In such cases, the gain will
|
||||
probably stay within 10%.
|
||||
The gains are less pronounced for fast binaries, where the speed is limited
|
||||
chiefly by the cost of creating new processes. In such cases, the gain will
|
||||
probably stay within 10%.
|
||||
|
||||
- The instrumentation is CPU-independent. At least in principle, you should
|
||||
be able to rely on it to fuzz programs on non-x86 architectures (after
|
||||
building `afl-fuzz` with `AFL_NOX86=1`).
|
||||
- The instrumentation is CPU-independent. At least in principle, you should be
|
||||
able to rely on it to fuzz programs on non-x86 architectures (after building
|
||||
`afl-fuzz` with `AFL_NOX86=1`).
|
||||
|
||||
- Because the feature relies on the internals of GCC, it is gcc-specific
|
||||
and will *not* work with LLVM (see [README.llvm.md](README.llvm.md) for an alternative).
|
||||
- Because the feature relies on the internals of GCC, it is gcc-specific and
|
||||
will *not* work with LLVM (see [README.llvm.md](README.llvm.md) for an
|
||||
alternative).
|
||||
|
||||
Once this implementation is shown to be sufficiently robust and portable, it
|
||||
will probably replace afl-gcc. For now, it can be built separately and
|
||||
co-exists with the original code.
|
||||
will probably replace afl-gcc. For now, it can be built separately and co-exists
|
||||
with the original code.
|
||||
|
||||
The idea and much of the implementation comes from Laszlo Szekeres.
|
||||
|
||||
## 2) How to use
|
||||
|
||||
In order to leverage this mechanism, you need to have modern enough GCC
|
||||
(>= version 4.5.0) and the plugin development headers installed on your system. That
|
||||
In order to leverage this mechanism, you need to have a modern enough GCC (>=
|
||||
version 4.5.0) and the plugin development headers installed on your system. That
|
||||
should be all you need. On Debian machines, these headers can be acquired by
|
||||
installing the `gcc-VERSION-plugin-dev` packages.
|
||||
|
||||
To build the instrumentation itself, type `make`. This will generate binaries
|
||||
called `afl-gcc-fast` and `afl-g++-fast` in the parent directory.
|
||||
called `afl-gcc-fast` and `afl-g++-fast` in the parent directory.
|
||||
|
||||
The gcc and g++ compiler links have to point to gcc-VERSION - or set these
|
||||
by pointing the environment variables `AFL_CC`/`AFL_CXX` to them.
|
||||
If the `CC`/`CXX` environment variables have been set, those compilers will be
|
||||
preferred over those from the `AFL_CC`/`AFL_CXX` settings.
|
||||
The gcc and g++ compiler links have to point to gcc-VERSION - or set these by
|
||||
pointing the environment variables `AFL_CC`/`AFL_CXX` to them. If the `CC`/`CXX`
|
||||
environment variables have been set, those compilers will be preferred over
|
||||
those from the `AFL_CC`/`AFL_CXX` settings.
|
||||
|
||||
Once this is done, you can instrument third-party code in a way similar to the
|
||||
standard operating mode of AFL, e.g.:
|
||||
standard operating mode of AFL++, e.g.:
|
||||
|
||||
```
|
||||
CC=/path/to/afl/afl-gcc-fast
|
||||
CXX=/path/to/afl/afl-g++-fast
|
||||
@ -66,15 +70,15 @@ standard operating mode of AFL, e.g.:
|
||||
./configure [...options...]
|
||||
make
|
||||
```
|
||||
|
||||
Note: We also used `CXX` to set the C++ compiler to `afl-g++-fast` for C++ code.
|
||||
|
||||
The tool honors roughly the same environmental variables as `afl-gcc` (see
|
||||
[env_variables.md](../docs/env_variables.md). This includes `AFL_INST_RATIO`,
|
||||
`AFL_USE_ASAN`, `AFL_HARDEN`, and `AFL_DONT_OPTIMIZE`.
|
||||
[docs/env_variables.md](../docs/env_variables.md)). This includes
|
||||
`AFL_INST_RATIO`, `AFL_USE_ASAN`, `AFL_HARDEN`, and `AFL_DONT_OPTIMIZE`.
|
||||
|
||||
Note: if you want the GCC plugin to be installed on your system for all
|
||||
users, you need to build it before issuing 'make install' in the parent
|
||||
directory.
|
||||
Note: if you want the GCC plugin to be installed on your system for all users,
|
||||
you need to build it before issuing 'make install' in the parent directory.
|
||||
|
||||
## 3) Gotchas, feedback, bugs
|
||||
|
||||
@ -83,93 +87,15 @@ reports to afl@aflplus.plus.
|
||||
|
||||
## 4) Bonus feature #1: deferred initialization
|
||||
|
||||
AFL tries to optimize performance by executing the targeted binary just once,
|
||||
stopping it just before main(), and then cloning this "main" process to get
|
||||
a steady supply of targets to fuzz.
|
||||
|
||||
Although this approach eliminates much of the OS-, linker- and libc-level
|
||||
costs of executing the program, it does not always help with binaries that
|
||||
perform other time-consuming initialization steps - say, parsing a large config
|
||||
file before getting to the fuzzed data.
|
||||
|
||||
In such cases, it's beneficial to initialize the forkserver a bit later, once
|
||||
most of the initialization work is already done, but before the binary attempts
|
||||
to read the fuzzed input and parse it; in some cases, this can offer a 10x+
|
||||
performance gain. You can implement delayed initialization in GCC mode in a
|
||||
fairly simple way.
|
||||
|
||||
First, locate a suitable location in the code where the delayed cloning can
|
||||
take place. This needs to be done with *extreme* care to avoid breaking the
|
||||
binary. In particular, the program will probably malfunction if you select
|
||||
a location after:
|
||||
|
||||
- The creation of any vital threads or child processes - since the forkserver
|
||||
can't clone them easily.
|
||||
|
||||
- The initialization of timers via setitimer() or equivalent calls.
|
||||
|
||||
- The creation of temporary files, network sockets, offset-sensitive file
|
||||
descriptors, and similar shared-state resources - but only provided that
|
||||
their state meaningfully influences the behavior of the program later on.
|
||||
|
||||
- Any access to the fuzzed input, including reading the metadata about its
|
||||
size.
|
||||
|
||||
With the location selected, add this code in the appropriate spot:
|
||||
|
||||
```
|
||||
#ifdef __AFL_HAVE_MANUAL_CONTROL
|
||||
__AFL_INIT();
|
||||
#endif
|
||||
```
|
||||
|
||||
You don't need the #ifdef guards, but they will make the program still work as
|
||||
usual when compiled with a compiler other than afl-gcc-fast/afl-clang-fast.
|
||||
|
||||
Finally, recompile the program with afl-gcc-fast (afl-gcc or afl-clang will
|
||||
*not* generate a deferred-initialization binary) - and you should be all set!
|
||||
See
|
||||
[README.persistent_mode.md#3) Deferred initialization](README.persistent_mode.md#3-deferred-initialization).
|
||||
|
||||
## 5) Bonus feature #2: persistent mode
|
||||
|
||||
Some libraries provide APIs that are stateless, or whose state can be reset in
|
||||
between processing different input files. When such a reset is performed, a
|
||||
single long-lived process can be reused to try out multiple test cases,
|
||||
eliminating the need for repeated `fork()` calls and the associated OS overhead.
|
||||
|
||||
The basic structure of the program that does this would be:
|
||||
|
||||
```
|
||||
while (__AFL_LOOP(1000)) {
|
||||
|
||||
/* Read input data. */
|
||||
/* Call library code to be fuzzed. */
|
||||
/* Reset state. */
|
||||
|
||||
}
|
||||
|
||||
/* Exit normally */
|
||||
```
|
||||
|
||||
The numerical value specified within the loop controls the maximum number
|
||||
of iterations before AFL will restart the process from scratch. This minimizes
|
||||
the impact of memory leaks and similar glitches; 1000 is a good starting point.
|
||||
|
||||
A more detailed template is shown in ../utils/persistent_mode/.
|
||||
Similarly to the previous mode, the feature works only with afl-gcc-fast or
|
||||
afl-clang-fast; #ifdef guards can be used to suppress it when using other
|
||||
compilers.
|
||||
|
||||
Note that as with the previous mode, the feature is easy to misuse; if you
|
||||
do not reset the critical state fully, you may end up with false positives or
|
||||
waste a whole lot of CPU power doing nothing useful at all. Be particularly
|
||||
wary of memory leaks and the state of file descriptors.
|
||||
|
||||
When running in this mode, the execution paths will inherently vary a bit
|
||||
depending on whether the input loop is being entered for the first time or
|
||||
executed again. To avoid spurious warnings, the feature implies
|
||||
`AFL_NO_VAR_CHECK` and hides the "variable path" warnings in the UI.
|
||||
See
|
||||
[README.persistent_mode.md#4) Persistent mode](README.persistent_mode.md#4-persistent-mode).
|
||||
|
||||
## 6) Bonus feature #3: selective instrumentation
|
||||
|
||||
It can be more effective to fuzzing to only instrument parts of the code.
|
||||
For details see [README.instrument_list.md](README.instrument_list.md).
|
||||
It can be more effective for fuzzing to only instrument parts of the code. For
|
||||
details, see [README.instrument_list.md](README.instrument_list.md).
|
@ -1,80 +1,84 @@
|
||||
# Using AFL++ with partial instrumentation
|
||||
|
||||
This file describes two different mechanisms to selectively instrument
|
||||
only specific parts in the target.
|
||||
This file describes two different mechanisms to selectively instrument only
|
||||
specific parts in the target.
|
||||
|
||||
Both mechanisms work for LLVM and GCC_PLUGIN, but not for afl-clang/afl-gcc.
|
||||
Both mechanisms work for LLVM and GCC_PLUGIN, but not for afl-clang/afl-gcc.
|
||||
|
||||
## 1) Description and purpose
|
||||
|
||||
When building and testing complex programs where only a part of the program is
|
||||
the fuzzing target, it often helps to only instrument the necessary parts of
|
||||
the program, leaving the rest uninstrumented. This helps to focus the fuzzer
|
||||
on the important parts of the program, avoiding undesired noise and
|
||||
disturbance by uninteresting code being exercised.
|
||||
the fuzzing target, it often helps to only instrument the necessary parts of the
|
||||
program, leaving the rest uninstrumented. This helps to focus the fuzzer on the
|
||||
important parts of the program, avoiding undesired noise and disturbance by
|
||||
uninteresting code being exercised.
|
||||
|
||||
For this purpose, "partial instrumentation" support is provided by AFL++ that
|
||||
allows you to specify what should be instrumented and what should not.
|
||||
|
||||
Both mechanisms can be used together.
|
||||
Both mechanisms for partial instrumentation can be used together.
|
||||
|
||||
## 2) Selective instrumentation with __AFL_COVERAGE_... directives
|
||||
|
||||
In this mechanism the selective instrumentation is done in the source code.
|
||||
In this mechanism, the selective instrumentation is done in the source code.
|
||||
|
||||
After the includes a special define has to be made, eg.:
|
||||
After the includes, a special define has to be made, e.g.:
|
||||
|
||||
```
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
// ...
|
||||
|
||||
|
||||
__AFL_COVERAGE(); // <- required for this feature to work
|
||||
```
|
||||
|
||||
If you want to disable the coverage at startup until you specify coverage
|
||||
should be started, then add `__AFL_COVERAGE_START_OFF();` at that position.
|
||||
If you want to disable the coverage at startup until you specify coverage should
|
||||
be started, then add `__AFL_COVERAGE_START_OFF();` at that position.
|
||||
|
||||
From here on out you have the following macros available that you can use
|
||||
in any function where you want:
|
||||
From here on out, you have the following macros available that you can use in
|
||||
any function where you want:
|
||||
|
||||
* `__AFL_COVERAGE_ON();` - enable coverage from this point onwards
|
||||
* `__AFL_COVERAGE_OFF();` - disable coverage from this point onwards
|
||||
* `__AFL_COVERAGE_DISCARD();` - reset all coverage gathered until this point
|
||||
* `__AFL_COVERAGE_SKIP();` - mark this test case as unimportant. Whatever happens, afl-fuzz will ignore it.
|
||||
* `__AFL_COVERAGE_ON();` - Enable coverage from this point onwards.
|
||||
* `__AFL_COVERAGE_OFF();` - Disable coverage from this point onwards.
|
||||
* `__AFL_COVERAGE_DISCARD();` - Reset all coverage gathered until this point.
|
||||
* `__AFL_COVERAGE_SKIP();` - Mark this test case as unimportant. Whatever
|
||||
happens, afl-fuzz will ignore it.
|
||||
|
||||
A special function is `__afl_coverage_interesting`.
|
||||
To use this, you must define `void __afl_coverage_interesting(u8 val, u32 id);`.
|
||||
Then you can use this function globally, where the `val` parameter can be set
|
||||
by you, the `id` parameter is for afl-fuzz and will be overwritten.
|
||||
Note that useful parameters for `val` are: 1, 2, 3, 4, 8, 16, 32, 64, 128.
|
||||
A value of e.g. 33 will be seen as 32 for coverage purposes.
|
||||
A special function is `__afl_coverage_interesting`. To use this, you must define
|
||||
`void __afl_coverage_interesting(u8 val, u32 id);`. Then you can use this
|
||||
function globally, where the `val` parameter can be set by you, the `id`
|
||||
parameter is for afl-fuzz and will be overwritten. Note that useful parameters
|
||||
for `val` are: 1, 2, 3, 4, 8, 16, 32, 64, 128. A value of, e.g., 33 will be seen
|
||||
as 32 for coverage purposes.
|
||||
|
||||
## 3) Selective instrumentation with AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST
|
||||
|
||||
This feature is equivalent to llvm 12 sancov feature and allows to specify
|
||||
on a filename and/or function name level to instrument these or skip them.
|
||||
This feature is equivalent to the LLVM 12 sancov feature and allows you to specify on a
|
||||
filename and/or function name level to instrument these or skip them.
|
||||
|
||||
### 3a) How to use the partial instrumentation mode
|
||||
|
||||
In order to build with partial instrumentation, you need to build with
|
||||
afl-clang-fast/afl-clang-fast++ or afl-clang-lto/afl-clang-lto++.
|
||||
The only required change is that you need to set either the environment variable
|
||||
AFL_LLVM_ALLOWLIST or AFL_LLVM_DENYLIST set with a filename.
|
||||
afl-clang-fast/afl-clang-fast++ or afl-clang-lto/afl-clang-lto++. The only
|
||||
required change is that you need to set either the environment variable
|
||||
`AFL_LLVM_ALLOWLIST` or `AFL_LLVM_DENYLIST` to a filename.
|
||||
|
||||
That file should contain the file names or functions that are to be instrumented
|
||||
(AFL_LLVM_ALLOWLIST) or are specifically NOT to be instrumented (AFL_LLVM_DENYLIST).
|
||||
(`AFL_LLVM_ALLOWLIST`) or are specifically NOT to be instrumented
|
||||
(`AFL_LLVM_DENYLIST`).
|
||||
|
||||
GCC_PLUGIN: you can use either AFL_LLVM_ALLOWLIST or AFL_GCC_ALLOWLIST (or the
|
||||
same for _DENYLIST), both work.
|
||||
GCC_PLUGIN: you can use either `AFL_LLVM_ALLOWLIST` or `AFL_GCC_ALLOWLIST` (or
|
||||
the same for `_DENYLIST`), both work.
|
||||
|
||||
For matching to succeed, the function/file name that is being compiled must end in the
|
||||
function/file name entry contained in this instrument file list. That is to avoid
|
||||
breaking the match when absolute paths are used during compilation.
|
||||
For matching to succeed, the function/file name that is being compiled must end
|
||||
in the function/file name entry contained in this instrument file list. That is
|
||||
to avoid breaking the match when absolute paths are used during compilation.
|
||||
|
||||
**NOTE:** In builds with optimization enabled, functions might be inlined and would not match!
|
||||
**NOTE:** In builds with optimization enabled, functions might be inlined and
|
||||
would not match!
|
||||
|
||||
For example, if your source tree looks like this:
|
||||
|
||||
For example if your source tree looks like this:
|
||||
```
|
||||
project/
|
||||
project/feature_a/a1.cpp
|
||||
@ -83,36 +87,45 @@ project/feature_b/b1.cpp
|
||||
project/feature_b/b2.cpp
|
||||
```
|
||||
|
||||
and you only want to test feature_a, then create an "instrument file list" file containing:
|
||||
And you only want to test feature_a, then create an "instrument file list" file
|
||||
containing:
|
||||
|
||||
```
|
||||
feature_a/a1.cpp
|
||||
feature_a/a2.cpp
|
||||
```
|
||||
|
||||
However if the "instrument file list" file contains only this, it works as well:
|
||||
However, if the "instrument file list" file contains only this, it works as
|
||||
well:
|
||||
|
||||
```
|
||||
a1.cpp
|
||||
a2.cpp
|
||||
```
|
||||
but it might lead to files being unwantedly instrumented if the same filename
|
||||
|
||||
But it might lead to files being unwantedly instrumented if the same filename
|
||||
exists somewhere else in the project directories.
|
||||
|
||||
You can also specify function names. Note that for C++ the function names
|
||||
must be mangled to match! `nm` can print these names.
|
||||
You can also specify function names. Note that for C++ the function names must
|
||||
be mangled to match! `nm` can print these names.
|
||||
|
||||
AFL++ is able to identify whether an entry is a filename or a function. However,
|
||||
if you want to be sure (and compliant to the sancov allow/blocklist format), you
|
||||
can specify source file entries like this:
|
||||
|
||||
AFL++ is able to identify whether an entry is a filename or a function.
|
||||
However if you want to be sure (and compliant to the sancov allow/blocklist
|
||||
format), you can specify source file entries like this:
|
||||
```
|
||||
src: *malloc.c
|
||||
```
|
||||
and function entries like this:
|
||||
|
||||
And function entries like this:
|
||||
|
||||
```
|
||||
fun: MallocFoo
|
||||
```
|
||||
|
||||
Note that whitespace is ignored and comments (`# foo`) are supported.
|
||||
|
||||
### 3b) UNIX-style pattern matching
|
||||
|
||||
You can add UNIX-style pattern matching in the "instrument file list" entries.
|
||||
See `man fnmatch` for the syntax. We do not set any of the `fnmatch` flags.
|
||||
See `man fnmatch` for the syntax. We do not set any of the `fnmatch` flags.
|
@ -2,19 +2,17 @@
|
||||
|
||||
## Introduction
|
||||
|
||||
This originally is the work of an individual nicknamed laf-intel.
|
||||
His blog [Circumventing Fuzzing Roadblocks with Compiler Transformations](https://lafintel.wordpress.com/)
|
||||
and gitlab repo [laf-llvm-pass](https://gitlab.com/laf-intel/laf-llvm-pass/)
|
||||
describe some code transformations that
|
||||
help AFL++ to enter conditional blocks, where conditions consist of
|
||||
comparisons of large values.
|
||||
This originally is the work of an individual nicknamed laf-intel. His blog
|
||||
[Circumventing Fuzzing Roadblocks with Compiler Transformations](https://lafintel.wordpress.com/)
|
||||
and GitLab repo [laf-llvm-pass](https://gitlab.com/laf-intel/laf-llvm-pass/)
|
||||
describe some code transformations that help AFL++ to enter conditional blocks,
|
||||
where conditions consist of comparisons of large values.
|
||||
|
||||
## Usage
|
||||
|
||||
By default these passes will not run when you compile programs using
|
||||
afl-clang-fast. Hence, you can use AFL as usual.
|
||||
To enable the passes you must set environment variables before you
|
||||
compile the target project.
|
||||
By default, these passes will not run when you compile programs using
|
||||
afl-clang-fast. Hence, you can use AFL++ as usual. To enable the passes, you
|
||||
must set environment variables before you compile the target project.
|
||||
|
||||
The following options exist:
|
||||
|
||||
@ -24,32 +22,30 @@ Enables the split-switches pass.
|
||||
|
||||
`export AFL_LLVM_LAF_TRANSFORM_COMPARES=1`
|
||||
|
||||
Enables the transform-compares pass (strcmp, memcmp, strncmp,
|
||||
strcasecmp, strncasecmp).
|
||||
Enables the transform-compares pass (strcmp, memcmp, strncmp, strcasecmp,
|
||||
strncasecmp).
|
||||
|
||||
`export AFL_LLVM_LAF_SPLIT_COMPARES=1`
|
||||
|
||||
Enables the split-compares pass.
|
||||
By default it will
|
||||
Enables the split-compares pass. By default, it will
|
||||
1. simplify operators >= (and <=) into chains of > (<) and == comparisons
|
||||
2. change signed integer comparisons to a chain of sign-only comparison
|
||||
and unsigned integer comparisons
|
||||
3. split all unsigned integer comparisons with bit widths of
|
||||
64, 32 or 16 bits to chains of 8 bits comparisons.
|
||||
2. change signed integer comparisons to a chain of sign-only comparison and
|
||||
unsigned integer comparisons
|
||||
3. split all unsigned integer comparisons with bit widths of 64, 32, or 16 bits
|
||||
to chains of 8 bits comparisons.
|
||||
|
||||
You can change the behaviour of the last step by setting
|
||||
`export AFL_LLVM_LAF_SPLIT_COMPARES_BITW=<bit_width>`, where
|
||||
bit_width may be 64, 32 or 16. For example, a bit_width of 16
|
||||
would split larger comparisons down to 16 bit comparisons.
|
||||
You can change the behavior of the last step by setting `export
|
||||
AFL_LLVM_LAF_SPLIT_COMPARES_BITW=<bit_width>`, where bit_width may be 64, 32, or
|
||||
16. For example, a bit_width of 16 would split larger comparisons down to 16 bit
|
||||
comparisons.
|
||||
|
||||
A new experimental feature is splitting floating point comparisons into a
|
||||
series of sign, exponent and mantissa comparisons followed by splitting each
|
||||
of them into 8 bit comparisons when necessary.
|
||||
It is activated with the `AFL_LLVM_LAF_SPLIT_FLOATS` setting.
|
||||
Please note that full IEEE 754 functionality is not preserved, that is
|
||||
values of nan and infinity will probably behave differently.
|
||||
A new experimental feature is splitting floating point comparisons into a series
|
||||
of sign, exponent and mantissa comparisons followed by splitting each of them
|
||||
into 8 bit comparisons when necessary. It is activated with the
|
||||
`AFL_LLVM_LAF_SPLIT_FLOATS` setting. Please note that full IEEE 754
|
||||
functionality is not preserved, that is, values of NaN and infinity will probably
|
||||
behave differently.
|
||||
|
||||
Note that setting this automatically activates `AFL_LLVM_LAF_SPLIT_COMPARES`
|
||||
|
||||
You can also set `AFL_LLVM_LAF_ALL` and have all of the above enabled :-)
|
||||
Note that setting this automatically activates `AFL_LLVM_LAF_SPLIT_COMPARES`.
|
||||
|
||||
You can also set `AFL_LLVM_LAF_ALL` and have all of the above enabled. :-)
|
@ -1,72 +1,79 @@
|
||||
# Fast LLVM-based instrumentation for afl-fuzz
|
||||
|
||||
(See [../README.md](../README.md) for the general instruction manual.)
|
||||
For the general instruction manual, see [../README.md](../README.md).
|
||||
|
||||
(See [README.gcc_plugin.md](README.gcc_plugin.md) for the GCC-based instrumentation.)
|
||||
For the GCC-based instrumentation, see
|
||||
[README.gcc_plugin.md](README.gcc_plugin.md).
|
||||
|
||||
## 1) Introduction
|
||||
|
||||
! llvm_mode works with llvm versions 3.8 up to 13 !
|
||||
|
||||
The code in this directory allows you to instrument programs for AFL using
|
||||
true compiler-level instrumentation, instead of the more crude
|
||||
assembly-level rewriting approach taken by afl-gcc and afl-clang. This has
|
||||
several interesting properties:
|
||||
The code in this directory allows you to instrument programs for AFL++ using
|
||||
true compiler-level instrumentation, instead of the more crude assembly-level
|
||||
rewriting approach taken by afl-gcc and afl-clang. This has several interesting
|
||||
properties:
|
||||
|
||||
- The compiler can make many optimizations that are hard to pull off when
|
||||
manually inserting assembly. As a result, some slow, CPU-bound programs will
|
||||
run up to around 2x faster.
|
||||
- The compiler can make many optimizations that are hard to pull off when
|
||||
manually inserting assembly. As a result, some slow, CPU-bound programs will
|
||||
run up to around 2x faster.
|
||||
|
||||
The gains are less pronounced for fast binaries, where the speed is limited
|
||||
chiefly by the cost of creating new processes. In such cases, the gain will
|
||||
probably stay within 10%.
|
||||
The gains are less pronounced for fast binaries, where the speed is limited
|
||||
chiefly by the cost of creating new processes. In such cases, the gain will
|
||||
probably stay within 10%.
|
||||
|
||||
- The instrumentation is CPU-independent. At least in principle, you should
|
||||
be able to rely on it to fuzz programs on non-x86 architectures (after
|
||||
building afl-fuzz with AFL_NO_X86=1).
|
||||
- The instrumentation is CPU-independent. At least in principle, you should be
|
||||
able to rely on it to fuzz programs on non-x86 architectures (after building
|
||||
afl-fuzz with AFL_NO_X86=1).
|
||||
|
||||
- The instrumentation can cope a bit better with multi-threaded targets.
|
||||
- The instrumentation can cope a bit better with multi-threaded targets.
|
||||
|
||||
- Because the feature relies on the internals of LLVM, it is clang-specific
|
||||
and will *not* work with GCC (see ../gcc_plugin/ for an alternative once
|
||||
it is available).
|
||||
- Because the feature relies on the internals of LLVM, it is clang-specific and
|
||||
will *not* work with GCC (see ../gcc_plugin/ for an alternative once it is
|
||||
available).
|
||||
|
||||
Once this implementation is shown to be sufficiently robust and portable, it
|
||||
will probably replace afl-clang. For now, it can be built separately and
|
||||
co-exists with the original code.
|
||||
|
||||
The idea and much of the intial implementation came from Laszlo Szekeres.
|
||||
The idea and much of the initial implementation came from Laszlo Szekeres.
|
||||
|
||||
## 2a) How to use this - short
|
||||
|
||||
Set the `LLVM_CONFIG` variable to the clang version you want to use, e.g.
|
||||
|
||||
```
|
||||
LLVM_CONFIG=llvm-config-9 make
|
||||
```
|
||||
|
||||
In case you have your own compiled llvm version specify the full path:
|
||||
|
||||
```
|
||||
LLVM_CONFIG=~/llvm-project/build/bin/llvm-config make
|
||||
```
|
||||
|
||||
If you try to use a new llvm version on an old Linux this can fail because of
|
||||
old c++ libraries. In this case usually switching to gcc/g++ to compile
|
||||
llvm_mode will work:
|
||||
|
||||
```
|
||||
LLVM_CONFIG=llvm-config-7 REAL_CC=gcc REAL_CXX=g++ make
|
||||
```
|
||||
It is highly recommended to use the newest clang version you can put your
|
||||
hands on :)
|
||||
|
||||
It is highly recommended to use the newest clang version you can put your hands
|
||||
on :)
|
||||
|
||||
Then look at [README.persistent_mode.md](README.persistent_mode.md).
|
||||
|
||||
## 2b) How to use this - long
|
||||
|
||||
In order to leverage this mechanism, you need to have clang installed on your
|
||||
system. You should also make sure that the llvm-config tool is in your path
|
||||
(or pointed to via LLVM_CONFIG in the environment).
|
||||
system. You should also make sure that the llvm-config tool is in your path (or
|
||||
pointed to via LLVM_CONFIG in the environment).
|
||||
|
||||
Note that if you have several LLVM versions installed, pointing LLVM_CONFIG
|
||||
to the version you want to use will switch compiling to this specific
|
||||
version - if you installation is set up correctly :-)
|
||||
Note that if you have several LLVM versions installed, pointing LLVM_CONFIG to
|
||||
the version you want to use will switch compiling to this specific version - if
|
||||
your installation is set up correctly :-)
|
||||
|
||||
Unfortunately, some systems that do have clang come without llvm-config or the
|
||||
LLVM development headers; one example of this is FreeBSD. FreeBSD users will
|
||||
@ -75,15 +82,15 @@ load modules (you'll see "Service unavailable" when loading afl-llvm-pass.so).
|
||||
|
||||
To solve all your problems, you can grab pre-built binaries for your OS from:
|
||||
|
||||
https://llvm.org/releases/download.html
|
||||
[https://llvm.org/releases/download.html](https://llvm.org/releases/download.html)
|
||||
|
||||
...and then put the bin/ directory from the tarball at the beginning of your
|
||||
$PATH when compiling the feature and building packages later on. You don't need
|
||||
to be root for that.
|
||||
|
||||
To build the instrumentation itself, type 'make'. This will generate binaries
|
||||
called afl-clang-fast and afl-clang-fast++ in the parent directory. Once this
|
||||
is done, you can instrument third-party code in a way similar to the standard
|
||||
To build the instrumentation itself, type `make`. This will generate binaries
|
||||
called afl-clang-fast and afl-clang-fast++ in the parent directory. Once this is
|
||||
done, you can instrument third-party code in a way similar to the standard
|
||||
operating mode of AFL, e.g.:
|
||||
|
||||
```
|
||||
@ -93,81 +100,137 @@ operating mode of AFL, e.g.:
|
||||
|
||||
Be sure to also include CXX set to afl-clang-fast++ for C++ code.
|
||||
|
||||
Note that afl-clang-fast/afl-clang-fast++ are just pointers to afl-cc.
|
||||
You can also use afl-cc/afl-c++ and instead direct it to use LLVM
|
||||
instrumentation by either setting `AFL_CC_COMPILER=LLVM` or pass the parameter
|
||||
`--afl-llvm` via CFLAGS/CXXFLAGS/CPPFLAGS.
|
||||
Note that afl-clang-fast/afl-clang-fast++ are just pointers to afl-cc. You can
|
||||
also use afl-cc/afl-c++ and instead direct it to use LLVM instrumentation by
|
||||
either setting `AFL_CC_COMPILER=LLVM` or pass the parameter `--afl-llvm` via
|
||||
CFLAGS/CXXFLAGS/CPPFLAGS.
|
||||
|
||||
The tool honors roughly the same environmental variables as afl-gcc (see
|
||||
[docs/env_variables.md](../docs/env_variables.md)). This includes AFL_USE_ASAN,
|
||||
AFL_HARDEN, and AFL_DONT_OPTIMIZE. However AFL_INST_RATIO is not honored
|
||||
as it does not serve a good purpose with the more effective PCGUARD analysis.
|
||||
AFL_HARDEN, and AFL_DONT_OPTIMIZE. However AFL_INST_RATIO is not honored as it
|
||||
does not serve a good purpose with the more effective PCGUARD analysis.
|
||||
|
||||
## 3) Options
|
||||
|
||||
Several options are present to make llvm_mode faster or help it rearrange
|
||||
the code to make afl-fuzz path discovery easier.
|
||||
Several options are present to make llvm_mode faster or help it rearrange the
|
||||
code to make afl-fuzz path discovery easier.
|
||||
|
||||
If you need just to instrument specific parts of the code, you can the instrument file list
|
||||
which C/C++ files to actually instrument. See [README.instrument_list.md](README.instrument_list.md)
|
||||
If you need just to instrument specific parts of the code, you can specify in the
|
||||
instrument file list which C/C++ files to actually instrument. See
|
||||
[README.instrument_list.md](README.instrument_list.md)
|
||||
|
||||
For splitting memcmp, strncmp, etc. please see [README.laf-intel.md](README.laf-intel.md)
|
||||
For splitting memcmp, strncmp, etc. please see
|
||||
[README.laf-intel.md](README.laf-intel.md)
|
||||
|
||||
Then there are different ways of instrumenting the target:
|
||||
|
||||
1. An better instrumentation strategy uses LTO and link time
|
||||
instrumentation. Note that not all targets can compile in this mode, however
|
||||
if it works it is the best option you can use.
|
||||
Simply use afl-clang-lto/afl-clang-lto++ to use this option.
|
||||
See [README.lto.md](README.lto.md)
|
||||
1. A better instrumentation strategy uses LTO and link time instrumentation.
|
||||
Note that not all targets can compile in this mode, however if it works it is
|
||||
the best option you can use. Simply use afl-clang-lto/afl-clang-lto++ to use
|
||||
this option. See [README.lto.md](README.lto.md).
|
||||
|
||||
2. Alternativly you can choose a completely different coverage method:
|
||||
2. Alternatively you can choose a completely different coverage method:
|
||||
|
||||
2a. N-GRAM coverage - which combines the previous visited edges with the
|
||||
current one. This explodes the map but on the other hand has proven to be
|
||||
effective for fuzzing.
|
||||
See [README.ngram.md](README.ngram.md)
|
||||
2a. N-GRAM coverage - which combines the previous visited edges with the current
|
||||
one. This explodes the map but on the other hand has proven to be effective
|
||||
for fuzzing. See
|
||||
[7) AFL++ N-Gram Branch Coverage](#7-afl-n-gram-branch-coverage).
|
||||
|
||||
2b. Context sensitive coverage - which combines the visited edges with an
|
||||
individual caller ID (the function that called the current one)
|
||||
[README.ctx.md](README.ctx.md)
|
||||
individual caller ID (the function that called the current one). See
|
||||
[6) AFL++ Context Sensitive Branch Coverage](#6-afl-context-sensitive-branch-coverage).
|
||||
|
||||
Then - additionally to one of the instrumentation options above - there is
|
||||
a very effective new instrumentation option called CmpLog as an alternative to
|
||||
laf-intel that allow AFL++ to apply mutations similar to Redqueen.
|
||||
See [README.cmplog.md](README.cmplog.md)
|
||||
Then - additionally to one of the instrumentation options above - there is a
|
||||
very effective new instrumentation option called CmpLog as an alternative to
|
||||
laf-intel that allows AFL++ to apply mutations similar to Redqueen. See
|
||||
[README.cmplog.md](README.cmplog.md).
|
||||
|
||||
Finally if your llvm version is 8 or lower, you can activate a mode that
|
||||
prevents that a counter overflow result in a 0 value. This is good for
|
||||
path discovery, but the llvm implementation for x86 for this functionality
|
||||
is not optimal and was only fixed in llvm 9.
|
||||
You can set this with AFL_LLVM_NOT_ZERO=1
|
||||
See [README.neverzero.md](README.neverzero.md)
|
||||
Finally, if your llvm version is 8 or lower, you can activate a mode that
|
||||
prevents a counter overflow from resulting in a 0 value. This is good for path
|
||||
discovery, but the llvm implementation for x86 for this functionality is not
|
||||
optimal and was only fixed in llvm 9. You can set this with AFL_LLVM_NOT_ZERO=1.
|
||||
|
||||
Support for thread safe counters has been added for all modes.
|
||||
Activate it with `AFL_LLVM_THREADSAFE_INST=1`. The tradeoff is better precision
|
||||
in multi threaded apps for a slightly higher instrumentation overhead.
|
||||
This also disables the nozero counter default for performance reasons.
|
||||
Support for thread safe counters has been added for all modes. Activate it with
|
||||
`AFL_LLVM_THREADSAFE_INST=1`. The tradeoff is better precision in multi threaded
|
||||
apps for a slightly higher instrumentation overhead. This also disables the
|
||||
nozero counter default for performance reasons.
|
||||
|
||||
## 4) Snapshot feature
|
||||
## 4) deferred initialization, persistent mode, shared memory fuzzing
|
||||
|
||||
To speed up fuzzing you can use a linux loadable kernel module which enables
|
||||
a snapshot feature.
|
||||
See [README.snapshot.md](README.snapshot.md)
|
||||
This is the most powerful and effective fuzzing you can do. Please see
|
||||
[README.persistent_mode.md](README.persistent_mode.md) for a full explanation.
|
||||
|
||||
## 5) Gotchas, feedback, bugs
|
||||
|
||||
This is an early-stage mechanism, so field reports are welcome. You can send bug
|
||||
reports to <afl-users@googlegroups.com>.
|
||||
|
||||
## 6) deferred initialization, persistent mode, shared memory fuzzing
|
||||
|
||||
This is the most powerful and effective fuzzing you can do.
|
||||
Please see [README.persistent_mode.md](README.persistent_mode.md) for a
|
||||
full explanation.
|
||||
|
||||
## 7) Bonus feature: 'dict2file' pass
|
||||
## 5) Bonus feature: 'dict2file' pass
|
||||
|
||||
Just specify `AFL_LLVM_DICT2FILE=/absolute/path/file.txt` and during compilation
|
||||
all constant string compare parameters will be written to this file to be
|
||||
used with afl-fuzz' `-x` option.
|
||||
all constant string compare parameters will be written to this file to be used
|
||||
with afl-fuzz' `-x` option.
|
||||
|
||||
## 6) AFL++ Context Sensitive Branch Coverage
|
||||
|
||||
### What is this?
|
||||
|
||||
This is an LLVM-based implementation of the context sensitive branch coverage.
|
||||
|
||||
Basically, every function gets its own ID and, every time an edge is logged,
|
||||
all the IDs in the callstack are hashed and combined with the edge transition
|
||||
hash to augment the classic edge coverage with the information about the calling
|
||||
context.
|
||||
|
||||
So if both function A and function B call a function C, the coverage collected
|
||||
in C will be different.
|
||||
|
||||
In math the coverage is collected as follows: `map[current_location_ID ^
|
||||
previous_location_ID >> 1 ^ hash_callstack_IDs] += 1`
|
||||
|
||||
The callstack hash is produced by XOR-ing the function IDs to avoid explosion with
|
||||
recursive functions.
|
||||
|
||||
### Usage
|
||||
|
||||
Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable.
|
||||
|
||||
It is highly recommended to increase the MAP_SIZE_POW2 definition in config.h to
|
||||
at least 18 and maybe up to 20 for this as otherwise too many map collisions
|
||||
occur.
|
||||
|
||||
### Caller Branch Coverage
|
||||
|
||||
If the context sensitive coverage introduces too many collisions and becomes
|
||||
detrimental, the user can choose to augment edge coverage with just the called
|
||||
function ID, instead of the entire callstack hash.
|
||||
|
||||
In math the coverage is collected as follows: `map[current_location_ID ^
|
||||
previous_location_ID >> 1 ^ previous_callee_ID] += 1`
|
||||
|
||||
Set the `AFL_LLVM_INSTRUMENT=CALLER` or `AFL_LLVM_CALLER=1` environment
|
||||
variable.
|
||||
|
||||
## 7) AFL++ N-Gram Branch Coverage
|
||||
|
||||
### Source
|
||||
|
||||
This is an LLVM-based implementation of the n-gram branch coverage proposed in
|
||||
the paper
|
||||
["Be Sensitive and Collaborative: Analyzing Impact of Coverage Metrics in Greybox Fuzzing"](https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf)
|
||||
by Jinghan Wang, et al.
|
||||
|
||||
Note that the original implementation (available
|
||||
[here](https://github.com/bitsecurerlab/afl-sensitive)) is built on top of AFL's
|
||||
qemu_mode. This is essentially a port that uses LLVM vectorized instructions
|
||||
(available from llvm versions 4.0.1 and higher) to achieve the same results when
|
||||
compiling source code.
|
||||
|
||||
In math the branch coverage is performed as follows: `map[current_location ^
|
||||
prev_location[0] >> 1 ^ prev_location[1] >> 1 ^ ... up to n-1`] += 1`
|
||||
|
||||
### Usage
|
||||
|
||||
The size of `n` (i.e., the number of branches to remember) is an option that is
|
||||
specified either in the `AFL_LLVM_INSTRUMENT=NGRAM-{value}` or the
|
||||
`AFL_LLVM_NGRAM_SIZE` environment variable. Good values are 2, 4, or 8, valid
|
||||
are 2-16.
|
||||
|
||||
It is highly recommended to increase the MAP_SIZE_POW2 definition in config.h to
|
||||
at least 18 and maybe up to 20 for this as otherwise too many map collisions
|
||||
occur.
|
@ -1,55 +1,56 @@
|
||||
# afl-clang-lto - collision free instrumentation at link time
|
||||
|
||||
## TLDR;
|
||||
## TL;DR:
|
||||
|
||||
This version requires a current llvm 11+ compiled from the github master.
|
||||
This version requires a current llvm 11+ compiled from the GitHub master.
|
||||
|
||||
1. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better
|
||||
coverage than anything else that is out there in the AFL world
|
||||
coverage than anything else that is out there in the AFL world.
|
||||
|
||||
2. You can use it together with llvm_mode: laf-intel and the instrument file listing
|
||||
features and can be combined with cmplog/Redqueen
|
||||
2. You can use it together with llvm_mode: laf-intel and the instrument file
|
||||
listing features and can be combined with cmplog/Redqueen.
|
||||
|
||||
3. It only works with llvm 11+
|
||||
3. It only works with llvm 11+.
|
||||
|
||||
4. AUTODICTIONARY feature! see below
|
||||
4. AUTODICTIONARY feature (see below)!
|
||||
|
||||
5. If any problems arise be sure to set `AR=llvm-ar RANLIB=llvm-ranlib`.
|
||||
Some targets might need `LD=afl-clang-lto` and others `LD=afl-ld-lto`.
|
||||
5. If any problems arise, be sure to set `AR=llvm-ar RANLIB=llvm-ranlib`. Some
|
||||
targets might need `LD=afl-clang-lto` and others `LD=afl-ld-lto`.
|
||||
|
||||
## Introduction and problem description
|
||||
|
||||
A big issue with how AFL/AFL++ works is that the basic block IDs that are
|
||||
set during compilation are random - and hence naturally the larger the number
|
||||
of instrumented locations, the higher the number of edge collisions are in the
|
||||
map. This can result in not discovering new paths and therefore degrade the
|
||||
A big issue with how AFL++ works is that the basic block IDs that are set during
|
||||
compilation are random - and hence naturally the larger the number of
|
||||
instrumented locations, the higher the number of edge collisions are in the map.
|
||||
This can result in not discovering new paths and therefore degrade the
|
||||
efficiency of the fuzzing process.
|
||||
|
||||
*This issue is underestimated in the fuzzing community!*
|
||||
With a 2^16 = 64kb standard map at already 256 instrumented blocks there is
|
||||
on average one collision. On average a target has 10.000 to 50.000
|
||||
instrumented blocks hence the real collisions are between 750-18.000!
|
||||
*This issue is underestimated in the fuzzing community!* With a 2^16 = 64kb
|
||||
standard map at already 256 instrumented blocks, there is on average one
|
||||
collision. On average, a target has 10.000 to 50.000 instrumented blocks, hence
|
||||
the real collisions are between 750-18.000!
|
||||
|
||||
To reach a solution that prevents any collisions took several approaches
|
||||
and many dead ends until we got to this:
|
||||
To reach a solution that prevents any collisions took several approaches and
|
||||
many dead ends until we got to this:
|
||||
|
||||
* We instrument at link time when we have all files pre-compiled
|
||||
* To instrument at link time we compile in LTO (link time optimization) mode
|
||||
* Our compiler (afl-clang-lto/afl-clang-lto++) takes care of setting the
|
||||
correct LTO options and runs our own afl-ld linker instead of the system
|
||||
linker
|
||||
* The LLVM linker collects all LTO files to link and instruments them so that
|
||||
we have non-colliding edge overage
|
||||
* We use a new (for afl) edge coverage - which is the same as in llvm
|
||||
-fsanitize=coverage edge coverage mode :)
|
||||
* We instrument at link time when we have all files pre-compiled.
|
||||
* To instrument at link time, we compile in LTO (link time optimization) mode.
|
||||
* Our compiler (afl-clang-lto/afl-clang-lto++) takes care of setting the correct
|
||||
LTO options and runs our own afl-ld linker instead of the system linker.
|
||||
* The LLVM linker collects all LTO files to link and instruments them so that we
|
||||
have non-colliding edge coverage.
|
||||
* We use a new (for afl) edge coverage - which is the same as in llvm
|
||||
-fsanitize=coverage edge coverage mode. :)
|
||||
|
||||
The result:
|
||||
* 10-25% speed gain compared to llvm_mode
|
||||
* guaranteed non-colliding edge coverage :-)
|
||||
* The compile time especially for binaries to an instrumented library can be
|
||||
much longer
|
||||
|
||||
* 10-25% speed gain compared to llvm_mode
|
||||
* guaranteed non-colliding edge coverage :-)
|
||||
* The compile time, especially for binaries to an instrumented library, can be
|
||||
much longer.
|
||||
|
||||
Example build output from a libtiff build:
|
||||
|
||||
```
|
||||
libtool: link: afl-clang-lto -g -O2 -Wall -W -o thumbnail thumbnail.o ../libtiff/.libs/libtiff.a ../port/.libs/libport.a -llzma -ljbig -ljpeg -lz -lm
|
||||
afl-clang-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de> in mode LTO
|
||||
@ -62,21 +63,24 @@ AUTODICTIONARY: 11 strings found
|
||||
|
||||
### Installing llvm version 11 or 12
|
||||
|
||||
llvm 11 or even 12 should be available in all current Linux repositories.
|
||||
If you use an outdated Linux distribution read the next section.
|
||||
llvm 11 or even 12 should be available in all current Linux repositories. If you
|
||||
use an outdated Linux distribution, read the next section.
|
||||
|
||||
### Installing llvm from the llvm repository (version 12+)
|
||||
|
||||
Installing the llvm snapshot builds is easy and mostly painless:
|
||||
|
||||
In the follow line change `NAME` for your Debian or Ubuntu release name
|
||||
In the following line, change `NAME` for your Debian or Ubuntu release name
|
||||
(e.g. buster, focal, eon, etc.):
|
||||
|
||||
```
|
||||
echo deb http://apt.llvm.org/NAME/ llvm-toolchain-NAME NAME >> /etc/apt/sources.list
|
||||
```
|
||||
then add the pgp key of llvm and install the packages:
|
||||
|
||||
Then add the pgp key of llvm and install the packages:
|
||||
|
||||
```
|
||||
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
|
||||
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
|
||||
apt-get update && apt-get upgrade -y
|
||||
apt-get install -y clang-12 clang-tools-12 libc++1-12 libc++-12-dev \
|
||||
libc++abi1-12 libc++abi-12-dev libclang1-12 libclang-12-dev \
|
||||
@ -87,7 +91,8 @@ apt-get install -y clang-12 clang-tools-12 libc++1-12 libc++-12-dev \
|
||||
|
||||
### Building llvm yourself (version 12+)
|
||||
|
||||
Building llvm from github takes quite some long time and is not painless:
|
||||
Building llvm from GitHub takes quite some time and is not painless:
|
||||
|
||||
```sh
|
||||
sudo apt install binutils-dev # this is *essential*!
|
||||
git clone --depth=1 https://github.com/llvm/llvm-project
|
||||
@ -126,10 +131,12 @@ sudo make install
|
||||
|
||||
Just use afl-clang-lto like you did with afl-clang-fast or afl-gcc.
|
||||
|
||||
Also the instrument file listing (AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST -> [README.instrument_list.md](README.instrument_list.md)) and
|
||||
laf-intel/compcov (AFL_LLVM_LAF_* -> [README.laf-intel.md](README.laf-intel.md)) work.
|
||||
Also, the instrument file listing (AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST ->
|
||||
[README.instrument_list.md](README.instrument_list.md)) and laf-intel/compcov
|
||||
(AFL_LLVM_LAF_* -> [README.laf-intel.md](README.laf-intel.md)) work.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
CC=afl-clang-lto CXX=afl-clang-lto++ RANLIB=llvm-ranlib AR=llvm-ar ./configure
|
||||
make
|
||||
@ -143,51 +150,48 @@ NOTE: some targets also need to set the linker, try both `afl-clang-lto` and
|
||||
Note: this is highly discouraged! Try to compile to static libraries with
|
||||
afl-clang-lto instead of shared libraries!
|
||||
|
||||
To make instrumented shared libraries work with afl-clang-lto you have to do
|
||||
To make instrumented shared libraries work with afl-clang-lto, you have to do
|
||||
quite some extra steps.
|
||||
|
||||
Every shared library you want to instrument has to be individually compiled.
|
||||
The environment variable `AFL_LLVM_LTO_DONTWRITEID=1` has to be set during
|
||||
compilation.
|
||||
Additionally the environment variable `AFL_LLVM_LTO_STARTID` has to be set to
|
||||
the added edge count values of all previous compiled instrumented shared
|
||||
libraries for that target.
|
||||
E.g. for the first shared library this would be `AFL_LLVM_LTO_STARTID=0` and
|
||||
afl-clang-lto will then report how many edges have been instrumented (let's say
|
||||
it reported 1000 instrumented edges).
|
||||
The second shared library then has to be set to that value
|
||||
Every shared library you want to instrument has to be individually compiled. The
|
||||
environment variable `AFL_LLVM_LTO_DONTWRITEID=1` has to be set during
|
||||
compilation. Additionally, the environment variable `AFL_LLVM_LTO_STARTID` has
|
||||
to be set to the added edge count values of all previously compiled instrumented
|
||||
shared libraries for that target. E.g., for the first shared library this would
|
||||
be `AFL_LLVM_LTO_STARTID=0` and afl-clang-lto will then report how many edges
|
||||
have been instrumented (let's say it reported 1000 instrumented edges). The
|
||||
second shared library then has to be set to that value
|
||||
(`AFL_LLVM_LTO_STARTID=1000` in our example), for the third to all previous
|
||||
counts added, etc.
|
||||
|
||||
The final program compilation step then may *not* have `AFL_LLVM_LTO_DONTWRITEID`
|
||||
set, and `AFL_LLVM_LTO_STARTID` must be set to all edge counts added of all shared
|
||||
libraries it will be linked to.
|
||||
The final program compilation step then may *not* have
|
||||
`AFL_LLVM_LTO_DONTWRITEID` set, and `AFL_LLVM_LTO_STARTID` must be set to all
|
||||
edge counts added of all shared libraries it will be linked to.
|
||||
|
||||
This is quite some hands-on work, so better stay away from instrumenting
|
||||
shared libraries :-)
|
||||
This is quite some hands-on work, so better stay away from instrumenting shared
|
||||
libraries. :-)
|
||||
|
||||
## AUTODICTIONARY feature
|
||||
|
||||
While compiling, a dictionary based on string comparisons is automatically
|
||||
generated and put into the target binary. This dictionary is transfered to afl-fuzz
|
||||
on start. This improves coverage statistically by 5-10% :)
|
||||
generated and put into the target binary. This dictionary is transferred to
|
||||
afl-fuzz on start. This improves coverage statistically by 5-10%. :)
|
||||
|
||||
Note that if for any reason you do not want to use the autodictionary feature
|
||||
Note that if for any reason you do not want to use the autodictionary feature,
|
||||
then just set the environment variable `AFL_NO_AUTODICT` when starting afl-fuzz.
|
||||
|
||||
## Fixed memory map
|
||||
|
||||
To speed up fuzzing a little bit more, it is possible to set a fixed shared
|
||||
memory map.
|
||||
Recommended is the value 0x10000.
|
||||
memory map. Recommended is the value 0x10000.
|
||||
|
||||
In most cases this will work without any problems. However if a target uses
|
||||
early constructors, ifuncs or a deferred forkserver this can crash the target.
|
||||
In most cases, this will work without any problems. However, if a target uses
|
||||
early constructors, ifuncs, or a deferred forkserver, this can crash the target.
|
||||
|
||||
Also on unusual operating systems/processors/kernels or weird libraries the
|
||||
Also, on unusual operating systems/processors/kernels or weird libraries the
|
||||
recommended 0x10000 address might not work, so then change the fixed address.
|
||||
|
||||
To enable this feature set AFL_LLVM_MAP_ADDR with the address.
|
||||
To enable this feature, set `AFL_LLVM_MAP_ADDR` with the address.
|
||||
|
||||
## Document edge IDs
|
||||
|
||||
@ -206,143 +210,155 @@ these.
|
||||
An example of a hard to solve target is ffmpeg. Here is how to successfully
|
||||
instrument it:
|
||||
|
||||
1. Get and extract the current ffmpeg and change to its directory
|
||||
1. Get and extract the current ffmpeg and change to its directory.
|
||||
|
||||
2. Running configure with --cc=clang fails and various other items will fail
|
||||
when compiling, so we have to trick configure:
|
||||
|
||||
```
|
||||
./configure --enable-lto --disable-shared --disable-inline-asm
|
||||
```
|
||||
```
|
||||
./configure --enable-lto --disable-shared --disable-inline-asm
|
||||
```
|
||||
|
||||
3. Now the configuration is done - and we edit the settings in `./ffbuild/config.mak`
|
||||
(-: the original line, +: what to change it into):
|
||||
```
|
||||
-CC=gcc
|
||||
+CC=afl-clang-lto
|
||||
-CXX=g++
|
||||
+CXX=afl-clang-lto++
|
||||
-AS=gcc
|
||||
+AS=llvm-as
|
||||
-LD=gcc
|
||||
+LD=afl-clang-lto++
|
||||
-DEPCC=gcc
|
||||
+DEPCC=afl-clang-lto
|
||||
-DEPAS=gcc
|
||||
+DEPAS=afl-clang-lto++
|
||||
-AR=ar
|
||||
+AR=llvm-ar
|
||||
-AR_CMD=ar
|
||||
+AR_CMD=llvm-ar
|
||||
-NM_CMD=nm -g
|
||||
+NM_CMD=llvm-nm -g
|
||||
-RANLIB=ranlib -D
|
||||
+RANLIB=llvm-ranlib -D
|
||||
```
|
||||
3. Now the configuration is done - and we edit the settings in
|
||||
`./ffbuild/config.mak` (-: the original line, +: what to change it into):
|
||||
|
||||
4. Then type make, wait for a long time and you are done :)
|
||||
```
|
||||
-CC=gcc
|
||||
+CC=afl-clang-lto
|
||||
-CXX=g++
|
||||
+CXX=afl-clang-lto++
|
||||
-AS=gcc
|
||||
+AS=llvm-as
|
||||
-LD=gcc
|
||||
+LD=afl-clang-lto++
|
||||
-DEPCC=gcc
|
||||
+DEPCC=afl-clang-lto
|
||||
-DEPAS=gcc
|
||||
+DEPAS=afl-clang-lto++
|
||||
-AR=ar
|
||||
+AR=llvm-ar
|
||||
-AR_CMD=ar
|
||||
+AR_CMD=llvm-ar
|
||||
-NM_CMD=nm -g
|
||||
+NM_CMD=llvm-nm -g
|
||||
-RANLIB=ranlib -D
|
||||
+RANLIB=llvm-ranlib -D
|
||||
```
|
||||
|
||||
4. Then type make, wait for a long time, and you are done. :)
|
||||
|
||||
### Example: WebKit jsc
|
||||
|
||||
Building jsc is difficult as the build script has bugs.
|
||||
|
||||
1. checkout Webkit:
|
||||
```
|
||||
svn checkout https://svn.webkit.org/repository/webkit/trunk WebKit
|
||||
cd WebKit
|
||||
```
|
||||
1. Check out WebKit:
|
||||
|
||||
```
|
||||
svn checkout https://svn.webkit.org/repository/webkit/trunk WebKit
|
||||
cd WebKit
|
||||
```
|
||||
|
||||
2. Fix the build environment:
|
||||
```
|
||||
mkdir -p WebKitBuild/Release
|
||||
cd WebKitBuild/Release
|
||||
ln -s ../../../../../usr/bin/llvm-ar-12 llvm-ar-12
|
||||
ln -s ../../../../../usr/bin/llvm-ranlib-12 llvm-ranlib-12
|
||||
cd ../..
|
||||
```
|
||||
|
||||
3. Build :)
|
||||
```
|
||||
mkdir -p WebKitBuild/Release
|
||||
cd WebKitBuild/Release
|
||||
ln -s ../../../../../usr/bin/llvm-ar-12 llvm-ar-12
|
||||
ln -s ../../../../../usr/bin/llvm-ranlib-12 llvm-ranlib-12
|
||||
cd ../..
|
||||
```
|
||||
|
||||
```
|
||||
Tools/Scripts/build-jsc --jsc-only --cli --cmakeargs="-DCMAKE_AR='llvm-ar-12' -DCMAKE_RANLIB='llvm-ranlib-12' -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_CC_FLAGS='-O3 -lrt' -DCMAKE_CXX_FLAGS='-O3 -lrt' -DIMPORTED_LOCATION='/lib/x86_64-linux-gnu/' -DCMAKE_CC=afl-clang-lto -DCMAKE_CXX=afl-clang-lto++ -DENABLE_STATIC_JSC=ON"
|
||||
```
|
||||
3. Build. :)
|
||||
|
||||
```
|
||||
Tools/Scripts/build-jsc --jsc-only --cli --cmakeargs="-DCMAKE_AR='llvm-ar-12' -DCMAKE_RANLIB='llvm-ranlib-12' -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_CC_FLAGS='-O3 -lrt' -DCMAKE_CXX_FLAGS='-O3 -lrt' -DIMPORTED_LOCATION='/lib/x86_64-linux-gnu/' -DCMAKE_CC=afl-clang-lto -DCMAKE_CXX=afl-clang-lto++ -DENABLE_STATIC_JSC=ON"
|
||||
```
|
||||
|
||||
## Potential issues
|
||||
|
||||
### compiling libraries fails
|
||||
### Compiling libraries fails
|
||||
|
||||
If you see this message:
|
||||
|
||||
```
|
||||
/bin/ld: libfoo.a: error adding symbols: archive has no index; run ranlib to add one
|
||||
```
|
||||
This is because usually gnu gcc ranlib is being called which cannot deal with clang LTO files.
|
||||
The solution is simple: when you ./configure you also have to set RANLIB=llvm-ranlib and AR=llvm-ar
|
||||
|
||||
This is because usually gnu gcc ranlib is being called which cannot deal with
|
||||
clang LTO files. The solution is simple: when you `./configure`, you also have
|
||||
to set `RANLIB=llvm-ranlib` and `AR=llvm-ar`.
|
||||
|
||||
Solution:
|
||||
|
||||
```
|
||||
AR=llvm-ar RANLIB=llvm-ranlib CC=afl-clang-lto CXX=afl-clang-lto++ ./configure --disable-shared
|
||||
```
|
||||
and on some targets you have to set AR=/RANLIB= even for make as the configure script does not save it.
|
||||
Other targets ignore environment variables and need the parameters set via
|
||||
`./configure --cc=... --cxx= --ranlib= ...` etc. (I am looking at you ffmpeg!).
|
||||
|
||||
And on some targets you have to set `AR=/RANLIB=` even for `make` as the
|
||||
configure script does not save it. Other targets ignore environment variables
|
||||
and need the parameters set via `./configure --cc=... --cxx= --ranlib= ...` etc.
|
||||
(I am looking at you ffmpeg!)
|
||||
|
||||
If you see this message:
|
||||
|
||||
If you see this message
|
||||
```
|
||||
assembler command failed ...
|
||||
```
|
||||
then try setting `llvm-as` for configure:
|
||||
|
||||
Then try setting `llvm-as` for configure:
|
||||
|
||||
```
|
||||
AS=llvm-as ...
|
||||
```
|
||||
|
||||
### compiling programs still fail
|
||||
### Compiling programs still fails
|
||||
|
||||
afl-clang-lto is still a work in progress.
|
||||
|
||||
Known issues:
|
||||
* Anything that llvm 11+ cannot compile, afl-clang-lto cannot compile either - obviously
|
||||
* Anything that does not compile with LTO, afl-clang-lto cannot compile either - obviously
|
||||
* Anything that llvm 11+ cannot compile, afl-clang-lto cannot compile either -
|
||||
obviously.
|
||||
* Anything that does not compile with LTO, afl-clang-lto cannot compile either -
|
||||
obviously.
|
||||
|
||||
Hence if building a target with afl-clang-lto fails try to build it with llvm12
|
||||
and LTO enabled (`CC=clang-12` `CXX=clang++-12` `CFLAGS=-flto=full` and
|
||||
`CXXFLAGS=-flto=full`).
|
||||
Hence, if building a target with afl-clang-lto fails, try to build it with
|
||||
llvm12 and LTO enabled (`CC=clang-12`, `CXX=clang++-12`, `CFLAGS=-flto=full`,
|
||||
and `CXXFLAGS=-flto=full`).
|
||||
|
||||
If this succeeeds then there is an issue with afl-clang-lto. Please report at
|
||||
[https://github.com/AFLplusplus/AFLplusplus/issues/226](https://github.com/AFLplusplus/AFLplusplus/issues/226)
|
||||
If this succeeds, then there is an issue with afl-clang-lto. Please report at
|
||||
[https://github.com/AFLplusplus/AFLplusplus/issues/226](https://github.com/AFLplusplus/AFLplusplus/issues/226).
|
||||
|
||||
Even some targets where clang-12 fails can be built if the failure is just in
|
||||
`./configure`, see `Solving difficult targets` above.
|
||||
|
||||
## History
|
||||
|
||||
This was originally envisioned by hexcoder- in Summer 2019, however we saw no
|
||||
way to create a pass that is run at link time - although there is a option
|
||||
for this in the PassManager: EP_FullLinkTimeOptimizationLast
|
||||
("Fun" info - nobody knows what this is doing. And the developer who
|
||||
implemented this didn't respond to emails.)
|
||||
This was originally envisioned by hexcoder- in Summer 2019. However, we saw no
|
||||
way to create a pass that is run at link time - although there is an option for
|
||||
this in the PassManager: EP_FullLinkTimeOptimizationLast. ("Fun" info - nobody
|
||||
knows what this is doing. And the developer who implemented this didn't respond
|
||||
to emails.)
|
||||
|
||||
In December then came the idea to implement this as a pass that is run via
|
||||
the llvm "opt" program, which is performed via an own linker that afterwards
|
||||
calls the real linker.
|
||||
This was first implemented in January and work ... kinda.
|
||||
The LTO time instrumentation worked, however "how" the basic blocks were
|
||||
instrumented was a problem, as reducing duplicates turned out to be very,
|
||||
very difficult with a program that has so many paths and therefore so many
|
||||
dependencies. A lot of strategies were implemented - and failed.
|
||||
And then sat solvers were tried, but with over 10.000 variables that turned
|
||||
out to be a dead-end too.
|
||||
In December then came the idea to implement this as a pass that is run via the
|
||||
llvm "opt" program, which is performed via an own linker that afterwards calls
|
||||
the real linker. This was first implemented in January and worked ... kinda. The
|
||||
LTO time instrumentation worked, however, "how" the basic blocks were
|
||||
instrumented was a problem, as reducing duplicates turned out to be very, very
|
||||
difficult with a program that has so many paths and therefore so many
|
||||
dependencies. A lot of strategies were implemented - and failed. And then sat
|
||||
solvers were tried, but with over 10.000 variables that turned out to be a
|
||||
dead-end too.
|
||||
|
||||
The final idea to solve this came from domenukk who proposed to insert a block
|
||||
into an edge and then just use incremental counters ... and this worked!
|
||||
After some trials and errors to implement this vanhauser-thc found out that
|
||||
there is actually an llvm function for this: SplitEdge() :-)
|
||||
into an edge and then just use incremental counters ... and this worked! After
|
||||
some trials and errors to implement this vanhauser-thc found out that there is
|
||||
actually an llvm function for this: SplitEdge() :-)
|
||||
|
||||
Still more problems came up though as this only works without bugs from
|
||||
llvm 9 onwards, and with high optimization the link optimization ruins
|
||||
the instrumented control flow graph.
|
||||
Still more problems came up though as this only works without bugs from llvm 9
|
||||
onwards, and with high optimization the link optimization ruins the instrumented
|
||||
control flow graph.
|
||||
|
||||
This is all now fixed with llvm 11+. The llvm's own linker is now able to
|
||||
load passes and this bypasses all problems we had.
|
||||
This is all now fixed with llvm 11+. The llvm's own linker is now able to load
|
||||
passes and this bypasses all problems we had.
|
||||
|
||||
Happy end :)
|
||||
Happy end :)
|
@ -1,41 +0,0 @@
|
||||
# NeverZero counters for LLVM instrumentation
|
||||
|
||||
## Usage
|
||||
|
||||
In larger, complex or reiterative programs the byte sized counters that collect
|
||||
the edge coverage can easily fill up and wrap around.
|
||||
This is not that much of an issue - unless by chance it wraps just to a value
|
||||
of zero when the program execution ends.
|
||||
In this case afl-fuzz is not able to see that the edge has been accessed and
|
||||
will ignore it.
|
||||
|
||||
NeverZero prevents this behaviour. If a counter wraps, it jumps over the value
|
||||
0 directly to a 1. This improves path discovery (by a very little amount)
|
||||
at a very little cost (one instruction per edge).
|
||||
|
||||
(The alternative of saturated counters has been tested also and proved to be
|
||||
inferior in terms of path discovery.)
|
||||
|
||||
This is implemented in afl-gcc and afl-gcc-fast, however for llvm_mode this is
|
||||
optional if multithread safe counters are selected or the llvm version is below
|
||||
9 - as there are severe performance costs in these cases.
|
||||
|
||||
If you want to enable this for llvm versions below 9 or thread safe counters
|
||||
then set
|
||||
|
||||
```
|
||||
export AFL_LLVM_NOT_ZERO=1
|
||||
```
|
||||
|
||||
In case you are on llvm 9 or greater and you do not want this behaviour then
|
||||
you can set:
|
||||
```
|
||||
AFL_LLVM_SKIP_NEVERZERO=1
|
||||
```
|
||||
If the target does not have extensive loops or functions that are called
|
||||
a lot then this can give a small performance boost.
|
||||
|
||||
Please note that the default counter implementations are not thread safe!
|
||||
|
||||
Support for thread safe counters in mode LLVM CLASSIC can be activated with setting
|
||||
`AFL_LLVM_THREADSAFE_INST=1`.
|
@ -1,28 +0,0 @@
|
||||
# AFL N-Gram Branch Coverage
|
||||
|
||||
## Source
|
||||
|
||||
This is an LLVM-based implementation of the n-gram branch coverage proposed in
|
||||
the paper ["Be Sensitive and Collaborative: Analzying Impact of Coverage Metrics
|
||||
in Greybox Fuzzing"](https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf),
|
||||
by Jinghan Wang, et. al.
|
||||
|
||||
Note that the original implementation (available
|
||||
[here](https://github.com/bitsecurerlab/afl-sensitive))
|
||||
is built on top of AFL's QEMU mode.
|
||||
This is essentially a port that uses LLVM vectorized instructions (available from
|
||||
llvm versions 4.0.1 and higher) to achieve the same results when compiling source code.
|
||||
|
||||
In math the branch coverage is performed as follows:
|
||||
`map[current_location ^ prev_location[0] >> 1 ^ prev_location[1] >> 1 ^ ... up to n-1`] += 1`
|
||||
|
||||
## Usage
|
||||
|
||||
The size of `n` (i.e., the number of branches to remember) is an option
|
||||
that is specified either in the `AFL_LLVM_INSTRUMENT=NGRAM-{value}` or the
|
||||
`AFL_LLVM_NGRAM_SIZE` environment variable.
|
||||
Good values are 2, 4 or 8, valid are 2-16.
|
||||
|
||||
It is highly recommended to increase the MAP_SIZE_POW2 definition in
|
||||
config.h to at least 18 and maybe up to 20 for this as otherwise too
|
||||
many map collisions occur.
|
@ -1,19 +0,0 @@
|
||||
## Using AFL++ without inlined instrumentation
|
||||
|
||||
This file describes how you can disable inlining of instrumentation.
|
||||
|
||||
|
||||
By default, the GCC plugin will duplicate the effects of calling
|
||||
`__afl_trace` (see `afl-gcc-rt.o.c`) in instrumented code, instead of
|
||||
issuing function calls.
|
||||
|
||||
The calls are presumed to be slower, more so because the rt file
|
||||
itself is not optimized by the compiler.
|
||||
|
||||
Setting `AFL_GCC_OUT_OF_LINE=1` in the environment while compiling code
|
||||
with the plugin will disable this inlining, issuing calls to the
|
||||
unoptimized runtime instead.
|
||||
|
||||
You probably don't want to do this, but it might be useful in certain
|
||||
AFL debugging scenarios, and it might work as a fallback in case
|
||||
something goes wrong with the inlined instrumentation.
|
@ -3,23 +3,23 @@
|
||||
## 1) Introduction
|
||||
|
||||
In persistent mode, AFL++ fuzzes a target multiple times in a single forked
|
||||
process, instead of forking a new process for each fuzz execution.
|
||||
This is the most effective way to fuzz, as the speed can easily be x10 or x20
|
||||
times faster without any disadvanges.
|
||||
*All professional fuzzing uses this mode.*
|
||||
process, instead of forking a new process for each fuzz execution. This is the
|
||||
most effective way to fuzz, as the speed can easily be 10 or 20 times faster
|
||||
without any disadvantages. *All professional fuzzing uses this mode.*
|
||||
|
||||
Persistent mode requires that the target can be called in one or more functions,
|
||||
and that its state can be completely reset so that multiple calls can be
|
||||
performed without resource leaks, and that earlier runs will have no impact on
|
||||
future runs (an indicator for this is the `stability` value in the `afl-fuzz`
|
||||
UI, if this decreases to lower values in persistent mode compared to
|
||||
non-persistent mode, that the fuzz target keeps state).
|
||||
future runs. An indicator for this is the `stability` value in the `afl-fuzz`
|
||||
UI. If this decreases to lower values in persistent mode compared to
|
||||
non-persistent mode, then the fuzz target keeps state.
|
||||
|
||||
Examples can be found in [utils/persistent_mode](../utils/persistent_mode).
|
||||
|
||||
## 2) TLDR;
|
||||
## 2) TL;DR:
|
||||
|
||||
Example `fuzz_target.c`:
|
||||
|
||||
```c
|
||||
#include "what_you_need_for_your_target.h"
|
||||
|
||||
@ -27,7 +27,7 @@ __AFL_FUZZ_INIT();
|
||||
|
||||
main() {
|
||||
|
||||
// anything else here, eg. command line arguments, initialization, etc.
|
||||
// anything else here, e.g. command line arguments, initialization, etc.
|
||||
|
||||
#ifdef __AFL_HAVE_MANUAL_CONTROL
|
||||
__AFL_INIT();
|
||||
@ -54,14 +54,16 @@ main() {
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
And then compile:
|
||||
|
||||
```
|
||||
afl-clang-fast -o fuzz_target fuzz_target.c -lwhat_you_need_for_your_target
|
||||
```
|
||||
And that is it!
|
||||
The speed increase is usually x10 to x20.
|
||||
|
||||
If you want to be able to compile the target without afl-clang-fast/lto then
|
||||
And that is it! The speed increase is usually x10 to x20.
|
||||
|
||||
If you want to be able to compile the target without afl-clang-fast/lto, then
|
||||
add this just after the includes:
|
||||
|
||||
```c
|
||||
@ -72,20 +74,20 @@ add this just after the includes:
|
||||
#define __AFL_FUZZ_TESTCASE_BUF fuzz_buf
|
||||
#define __AFL_FUZZ_INIT() void sync(void);
|
||||
#define __AFL_LOOP(x) ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ? 1 : 0)
|
||||
#define __AFL_INIT() sync()
|
||||
#define __AFL_INIT() sync()
|
||||
#endif
|
||||
```
|
||||
|
||||
## 3) Deferred initialization
|
||||
|
||||
AFL tries to optimize performance by executing the targeted binary just once,
|
||||
stopping it just before `main()`, and then cloning this "main" process to get
|
||||
a steady supply of targets to fuzz.
|
||||
AFL++ tries to optimize performance by executing the targeted binary just once,
|
||||
stopping it just before `main()`, and then cloning this "main" process to get a
|
||||
steady supply of targets to fuzz.
|
||||
|
||||
Although this approach eliminates much of the OS-, linker- and libc-level
|
||||
costs of executing the program, it does not always help with binaries that
|
||||
perform other time-consuming initialization steps - say, parsing a large config
|
||||
file before getting to the fuzzed data.
|
||||
Although this approach eliminates much of the OS-, linker- and libc-level costs
|
||||
of executing the program, it does not always help with binaries that perform
|
||||
other time-consuming initialization steps - say, parsing a large config file
|
||||
before getting to the fuzzed data.
|
||||
|
||||
In such cases, it's beneficial to initialize the forkserver a bit later, once
|
||||
most of the initialization work is already done, but before the binary attempts
|
||||
@ -93,22 +95,21 @@ to read the fuzzed input and parse it; in some cases, this can offer a 10x+
|
||||
performance gain. You can implement delayed initialization in LLVM mode in a
|
||||
fairly simple way.
|
||||
|
||||
First, find a suitable location in the code where the delayed cloning can
|
||||
take place. This needs to be done with *extreme* care to avoid breaking the
|
||||
binary. In particular, the program will probably malfunction if you select
|
||||
a location after:
|
||||
First, find a suitable location in the code where the delayed cloning can take
|
||||
place. This needs to be done with *extreme* care to avoid breaking the binary.
|
||||
In particular, the program will probably malfunction if you select a location
|
||||
after:
|
||||
|
||||
- The creation of any vital threads or child processes - since the forkserver
|
||||
can't clone them easily.
|
||||
- The creation of any vital threads or child processes - since the forkserver
|
||||
can't clone them easily.
|
||||
|
||||
- The initialization of timers via `setitimer()` or equivalent calls.
|
||||
- The initialization of timers via `setitimer()` or equivalent calls.
|
||||
|
||||
- The creation of temporary files, network sockets, offset-sensitive file
|
||||
descriptors, and similar shared-state resources - but only provided that
|
||||
their state meaningfully influences the behavior of the program later on.
|
||||
- The creation of temporary files, network sockets, offset-sensitive file
|
||||
descriptors, and similar shared-state resources - but only provided that their
|
||||
state meaningfully influences the behavior of the program later on.
|
||||
|
||||
- Any access to the fuzzed input, including reading the metadata about its
|
||||
size.
|
||||
- Any access to the fuzzed input, including reading the metadata about its size.
|
||||
|
||||
With the location selected, add this code in the appropriate spot:
|
||||
|
||||
@ -126,13 +127,12 @@ Finally, recompile the program with afl-clang-fast/afl-clang-lto/afl-gcc-fast
|
||||
(afl-gcc or afl-clang will *not* generate a deferred-initialization binary) -
|
||||
and you should be all set!
|
||||
|
||||
|
||||
## 4) Persistent mode
|
||||
|
||||
Some libraries provide APIs that are stateless, or whose state can be reset in
|
||||
between processing different input files. When such a reset is performed, a
|
||||
single long-lived process can be reused to try out multiple test cases,
|
||||
eliminating the need for repeated fork() calls and the associated OS overhead.
|
||||
eliminating the need for repeated `fork()` calls and the associated OS overhead.
|
||||
|
||||
The basic structure of the program that does this would be:
|
||||
|
||||
@ -145,34 +145,34 @@ The basic structure of the program that does this would be:
|
||||
|
||||
}
|
||||
|
||||
/* Exit normally */
|
||||
/* Exit normally. */
|
||||
```
|
||||
|
||||
The numerical value specified within the loop controls the maximum number
|
||||
of iterations before AFL will restart the process from scratch. This minimizes
|
||||
The numerical value specified within the loop controls the maximum number of
|
||||
iterations before AFL++ will restart the process from scratch. This minimizes
|
||||
the impact of memory leaks and similar glitches; 1000 is a good starting point,
|
||||
and going much higher increases the likelihood of hiccups without giving you
|
||||
any real performance benefits.
|
||||
and going much higher increases the likelihood of hiccups without giving you any
|
||||
real performance benefits.
|
||||
|
||||
A more detailed template is shown in `../utils/persistent_mode/.`
|
||||
Similarly to the previous mode, the feature works only with afl-clang-fast;
|
||||
`#ifdef` guards can be used to suppress it when using other compilers.
|
||||
A more detailed template is shown in
|
||||
[utils/persistent_mode](../utils/persistent_mode). Similarly to the deferred
|
||||
initialization, the feature works only with afl-clang-fast; `#ifdef` guards can
|
||||
be used to suppress it when using other compilers.
|
||||
|
||||
Note that as with the previous mode, the feature is easy to misuse; if you
|
||||
do not fully reset the critical state, you may end up with false positives or
|
||||
waste a whole lot of CPU power doing nothing useful at all. Be particularly
|
||||
Note that as with the deferred initialization, the feature is easy to misuse; if
|
||||
you do not fully reset the critical state, you may end up with false positives
|
||||
or waste a whole lot of CPU power doing nothing useful at all. Be particularly
|
||||
wary of memory leaks and of the state of file descriptors.
|
||||
|
||||
PS. Because there are task switches still involved, the mode isn't as fast as
|
||||
"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot
|
||||
faster than the normal `fork()` model, and compared to in-process fuzzing,
|
||||
should be a lot more robust.
|
||||
When running in this mode, the execution paths will inherently vary a bit
|
||||
depending on whether the input loop is being entered for the first time or
|
||||
executed again.
|
||||
|
||||
## 5) Shared memory fuzzing
|
||||
|
||||
You can speed up the fuzzing process even more by receiving the fuzzing data
|
||||
via shared memory instead of stdin or files.
|
||||
This is a further speed multiplier of about 2x.
|
||||
You can speed up the fuzzing process even more by receiving the fuzzing data via
|
||||
shared memory instead of stdin or files. This is a further speed multiplier of
|
||||
about 2x.
|
||||
|
||||
Setting this up is very easy:
|
||||
|
||||
@ -181,14 +181,18 @@ After the includes set the following macro:
|
||||
```c
|
||||
__AFL_FUZZ_INIT();
|
||||
```
|
||||
Directly at the start of main - or if you are using the deferred forkserver
|
||||
with `__AFL_INIT()` then *after* `__AFL_INIT()` :
|
||||
|
||||
Directly at the start of main - or if you are using the deferred forkserver with
|
||||
`__AFL_INIT()`, then *after* `__AFL_INIT()`:
|
||||
|
||||
```c
|
||||
unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
|
||||
```
|
||||
|
||||
Then, as the first line after the `__AFL_LOOP` while loop:
|
||||
|
||||
```c
|
||||
int len = __AFL_FUZZ_TESTCASE_LEN;
|
||||
```
|
||||
and that is all!
|
||||
|
||||
And that is all!
|
@ -1,18 +0,0 @@
|
||||
# AFL++ snapshot feature
|
||||
|
||||
**NOTE:** the snapshot lkm is currently not supported and needs a maintainer :-)
|
||||
|
||||
Snapshotting is a feature that makes a snapshot from a process and then
|
||||
restores its state, which is faster than forking it again.
|
||||
|
||||
All targets compiled with llvm_mode are automatically enabled for the
|
||||
snapshot feature.
|
||||
|
||||
To use the snapshot feature for fuzzing, compile and load this kernel
|
||||
module: [https://github.com/AFLplusplus/AFL-Snapshot-LKM](https://github.com/AFLplusplus/AFL-Snapshot-LKM)
|
||||
|
||||
Note that it has little value for persistent (__AFL_LOOP) fuzzing.
|
||||
|
||||
## Notes
|
||||
|
||||
Snapshot does not work with multithreaded targets yet. It is still a work in progress and is currently usable only for single-threaded applications.
|
@ -621,7 +621,6 @@ bool ModuleSanitizerCoverage::instrumentModule(
|
||||
bool isStrncasecmp = true;
|
||||
bool isIntMemcpy = true;
|
||||
bool isStdString = true;
|
||||
bool addedNull = false;
|
||||
size_t optLen = 0;
|
||||
|
||||
Function *Callee = callInst->getCalledFunction();
|
||||
@ -801,7 +800,6 @@ bool ModuleSanitizerCoverage::instrumentModule(
|
||||
if (literalLength + 1 == optLength) {
|
||||
|
||||
Str2.append("\0", 1); // add null byte
|
||||
// addedNull = true;
|
||||
|
||||
}
|
||||
|
||||
@ -909,8 +907,8 @@ bool ModuleSanitizerCoverage::instrumentModule(
|
||||
|
||||
if (optLen < 2) { continue; }
|
||||
if (literalLength + 1 == optLen) { // add null byte
|
||||
|
||||
thestring.append("\0", 1);
|
||||
addedNull = true;
|
||||
|
||||
}
|
||||
|
||||
@ -922,14 +920,18 @@ bool ModuleSanitizerCoverage::instrumentModule(
|
||||
// was not already added
|
||||
if (!isMemcmp) {
|
||||
|
||||
if (addedNull == false && thestring[optLen - 1] != '\0') {
|
||||
/*
|
||||
if (addedNull == false && thestring[optLen - 1] !=
|
||||
'\0') {
|
||||
|
||||
thestring.append("\0", 1); // add null byte
|
||||
optLen++;
|
||||
thestring.append("\0", 1); // add null byte
|
||||
optLen++;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (!isStdString) {
|
||||
*/
|
||||
if (!isStdString &&
|
||||
thestring.find('\0', 0) != std::string::npos) {
|
||||
|
||||
// ensure we do not have garbage
|
||||
size_t offset = thestring.find('\0', 0);
|
||||
|
@ -291,7 +291,6 @@ bool AFLdict2filePass::runOnModule(Module &M) {
|
||||
bool isIntMemcpy = true;
|
||||
bool isStdString = true;
|
||||
bool isStrstr = true;
|
||||
bool addedNull = false;
|
||||
size_t optLen = 0;
|
||||
|
||||
Function *Callee = callInst->getCalledFunction();
|
||||
@ -590,8 +589,8 @@ bool AFLdict2filePass::runOnModule(Module &M) {
|
||||
|
||||
if (optLen < 2) { continue; }
|
||||
if (literalLength + 1 == optLen) { // add null byte
|
||||
|
||||
thestring.append("\0", 1);
|
||||
addedNull = true;
|
||||
|
||||
}
|
||||
|
||||
@ -603,14 +602,17 @@ bool AFLdict2filePass::runOnModule(Module &M) {
|
||||
// was not already added
|
||||
if (!isMemcmp) {
|
||||
|
||||
if (addedNull == false && thestring[optLen - 1] != '\0') {
|
||||
/*
|
||||
if (addedNull == false && thestring[optLen - 1] != '\0')
|
||||
{
|
||||
|
||||
thestring.append("\0", 1); // add null byte
|
||||
optLen++;
|
||||
thestring.append("\0", 1); // add null byte
|
||||
optLen++;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (!isStdString) {
|
||||
*/
|
||||
if (!isStdString && thestring.find('\0', 0) != std::string::npos) {
|
||||
|
||||
// ensure we do not have garbage
|
||||
size_t offset = thestring.find('\0', 0);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -45,18 +45,12 @@ typedef long double max_align_t;
|
||||
#endif
|
||||
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
#include "llvm/Passes/PassPlugin.h"
|
||||
#include "llvm/Passes/PassBuilder.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#else
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
#endif
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
|
||||
#if LLVM_VERSION_MAJOR > 3 || \
|
||||
(LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
|
||||
@ -74,26 +68,17 @@ using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
class AFLCoverage : public PassInfoMixin<AFLCoverage> {
|
||||
public:
|
||||
AFLCoverage() {
|
||||
#else
|
||||
class AFLCoverage : public ModulePass {
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
AFLCoverage() : ModulePass(ID) {
|
||||
#endif
|
||||
|
||||
initInstrumentList();
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
|
||||
#else
|
||||
bool runOnModule(Module &M) override;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
uint32_t ngram_size = 0;
|
||||
@ -107,41 +92,7 @@ class AFLCoverage : public ModulePass {
|
||||
|
||||
} // namespace
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
|
||||
llvmGetPassPluginInfo() {
|
||||
return {
|
||||
LLVM_PLUGIN_API_VERSION, "AFLCoverage", "v0.1",
|
||||
/* lambda to insert our pass into the pass pipeline. */
|
||||
[](PassBuilder &PB) {
|
||||
#if 1
|
||||
using OptimizationLevel = typename PassBuilder::OptimizationLevel;
|
||||
PB.registerOptimizerLastEPCallback(
|
||||
[](ModulePassManager &MPM, OptimizationLevel OL) {
|
||||
MPM.addPass(AFLCoverage());
|
||||
}
|
||||
);
|
||||
/* TODO LTO registration */
|
||||
#else
|
||||
using PipelineElement = typename PassBuilder::PipelineElement;
|
||||
PB.registerPipelineParsingCallback(
|
||||
[](StringRef Name, ModulePassManager &MPM, ArrayRef<PipelineElement>) {
|
||||
if ( Name == "AFLCoverage" ) {
|
||||
MPM.addPass(AFLCoverage());
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
}
|
||||
#else
|
||||
|
||||
char AFLCoverage::ID = 0;
|
||||
#endif
|
||||
|
||||
/* needed up to 3.9.0 */
|
||||
#if LLVM_VERSION_MAJOR == 3 && \
|
||||
@ -167,13 +118,7 @@ uint64_t PowerOf2Ceil(unsigned in) {
|
||||
(LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_PATCH >= 1)
|
||||
#define AFL_HAVE_VECTOR_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
PreservedAnalyses AFLCoverage::run(Module &M, ModuleAnalysisManager &MAM) {
|
||||
#else
|
||||
bool AFLCoverage::runOnModule(Module &M) {
|
||||
#endif
|
||||
|
||||
LLVMContext &C = M.getContext();
|
||||
|
||||
@ -188,10 +133,6 @@ bool AFLCoverage::runOnModule(Module &M) {
|
||||
u32 rand_seed;
|
||||
unsigned int cur_loc = 0;
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
auto PA = PreservedAnalyses::all();
|
||||
#endif
|
||||
|
||||
/* Setup random() so we get Actually Random(TM) outputs from AFL_R() */
|
||||
gettimeofday(&tv, &tz);
|
||||
rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
|
||||
@ -1029,15 +970,10 @@ bool AFLCoverage::runOnModule(Module &M) {
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
return PA;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_VERSION_MAJOR < 7 /* use old pass manager */
|
||||
static void registerAFLPass(const PassManagerBuilder &,
|
||||
legacy::PassManagerBase &PM) {
|
||||
|
||||
@ -1050,4 +986,4 @@ static RegisterStandardPasses RegisterAFLPass(
|
||||
|
||||
static RegisterStandardPasses RegisterAFLPass0(
|
||||
PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass);
|
||||
#endif
|
||||
|
||||
|
@ -26,17 +26,11 @@
|
||||
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#if LLVM_MAJOR >= 7 /* use new pass manager */
|
||||
#include "llvm/Passes/PassPlugin.h"
|
||||
#include "llvm/Passes/PassBuilder.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#else
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
#endif
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
@ -58,28 +52,28 @@ using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
#if LLVM_MAJOR >= 7 /* use new pass manager */
|
||||
class CompareTransform : public PassInfoMixin<CompareTransform> {
|
||||
|
||||
public:
|
||||
CompareTransform() {
|
||||
#else
|
||||
class CompareTransform : public ModulePass {
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
CompareTransform() : ModulePass(ID) {
|
||||
#endif
|
||||
|
||||
initInstrumentList();
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_MAJOR >= 7 /* use new pass manager */
|
||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
|
||||
#else
|
||||
bool runOnModule(Module &M) override;
|
||||
|
||||
#if LLVM_VERSION_MAJOR < 4
|
||||
const char *getPassName() const override {
|
||||
|
||||
#else
|
||||
StringRef getPassName() const override {
|
||||
|
||||
#endif
|
||||
return "transforms compare functions";
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
bool transformCmps(Module &M, const bool processStrcmp,
|
||||
@ -91,40 +85,7 @@ class CompareTransform : public ModulePass {
|
||||
|
||||
} // namespace
|
||||
|
||||
#if LLVM_MAJOR >= 7 /* use new pass manager */
|
||||
extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
|
||||
llvmGetPassPluginInfo() {
|
||||
return {
|
||||
LLVM_PLUGIN_API_VERSION, "comparetransform", "v0.1",
|
||||
/* lambda to insert our pass into the pass pipeline. */
|
||||
[](PassBuilder &PB) {
|
||||
#if 1
|
||||
using OptimizationLevel = typename PassBuilder::OptimizationLevel;
|
||||
PB.registerOptimizerLastEPCallback(
|
||||
[](ModulePassManager &MPM, OptimizationLevel OL) {
|
||||
MPM.addPass(CompareTransform());
|
||||
}
|
||||
);
|
||||
/* TODO LTO registration */
|
||||
#else
|
||||
using PipelineElement = typename PassBuilder::PipelineElement;
|
||||
PB.registerPipelineParsingCallback(
|
||||
[](StringRef Name, ModulePassManager &MPM, ArrayRef<PipelineElement>) {
|
||||
if ( Name == "comparetransform" ) {
|
||||
MPM.addPass(CompareTransform());
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
}
|
||||
#else
|
||||
char CompareTransform::ID = 0;
|
||||
#endif
|
||||
|
||||
bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
|
||||
const bool processMemcmp,
|
||||
@ -484,6 +445,10 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
|
||||
|
||||
}
|
||||
|
||||
// the following is in general OK, but strncmp is sometimes used in binary
|
||||
// data structures and this can result in crashes :( so it is commented out
|
||||
/*
|
||||
|
||||
// add null termination character implicit in c strings
|
||||
if (!isMemcmp && TmpConstStr[TmpConstStr.length() - 1]) {
|
||||
|
||||
@ -491,10 +456,12 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
|
||||
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
// in the unusual case the const str has embedded null
|
||||
// characters, the string comparison functions should terminate
|
||||
// at the first null
|
||||
if (!isMemcmp) {
|
||||
if (!isMemcmp && TmpConstStr.find('\0') != std::string::npos) {
|
||||
|
||||
TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1);
|
||||
|
||||
@ -631,11 +598,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_MAJOR >= 7 /* use new pass manager */
|
||||
PreservedAnalyses CompareTransform::run(Module &M, ModuleAnalysisManager &MAM) {
|
||||
#else
|
||||
bool CompareTransform::runOnModule(Module &M) {
|
||||
#endif
|
||||
|
||||
if ((isatty(2) && getenv("AFL_QUIET") == NULL) || getenv("AFL_DEBUG") != NULL)
|
||||
printf(
|
||||
@ -644,26 +607,13 @@ bool CompareTransform::runOnModule(Module &M) {
|
||||
else
|
||||
be_quiet = 1;
|
||||
|
||||
#if LLVM_MAJOR >= 7 /* use new pass manager */
|
||||
auto PA = PreservedAnalyses::all();
|
||||
#endif
|
||||
|
||||
transformCmps(M, true, true, true, true, true);
|
||||
verifyModule(M);
|
||||
|
||||
#if LLVM_MAJOR >= 7 /* use new pass manager */
|
||||
/* if (modified) {
|
||||
PA.abandon<XX_Manager>();
|
||||
}*/
|
||||
|
||||
return PA;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_MAJOR < 7 /* use old pass manager */
|
||||
static void registerCompTransPass(const PassManagerBuilder &,
|
||||
legacy::PassManagerBase &PM) {
|
||||
|
||||
@ -682,5 +632,4 @@ static RegisterStandardPasses RegisterCompTransPass0(
|
||||
static RegisterStandardPasses RegisterCompTransPassLTO(
|
||||
PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerCompTransPass);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -1,7 +1,6 @@
|
||||
/*
|
||||
* Copyright 2016 laf-intel
|
||||
* extended for floating point by Heiko Eißfeldt
|
||||
* adapted to new pass manager by Heiko Eißfeldt
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -29,15 +28,8 @@
|
||||
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#if LLVM_MAJOR >= 7
|
||||
#include "llvm/Passes/PassPlugin.h"
|
||||
#include "llvm/Passes/PassBuilder.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#else
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
#endif
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
|
||||
@ -61,26 +53,27 @@ using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
#if LLVM_MAJOR >= 7
|
||||
class SplitComparesTransform : public PassInfoMixin<SplitComparesTransform> {
|
||||
public:
|
||||
// static char ID;
|
||||
SplitComparesTransform() : enableFPSplit(0) {
|
||||
#else
|
||||
class SplitComparesTransform : public ModulePass {
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
SplitComparesTransform() : ModulePass(ID), enableFPSplit(0) {
|
||||
#endif
|
||||
|
||||
initInstrumentList();
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_MAJOR >= 7
|
||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
|
||||
#else
|
||||
bool runOnModule(Module &M) override;
|
||||
#if LLVM_VERSION_MAJOR >= 4
|
||||
StringRef getPassName() const override {
|
||||
|
||||
#else
|
||||
const char *getPassName() const override {
|
||||
|
||||
#endif
|
||||
return "AFL_SplitComparesTransform";
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
int enableFPSplit;
|
||||
@ -169,40 +162,7 @@ class SplitComparesTransform : public ModulePass {
|
||||
|
||||
} // namespace
|
||||
|
||||
#if LLVM_MAJOR >= 7
|
||||
extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
|
||||
llvmGetPassPluginInfo() {
|
||||
return {
|
||||
LLVM_PLUGIN_API_VERSION, "splitcompares", "v0.1",
|
||||
/* lambda to insert our pass into the pass pipeline. */
|
||||
[](PassBuilder &PB) {
|
||||
#if 1
|
||||
using OptimizationLevel = typename PassBuilder::OptimizationLevel;
|
||||
PB.registerOptimizerLastEPCallback(
|
||||
[](ModulePassManager &MPM, OptimizationLevel OL) {
|
||||
MPM.addPass(SplitComparesTransform());
|
||||
}
|
||||
);
|
||||
/* TODO LTO registration */
|
||||
#else
|
||||
using PipelineElement = typename PassBuilder::PipelineElement;
|
||||
PB.registerPipelineParsingCallback(
|
||||
[](StringRef Name, ModulePassManager &MPM, ArrayRef<PipelineElement>) {
|
||||
if ( Name == "splitcompares" ) {
|
||||
MPM.addPass(SplitComparesTransform());
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
}
|
||||
#else
|
||||
char SplitComparesTransform::ID = 0;
|
||||
#endif
|
||||
|
||||
/// This function splits FCMP instructions with xGE or xLE predicates into two
|
||||
/// FCMP instructions with predicate xGT or xLT and EQ
|
||||
@ -1356,11 +1316,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_MAJOR >= 7
|
||||
PreservedAnalyses SplitComparesTransform::run(Module &M, ModuleAnalysisManager &MAM) {
|
||||
#else
|
||||
bool SplitComparesTransform::runOnModule(Module &M) {
|
||||
#endif
|
||||
|
||||
char *bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW");
|
||||
if (!bitw_env) bitw_env = getenv("LAF_SPLIT_COMPARES_BITW");
|
||||
@ -1371,7 +1327,7 @@ bool SplitComparesTransform::runOnModule(Module &M) {
|
||||
if ((isatty(2) && getenv("AFL_QUIET") == NULL) ||
|
||||
getenv("AFL_DEBUG") != NULL) {
|
||||
|
||||
errs() << "Split-compare-newpass by laf.intel@gmail.com, extended by "
|
||||
errs() << "Split-compare-pass by laf.intel@gmail.com, extended by "
|
||||
"heiko@hexco.de (splitting icmp to "
|
||||
<< target_bitwidth << " bit)\n";
|
||||
|
||||
@ -1383,10 +1339,6 @@ bool SplitComparesTransform::runOnModule(Module &M) {
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_MAJOR >= 7
|
||||
auto PA = PreservedAnalyses::all();
|
||||
#endif
|
||||
|
||||
if (enableFPSplit) {
|
||||
|
||||
count = splitFPCompares(M);
|
||||
@ -1419,13 +1371,7 @@ bool SplitComparesTransform::runOnModule(Module &M) {
|
||||
|
||||
auto op0 = CI->getOperand(0);
|
||||
auto op1 = CI->getOperand(1);
|
||||
if (!op0 || !op1) {
|
||||
#if LLVM_MAJOR >= 7
|
||||
return PA;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
if (!op0 || !op1) { return false; }
|
||||
auto iTy1 = dyn_cast<IntegerType>(op0->getType());
|
||||
if (iTy1 && isa<IntegerType>(op1->getType())) {
|
||||
|
||||
@ -1474,25 +1420,10 @@ bool SplitComparesTransform::runOnModule(Module &M) {
|
||||
|
||||
}
|
||||
|
||||
if ((isatty(2) && getenv("AFL_QUIET") == NULL) ||
|
||||
getenv("AFL_DEBUG") != NULL) {
|
||||
errs() << count << " comparisons found\n";
|
||||
}
|
||||
|
||||
#if LLVM_MAJOR >= 7
|
||||
/* if (modified) {
|
||||
PA.abandon<XX_Manager>();
|
||||
}*/
|
||||
|
||||
return PA;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_MAJOR < 7 /* use old pass manager */
|
||||
|
||||
static void registerSplitComparesPass(const PassManagerBuilder &,
|
||||
legacy::PassManagerBase &PM) {
|
||||
|
||||
@ -1516,4 +1447,4 @@ static RegisterPass<SplitComparesTransform> X("splitcompares",
|
||||
"AFL++ split compares",
|
||||
true /* Only looks at CFG */,
|
||||
true /* Analysis Pass */);
|
||||
#endif
|
||||
|
||||
|
@ -27,17 +27,11 @@
|
||||
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
#include "llvm/Passes/PassPlugin.h"
|
||||
#include "llvm/Passes/PassBuilder.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#else
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
#endif
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
@ -60,25 +54,16 @@ using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
class SplitSwitchesTransform : public PassInfoMixin<SplitSwitchesTransform> {
|
||||
|
||||
public:
|
||||
SplitSwitchesTransform() {
|
||||
#else
|
||||
class SplitSwitchesTransform : public ModulePass {
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
SplitSwitchesTransform() : ModulePass(ID) {
|
||||
#endif
|
||||
|
||||
initInstrumentList();
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
|
||||
#else
|
||||
bool runOnModule(Module &M) override;
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 4
|
||||
@ -91,7 +76,6 @@ class SplitSwitchesTransform : public ModulePass {
|
||||
return "splits switch constructs";
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
struct CaseExpr {
|
||||
|
||||
@ -119,40 +103,7 @@ class SplitSwitchesTransform : public ModulePass {
|
||||
|
||||
} // namespace
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
|
||||
llvmGetPassPluginInfo() {
|
||||
return {
|
||||
LLVM_PLUGIN_API_VERSION, "splitswitches", "v0.1",
|
||||
/* lambda to insert our pass into the pass pipeline. */
|
||||
[](PassBuilder &PB) {
|
||||
#if 1
|
||||
using OptimizationLevel = typename PassBuilder::OptimizationLevel;
|
||||
PB.registerOptimizerLastEPCallback(
|
||||
[](ModulePassManager &MPM, OptimizationLevel OL) {
|
||||
MPM.addPass(SplitSwitchesTransform());
|
||||
}
|
||||
);
|
||||
/* TODO LTO registration */
|
||||
#else
|
||||
using PipelineElement = typename PassBuilder::PipelineElement;
|
||||
PB.registerPipelineParsingCallback(
|
||||
[](StringRef Name, ModulePassManager &MPM, ArrayRef<PipelineElement>) {
|
||||
if ( Name == "splitswitches" ) {
|
||||
MPM.addPass(SplitSwitchesTransform());
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
}
|
||||
#else
|
||||
char SplitSwitchesTransform::ID = 0;
|
||||
#endif
|
||||
|
||||
/* switchConvert - Transform simple list of Cases into list of CaseRange's */
|
||||
BasicBlock *SplitSwitchesTransform::switchConvert(
|
||||
@ -464,37 +415,19 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) {
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
PreservedAnalyses SplitSwitchesTransform::run(Module &M, ModuleAnalysisManager &MAM) {
|
||||
#else
|
||||
bool SplitSwitchesTransform::runOnModule(Module &M) {
|
||||
#endif
|
||||
|
||||
if ((isatty(2) && getenv("AFL_QUIET") == NULL) || getenv("AFL_DEBUG") != NULL)
|
||||
printf("Running split-switches-pass by laf.intel@gmail.com\n");
|
||||
else
|
||||
be_quiet = 1;
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
auto PA = PreservedAnalyses::all();
|
||||
#endif
|
||||
|
||||
splitSwitches(M);
|
||||
verifyModule(M);
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
/* if (modified) {
|
||||
PA.abandon<XX_Manager>();
|
||||
}*/
|
||||
|
||||
return PA;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#if LLVM_VERSION_MAJOR < 7 /* use old pass manager */
|
||||
static void registerSplitSwitchesTransPass(const PassManagerBuilder &,
|
||||
legacy::PassManagerBase &PM) {
|
||||
|
||||
@ -514,4 +447,4 @@ static RegisterStandardPasses RegisterSplitSwitchesTransPassLTO(
|
||||
PassManagerBuilder::EP_FullLinkTimeOptimizationLast,
|
||||
registerSplitSwitchesTransPass);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -19,7 +19,7 @@ finding capabilities during fuzzing) is WIP.
|
||||
### When should I use QASan?
|
||||
|
||||
If your target binary is PIC x86_64, you should also give a try to
|
||||
[retrowrite](https://github.com/HexHive/retrowrite) for static rewriting.
|
||||
[RetroWrite](https://github.com/HexHive/retrowrite) for static rewriting.
|
||||
|
||||
If it fails, or if your binary is for another architecture, or you want to use
|
||||
persistent and snapshot mode, AFL++ QASan mode is what you want/have to use.
|
||||
|
43
src/afl-cc.c
43
src/afl-cc.c
@ -462,17 +462,12 @@ static void edit_params(u32 argc, char **argv, char **envp) {
|
||||
|
||||
} else {
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
cc_params[cc_par_cnt++] = "-fexperimental-new-pass-manager";
|
||||
cc_params[cc_par_cnt++] =
|
||||
alloc_printf("-fpass-plugin=%s/split-switches-pass.so", obj_path);
|
||||
#else
|
||||
cc_params[cc_par_cnt++] = "-Xclang";
|
||||
cc_params[cc_par_cnt++] = "-load";
|
||||
cc_params[cc_par_cnt++] = "-Xclang";
|
||||
cc_params[cc_par_cnt++] =
|
||||
alloc_printf("%s/split-switches-pass.so", obj_path);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -487,17 +482,11 @@ static void edit_params(u32 argc, char **argv, char **envp) {
|
||||
|
||||
} else {
|
||||
|
||||
#if LLVM_VERSION_MAJOR >= 7 /* use new pass manager */
|
||||
cc_params[cc_par_cnt++] = "-fexperimental-new-pass-manager";
|
||||
cc_params[cc_par_cnt++] =
|
||||
alloc_printf("-fpass-plugin=%s/compare-transform-pass.so", obj_path);
|
||||
#else
|
||||
cc_params[cc_par_cnt++] = "-Xclang";
|
||||
cc_params[cc_par_cnt++] = "-load";
|
||||
cc_params[cc_par_cnt++] = "-Xclang";
|
||||
cc_params[cc_par_cnt++] =
|
||||
alloc_printf("%s/compare-transform-pass.so", obj_path);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
@ -513,18 +502,11 @@ static void edit_params(u32 argc, char **argv, char **envp) {
|
||||
|
||||
} else {
|
||||
|
||||
#if LLVM_MAJOR >= 7
|
||||
cc_params[cc_par_cnt++] = "-fexperimental-new-pass-manager";
|
||||
cc_params[cc_par_cnt++] =
|
||||
alloc_printf("-fpass-plugin=%s/split-compares-pass.so", obj_path);
|
||||
// cc_params[cc_par_cnt++] = "-fno-experimental-new-pass-manager";
|
||||
#else
|
||||
cc_params[cc_par_cnt++] = "-Xclang";
|
||||
cc_params[cc_par_cnt++] = "-load";
|
||||
cc_params[cc_par_cnt++] = "-Xclang";
|
||||
cc_params[cc_par_cnt++] =
|
||||
alloc_printf("%s/split-compares-pass.so", obj_path);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
@ -554,17 +536,11 @@ static void edit_params(u32 argc, char **argv, char **envp) {
|
||||
alloc_printf("%s/cmplog-switches-pass.so", obj_path);
|
||||
|
||||
// reuse split switches from laf
|
||||
#if LLVM_MAJOR >= 7
|
||||
cc_params[cc_par_cnt++] = "-fexperimental-new-pass-manager";
|
||||
cc_params[cc_par_cnt++] =
|
||||
alloc_printf("-fpass-plugin=%s/split-switches-pass.so", obj_path);
|
||||
#else
|
||||
cc_params[cc_par_cnt++] = "-Xclang";
|
||||
cc_params[cc_par_cnt++] = "-load";
|
||||
cc_params[cc_par_cnt++] = "-Xclang";
|
||||
cc_params[cc_par_cnt++] =
|
||||
alloc_printf("%s/split-switches-pass.so", obj_path);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
@ -590,15 +566,8 @@ static void edit_params(u32 argc, char **argv, char **envp) {
|
||||
free(ld_path);
|
||||
|
||||
cc_params[cc_par_cnt++] = "-Wl,--allow-multiple-definition";
|
||||
|
||||
if (instrument_mode == INSTRUMENT_CFG ||
|
||||
instrument_mode == INSTRUMENT_PCGUARD)
|
||||
cc_params[cc_par_cnt++] = alloc_printf(
|
||||
"-Wl,-mllvm=-load=%s/SanitizerCoverageLTO.so", obj_path);
|
||||
else
|
||||
|
||||
cc_params[cc_par_cnt++] = alloc_printf(
|
||||
"-Wl,-mllvm=-load=%s/afl-llvm-lto-instrumentation.so", obj_path);
|
||||
cc_params[cc_par_cnt++] =
|
||||
alloc_printf("-Wl,-mllvm=-load=%s/SanitizerCoverageLTO.so", obj_path);
|
||||
cc_params[cc_par_cnt++] = lto_flag;
|
||||
|
||||
} else {
|
||||
@ -654,15 +623,11 @@ static void edit_params(u32 argc, char **argv, char **envp) {
|
||||
|
||||
} else {
|
||||
|
||||
#if LLVM_MAJOR >= 7
|
||||
cc_params[cc_par_cnt++] = "-fexperimental-new-pass-manager";
|
||||
cc_params[cc_par_cnt++] = alloc_printf("-fpass-plugin=%s/afl-llvm-pass.so", obj_path);
|
||||
#else
|
||||
cc_params[cc_par_cnt++] = "-Xclang";
|
||||
cc_params[cc_par_cnt++] = "-load";
|
||||
cc_params[cc_par_cnt++] = "-Xclang";
|
||||
cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -255,6 +255,7 @@ struct custom_mutator *load_custom_mutator(afl_state_t *afl, const char *fn) {
|
||||
mutator->afl_custom_init_trim = dlsym(dh, "afl_custom_init_trim");
|
||||
if (!mutator->afl_custom_init_trim) {
|
||||
|
||||
notrim = 1;
|
||||
ACTF("optional symbol 'afl_custom_init_trim' not found.");
|
||||
|
||||
}
|
||||
@ -263,6 +264,7 @@ struct custom_mutator *load_custom_mutator(afl_state_t *afl, const char *fn) {
|
||||
mutator->afl_custom_trim = dlsym(dh, "afl_custom_trim");
|
||||
if (!mutator->afl_custom_trim) {
|
||||
|
||||
notrim = 1;
|
||||
ACTF("optional symbol 'afl_custom_trim' not found.");
|
||||
|
||||
}
|
||||
@ -271,6 +273,7 @@ struct custom_mutator *load_custom_mutator(afl_state_t *afl, const char *fn) {
|
||||
mutator->afl_custom_post_trim = dlsym(dh, "afl_custom_post_trim");
|
||||
if (!mutator->afl_custom_post_trim) {
|
||||
|
||||
notrim = 1;
|
||||
ACTF("optional symbol 'afl_custom_post_trim' not found.");
|
||||
|
||||
}
|
||||
|
@ -1,13 +1,16 @@
|
||||
# C Sample
|
||||
|
||||
This shows a simple persistent harness for unicornafl in C.
|
||||
In contrast to the normal c harness, this harness manually resets the unicorn state on each new input.
|
||||
Thanks to this, we can rerun the testcase in unicorn multiple times, without the need to fork again.
|
||||
In contrast to the normal C harness, this harness manually resets the unicorn
|
||||
state on each new input.
|
||||
Thanks to this, we can rerun the test case in unicorn multiple times, without
|
||||
the need to fork again.
|
||||
|
||||
## Compiling sample.c
|
||||
|
||||
The target can be built using the `make` command.
|
||||
Just make sure you have built unicorn support first:
|
||||
|
||||
```bash
|
||||
cd /path/to/afl/unicorn_mode
|
||||
./build_unicorn_support.sh
|
||||
@ -19,6 +22,7 @@ You don't need to compile persistent_target.c since a X86_64 binary version is
|
||||
pre-built and shipped in this sample folder. This file documents how the binary
|
||||
was built in case you want to rebuild it or recompile it for any reason.
|
||||
|
||||
The pre-built binary (persistent_target_x86_64.bin) was built using -g -O0 in gcc.
|
||||
The pre-built binary (persistent_target_x86_64.bin) was built using -g -O0 in
|
||||
gcc.
|
||||
|
||||
We then load the binary and we execute the main function directly.
|
||||
We then load the binary and we execute the main function directly.
|
@ -7,15 +7,15 @@ targets.
|
||||
|
||||
Just do `afl-clang-fast++ -o fuzz fuzzer_harness.cc libAFLDriver.a [plus required linking]`.
|
||||
|
||||
You can also sneakily do this little trick:
|
||||
You can also sneakily do this little trick:
|
||||
If this is the clang compile command to build for libfuzzer:
|
||||
`clang++ -o fuzz -fsanitize=fuzzer fuzzer_harness.cc -lfoo`
|
||||
then just switch `clang++` with `afl-clang-fast++` and our compiler will
|
||||
magically insert libAFLDriver.a :)
|
||||
|
||||
To use shared-memory testcases, you need nothing to do.
|
||||
To use stdin testcases give `-` as the only command line parameter.
|
||||
To use file input testcases give `@@` as the only command line parameter.
|
||||
To use shared-memory test cases, you do not need to do anything.
|
||||
To use stdin test cases, give `-` as the only command line parameter.
|
||||
To use file input test cases, give `@@` as the only command line parameter.
|
||||
|
||||
IMPORTANT: if you use `afl-cmin` or `afl-cmin.bash` then either pass `-`
|
||||
or `@@` as command line parameters.
|
||||
@ -30,8 +30,8 @@ are to be fuzzed in qemu_mode. So we compile them with clang/clang++, without
|
||||
|
||||
`clang++ -o fuzz fuzzer_harness.cc libAFLQemuDriver.a [plus required linking]`.
|
||||
|
||||
|
||||
Then just do (where the name of the binary is `fuzz`):
|
||||
|
||||
```
|
||||
AFL_QEMU_PERSISTENT_ADDR=0x$(nm fuzz | grep "T LLVMFuzzerTestOneInput" | awk '{print $1}')
|
||||
AFL_QEMU_PERSISTENT_HOOK=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- ./fuzz
|
||||
@ -40,4 +40,4 @@ AFL_QEMU_PERSISTENT_HOOK=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- .
|
||||
If you use afl-cmin or `afl-showmap -C` with the aflpp_qemu_driver, you need to
|
||||
set the same AFL_QEMU_... (or AFL_FRIDA_...) environment variables.
|
||||
If you want to use afl-showmap (without -C) or afl-cmin.bash, then you must not
|
||||
set these environment variables and rather set `AFL_QEMU_DRIVER_NO_HOOK=1`.
|
||||
set these environment variables and rather set `AFL_QEMU_DRIVER_NO_HOOK=1`.
|
Loading…
x
Reference in New Issue
Block a user