Merge pull request #1074 from AFLplusplus/dev

push to stable
This commit is contained in:
van Hauser
2021-08-20 23:54:59 +02:00
committed by GitHub
246 changed files with 20325 additions and 1770 deletions

View File

@ -11,7 +11,7 @@ assignees: ''
1. You have verified that the issue is present in the current `dev` branch
2. Please supply the command line options and relevant environment variables, e.g. a copy-paste of the contents of `out/default/fuzzer_setup`
Thank you for making afl++ better!
Thank you for making AFL++ better!
**Describe the bug**
A clear and concise description of what the bug is.

8
.gitignore vendored
View File

@ -54,6 +54,7 @@ afl-showmap.8
afl-system-config.8
afl-tmin.8
afl-whatsup.8
afl-persistent-config.8
afl-c++
afl-cc
afl-lto
@ -85,4 +86,11 @@ gmon.out
afl-frida-trace.so
utils/afl_network_proxy/afl-network-client
utils/afl_network_proxy/afl-network-server
utils/plot_ui/afl-plot-ui
*.o.tmp
utils/afl_proxy/afl-proxy
utils/optimin/build
utils/optimin/optimin
utils/persistent_mode/persistent_demo
utils/persistent_mode/persistent_demo_new
utils/persistent_mode/test-instr

6
.gitmodules vendored
View File

@ -7,3 +7,9 @@
[submodule "qemu_mode/qemuafl"]
path = qemu_mode/qemuafl
url = https://github.com/AFLplusplus/qemuafl
[submodule "custom_mutators/gramatron/json-c"]
path = custom_mutators/gramatron/json-c
url = https://github.com/json-c/json-c
[submodule "utils/optimin/EvalMaxSAT"]
path = utils/optimin/EvalMaxSAT
url = https://github.com/FlorentAvellaneda/EvalMaxSAT
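The two new submodules are fetched like any other git submodule; a minimal sketch, run from the repository root (the Gramatron build script further below also performs this step itself when run from a git checkout):

```shell
# Fetch the newly added submodules (json-c for the Gramatron mutator,
# EvalMaxSAT for optimin) alongside the existing qemuafl submodule.
git submodule update --init custom_mutators/gramatron/json-c utils/optimin/EvalMaxSAT
```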

View File

@ -32,7 +32,7 @@ VERSION = $(shell grep '^$(HASH)define VERSION ' ../config.h | cut -d '"' -f
# PROGS intentionally omit afl-as, which gets installed elsewhere.
PROGS = afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
SH_PROGS = afl-plot afl-cmin afl-cmin.bash afl-whatsup afl-system-config
SH_PROGS = afl-plot afl-cmin afl-cmin.bash afl-whatsup afl-system-config afl-persistent-config
MANPAGES=$(foreach p, $(PROGS) $(SH_PROGS), $(p).8) afl-as.8
ASAN_OPTIONS=detect_leaks=0
@ -42,7 +42,7 @@ ARCH = $(shell uname -m)
$(info [*] Compiling afl++ for OS $(SYS) on ARCH $(ARCH))
ifdef NO_SPLICING
override CFLAGS += -DNO_SPLICING
override CFLAGS_OPT += -DNO_SPLICING
endif
ifdef ASAN_BUILD
@ -115,13 +115,13 @@ endif
ifdef PROFILING
$(info Compiling with profiling information, for analysis: gprof ./afl-fuzz gmon.out > prof.txt)
CFLAGS_OPT += -pg -DPROFILING=1
LDFLAGS += -pg
override CFLAGS_OPT += -pg -DPROFILING=1
override LDFLAGS += -pg
endif
ifdef INTROSPECTION
$(info Compiling with introspection documentation)
CFLAGS_OPT += -DINTROSPECTION=1
override CFLAGS_OPT += -DINTROSPECTION=1
endif
ifneq "$(ARCH)" "x86_64"
@ -136,7 +136,7 @@ endif
ifdef DEBUG
$(info Compiling DEBUG version of binaries)
CFLAGS += -ggdb3 -O0 -Wall -Wextra -Werror
override CFLAGS += -ggdb3 -O0 -Wall -Wextra -Werror $(CFLAGS_OPT)
else
CFLAGS ?= -O3 -funroll-loops $(CFLAGS_OPT)
endif
@ -147,28 +147,28 @@ override CFLAGS += -g -Wno-pointer-sign -Wno-variadic-macros -Wall -Wextra -Wpoi
ifeq "$(SYS)" "FreeBSD"
override CFLAGS += -I /usr/local/include/
LDFLAGS += -L /usr/local/lib/
override LDFLAGS += -L /usr/local/lib/
endif
ifeq "$(SYS)" "DragonFly"
override CFLAGS += -I /usr/local/include/
LDFLAGS += -L /usr/local/lib/
override LDFLAGS += -L /usr/local/lib/
endif
ifeq "$(SYS)" "OpenBSD"
override CFLAGS += -I /usr/local/include/ -mno-retpoline
LDFLAGS += -Wl,-z,notext -L /usr/local/lib/
override LDFLAGS += -Wl,-z,notext -L /usr/local/lib/
endif
ifeq "$(SYS)" "NetBSD"
override CFLAGS += -I /usr/pkg/include/
LDFLAGS += -L /usr/pkg/lib/
override LDFLAGS += -L /usr/pkg/lib/
endif
ifeq "$(SYS)" "Haiku"
SHMAT_OK=0
override CFLAGS += -DUSEMMAP=1 -Wno-error=format -fPIC
LDFLAGS += -Wno-deprecated-declarations -lgnu -lnetwork
override LDFLAGS += -Wno-deprecated-declarations -lgnu -lnetwork
SPECIAL_PERFORMANCE += -DUSEMMAP=1
endif
@ -244,22 +244,22 @@ ifneq "$(filter Linux GNU%,$(SYS))" ""
ifndef DEBUG
override CFLAGS += -D_FORTIFY_SOURCE=2
endif
LDFLAGS += -ldl -lrt -lm
override LDFLAGS += -ldl -lrt -lm
endif
ifneq "$(findstring FreeBSD, $(SYS))" ""
override CFLAGS += -pthread
LDFLAGS += -lpthread
override LDFLAGS += -lpthread
endif
ifneq "$(findstring NetBSD, $(SYS))" ""
override CFLAGS += -pthread
LDFLAGS += -lpthread
override LDFLAGS += -lpthread
endif
ifneq "$(findstring OpenBSD, $(SYS))" ""
override CFLAGS += -pthread
LDFLAGS += -lpthread
override LDFLAGS += -lpthread
endif
COMM_HDR = include/alloc-inl.h include/config.h include/debug.h include/types.h
@ -310,12 +310,14 @@ all: test_x86 test_shm test_python ready $(PROGS) afl-as llvm gcc_plugin test_bu
.PHONY: llvm
llvm:
-$(MAKE) -j -f GNUmakefile.llvm
-$(MAKE) -j4 -f GNUmakefile.llvm
@test -e afl-cc || { echo "[-] Compiling afl-cc failed. You seem not to have a working compiler." ; exit 1; }
.PHONY: gcc_plugin
gcc_plugin:
ifneq "$(SYS)" "Darwin"
-$(MAKE) -f GNUmakefile.gcc_plugin
endif
.PHONY: man
man: $(MANPAGES)
@ -351,6 +353,7 @@ help:
@echo "install: installs everything you have compiled with the build option above"
@echo "clean: cleans everything compiled (not downloads when on a checkout)"
@echo "deepclean: cleans everything including downloads"
@echo "uninstall: uninstall afl++ from the system"
@echo "code-format: format the code, do this before you commit and send a PR please!"
@echo "tests: this runs the test framework. It is more catered for the developers, but if you run into problems this helps pinpointing the problem"
@echo "unit: perform unit tests (based on cmocka and GNU linker)"
@ -561,7 +564,7 @@ all_done: test_build
.PHONY: clean
clean:
rm -f $(PROGS) libradamsa.so afl-fuzz-document afl-as as afl-g++ afl-clang afl-clang++ *.o src/*.o *~ a.out core core.[1-9][0-9]* *.stackdump .test .test1 .test2 test-instr .test-instr0 .test-instr1 afl-qemu-trace afl-gcc-fast afl-gcc-pass.so afl-g++-fast ld *.so *.8 test/unittests/*.o test/unittests/unit_maybe_alloc test/unittests/preallocable .afl-* afl-gcc afl-g++ afl-clang afl-clang++ test/unittests/unit_hash test/unittests/unit_rand
rm -rf $(PROGS) libradamsa.so afl-fuzz-document afl-as as afl-g++ afl-clang afl-clang++ *.o src/*.o *~ a.out core core.[1-9][0-9]* *.stackdump .test .test1 .test2 test-instr .test-instr0 .test-instr1 afl-qemu-trace afl-gcc-fast afl-gcc-pass.so afl-g++-fast ld *.so *.8 test/unittests/*.o test/unittests/unit_maybe_alloc test/unittests/preallocable .afl-* afl-gcc afl-g++ afl-clang afl-clang++ test/unittests/unit_hash test/unittests/unit_rand *.dSYM
-$(MAKE) -f GNUmakefile.llvm clean
-$(MAKE) -f GNUmakefile.gcc_plugin clean
$(MAKE) -C utils/libdislocator clean
@ -570,6 +573,7 @@ clean:
$(MAKE) -C utils/afl_network_proxy clean
$(MAKE) -C utils/socket_fuzzing clean
$(MAKE) -C utils/argv_fuzzing clean
-$(MAKE) -C utils/plot_ui clean
$(MAKE) -C qemu_mode/unsigaction clean
$(MAKE) -C qemu_mode/libcompcov clean
$(MAKE) -C qemu_mode/libqasan clean
@ -594,16 +598,21 @@ endif
.PHONY: distrib
distrib: all
-$(MAKE) -j -f GNUmakefile.llvm
-$(MAKE) -j4 -f GNUmakefile.llvm
ifneq "$(SYS)" "Darwin"
-$(MAKE) -f GNUmakefile.gcc_plugin
endif
$(MAKE) -C utils/libdislocator
$(MAKE) -C utils/libtokencap
$(MAKE) -C utils/afl_network_proxy
$(MAKE) -C utils/socket_fuzzing
$(MAKE) -C utils/argv_fuzzing
# -$(MAKE) -C utils/plot_ui
-$(MAKE) -C frida_mode
ifneq "$(SYS)" "Darwin"
-cd qemu_mode && sh ./build_qemu_support.sh
-cd unicorn_mode && unset CFLAGS && sh ./build_unicorn_support.sh
endif
.PHONY: binary-only
binary-only: test_shm test_python ready $(PROGS)
@ -612,16 +621,22 @@ binary-only: test_shm test_python ready $(PROGS)
$(MAKE) -C utils/afl_network_proxy
$(MAKE) -C utils/socket_fuzzing
$(MAKE) -C utils/argv_fuzzing
# -$(MAKE) -C utils/plot_ui
-$(MAKE) -C frida_mode
ifneq "$(SYS)" "Darwin"
-cd qemu_mode && sh ./build_qemu_support.sh
-cd unicorn_mode && unset CFLAGS && sh ./build_unicorn_support.sh
endif
.PHONY: source-only
source-only: all
-$(MAKE) -j -f GNUmakefile.llvm
-$(MAKE) -j4 -f GNUmakefile.llvm
ifneq "$(SYS)" "Darwin"
-$(MAKE) -f GNUmakefile.gcc_plugin
endif
$(MAKE) -C utils/libdislocator
$(MAKE) -C utils/libtokencap
# -$(MAKE) -C utils/plot_ui
%.8: %
@echo .TH $* 8 $(BUILD_DATE) "afl++" > $@
@ -650,6 +665,7 @@ install: all $(MANPAGES)
@rm -f $${DESTDIR}$(HELPER_PATH)/afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH)/afl-llvm-rt-32.o $${DESTDIR}$(HELPER_PATH)/afl-llvm-rt-64.o $${DESTDIR}$(HELPER_PATH)/afl-gcc-rt.o
install -m 755 $(PROGS) $(SH_PROGS) $${DESTDIR}$(BIN_PATH)
@if [ -f afl-qemu-trace ]; then install -m 755 afl-qemu-trace $${DESTDIR}$(BIN_PATH); fi
@if [ -f utils/plot_ui/afl-plot-ui ]; then install -m 755 utils/plot_ui/afl-plot-ui $${DESTDIR}$(BIN_PATH); fi
@if [ -f libdislocator.so ]; then set -e; install -m 755 libdislocator.so $${DESTDIR}$(HELPER_PATH); fi
@if [ -f libtokencap.so ]; then set -e; install -m 755 libtokencap.so $${DESTDIR}$(HELPER_PATH); fi
@if [ -f libcompcov.so ]; then set -e; install -m 755 libcompcov.so $${DESTDIR}$(HELPER_PATH); fi
@ -662,7 +678,9 @@ install: all $(MANPAGES)
@if [ -f utils/aflpp_driver/libAFLDriver.a ]; then set -e; install -m 644 utils/aflpp_driver/libAFLDriver.a $${DESTDIR}$(HELPER_PATH); fi
@if [ -f utils/aflpp_driver/libAFLQemuDriver.a ]; then set -e; install -m 644 utils/aflpp_driver/libAFLQemuDriver.a $${DESTDIR}$(HELPER_PATH); fi
-$(MAKE) -f GNUmakefile.llvm install
ifneq "$(SYS)" "Darwin"
-$(MAKE) -f GNUmakefile.gcc_plugin install
endif
ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-gcc
ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-g++
ln -sf afl-cc $${DESTDIR}$(BIN_PATH)/afl-clang
@ -674,3 +692,16 @@ install: all $(MANPAGES)
install -m 644 docs/*.md $${DESTDIR}$(DOC_PATH)
cp -r testcases/ $${DESTDIR}$(MISC_PATH)
cp -r dictionaries/ $${DESTDIR}$(MISC_PATH)
.PHONY: uninstall
uninstall:
-cd $${DESTDIR}$(BIN_PATH) && rm -f $(PROGS) $(SH_PROGS) afl-qemu-trace afl-plot-ui afl-fuzz-document afl-network-server afl-g* afl-plot.sh afl-as afl-ld-lto afl-c* afl-lto*
-cd $${DESTDIR}$(HELPER_PATH) && rm -f afl-g*.*o afl-llvm-*.*o afl-compiler-*.*o libdislocator.so libtokencap.so libcompcov.so libqasan.so afl-frida-trace.so socketfuzz*.so argvfuzz*.so libAFLDriver.a libAFLQemuDriver.a as afl-as SanitizerCoverage*.so compare-transform-pass.so cmplog-*-pass.so split-*-pass.so dynamic_list.txt
-rm -rf $${DESTDIR}$(MISC_PATH)/testcases $${DESTDIR}$(MISC_PATH)/dictionaries
-sh -c "ls docs/*.md | sed 's|^docs/|$${DESTDIR}$(DOC_PATH)/|' | xargs rm -f"
-cd $${DESTDIR}$(MAN_PATH) && rm -f $(MANPAGES)
-rmdir $${DESTDIR}$(BIN_PATH) 2>/dev/null
-rmdir $${DESTDIR}$(HELPER_PATH) 2>/dev/null
-rmdir $${DESTDIR}$(MISC_PATH) 2>/dev/null
-rmdir $${DESTDIR}$(DOC_PATH) 2>/dev/null
-rmdir $${DESTDIR}$(MAN_PATH) 2>/dev/null

View File

@ -53,7 +53,7 @@ LLVM_HAVE_LTO = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[1-9]
LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null)
LLVM_LIBDIR = $(shell $(LLVM_CONFIG) --libdir 2>/dev/null)
LLVM_STDCXX = gnu++11
LLVM_APPLE_XCODE = $(shell clang -v 2>&1 | grep -q Apple && echo 1 || echo 0)
LLVM_APPLE_XCODE = $(shell $(CC) -v 2>&1 | grep -q Apple && echo 1 || echo 0)
LLVM_LTO = 0
ifeq "$(LLVMVER)" ""
@ -279,6 +279,8 @@ CLANG_LFL = `$(LLVM_CONFIG) --ldflags` $(LDFLAGS)
# User teor2345 reports that this is required to make things work on MacOS X.
ifeq "$(SYS)" "Darwin"
CLANG_LFL += -Wl,-flat_namespace -Wl,-undefined,suppress
override LLVM_HAVE_LTO := 0
override LLVM_LTO := 0
else
CLANG_CPPFL += -Wl,-znodelete
endif

178
README.md
View File

@ -1,4 +1,4 @@
# American Fuzzy Lop plus plus (afl++)
# American Fuzzy Lop plus plus (AFL++)
<img align="right" src="https://raw.githubusercontent.com/andreafioraldi/AFLplusplus-website/master/static/logo_256x256.png" alt="AFL++ Logo">
@ -8,7 +8,7 @@
Repository: [https://github.com/AFLplusplus/AFLplusplus](https://github.com/AFLplusplus/AFLplusplus)
afl++ is maintained by:
AFL++ is maintained by:
* Marc "van Hauser" Heuse <mh@mh-sec.de>,
* Heiko "hexcoder-" Eißfeldt <heiko.eissfeldt@hexco.de>,
@ -17,36 +17,36 @@
Originally developed by Michał "lcamtuf" Zalewski.
afl++ is a superior fork to Google's afl - more speed, more and better
AFL++ is a superior fork to Google's AFL - more speed, more and better
mutations, more and better instrumentation, custom module support, etc.
If you want to use afl++ for your academic work, check the [papers page](https://aflplus.plus/papers/)
If you want to use AFL++ for your academic work, check the [papers page](https://aflplus.plus/papers/)
on the website. To cite our work, look at the [Cite](#cite) section.
For comparisons use the fuzzbench `aflplusplus` setup, or use `afl-clang-fast`
with `AFL_LLVM_CMPLOG=1`.
## Major behaviour changes in afl++ 3.00 onwards:
## Major behaviour changes in AFL++ 3.00 onwards:
With afl++ 3.13-3.20 we introduce frida_mode (-O) to have an alternative for
With AFL++ 3.13-3.20 we introduce frida_mode (-O) to have an alternative for
binary-only fuzzing. It is slower than Qemu mode but works on MacOS, Android,
iOS etc.
With afl++ 3.15 we introduced the following changes from previous behaviours:
With AFL++ 3.15 we introduced the following changes from previous behaviours:
* Also -M main mode does not do deterministic fuzzing by default anymore
* afl-cmin and afl-showmap -Ci now descend into subdirectories like
afl-fuzz -i does (but note that afl-cmin.bash does not)
With afl++ 3.14 we introduced the following changes from previous behaviours:
With AFL++ 3.14 we introduced the following changes from previous behaviours:
* afl-fuzz: deterministic fuzzing is not a default for -M main anymore
* afl-cmin/afl-showmap -i now descends into subdirectories (afl-cmin.bash
however does not)
With afl++ 3.10 we introduced the following changes from previous behaviours:
With AFL++ 3.10 we introduced the following changes from previous behaviours:
* The '+' feature of the '-t' option now means to auto-calculate the timeout
with the value given being the maximum timeout. The original meaning of
"skipping timeouts instead of abort" is now inherent to the -t option.
With afl++ 3.00 we introduced changes that break some previous afl and afl++
With AFL++ 3.00 we introduced changes that break some previous AFL and AFL++
behaviours and defaults:
* There are no llvm_mode and gcc_plugin subdirectories anymore and there is
only one compiler: afl-cc. All previous compilers now symlink to this one.
@ -82,18 +82,18 @@ behaviours and defaults:
## Contents
1. [Features](#important-features-of-afl)
2. [How to compile and install afl++](#building-and-installing-afl)
2. [How to compile and install AFL++](#building-and-installing-afl)
3. [How to fuzz a target](#how-to-fuzz-with-afl)
4. [Fuzzing binary-only targets](#fuzzing-binary-only-targets)
5. [Good examples and writeups of afl++ usages](#good-examples-and-writeups)
5. [Good examples and writeups of AFL++ usages](#good-examples-and-writeups)
6. [CI Fuzzing](#ci-fuzzing)
7. [Branches](#branches)
8. [Want to help?](#help-wanted)
9. [Detailed help and description of afl++](#challenges-of-guided-fuzzing)
9. [Detailed help and description of AFL++](#challenges-of-guided-fuzzing)
## Important features of afl++
## Important features of AFL++
afl++ supports llvm from 3.8 up to version 12, very fast binary fuzzing with QEMU 5.1
AFL++ supports llvm from 3.8 up to version 12, very fast binary fuzzing with QEMU 5.1
with laf-intel and redqueen, frida mode, unicorn mode, gcc plugin, full *BSD,
Mac OS, Solaris and Android support and much, much, much more.
@ -136,7 +136,7 @@ behaviours and defaults:
* QBDI mode to fuzz android native libraries via Quarkslab's [QBDI](https://github.com/QBDI/QBDI) framework
* Frida and ptrace mode to fuzz binary-only libraries, etc.
So all in all this is the best-of afl that is out there :-)
So all in all this is the best-of AFL that is out there :-)
For new versions and additional information, check out:
[https://github.com/AFLplusplus/AFLplusplus](https://github.com/AFLplusplus/AFLplusplus)
@ -151,9 +151,9 @@ behaviours and defaults:
The following branches exist:
* [stable/trunk](https://github.com/AFLplusplus/AFLplusplus/) : stable state of afl++ - it is synced from dev from time to
* [stable/trunk](https://github.com/AFLplusplus/AFLplusplus/) : stable state of AFL++ - it is synced from dev from time to
time when we are satisfied with its stability
* [dev](https://github.com/AFLplusplus/AFLplusplus/tree/dev) : development state of afl++ - bleeding edge and you might catch a
* [dev](https://github.com/AFLplusplus/AFLplusplus/tree/dev) : development state of AFL++ - bleeding edge and you might catch a
checkout which does not compile or has a bug. *We only accept PRs in dev!!*
* [release](https://github.com/AFLplusplus/AFLplusplus/tree/release) : the latest release
* (any other) : experimental branches to work on specific features or testing
@ -175,9 +175,9 @@ We have an idea list in [docs/ideas.md](docs/ideas.md).
For everyone who wants to contribute (and send pull requests) please read
[CONTRIBUTING.md](CONTRIBUTING.md) before your submit.
## Building and installing afl++
## Building and installing AFL++
An easy way to install afl++ with everything compiled is available via docker:
An easy way to install AFL++ with everything compiled is available via docker:
You can use the [Dockerfile](Dockerfile) (which has gcc-10 and clang-11 -
hence afl-clang-lto is available!) or just pull directly from the docker hub:
```shell
@ -187,7 +187,7 @@ docker run -ti -v /location/of/your/target:/src aflplusplus/aflplusplus
This image is automatically generated when a push to the stable repo happens.
You will find your target source code in /src in the container.
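For reference, the docker hub route mentioned above boils down to this sketch (the `/location/of/your/target` path is a placeholder for your own source tree):

```shell
# Pull the prebuilt image and start a container with your target mounted
# under /src (adjust the host path).
docker pull aflplusplus/aflplusplus
docker run -ti -v /location/of/your/target:/src aflplusplus/aflplusplus
```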
If you want to build afl++ yourself you have many options.
If you want to build AFL++ yourself you have many options.
The easiest choice is to build and install everything:
```shell
@ -205,7 +205,7 @@ It is recommended to install the newest available gcc, clang and llvm-dev
possible in your distribution!
Note that "make distrib" also builds instrumentation, qemu_mode, unicorn_mode and
more. If you just want plain afl++ then do "make all", however compiling and
more. If you just want plain AFL++ then do "make all", however compiling and
using at least instrumentation is highly recommended for much better results -
hence in this case
@ -216,7 +216,7 @@ is what you should choose.
These build targets exist:
* all: just the main afl++ binaries
* all: just the main AFL++ binaries
* binary-only: everything for binary-only fuzzing: qemu_mode, unicorn_mode, libdislocator, libtokencap
* source-only: everything for source code fuzzing: instrumentation, libdislocator, libtokencap
* distrib: everything (for both binary-only and source code fuzzing)
@ -230,7 +230,7 @@ These build targets exist:
* help: shows these build options
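For example, a typical source-code fuzzing setup might use (a sketch, assuming the default install prefix):

```shell
# Build the compilers/instrumentation plus helper libraries, then install.
make source-only
sudo make install
```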
[Unless you are on Mac OS X](https://developer.apple.com/library/archive/qa/qa1118/_index.html) you can also build statically linked versions of the
afl++ binaries by passing the STATIC=1 argument to make:
AFL++ binaries by passing the STATIC=1 argument to make:
```shell
make STATIC=1
@ -262,16 +262,20 @@ Here are some good writeups to show how to effectively use AFL++:
* [https://securitylab.github.com/research/fuzzing-sockets-FreeRDP](https://securitylab.github.com/research/fuzzing-sockets-FreeRDP)
* [https://securitylab.github.com/research/fuzzing-apache-1](https://securitylab.github.com/research/fuzzing-apache-1)
If you do not want to follow a tutorial but rather try an exercise type of
training then we can highly recommend the following:
* [https://github.com/antonio-morales/Fuzzing101](https://github.com/antonio-morales/Fuzzing101)
If you are interested in fuzzing structured data (where you define what the
structure is), these links have you covered:
* Superion for afl++: [https://github.com/adrian-rt/superion-mutator](https://github.com/adrian-rt/superion-mutator)
* libprotobuf for afl++: [https://github.com/P1umer/AFLplusplus-protobuf-mutator](https://github.com/P1umer/AFLplusplus-protobuf-mutator)
* Superion for AFL++: [https://github.com/adrian-rt/superion-mutator](https://github.com/adrian-rt/superion-mutator)
* libprotobuf for AFL++: [https://github.com/P1umer/AFLplusplus-protobuf-mutator](https://github.com/P1umer/AFLplusplus-protobuf-mutator)
* libprotobuf raw: [https://github.com/bruce30262/libprotobuf-mutator_fuzzing_learning/tree/master/4_libprotobuf_aflpp_custom_mutator](https://github.com/bruce30262/libprotobuf-mutator_fuzzing_learning/tree/master/4_libprotobuf_aflpp_custom_mutator)
* libprotobuf for old afl++ API: [https://github.com/thebabush/afl-libprotobuf-mutator](https://github.com/thebabush/afl-libprotobuf-mutator)
* libprotobuf for old AFL++ API: [https://github.com/thebabush/afl-libprotobuf-mutator](https://github.com/thebabush/afl-libprotobuf-mutator)
If you find other good ones, please send them to us :-)
## How to fuzz with afl++
## How to fuzz with AFL++
The following describes how to fuzz with a target if source code is available.
If you have a binary-only target please skip to [#Instrumenting binary-only apps](#Instrumenting binary-only apps)
@ -287,9 +291,9 @@ Fuzzing source code is a three-step process.
### 1. Instrumenting that target
#### a) Selecting the best afl++ compiler for instrumenting the target
#### a) Selecting the best AFL++ compiler for instrumenting the target
afl++ comes with a central compiler `afl-cc` that incorporates various different
AFL++ comes with a central compiler `afl-cc` that incorporates various different
kinds of compiler targets and instrumentation options.
The following evaluation flow will help you select the best possible one.
@ -339,7 +343,7 @@ You can select the mode for the afl-cc compiler by:
MODE can be one of: LTO (afl-clang-lto*), LLVM (afl-clang-fast*), GCC_PLUGIN
(afl-g*-fast) or GCC (afl-gcc/afl-g++) or CLANG(afl-clang/afl-clang++).
Because no afl specific command-line options are accepted (beside the
Because no AFL-specific command-line options are accepted (besides the
--afl-MODE command), the compile-time tools make fairly broad use of environment
variables, which can be listed with `afl-cc -hh` or by reading [docs/env_variables.md](docs/env_variables.md).
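A minimal sketch of selecting a mode simply by using the corresponding compiler name (LTO here, assuming a sufficiently recent LLVM) and listing the environment variables:

```shell
# Configure a (hypothetical) autoconf-based target with the LTO compilers.
CC=afl-clang-lto CXX=afl-clang-lto++ ./configure
# List all environment variables understood by the compile-time tools.
afl-cc -hh
```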
@ -347,7 +351,7 @@ variables, which can be listed with `afl-cc -hh` or by reading [docs/env_variabl
The following options are available when you instrument with LTO mode (afl-clang-fast/afl-clang-lto):
* Splitting integer, string, float and switch comparisons so afl++ can easier
* Splitting integer, string, float and switch comparisons so AFL++ can
solve these more easily. This is an important option if you do not have a very good
and large input corpus. This technique is called laf-intel or COMPCOV.
To use this set the following environment variable before compiling the
@ -355,7 +359,7 @@ The following options are available when you instrument with LTO mode (afl-clang
You can read more about this in [instrumentation/README.laf-intel.md](instrumentation/README.laf-intel.md)
* A different technique (and usually a better one than laf-intel) is to
instrument the target so that any compare values in the target are sent to
afl++ which then tries to put these values into the fuzzing data at different
AFL++ which then tries to put these values into the fuzzing data at different
locations. This technique is very fast and good - if the target does not
transform input data before comparison. Therefore this technique is called
`input to state` or `redqueen`.
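As a sketch of how the two techniques above are typically wired up (file names are placeholders; `AFL_LLVM_CMPLOG=1` was already mentioned in the comparison note near the top of this README, and the `-c` option of afl-fuzz takes the CMPLOG-instrumented copy):

```shell
# laf-intel / COMPCOV build (assumed env var AFL_LLVM_LAF_ALL enables all
# comparison-splitting passes).
AFL_LLVM_LAF_ALL=1 afl-clang-fast -o target_laf target.c
# Normal build plus an extra CMPLOG copy for redqueen / input-to-state.
afl-clang-fast -o target target.c
AFL_LLVM_CMPLOG=1 afl-clang-fast -o target_cmplog target.c
# Fuzz the normal binary and pass the CMPLOG copy via -c.
afl-fuzz -i seeds -o out -c ./target_cmplog -- ./target
```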
@ -388,7 +392,7 @@ time less effective. See:
* [instrumentation/README.ctx.md](instrumentation/README.ctx.md)
* [instrumentation/README.ngram.md](instrumentation/README.ngram.md)
afl++ performs "never zero" counting in its bitmap. You can read more about this
AFL++ performs "never zero" counting in its bitmap. You can read more about this
here:
* [instrumentation/README.neverzero.md](instrumentation/README.neverzero.md)
@ -403,7 +407,7 @@ This is enough because a use-after-free bug will be picked up, e.g. by
ASAN (address sanitizer) anyway when syncing to other fuzzing instances,
so not all fuzzing instances need to be instrumented with ASAN.
The following sanitizers have built-in support in afl++:
The following sanitizers have built-in support in AFL++:
* ASAN = Address SANitizer, finds memory corruption vulnerabilities like
use-after-free, NULL pointer dereference, buffer overruns, etc.
Enabled with `export AFL_USE_ASAN=1` before compiling.
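A sketch of the usual pattern, following the advice above that only some instances need sanitizers (file names are placeholders; `-m none` lifts the memory limit, which ASAN builds generally require):

```shell
# Build one extra ASAN-instrumented copy of the target ...
AFL_USE_ASAN=1 afl-clang-fast -o target_asan target.c
# ... and run it as a single secondary instance next to the main fuzzer.
afl-fuzz -i seeds -o out -S asan01 -m none -- ./target_asan
```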
@ -457,30 +461,37 @@ by eliminating these checks within these AFL specific blocks:
#endif
```
All afl++ compilers will set this preprocessor definition automatically.
All AFL++ compilers will set this preprocessor definition automatically.
#### e) Instrument the target
In this step the target source code is compiled so that it can be fuzzed.
Basically you have to tell the target build system that the selected afl++
Basically you have to tell the target build system that the selected AFL++
compiler is used. Also - if possible - you should always configure the
build system such that the target is compiled statically and not dynamically.
How to do this is described below.
The #1 rule when instrumenting a target is: avoid instrumenting shared
libraries at all cost. You would need to set LD_LIBRARY_PATH to point to
these, you could accidentally type "make install" and install them system-wide -
so don't. Really don't.
**Always compile libraries you want to have instrumented as static and link
these to the target program!**
Then build the target. (Usually with `make`)
**NOTES**
1. sometimes configure and build systems are fickle and do not like
stderr output (and think this means a test failure) - which is something
afl++ likes to do to show statistics. It is recommended to disable afl++
AFL++ likes to do to show statistics. It is recommended to disable AFL++
instrumentation reporting via `export AFL_QUIET=1`.
2. sometimes configure and build systems error on warnings - these should be
disabled (e.g. `--disable-werror` for some configure scripts).
3. in case the configure/build system complains about afl++'s compiler and
3. in case the configure/build system complains about AFL++'s compiler and
aborts then set `export AFL_NOOPT=1` which will then just behave like the
real compiler. This option has to be unset again before building the target!
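Putting these notes together, a configure-based build could look like this sketch (flags and names are illustrative; `--disable-shared` follows the static-linking advice above):

```shell
export AFL_QUIET=1            # keep instrumentation banners off stderr
CC=afl-clang-fast CXX=afl-clang-fast++ ./configure --disable-shared
make
```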
@ -504,12 +515,12 @@ described in [instrumentation/README.lto.md](instrumentation/README.lto.md).
##### meson
For meson you have to set the afl++ compiler with the very first command!
For meson you have to set the AFL++ compiler with the very first command!
`CC=afl-cc CXX=afl-c++ meson`
##### other build systems or if configure/cmake didn't work
Sometimes cmake and configure do not pick up the afl++ compiler, or the
Sometimes cmake and configure do not pick up the AFL++ compiler, or the
ranlib/ar that is needed - because this was just not foreseen by the developer
of the target. Or they have non-standard options. Figure out if there is a
non-standard way to set this, otherwise set up the build normally and edit the
@ -525,7 +536,7 @@ This variant requires the usage of afl-clang-lto, afl-clang-fast or afl-gcc-fast
It is the so-called `persistent mode`, which is much, much faster but
requires that you code a source file that is specifically calling the target
functions that you want to fuzz, plus a few specific afl++ functions around
functions that you want to fuzz, plus a few specific AFL++ functions around
it. See [instrumentation/README.persistent_mode.md](instrumentation/README.persistent_mode.md) for details.
Basically if you do not fuzz a target in persistent mode then you are just
@ -534,7 +545,7 @@ doing it for a hobby and not professionally :-).
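To make the shape of such a harness concrete, here is a hedged sketch; `target_function()` and `target.a` are purely hypothetical placeholders, and the `__AFL_LOOP()` pattern follows [instrumentation/README.persistent_mode.md](instrumentation/README.persistent_mode.md):

```shell
cat > harness.c << 'EOF'
#include <unistd.h>

/* hypothetical entry point of the library you want to fuzz */
extern void target_function(const unsigned char *buf, size_t len);

int main(void) {
  static unsigned char buf[4096];
  while (__AFL_LOOP(10000)) {              /* persistent-mode loop */
    ssize_t len = read(0, buf, sizeof(buf));
    if (len > 0) target_function(buf, (size_t)len);
  }
  return 0;
}
EOF
afl-clang-fast -o harness harness.c target.a
```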
#### g) libfuzzer fuzzer harnesses with LLVMFuzzerTestOneInput()
libfuzzer `LLVMFuzzerTestOneInput()` harnesses are the de facto standard
for fuzzing, and they can be used with afl++ (and honggfuzz) as well!
for fuzzing, and they can be used with AFL++ (and honggfuzz) as well!
Compiling them is as simple as:
```
afl-clang-fast++ -fsanitize=fuzzer -o harness harness.cpp targetlib.a
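# (sketch) the resulting harness is then fuzzed like any other instrumented
# binary, e.g. with a seed directory of your choice:
afl-fuzz -i seeds -o out -- ./harness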
@ -566,7 +577,7 @@ normal data it receives and processes to a file and use these.
#### b) Making the input corpus unique
Use the afl++ tool `afl-cmin` to remove inputs from the corpus that do not
Use the AFL++ tool `afl-cmin` to remove inputs from the corpus that do not
produce a new path in the target.
Put all files from step a) into one directory, e.g. INPUTS.
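A minimal sketch (directory names are placeholders; add `@@` only if the target reads its input from a file rather than from stdin):

```shell
# Keep only the inputs that produce new paths in the target.
afl-cmin -i INPUTS -o INPUTS_UNIQUE -- ./target @@
```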
@ -623,6 +634,13 @@ system for optimal speed - which afl-fuzz checks and bails otherwise.
Set `export AFL_SKIP_CPUFREQ=1` for afl-fuzz to skip this check if you cannot
run afl-system-config with root privileges on the host for whatever reason.
Note there is also `sudo afl-persistent-config` which sets additional permanent
boot options for a much better fuzzing performance.
Note that both scripts improve your fuzzing performance but also decrease your
system protection against attacks! So set strong firewall rules and only
expose SSH as a network service if you use these (which is highly recommended).
If you have an input corpus from step 2 then specify this directory with the `-i`
option. Otherwise create a new directory and create a file with any content
as test data in there.
@ -671,13 +689,13 @@ failure handling in the target.
Play around with various -m values until you find one that safely works for all
your input seeds (if you have good ones) and then double or quadruple that.
By default afl-fuzz never stops fuzzing. To terminate afl++ simply press Control-C
By default afl-fuzz never stops fuzzing. To terminate AFL++ simply press Control-C
or send a signal SIGINT. You can limit the number of executions or approximate runtime
in seconds with options also.
When you start afl-fuzz you will see a user interface that shows what the status
is:
![docs/screenshot.png](docs/screenshot.png)
![docs/resources/screenshot.png](docs/resources/screenshot.png)
All labels are explained in [docs/status_screen.md](docs/status_screen.md).
@ -686,7 +704,7 @@ All labels are explained in [docs/status_screen.md](docs/status_screen.md).
If you want to seriously fuzz then use as many cores/threads as possible to
fuzz your target.
On the same machine - due to the design of how afl++ works - there is a maximum
On the same machine - due to the design of how AFL++ works - there is a maximum
number of CPU cores/threads that are useful; using more degrades the overall
performance instead. This value depends on the target, and the limit is between 32
and 64 cores per machine.
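What a small multi-core campaign could look like, as a sketch (instance names are arbitrary; start each command in its own terminal or tmux/screen session):

```shell
afl-fuzz -i seeds -o out -M main   -- ./target
afl-fuzz -i seeds -o out -S fuzz02 -- ./target
afl-fuzz -i seeds -o out -S fuzz03 -- ./target
```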
@ -727,7 +745,7 @@ If you have a large corpus, a corpus from a previous run or are fuzzing in
a CI, then also set `export AFL_CMPLOG_ONLY_NEW=1` and `export AFL_FAST_CAL=1`.
You can also use different fuzzers.
If you are using afl spinoffs or afl conforming fuzzers, then just use the
If you are using AFL spinoffs or AFL conforming fuzzers, then just use the
same -o directory and give it a unique `-S` name.
Examples are:
* [Fuzzolic](https://github.com/season-lab/fuzzolic)
@ -740,7 +758,7 @@ Examples are:
A long list can be found at [https://github.com/Microsvuln/Awesome-AFL](https://github.com/Microsvuln/Awesome-AFL)
However you can also sync afl++ with honggfuzz, libfuzzer with `-entropic=1`, etc.
However you can also sync AFL++ with honggfuzz, libfuzzer with `-entropic=1`, etc.
Just show the main fuzzer (-M) with the `-F` option where the queue/work
directory of a different fuzzer is, e.g. `-F /src/target/honggfuzz`.
Using honggfuzz (with `-n 1` or `-n 2`) and libfuzzer in parallel is highly
@ -787,7 +805,7 @@ There is a more complex and configurable script in `utils/distributed_fuzzing`.
#### d) The status of the fuzz campaign
afl++ comes with the `afl-whatsup` script to show the status of the fuzzing
AFL++ comes with the `afl-whatsup` script to show the status of the fuzzing
campaign.
Just supply the directory that afl-fuzz is given with the -o option and
@ -799,7 +817,21 @@ To have only the summary use the `-s` switch e.g.: `afl-whatsup -s out/`
If you have multiple servers then use the command after a sync, or you have
to execute this script per server.
#### e) Checking the coverage of the fuzzing
#### e) Stopping fuzzing, restarting fuzzing, adding new seeds
To stop an afl-fuzz run, simply press Control-C.
To restart an afl-fuzz run, just reuse the same command line but replace the
`-i directory` with `-i -` or set `AFL_AUTORESUME=1`.
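As a small sketch (the `out/` directory and target name are placeholders):

```shell
# Resume an earlier run that used -o out:
afl-fuzz -i - -o out -- ./target
# or keep the original -i and let AFL++ resume automatically:
AFL_AUTORESUME=1 afl-fuzz -i seeds -o out -- ./target
```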
If you want to add new seeds to a fuzzing campaign you can run a temporary
fuzzing instance, e.g. when your main fuzzer is using `-o out` and the new
seeds are in the `newseeds/` directory:
```
AFL_BENCH_JUST_ONE=1 AFL_FAST_CAL=1 afl-fuzz -i newseeds -o out -S newseeds -- ./target
```
#### f) Checking the coverage of the fuzzing
The `paths found` value is a bad indicator for checking how good the coverage is.
@ -835,7 +867,7 @@ fuzzing campaigns each with one of these options set. E.g. if you fuzz a library
convert image formats and your target is the png to tiff API then you will not
touch any of the other library APIs and features.
#### f) How long to fuzz a target?
#### g) How long to fuzz a target?
This is a difficult question.
Basically if no new path is found for a long time (e.g. for a day or a week)
@ -847,13 +879,14 @@ Keep the queue/ directory (for future fuzzings of the same or similar targets)
and use them to seed other good fuzzers like libfuzzer with the -entropic
switch or honggfuzz.
#### g) Improve the speed!
#### h) Improve the speed!
* Use [persistent mode](instrumentation/README.persistent_mode.md) (x2-x20 speed increase)
* If you do not use shmem persistent mode, use `AFL_TMPDIR` to place the input file on a tmpfs location, see [docs/env_variables.md](docs/env_variables.md)
* Linux: Improve kernel performance: modify `/etc/default/grub`, set `GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"`; then `update-grub` and `reboot` (warning: makes the system more insecure)
* Linux: Improve kernel performance: modify `/etc/default/grub`, set `GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"`; then `update-grub` and `reboot` (warning: makes the system more insecure) - you can also just run `sudo afl-persistent-config`
* Linux: Running on an `ext2` filesystem with `noatime` mount option will be a bit faster than on any other journaling filesystem
* Use your cores! [3.b) Using multiple cores/threads](#b-using-multiple-coresthreads)
* Run `sudo afl-system-config` before starting the first afl-fuzz instance after a reboot
### The End
@ -864,7 +897,7 @@ This is basically all you need to know to professionally run fuzzing campaigns.
If you want to know more, the rest of this README and the tons of texts in
[docs/](docs/) will have you covered.
Note that there are also a lot of tools out there that help fuzzing with afl++
Note that there are also a lot of tools out there that help fuzzing with AFL++
(some might be deprecated or unsupported):
Speeding up fuzzing:
@ -916,7 +949,7 @@ campaigns as these are much shorter runnings.
initial corpus as this very likely has been done for them already.
* Keep the generated corpus, use afl-cmin and reuse it every time!
2. Additionally randomize the afl++ compilation options, e.g.
2. Additionally randomize the AFL++ compilation options, e.g.
* 40% for `AFL_LLVM_CMPLOG`
* 10% for `AFL_LLVM_LAF_ALL`
@ -932,12 +965,12 @@ campaigns as these are much shorter runnings.
`-M` enables old queue handling etc. which is good for a fuzzing campaign but
not good for short CI runs.
How this can look like can e.g. be seen at afl++'s setup in Google's [oss-fuzz](https://github.com/google/oss-fuzz/blob/master/infra/base-images/base-builder/compile_afl)
What this can look like can be seen, e.g., at AFL++'s setup in Google's [oss-fuzz](https://github.com/google/oss-fuzz/blob/master/infra/base-images/base-builder/compile_afl)
and [clusterfuzz](https://github.com/google/clusterfuzz/blob/master/src/python/bot/fuzzers/afl/launcher.py).
## Fuzzing binary-only targets
When source code is *NOT* available, afl++ offers various support for fast,
When source code is *NOT* available, AFL++ offers various support for fast,
on-the-fly instrumentation of black-box binaries.
If you do not have to use Unicorn the following setup is recommended to use
@ -991,7 +1024,7 @@ less conducive to parallelization.
### Unicorn
For non-Linux binaries you can use afl++'s unicorn mode which can emulate
For non-Linux binaries you can use AFL++'s unicorn mode which can emulate
anything you want - for the price of speed and user written scripts.
See [unicorn_mode](unicorn_mode/README.md).
@ -1159,6 +1192,18 @@ If you have gnuplot installed, you can also generate some pretty graphs for any
active fuzzing task using afl-plot. For an example of what this looks like,
see [http://lcamtuf.coredump.cx/afl/plot/](http://lcamtuf.coredump.cx/afl/plot/).
You can also manually build and install afl-plot-ui, which is a helper utility
for showing the graphs generated by afl-plot in a graphical window using GTK.
You can build and install it as follows:
```shell
sudo apt install libgtk-3-0 libgtk-3-dev pkg-config
cd utils/plot_ui
make
cd ../../
sudo make install
```
## Help: Crash triage
The coverage-based grouping of crashes usually produces a small data set that
@ -1193,13 +1238,13 @@ can be operated in a very simple way:
The tool works with crashing and non-crashing test cases alike. In the crash
mode, it will happily accept instrumented and non-instrumented binaries. In the
non-crashing mode, the minimizer relies on standard afl++ instrumentation to make
non-crashing mode, the minimizer relies on standard AFL++ instrumentation to make
the file simpler without altering the execution path.
The minimizer accepts the -m, -t, -f and @@ syntax in a manner compatible with
afl-fuzz.
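A minimal sketch (file names are placeholders; `@@` marks where the input file is passed, just as with afl-fuzz):

```shell
# Shrink a crashing input down to the bytes that still trigger the crash.
afl-tmin -i crashing_testcase -o crashing_testcase.min -- ./target @@
```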
Another tool in afl++ is the afl-analyze tool. It takes an input
Another tool in AFL++ is the afl-analyze tool. It takes an input
file, attempts to sequentially flip bytes, and observes the behavior of the
tested program. It then color-codes the input based on which sections appear to
be critical, and which are not; while not bulletproof, it can often offer quick
@ -1242,7 +1287,7 @@ tasks, fuzzing may put a strain on your hardware and on the OS. In particular:
for something to blow up.
- Targeted programs may end up erratically grabbing gigabytes of memory or
filling up disk space with junk files. afl++ tries to enforce basic memory
filling up disk space with junk files. AFL++ tries to enforce basic memory
limits, but can't prevent each and every possible mishap. The bottom line
is that you shouldn't be fuzzing on systems where the prospect of data loss
is not an acceptable risk.
@ -1271,7 +1316,7 @@ tasks, fuzzing may put a strain on your hardware and on the OS. In particular:
Here are some of the most important caveats for AFL:
- afl++ detects faults by checking for the first spawned process dying due to
- AFL++ detects faults by checking for the first spawned process dying due to
a signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for
these signals may need to have the relevant code commented out. In the same
vein, faults in child processes spawned by the fuzzed target may evade
@ -1288,8 +1333,7 @@ Here are some of the most important caveats for AFL:
`AFL_CUSTOM_MUTATOR_LIBRARY`
- There are some unfortunate trade-offs with ASAN and 64-bit binaries. This
isn't due to any specific fault of afl-fuzz; see [docs/notes_for_asan.md](docs/notes_for_asan.md)
for tips.
isn't due to any specific fault of afl-fuzz.
- There is no direct support for fuzzing network services, background
daemons, or interactive apps that require UI interaction to work. You may
@ -1307,7 +1351,7 @@ Beyond this, see INSTALL for platform-specific tips.
## Special thanks
Many of the improvements to the original afl and afl++ wouldn't be possible
Many of the improvements to the original AFL and AFL++ wouldn't be possible
without feedback, bug reports, or patches from:
```
@ -1391,7 +1435,7 @@ Bibtex:
Questions? Concerns? Bug reports? The contributors can be reached via
[https://github.com/AFLplusplus/AFLplusplus](https://github.com/AFLplusplus/AFLplusplus)
There is also a mailing list for the afl/afl++ project; to join, send a mail to
There is also a mailing list for the AFL/AFL++ project; to join, send a mail to
<afl-users+subscribe@googlegroups.com>. Or, if you prefer to browse archives
first, try: [https://groups.google.com/group/afl-users](https://groups.google.com/group/afl-users)

133
afl-persistent-config Executable file
View File

@ -0,0 +1,133 @@
#!/bin/bash
# written by jhertz
#
test "$1" = "-h" -o "$1" = "-hh" && {
echo 'afl-persistent-config'
echo
echo $0
echo
echo afl-persistent-config has no command line options
echo
echo afl-persistent-config permanently reconfigures the system to a high performance fuzzing state.
echo "WARNING: this reduces the security of the system!"
echo
echo Note that there is also afl-system-config which sets additional runtime
echo configuration options.
exit 0
}
echo
echo "WARNING: This scripts makes permanent configuration changes to the system to"
echo " increase the performance for fuzzing. As a result, the system also"
echo " becomes less secure against attacks! If you use this script, setup"
echo " strong firewall rules and only make SSH available as a network"
echo " service!"
echo
echo -n "Type \"YES\" to continue: "
read ANSWER
if [[ "$ANSWER" != "YES" ]]; then
echo Input was not YES, aborting ...
exit 1
fi
echo
PLATFORM=`uname -s`
# check that we're on Mac
if [[ "$PLATFORM" = "Darwin" ]] ; then
# check if UID == 0
if [[ "$EUID" -ne 0 ]]; then
echo "You need to be root to do this. E.g. use \"sudo\""
exit 1
fi
# check if SIP is disabled
if [[ ! $(csrutil status | grep "disabled") ]]; then
echo "SIP needs to be disabled. Restart and press Command-R at reboot, Utilities => Terminal => enter \"csrutil disable\""
exit 1
fi
echo "Checks passed."
echo "Installing /Library/LaunchDaemons/shm_setup.plist"
cat << EOF > /Library/LaunchDaemons/shm_setup.plist
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>shmemsetup</string>
<key>UserName</key>
<string>root</string>
<key>GroupName</key>
<string>wheel</string>
<key>ProgramArguments</key>
<array>
<string>/usr/sbin/sysctl</string>
<string>-w</string>
<string>kern.sysv.shmmax=524288000</string>
<string>kern.sysv.shmmin=1</string>
<string>kern.sysv.shmmni=128</string>
<string>kern.sysv.shmseg=48</string>
<string>kern.sysv.shmall=131072000</string>
</array>
<key>KeepAlive</key>
<false/>
<key>RunAtLoad</key>
<true/>
</dict>
</plist>
EOF
echo
echo "Reboot and enjoy your fuzzing"
exit 0
fi
if [[ "$PLATFORM" = "Linux" ]] ; then
# check if UID == 0
if [[ "$EUID" -ne 0 ]]; then
echo "You need to be root to do this. E.g. use \"sudo\""
exit 1
fi
echo "Checks passed."
test -d /etc/sysctl.d || echo Error: /etc/sysctl.d directory not found, cannot install shmem config
test -d /etc/sysctl.d -a '!' -e /etc/sysctl.d/99-fuzzing && {
echo "Installing /etc/sysctl.d/99-fuzzing"
cat << EOF > /etc/sysctl.d/99-fuzzing
kernel.core_uses_pid=0
kernel.core_pattern=core
kernel.randomize_va_space=0
kernel.sched_child_runs_first=1
kernel.sched_autogroup_enabled=1
kernel.sched_migration_cost_ns=50000000
kernel.sched_latency_ns=250000000
EOF
}
egrep -q '^GRUB_CMDLINE_LINUX_DEFAULT=' /etc/default/grub 2>/dev/null || echo Error: /etc/default/grub with GRUB_CMDLINE_LINUX_DEFAULT is not present, cannot set boot options
egrep -q '^GRUB_CMDLINE_LINUX_DEFAULT=' /etc/default/grub 2>/dev/null && {
egrep '^GRUB_CMDLINE_LINUX_DEFAULT=' /etc/default/grub | egrep -q hardened_usercopy=off || {
echo "Configuring performance boot options"
LINE=`egrep '^GRUB_CMDLINE_LINUX_DEFAULT=' /etc/default/grub | sed 's/^GRUB_CMDLINE_LINUX_DEFAULT=//' | tr -d '"'`
OPTIONS="$LINE ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off srbds=off noexec=off noexec32=off tsx=on tsx=on tsx_async_abort=off mitigations=off audit=0 hardened_usercopy=off ssbd=force-off"
echo Setting boot options in /etc/default/grub to GRUB_CMDLINE_LINUX_DEFAULT=\"$OPTIONS\"
sed -i "s|^GRUB_CMDLINE_LINUX_DEFAULT=.*|GRUB_CMDLINE_LINUX_DEFAULT=\"$OPTIONS\"|" /etc/default/grub
}
}
echo
echo "Reboot and enjoy your fuzzing"
exit 0
fi
echo "Error: Unknown platform \"$PLATFORM\", currently supported are Linux and MacOS."
exit 1

171
afl-plot
View File

@ -22,16 +22,28 @@ get_abs_path() {
echo "progress plotting utility for afl-fuzz by Michal Zalewski"
echo
if [ ! "$#" = "2" ]; then
GRAPHICAL="0"
if [ "$1" = "-g" ] || [ "$1" = "--graphical" ]; then
GRAPHICAL="1"
shift
fi
if [ "$#" != "2" ]; then
cat 1>&2 <<_EOF_
$0 afl_state_dir graph_output_dir
$0 [ -g | --graphical ] afl_state_dir graph_output_dir
This program generates gnuplot images from afl-fuzz output data. Usage:
This program generates gnuplot images from afl-fuzz output data.
The afl_state_dir parameter should point to an existing state directory for any
active or stopped instance of afl-fuzz; while graph_output_dir should point to
an empty directory where this tool can write the resulting plots to.
Usage:
afl_state_dir should point to an existing state directory for any
active or stopped instance of afl-fuzz
graph_output_dir should point to an empty directory where this
tool can write the resulting plots to
-g, --graphical (optional) display the plots in a graphical window
(you should have built afl-plot-ui to use this option)
The program will put index.html and three PNG images in the output directory;
you should be able to view it with any web browser of your choice.
@ -102,18 +114,10 @@ fi
rm -f "$outputdir/high_freq.png" "$outputdir/low_freq.png" "$outputdir/exec_speed.png" "$outputdir/edges.png"
mv -f "$outputdir/index.html" "$outputdir/index.html.orig" 2>/dev/null
echo "[*] Generating plots..."
(
cat <<_EOF_
set terminal png truecolor enhanced size 1000,300 butt
set output '$outputdir/high_freq.png'
GNUPLOT_SETUP="
#set xdata time
#set timefmt '%s'
#set format x "%b %d\n%H:%M"
#set format x \"%b %d\n%H:%M\"
set tics font 'small'
unset mxtics
unset mytics
@ -127,37 +131,170 @@ set key outside
set autoscale xfixmin
set autoscale xfixmax
set xlabel "relative time in seconds" font "small"
set xlabel \"relative time in seconds\" font \"small\"
"
PLOT_HF="
set terminal png truecolor enhanced size 1000,300 butt
set output '$outputdir/high_freq.png'
$GNUPLOT_SETUP
plot '$inputdir/plot_data' using 1:4 with filledcurve x1 title 'total paths' linecolor rgb '#000000' fillstyle transparent solid 0.2 noborder, \\
'' using 1:3 with filledcurve x1 title 'current path' linecolor rgb '#f0f0f0' fillstyle transparent solid 0.5 noborder, \\
'' using 1:5 with lines title 'pending paths' linecolor rgb '#0090ff' linewidth 3, \\
'' using 1:6 with lines title 'pending favs' linecolor rgb '#c00080' linewidth 3, \\
'' using 1:2 with lines title 'cycles done' linecolor rgb '#c000f0' linewidth 3
"
PLOT_LF="
set terminal png truecolor enhanced size 1000,200 butt
set output '$outputdir/low_freq.png'
$GNUPLOT_SETUP
plot '$inputdir/plot_data' using 1:8 with filledcurve x1 title '' linecolor rgb '#c00080' fillstyle transparent solid 0.2 noborder, \\
'' using 1:8 with lines title ' uniq crashes' linecolor rgb '#c00080' linewidth 3, \\
'' using 1:9 with lines title 'uniq hangs' linecolor rgb '#c000f0' linewidth 3, \\
'' using 1:10 with lines title 'levels' linecolor rgb '#0090ff' linewidth 3
"
PLOT_ES="
set terminal png truecolor enhanced size 1000,200 butt
set output '$outputdir/exec_speed.png'
$GNUPLOT_SETUP
plot '$inputdir/plot_data' using 1:11 with filledcurve x1 title '' linecolor rgb '#0090ff' fillstyle transparent solid 0.2 noborder, \\
'$inputdir/plot_data' using 1:11 with lines title ' execs/sec' linecolor rgb '#0090ff' linewidth 3 smooth bezier;
"
PLOT_EG="
set terminal png truecolor enhanced size 1000,300 butt
set output '$outputdir/edges.png'
$GNUPLOT_SETUP
plot '$inputdir/plot_data' using 1:13 with lines title ' edges' linecolor rgb '#0090ff' linewidth 3
"
if [ "$#" = "2" ] && [ "$GRAPHICAL" = "1" ]; then
afl-plot-ui -h > /dev/null 2>&1
if [ "$?" != "0" ]; then
cat 1>&2 <<_EOF_
You do not seem to have the afl-plot-ui utility installed. If you have installed afl-plot-ui, make sure the afl-plot-ui executable is in your PATH.
If you are still facing any problems, please open an issue at https://github.com/AFLplusplus/AFLplusplus/issues.
No plots have been generated. Please rerun without the "-g" or "--graphical" flag to generate the plots.
_EOF_
exit 1
fi
mkdir -p "$outputdir/tmp"
afl-plot-ui > "$outputdir/tmp/win_ids" &
sleep 0.5
W_ID1=$(cat $outputdir/tmp/win_ids | head -1)
W_ID2=$(cat $outputdir/tmp/win_ids | head -2 | tail -1)
W_ID3=$(cat $outputdir/tmp/win_ids | head -3 | tail -1)
W_ID4=$(cat $outputdir/tmp/win_ids | tail -1)
echo "[*] Generating plots..."
(
cat << _EOF_
$PLOT_HF
set term x11 window "$W_ID3"
set output
replot
pause mouse close
_EOF_
) | gnuplot 2> /dev/null &
(
cat << _EOF_
$PLOT_LF
set term x11 window "$W_ID4"
set output
replot
pause mouse close
_EOF_
) | gnuplot 2> /dev/null &
(
cat << _EOF_
$PLOT_ES
set term x11 window "$W_ID2"
set output
replot
pause mouse close
_EOF_
) | gnuplot 2> /dev/null &
(
cat << _EOF_
$PLOT_EG
set term x11 window "$W_ID1"
set output
replot
pause mouse close
_EOF_
) | gnuplot 2> /dev/null &
sleep 1
rm "$outputdir/tmp/win_ids"
if [ -z "$(ls -A $outputdir/tmp)" ]; then
rm -r "$outputdir/tmp"
fi
else
echo "[*] Generating plots..."
(
cat << _EOF_
$PLOT_HF
$PLOT_LF
$PLOT_ES
$PLOT_EG
_EOF_
) | gnuplot
echo "[?] You can also use -g flag to view the plots in an GUI window, and interact with the plots (if you have built afl-plot-ui). Run \"afl-plot-h\" to know more."
fi
if [ ! -s "$outputdir/exec_speed.png" ]; then
echo "[-] Error: something went wrong! Perhaps you have an ancient version of gnuplot?" 1>&2

View File

@ -6,10 +6,12 @@ test "$1" = "-h" -o "$1" = "-hh" && {
echo
echo afl-system-config has no command line options
echo
echo afl-system reconfigures the system to a high performance fuzzing state
echo afl-system-config reconfigures the system to a high performance fuzzing state.
echo "WARNING: this reduces the security of the system!"
echo
exit 1
echo Note that there is also afl-persistent-config which sets additional permanent
echo configuration options.
exit 0
}
DONE=
@ -99,9 +101,10 @@ if [ "$PLATFORM" = "NetBSD" ] ; then
DONE=1
fi
if [ "$PLATFORM" = "Darwin" ] ; then
sysctl kern.sysv.shmmax=8388608
sysctl kern.sysv.shmmax=524288000
sysctl kern.sysv.shmmin=1
sysctl kern.sysv.shmseg=48
sysctl kern.sysv.shmall=98304
sysctl kern.sysv.shmall=131072000
echo Settings applied.
echo
if [ $(launchctl list 2>/dev/null | grep -q '\.ReportCrash$') ] ; then

View File

@ -1,6 +1,6 @@
# Custom Mutators
Custom mutators enhance and alter the mutation strategies of afl++.
Custom mutators enhance and alter the mutation strategies of AFL++.
For further information and documentation on how to write your own, read [the docs](../docs/custom_mutators.md).
## Examples
@ -11,9 +11,9 @@ The `./examples` folder contains examples for custom mutators in python and C.
In `./rust`, you will find rust bindings, including a simple example in `./rust/example` and an example for structured fuzzing, based on lain, in `./rust/example_lain`.
## The afl++ Grammar Mutator
## The AFL++ Grammar Mutator
If you use git to clone afl++, then the following will incorporate our
If you use git to clone AFL++, then the following will incorporate our
excellent grammar custom mutator:
```sh
git submodule update --init
@ -40,7 +40,7 @@ Multiple custom mutators can be used by separating their paths with `:` in the e
### Superion Mutators
Adrian Tiron ported the Superion grammar fuzzer to afl++, it is WIP and
Adrian Tiron ported the Superion grammar fuzzer to AFL++; it is WIP and
requires cmake (among other things):
[https://github.com/adrian-rt/superion-mutator](https://github.com/adrian-rt/superion-mutator)
@ -52,8 +52,8 @@ transforms protobuf raw:
https://github.com/bruce30262/libprotobuf-mutator_fuzzing_learning/tree/master/4_libprotobuf_aflpp_custom_mutator
has a transform function you need to fill for your protobuf format; however, it
needs to be ported to the updated afl++ custom mutator API (not much work):
needs to be ported to the updated AFL++ custom mutator API (not much work):
https://github.com/thebabush/afl-libprotobuf-mutator
same as above but is for current afl++:
same as above but is for current AFL++:
https://github.com/P1umer/AFLplusplus-protobuf-mutator

View File

@ -349,12 +349,15 @@ uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) {
* @param data pointer returned in afl_custom_init for this fuzz case
* @param filename_new_queue File name of the new queue entry
* @param filename_orig_queue File name of the original queue entry
* @return if the file contents were modified return 1 (True), 0 (False)
* otherwise
*/
void afl_custom_queue_new_entry(my_mutator_t * data,
uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
/* Additional analysis on the original or new test case */
return 0;
}

View File

@ -0,0 +1 @@
af8dd4a307e7b837f9fa2959549548ace4afe08b

View File

@ -0,0 +1,45 @@
# GramaTron
Gramatron is a coverage-guided fuzzer that uses grammar automatons to perform
grammar-aware fuzzing. Technical details about our framework are available
in the [ISSTA'21 paper](https://nebelwelt.net/files/21ISSTA.pdf).
The artifact to reproduce the experiments presented in the paper is present
in `artifact/`. Instructions to run a sample campaign and to incorporate new
grammars are presented below:
# Compiling
Simply execute `./build_gramatron_mutator.sh`
# Running
You have to set the grammar automaton file to use with `GRAMATRON_AUTOMATION`:
```
export AFL_DISABLE_TRIM=1
export AFL_CUSTOM_MUTATOR_ONLY=1
export AFL_CUSTOM_MUTATOR_LIBRARY=./gramatron.so
export GRAMATRON_AUTOMATION=grammars/ruby/source_automata.json
afl-fuzz -i in -o out -- ./target
```
# Adding and testing a new grammar
- Specify the grammar in a JSON format for the CFG. Examples are the corresponding `source.json` files
- Run the automaton generation script (in `src/gramfuzz-mutator/preprocess`)
which will place the generated automaton in the same folder.
```
./preprocess/prep_automaton.sh <grammar_file> <start_symbol> [stack_limit]
Eg. ./preprocess/prep_automaton.sh ~/grammars/ruby/source.json PROGRAM
```
- If the grammar has no self-embedding rules then you do not need to pass the
stack limit parameter; if it does have them, the parameter is required. We
recommend starting with `5` and then increasing it if you need more complexity.
- To sanity-check that the automaton is generating inputs as expected you can use the `test` binary housed in `src/gramfuzz-mutator`
```
./test SanityCheck <automaton_file>
Eg. ./test SanityCheck ~/grammars/ruby/source_automata.json
```

View File

@ -0,0 +1,149 @@
#!/bin/sh
#
# american fuzzy lop++ - gramatron build script
# ------------------------------------------------
#
# Originally written by Nathan Voss <njvoss99@gmail.com>
#
# Adapted from code by Andrew Griffiths <agriffiths@google.com> and
# Michal Zalewski
#
# Adapted for AFLplusplus by Dominik Maier <mail@dmnk.co>
#
# Copyright 2017 Battelle Memorial Institute. All rights reserved.
# Copyright 2019-2020 AFLplusplus Project. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This script checks out and builds the json-c dependency (via the git
# submodule or a fresh clone) and then compiles the Gramatron custom
# mutator (gramatron.so) against it.
#
# You must build afl-fuzz first (src/afl-performance.o is required) before
# running this script.
JSONC_VERSION="$(cat ./JSONC_VERSION)"
JSONC_REPO="https://github.com/json-c/json-c"
echo "================================================="
echo "Gramatron Mutator build script"
echo "================================================="
echo
echo "[*] Performing basic sanity checks..."
PLT=`uname -s`
if [ ! -f "../../config.h" ]; then
echo "[-] Error: key files not found - wrong working directory?"
exit 1
fi
if [ ! -f "../../src/afl-performance.o" ]; then
echo "[-] Error: you must build afl-fuzz first and not do a \"make clean\""
exit 1
fi
PYTHONBIN=`command -v python3 || command -v python || command -v python2 || echo python3`
MAKECMD=make
TARCMD=tar
if [ "$PLT" = "Darwin" ]; then
CORES=`sysctl -n hw.ncpu`
TARCMD=tar
fi
if [ "$PLT" = "FreeBSD" ]; then
MAKECMD=gmake
CORES=`sysctl -n hw.ncpu`
TARCMD=gtar
fi
if [ "$PLT" = "NetBSD" ] || [ "$PLT" = "OpenBSD" ]; then
MAKECMD=gmake
CORES=`sysctl -n hw.ncpu`
TARCMD=gtar
fi
PREREQ_NOTFOUND=
for i in git $MAKECMD $TARCMD; do
T=`command -v "$i" 2>/dev/null`
if [ "$T" = "" ]; then
echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i' or similar."
PREREQ_NOTFOUND=1
fi
done
test -z "$CC" && export CC=cc
if echo "$CC" | grep -qF /afl-; then
echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool."
PREREQ_NOTFOUND=1
fi
if [ "$PREREQ_NOTFOUND" = "1" ]; then
exit 1
fi
echo "[+] All checks passed!"
echo "[*] Making sure json-c is checked out"
git status 1>/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
echo "[*] initializing json-c submodule"
git submodule init || exit 1
git submodule update ./json-c 2>/dev/null # ignore errors
else
echo "[*] cloning json-c"
test -d json-c || {
CNT=1
while [ '!' -d json-c -a "$CNT" -lt 4 ]; do
echo "Trying to clone json-c (attempt $CNT/3)"
git clone "$JSONC_REPO"
CNT=`expr "$CNT" + 1`
done
}
fi
test -d json-c || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; }
echo "[+] Got json-c."
test -e json-c/.libs/libjson-c.a || {
cd "json-c" || exit 1
echo "[*] Checking out $JSONC_VERSION"
sh -c 'git stash && git stash drop' 1>/dev/null 2>/dev/null
git checkout "$JSONC_VERSION" || exit 1
sh autogen.sh || exit 1
export CFLAGS=-fPIC
./configure --disable-shared || exit 1
make || exit 1
cd ..
}
echo
echo
echo "[+] Json-c successfully prepared!"
echo "[+] Builing gramatron now."
$CC -O3 -g -fPIC -Wno-unused-result -Wl,--allow-multiple-definition -I../../include -o gramatron.so -shared -I. -I/prg/dev/include gramfuzz.c gramfuzz-helpers.c gramfuzz-mutators.c gramfuzz-util.c hashmap.c ../../src/afl-performance.o json-c/.libs/libjson-c.a || exit 1
echo
echo "[+] gramatron successfully built!"

View File

@ -0,0 +1,336 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "afl-fuzz.h"
#include "gramfuzz.h"
/*Slices from beginning till idx*/
Array *slice(Array *input, int idx) {
// printf("\nSlice idx:%d", idx);
terminal *origptr;
terminal *term_ptr;
Array * sliced = (Array *)malloc(sizeof(Array));
initArray(sliced, input->size);
// Populate dynamic array members
if (idx == 0) { return sliced; }
for (int x = 0; x < idx; x++) {
origptr = &input->start[x];
insertArray(sliced, origptr->state, origptr->symbol, origptr->symbol_len,
origptr->trigger_idx);
}
return sliced;
}
/* Slices from idx till end*/
Array *slice_inverse(Array *input, int idx) {
// printf("\nSlice idx:%d", idx);
terminal *origptr;
terminal *term_ptr;
Array * sliced = (Array *)malloc(sizeof(Array));
initArray(sliced, input->size);
for (int x = idx; x < input->used; x++) {
origptr = &input->start[x];
insertArray(sliced, origptr->state, origptr->symbol, origptr->symbol_len,
origptr->trigger_idx);
}
return sliced;
}
/*Carves with `start` included and `end` excluded*/
Array *carve(Array *input, int start, int end) {
terminal *origptr;
terminal *term_ptr;
Array * sliced = (Array *)malloc(sizeof(Array));
initArray(sliced, input->size);
for (int x = start; x < end; x++) {
origptr = &input->start[x];
insertArray(sliced, origptr->state, origptr->symbol, origptr->symbol_len,
origptr->trigger_idx);
}
return sliced;
}
/*Concats prefix + feature *mult*/
void concatPrefixFeature(Array *prefix, Array *feature) {
// XXX: Currently we have hardcoded the multiplication threshold for adding
// the recursive feature. Might want to fix it to choose a random number upper
// bounded by a static value instead.
terminal *featureptr;
int len = rand_below(global_afl, RECUR_THRESHOLD);
for (int x = 0; x < len; x++) {
for (int y = 0; y < feature->used; y++) {
featureptr = &feature->start[y];
insertArray(prefix, featureptr->state, featureptr->symbol,
featureptr->symbol_len, featureptr->trigger_idx);
}
}
}
void concatPrefixFeatureBench(Array *prefix, Array *feature) {
// XXX: Currently we have hardcoded the multiplication threshold for adding
// the recursive feature. Might want to fix it to choose a random number upper
// bounded by a static value instead.
terminal *featureptr;
int len =
5; // 5 is the number of times we compare performing random recursion.
for (int x = 0; x < len; x++) {
for (int y = 0; y < feature->used; y++) {
featureptr = &feature->start[y];
insertArray(prefix, featureptr->state, featureptr->symbol,
featureptr->symbol_len, featureptr->trigger_idx);
}
}
}
Array *spliceGF(Array *orig, Array *toSplice, int idx) {
terminal *toSplicePtr;
terminal *tempPtr;
// Iterate through the splice candidate from the `idx` till end
for (int x = idx; x < toSplice->used; x++) {
toSplicePtr = &toSplice->start[x];
insertArray(orig, toSplicePtr->state, toSplicePtr->symbol,
toSplicePtr->symbol_len, toSplicePtr->trigger_idx);
}
return orig;
}
Array *gen_input(state *pda, Array *input) {
state * state_ptr;
trigger * trigger_ptr;
terminal *term_ptr;
int offset = 0;
int randval, error;
// Generating an input for the first time
if (input == NULL) {
input = (Array *)calloc(1, sizeof(Array));
initArray(input, INIT_SIZE);
curr_state = init_state;
}
while (curr_state != final_state) {
// Retrieving the state from the pda
state_ptr = pda + curr_state;
// Get a random trigger
randval = rand_below(global_afl, state_ptr->trigger_len);
trigger_ptr = (state_ptr->ptr) + randval;
// Insert into the dynamic array
insertArray(input, curr_state, trigger_ptr->term, trigger_ptr->term_len,
randval);
curr_state = trigger_ptr->dest;
offset += 1;
}
return input;
}
Array *gen_input_count(state *pda, Array *input, int *mut_count) {
state * state_ptr;
trigger * trigger_ptr;
terminal *term_ptr;
int offset = 0;
int randval, error;
// Generating an input for the first time
if (input == NULL) {
input = (Array *)calloc(1, sizeof(Array));
initArray(input, INIT_SIZE);
curr_state = init_state;
}
while (curr_state != final_state) {
*mut_count += 1;
// Retrieving the state from the pda
state_ptr = pda + curr_state;
// Get a random trigger
randval = rand_below(global_afl, state_ptr->trigger_len);
trigger_ptr = (state_ptr->ptr) + randval;
// Insert into the dynamic array
insertArray(input, curr_state, trigger_ptr->term, trigger_ptr->term_len,
randval);
curr_state = trigger_ptr->dest;
offset += 1;
}
return input;
}
/*Creates a candidate from walk with state hashmap and
* recursion hashmap
*/
Candidate *gen_candidate(Array *input) {
terminal * term_ptr;
IdxMap_new *idxmapPtr;
// Declare the State Hash Table
IdxMap_new *idxmapStart =
(IdxMap_new *)malloc(sizeof(IdxMap_new) * numstates);
for (int x = 0; x < numstates; x++) {
idxmapPtr = &idxmapStart[x];
utarray_new(idxmapPtr->nums, &ut_int_icd);
}
char * trigger;
int state;
char * key;
Candidate *candidate = (Candidate *)malloc(sizeof(Candidate));
candidate->walk = input;
int offset = 0, error;
// Generate statemap for splicing
while (offset < input->used) {
term_ptr = &input->start[offset];
state = term_ptr->state;
// char *statenum = state + 1;
// int num = atoi(statenum);
idxmapPtr = &idxmapStart[state];
utarray_push_back(idxmapPtr->nums, &offset);
offset += 1;
}
candidate->statemap = idxmapStart;
return candidate;
}
char *get_state(char *trigger) {
// Get the state from transition
int trigger_idx = 0;
printf("\nTrigger:%s", trigger);
char *state = (char *)malloc(sizeof(char) * 10);
while (trigger[trigger_idx] != '_') {
state[trigger_idx] = trigger[trigger_idx];
trigger_idx += 1;
}
printf("\nTrigger Idx:%d", trigger_idx);
state[trigger_idx] = '\0';
return state;
}
void print_repr(Array *input, char *prefix) {
size_t offset = 0;
terminal *term_ptr;
char geninput[input->used * 100];
if (!input->used) {
printf("\n=============");
printf("\n%s:%s", prefix, "");
printf("\n=============");
return;
}
// This is done to create a null-terminated initial string
term_ptr = &input->start[offset];
strcpy(geninput, term_ptr->symbol);
offset += 1;
while (offset < input->used) {
term_ptr = &input->start[offset];
strcat(geninput, term_ptr->symbol);
offset += 1;
}
printf("\n=============");
printf("\n%s:%s", prefix, geninput);
printf("\n=============");
}
// int main(int argc, char*argv[]) {
// char *mode;
// if (argc == 1) {
// printf("\nUsage: ./gramfuzzer <mode>");
// return -1;
// }
// if (argc >= 2) {
// mode = argv[1];
// printf("\nMode:%s", mode);
// }
// if (! strcmp(mode, "Generate")) {
// GenInputBenchmark();
// }
// else if (! strcmp(mode, "RandomMutation")) {
// RandomMutationBenchmark();
// }
// else if (! strcmp(mode, "Splice")) {
// SpliceMutationBenchmark();
// }
// else if (! strcmp(mode, "Recursive")) {
// RandomRecursiveBenchmark();
// }
// else {
// printf("\nUnrecognized mode");
// return -1;
// }
// return 0;
// }
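
The three copy helpers above differ only in which index range they keep. A small
illustrative sketch, assuming `walk` is an existing `Array` with at least five
terminals; the copies share the original symbol pointers, so only their own
bookkeeping is freed:
```
Array *head = slice(walk, 2);          /* copies terminals [0, 2)            */
Array *tail = slice_inverse(walk, 2);  /* copies terminals [2, walk->used)   */
Array *mid  = carve(walk, 1, 3);       /* copies terminals [1, 3)            */
/* the copies own only their bookkeeping, not the symbol strings */
free(head->start); free(head);
free(tail->start); free(tail);
free(mid->start);  free(mid);
```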

View File

@ -0,0 +1,247 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "afl-fuzz.h"
#include "gramfuzz.h"
Array *performRandomMutation(state *pda, Array *input) {
terminal *term_ptr;
// terminal *prev_ptr;
Array *mutated;
Array *sliced;
// Get offset at which to generate new input and slice it
int idx = rand_below(global_afl, input->used);
sliced = slice(input, idx);
// print_repr(sliced, "Slice");
// prev_ptr = & input->start[idx - 1];
// printf("\nState:%s Symbol:%s", prev_ptr->state, prev_ptr->symbol);
// Reset current state to that of the slice's last member
term_ptr = &input->start[idx];
curr_state = term_ptr->state;
// printf("\nState:%s Symbol:%s", curr_state, term_ptr->symbol);
// Set the next available cell to the one adjacent to this chosen point
mutated = gen_input(pda, sliced);
return mutated;
}
// Tries to perform splice operation between two automaton walks
UT_icd intpair_icd = {sizeof(intpair_t), NULL, NULL, NULL};
Array *performSpliceOne(Array *originput, IdxMap_new *statemap_orig,
Array *splicecand) {
UT_array * stateptr, *pairs;
intpair_t ip;
intpair_t *cand;
terminal *term_ptr;
Array * prefix;
int state;
  // Initialize the dynamic array holding the splice index pairs
utarray_new(pairs, &intpair_icd);
// print_repr(originput, "Orig");
// print_repr(splicecand, "SpliceCand");
// Iterate through the splice candidate identifying potential splice points
// and pushing pair (orig_idx, splice_idx) to a dynamic array
for (int x = 0; x < splicecand->used; x++) {
term_ptr = &splicecand->start[x];
stateptr = statemap_orig[term_ptr->state].nums;
int length = utarray_len(stateptr);
if (length) {
int *splice_idx = (int *)utarray_eltptr(stateptr, rand_below(global_afl, length));
ip.orig_idx = *splice_idx;
ip.splice_idx = x;
utarray_push_back(pairs, &ip);
}
}
// Pick a random pair
int length = utarray_len(pairs);
cand = (intpair_t *)utarray_eltptr(pairs, rand_below(global_afl, length));
// printf("\n Orig_idx:%d Splice_idx:%d", cand->orig_idx, cand->splice_idx);
// Perform the splicing
prefix = slice(originput, cand->orig_idx);
Array *spliced = spliceGF(prefix, splicecand, cand->splice_idx);
// print_repr(spliced, "Spliced");
//
utarray_free(pairs);
return spliced;
}
UT_array **get_dupes(Array *input, int *recur_len) {
// Variables related to finding duplicates
int offset = 0;
int state;
terminal * term_ptr;
IdxMap_new *idxMapPtr;
UT_array ** recurIdx;
// Declare the Recursive Map Table
IdxMap_new *idxmapStart =
(IdxMap_new *)malloc(sizeof(IdxMap_new) * numstates);
//
// UT_array *(recurIdx[numstates]);
recurIdx = malloc(sizeof(UT_array *) * numstates);
for (int x = 0; x < numstates; x++) {
idxMapPtr = &idxmapStart[x];
utarray_new(idxMapPtr->nums, &ut_int_icd);
}
// Obtain frequency distribution of states
while (offset < input->used) {
term_ptr = &input->start[offset];
state = term_ptr->state;
// int num = atoi(state + 1);
idxMapPtr = &idxmapStart[state];
utarray_push_back(idxMapPtr->nums, &offset);
offset += 1;
}
// Retrieve the duplicated states
offset = 0;
while (offset < numstates) {
idxMapPtr = &idxmapStart[offset];
int length = utarray_len(idxMapPtr->nums);
if (length >= 2) {
recurIdx[*recur_len] = idxMapPtr->nums;
*recur_len += 1;
}
// else {
// utarray_free(idxMapPtr->nums);
// }
offset += 1;
}
if (*recur_len) {
// Declare the return struct
// We use this struct so that we save the reference to IdxMap_new and free
// it after we have used it in doMult
// Get_Dupes_Ret* getdupesret =
// (Get_Dupes_Ret*)malloc(sizeof(Get_Dupes_Ret));
return recurIdx;
// getdupesret->idxmap = idxmapStart;
// getdupesret->recurIdx = recurIdx;
// return getdupesret;
} else {
return NULL;
}
}
Array *doMult(Array *input, UT_array **recur, int recurlen) {
int offset = 0;
int idx = rand_below(global_afl, recurlen);
UT_array *recurMap = recur[idx];
UT_array *recurPtr;
Array * prefix;
Array * postfix;
Array * feature;
// Choose two indices to get the recursive feature
int recurIndices = utarray_len(recurMap);
int firstIdx = 0;
int secondIdx = 0;
getTwoIndices(recurMap, recurIndices, &firstIdx, &secondIdx);
// Perform the recursive mut
// print_repr(input, "Orig");
prefix = slice(input, firstIdx);
// print_repr(prefix, "Prefix");
if (firstIdx < secondIdx) {
feature = carve(input, firstIdx, secondIdx);
} else {
feature = carve(input, secondIdx, firstIdx);
}
// print_repr(feature, "Feature");
concatPrefixFeature(prefix, feature);
// GC allocated structures
free(feature->start);
free(feature);
// for(int x = 0; x < recurlen; x++) {
// utarray_free(recur[x]);
// }
// free(recur);
// print_repr(prefix, "Concat");
return spliceGF(prefix, input, secondIdx);
}
void getTwoIndices(UT_array *recur, int recurlen, int *firstIdx,
int *secondIdx) {
int ArrayRecurIndices[recurlen];
int offset = 0, *p;
// Unroll into an array
for (p = (int *)utarray_front(recur); p != NULL;
p = (int *)utarray_next(recur, p)) {
ArrayRecurIndices[offset] = *p;
offset += 1;
}
/*Source:
* https://www.geeksforgeeks.org/shuffle-a-given-array-using-fisher-yates-shuffle-algorithm/
*/
for (int i = offset - 1; i > 0; i--) {
// Pick a random index from 0 to i
int j = rand_below(global_afl, i + 1);
// Swap arr[i] with the element at random index
swap(&ArrayRecurIndices[i], &ArrayRecurIndices[j]);
}
*firstIdx = ArrayRecurIndices[0];
*secondIdx = ArrayRecurIndices[1];
}
void swap(int *a, int *b) {
int temp = *a;
*a = *b;
*b = temp;
}

View File

@ -0,0 +1,268 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "afl-fuzz.h"
#include "gramfuzz.h"
#ifdef _GNU_SOURCE
#undef _GNU_SOURCE
#endif
#define _GNU_SOURCE
#include <sys/mman.h>
/* Dynamic Array for adding to the input repr
* */
void initArray(Array *a, size_t initialSize) {
a->start = (terminal *)calloc(1, sizeof(terminal) * initialSize);
a->used = 0;
a->size = initialSize;
a->inputlen = 0;
}
void insertArray(Array *a, int state, char *symbol, size_t symbol_len,
int trigger_idx) {
// a->used is the number of used entries, because a->array[a->used++] updates
// a->used only *after* the array has been accessed. Therefore a->used can go
// up to a->size
terminal *term_ptr;
if (a->used == a->size) {
    a->size = a->size * 2;
a->start = (terminal *)realloc(a->start, a->size * sizeof(terminal));
}
// Add the element
term_ptr = &a->start[a->used];
term_ptr->state = state;
term_ptr->symbol = symbol;
term_ptr->symbol_len = symbol_len;
term_ptr->trigger_idx = trigger_idx;
// Increment the pointer
a->used += 1;
a->inputlen += symbol_len;
}
void freeArray(Array *a) {
  // The terminals live in one contiguous allocation, so freeing the backing
  // buffer releases them all.
  free(a->start);
  a->start = NULL;
  a->used = a->size = 0;
}
/* Dynamic array for adding indices of states/recursive features
* Source:
* https://stackoverflow.com/questions/3536153/c-dynamically-growing-array
*/
void initArrayIdx(IdxMap *a, size_t initialSize) {
a->array = (int *)malloc(initialSize * sizeof(int));
a->used = 0;
a->size = initialSize;
}
void insertArrayIdx(IdxMap *a, int idx) {
// a->used is the number of used entries, because a->array[a->used++] updates
// a->used only *after* the array has been accessed. Therefore a->used can go
// up to a->size
if (a->used == a->size) {
a->size *= 2;
a->array = (int *)realloc(a->array, a->size * sizeof(int));
}
a->array[a->used++] = idx;
}
void freeArrayIdx(IdxMap *a) {
free(a->array);
a->array = NULL;
a->used = a->size = 0;
}
/* Dynamic array for adding potential splice points
*/
void initArraySplice(SpliceCandArray *a, size_t initialSize) {
a->start = (SpliceCand *)malloc(initialSize * sizeof(SpliceCand));
a->used = 0;
a->size = initialSize;
}
void insertArraySplice(SpliceCandArray *a, Candidate *candidate, int idx) {
// a->used is the number of used entries, because a->array[a->used++] updates
// a->used only *after* the array has been accessed. Therefore a->used can go
// up to a->size
SpliceCand *candptr;
if (a->used == a->size) {
    a->size = a->size * 2;
a->start = (SpliceCand *)realloc(a->start, a->size * sizeof(SpliceCand));
}
// Add the element
candptr = &a->start[a->used];
candptr->splice_cand = candidate;
candptr->idx = idx;
a->used += 1;
}
void freeArraySplice(IdxMap *a) {
free(a->array);
a->array = NULL;
a->used = a->size = 0;
}
int fact(int n) {
int i, f = 1;
for (i = 1; i <= n; i++) {
f *= i;
}
return f;
}
/* Uses the walk to create the input in-memory */
u8 *unparse_walk(Array *input) {
terminal *term_ptr;
int offset = 0;
u8 * unparsed = (u8 *)malloc(input->inputlen + 1);
term_ptr = &input->start[offset];
strcpy(unparsed, term_ptr->symbol);
offset += 1;
while (offset < input->used) {
term_ptr = &input->start[offset];
strcat(unparsed, term_ptr->symbol);
offset += 1;
}
return unparsed;
}
/*Dump the input representation into a file*/
void write_input(Array *input, u8 *fn) {
FILE *fp;
// If file already exists, then skip creating the file
if (access(fn, F_OK) != -1) { return; }
fp = fopen(fn, "wbx+");
  // Bail out if the file could not be created
  if (fp == NULL) {
    fprintf(stderr, "\n File '%s' could not be opened, exiting\n", fn);
exit(1);
}
// Write the length parameters
fwrite(&input->used, sizeof(size_t), 1, fp);
fwrite(&input->size, sizeof(size_t), 1, fp);
fwrite(&input->inputlen, sizeof(size_t), 1, fp);
// Write the dynamic array to file
fwrite(input->start, input->size * sizeof(terminal), 1, fp);
// printf("\nUsed:%zu Size:%zu Inputlen:%zu", input->used, input->size,
// input->inputlen);
fclose(fp);
}
Array *parse_input(state *pda, FILE *fp) {
terminal *term;
state * state_ptr;
trigger * trigger;
int trigger_idx;
Array * input = (Array *)calloc(1, sizeof(Array));
// Read the length parameters
fread(&input->used, sizeof(size_t), 1, fp);
fread(&input->size, sizeof(size_t), 1, fp);
fread(&input->inputlen, sizeof(size_t), 1, fp);
terminal *start_ptr = (terminal *)calloc(input->size, sizeof(terminal));
if (!start_ptr) {
fprintf(stderr, "alloc failed!\n");
return NULL;
}
// Read the dynamic array to memory
fread(start_ptr, input->size * sizeof(terminal), 1, fp);
// Update the pointers to the terminals since they would have
// changed
int idx = 0;
while (idx < input->used) {
terminal *term = &start_ptr[idx];
// Find the state
state_ptr = pda + term->state;
// Find the trigger and update the terminal address
trigger_idx = term->trigger_idx;
trigger = (state_ptr->ptr) + trigger_idx;
term->symbol = trigger->term;
idx += 1;
}
input->start = start_ptr;
// printf("\nUsed:%zu Size:%zu Inputlen:%zu", input->used, input->size,
// input->inputlen);
return input;
}
// Read the input representation into memory
Array *read_input(state *pda, u8 *fn) {
FILE *fp;
fp = fopen(fn, "rb");
if (fp == NULL) {
fprintf(stderr, "\n File '%s' does not exist, exiting\n", fn);
exit(1);
}
Array *res = parse_input(pda, fp);
fclose(fp);
return res;
}
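
Putting the serialization helpers together, a hypothetical round trip could look
like the sketch below. It assumes `pda` has already been built with
`create_pda()` and that `global_afl` is set (since `gen_input()` draws its
randomness from it); the queue path is purely illustrative:
```
/* Generate a walk, persist it next to a queue entry, and reload it.
   read_input() re-resolves every terminal's symbol pointer through the pda,
   since the raw pointers written by write_input() are meaningless in a new
   process. */
Array *walk  = gen_input(pda, NULL);
write_input(walk, (u8 *)"out/default/queue/id:000000.aut");
Array *again = read_input(pda, (u8 *)"out/default/queue/id:000000.aut");
free(walk->start);
free(walk);
free(again->start);
free(again);
```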

View File

@ -0,0 +1,429 @@
// Gramatron: grammar-automaton-based custom mutator for afl-fuzz
// needs -I /path/to/AFLplusplus/include
//#include "custom_mutator_helpers.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "afl-fuzz.h"
#include "gramfuzz.h"
#define MUTATORS 4 // Specify the total number of mutators
typedef struct my_mutator {
afl_state_t *afl;
u8 * mutator_buf;
u8 * unparsed_input;
Array *mutated_walk;
Array *orig_walk;
IdxMap_new *statemap; // Keeps track of the statemap
UT_array ** recurIdx;
// Get_Dupes_Ret* getdupesret; // Recursive feature map
int recurlen;
int mut_alloced;
int orig_alloced;
int mut_idx; // Signals the current mutator being used, used to cycle through
// each mutator
unsigned int seed;
} my_mutator_t;
state *create_pda(u8 *automaton_file) {
struct json_object *parsed_json;
state * pda;
json_object * source_obj, *attr;
int arraylen, ii, ii2, trigger_len, error;
printf("\n[GF] Automaton file passed:%s", automaton_file);
// parsed_json =
// json_object_from_file("./gramfuzz/php_gnf_processed_full.json");
parsed_json = json_object_from_file(automaton_file);
// Getting final state
source_obj = json_object_object_get(parsed_json, "final_state");
printf("\t\nFinal=%s\n", json_object_get_string(source_obj));
final_state = atoi(json_object_get_string(source_obj));
// Getting initial state
source_obj = json_object_object_get(parsed_json, "init_state");
init_state = atoi(json_object_get_string(source_obj));
printf("\tInit=%s\n", json_object_get_string(source_obj));
// Getting number of states
source_obj = json_object_object_get(parsed_json, "numstates");
numstates = atoi(json_object_get_string(source_obj)) + 1;
printf("\tNumStates=%d\n", numstates);
// Allocate state space for each pda state
pda = (state *)calloc(atoi(json_object_get_string(source_obj)) + 1,
sizeof(state));
// Getting PDA representation
source_obj = json_object_object_get(parsed_json, "pda");
enum json_type type;
json_object_object_foreach(source_obj, key, val) {
state * state_ptr;
trigger *trigger_ptr;
int offset;
// Get the correct offset into the pda to store state information
state_ptr = pda;
offset = atoi(key);
state_ptr += offset;
// Store state string
state_ptr->state_name = offset;
// Create trigger array of structs
trigger_len = json_object_array_length(val);
state_ptr->trigger_len = trigger_len;
trigger_ptr = (trigger *)calloc(trigger_len, sizeof(trigger));
state_ptr->ptr = trigger_ptr;
for (ii = 0; ii < trigger_len; ii++) {
json_object *obj = json_object_array_get_idx(val, ii);
// Get all the trigger trigger attributes
attr = json_object_array_get_idx(obj, 0);
(trigger_ptr)->id = strdup(json_object_get_string(attr));
attr = json_object_array_get_idx(obj, 1);
trigger_ptr->dest = atoi(json_object_get_string(attr));
attr = json_object_array_get_idx(obj, 2);
if (!strcmp("\\n", json_object_get_string(attr))) {
trigger_ptr->term = strdup("\n");
} else {
trigger_ptr->term = strdup(json_object_get_string(attr));
}
trigger_ptr->term_len = strlen(trigger_ptr->term);
trigger_ptr++;
}
}
// Delete the JSON object
json_object_put(parsed_json);
return pda;
}
my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {
my_mutator_t *data = calloc(1, sizeof(my_mutator_t));
if (!data) {
perror("afl_custom_init alloc");
return NULL;
}
if ((data->mutator_buf = malloc(MAX_FILE)) == NULL) {
perror("mutator_buf alloc");
return NULL;
}
data->afl = afl;
global_afl = afl; // dirty
data->seed = seed;
data->mut_alloced = 0;
data->orig_alloced = 0;
data->mut_idx = 0;
data->recurlen = 0;
// data->mutator_buf = NULL;
// data->unparsed_input = NULL;
// data->mutated_walk = NULL;
// data->orig_walk = NULL;
//
// data->statemap = NULL; // Keeps track of the statemap
// data->recur_idx = NULL; // Will keep track of recursive feature indices
// u32 recur_len = 0; // The number of recursive features
// data->mutator_buf = NULL;
char *automaton_file = getenv("GRAMATRON_AUTOMATION");
if (automaton_file) {
pda = create_pda(automaton_file);
} else {
fprintf(stderr,
"\nError: GrammaTron needs an automation json file set in "
"GRAMATRON_AUTOMATION\n");
exit(-1);
}
return data;
}
size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size,
u8 **out_buf, uint8_t *add_buf, size_t add_buf_size,
size_t max_size) {
u8 *unparsed_input;
// Pick a mutator
// int choice = rand() % MUTATORS;
// data->mut_idx = 1;
// GC old mutant
if (data->mut_alloced) {
free(data->mutated_walk->start);
free(data->mutated_walk);
data->mut_alloced = 0;
};
// printf("\nChoice:%d", choice);
if (data->mut_idx == 0) { // Perform random mutation
data->mutated_walk = performRandomMutation(pda, data->orig_walk);
data->mut_alloced = 1;
} else if (data->mut_idx == 1 &&
data->recurlen) { // Perform recursive mutation
data->mutated_walk =
doMult(data->orig_walk, data->recurIdx, data->recurlen);
data->mut_alloced = 1;
} else if (data->mut_idx == 2) { // Perform splice mutation
// we cannot use the supplied splice data so choose a new random file
u32 tid = rand_below(global_afl, data->afl->queued_paths);
struct queue_entry *q = data->afl->queue_buf[tid];
// Read the input representation for the splice candidate
u8 * automaton_fn = alloc_printf("%s.aut", q->fname);
Array *spliceCandidate = read_input(pda, automaton_fn);
if (spliceCandidate) {
data->mutated_walk =
performSpliceOne(data->orig_walk, data->statemap, spliceCandidate);
data->mut_alloced = 1;
free(spliceCandidate->start);
free(spliceCandidate);
} else {
data->mutated_walk = gen_input(pda, NULL);
data->mut_alloced = 1;
}
ck_free(automaton_fn);
} else { // Generate an input from scratch
data->mutated_walk = gen_input(pda, NULL);
data->mut_alloced = 1;
}
// Cycle to the next mutator
if (data->mut_idx == MUTATORS - 1)
data->mut_idx =
0; // Wrap around if we have reached end of the mutator list
else
data->mut_idx += 1;
// Unparse the mutated automaton walk
if (data->unparsed_input) { free(data->unparsed_input); }
data->unparsed_input = unparse_walk(data->mutated_walk);
*out_buf = data->unparsed_input;
return data->mutated_walk->inputlen;
}
/**
* Create the automaton-based representation for the corresponding input
*
* @param data pointer returned in afl_custom_init for this fuzz case
* @param filename_new_queue File name of the new queue entry
* @param filename_orig_queue File name of the original queue entry
*/
u8 afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
// get the filename
u8 * automaton_fn, *unparsed_input;
Array *new_input;
s32 fd;
automaton_fn = alloc_printf("%s.aut", filename_new_queue);
// Check if this method is being called during initialization
// fprintf(stderr, "new: %s, old: %s, auto: %s\n",
// filename_new_queue,filename_orig_queue,automaton_fn);
if (filename_orig_queue) {
write_input(data->mutated_walk, automaton_fn);
} else {
new_input = gen_input(pda, NULL);
write_input(new_input, automaton_fn);
// Update the placeholder file
if (unlink(filename_new_queue)) {
PFATAL("Unable to delete '%s'", filename_new_queue);
}
unparsed_input = unparse_walk(new_input);
fd = open(filename_new_queue, O_WRONLY | O_CREAT | O_TRUNC,
S_IRUSR | S_IWUSR);
if (fd < 0) { PFATAL("Failed to update file '%s'", filename_new_queue); }
int written = write(fd, unparsed_input, new_input->inputlen + 1);
close(fd);
free(new_input->start);
free(new_input);
free(unparsed_input);
}
ck_free(automaton_fn);
return 1;
}
/**
* Get the corresponding tree representation for the candidate that is to be
* mutated
*
* @param[in] data pointer returned in afl_custom_init for this fuzz case
* @param filename File name of the test case in the queue entry
* @return Return True(1) if the fuzzer will fuzz the queue entry, and
* False(0) otherwise.
*/
uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) {
// get the filename
u8 * automaton_fn = alloc_printf("%s.aut", filename);
IdxMap_new *statemap_ptr;
terminal * term_ptr;
int state;
// TODO: I don't think we need to update pointers when reading back
  // Probably build two different versions of read_input: one for flushing
  // inputs to disk and another for reading them back.
if (data->orig_alloced) {
free(data->orig_walk->start);
free(data->orig_walk);
data->orig_alloced = 0;
}
if (data->statemap) {
for (int x = 0; x < numstates; x++) {
utarray_free(data->statemap[x].nums);
}
free(data->statemap);
}
if (data->recurIdx) {
data->recurlen = 0;
free(data->recurIdx);
}
data->orig_walk = read_input(pda, automaton_fn);
data->orig_alloced = 1;
// Create statemap for the fuzz candidate
IdxMap_new *statemap_start =
(IdxMap_new *)malloc(sizeof(IdxMap_new) * numstates);
for (int x = 0; x < numstates; x++) {
statemap_ptr = &statemap_start[x];
utarray_new(statemap_ptr->nums, &ut_int_icd);
}
int offset = 0;
while (offset < data->orig_walk->used) {
term_ptr = &data->orig_walk->start[offset];
state = term_ptr->state;
statemap_ptr = &statemap_start[state];
utarray_push_back(statemap_ptr->nums, &offset);
offset += 1;
}
data->statemap = statemap_start;
// Create recursive feature map (if it exists)
data->recurIdx = malloc(sizeof(UT_array *) * numstates);
// Retrieve the duplicated states
offset = 0;
while (offset < numstates) {
statemap_ptr = &data->statemap[offset];
int length = utarray_len(statemap_ptr->nums);
if (length >= 2) {
data->recurIdx[data->recurlen] = statemap_ptr->nums;
data->recurlen += 1;
}
offset += 1;
}
// data->getdupesret = get_dupes(data->orig_walk, &data->recurlen);
ck_free(automaton_fn);
return 1;
}
/**
* Deinitialize everything
*
* @param data The data ptr from afl_custom_init
*/
void afl_custom_deinit(my_mutator_t *data) {
free(data->mutator_buf);
free(data);
}
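
For orientation, the sketch below shows the rough order in which afl-fuzz
exercises these callbacks for a single queue entry. It is a simplified
illustration, not code from afl-fuzz itself; `afl`, `in_buf`/`in_len` and the
queue paths are placeholders:
```
/* Simplified callback order for one queue entry (illustrative only). */
my_mutator_t *m = afl_custom_init(afl, 0x5eed);
/* the entry's .aut walk is loaded and preprocessed before it is fuzzed */
afl_custom_queue_get(m, (const uint8_t *)"out/default/queue/id:000000");
/* every fuzzing round produces one mutated input in *mutant */
u8    *mutant;
size_t mutant_len =
    afl_custom_fuzz(m, in_buf, in_len, &mutant, NULL, 0, MAX_FILE);
/* the target is then executed on mutant[0 .. mutant_len); if the mutant is
   kept, its .aut representation is written alongside the new queue file */
afl_custom_queue_new_entry(m, (const uint8_t *)"out/default/queue/id:000001",
                           (const uint8_t *)"out/default/queue/id:000000");
afl_custom_deinit(m);
```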

View File

@ -0,0 +1,255 @@
#ifndef _GRAMFUZZ_H
#define _GRAMFUZZ_H
#include <json-c/json.h>
#include <unistd.h>
#include "hashmap.h"
#include "uthash.h"
#include "utarray.h"
#define INIT_INPUTS 100 // No. of initial inputs to be generated
// Set this as `numstates` + 1 where `numstates` is retrieved from the
// generated automaton JSON
// #define STATES 63
#define INIT_SIZE 100 // Initial size of the dynamic array holding the input
#define SPLICE_CORPUS 10000
#define RECUR_THRESHOLD 6
#define SIZE_THRESHOLD 2048
#define FLUSH_INTERVAL \
3600 // Inputs that gave new coverage will be dumped every FLUSH_INTERVAL
// seconds
afl_state_t *global_afl;
typedef struct trigger {
char * id;
int dest;
char * term;
size_t term_len;
} trigger;
typedef struct state {
int state_name; // Integer State name
int trigger_len; // Number of triggers associated with this state
trigger *ptr; // Pointer to beginning of the list of triggers
} state;
typedef struct terminal {
int state;
int trigger_idx;
size_t symbol_len;
char * symbol;
} terminal;
typedef struct buckethash {
int freq;
} buckethash;
int init_state;
int curr_state;
int final_state;
int numstates;
/*****************
/ DYNAMIC ARRAY FOR WALKS
*****************/
typedef struct {
size_t used;
size_t size;
size_t inputlen;
terminal *start;
} Array;
/*****************
/ DYNAMIC ARRAY FOR STATEMAPS/RECURSION MAPS
*****************/
typedef struct {
int * array;
size_t used;
size_t size;
} IdxMap;
typedef struct {
UT_array *nums;
} IdxMap_new;
typedef struct {
IdxMap_new *idxmap;
UT_array ** recurIdx;
} Get_Dupes_Ret;
/* Candidate Struct */
typedef struct {
Array * walk;
IdxMap_new *statemap;
} Candidate;
/* Splice Mutation helpers*/
typedef struct {
Candidate *splice_cand;
int idx;
} SpliceCand;
typedef struct {
SpliceCand *start;
size_t used;
size_t size;
} SpliceCandArray;
// Initialize dynamic array for potential splice points
SpliceCand potential[SPLICE_CORPUS];
typedef struct {
int orig_idx;
int splice_idx;
} intpair_t;
// Initialize dynamic array for potential splice points
// SpliceCand potential[SPLICE_CORPUS];
// IdxMap_new* rcuridx[STATES];
/* Prototypes*/
Array * slice(Array *, int);
state * create_pda(u8 *);
Array * gen_input(state *, Array *);
Array * gen_input_count(state *, Array *, int *);
int updatebucket(map_t, int);
void itoa(int, char *, int);
void strrreverse(char *, char *);
void dbg_hashmap(map_t);
void print_repr(Array *, char *);
int isSatisfied(map_t);
char * get_state(char *);
Candidate *gen_candidate(Array *);
Array *spliceGF(Array *, Array *, int);
Array *performSpliceOne(Array *, IdxMap_new *, Array *);
/* Mutation Methods*/
Array * performRandomMutation(state *, Array *);
Array * performRandomMutationCount(state *, Array *, int *);
Array * performSpliceMutationBench(state *, Array *, Candidate **);
UT_array **get_dupes(Array *, int *);
Array * doMult(Array *, UT_array **, int);
Array * doMultBench(Array *, UT_array **, int);
/* Benchmarks*/
void SpaceBenchmark(char *);
void GenInputBenchmark(char *, char *);
void RandomMutationBenchmark(char *, char *);
void MutationAggrBenchmark(char *, char *);
void SpliceMutationBenchmark(char *, char *);
void SpliceMutationBenchmarkOne(char *, char *);
void RandomRecursiveBenchmark(char *, char *);
/* Testers */
void SanityCheck(char *);
/*Helpers*/
void initArray(Array *, size_t);
void insertArray(Array *, int, char *, size_t, int);
void freeArray(Array *);
void initArrayIdx(IdxMap *, size_t);
void insertArrayIdx(IdxMap *, int);
void freeArrayIdx(IdxMap *);
void initArraySplice(SpliceCandArray *, size_t);
void insertArraySplice(SpliceCandArray *, Candidate *, int);
void freeArraySplice(IdxMap *);
void getTwoIndices(UT_array *, int, int *, int *);
void swap(int *, int *);
Array *slice_inverse(Array *, int);
void concatPrefixFeature(Array *, Array *);
void concatPrefixFeatureBench(Array *, Array *);
Array *carve(Array *, int, int);
int fact(int);
void add_to_corpus(struct json_object *, Array *);
struct json_object *term_to_json(terminal *);
/* Gramatron specific prototypes */
u8 * unparse_walk(Array *);
Array *performSpliceGF(state *, Array *, afl_state_t *);
void dump_input(u8 *, char *, int *);
void write_input(Array *, u8 *);
Array *read_input(state *, u8 *);
state *pda;
// // AFL-specific struct
// typedef uint8_t u8;
// typedef uint16_t u16;
// typedef uint32_t u32;
// #ifdef __x86_64__
// typedef unsigned long long u64;
// #else
// typedef uint64_t u64;
// #endif /* ^__x86_64__ */
//
// struct queue_entry {
// Array* walk; /* Pointer to the automaton walk*/
// u32 walk_len; /* Number of tokens in the input*/
// Candidate* cand; /* Preprocessed info about the
// candidate to allow for faster mutations*/
//
// u8* fname; /* File name for the test case */
// u32 len; /* Input length */
// UT_array** recur_idx; /* Keeps track of recursive feature
// indices*/
//
// u32 recur_len; /* The number of recursive features*/
//
// u8 cal_failed, /* Calibration failed? */
// trim_done, /* Trimmed? */
// was_fuzzed, /* Had any fuzzing done yet? */
// passed_det, /* Deterministic stages passed? */
// has_new_cov, /* Triggers new coverage? */
// var_behavior, /* Variable behavior? */
// favored, /* Currently favored? */
// fs_redundant; /* Marked as redundant in the fs? */
//
// u32 bitmap_size, /* Number of bits set in bitmap */
// exec_cksum; /* Checksum of the execution trace */
//
// u64 exec_us, /* Execution time (us) */
// handicap, /* Number of queue cycles behind */
// depth; /* Path depth */
//
// u8* trace_mini; /* Trace bytes, if kept */
// u32 tc_ref; /* Trace bytes ref count */
//
// struct queue_entry *next, /* Next element, if any */
// *next_100; /* 100 elements ahead */
//
// };
#endif
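
As a small illustration of the walk representation declared above, the sketch
below builds a three-terminal walk by hand, prints it, and frees it. The
states, trigger indices and tokens are made up; it assumes the Gramatron
include paths (json-c, uthash) and the same build flags as the mutator, linked
against `gramfuzz-util.c` and `gramfuzz-helpers.c`:
```
#include "afl-fuzz.h"
#include "gramfuzz.h"
int main(void) {
  Array walk;
  initArray(&walk, INIT_SIZE);
  insertArray(&walk, 0, "var ", 4, 0);  /* state 0, trigger 0 */
  insertArray(&walk, 1, "a", 1, 2);     /* state 1, trigger 2 */
  insertArray(&walk, 2, "=1;", 3, 0);   /* state 2, trigger 0 */
  print_repr(&walk, "walk");            /* prints: walk:var a=1; */
  freeArray(&walk);
  return 0;
}
```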

View File

@ -0,0 +1,606 @@
{
"ARGLIST": [
"EXPR ',' ARGLIST",
"EXPR",
"EXPR ',' ARGLIST",
"EXPR"
],
"ARGS": [
"'()'",
"'(' ARGLIST ')'",
"'()'",
"'(' ARGLIST ')'"
],
"ARITHMETICOPERATION": [
"EXPR '/' EXPR",
"EXPR '*' EXPR",
"EXPR '+' EXPR",
"EXPR '-' EXPR",
"EXPR '%' EXPR",
"EXPR '**' EXPR",
"EXPR '++'"
],
"ARRAY": [
"'[' ARRAYCONTENT ']'",
"'[]'"
],
"ARRAYCONTENT": [
"EXPR ',' ARRAYCONTENT",
"EXPR"
],
"BOOLEAN": [
"'true'",
"'false'"
],
"BYTEWISEOPERATION": [
"EXPR '&' EXPR",
"EXPR '|' EXPR"
],
"COMPARISONOPERATION": [
"EXPR '<' EXPR"
],
"DECIMALDIGITS": [
"'20'",
"'1234'",
"'66'",
"'234_9'",
"'99999999999999999999'"
],
"DECIMALNUMBER": [
"DECIMALDIGITS"
],
"EXPR": [
"'(' EXPR ')'",
"VAR",
"'delete' SP EXPR",
"'new' SP IDENTIFIER ARGS",
"LITERAL",
"IDENTIFIER",
"METHODCALL",
"'(' ARITHMETICOPERATION ')'",
"'(' COMPARISONOPERATION ')'",
"'(' BYTEWISEOPERATION ')'",
"'(' LOGICALOPERATION ')'"
],
"IDENTIFIER": [
"'Object'",
"VAR",
"'Function'",
"'main'",
"'opt'",
"'Boolean'",
"'Symbol'",
"'JSON'",
"'Error'",
"'EvalError'",
"'RangeError'",
"'ReferenceError'",
"'SyntaxError'",
"'TypeError'",
"'URIError'",
"'this'",
"'Number'",
"'Math'",
"'Date'",
"'String'",
"'RegExp'",
"'Array'",
"'Int8Array'",
"'Uint8Array'",
"'Uint8ClampedArray'",
"'Int16Array'",
"'Uint16Array'",
"'Int32Array'",
"'Uint32Array'",
"'Float32Array'",
"'Float64Array'",
"'DataView'",
"'ArrayBuffer'",
"'Map'",
"'Set'",
"'WeakMap'",
"'WeakSet'",
"'Promise'",
"'AsyncFunction'",
"'asyncGenerator'",
"'Reflect'",
"'Proxy'",
"'Intl'",
"'Intl.Collator'",
"'Intl.DateTimeFormat'",
"'Intl.NumberFormat'",
"'Intl.PluralRules'",
"'WebAssembly'",
"'WebAssembly.Module'",
"'WebAssembly.Instance'",
"'WebAssembly.Memory'",
"'WebAssembly.Table'",
"'WebAssembly.CompileError'",
"'WebAssembly.LinkError'",
"'WebAssembly.RuntimeError'",
"'arguments'",
"'Infinity'",
"'NaN'",
"'undefined'",
"'null'",
"'console'",
"' '"
],
"IDENTIFIERLIST": [
"IDENTIFIER ',' IDENTIFIERLIST",
"'(' IDENTIFIERLIST '),' IDENTIFIERLIST",
"IDENTIFIER"
],
"JSBLOCK": [
"JSSTATEMENT",
"JSSTATEMENT JSBLOCK"
],
"JSSTATEMENT": [
"STATEMENT NEWLINE"
],
"LITERAL": [
"'null'",
"BOOLEAN",
"NUMBER",
"ARRAY"
],
"LOGICALOPERATION": [
"EXPR '&&' EXPR",
"EXPR '||' EXPR"
],
"METHODCALL": [
"OBJECT PROPERTY METHODCALL1"
],
"METHODCALL1": [
"'.' METHOD_NAME ARGS METHODCALL1",
"' '"
],
"METHOD_NAME": [
"IDENTIFIER",
"'print'",
"'eval'",
"'uneval'",
"'isFinite'",
"'isNaN'",
"'parseFloat'",
"'parseInt'",
"'decodeURI'",
"'decodeURIComponent'",
"'encodeURI'",
"'encodeURIComponent'",
"'escape'",
"'unescape'",
"'assign'",
"'create'",
"'defineProperty'",
"'defineProperties'",
"'entries'",
"'freeze'",
"'getOwnPropertyDescriptor'",
"'getOwnPropertyDescriptors'",
"'getOwnPropertyNames'",
"'getOwnPropertySymbols'",
"'getPrototypeOf'",
"'is'",
"'isExtensible'",
"'isFrozen'",
"'isSealed'",
"'keys'",
"'preventExtensions'",
"'seal'",
"'setPrototypeOf'",
"'values'",
"'__defineGetter__'",
"'__defineSetter__'",
"'__lookupGetter__'",
"'__lookupSetter__'",
"'hasOwnProperty'",
"'isPrototypeOf'",
"'propertyIsEnumerable'",
"'toSource'",
"'toLocaleString'",
"'toString'",
"'unwatch'",
"'valueOf'",
"'watch'",
"'apply'",
"'bind'",
"'call'",
"'isGenerator'",
"'valueOf'",
"'for'",
"'keyFor'",
"'stringify'",
"'isInteger'",
"'isSafeInteger'",
"'toInteger'",
"'toExponential'",
"'toFixed'",
"'toLocaleString'",
"'toPrecision'",
"'abs'",
"'acos'",
"'acosh'",
"'asin'",
"'asinh'",
"'atan'",
"'atanh'",
"'atan2'",
"'cbrt'",
"'ceil'",
"'clz32'",
"'cos'",
"'cosh'",
"'exp'",
"'expm1'",
"'floor'",
"'fround'",
"'hypot'",
"'imul'",
"'log'",
"'log1p'",
"'log10'",
"'log2'",
"'max'",
"'min'",
"'pow'",
"'random'",
"'round'",
"'sign'",
"'sin'",
"'sinh'",
"'sqrt'",
"'tan'",
"'tanh'",
"'trunc'",
"'now'",
"'parse'",
"'UTC'",
"'getDate'",
"'getDay'",
"'getFullYear'",
"'getHours'",
"'getMilliseconds'",
"'getMinutes'",
"'getMonth'",
"'getSeconds'",
"'getTime'",
"'getTimezoneOffset'",
"'getUTCDate'",
"'getUTCDay'",
"'getUTCFullYear'",
"'getUTCHours'",
"'getUTCMilliseconds'",
"'getUTCMinutes'",
"'getUTCMonth'",
"'getUTCSeconds'",
"'getYear'",
"'setDate'",
"'setFullYear'",
"'setHours'",
"'setMilliseconds'",
"'setMinutes'",
"'setMonth'",
"'setSeconds'",
"'setTime'",
"'setUTCDate'",
"'setUTCFullYear'",
"'setUTCHours'",
"'setUTCMilliseconds'",
"'setUTCMinutes'",
"'setUTCMonth'",
"'setUTCSeconds'",
"'setYear'",
"'toDateString'",
"'toISOString'",
"'toJSON'",
"'toGMTString'",
"'toLocaleDateString'",
"'toLocaleFormat'",
"'toLocaleString'",
"'toLocaleTimeString'",
"'toTimeString'",
"'toUTCString'",
"'indexOf'",
"'substring'",
"'charAt'",
"'strcmp'",
"'fromCharCode'",
"'fromCodePoint'",
"'raw'",
"'charCodeAt'",
"'slice'",
"'codePointAt'",
"'concat'",
"'includes'",
"'endsWith'",
"'lastIndexOf'",
"'localeCompare'",
"'match'",
"'normalize'",
"'padEnd'",
"'padStart'",
"'quote'",
"'repeat'",
"'replace'",
"'search'",
"'split'",
"'startsWith'",
"'substr'",
"'toLocaleLowerCase'",
"'toLocaleUpperCase'",
"'toLowerCase'",
"'toUpperCase'",
"'trim'",
"'trimleft'",
"'trimright'",
"'anchor'",
"'big'",
"'blink'",
"'bold'",
"'fixed'",
"'fontcolor'",
"'fontsize'",
"'italics'",
"'link'",
"'small'",
"'strike'",
"'sub'",
"'sup'",
"'compile'",
"'exec'",
"'test'",
"'from'",
"'isArray'",
"'of'",
"'copyWithin'",
"'fill'",
"'pop'",
"'push'",
"'reverse'",
"'shift'",
"'sort'",
"'splice'",
"'unshift'",
"'concat'",
"'join'",
"'every'",
"'filter'",
"'findIndex'",
"'forEach'",
"'map'",
"'reduce'",
"'reduceRight'",
"'some'",
"'move'",
"'getInt8'",
"'getUint8'",
"'getInt16'",
"'getUint16'",
"'getInt32'",
"'getUint32'",
"'getFloat32'",
"'getFloat64'",
"'setInt8'",
"'setUint8'",
"'setInt16'",
"'setUint16'",
"'setInt32'",
"'setUint32'",
"'setFloat32'",
"'setFloat64'",
"'isView'",
"'transfer'",
"'clear'",
"'get'",
"'has'",
"'set'",
"'add'",
"'splat'",
"'check'",
"'extractLane'",
"'replaceLane'",
"'load'",
"'load1'",
"'load2'",
"'load3'",
"'store'",
"'store1'",
"'store2'",
"'store3'",
"'addSaturate'",
"'div'",
"'mul'",
"'neg'",
"'reciprocalApproximation'",
"'reciprocalSqrtApproximation'",
"'subSaturate'",
"'shuffle'",
"'swizzle'",
"'maxNum'",
"'minNum'",
"'select'",
"'equal'",
"'notEqual'",
"'lessThan'",
"'lessThanOrEqual'",
"'greaterThan'",
"'greaterThanOrEqual'",
"'and'",
"'or'",
"'xor'",
"'not'",
"'shiftLeftByScalar'",
"'shiftRightByScalar'",
"'allTrue'",
"'anyTrue'",
"'fromFloat32x4'",
"'fromFloat32x4Bits'",
"'fromFloat64x2Bits'",
"'fromInt32x4'",
"'fromInt32x4Bits'",
"'fromInt16x8Bits'",
"'fromInt8x16Bits'",
"'fromUint32x4'",
"'fromUint32x4Bits'",
"'fromUint16x8Bits'",
"'fromUint8x16Bits'",
"'neg'",
"'compareExchange'",
"'exchange'",
"'wait'",
"'wake'",
"'isLockFree'",
"'all'",
"'race'",
"'reject'",
"'resolve'",
"'catch'",
"'then'",
"'finally'",
"'next'",
"'throw'",
"'close'",
"'send'",
"'apply'",
"'construct'",
"'deleteProperty'",
"'ownKeys'",
"'getCanonicalLocales'",
"'supportedLocalesOf'",
"'resolvedOptions'",
"'formatToParts'",
"'resolvedOptions'",
"'instantiate'",
"'instantiateStreaming'",
"'compileStreaming'",
"'validate'",
"'customSections'",
"'exports'",
"'imports'",
"'grow'",
"'super'",
"'in'",
"'instanceof'",
"' '"
],
"NEWLINE": [
"'\\n'"
],
"NUMBER": [
"'1/2'",
"'1E2'",
"'1E02'",
"'1E+02'",
"'-1'",
"'-1.00'",
"'-1/2'",
"'-1E2'",
"'-1E02'",
"'-1E+02'",
"'1/0'",
"'0/0'",
"'-2147483648/-1'",
"'-9223372036854775808/-1'",
"'-0'",
"'-0.0'",
"'+0'"
],
"OBJECT": [
"IDENTIFIER"
],
"PROGRAM": [
"JSBLOCK"
],
"PROPERTY": [
"'.length' PROPERTY",
"'.prototype' PROPERTY",
"'.constructor' PROPERTY",
"'.__proto__' PROPERTY",
"'.__noSuchMethod__' PROPERTY",
"'.__count__' PROPERTY",
"'.__parent__' PROPERTY",
"'.arguments' PROPERTY",
"'.arity' PROPERTY",
"'.caller' PROPERTY",
"'.name' PROPERTY",
"'.displayName' PROPERTY",
"'.iterator' PROPERTY",
"'.asyncIterator' PROPERTY",
"'.match' PROPERTY",
"'.replace' PROPERTY",
"'.search' PROPERTY",
"'.split' PROPERTY",
"'.hasInstance' PROPERTY",
"'.isConcatSpreadable' PROPERTY",
"'.unscopables' PROPERTY",
"'.species' PROPERTY",
"'.toPrimitive' PROPERTY",
"'.toStringTag' PROPERTY",
"'.fileName' PROPERTY",
"'.lineNumber' PROPERTY",
"'.columnNumber' PROPERTY",
"'.message' PROPERTY",
"'.name' PROPERTY",
"'.EPSILON' PROPERTY",
"'.MAX_SAFE_INTEGER' PROPERTY",
"'.MAX_VALUE' PROPERTY",
"'.MIN_SAFE_INTEGER' PROPERTY",
"'.MIN_VALUE' PROPERTY",
"'.NaN' PROPERTY",
"'.NEGATIVE_INFINITY' PROPERTY",
"'.POSITIVE_INFINITY' PROPERTY",
"'.E' PROPERTY",
"'.LN2' PROPERTY",
"'.LN10' PROPERTY",
"'.LOG2E' PROPERTY",
"'.LOG10E' PROPERTY",
"'.PI' PROPERTY",
"'.SQRT1_2' PROPERTY",
"'.SQRT2' PROPERTY",
"'.flags' PROPERTY",
"'.global' PROPERTY",
"'.ignoreCase' PROPERTY",
"'.multiline' PROPERTY",
"'.source' PROPERTY",
"'.sticky' PROPERTY",
"'.unicode' PROPERTY",
"'.buffer' PROPERTY",
"'.byteLength' PROPERTY",
"'.byteOffset' PROPERTY",
"'.BYTES_PER_ELEMENT' PROPERTY",
"'.compare' PROPERTY",
"'.format' PROPERTY",
"'.callee' PROPERTY",
"'.caller' PROPERTY",
"'.memory' PROPERTY",
"'.exports' PROPERTY",
"' '"
],
"SP": [
"' '"
],
"STATEMENT": [
"EXPR ';'",
"'var' SP VAR '=' EXPR ';'",
"'let' SP VAR '=' EXPR ';'",
"VAR '=' EXPR ';'",
"VAR PROPERTY '=' EXPR ';'",
"VAR '[' DECIMALNUMBER ']' '=' EXPR ';'",
"'const' SP VAR '=' EXPR ';'",
"'typeof' SP EXPR ';'",
"'void' SP EXPR ';'",
"'return' SP EXPR ';'",
"VAR ':'"
],
"VAR": [
"'a'",
"'b'",
"'c'",
"'d'",
"'e'",
"'f'",
"'g'",
"'h'"
]
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,434 @@
/*
* Generic map implementation.
*/
#include "hashmap.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define INITIAL_SIZE (256)
#define MAX_CHAIN_LENGTH (8)
/* We need to keep keys and values */
typedef struct _hashmap_element {
char *key;
int in_use;
any_t data;
} hashmap_element;
/* A hashmap has some maximum size and current size,
* as well as the data to hold. */
typedef struct _hashmap_map {
int table_size;
int size;
hashmap_element *data;
} hashmap_map;
/*
* Return an empty hashmap, or NULL on failure.
*/
map_t hashmap_new() {
hashmap_map *m = (hashmap_map *)malloc(sizeof(hashmap_map));
if (!m) goto err;
m->data = (hashmap_element *)calloc(INITIAL_SIZE, sizeof(hashmap_element));
if (!m->data) goto err;
m->table_size = INITIAL_SIZE;
m->size = 0;
return m;
err:
if (m) hashmap_free(m);
return NULL;
}
/* The implementation here was originally done by Gary S. Brown. I have
borrowed the tables directly, and made some minor changes to the
crc32-function (including changing the interface). //ylo */
/* ============================================================= */
/* COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or */
/* code or tables extracted from it, as desired without restriction. */
/* */
/* First, the polynomial itself and its table of feedback terms. The */
/* polynomial is */
/* X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 */
/* */
/* Note that we take it "backwards" and put the highest-order term in */
/* the lowest-order bit. The X^32 term is "implied"; the LSB is the */
/* X^31 term, etc. The X^0 term (usually shown as "+1") results in */
/* the MSB being 1. */
/* */
/* Note that the usual hardware shift register implementation, which */
/* is what we're using (we're merely optimizing it by doing eight-bit */
/* chunks at a time) shifts bits into the lowest-order term. In our */
/* implementation, that means shifting towards the right. Why do we */
/* do it this way? Because the calculated CRC must be transmitted in */
/* order from highest-order term to lowest-order term. UARTs transmit */
/* characters in order from LSB to MSB. By storing the CRC this way, */
/* we hand it to the UART in the order low-byte to high-byte; the UART */
/* sends each low-bit to high-bit; and the result is transmission bit   */
/* by bit from highest- to lowest-order term without requiring any bit */
/* shuffling on our part. Reception works similarly. */
/* */
/* The feedback terms table consists of 256, 32-bit entries. Notes: */
/* */
/* The table can be generated at runtime if desired; code to do so */
/* is shown later. It might not be obvious, but the feedback */
/* terms simply represent the results of eight shift/xor opera- */
/* tions for all combinations of data and CRC register values. */
/* */
/* The values must be right-shifted by eight bits by the "updcrc" */
/* logic; the shift must be unsigned (bring in zeroes). On some */
/* hardware you could probably optimize the shift in assembler by */
/* using byte-swap instructions. */
/* polynomial $edb88320 */
/* */
/* -------------------------------------------------------------------- */
static unsigned long crc32_tab[] = {
0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
0x2d02ef8dL};
/* Return a 32-bit CRC of the contents of the buffer. */
unsigned long crc32(const unsigned char *s, unsigned int len) {
unsigned int i;
unsigned long crc32val;
crc32val = 0;
for (i = 0; i < len; i++) {
crc32val = crc32_tab[(crc32val ^ s[i]) & 0xff] ^ (crc32val >> 8);
}
return crc32val;
}
/*
* Hashing function for a string
*/
unsigned int hashmap_hash_int(hashmap_map *m, char *keystring) {
unsigned long key = crc32((unsigned char *)(keystring), strlen(keystring));
/* Robert Jenkins' 32 bit Mix Function */
key += (key << 12);
key ^= (key >> 22);
key += (key << 4);
key ^= (key >> 9);
key += (key << 10);
key ^= (key >> 2);
key += (key << 7);
key ^= (key >> 12);
/* Knuth's Multiplicative Method */
key = (key >> 3) * 2654435761;
return key % m->table_size;
}
/*
* Return the integer of the location in data
* to store the point to the item, or MAP_FULL.
*/
int hashmap_hash(map_t in, char *key) {
int curr;
int i;
/* Cast the hashmap */
hashmap_map *m = (hashmap_map *)in;
/* If full, return immediately */
if (m->size >= (m->table_size / 2)) return MAP_FULL;
/* Find the best index */
curr = hashmap_hash_int(m, key);
/* Linear probing */
for (i = 0; i < MAX_CHAIN_LENGTH; i++) {
if (m->data[curr].in_use == 0) return curr;
if (m->data[curr].in_use == 1 && (strcmp(m->data[curr].key, key) == 0))
return curr;
curr = (curr + 1) % m->table_size;
}
return MAP_FULL;
}
/*
* Doubles the size of the hashmap, and rehashes all the elements
*/
int hashmap_rehash(map_t in) {
int i;
int old_size;
hashmap_element *curr;
/* Setup the new elements */
hashmap_map * m = (hashmap_map *)in;
hashmap_element *temp =
(hashmap_element *)calloc(2 * m->table_size, sizeof(hashmap_element));
if (!temp) return MAP_OMEM;
/* Update the array */
curr = m->data;
m->data = temp;
/* Update the size */
old_size = m->table_size;
m->table_size = 2 * m->table_size;
m->size = 0;
/* Rehash the elements */
for (i = 0; i < old_size; i++) {
int status;
if (curr[i].in_use == 0) continue;
status = hashmap_put(m, curr[i].key, curr[i].data);
if (status != MAP_OK) return status;
}
free(curr);
return MAP_OK;
}
/*
* Add a pointer to the hashmap with some key
*/
int hashmap_put(map_t in, char *key, any_t value) {
int index;
hashmap_map *m;
/* Cast the hashmap */
m = (hashmap_map *)in;
/* Find a place to put our value */
index = hashmap_hash(in, key);
while (index == MAP_FULL) {
if (hashmap_rehash(in) == MAP_OMEM) { return MAP_OMEM; }
index = hashmap_hash(in, key);
}
/* Set the data */
m->data[index].data = value;
m->data[index].key = key;
m->data[index].in_use = 1;
m->size++;
return MAP_OK;
}
/*
* Get your pointer out of the hashmap with a key
*/
int hashmap_get(map_t in, char *key, any_t *arg) {
int curr;
int i;
hashmap_map *m;
/* Cast the hashmap */
m = (hashmap_map *)in;
/* Find data location */
curr = hashmap_hash_int(m, key);
/* Linear probing, if necessary */
for (i = 0; i < MAX_CHAIN_LENGTH; i++) {
int in_use = m->data[curr].in_use;
if (in_use == 1) {
if (strcmp(m->data[curr].key, key) == 0) {
*arg = (m->data[curr].data);
return MAP_OK;
}
}
curr = (curr + 1) % m->table_size;
}
*arg = NULL;
/* Not found */
return MAP_MISSING;
}
/*
* Iterate the function parameter over each element in the hashmap. The
* additional any_t argument is passed to the function as its first
* argument and the hashmap element is the second.
*/
int hashmap_iterate(map_t in, PFany f, any_t item) {
int i;
/* Cast the hashmap */
hashmap_map *m = (hashmap_map *)in;
/* On empty hashmap, return immediately */
if (hashmap_length(m) <= 0) return MAP_MISSING;
/* Linear probing */
for (i = 0; i < m->table_size; i++)
if (m->data[i].in_use != 0) {
any_t data = (any_t)(m->data[i].data);
int status = f(item, data);
if (status != MAP_OK) { return status; }
}
return MAP_OK;
}
/*
* Remove an element with that key from the map
*/
int hashmap_remove(map_t in, char *key) {
int i;
int curr;
hashmap_map *m;
/* Cast the hashmap */
m = (hashmap_map *)in;
/* Find key */
curr = hashmap_hash_int(m, key);
/* Linear probing, if necessary */
for (i = 0; i < MAX_CHAIN_LENGTH; i++) {
int in_use = m->data[curr].in_use;
if (in_use == 1) {
if (strcmp(m->data[curr].key, key) == 0) {
/* Blank out the fields */
m->data[curr].in_use = 0;
m->data[curr].data = NULL;
m->data[curr].key = NULL;
/* Reduce the size */
m->size--;
return MAP_OK;
}
}
curr = (curr + 1) % m->table_size;
}
/* Data not found */
return MAP_MISSING;
}
/* Deallocate the hashmap */
void hashmap_free(map_t in) {
hashmap_map *m = (hashmap_map *)in;
free(m->data);
free(m);
}
/* Return the length of the hashmap */
int hashmap_length(map_t in) {
hashmap_map *m = (hashmap_map *)in;
if (m != NULL)
return m->size;
else
return 0;
}

View File

@ -0,0 +1,83 @@
/*
* Generic hashmap manipulation functions
*
* Originally by Elliot C Back -
* http://elliottback.com/wp/hashmap-implementation-in-c/
*
* Modified by Pete Warden to fix a serious performance problem, support strings
* as keys and removed thread synchronization - http://petewarden.typepad.com
*/
#ifndef __HASHMAP_H__
#define __HASHMAP_H__
#define MAP_MISSING -3 /* No such element */
#define MAP_FULL -2 /* Hashmap is full */
#define MAP_OMEM -1 /* Out of Memory */
#define MAP_OK 0 /* OK */
/*
* any_t is a pointer. This allows you to put arbitrary structures in
* the hashmap.
*/
typedef void *any_t;
/*
* PFany is a pointer to a function that can take two any_t arguments
 * and return an integer. Returns a status code.
*/
typedef int (*PFany)(any_t, any_t);
/*
* map_t is a pointer to an internally maintained data structure.
* Clients of this package do not need to know how hashmaps are
* represented. They see and manipulate only map_t's.
*/
typedef any_t map_t;
/*
* Return an empty hashmap. Returns NULL on allocation failure.
*/
extern map_t hashmap_new();
/*
* Iteratively call f with argument (item, data) for
* each element data in the hashmap. The function must
* return a map status code. If it returns anything other
* than MAP_OK the traversal is terminated. f must
* not reenter any hashmap functions, or deadlock may arise.
*/
extern int hashmap_iterate(map_t in, PFany f, any_t item);
/*
* Add an element to the hashmap. Return MAP_OK or MAP_OMEM.
*/
extern int hashmap_put(map_t in, char *key, any_t value);
/*
* Get an element from the hashmap. Return MAP_OK or MAP_MISSING.
*/
extern int hashmap_get(map_t in, char *key, any_t *arg);
/*
* Remove an element from the hashmap. Return MAP_OK or MAP_MISSING.
*/
extern int hashmap_remove(map_t in, char *key);
/*
* Get any element. Return MAP_OK or MAP_MISSING.
* remove - should the element be removed from the hashmap
*/
extern int hashmap_get_one(map_t in, any_t *arg, int remove);
/*
* Free the hashmap
*/
extern void hashmap_free(map_t in);
/*
* Get the current size of a hashmap
*/
extern int hashmap_length(map_t in);
#endif
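
As a quick orientation for the API declared above, the following is a minimal, hedged usage sketch in C (not taken from the repository; the key and value are invented for illustration and error handling is reduced to asserts):

```c
#include <assert.h>
#include <stdio.h>
#include "hashmap.h"

/* PFany callback: the first argument is the user item passed to
   hashmap_iterate(), the second is the stored element. */
static int print_entry(any_t item, any_t data) {
  (void)item;
  printf("value: %d\n", *(int *)data);
  return MAP_OK;                 /* any other status stops the walk */
}

int main(void) {
  map_t m = hashmap_new();       /* NULL on allocation failure */
  assert(m != NULL);

  /* The map stores the key pointer as-is (no copy), so the key must
     outlive the entry; a string literal is fine here. */
  int value = 42;
  assert(hashmap_put(m, "answer", &value) == MAP_OK);

  any_t out = NULL;
  assert(hashmap_get(m, "answer", &out) == MAP_OK);
  assert(*(int *)out == 42);

  assert(hashmap_iterate(m, print_entry, NULL) == MAP_OK);

  assert(hashmap_remove(m, "answer") == MAP_OK);
  assert(hashmap_get(m, "answer", &out) == MAP_MISSING);

  hashmap_free(m);
  return 0;
}
```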

View File

@ -0,0 +1,275 @@
import sys
import json
import re
from collections import defaultdict
# import pygraphviz as pgv
gram_data = None
state_count = 1
pda = []
worklist = []
state_stacks = {}
# === If user provides upper bound on the stack size during FSA creation ===
# Specifies the upper bound to which the stack is allowed to grow
# If for any generated state, the stack size is >= stack_limit then this
# state is not expanded further.
stack_limit = None
# Holds the set of unexpanded rules owing to the user-passed stack constraint limit
unexpanded_rules = set()
def main(grammar, limit):
global worklist, gram_data, stack_limit
current = '0'
stack_limit = limit
if stack_limit:
print ('[X] Operating in bounded stack mode')
with open(grammar, 'r') as fd:
gram_data = json.load(fd)
start_symbol = gram_data["Start"][0]
worklist.append([current, [start_symbol]])
# print (grammar)
filename = (grammar.split('/')[-1]).split('.')[0]
while worklist:
# Take an element from the worklist
# print ('================')
# print ('Worklist:', worklist)
element = worklist.pop(0)
prep_transitions(element)
pda_file = filename + '_transition.json'
graph_file = filename + '.png'
# print ('XXXXXXXXXXXXXXXX')
# print ('PDA file:%s Png graph file:%s' % (pda_file, graph_file))
# XXX Commented out because visualization of current version of PHP causes segfault
# Create the graph and dump the transitions to a file
# create_graph(filename)
transformed = postprocess()
with open(filename + '_automata.json', 'w+') as fd:
json.dump(transformed, fd)
with open(filename + '_transition.json', 'w+') as fd:
json.dump(pda, fd)
if not unexpanded_rules:
print ('[X] No unexpanded rules, absolute FSA formed')
exit(0)
else:
print ('[X] Certain rules were not expanded due to stack size limit. Inexact approximation has been created and the disallowed rules have been put in {}_disallowed.json'.format(filename))
print ('[X] Number of unexpanded rules:', len(unexpanded_rules))
with open(filename + '_disallowed.json', 'w+') as fd:
json.dump(list(unexpanded_rules), fd)
def create_graph(filename):
'''
Creates a DOT representation of the PDA
'''
global pda
G = pgv.AGraph(strict = False, directed = True)
for transition in pda:
print ('Transition:', transition)
G.add_edge(transition['source'], transition['dest'],
label = 'Term:{}'.format(transition['terminal']))
G.layout(prog = 'dot')
print ('Do it up 2')
G.draw(filename + '.png')
def prep_transitions(element):
'''
Generates transitions
'''
global gram_data, state_count, pda, worklist, state_stacks, stack_limit, unexpanded_rules
state = element[0]
try:
nonterminal = element[1][0]
except IndexError:
# Final state was encountered, pop from worklist without doing anything
return
rules = gram_data[nonterminal]
count = 1
for rule in rules:
isRecursive = False
# print ('Current state:', state)
terminal, ss, termIsRegex = tokenize(rule)
transition = get_template()
transition['trigger'] = '_'.join([state, str(count)])
transition['source'] = state
transition['dest'] = str(state_count)
transition['ss'] = ss
transition['terminal'] = terminal
transition['rule'] = "{} -> {}".format(nonterminal, rule )
if termIsRegex:
transition['termIsRegex'] = True
# Creating a state stack for the new state
try:
state_stack = state_stacks[state][:]
except:
state_stack = []
if len(state_stack):
state_stack.pop(0)
if ss:
for symbol in ss[::-1]:
state_stack.insert(0, symbol)
transition['stack'] = state_stack
# Check if a recursive transition state being created, if so make a backward
# edge and don't add anything to the worklist
# print (state_stacks)
if state_stacks:
for state_element, stack in state_stacks.items():
# print ('Stack:', sorted(stack))
# print ('State stack:', sorted(state_stack))
if sorted(stack) == sorted(state_stack):
transition['dest'] = state_element
# print ('Recursive:', transition)
pda.append(transition)
count += 1
isRecursive = True
break
# If a recursive transition exercised don't add the same transition as a new
# edge, continue onto the next transitions
if isRecursive:
continue
# If the generated state has a stack size > stack_limit then that state is abandoned
# and not added to the FSA or the worklist for further expansion
if stack_limit:
if (len(transition['stack']) > stack_limit):
unexpanded_rules.add(transition['rule'])
continue
# Create transitions for the non-recursive relations and add to the worklist
# print ('Normal:', transition)
# print ('State2:', state)
pda.append(transition)
worklist.append([transition['dest'], transition['stack']])
state_stacks[transition['dest']] = state_stack
state_count += 1
count += 1
def tokenize(rule):
'''
Gets the terminal and the corresponding stack symbols from a rule in GNF form
'''
pattern = re.compile("([r])*\'([\s\S]+)\'([\s\S]*)")
terminal = None
ss = None
termIsRegex = False
match = pattern.match(rule)
if match.group(1):
termIsRegex = True
if match.group(2):
terminal = match.group(2)
else:
raise AssertionError("Rule is not in GNF form")
if match.group(3):
ss = (match.group(3)).split()
return terminal, ss, termIsRegex
def get_template():
transition_template = {
'trigger':None,
'source': None,
'dest': None,
'termIsRegex': False,
'terminal' : None,
'stack': []
}
return transition_template
def postprocess():
'''
Creates a representation to be passed on to the C-module
'''
global pda
final_struct = {}
memoized = defaultdict(list)
# Supporting data structures for if stack limit is imposed
culled_pda = []
culled_final = []
num_transitions = 0 # Keep track of number of transitions
states, final, initial = _get_states()
print (initial)
assert len(initial) == 1, 'More than one init state found'
# Cull transitions to states which were not expanded owing to the stack limit
if stack_limit:
blocklist = []
for final_state in final:
for transition in pda:
if (transition["dest"] == final_state) and (len(transition["stack"]) > 0):
blocklist.append(transition["dest"])
continue
else:
culled_pda.append(transition)
culled_final = [state for state in final if state not in blocklist]
assert len(culled_final) == 1, 'More than one final state found'
for transition in culled_pda:
state = transition["source"]
if transition["dest"] in blocklist:
continue
num_transitions += 1
memoized[state].append([transition["trigger"], transition["dest"],
transition["terminal"]])
final_struct["init_state"] = initial
final_struct["final_state"] = culled_final[0]
# The reason we do this is because when states are culled, the indexing is
# still relative to the actual number of states hence we keep numstates recorded
# as the original number of states
print ('[X] Actual Number of states:', len(memoized.keys()))
print ('[X] Number of transitions:', num_transitions)
print ('[X] Original Number of states:', len(states))
final_struct["numstates"] = len(states)
final_struct["pda"] = memoized
return final_struct
# Running FSA construction in exact approximation mode and postprocessing it like so
for transition in pda:
state = transition["source"]
memoized[state].append([transition["trigger"], transition["dest"],
transition["terminal"]])
final_struct["init_state"] = initial
final_struct["final_state"] = final[0]
print ('[X] Actual Number of states:', len(memoized.keys()))
final_struct["numstates"] = len(memoized.keys())
final_struct["pda"] = memoized
return final_struct
def _get_states():
source = set()
dest = set()
global pda
for transition in pda:
source.add(transition["source"])
dest.add(transition["dest"])
source_copy = source.copy()
source_copy.update(dest)
return list(source_copy), list(dest.difference(source)), str(''.join(list(source.difference(dest))))
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description = 'Script to convert GNF grammar to PDA')
parser.add_argument(
'--gf',
type = str,
help = 'Location of GNF grammar')
parser.add_argument(
'--limit',
type = int,
default = None,
help = 'Specify the upper bound for the stack size')
args = parser.parse_args()
main(args.gf, args.limit)

View File

@ -0,0 +1,289 @@
import sys
import re
import copy
import json
from string import ascii_uppercase
from itertools import combinations
from collections import defaultdict
NONTERMINALSET = []
COUNT = 1
def main(grammar_file, out, start):
grammar = None
# If grammar file is a preprocessed NT file, then skip preprocessing
if '.json' in grammar_file:
with open(grammar_file, 'r') as fd:
grammar = json.load(fd)
elif '.g4' in grammar_file:
with open(grammar_file, 'r') as fd:
data = fd.readlines()
grammar = preprocess(data)
else:
raise ValueError('Unknown file format passed. Accepts (.g4/.json)')
with open('debug_preprocess.json', 'w+') as fd:
json.dump(grammar, fd)
grammar = remove_unit(grammar) # eliminates unit productions
with open('debug_unit.json', 'w+') as fd:
json.dump(grammar, fd)
grammar = remove_mixed(grammar) # eliminate terminals existing with non-terminals
with open('debug_mixed.json', 'w+') as fd:
json.dump(grammar, fd)
grammar = break_rules(grammar) # eliminate rules with more than two non-terminals
with open('debug_break.json', 'w+') as fd:
json.dump(grammar, fd)
grammar = gnf(grammar)
# Dump GNF form of the grammar with only reachable rules
# reachable_grammar = get_reachable(grammar, start)
# with open('debug_gnf_reachable.json', 'w+') as fd:
# json.dump(reachable_grammar, fd)
with open('debug_gnf.json', 'w+') as fd:
json.dump(grammar, fd)
grammar["Start"] = [start]
with open(out, 'w+') as fd:
json.dump(grammar, fd)
def get_reachable(grammar, start):
'''
Returns a grammar without dead rules
'''
reachable_nt = set()
worklist = list()
processed = set()
reachable_grammar = dict()
worklist.append(start)
while worklist:
nt = worklist.pop(0)
processed.add(nt)
reachable_grammar[nt] = grammar[nt]
rules = grammar[nt]
for rule in rules:
tokens = gettokens(rule)
for token in tokens:
if not isTerminal(token):
if token not in processed:
worklist.append(token)
return reachable_grammar
def gettokens(rule):
pattern = re.compile("([^\s\"\']+)|\"([^\"]*)\"|\'([^\']*)\'")
return [matched.group(0) for matched in pattern.finditer(rule)]
def gnf(grammar):
old_grammar = copy.deepcopy(grammar)
new_grammar = defaultdict(list)
isgnf = False
while not isgnf:
for lhs, rules in old_grammar.items():
for rule in rules:
tokens = gettokens(rule)
if len(tokens) == 1 and isTerminal(rule):
new_grammar[lhs].append(rule)
continue
startoken = tokens[0]
endrule = tokens[1:]
if not isTerminal(startoken):
newrules = []
extendrules = old_grammar[startoken]
for extension in extendrules:
temprule = endrule[:]
temprule.insert(0, extension)
newrules.append(temprule)
for newnew in newrules:
new_grammar[lhs].append(' '.join(newnew))
else:
new_grammar[lhs].append(rule)
isgnf = True
for lhs, rules in new_grammar.items():
for rule in rules:
# if "\' \'" or isTerminal(rule):
tokens = gettokens(rule)
if len(tokens) == 1 and isTerminal(rule):
continue
startoken = tokens[0]
if not isTerminal(startoken):
isgnf = False
break
if not isgnf:
old_grammar = copy.deepcopy(new_grammar)
new_grammar = defaultdict(list)
return new_grammar
def preprocess(data):
productions = []
production = []
for line in data:
if line != '\n':
production.append(line)
else:
productions.append(production)
production = []
final_rule_set = {}
for production in productions:
rules = []
init = production[0]
nonterminal = init.split(':')[0]
rules.append(strip_chars(init.split(':')[1]).strip('| '))
for production_rule in production[1:]:
rules.append(strip_chars(production_rule.split('|')[0]))
final_rule_set[nonterminal] = rules
# for line in data:
# if line != '\n':
# production.append(line)
return final_rule_set
def remove_unit(grammar):
nounitproductions = False
old_grammar = copy.deepcopy(grammar)
new_grammar = defaultdict(list)
while not nounitproductions:
for lhs, rules in old_grammar.items():
for rhs in rules:
# Checking if the rule is a unit production rule
if len(gettokens(rhs)) == 1:
if not isTerminal(rhs):
new_grammar[lhs].extend([rule for rule in old_grammar[rhs]])
else:
new_grammar[lhs].append(rhs)
else:
new_grammar[lhs].append(rhs)
# Checking there are no unit productions left in the grammar
nounitproductions = True
for lhs, rules in new_grammar.items():
for rhs in rules:
if len(gettokens(rhs)) == 1:
if not isTerminal(rhs):
nounitproductions = False
break
if not nounitproductions:
break
# Unit productions are still there in the grammar -- repeat the process
if not nounitproductions:
old_grammar = copy.deepcopy(new_grammar)
new_grammar = defaultdict(list)
return new_grammar
def isTerminal(rule):
# pattern = re.compile("([r]*\'[\s\S]+\')")
pattern = re.compile("\'(.*?)\'")
match = pattern.match(rule)
if match:
return True
else:
return False
def remove_mixed(grammar):
'''
Remove rules where there are terminals mixed in with non-terminals
'''
new_grammar = defaultdict(list)
for lhs, rules in grammar.items():
for rhs in rules:
# tokens = rhs.split(' ')
regen_rule = []
tokens = gettokens(rhs)
if len(gettokens(rhs)) == 1:
new_grammar[lhs].append(rhs)
continue
for token in tokens:
# Identify if there is a terminal in the RHS
if isTerminal(token):
# Check if a corresponding nonterminal already exists
nonterminal = terminal_exist(token, new_grammar)
if nonterminal:
regen_rule.append(nonterminal)
else:
new_nonterm = get_nonterminal()
new_grammar[new_nonterm].append(token)
regen_rule.append(new_nonterm)
else:
regen_rule.append(token)
new_grammar[lhs].append(' '.join(regen_rule))
return new_grammar
def break_rules(grammar):
new_grammar = defaultdict(list)
old_grammar = copy.deepcopy(grammar)
nomulti = False
while not nomulti:
for lhs, rules in old_grammar.items():
for rhs in rules:
tokens = gettokens(rhs)
if len(tokens) > 2 and (not isTerminal(rhs)):
split = tokens[:-1]
nonterminal = terminal_exist(' '.join(split), new_grammar)
if nonterminal:
newrule = ' '.join([nonterminal, tokens[-1]])
new_grammar[lhs].append(newrule)
else:
nonterminal = get_nonterminal()
new_grammar[nonterminal].append(' '.join(split))
newrule = ' '.join([nonterminal, tokens[-1]])
new_grammar[lhs].append(newrule)
else:
new_grammar[lhs].append(rhs)
nomulti = True
for lhs, rules in new_grammar.items():
for rhs in rules:
# tokens = rhs.split(' ')
tokens = gettokens(rhs)
if len(tokens) > 2 and (not isTerminal(rhs)):
nomulti = False
break
if not nomulti:
old_grammar = copy.deepcopy(new_grammar)
new_grammar = defaultdict(list)
return new_grammar
def strip_chars(rule):
return rule.strip('\n\t ')
def get_nonterminal():
global NONTERMINALSET
if NONTERMINALSET:
return NONTERMINALSET.pop(0)
else:
_repopulate()
return NONTERMINALSET.pop(0)
def _repopulate():
global COUNT
global NONTERMINALSET
NONTERMINALSET = [''.join(x) for x in list(combinations(ascii_uppercase, COUNT))]
COUNT += 1
def terminal_exist(token, grammar):
for nonterminal, rules in grammar.items():
if token in rules:
return nonterminal
return None
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description = 'Script to convert grammar to GNF form')
parser.add_argument(
'--gf',
type = str,
required = True,
help = 'Location of grammar file')
parser.add_argument(
'--out',
type = str,
required = True,
help = 'Location of output file')
parser.add_argument(
'--start',
type = str,
required = True,
help = 'Start token')
args = parser.parse_args()
main(args.gf, args.out, args.start)

View File

@ -0,0 +1,38 @@
#!/bin/bash
# This script creates a FSA describing the input grammar *.g4
if [ ! "$#" -lt 4 ]; then
echo "Usage: ./prep_pda.sh <grammar_file> <start> [stack_limit]"
exit 1
fi
GRAMMAR_FILE=$1
GRAMMAR_DIR="$(dirname $GRAMMAR_FILE)"
START="$2"
STACK_LIMIT="$3"
# Get filename
FILE=$(basename -- "$GRAMMAR_FILE")
echo "File:$FILE"
FILENAME="${FILE%.*}"
echo "Name:$FILENAME"
# Create the GNF form of the grammar
CMD="python gnf_converter.py --gf $GRAMMAR_FILE --out ${FILENAME}.json --start $START"
$CMD
# Generate grammar automaton
# Check if user provided a stack limit
if [ -z "${STACK_LIMIT}" ]; then
CMD="python3 construct_automata.py --gf ${FILENAME}.json"
else
CMD="python construct_automata.py --gf ${FILENAME}.json --limit ${STACK_LIMIT}"
fi
echo $CMD
$CMD
# Move PDA to the source dir of the grammar
echo "Copying ${FILENAME}_automata.json to $GRAMMAR_DIR"
mv "${FILENAME}_automata.json" $GRAMMAR_DIR/

View File

@ -0,0 +1,154 @@
/* This is the testing module for Gramatron
*/
#include "afl-fuzz.h"
#include "gramfuzz.h"
#define NUMINPUTS 50
state *create_pda(u8 *automaton_file) {
struct json_object *parsed_json;
state * pda;
json_object * source_obj, *attr;
int arraylen, ii, ii2, trigger_len, error;
printf("\n[GF] Automaton file passed:%s", automaton_file);
// parsed_json =
// json_object_from_file("./gramfuzz/php_gnf_processed_full.json");
parsed_json = json_object_from_file(automaton_file);
// Getting final state
source_obj = json_object_object_get(parsed_json, "final_state");
printf("\t\nFinal=%s\n", json_object_get_string(source_obj));
final_state = atoi(json_object_get_string(source_obj));
// Getting initial state
source_obj = json_object_object_get(parsed_json, "init_state");
init_state = atoi(json_object_get_string(source_obj));
printf("\tInit=%s\n", json_object_get_string(source_obj));
// Getting number of states
source_obj = json_object_object_get(parsed_json, "numstates");
numstates = atoi(json_object_get_string(source_obj)) + 1;
printf("\tNumStates=%d\n", numstates);
// Allocate state space for each pda state
pda = (state *)calloc(atoi(json_object_get_string(source_obj)) + 1,
sizeof(state));
// Getting PDA representation
source_obj = json_object_object_get(parsed_json, "pda");
enum json_type type;
json_object_object_foreach(source_obj, key, val) {
state * state_ptr;
trigger *trigger_ptr;
int offset;
// Get the correct offset into the pda to store state information
state_ptr = pda;
offset = atoi(key);
state_ptr += offset;
// Store state string
state_ptr->state_name = offset;
// Create trigger array of structs
trigger_len = json_object_array_length(val);
state_ptr->trigger_len = trigger_len;
trigger_ptr = (trigger *)calloc(trigger_len, sizeof(trigger));
state_ptr->ptr = trigger_ptr;
printf("\nName:%d Trigger:%d", offset, trigger_len);
for (ii = 0; ii < trigger_len; ii++) {
json_object *obj = json_object_array_get_idx(val, ii);
// Get all the trigger trigger attributes
attr = json_object_array_get_idx(obj, 0);
(trigger_ptr)->id = strdup(json_object_get_string(attr));
attr = json_object_array_get_idx(obj, 1);
trigger_ptr->dest = atoi(json_object_get_string(attr));
attr = json_object_array_get_idx(obj, 2);
if (!strcmp("\\n", json_object_get_string(attr))) {
trigger_ptr->term = strdup("\n");
} else {
trigger_ptr->term = strdup(json_object_get_string(attr));
}
trigger_ptr->term_len = strlen(trigger_ptr->term);
trigger_ptr++;
}
}
// Delete the JSON object
json_object_put(parsed_json);
return pda;
}
void SanityCheck(char *automaton_path) {
state * pda = create_pda(automaton_path);
int count = 0, state;
Get_Dupes_Ret *getdupesret;
IdxMap_new * statemap;
IdxMap_new * statemap_ptr;
terminal * term_ptr;
while (count < NUMINPUTS) {
// Perform input generation
Array *generated = gen_input(pda, NULL);
print_repr(generated, "Gen");
count += 1;
}
}
int main(int argc, char *argv[]) {
char * mode;
char * automaton_path;
char * output_dir = NULL;
struct timeval tv;
struct timeval tz;
// gettimeofday(&tv, &tz);
srand(1337);
if (argc == 3) {
mode = argv[1];
automaton_path = strdup(argv[2]);
printf("\nMode:%s Path:%s", mode, automaton_path);
} else {
printf("\nUsage: ./test <mode> <automaton_path>");
return -1;
}
if (!strcmp(mode, "SanityCheck")) {
SanityCheck(automaton_path);
} else {
printf("\nUnrecognized mode");
return -1;
}
return 0;
}

View File

@ -0,0 +1,57 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <json-c/json.h>
#include <unistd.h>
#include "hashmap.h"
#include "uthash.h"
#include "utarray.h"
#define INIT_SIZE 100 // Initial size of the dynamic array holding the input
typedef struct terminal {
int state;
int trigger_idx;
size_t symbol_len;
char * symbol;
} terminal;
typedef struct trigger {
char * id;
int dest;
char * term;
size_t term_len;
} trigger;
typedef struct state {
int state_name; // Integer State name
int trigger_len; // Number of triggers associated with this state
trigger *ptr; // Pointer to beginning of the list of triggers
} state;
typedef struct {
size_t used;
size_t size;
size_t inputlen;
terminal *start;
} Array;
int init_state;
int curr_state;
int final_state;
state *create_pda(char *);
Array *gen_input(state *, Array *);
void print_repr(Array *, char *);
void initArray(Array *, size_t);
void insertArray(Array *, int, char *, size_t, int);
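
To make the relationship between these structures concrete, here is a hedged sketch (it is not the real `gen_input()` from the Gramatron sources): it assumes a `pda` array as returned by `create_pda()` plus the `init_state`/`final_state` globals above, picks one outgoing trigger per state at random, emits its terminal, and follows `dest` until the final state is reached:

```c
#include <stdio.h>
#include <stdlib.h>
#include "gramfuzz.h"

/* Illustrative only: print one random walk over the automaton to stdout. */
static void walk_pda_once(state *pda) {
  int cur = init_state;
  while (cur != final_state) {
    state *s = &pda[cur];                     /* states are indexed by name */
    if (s->trigger_len == 0) break;           /* defensive: no outgoing edge */
    trigger *t = &s->ptr[rand() % s->trigger_len];
    fwrite(t->term, 1, t->term_len, stdout);  /* append the terminal */
    cur = t->dest;                            /* follow the edge */
  }
}
```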

View File

@ -0,0 +1,392 @@
/*
Copyright (c) 2008-2018, Troy D. Hanson http://troydhanson.github.com/uthash/
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* a dynamic array implementation using macros
*/
#ifndef UTARRAY_H
#define UTARRAY_H
#define UTARRAY_VERSION 2.1.0
#include <stddef.h> /* size_t */
#include <string.h> /* memset, etc */
#include <stdlib.h> /* exit */
#ifdef __GNUC__
#define UTARRAY_UNUSED __attribute__((__unused__))
#else
#define UTARRAY_UNUSED
#endif
#ifdef oom
#error \
"The name of macro 'oom' has been changed to 'utarray_oom'. Please update your code."
#define utarray_oom() oom()
#endif
#ifndef utarray_oom
#define utarray_oom() exit(-1)
#endif
typedef void(ctor_f)(void *dst, const void *src);
typedef void(dtor_f)(void *elt);
typedef void(init_f)(void *elt);
typedef struct {
size_t sz;
init_f *init;
ctor_f *copy;
dtor_f *dtor;
} UT_icd;
typedef struct {
unsigned i, n; /* i: index of next available slot, n: num slots */
UT_icd icd; /* initializer, copy and destructor functions */
char * d; /* n slots of size icd->sz*/
} UT_array;
#define utarray_init(a, _icd) \
do { \
\
memset(a, 0, sizeof(UT_array)); \
(a)->icd = *(_icd); \
\
} while (0)
#define utarray_done(a) \
do { \
\
if ((a)->n) { \
\
if ((a)->icd.dtor) { \
\
unsigned _ut_i; \
for (_ut_i = 0; _ut_i < (a)->i; _ut_i++) { \
\
(a)->icd.dtor(utarray_eltptr(a, _ut_i)); \
\
} \
\
} \
free((a)->d); \
\
} \
(a)->n = 0; \
\
} while (0)
#define utarray_new(a, _icd) \
do { \
\
(a) = (UT_array *)malloc(sizeof(UT_array)); \
if ((a) == NULL) { utarray_oom(); } \
utarray_init(a, _icd); \
\
} while (0)
#define utarray_free(a) \
do { \
\
utarray_done(a); \
free(a); \
\
} while (0)
#define utarray_reserve(a, by) \
do { \
\
if (((a)->i + (by)) > (a)->n) { \
\
char *utarray_tmp; \
while (((a)->i + (by)) > (a)->n) { \
\
(a)->n = ((a)->n ? (2 * (a)->n) : 8); \
\
} \
utarray_tmp = (char *)realloc((a)->d, (a)->n * (a)->icd.sz); \
if (utarray_tmp == NULL) { utarray_oom(); } \
(a)->d = utarray_tmp; \
\
} \
\
} while (0)
#define utarray_push_back(a, p) \
do { \
\
utarray_reserve(a, 1); \
if ((a)->icd.copy) { \
\
(a)->icd.copy(_utarray_eltptr(a, (a)->i++), p); \
\
} else { \
\
memcpy(_utarray_eltptr(a, (a)->i++), p, (a)->icd.sz); \
\
}; \
\
} while (0)
#define utarray_pop_back(a) \
do { \
\
if ((a)->icd.dtor) { \
\
(a)->icd.dtor(_utarray_eltptr(a, --((a)->i))); \
\
} else { \
\
(a)->i--; \
\
} \
\
} while (0)
#define utarray_extend_back(a) \
do { \
\
utarray_reserve(a, 1); \
if ((a)->icd.init) { \
\
(a)->icd.init(_utarray_eltptr(a, (a)->i)); \
\
} else { \
\
memset(_utarray_eltptr(a, (a)->i), 0, (a)->icd.sz); \
\
} \
(a)->i++; \
\
} while (0)
#define utarray_len(a) ((a)->i)
#define utarray_eltptr(a, j) (((j) < (a)->i) ? _utarray_eltptr(a, j) : NULL)
#define _utarray_eltptr(a, j) ((a)->d + ((a)->icd.sz * (j)))
#define utarray_insert(a, p, j) \
do { \
\
if ((j) > (a)->i) utarray_resize(a, j); \
utarray_reserve(a, 1); \
if ((j) < (a)->i) { \
\
memmove(_utarray_eltptr(a, (j) + 1), _utarray_eltptr(a, j), \
((a)->i - (j)) * ((a)->icd.sz)); \
\
} \
if ((a)->icd.copy) { \
\
(a)->icd.copy(_utarray_eltptr(a, j), p); \
\
} else { \
\
memcpy(_utarray_eltptr(a, j), p, (a)->icd.sz); \
\
}; \
(a)->i++; \
\
} while (0)
#define utarray_inserta(a, w, j) \
do { \
\
if (utarray_len(w) == 0) break; \
if ((j) > (a)->i) utarray_resize(a, j); \
utarray_reserve(a, utarray_len(w)); \
if ((j) < (a)->i) { \
\
memmove(_utarray_eltptr(a, (j) + utarray_len(w)), _utarray_eltptr(a, j), \
((a)->i - (j)) * ((a)->icd.sz)); \
\
} \
if ((a)->icd.copy) { \
\
unsigned _ut_i; \
for (_ut_i = 0; _ut_i < (w)->i; _ut_i++) { \
\
(a)->icd.copy(_utarray_eltptr(a, (j) + _ut_i), \
_utarray_eltptr(w, _ut_i)); \
\
} \
\
} else { \
\
memcpy(_utarray_eltptr(a, j), _utarray_eltptr(w, 0), \
utarray_len(w) * ((a)->icd.sz)); \
\
} \
(a)->i += utarray_len(w); \
\
} while (0)
#define utarray_resize(dst, num) \
do { \
\
unsigned _ut_i; \
if ((dst)->i > (unsigned)(num)) { \
\
if ((dst)->icd.dtor) { \
\
for (_ut_i = (num); _ut_i < (dst)->i; ++_ut_i) { \
\
(dst)->icd.dtor(_utarray_eltptr(dst, _ut_i)); \
\
} \
\
} \
\
} else if ((dst)->i < (unsigned)(num)) { \
\
utarray_reserve(dst, (num) - (dst)->i); \
if ((dst)->icd.init) { \
\
for (_ut_i = (dst)->i; _ut_i < (unsigned)(num); ++_ut_i) { \
\
(dst)->icd.init(_utarray_eltptr(dst, _ut_i)); \
\
} \
\
} else { \
\
memset(_utarray_eltptr(dst, (dst)->i), 0, \
(dst)->icd.sz *((num) - (dst)->i)); \
\
} \
\
} \
(dst)->i = (num); \
\
} while (0)
#define utarray_concat(dst, src) \
do { \
\
utarray_inserta(dst, src, utarray_len(dst)); \
\
} while (0)
#define utarray_erase(a, pos, len) \
do { \
\
if ((a)->icd.dtor) { \
\
unsigned _ut_i; \
for (_ut_i = 0; _ut_i < (len); _ut_i++) { \
\
(a)->icd.dtor(utarray_eltptr(a, (pos) + _ut_i)); \
\
} \
\
} \
if ((a)->i > ((pos) + (len))) { \
\
memmove(_utarray_eltptr(a, pos), _utarray_eltptr(a, (pos) + (len)), \
((a)->i - ((pos) + (len))) * (a)->icd.sz); \
\
} \
(a)->i -= (len); \
\
} while (0)
#define utarray_renew(a, u) \
do { \
\
if (a) \
utarray_clear(a); \
else \
utarray_new(a, u); \
\
} while (0)
#define utarray_clear(a) \
do { \
\
if ((a)->i > 0) { \
\
if ((a)->icd.dtor) { \
\
unsigned _ut_i; \
for (_ut_i = 0; _ut_i < (a)->i; _ut_i++) { \
\
(a)->icd.dtor(_utarray_eltptr(a, _ut_i)); \
\
} \
\
} \
(a)->i = 0; \
\
} \
\
} while (0)
#define utarray_sort(a, cmp) \
do { \
\
qsort((a)->d, (a)->i, (a)->icd.sz, cmp); \
\
} while (0)
#define utarray_find(a, v, cmp) bsearch((v), (a)->d, (a)->i, (a)->icd.sz, cmp)
#define utarray_front(a) (((a)->i) ? (_utarray_eltptr(a, 0)) : NULL)
#define utarray_next(a, e) \
(((e) == NULL) ? utarray_front(a) \
: (((a)->i != utarray_eltidx(a, e) + 1) \
? _utarray_eltptr(a, utarray_eltidx(a, e) + 1) \
: NULL))
#define utarray_prev(a, e) \
(((e) == NULL) ? utarray_back(a) \
: ((utarray_eltidx(a, e) != 0) \
? _utarray_eltptr(a, utarray_eltidx(a, e) - 1) \
: NULL))
#define utarray_back(a) (((a)->i) ? (_utarray_eltptr(a, (a)->i - 1)) : NULL)
#define utarray_eltidx(a, e) (((char *)(e) - (a)->d) / (a)->icd.sz)
/* last we pre-define a few icd for common utarrays of ints and strings */
static void utarray_str_cpy(void *dst, const void *src) {
char **_src = (char **)src, **_dst = (char **)dst;
*_dst = (*_src == NULL) ? NULL : strdup(*_src);
}
static void utarray_str_dtor(void *elt) {
char **eltc = (char **)elt;
if (*eltc != NULL) free(*eltc);
}
static const UT_icd ut_str_icd UTARRAY_UNUSED = {
sizeof(char *), NULL, utarray_str_cpy, utarray_str_dtor};
static const UT_icd ut_int_icd UTARRAY_UNUSED = {sizeof(int), NULL, NULL, NULL};
static const UT_icd ut_ptr_icd UTARRAY_UNUSED = {sizeof(void *), NULL, NULL,
NULL};
#endif /* UTARRAY_H */

File diff suppressed because it is too large

View File

@ -1 +1 @@
b79d51a
eedf07d

View File

@ -1,7 +1,7 @@
# custom mutator: honggfuzz mangle
this is the honggfuzz mutator in mangle.c as a custom mutator
module for afl++. It is the original mangle.c, mangle.h and honggfuzz.h
module for AFL++. It is the original mangle.c, mangle.h and honggfuzz.h
with a lot of mocking around it :-)
just type `make` to build

View File

@ -65,7 +65,7 @@ my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {
/* When a new queue entry is added we check if there are new dictionary
entries to add to honggfuzz structure */
void afl_custom_queue_new_entry(my_mutator_t * data,
uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
@ -97,6 +97,8 @@ void afl_custom_queue_new_entry(my_mutator_t * data,
}
return 0;
}
/* we could set only_printable if is_ascii is set ... let's see

View File

@ -78,7 +78,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {
/* When a new queue entry is added we check if there are new dictionary
entries to add to honggfuzz structure */
#if 0
extern "C" void afl_custom_queue_new_entry(my_mutator_t * data,
extern "C" uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
@ -110,6 +110,8 @@ extern "C" void afl_custom_queue_new_entry(my_mutator_t * data,
}
return 0;
}
#endif

View File

@ -53,7 +53,13 @@ pub trait RawCustomMutator {
1
}
fn queue_new_entry(&mut self, filename_new_queue: &Path, _filename_orig_queue: Option<&Path>) {}
fn queue_new_entry(
&mut self,
filename_new_queue: &Path,
_filename_orig_queue: Option<&Path>,
) -> bool {
false
}
fn queue_get(&mut self, filename: &Path) -> bool {
true
@ -84,7 +90,6 @@ pub mod wrappers {
use std::{
any::Any,
convert::TryInto,
ffi::{c_void, CStr, OsStr},
mem::ManuallyDrop,
os::{raw::c_char, unix::ffi::OsStrExt},
@ -176,6 +181,10 @@ pub mod wrappers {
}
/// Internal function used in the macro
/// # Safety
///
/// May dereference all passed-in pointers.
/// Should not be called manually, but will be called by `afl-fuzz`
pub unsafe fn afl_custom_fuzz_<M: RawCustomMutator>(
data: *mut c_void,
buf: *mut u8,
@ -199,13 +208,10 @@ pub mod wrappers {
} else {
Some(slice::from_raw_parts(add_buf, add_buf_size))
};
match context
.mutator
.fuzz(buff_slice, add_buff_slice, max_size.try_into().unwrap())
{
match context.mutator.fuzz(buff_slice, add_buff_slice, max_size) {
Some(buffer) => {
*out_buf = buffer.as_ptr();
buffer.len().try_into().unwrap()
buffer.len()
}
None => {
// return the input buffer with 0-length to let AFL skip this mutation attempt
@ -220,6 +226,10 @@ pub mod wrappers {
}
/// Internal function used in the macro
///
/// # Safety
/// Dereferences the passed-in pointers up to `buf_size` bytes.
/// Should not be called directly.
pub unsafe fn afl_custom_fuzz_count_<M: RawCustomMutator>(
data: *mut c_void,
buf: *const u8,
@ -246,7 +256,7 @@ pub mod wrappers {
data: *mut c_void,
filename_new_queue: *const c_char,
filename_orig_queue: *const c_char,
) {
) -> bool {
match catch_unwind(|| {
let mut context = FFIContext::<M>::from(data);
if filename_new_queue.is_null() {
@ -264,7 +274,7 @@ pub mod wrappers {
};
context
.mutator
.queue_new_entry(filename_new_queue, filename_orig_queue);
.queue_new_entry(filename_new_queue, filename_orig_queue)
}) {
Ok(ret) => ret,
Err(err) => panic_handler("afl_custom_queue_new_entry", err),
@ -272,6 +282,10 @@ pub mod wrappers {
}
/// Internal function used in the macro
///
/// # Safety
/// May dereference the passed-in `data` pointer.
/// Should not be called directly.
pub unsafe fn afl_custom_deinit_<M: RawCustomMutator>(data: *mut c_void) {
match catch_unwind(|| {
// drop the context
@ -386,18 +400,16 @@ macro_rules! export_mutator {
}
#[no_mangle]
pub extern "C" fn afl_custom_fuzz_count(
pub unsafe extern "C" fn afl_custom_fuzz_count(
data: *mut ::std::os::raw::c_void,
buf: *const u8,
buf_size: usize,
) -> u32 {
unsafe {
$crate::wrappers::afl_custom_fuzz_count_::<$mutator_type>(data, buf, buf_size)
}
}
#[no_mangle]
pub extern "C" fn afl_custom_fuzz(
pub unsafe extern "C" fn afl_custom_fuzz(
data: *mut ::std::os::raw::c_void,
buf: *mut u8,
buf_size: usize,
@ -406,7 +418,6 @@ macro_rules! export_mutator {
add_buf_size: usize,
max_size: usize,
) -> usize {
unsafe {
$crate::wrappers::afl_custom_fuzz_::<$mutator_type>(
data,
buf,
@ -417,14 +428,13 @@ macro_rules! export_mutator {
max_size,
)
}
}
#[no_mangle]
pub extern "C" fn afl_custom_queue_new_entry(
data: *mut ::std::os::raw::c_void,
filename_new_queue: *const ::std::os::raw::c_char,
filename_orig_queue: *const ::std::os::raw::c_char,
) {
) -> bool {
$crate::wrappers::afl_custom_queue_new_entry_::<$mutator_type>(
data,
filename_new_queue,
@ -456,8 +466,8 @@ macro_rules! export_mutator {
}
#[no_mangle]
pub extern "C" fn afl_custom_deinit(data: *mut ::std::os::raw::c_void) {
unsafe { $crate::wrappers::afl_custom_deinit_::<$mutator_type>(data) }
pub unsafe extern "C" fn afl_custom_deinit(data: *mut ::std::os::raw::c_void) {
$crate::wrappers::afl_custom_deinit_::<$mutator_type>(data)
}
};
}
@ -542,8 +552,8 @@ pub trait CustomMutator {
&mut self,
filename_new_queue: &Path,
filename_orig_queue: Option<&Path>,
) -> Result<(), Self::Error> {
Ok(())
) -> Result<bool, Self::Error> {
Ok(false)
}
fn queue_get(&mut self, filename: &Path) -> Result<bool, Self::Error> {
@ -617,11 +627,16 @@ where
}
}
fn queue_new_entry(&mut self, filename_new_queue: &Path, filename_orig_queue: Option<&Path>) {
fn queue_new_entry(
&mut self,
filename_new_queue: &Path,
filename_orig_queue: Option<&Path>,
) -> bool {
match self.queue_new_entry(filename_new_queue, filename_orig_queue) {
Ok(r) => r,
Err(e) => {
Self::handle_error(e);
false
}
}
}
@ -696,8 +711,7 @@ mod default_mutator_describe {
fn truncate_str_unicode_safe(s: &str, max_len: usize) -> &str {
if s.len() <= max_len {
s
} else {
if let Some((last_index, _)) = s
} else if let Some((last_index, _)) = s
.char_indices()
.take_while(|(index, _)| *index <= max_len)
.last()
@ -706,7 +720,6 @@ fn truncate_str_unicode_safe(s: &str, max_len: usize) -> &str {
} else {
""
}
}
}
#[cfg(test)]

View File

@ -101,7 +101,7 @@ my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {
/* When a new queue entry is added we run this input with the symcc
instrumented binary */
void afl_custom_queue_new_entry(my_mutator_t * data,
uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
@ -232,6 +232,8 @@ void afl_custom_queue_new_entry(my_mutator_t * data,
}
return 0;
}
uint32_t afl_custom_fuzz_count(my_mutator_t *data, const u8 *buf,

View File

@ -8,6 +8,26 @@
Want to stay in the loop on major new features? Join our mailing list by
sending a mail to <afl-users+subscribe@googlegroups.com>.
### Version ++3.15a (dev)
- afl-fuzz:
- added AFL_IGNORE_PROBLEMS plus checks to identify and abort on
incorrect LTO usage setups and enhanced the READMEs for better
information on how to deal with instrumenting libraries
- afl-cc:
- fix for shared linking on MacOS
- added the very good grammar mutator "GramaTron" to the
custom_mutators
- added optimin, a faster and better corpus minimizer by
Adrian Herrera. Thank you!
- added afl-persistent-config script to set permanent system
configuration settings for fuzzing, for Linux and MacOS.
Thanks to jhertz!
- added xml, curl and exotic string functions to llvm dictionary features
- fix AFL_PRELOAD issues on MacOS
- removed utils/afl_frida because frida_mode/ is now so much better
- added uninstall target to makefile (todo: update new readme!)
### Version ++3.14c (release)
- afl-fuzz:
- fix -F when a '/' was part of the parameter
@ -70,7 +90,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- on a crashing seed potentially the wrong input was disabled
- added AFL_EXIT_ON_SEED_ISSUES env that will exit if a seed in
-i dir crashes the target or results in a timeout. By default
afl++ ignores these and uses them for splicing instead.
AFL++ ignores these and uses them for splicing instead.
- added AFL_EXIT_ON_TIME env that will make afl-fuzz exit fuzzing
after no new paths have been found for n seconds
- when AFL_FAST_CAL is set a variable path will now be calibrated
@ -224,7 +244,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- Updated utils/afl_frida to be 5% faster, 7% on x86_x64
- Added `AFL_KILL_SIGNAL` env variable (thanks @v-p-b)
- @Edznux added a nice documentation on how to use rpc.statsd with
afl++ in docs/rpc_statsd.md, thanks!
AFL++ in docs/rpc_statsd.md, thanks!
### Version ++3.00c (release)
- llvm_mode/ and gcc_plugin/ moved to instrumentation/
@ -280,7 +300,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- custom mutators
- added a new custom mutator: symcc -> https://github.com/eurecom-s3/symcc/
- added a new custom mutator: libfuzzer that integrates libfuzzer mutations
- Our afl++ Grammar-Mutator is now better integrated into custom_mutators/
- Our AFL++ Grammar-Mutator is now better integrated into custom_mutators/
- added INTROSPECTION support for custom modules
- python fuzz function was not optional, fixed
- some python mutator speed improvements
@ -291,7 +311,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
### Version ++2.68c (release)
- added the GSoC excellent afl++ grammar mutator by Shengtuo to our
- added the GSoC excellent AFL++ grammar mutator by Shengtuo to our
custom_mutators/ (see custom_mutators/README.md) - or get it here:
https://github.com/AFLplusplus/Grammar-Mutator
- a few QOL changes for Apple and its outdated gmake
@ -314,12 +334,12 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- llvm_mode:
- ported SanCov to LTO, and made it the default for LTO. better
instrumentation locations
- Further llvm 12 support (fast moving target like afl++ :-) )
- Further llvm 12 support (fast moving target like AFL++ :-) )
- deprecated LLVM SKIPSINGLEBLOCK env environment
### Version ++2.67c (release)
- Support for improved afl++ snapshot module:
- Support for improved AFL++ snapshot module:
https://github.com/AFLplusplus/AFL-Snapshot-LKM
- Due to the instrumentation needing more memory, the initial memory sizes
for -m have been increased
@ -421,7 +441,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
files/stdin) - 10-100% performance increase
- General support for 64 bit PowerPC, RiscV, Sparc etc.
- fix afl-cmin.bash
- slightly better performance compilation options for afl++ and targets
- slightly better performance compilation options for AFL++ and targets
- fixed afl-gcc/afl-as that could break on fast systems reusing pids in
the same second
- added lots of dictionaries from oss-fuzz, go-fuzz and Jakub Wilk
@ -434,7 +454,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- afl-fuzz:
- AFL_MAP_SIZE was not working correctly
- better python detection
- an old, old bug in afl that would show negative stability in rare
- an old, old bug in AFL that would show negative stability in rare
circumstances is now hopefully fixed
- AFL_POST_LIBRARY was deprecated, use AFL_CUSTOM_MUTATOR_LIBRARY
instead (see docs/custom_mutators.md)
@ -493,8 +513,8 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- extended forkserver: map_size and more information is communicated to
afl-fuzz (and afl-fuzz acts accordingly)
- new environment variable: AFL_MAP_SIZE to specify the size of the shared map
- if AFL_CC/AFL_CXX is set but empty afl compilers did fail, fixed
(this bug is in vanilla afl too)
- if AFL_CC/AFL_CXX is set but empty AFL compilers did fail, fixed
(this bug is in vanilla AFL too)
- added NO_PYTHON flag to disable python support when building afl-fuzz
- more refactoring
@ -508,7 +528,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- all:
- big code changes to make afl-fuzz thread-safe so afl-fuzz can spawn
multiple fuzzing threads in the future or even become a library
- afl basic tools now report on the environment variables picked up
- AFL basic tools now report on the environment variables picked up
- more tools get environment variable usage info in the help output
- force all output to stdout (some OK/SAY/WARN messages were sent to
stdout, some to stderr)
@ -657,7 +677,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- qemu and unicorn download scripts now try to download until the full
download succeeded. f*ckin travis fails downloading 40% of the time!
- more support for Android (please test!)
- added the few Android stuff we didnt have already from Google afl repository
- added the few Android stuff we didnt have already from Google AFL repository
- removed unnecessary warnings
@ -705,7 +725,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- big code refactoring:
* all includes are now in include/
* all afl sources are now in src/ - see src/README.md
* all AFL sources are now in src/ - see src/README.md
* afl-fuzz was split up in various individual files for including
functionality in other programs (e.g. forkserver, memory map, etc.)
for better readability.
@ -721,7 +741,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- fix building on *BSD (thanks to tobias.kortkamp for the patch)
- fix for a few features to support different map sized than 2^16
- afl-showmap: new option -r now shows the real values in the buckets (stock
afl never did), plus shows tuple content summary information now
AFL never did), plus shows tuple content summary information now
- small docu updates
- NeverZero counters for QEMU
- NeverZero counters for Unicorn
@ -764,7 +784,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
debugging
- added -V time and -E execs option to better comparison runs, runs afl-fuzz
for a specific time/executions.
- added a -s seed switch to allow afl run with a fixed initial
- added a -s seed switch to allow AFL run with a fixed initial
seed that is not updated. This is good for performance and path discovery
tests as the random numbers are deterministic then
- llvm_mode LAF_... env variables can now be specified as AFL_LLVM_LAF_...
@ -1585,7 +1605,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
### Version 1.63b:
- Updated cgroups_asan/ with a new version from Sam, made a couple changes
to streamline it and keep parallel afl instances in separate groups.
to streamline it and keep parallel AFL instances in separate groups.
- Fixed typos, thanks to Jakub Wilk.
@ -2383,7 +2403,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- Added AFL_KEEP_ASSEMBLY for easier troubleshooting.
- Added an override for AFL_USE_ASAN if set at afl compile time. Requested by
- Added an override for AFL_USE_ASAN if set at AFL compile time. Requested by
Hanno Boeck.
### Version 0.79b:

View File

@ -1,8 +1,8 @@
# Frequently asked questions about afl++
# Frequently asked questions about AFL++
## Contents
* [What is the difference between afl and afl++?](#what-is-the-difference-between-afl-and-afl)
* [What is the difference between AFL and AFL++?](#what-is-the-difference-between-afl-and-afl)
* [I got a weird compile error from clang](#i-got-a-weird-compile-error-from-clang)
* [How to improve the fuzzing speed?](#how-to-improve-the-fuzzing-speed)
* [How do I fuzz a network service?](#how-do-i-fuzz-a-network-service)
@ -14,7 +14,7 @@
If you find an interesting or important question missing, submit it via
[https://github.com/AFLplusplus/AFLplusplus/issues](https://github.com/AFLplusplus/AFLplusplus/issues)
## What is the difference between afl and afl++?
## What is the difference between AFL and AFL++?
American Fuzzy Lop (AFL) was developed by Michał "lcamtuf" Zalewski starting in
2013/2014, and when he left Google end of 2017 he stopped developing it.
@ -24,13 +24,13 @@ it is only accepting PRs from the community and is not developing enhancements
anymore.
In the second quarter of 2019, 1 1/2 year later when no further development of
AFL had happened and it became clear that none would be coming, afl++
AFL had happened and it became clear that none would be coming, AFL++
was born, where initially community patches were collected and applied
for bug fixes and enhancements. Then from various AFL spin-offs - mostly academic
research - features were integrated. This already resulted in a much advanced
AFL.
Until the end of 2019 the afl++ team had grown to four active developers which
Until the end of 2019 the AFL++ team had grown to four active developers which
then implemented their own research and features, making it now by far the most
flexible and feature rich guided fuzzer available as open source.
And in independent fuzzing benchmarks it is one of the best fuzzers available,
@ -52,15 +52,15 @@ clang-13: note: diagnostic msg:
********************
```
Then this means that your OS updated the clang installation from an upgrade
package and because of that the afl++ llvm plugins do not match anymore.
package and because of that the AFL++ llvm plugins do not match anymore.
Solution: `git pull ; make clean install` of afl++
Solution: `git pull ; make clean install` of AFL++
## How to improve the fuzzing speed?
1. Use [llvm_mode](docs/llvm_mode/README.md): afl-clang-lto (llvm >= 11) or afl-clang-fast (llvm >= 9 recommended)
2. Use [persistent mode](llvm_mode/README.persistent_mode.md) (x2-x20 speed increase)
3. Use the [afl++ snapshot module](https://github.com/AFLplusplus/AFL-Snapshot-LKM) (x2 speed increase)
1. Use [llvm_mode](../instrumentation/README.llvm.md): afl-clang-lto (llvm >= 11) or afl-clang-fast (llvm >= 9 recommended)
2. Use [persistent mode](../instrumentation/README.persistent_mode.md) (x2-x20 speed increase)
3. Use the [AFL++ snapshot module](https://github.com/AFLplusplus/AFL-Snapshot-LKM) (x2 speed increase)
4. If you do not use shmem persistent mode, use `AFL_TMPDIR` to put the input file directory on a tempfs location, see [docs/env_variables.md](docs/env_variables.md)
5. Improve Linux kernel performance: modify `/etc/default/grub`, set `GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"`; then `update-grub` and `reboot` (warning: makes the system less secure)
6. Running on an `ext2` filesystem with `noatime` mount option will be a bit faster than on any other journaling filesystem
@ -77,7 +77,7 @@ Using a network channel is inadequate for several reasons:
The established method to fuzz network services is to modify the source code
to read from a file or stdin (fd 0) (or even faster via shared memory, combine
this with persistent mode [llvm_mode/README.persistent_mode.md](llvm_mode/README.persistent_mode.md)
this with persistent mode [instrumentation/README.persistent_mode.md](../instrumentation/README.persistent_mode.md)
and you have a performance gain of x10 instead of a performance loss of over
x10 - that is a x100 difference!).
@ -86,7 +86,7 @@ and perform binary fuzzing) you can also use a shared library with AFL_PRELOAD
to emulate the network. This is also much faster than the real network would be.
See [utils/socket_fuzzing/](../utils/socket_fuzzing/).
There is an outdated afl++ branch that implements networking if you are
There is an outdated AFL++ branch that implements networking if you are
desperate though: [https://github.com/AFLplusplus/AFLplusplus/tree/networking](https://github.com/AFLplusplus/AFLplusplus/tree/networking) -
however a better option is AFLnet ([https://github.com/aflnet/aflnet](https://github.com/aflnet/aflnet))
which allows you to define network state with different type of data packets.
@ -158,7 +158,7 @@ reaction to timing, etc. then in some of the re-executions with the same data
the edge coverage result will be different across runs.
Those edges that change are then flagged "unstable".
The more "unstable" edges, the more difficult for afl++ to identify valid new
The more "unstable" edges, the more difficult for AFL++ to identify valid new
paths.
A value above 90% is usually fine and a value above 80% is also still ok, and
@ -197,7 +197,7 @@ afl-clang-fast PCGUARD and afl-clang-lto LTO instrumentation.
b) For PCGUARD instrumented binaries it is much more difficult. Here you
can either modify the __sanitizer_cov_trace_pc_guard function in
llvm_mode/afl-llvm-rt.o.c to write a backtrace to a file if the ID in
instrumentation/afl-llvm-rt.o.c to write a backtrace to a file if the ID in
__afl_area_ptr[*guard] is one of the unstable edge IDs.
(Example code is already there).
Then recompile and reinstall llvm_mode and rebuild your target.
@ -225,7 +225,7 @@ afl-clang-fast PCGUARD and afl-clang-lto LTO instrumentation.
remove from instrumentation, or just specify the functions you want to
skip for instrumentation. Note that optimization might inline functions!
Simply follow this document on how to do this: [llvm_mode/README.instrument_list.md](llvm_mode/README.instrument_list.md)
Simply follow this document on how to do this: [instrumentation/README.instrument_list.md](../instrumentation/README.instrument_list.md)
If PCGUARD is used, then you need to follow this guide (needs llvm 12+!):
[http://clang.llvm.org/docs/SanitizerCoverage.html#partially-disabling-instrumentation](http://clang.llvm.org/docs/SanitizerCoverage.html#partially-disabling-instrumentation)

View File

@ -74,12 +74,32 @@ and depend mostly on user feedback.
To build AFL, install llvm (and perhaps gcc) from brew and follow the general
instructions for Linux. If possible avoid Xcode at all cost.
`brew install wget git make cmake llvm gdb`
Be sure to setup PATH to point to the correct clang binaries and use the
freshly installed clang, clang++ and gmake, e.g.:
```
export PATH="/usr/local/Cellar/llvm/12.0.1/bin/:$PATH"
export CC=clang
export CXX=clang++
gmake
cd frida_mode
gmake
cd ..
gmake install
```
afl-gcc will fail unless you have GCC installed, but that is using outdated
instrumentation anyway. You don't want that.
Note that afl-clang-lto, afl-gcc-fast and qemu_mode are not working on MacOS.
The crash reporting daemon that comes by default with MacOS X will cause
problems with fuzzing. You need to turn it off by following the instructions
provided here: http://goo.gl/CCcd5u
problems with fuzzing. You need to turn it off:
```
launchctl unload -w /System/Library/LaunchAgents/com.apple.ReportCrash.plist
sudo launchctl unload -w /System/Library/LaunchDaemons/com.apple.ReportCrash.Root.plist
```
The `fork()` semantics on OS X are a bit unusual compared to other unix systems
and definitely don't look POSIX-compliant. This means two things:

View File

@ -1,12 +1,12 @@
# Fuzzing binary-only programs with afl++
# Fuzzing binary-only programs with AFL++
afl++, libfuzzer and others are great if you have the source code, and
AFL++, libfuzzer and others are great if you have the source code, and
it allows for very fast and coverage guided fuzzing.
However, if there is only the binary program and no source code available,
then standard `afl-fuzz -n` (non-instrumented mode) is not effective.
The following is a description of how these binaries can be fuzzed with afl++.
The following is a description of how these binaries can be fuzzed with AFL++.
## TL;DR:
@ -39,7 +39,7 @@
Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz)
which now has a qemu_mode, but its performance is just 1.5% ...
As it is included in afl++ this needs no URL.
As it is included in AFL++ this needs no URL.
If you like to code a customized fuzzer without much work, we highly
recommend to check out our sister project libafl which will support QEMU
@ -56,12 +56,12 @@
frida-gum via utils/afl_frida/, you will have to write a harness to
call the target function in the library, use afl-frida.c as a template.
Both come with afl++ so this needs no URL.
Both come with AFL++ so this needs no URL.
You can also perform remote fuzzing with frida, e.g. if you want to fuzz
on iPhone or Android devices, for this you can use
[https://github.com/ttdennis/fpicker/](https://github.com/ttdennis/fpicker/)
as an intermediate that uses afl++ for fuzzing.
as an intermediate that uses AFL++ for fuzzing.
If you like to code a customized fuzzer without much work, we highly
recommend to check out our sister project libafl which supports Frida too:
@ -74,7 +74,7 @@
Wine mode can run Win32 PE binaries with the QEMU instrumentation.
It needs Wine, python3 and the pefile python package installed.
As it is included in afl++ this needs no URL.
As it is included in AFL++ this needs no URL.
## UNICORN
@ -83,10 +83,10 @@
In contrast to QEMU, Unicorn does not offer a full system or even userland
emulation. Runtime environment and/or loaders have to be written from scratch,
if needed. On top, block chaining has been removed. This means the speed boost
introduced in the patched QEMU Mode of afl++ cannot simply be ported over to
introduced in the patched QEMU Mode of AFL++ cannot simply be ported over to
Unicorn. For further information, check out [unicorn_mode/README.md](../unicorn_mode/README.md).
As it is included in afl++ this needs no URL.
As it is included in AFL++ this needs no URL.
## AFL UNTRACER
@ -153,7 +153,7 @@
As a result, the overall speed decrease is about 70-90% (depending on
the implementation and other factors).
There are two afl intel-pt implementations:
There are two AFL intel-pt implementations:
1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt)
=> this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel.
@ -175,7 +175,7 @@
the ARM chip is difficult too.
My guess is that it is slower than Qemu, but faster than Intel PT.
If anyone finds any coresight implementation for afl please ping me: vh@thc.org
If anyone finds any coresight implementation for AFL please ping me: vh@thc.org
## PIN & DYNAMORIO

View File

@ -21,7 +21,7 @@ fuzzing by using libraries that perform mutations according to a given grammar.
The custom mutator is passed to `afl-fuzz` via the `AFL_CUSTOM_MUTATOR_LIBRARY`
or `AFL_PYTHON_MODULE` environment variable, and must export a fuzz function.
Now afl also supports multiple custom mutators which can be specified in the same `AFL_CUSTOM_MUTATOR_LIBRARY` environment variable like this.
Now AFL also supports multiple custom mutators which can be specified in the same `AFL_CUSTOM_MUTATOR_LIBRARY` environment variable like this.
```bash
export AFL_CUSTOM_MUTATOR_LIBRARY="full/path/to/mutator_first.so;full/path/to/mutator_second.so"
```
@ -47,7 +47,7 @@ int afl_custom_post_trim(void *data, unsigned char success);
size_t afl_custom_havoc_mutation(void *data, unsigned char *buf, size_t buf_size, unsigned char **out_buf, size_t max_size);
unsigned char afl_custom_havoc_mutation_probability(void *data);
unsigned char afl_custom_queue_get(void *data, const unsigned char *filename);
void afl_custom_queue_new_entry(void *data, const unsigned char *filename_new_queue, const unsigned int *filename_orig_queue);
u8 afl_custom_queue_new_entry(void *data, const unsigned char *filename_new_queue, const unsigned int *filename_orig_queue);
const char* afl_custom_introspection(my_mutator_t *data);
void afl_custom_deinit(void *data);
```
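A rough sketch of how such a library is usually built and loaded - file names,
paths and the include directory are placeholders, not fixed conventions:
```bash
# build the custom mutator as a shared object
gcc -O2 -fPIC -shared -I /path/to/AFLplusplus/include -o my_mutator.so my_mutator.c
# tell afl-fuzz to load it
export AFL_CUSTOM_MUTATOR_LIBRARY=/path/to/my_mutator.so
afl-fuzz -i in -o out -- ./target @@
```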
@ -88,7 +88,7 @@ def queue_get(filename):
return True
def queue_new_entry(filename_new_queue, filename_orig_queue):
pass
return False
def introspection():
return string
@ -156,6 +156,7 @@ def deinit(): # optional for Python
- `queue_new_entry` (optional):
This method is called after adding a new test case to the queue.
If the contents of the file were changed, return True; otherwise return False.
- `introspection` (optional):

View File

@ -1,9 +1,9 @@
# Restructure afl++'s documentation
# Restructure AFL++'s documentation
## About us
We are dedicated to everything around fuzzing; our main and most well-known
contribution is the fuzzer `afl++` which is part of all major Unix
contribution is the fuzzer `AFL++` which is part of all major Unix
distributions (e.g. Debian, Arch, FreeBSD, etc.) and is deployed on Google's
oss-fuzz and clusterfuzz. It is rated the top fuzzer on Google's fuzzbench.
@ -11,27 +11,27 @@ We are four individuals from Europe supported by a large community.
All our tools are open source.
## About the afl++ fuzzer project
## About the AFL++ fuzzer project
afl++ inherited it's documentation from the original Google afl project.
AFL++ inherited its documentation from the original Google AFL project.
Since then it has been massively improved - feature and performance wise -
and although the documentation has likewise been extended, it has grown out
of proportion.
The documentation is written by non-native English speakers, and none of us
has a writing background.
We see questions on afl++ usage on mailing lists (e.g. afl-users), discord
We see questions on AFL++ usage on mailing lists (e.g. afl-users), discord
channels, web forums and as issues in our repository.
This only increases as afl++ has been on the top of Google's fuzzbench
This only increases as AFL++ has been on the top of Google's fuzzbench
statistics (which measures the performance of fuzzers) and is now being
integrated in Google's oss-fuzz and clusterfuzz - and is in many Unix
packaging repositories, e.g. Debian, FreeBSD, etc.
afl++ now has 44 (!) documentation files with 13k total lines of content.
AFL++ now has 44 (!) documentation files with 13k total lines of content.
This is way too much.
Hence afl++ needs a complete overhaul of it's documentation, both on a
Hence AFL++ needs a complete overhaul of its documentation, both on an
organisational/structural level as well as in content.
Overall the following actions have to be performed:
@ -44,9 +44,9 @@ Overall the following actions have to be performed:
* The documents have been written and modified by a lot of different people,
most of them non-native English speakers. Hence an overall review of which
parts should be rewritten has to be performed, and then the rewrite done.
* Create a cheat-sheet for a very short best-setup build and run of afl++
* Create a cheat-sheet for a very short best-setup build and run of AFL++
* Pictures explain more than 1000 words. We need at least 4 images that
explain the workflow with afl++:
explain the workflow with AFL++:
- the build workflow
- the fuzzing workflow
- the fuzzing campaign management workflow
@ -65,8 +65,8 @@ us.
## Metrics
afl++ is a the highest performant fuzzer publicly available - but is also the
most feature rich and complex. With the publicity of afl++' success and
AFL++ is the most performant fuzzer publicly available - but it is also the
most feature-rich and complex. With the publicity of AFL++'s success and
deployment in Google projects internally and externally, and availability as
a package on most Linux distributions, we see more and more issues being
created and help requests on our Discord channel that would not be
@ -75,7 +75,7 @@ is unrealistic.
We expect the new documentation after this project to be cleaner, easier to
access and lighter to digest for our users, resulting in far fewer
help requests. On the other hand the amount of users using afl++ should
help requests. On the other hand, the number of users using AFL++ should
increase as well, as it will be more accessible, which would also increase
questions again - but overall resulting in a reduction of help requests.
@ -103,7 +103,7 @@ graphics (but again - this is basically just guessing).
Technical Writer 10000$
Volunteer stipends 0$ (waived)
T-Shirts for the top 10 contributors and helpers to this documentation project:
10 afl++ logo t-shirts 20$ each 200$
10 AFL++ logo t-shirts 20$ each 200$
10 shipping cost of t-shirts 10$ each 100$
Total: 10.300$
@ -118,5 +118,5 @@ We have no experience with a technical writer, but we will support that person
with video calls, chats, emails and messaging, provide all necessary information
and write the technical content that is required for the success of this project.
It is clear to us that a technical writer knows how to write, but cannot know
the technical details in a complex tooling like in afl++. This guidance, input,
the technical details of complex tooling like AFL++. This guidance, input,
etc. has to come from us.

View File

@ -11,7 +11,7 @@
## 1) Settings for all compilers
Starting with afl++ 3.0 there is only one compiler: afl-cc
Starting with AFL++ 3.0 there is only one compiler: afl-cc
Selecting between the different instrumentation modes can be done by
1. passing the --afl-MODE command line option to the compiler
2. or using a symlink to afl-cc: afl-gcc, afl-g++, afl-clang, afl-clang++,
@ -23,10 +23,10 @@ To select the different instrumentation modes this can be done by
(afl-g*-fast) or `GCC` (afl-gcc/afl-g++).
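For example, selecting the plain LLVM instrumentation via the symlink method
could look like this (a sketch; configure flags depend on your target):
```bash
CC=afl-clang-fast CXX=afl-clang-fast++ ./configure
make
```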
Because (with the exception of the --afl-MODE command line option) the
compile-time tools do not accept afl specific command-line options, they
compile-time tools do not accept AFL-specific command-line options, they
make fairly broad use of environment variables instead:
- Some build/configure scripts break with afl++ compilers. To be able to
- Some build/configure scripts break with AFL++ compilers. To be able to
pass them, do:
```
export CC=afl-cc
@ -37,7 +37,7 @@ make fairly broad use of environmental variables instead:
make
```
- Most afl tools do not print any output if stdout/stderr are redirected.
- Most AFL tools do not print any output if stdout/stderr are redirected.
If you want to get the output into a file then set the `AFL_DEBUG`
environment variable.
This is sadly necessary for various build processes which fail otherwise.
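For example (a sketch, assuming a regular make-based build):
```bash
AFL_DEBUG=1 make 2> build.log   # compiler diagnostics end up in build.log
```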
@ -55,8 +55,7 @@ make fairly broad use of environmental variables instead:
overridden.
- Setting `AFL_USE_ASAN` automatically enables ASAN, provided that your
compiler supports it. Note that fuzzing with ASAN is mildly challenging
- see [notes_for_asan.md](notes_for_asan.md).
compiler supports it.
(You can also enable MSAN via `AFL_USE_MSAN`; ASAN and MSAN come with the
same gotchas; the modes are mutually exclusive. UBSAN can be enabled
@ -149,7 +148,7 @@ Then there are a few specific features that are only available in instrumentatio
This is a different kind of instrumentation: first it compiles all
code in LTO (link time optimization) and then performs an edge inserting
instrumentation which is 100% collision free (collisions are a big issue
in afl and afl-like instrumentations). This is performed by using
in AFL and AFL-like instrumentations). This is performed by using
afl-clang-lto/afl-clang-lto++ instead of afl-clang-fast, but is only
built if LLVM 11 or newer is used.
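A sketch of an LTO-instrumented build (assuming afl-clang-lto was built
against LLVM 11 or newer):
```bash
CC=afl-clang-lto CXX=afl-clang-lto++ ./configure
make
```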
@ -167,7 +166,7 @@ Then there are a few specific features that are only available in instrumentatio
or which functions were touched by an input.
- `AFL_LLVM_MAP_ADDR` sets the fixed map address to a different address than
the default `0x10000`. A value of 0 or empty sets the map address to be
dynamic (the original afl way, which is slower)
dynamic (the original AFL way, which is slower)
- `AFL_LLVM_MAP_DYNAMIC` sets the shared memory address to be dynamic
- `AFL_LLVM_LTO_STARTID` sets the starting location ID for the instrumentation.
This defaults to 1
@ -372,7 +371,7 @@ checks or alter some of the more exotic semantics of the tool:
- Setting `AFL_CUSTOM_MUTATOR_LIBRARY` to a shared library with
afl_custom_fuzz() creates additional mutations through this library.
If afl-fuzz is compiled with Python (which is autodetected during builing
If afl-fuzz is compiled with Python (which is autodetected during building
afl-fuzz), setting `AFL_PYTHON_MODULE` to a Python module can also provide
additional mutations.
If `AFL_CUSTOM_MUTATOR_ONLY` is also set, all mutations will solely be
@ -433,6 +432,10 @@ checks or alter some of the more exotic semantics of the tool:
and RECORD:000000,cnt:000009 being the crash case.
NOTE: This option needs to be enabled in config.h first!
- If afl-fuzz encounters an incorrect fuzzing setup during a fuzzing session
(not at startup), it will terminate. If you do not want this, you can
set `AFL_IGNORE_PROBLEMS`.
- If you are Jakub, you may need `AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES`.
Others need not apply, unless they also want to disable the
`/proc/sys/kernel/core_pattern` check.
@ -456,8 +459,8 @@ checks or alter some of the more exotic semantics of the tool:
- Setting `AFL_MAX_DET_EXRAS` will change the threshold at what number of elements
in the `-x` dictionary and LTO autodict (combined) the probabilistic mode will
kick off. In probabilistic mode not all dictionary entires will be used all
of the times for fuzzing mutations to not slow down fuzzing.
kick off. In probabilistic mode, not all dictionary entries will be used all
of the time for fuzzing mutations to not slow down fuzzing.
The default count is `200` elements. So for the 201st element, there is a
1 in 201 chance that one of the dictionary entries will not be used directly.
@ -480,11 +483,11 @@ checks or alter some of the more exotic semantics of the tool:
allows you to add tags to your fuzzing instances. This is especially useful when running
multiple instances (`-M/-S` for example). Applied tags are `banner` and `afl_version`.
`banner` corresponds to the name of the fuzzer provided through `-M/-S`.
`afl_version` corresponds to the currently running afl version (e.g `++3.0c`).
`afl_version` corresponds to the currently running AFL version (e.g `++3.0c`).
Default (empty/non present) will add no tags to the metrics.
See [rpc_statsd.md](rpc_statsd.md) for more information.
- Setting `AFL_CRASH_EXITCODE` sets the exit code afl treats as crash.
- Setting `AFL_CRASH_EXITCODE` sets the exit code AFL treats as crash.
For example, if `AFL_CRASH_EXITCODE='-1'` is set, each input resulting
in an `-1` return code (i.e. `exit(-1)` got called), will be treated
as if a crash had occurred.
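A sketch of how this can be used (corpus and target are placeholders):
```bash
export AFL_CRASH_EXITCODE='-1'   # treat a target exit code of -1 as a crash
afl-fuzz -i in -o out -- ./target @@
```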

View File

@ -1,4 +1,4 @@
# Ideas for afl++
# Ideas for AFL++
In the following, we describe a variety of ideas that could be implemented
for future AFL++ versions.

View File

@ -27,7 +27,7 @@ will not be able to use that input to guide their work.
To help with this problem, afl-fuzz offers a simple way to synchronize test
cases on the fly.
Note that afl++ has AFLfast's power schedules implemented.
Note that AFL++ has AFLfast's power schedules implemented.
It is therefore a good idea to use different power schedules if you run
several instances in parallel. See [power_schedules.md](power_schedules.md)
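For example, a main and a secondary instance with different power schedules
could be started like this (a sketch; instance names and schedules are just
examples):
```bash
afl-fuzz -i in -o sync_dir -M main -p exploit -- ./target @@
afl-fuzz -i in -o sync_dir -S s01  -p fast    -- ./target @@
```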
@ -116,7 +116,7 @@ distribute the deterministic fuzzing across. Note that if you boot up fewer
fuzzers than indicated by the second number passed to -M, you may end up with
poor coverage.
## 4) Syncing with non-afl fuzzers or independant instances
## 4) Syncing with non-AFL fuzzers or independent instances
A -M main node can be told with the `-F other_fuzzer_queue_directory` option
to sync results from other fuzzers, e.g. libfuzzer or honggfuzz.
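A sketch of such a setup (the foreign queue directory is a placeholder path):
```bash
afl-fuzz -i in -o sync_dir -M main -F /path/to/honggfuzz_out -- ./target @@
```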

View File

@ -170,6 +170,7 @@ spectre_v2=off stf_barrier=off
```
In most Linux distributions you can put this into a variable in
`/etc/default/grub`.
You can use `sudo afl-persistent-config` to set these options for you.
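On Debian-style systems this roughly amounts to the following sketch - keep
the full option list from above and regenerate the grub configuration
afterwards:
```bash
# in /etc/default/grub (the "..." stands for the options listed above)
GRUB_CMDLINE_LINUX_DEFAULT="... spectre_v2=off stf_barrier=off"
# then:
sudo update-grub && sudo reboot
```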
The following changes are made when executing `afl-system-config`:

View File

@ -31,9 +31,9 @@ By doing so, you might be able to see when the fuzzing process has reached a sta
(according to your own criteria) for your targets, etc. - all without having to log into each instance manually.
An example visualisation may look like the following:
![StatsD Grafana](visualization/statsd-grafana.png)
![StatsD Grafana](resources/statsd-grafana.png)
*Notes: The exact same dashboard can be imported with [this JSON template](statsd/grafana-afl++.json).*
*Notes: The exact same dashboard can be imported with [this JSON template](resources/grafana-afl++.json).*
## How to use

View File

@ -35,7 +35,7 @@ american fuzzy lop ++3.01a (default) [fast] {0}
The top line shows you which mode afl-fuzz is running in
(normal: "american fuzy lop", crash exploration mode: "peruvian rabbit mode")
and the version of afl++.
and the version of AFL++.
Next to the version is the banner, which, if not set with -T by hand, will
either show the binary name being fuzzed, or the -M/-S main/secondary name for
parallel fuzzing.
@ -409,7 +409,7 @@ directory. This includes:
- `edges_found` - how many edges have been found
- `var_byte_count` - how many edges are non-deterministic
- `afl_banner` - banner text (e.g. the target name)
- `afl_version` - the version of afl used
- `afl_version` - the version of AFL used
- `target_mode` - default, persistent, qemu, unicorn, non-instrumented
- `command_line` - full command line used for the fuzzing session
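These values can be inspected at any time while the fuzzer is running, e.g. (a
sketch assuming the default output directory layout):
```bash
cat out/default/fuzzer_stats   # raw key/value statistics of one instance
afl-whatsup out                # summary over all instances in a sync dir
```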

View File

@ -156,7 +156,7 @@ In contrast to more greedy genetic algorithms, this approach allows the tool
to progressively explore various disjoint and possibly mutually incompatible
features of the underlying data format, as shown in this image:
![gzip_coverage](./visualization/afl_gzip.png)
![gzip_coverage](./resources/afl_gzip.png)
Several practical examples of the results of this algorithm are discussed
here:

View File

@ -1 +0,0 @@
() { _; } >_[$($())] { id; }

View File

@ -1 +0,0 @@
() { x() { _; }; x() { _; } <<a; }

View File

@ -1,3 +0,0 @@
<!DOCTYPEd[<!ENTITY
S ""><!ENTITY %
N "<!ELEMENT<![INCLUDE0"<!ENTITYL%N;

View File

@ -1,2 +0,0 @@
create table t0(o CHar(0)CHECK(0&O>O));insert into t0
select randomblob(0)-trim(0);

View File

@ -1 +0,0 @@
SELECT 0 UNION SELECT 0 ORDER BY 1 COLLATE"""""""";

View File

@ -1 +0,0 @@
PRAGMA foreign_keys=1;CREATE TABLE t1("""0"PRIMARY KEy REFERENCES t1 ON DELETE SET NULL);REPLACE INTO t1 SELECT(0);

View File

@ -1,2 +0,0 @@
DROP TABLE IF EXISTS t;CREATE VIRTUAL TABLE t0 USING fts4();insert into t0 select zeroblob(0);SAVEPOINT O;insert into t0
select(0);SAVEPOINT E;insert into t0 SELECT 0 UNION SELECT 0'x'ORDER BY x;

File diff suppressed because one or more lines are too long

View File

@ -1 +0,0 @@
SELECT*from(select"",zeroblob(0),zeroblob(1E9),zeroblob(0),zeroblob(150000000),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(1E9),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0)),(select"",zeroblob(1E9),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(1E9),(0),zeroblob(150000000),(0),zeroblob(0),(0)EXCEPT select zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0));

View File

@ -1,2 +0,0 @@
create table t0(t);insert into t0
select strftime();

View File

@ -1 +0,0 @@
SELECT fts3_tokenizer(@0());

View File

@ -1 +0,0 @@
select''like''like''like#0;

Some files were not shown because too many files have changed in this diff.