mirror of
https://github.com/AFLplusplus/AFLplusplus.git
synced 2025-06-09 00:31:33 +00:00
Merge pull request #1035 from adrianherrera/optimin-util
add optimin corpus minimizer
This commit is contained in:
commit
fa2b164429
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -10,3 +10,6 @@
|
|||||||
[submodule "custom_mutators/gramatron/json-c"]
|
[submodule "custom_mutators/gramatron/json-c"]
|
||||||
path = custom_mutators/gramatron/json-c
|
path = custom_mutators/gramatron/json-c
|
||||||
url = https://github.com/json-c/json-c
|
url = https://github.com/json-c/json-c
|
||||||
|
[submodule "utils/optimin/EvalMaxSAT"]
|
||||||
|
path = utils/optimin/EvalMaxSAT
|
||||||
|
url = https://github.com/FlorentAvellaneda/EvalMaxSAT
|
||||||
|
@ -40,6 +40,8 @@ Here's a quick overview of the stuff you can find in this directory:
|
|||||||
|
|
||||||
- libpng_no_checksum - a sample patch for removing CRC checks in libpng.
|
- libpng_no_checksum - a sample patch for removing CRC checks in libpng.
|
||||||
|
|
||||||
|
- optimin - An optimal corpus minimizer.
|
||||||
|
|
||||||
- persistent_mode - an example of how to use the LLVM persistent process
|
- persistent_mode - an example of how to use the LLVM persistent process
|
||||||
mode to speed up certain fuzzing jobs.
|
mode to speed up certain fuzzing jobs.
|
||||||
|
|
||||||
|
11
utils/optimin/.gitignore
vendored
Normal file
11
utils/optimin/.gitignore
vendored
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
CMakeLists.txt.user
|
||||||
|
CMakeCache.txt
|
||||||
|
CMakeFiles
|
||||||
|
CMakeScripts
|
||||||
|
Testing
|
||||||
|
Makefile
|
||||||
|
cmake_install.cmake
|
||||||
|
install_manifest.txt
|
||||||
|
compile_commands.json
|
||||||
|
CTestTestfile.cmake
|
||||||
|
_deps
|
22
utils/optimin/CMakeLists.txt
Normal file
22
utils/optimin/CMakeLists.txt
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.10)
|
||||||
|
|
||||||
|
project(optimin
|
||||||
|
LANGUAGES CXX
|
||||||
|
DESCRIPTION "MaxSAT-based fuzzing corpus minimizer"
|
||||||
|
)
|
||||||
|
|
||||||
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||||
|
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
|
||||||
|
|
||||||
|
# Add LLVM
|
||||||
|
find_package(LLVM REQUIRED CONFIG)
|
||||||
|
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
|
||||||
|
|
||||||
|
include_directories(${LLVM_INCLUDE_DIRS})
|
||||||
|
add_definitions(${LLVM_DEFINITIONS} -DNDEBUG)
|
||||||
|
|
||||||
|
add_subdirectory(EvalMaxSAT)
|
||||||
|
add_subdirectory(src)
|
1
utils/optimin/EVALMAXSAT_VERSION
Normal file
1
utils/optimin/EVALMAXSAT_VERSION
Normal file
@ -0,0 +1 @@
|
|||||||
|
440bf90edf88f6ab940934129e3c5b3b93764295
|
1
utils/optimin/EvalMaxSAT
Submodule
1
utils/optimin/EvalMaxSAT
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 440bf90edf88f6ab940934129e3c5b3b93764295
|
74
utils/optimin/README.md
Normal file
74
utils/optimin/README.md
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
# OptiMin
|
||||||
|
|
||||||
|
OptiMin is a corpus minimizer that uses a
|
||||||
|
[MaxSAT](https://en.wikipedia.org/wiki/Maximum_satisfiability_problem) solver
|
||||||
|
to identify a subset of functionally distinct files that exercise different code
|
||||||
|
paths in a target program.
|
||||||
|
|
||||||
|
Unlike most corpus minimizers, such as `afl-cmin`, OptiMin does not rely on
|
||||||
|
heuristic and/or greedy algorithms to identify these functionally distinct
|
||||||
|
files. This means that minimized corpora are generally much smaller than those
|
||||||
|
produced by other tools.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
To build the `optimin` executable (when cloned from github):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Ensure EvalMaxSAT is available
|
||||||
|
git submodule init
|
||||||
|
git submodule update
|
||||||
|
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
|
||||||
|
# You may have to specify -DLLVM_DIR=`llvm-config --cmakedir` if you have a
|
||||||
|
# non-standard LLVM install (e.g., install via apt)
|
||||||
|
cmake ..
|
||||||
|
make -j
|
||||||
|
make install
|
||||||
|
```
|
||||||
|
|
||||||
|
Otherwise, run the `build_optimin.sh` script. Running `optimin` is the same as
|
||||||
|
running `afl-cmin`.
|
||||||
|
|
||||||
|
### Weighted Minimizations
|
||||||
|
|
||||||
|
OptiMin allows for weighted minimizations. For examples, seeds can be weighted
|
||||||
|
by file size (or execution time), thus preferencing the selection of smaller (or
|
||||||
|
faster) seeds.
|
||||||
|
|
||||||
|
To perform a weighted minimization, supply a CSV file with the `-w` option. This
|
||||||
|
CSV file is formatted as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
SEED_1,WEIGHT_1
|
||||||
|
SEED_2,WEIGHT_2
|
||||||
|
...
|
||||||
|
SEED_N,WEIGHT_N
|
||||||
|
```
|
||||||
|
|
||||||
|
Where `SEED_N` is the file name (**not** path) of a seed in the input directory,
|
||||||
|
and `WEIGHT_N` is an integer weight.
|
||||||
|
|
||||||
|
## Further Details and Citation
|
||||||
|
|
||||||
|
For more details, please see the paper [Seed Selection for Successful
|
||||||
|
Fuzzing](https://dl.acm.org/doi/10.1145/3460319.3464795). If you use OptiMin in
|
||||||
|
your research, please cite this paper.
|
||||||
|
|
||||||
|
Bibtex:
|
||||||
|
|
||||||
|
```bibtex
|
||||||
|
@inproceedings{Herrera:2021:FuzzSeedSelection,
|
||||||
|
author = {Adrian Herrera and Hendra Gunadi and Shane Magrath and Michael Norrish and Mathias Payer and Antony L. Hosking},
|
||||||
|
title = {Seed Selection for Successful Fuzzing},
|
||||||
|
booktitle = {30th ACM SIGSOFT International Symposium on Software Testing and Analysis},
|
||||||
|
series = {ISSTA},
|
||||||
|
year = {2021},
|
||||||
|
pages = {230--243},
|
||||||
|
numpages = {14},
|
||||||
|
location = {Virtual, Denmark},
|
||||||
|
publisher = {Association for Computing Machinery},
|
||||||
|
}
|
||||||
|
```
|
130
utils/optimin/build_optimin.sh
Executable file
130
utils/optimin/build_optimin.sh
Executable file
@ -0,0 +1,130 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# american fuzzy lop++ - optimin build script
|
||||||
|
# ------------------------------------------------
|
||||||
|
#
|
||||||
|
# Originally written by Nathan Voss <njvoss99@gmail.com>
|
||||||
|
#
|
||||||
|
# Adapted from code by Andrew Griffiths <agriffiths@google.com> and
|
||||||
|
# Michal Zalewski
|
||||||
|
#
|
||||||
|
# Adapted for AFLplusplus by Dominik Maier <mail@dmnk.co>
|
||||||
|
#
|
||||||
|
# Copyright 2017 Battelle Memorial Institute. All rights reserved.
|
||||||
|
# Copyright 2019-2020 AFLplusplus Project. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at:
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# This script builds the OptiMin corpus minimizer.
|
||||||
|
|
||||||
|
EVALMAXSAT_VERSION="$(cat ./EVALMAXSAT_VERSION)"
|
||||||
|
EVALMAXSAT_REPO="https://github.com/FlorentAvellaneda/EvalMaxSAT"
|
||||||
|
|
||||||
|
echo "================================================="
|
||||||
|
echo "OptiMin build script"
|
||||||
|
echo "================================================="
|
||||||
|
echo
|
||||||
|
|
||||||
|
echo "[*] Performing basic sanity checks..."
|
||||||
|
|
||||||
|
PLT=`uname -s`
|
||||||
|
|
||||||
|
if [ ! -f "../../config.h" ]; then
|
||||||
|
|
||||||
|
echo "[-] Error: key files not found - wrong working directory?"
|
||||||
|
exit 1
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
LLVM_CONFIG="${LLVM_CONFIG:-llvm-config}"
|
||||||
|
CMAKECMD=cmake
|
||||||
|
MAKECMD=make
|
||||||
|
TARCMD=tar
|
||||||
|
|
||||||
|
if [ "$PLT" = "Darwin" ]; then
|
||||||
|
CORES=`sysctl -n hw.ncpu`
|
||||||
|
TARCMD=tar
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$PLT" = "FreeBSD" ]; then
|
||||||
|
MAKECMD=gmake
|
||||||
|
CORES=`sysctl -n hw.ncpu`
|
||||||
|
TARCMD=gtar
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$PLT" = "NetBSD" ] || [ "$PLT" = "OpenBSD" ]; then
|
||||||
|
MAKECMD=gmake
|
||||||
|
CORES=`sysctl -n hw.ncpu`
|
||||||
|
TARCMD=gtar
|
||||||
|
fi
|
||||||
|
|
||||||
|
PREREQ_NOTFOUND=
|
||||||
|
for i in git $CMAKECMD $MAKECMD $TARCMD; do
|
||||||
|
|
||||||
|
T=`command -v "$i" 2>/dev/null`
|
||||||
|
|
||||||
|
if [ "$T" = "" ]; then
|
||||||
|
|
||||||
|
echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i' or similar."
|
||||||
|
PREREQ_NOTFOUND=1
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
done
|
||||||
|
|
||||||
|
if echo "$CC" | grep -qF /afl-; then
|
||||||
|
|
||||||
|
echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool."
|
||||||
|
PREREQ_NOTFOUND=1
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$PREREQ_NOTFOUND" = "1" ]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "[+] All checks passed!"
|
||||||
|
|
||||||
|
echo "[*] Making sure EvalMaxSAT is checked out"
|
||||||
|
|
||||||
|
git status 1>/dev/null 2>/dev/null
|
||||||
|
if [ $? -eq 0 ]; then
|
||||||
|
echo "[*] initializing EvalMaxSAT submodule"
|
||||||
|
git submodule init || exit 1
|
||||||
|
git submodule update ./EvalMaxSAT 2>/dev/null # ignore errors
|
||||||
|
else
|
||||||
|
echo "[*] cloning EvalMaxSAT"
|
||||||
|
test -d EvalMaxSAT || {
|
||||||
|
CNT=1
|
||||||
|
while [ '!' -d EvalMaxSAT -a "$CNT" -lt 4 ]; do
|
||||||
|
echo "Trying to clone EvalMaxSAT (attempt $CNT/3)"
|
||||||
|
git clone "$GRAMMAR_REPO"
|
||||||
|
CNT=`expr "$CNT" + 1`
|
||||||
|
done
|
||||||
|
}
|
||||||
|
fi
|
||||||
|
|
||||||
|
test -d EvalMaxSAT || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; }
|
||||||
|
echo "[+] Got EvalMaxSAT."
|
||||||
|
|
||||||
|
cd "EvalMaxSAT" || exit 1
|
||||||
|
echo "[*] Checking out $EVALMAXSAT_VERSION"
|
||||||
|
sh -c 'git stash && git stash drop' 1>/dev/null 2>/dev/null
|
||||||
|
git checkout "$EVALMAXSAT_VERSION" || exit 1
|
||||||
|
cd ..
|
||||||
|
|
||||||
|
echo
|
||||||
|
echo
|
||||||
|
echo "[+] EvalMaxSAT successfully prepared!"
|
||||||
|
echo "[+] Building OptiMin now."
|
||||||
|
mkdir -p build
|
||||||
|
cd build
|
||||||
|
cmake .. -DLLVM_DIR=`$LLVM_CONFIG --cmakedir`
|
||||||
|
make -j$CORES
|
||||||
|
cd ..
|
||||||
|
echo
|
||||||
|
echo "[+] OptiMin successfully built!"
|
12
utils/optimin/src/CMakeLists.txt
Normal file
12
utils/optimin/src/CMakeLists.txt
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
add_executable(optimin OptiMin.cpp)
|
||||||
|
|
||||||
|
foreach(LIB MaLib EvalMaxSAT glucose)
|
||||||
|
target_include_directories(optimin PRIVATE
|
||||||
|
"${CMAKE_SOURCE_DIR}/EvalMaxSAT/lib/${LIB}/src")
|
||||||
|
target_link_libraries(optimin ${LIB})
|
||||||
|
endforeach(LIB)
|
||||||
|
|
||||||
|
llvm_map_components_to_libnames(LLVM_LIBS support)
|
||||||
|
target_link_libraries(optimin ${LLVM_LIBS})
|
||||||
|
|
||||||
|
install(TARGETS optimin RUNTIME DESTINATION bin)
|
455
utils/optimin/src/OptiMin.cpp
Normal file
455
utils/optimin/src/OptiMin.cpp
Normal file
@ -0,0 +1,455 @@
|
|||||||
|
/*
|
||||||
|
* OptiMin, an optimal fuzzing corpus minimizer.
|
||||||
|
*
|
||||||
|
* Author: Adrian Herrera
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <llvm/ADT/DenseSet.h>
|
||||||
|
#include <llvm/ADT/DenseMap.h>
|
||||||
|
#include <llvm/ADT/SmallVector.h>
|
||||||
|
#include <llvm/ADT/StringExtras.h>
|
||||||
|
#include <llvm/ADT/StringMap.h>
|
||||||
|
#include <llvm/Support/Chrono.h>
|
||||||
|
#include <llvm/Support/CommandLine.h>
|
||||||
|
#include <llvm/Support/FileSystem.h>
|
||||||
|
#include <llvm/Support/MemoryBuffer.h>
|
||||||
|
#include <llvm/Support/Path.h>
|
||||||
|
#include <llvm/Support/Program.h>
|
||||||
|
#include <llvm/Support/WithColor.h>
|
||||||
|
|
||||||
|
#include "EvalMaxSAT.h"
|
||||||
|
#include "ProgressBar.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------- //
|
||||||
|
// Classes
|
||||||
|
// -------------------------------------------------------------------------- //
|
||||||
|
|
||||||
|
/// Ensure seed weights default to 1
|
||||||
|
class Weight {
|
||||||
|
public:
|
||||||
|
Weight() : Weight(1){};
|
||||||
|
Weight(uint32_t V) : Value(V){};
|
||||||
|
|
||||||
|
operator unsigned() const {
|
||||||
|
return Value;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const unsigned Value;
|
||||||
|
};
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------- //
|
||||||
|
// Typedefs
|
||||||
|
// -------------------------------------------------------------------------- //
|
||||||
|
|
||||||
|
/// AFL tuple (edge) ID
|
||||||
|
using AFLTupleID = uint32_t;
|
||||||
|
|
||||||
|
/// Pair of tuple ID and hit count
|
||||||
|
using AFLTuple = std::pair<AFLTupleID, /* Frequency */ unsigned>;
|
||||||
|
|
||||||
|
/// Coverage for a given seed file
|
||||||
|
using AFLCoverageVector = std::vector<AFLTuple>;
|
||||||
|
|
||||||
|
/// Maps seed file paths to a weight
|
||||||
|
using WeightsMap = StringMap<Weight>;
|
||||||
|
|
||||||
|
/// A seed identifier in the MaxSAT solver
|
||||||
|
using SeedID = int;
|
||||||
|
|
||||||
|
/// Associates seed identifiers to seed files
|
||||||
|
using MaxSATSeeds =
|
||||||
|
SmallVector<std::pair<SeedID, /* Seed file */ std::string>, 0>;
|
||||||
|
|
||||||
|
/// Set of literal identifiers
|
||||||
|
using MaxSATSeedSet = DenseSet<SeedID>;
|
||||||
|
|
||||||
|
/// Maps tuple IDs to the literal identifiers that "cover" that tuple
|
||||||
|
using MaxSATCoverageMap = DenseMap<AFLTupleID, MaxSATSeedSet>;
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------- //
|
||||||
|
// Global variables
|
||||||
|
// -------------------------------------------------------------------------- //
|
||||||
|
|
||||||
|
// This is based on the human class count in `count_class_human[256]` in
|
||||||
|
// `afl-showmap.c`
|
||||||
|
static constexpr uint32_t MAX_EDGE_FREQ = 8;
|
||||||
|
|
||||||
|
static sys::TimePoint<> StartTime, EndTime;
|
||||||
|
static std::chrono::seconds Duration;
|
||||||
|
|
||||||
|
static std::string AFLShowmapPath;
|
||||||
|
static bool TargetArgsHasAtAt = false;
|
||||||
|
|
||||||
|
static const auto ErrMsg = [] {
|
||||||
|
return WithColor(errs(), HighlightColor::Error) << "[-] ";
|
||||||
|
};
|
||||||
|
static const auto WarnMsg = [] {
|
||||||
|
return WithColor(errs(), HighlightColor::Warning) << "[-] ";
|
||||||
|
};
|
||||||
|
static const auto SuccMsg = [] {
|
||||||
|
return WithColor(outs(), HighlightColor::String) << "[+] ";
|
||||||
|
};
|
||||||
|
static const auto StatMsg = [] {
|
||||||
|
return WithColor(outs(), HighlightColor::Remark) << "[*] ";
|
||||||
|
};
|
||||||
|
|
||||||
|
static cl::opt<std::string> CorpusDir("i", cl::desc("Input directory"),
|
||||||
|
cl::value_desc("dir"), cl::Required);
|
||||||
|
static cl::opt<std::string> OutputDir("o", cl::desc("Output directory"),
|
||||||
|
cl::value_desc("dir"), cl::Required);
|
||||||
|
static cl::opt<bool> EdgesOnly("f", cl::desc("Include edge hit counts"),
|
||||||
|
cl::init(true));
|
||||||
|
static cl::opt<bool> ShowProgBar("p", cl::desc("Display progress bar"));
|
||||||
|
static cl::opt<std::string> WeightsFile("w", cl::desc("Weights file"),
|
||||||
|
cl::value_desc("csv"));
|
||||||
|
static cl::opt<std::string> TargetProg(cl::Positional,
|
||||||
|
cl::desc("<target program>"),
|
||||||
|
cl::Required);
|
||||||
|
static cl::list<std::string> TargetArgs(cl::ConsumeAfter,
|
||||||
|
cl::desc("[target args...]"));
|
||||||
|
static cl::opt<std::string> MemLimit(
|
||||||
|
"m", cl::desc("Memory limit for child process (default=none)"),
|
||||||
|
cl::value_desc("megs"), cl::init("none"));
|
||||||
|
static cl::opt<std::string> Timeout(
|
||||||
|
"t", cl::desc("Run time limit for child process (default=none)"),
|
||||||
|
cl::value_desc("msec"), cl::init("none"));
|
||||||
|
static cl::opt<bool> CrashMode(
|
||||||
|
"C", cl::desc("Keep crashing inputs, reject everything else"));
|
||||||
|
static cl::opt<bool> QemuMode("Q", cl::desc("Use binary-only instrumentation"));
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------- //
|
||||||
|
// Helper functions
|
||||||
|
// -------------------------------------------------------------------------- //
|
||||||
|
|
||||||
|
static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) {
|
||||||
|
SmallVector<StringRef, 0> Lines;
|
||||||
|
MB.getBuffer().trim().split(Lines, '\n');
|
||||||
|
|
||||||
|
unsigned Weight = 0;
|
||||||
|
|
||||||
|
for (const auto &Line : Lines) {
|
||||||
|
const auto &[Seed, WeightStr] = Line.split(',');
|
||||||
|
|
||||||
|
if (to_integer(WeightStr, Weight, 10)) {
|
||||||
|
Weights.try_emplace(Seed, Weight);
|
||||||
|
} else {
|
||||||
|
WarnMsg() << "Failed to read weight for `" << Seed << "`. Skipping...\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] static std::error_code getAFLCoverage(const StringRef Seed,
|
||||||
|
AFLCoverageVector &Cov) {
|
||||||
|
Optional<StringRef> Redirects[] = {None, None, None};
|
||||||
|
std::error_code EC;
|
||||||
|
|
||||||
|
// Create temporary output file
|
||||||
|
SmallString<64> OutputPath;
|
||||||
|
if (EC = sys::fs::createTemporaryFile("showmap", "txt", OutputPath))
|
||||||
|
return EC;
|
||||||
|
|
||||||
|
// Prepare afl-showmap arguments
|
||||||
|
SmallVector<StringRef, 12> AFLShowmapArgs{
|
||||||
|
AFLShowmapPath, "-m", MemLimit, "-t", Timeout, "-q", "-o", OutputPath};
|
||||||
|
|
||||||
|
if (TargetArgsHasAtAt)
|
||||||
|
AFLShowmapArgs.append({"-A", Seed});
|
||||||
|
else
|
||||||
|
Redirects[/* stdin */ 0] = Seed;
|
||||||
|
|
||||||
|
if (QemuMode) AFLShowmapArgs.push_back("-Q");
|
||||||
|
if (CrashMode) AFLShowmapArgs.push_back("-C");
|
||||||
|
|
||||||
|
AFLShowmapArgs.append({"--", TargetProg});
|
||||||
|
AFLShowmapArgs.append(TargetArgs.begin(), TargetArgs.end());
|
||||||
|
|
||||||
|
// Run afl-showmap
|
||||||
|
const int RC = sys::ExecuteAndWait(AFLShowmapPath, AFLShowmapArgs,
|
||||||
|
/*env=*/None, Redirects);
|
||||||
|
if (RC) return std::make_error_code(std::errc::executable_format_error);
|
||||||
|
|
||||||
|
// Parse afl-showmap output
|
||||||
|
const auto CovOrErr = MemoryBuffer::getFile(OutputPath);
|
||||||
|
if (EC = CovOrErr.getError()) {
|
||||||
|
sys::fs::remove(OutputPath);
|
||||||
|
return EC;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmallVector<StringRef, 0> Lines;
|
||||||
|
CovOrErr.get()->getBuffer().trim().split(Lines, '\n');
|
||||||
|
|
||||||
|
AFLTupleID Edge = 0;
|
||||||
|
unsigned Freq = 0;
|
||||||
|
|
||||||
|
for (const auto &Line : Lines) {
|
||||||
|
const auto &[EdgeStr, FreqStr] = Line.split(':');
|
||||||
|
|
||||||
|
to_integer(EdgeStr, Edge, 10);
|
||||||
|
to_integer(FreqStr, Freq, 10);
|
||||||
|
Cov.push_back({Edge, Freq});
|
||||||
|
}
|
||||||
|
|
||||||
|
return sys::fs::remove(OutputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void StartTimer(bool ShowProgBar) {
|
||||||
|
StartTime = std::chrono::system_clock::now();
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void EndTimer(bool ShowProgBar) {
|
||||||
|
EndTime = std::chrono::system_clock::now();
|
||||||
|
Duration =
|
||||||
|
std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime);
|
||||||
|
|
||||||
|
if (ShowProgBar)
|
||||||
|
outs() << '\n';
|
||||||
|
else
|
||||||
|
outs() << Duration.count() << "s\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------- //
|
||||||
|
// Main function
|
||||||
|
// -------------------------------------------------------------------------- //
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
WeightsMap Weights;
|
||||||
|
ProgressBar ProgBar;
|
||||||
|
std::error_code EC;
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
// Parse command-line options
|
||||||
|
//
|
||||||
|
// Also check the target arguments, as this determines how we run afl-showmap.
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
|
||||||
|
cl::ParseCommandLineOptions(argc, argv, "Optimal corpus minimizer");
|
||||||
|
|
||||||
|
if (!sys::fs::is_directory(OutputDir)) {
|
||||||
|
ErrMsg() << "Invalid output directory `" << OutputDir << "`\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto &Arg : TargetArgs)
|
||||||
|
if (Arg == "@@") TargetArgsHasAtAt = true;
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
// Find afl-showmap
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
|
||||||
|
const auto AFLShowmapOrErr = sys::findProgramByName("afl-showmap");
|
||||||
|
if (AFLShowmapOrErr.getError()) {
|
||||||
|
ErrMsg() << "Failed to find afl-showmap. Check your PATH\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
AFLShowmapPath = *AFLShowmapOrErr;
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
// Parse weights
|
||||||
|
//
|
||||||
|
// Weights are stored in CSV file mapping a seed file name to an integer
|
||||||
|
// greater than zero.
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
|
||||||
|
if (WeightsFile != "") {
|
||||||
|
StatMsg() << "Reading weights from `" << WeightsFile << "`... ";
|
||||||
|
StartTimer(/*ShowProgBar=*/false);
|
||||||
|
|
||||||
|
const auto WeightsOrErr = MemoryBuffer::getFile(WeightsFile);
|
||||||
|
if (EC = WeightsOrErr.getError()) {
|
||||||
|
ErrMsg() << "Failed to read weights from `" << WeightsFile
|
||||||
|
<< "`: " << EC.message() << '\n';
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
GetWeights(*WeightsOrErr.get(), Weights);
|
||||||
|
|
||||||
|
EndTimer(/*ShowProgBar=*/false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
// Traverse corpus directory
|
||||||
|
//
|
||||||
|
// Find the seed files inside this directory.
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
|
||||||
|
StatMsg() << "Locating seeds in `" << CorpusDir << "`... ";
|
||||||
|
StartTimer(/*ShowProgBar=*/false);
|
||||||
|
|
||||||
|
std::vector<std::string> SeedFiles;
|
||||||
|
sys::fs::file_status Status;
|
||||||
|
|
||||||
|
for (sys::fs::directory_iterator Dir(CorpusDir, EC), DirEnd;
|
||||||
|
Dir != DirEnd && !EC; Dir.increment(EC)) {
|
||||||
|
if (EC) {
|
||||||
|
ErrMsg() << "Failed to traverse corpus directory `" << CorpusDir
|
||||||
|
<< "`: " << EC.message() << '\n';
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto &Path = Dir->path();
|
||||||
|
if (EC = sys::fs::status(Path, Status)) {
|
||||||
|
WarnMsg() << "Failed to access seed file `" << Path
|
||||||
|
<< "`: " << EC.message() << ". Skipping...\n";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (Status.type()) {
|
||||||
|
case sys::fs::file_type::regular_file:
|
||||||
|
case sys::fs::file_type::symlink_file:
|
||||||
|
case sys::fs::file_type::type_unknown:
|
||||||
|
SeedFiles.push_back(Path);
|
||||||
|
default:
|
||||||
|
/* Ignore */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
EndTimer(/*ShowProgBar=*/false);
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
// Generate seed coverage
|
||||||
|
//
|
||||||
|
// Iterate over the corpus directory, which should contain seed files. Execute
|
||||||
|
// these seeds in the target program to generate coverage information, and
|
||||||
|
// then store this coverage information in the appropriate data structures.
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
|
||||||
|
size_t SeedCount = 0;
|
||||||
|
const size_t NumSeeds = SeedFiles.size();
|
||||||
|
|
||||||
|
if (!ShowProgBar)
|
||||||
|
StatMsg() << "Generating coverage for " << NumSeeds << " seeds... ";
|
||||||
|
StartTimer(ShowProgBar);
|
||||||
|
|
||||||
|
EvalMaxSAT Solver(/*nbMinimizeThread=*/0);
|
||||||
|
MaxSATSeeds SeedVars;
|
||||||
|
MaxSATCoverageMap SeedCoverage;
|
||||||
|
AFLCoverageVector Cov;
|
||||||
|
|
||||||
|
for (const auto &SeedFile : SeedFiles) {
|
||||||
|
// Execute seed
|
||||||
|
Cov.clear();
|
||||||
|
if (EC = getAFLCoverage(SeedFile, Cov)) {
|
||||||
|
ErrMsg() << "Failed to get coverage for seed " << SeedFile << ": "
|
||||||
|
<< EC.message() << '\n';
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a variable to represent the seed
|
||||||
|
const SeedID Var = Solver.newVar();
|
||||||
|
SeedVars.push_back({Var, SeedFile});
|
||||||
|
|
||||||
|
// Record the set of seeds that cover a particular edge
|
||||||
|
for (const auto &[Edge, Freq] : Cov) {
|
||||||
|
if (EdgesOnly) {
|
||||||
|
// Ignore edge frequency
|
||||||
|
SeedCoverage[Edge].insert(Var);
|
||||||
|
} else {
|
||||||
|
// Executing edge `E` `N` times means that it was executed `N - 1` times
|
||||||
|
for (unsigned I = 0; I < Freq; ++I)
|
||||||
|
SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(Var);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((++SeedCount % 10 == 0) && ShowProgBar)
|
||||||
|
ProgBar.update(SeedCount * 100 / NumSeeds, "Generating seed coverage");
|
||||||
|
}
|
||||||
|
|
||||||
|
EndTimer(ShowProgBar);
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
// Set the hard and soft constraints in the solver
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
|
||||||
|
if (!ShowProgBar) StatMsg() << "Generating constraints... ";
|
||||||
|
StartTimer(ShowProgBar);
|
||||||
|
|
||||||
|
SeedCount = 0;
|
||||||
|
|
||||||
|
// Ensure that at least one seed is selected that covers a particular edge
|
||||||
|
// (hard constraint)
|
||||||
|
std::vector<SeedID> Clauses;
|
||||||
|
for (const auto &[_, Seeds] : SeedCoverage) {
|
||||||
|
if (Seeds.empty()) continue;
|
||||||
|
|
||||||
|
Clauses.clear();
|
||||||
|
for (const auto &Seed : Seeds)
|
||||||
|
Clauses.push_back(Seed);
|
||||||
|
|
||||||
|
Solver.addClause(Clauses);
|
||||||
|
|
||||||
|
if ((++SeedCount % 10 == 0) && ShowProgBar)
|
||||||
|
ProgBar.update(SeedCount * 100 / SeedCoverage.size(),
|
||||||
|
"Generating clauses");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Select the minimum number of seeds that cover a particular set of edges
|
||||||
|
// (soft constraint)
|
||||||
|
for (const auto &[Var, Seed] : SeedVars)
|
||||||
|
Solver.addWeightedClause({-Var}, Weights[sys::path::filename(Seed)]);
|
||||||
|
|
||||||
|
EndTimer(ShowProgBar);
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
// Generate a solution
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
|
||||||
|
StatMsg() << "Solving... ";
|
||||||
|
StartTimer(/*ShowProgBar=*/false);
|
||||||
|
|
||||||
|
const bool Solved = Solver.solve();
|
||||||
|
|
||||||
|
EndTimer(/*ShowProgBar=*/false);
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
// Save the solution
|
||||||
|
//
|
||||||
|
// This will copy the selected seeds to the given output directory.
|
||||||
|
// ------------------------------------------------------------------------ //
|
||||||
|
|
||||||
|
SmallVector<StringRef, 64> Solution;
|
||||||
|
SmallString<32> OutputSeed;
|
||||||
|
|
||||||
|
if (Solved) {
|
||||||
|
for (const auto &[Var, Seed] : SeedVars)
|
||||||
|
if (Solver.getValue(Var) > 0) Solution.push_back(Seed);
|
||||||
|
} else {
|
||||||
|
ErrMsg() << "Failed to find an optimal solution for `" << CorpusDir
|
||||||
|
<< "`\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
SuccMsg() << "Minimized corpus size: " << Solution.size() << " seeds\n";
|
||||||
|
|
||||||
|
if (!ShowProgBar) StatMsg() << "Copying to `" << OutputDir << "`... ";
|
||||||
|
StartTimer(ShowProgBar);
|
||||||
|
|
||||||
|
SeedCount = 0;
|
||||||
|
|
||||||
|
for (const auto &Seed : Solution) {
|
||||||
|
OutputSeed = OutputDir;
|
||||||
|
sys::path::append(OutputSeed, sys::path::filename(Seed));
|
||||||
|
|
||||||
|
if (EC = sys::fs::copy_file(Seed, OutputSeed)) {
|
||||||
|
WarnMsg() << "Failed to copy `" << Seed << "` to `" << OutputDir
|
||||||
|
<< "`: " << EC.message() << '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((++SeedCount % 10 == 0) && ShowProgBar)
|
||||||
|
ProgBar.update(SeedCount * 100 / Solution.size(), "Copying seeds");
|
||||||
|
}
|
||||||
|
|
||||||
|
EndTimer(ShowProgBar);
|
||||||
|
SuccMsg() << "Done!\n";
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
58
utils/optimin/src/ProgressBar.h
Normal file
58
utils/optimin/src/ProgressBar.h
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
/**
|
||||||
|
* Progress bar.
|
||||||
|
*
|
||||||
|
* Adapted from https://www.bfilipek.com/2020/02/inidicators.html
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <llvm/ADT/StringRef.h>
|
||||||
|
#include <llvm/Support/raw_ostream.h>
|
||||||
|
|
||||||
|
/// Display a progress bar in the terminal
|
||||||
|
class ProgressBar {
|
||||||
|
private:
|
||||||
|
const size_t BarWidth;
|
||||||
|
const std::string Fill;
|
||||||
|
const std::string Remainder;
|
||||||
|
|
||||||
|
public:
|
||||||
|
ProgressBar() : ProgressBar(60, "#", " ") {
|
||||||
|
}
|
||||||
|
|
||||||
|
ProgressBar(size_t Width, const llvm::StringRef F, const llvm::StringRef R)
|
||||||
|
: BarWidth(Width), Fill(F), Remainder(R) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void update(float Progress, const llvm::StringRef Status = "",
|
||||||
|
llvm::raw_ostream &OS = llvm::outs()) {
|
||||||
|
// No need to write once progress is 100%
|
||||||
|
if (Progress > 100.0f) return;
|
||||||
|
|
||||||
|
// Move cursor to the first position on the same line and flush
|
||||||
|
OS << '\r';
|
||||||
|
OS.flush();
|
||||||
|
|
||||||
|
// Start bar
|
||||||
|
OS << '[';
|
||||||
|
|
||||||
|
const auto Completed =
|
||||||
|
static_cast<size_t>(Progress * static_cast<float>(BarWidth) / 100.0);
|
||||||
|
for (size_t I = 0; I < BarWidth; ++I) {
|
||||||
|
if (I <= Completed) {
|
||||||
|
OS << Fill;
|
||||||
|
} else {
|
||||||
|
OS << Remainder;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// End bar
|
||||||
|
OS << ']';
|
||||||
|
|
||||||
|
// Write progress percentage
|
||||||
|
OS << ' ' << std::min(static_cast<size_t>(Progress), size_t(100)) << '%';
|
||||||
|
|
||||||
|
// Write status text
|
||||||
|
OS << " " << Status;
|
||||||
|
}
|
||||||
|
};
|
Loading…
x
Reference in New Issue
Block a user