optimin nits

This commit is contained in:
vanhauser-thc
2021-07-21 09:55:22 +02:00
parent fa2b164429
commit 60cbe5b4be
8 changed files with 133 additions and 22 deletions

View File

@ -9,7 +9,10 @@ Want to stay in the loop on major new features? Join our mailing list by
sending a mail to <afl-users+subscribe@googlegroups.com>. sending a mail to <afl-users+subscribe@googlegroups.com>.
### Version ++3.15a (dev) ### Version ++3.15a (dev)
- added the very good grammar mutator "GramaTron" to the custom_mutators - added the very good grammar mutator "GramaTron" to the
custom_mutators
- added optimin, a faster and better corpus minimizer by
Adrian Herrera. Thank you!
### Version ++3.14c (release) ### Version ++3.14c (release)

View File

@ -10,27 +10,46 @@ heuristic and/or greedy algorithms to identify these functionally distinct
files. This means that minimized corpora are generally much smaller than those files. This means that minimized corpora are generally much smaller than those
produced by other tools. produced by other tools.
## Usage ## Building
To build the `optimin` executable (when cloned from github): To build `optimin`, just execute the `build_optimin.sh` script.
```bash ## Running
# Ensure EvalMaxSAT is available
git submodule init
git submodule update
mkdir build Running `optimin` is the same as running `afl-cmin`:
cd build
# You may have to specify -DLLVM_DIR=`llvm-config --cmakedir` if you have a ```
# non-standard LLVM install (e.g., install via apt) Required parameters:
cmake .. -i dir - input directory with starting corpus
make -j -o dir - output directory for minimized files
make install
Execution control settings:
-f file - location read by the fuzzed program (stdin)
-m megs - memory limit for child process (none MB)
-t msec - run time limit for child process (none)
-O - use binary-only instrumentation (FRIDA mode)
-Q - use binary-only instrumentation (QEMU mode)
-U - use unicorn-based instrumentation (unicorn mode)
Minimization settings:
-C - keep crashing inputs, reject everything else
-e - solve for edge coverage only, ignore hit counts
For additional tips, please consult README.md
Environment variables used:
AFL_ALLOW_TMP: allow unsafe use of input/output directories under {/var}/tmp
AFL_CRASH_EXITCODE: optional child exit code to be interpreted as crash
AFL_FORKSRV_INIT_TMOUT: time the fuzzer waits for the forkserver to come up
AFL_KEEP_TRACES: leave the temporary <out_dir>/.traces directory
AFL_KILL_SIGNAL: Signal delivered to child processes on timeout (default: SIGKILL)
AFL_NO_FORKSRV: run target via execve instead of using the forkserver
AFL_PATH: path for the afl-showmap binary if not found anywhere in PATH
AFL_PRINT_FILENAMES: If set, the filename currently processed will be printed to stdout
AFL_SKIP_BIN_CHECK: skip afl instrumentation checks for target binary
``` ```
Otherwise, run the `build_optimin.sh` script. Running `optimin` is the same as Example: `optimin -i files -o seeds -- ./target @@`
running `afl-cmin`.
### Weighted Minimizations ### Weighted Minimizations

View File

@ -122,9 +122,10 @@ echo
echo "[+] EvalMaxSAT successfully prepared!" echo "[+] EvalMaxSAT successfully prepared!"
echo "[+] Building OptiMin now." echo "[+] Building OptiMin now."
mkdir -p build mkdir -p build
cd build cd build || exit 1
cmake .. -DLLVM_DIR=`$LLVM_CONFIG --cmakedir` cmake .. -DLLVM_DIR=`$LLVM_CONFIG --cmakedir` || exit 1
make -j$CORES make -j$CORES || exit 1
cd .. cd ..
echo echo
cp -fv build/src/optimin . || exit 1
echo "[+] OptiMin successfully built!" echo "[+] OptiMin successfully built!"

View File

@ -33,16 +33,20 @@ namespace {
/// Ensure seed weights default to 1 /// Ensure seed weights default to 1
class Weight { class Weight {
public: public:
Weight() : Weight(1){}; Weight() : Weight(1){};
Weight(uint32_t V) : Value(V){}; Weight(uint32_t V) : Value(V){};
operator unsigned() const { operator unsigned() const {
return Value; return Value;
} }
private: private:
const unsigned Value; const unsigned Value;
}; };
// -------------------------------------------------------------------------- // // -------------------------------------------------------------------------- //
@ -89,16 +93,27 @@ static std::string AFLShowmapPath;
static bool TargetArgsHasAtAt = false; static bool TargetArgsHasAtAt = false;
static const auto ErrMsg = [] { static const auto ErrMsg = [] {
return WithColor(errs(), HighlightColor::Error) << "[-] "; return WithColor(errs(), HighlightColor::Error) << "[-] ";
}; };
static const auto WarnMsg = [] { static const auto WarnMsg = [] {
return WithColor(errs(), HighlightColor::Warning) << "[-] "; return WithColor(errs(), HighlightColor::Warning) << "[-] ";
}; };
static const auto SuccMsg = [] { static const auto SuccMsg = [] {
return WithColor(outs(), HighlightColor::String) << "[+] "; return WithColor(outs(), HighlightColor::String) << "[+] ";
}; };
static const auto StatMsg = [] { static const auto StatMsg = [] {
return WithColor(outs(), HighlightColor::Remark) << "[*] "; return WithColor(outs(), HighlightColor::Remark) << "[*] ";
}; };
static cl::opt<std::string> CorpusDir("i", cl::desc("Input directory"), static cl::opt<std::string> CorpusDir("i", cl::desc("Input directory"),
@ -124,6 +139,7 @@ static cl::opt<std::string> Timeout(
static cl::opt<bool> CrashMode( static cl::opt<bool> CrashMode(
"C", cl::desc("Keep crashing inputs, reject everything else")); "C", cl::desc("Keep crashing inputs, reject everything else"));
static cl::opt<bool> QemuMode("Q", cl::desc("Use binary-only instrumentation")); static cl::opt<bool> QemuMode("Q", cl::desc("Use binary-only instrumentation"));
} // anonymous namespace } // anonymous namespace
// -------------------------------------------------------------------------- // // -------------------------------------------------------------------------- //
@ -131,24 +147,33 @@ static cl::opt<bool> QemuMode("Q", cl::desc("Use binary-only instrumentation"));
// -------------------------------------------------------------------------- // // -------------------------------------------------------------------------- //
static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) { static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) {
SmallVector<StringRef, 0> Lines; SmallVector<StringRef, 0> Lines;
MB.getBuffer().trim().split(Lines, '\n'); MB.getBuffer().trim().split(Lines, '\n');
unsigned Weight = 0; unsigned Weight = 0;
for (const auto &Line : Lines) { for (const auto &Line : Lines) {
const auto &[Seed, WeightStr] = Line.split(','); const auto &[Seed, WeightStr] = Line.split(',');
if (to_integer(WeightStr, Weight, 10)) { if (to_integer(WeightStr, Weight, 10)) {
Weights.try_emplace(Seed, Weight); Weights.try_emplace(Seed, Weight);
} else { } else {
WarnMsg() << "Failed to read weight for `" << Seed << "`. Skipping...\n"; WarnMsg() << "Failed to read weight for `" << Seed << "`. Skipping...\n";
} }
} }
} }
[[nodiscard]] static std::error_code getAFLCoverage(const StringRef Seed, [[nodiscard]] static std::error_code getAFLCoverage(const StringRef Seed,
AFLCoverageVector &Cov) { AFLCoverageVector &Cov) {
Optional<StringRef> Redirects[] = {None, None, None}; Optional<StringRef> Redirects[] = {None, None, None};
std::error_code EC; std::error_code EC;
@ -159,6 +184,7 @@ static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) {
// Prepare afl-showmap arguments // Prepare afl-showmap arguments
SmallVector<StringRef, 12> AFLShowmapArgs{ SmallVector<StringRef, 12> AFLShowmapArgs{
AFLShowmapPath, "-m", MemLimit, "-t", Timeout, "-q", "-o", OutputPath}; AFLShowmapPath, "-m", MemLimit, "-t", Timeout, "-q", "-o", OutputPath};
if (TargetArgsHasAtAt) if (TargetArgsHasAtAt)
@ -180,8 +206,10 @@ static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) {
// Parse afl-showmap output // Parse afl-showmap output
const auto CovOrErr = MemoryBuffer::getFile(OutputPath); const auto CovOrErr = MemoryBuffer::getFile(OutputPath);
if (EC = CovOrErr.getError()) { if (EC = CovOrErr.getError()) {
sys::fs::remove(OutputPath); sys::fs::remove(OutputPath);
return EC; return EC;
} }
SmallVector<StringRef, 0> Lines; SmallVector<StringRef, 0> Lines;
@ -191,21 +219,27 @@ static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) {
unsigned Freq = 0; unsigned Freq = 0;
for (const auto &Line : Lines) { for (const auto &Line : Lines) {
const auto &[EdgeStr, FreqStr] = Line.split(':'); const auto &[EdgeStr, FreqStr] = Line.split(':');
to_integer(EdgeStr, Edge, 10); to_integer(EdgeStr, Edge, 10);
to_integer(FreqStr, Freq, 10); to_integer(FreqStr, Freq, 10);
Cov.push_back({Edge, Freq}); Cov.push_back({Edge, Freq});
} }
return sys::fs::remove(OutputPath); return sys::fs::remove(OutputPath);
} }
static inline void StartTimer(bool ShowProgBar) { static inline void StartTimer(bool ShowProgBar) {
StartTime = std::chrono::system_clock::now(); StartTime = std::chrono::system_clock::now();
} }
static inline void EndTimer(bool ShowProgBar) { static inline void EndTimer(bool ShowProgBar) {
EndTime = std::chrono::system_clock::now(); EndTime = std::chrono::system_clock::now();
Duration = Duration =
std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime);
@ -214,6 +248,7 @@ static inline void EndTimer(bool ShowProgBar) {
outs() << '\n'; outs() << '\n';
else else
outs() << Duration.count() << "s\n"; outs() << Duration.count() << "s\n";
} }
// -------------------------------------------------------------------------- // // -------------------------------------------------------------------------- //
@ -221,6 +256,7 @@ static inline void EndTimer(bool ShowProgBar) {
// -------------------------------------------------------------------------- // // -------------------------------------------------------------------------- //
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
WeightsMap Weights; WeightsMap Weights;
ProgressBar ProgBar; ProgressBar ProgBar;
std::error_code EC; std::error_code EC;
@ -234,8 +270,10 @@ int main(int argc, char *argv[]) {
cl::ParseCommandLineOptions(argc, argv, "Optimal corpus minimizer"); cl::ParseCommandLineOptions(argc, argv, "Optimal corpus minimizer");
if (!sys::fs::is_directory(OutputDir)) { if (!sys::fs::is_directory(OutputDir)) {
ErrMsg() << "Invalid output directory `" << OutputDir << "`\n"; ErrMsg() << "Invalid output directory `" << OutputDir << "`\n";
return 1; return 1;
} }
for (const auto &Arg : TargetArgs) for (const auto &Arg : TargetArgs)
@ -247,9 +285,12 @@ int main(int argc, char *argv[]) {
const auto AFLShowmapOrErr = sys::findProgramByName("afl-showmap"); const auto AFLShowmapOrErr = sys::findProgramByName("afl-showmap");
if (AFLShowmapOrErr.getError()) { if (AFLShowmapOrErr.getError()) {
ErrMsg() << "Failed to find afl-showmap. Check your PATH\n"; ErrMsg() << "Failed to find afl-showmap. Check your PATH\n";
return 1; return 1;
} }
AFLShowmapPath = *AFLShowmapOrErr; AFLShowmapPath = *AFLShowmapOrErr;
// ------------------------------------------------------------------------ // // ------------------------------------------------------------------------ //
@ -260,19 +301,23 @@ int main(int argc, char *argv[]) {
// ------------------------------------------------------------------------ // // ------------------------------------------------------------------------ //
if (WeightsFile != "") { if (WeightsFile != "") {
StatMsg() << "Reading weights from `" << WeightsFile << "`... "; StatMsg() << "Reading weights from `" << WeightsFile << "`... ";
StartTimer(/*ShowProgBar=*/false); StartTimer(/*ShowProgBar=*/false);
const auto WeightsOrErr = MemoryBuffer::getFile(WeightsFile); const auto WeightsOrErr = MemoryBuffer::getFile(WeightsFile);
if (EC = WeightsOrErr.getError()) { if (EC = WeightsOrErr.getError()) {
ErrMsg() << "Failed to read weights from `" << WeightsFile ErrMsg() << "Failed to read weights from `" << WeightsFile
<< "`: " << EC.message() << '\n'; << "`: " << EC.message() << '\n';
return 1; return 1;
} }
GetWeights(*WeightsOrErr.get(), Weights); GetWeights(*WeightsOrErr.get(), Weights);
EndTimer(/*ShowProgBar=*/false); EndTimer(/*ShowProgBar=*/false);
} }
// ------------------------------------------------------------------------ // // ------------------------------------------------------------------------ //
@ -289,20 +334,26 @@ int main(int argc, char *argv[]) {
for (sys::fs::directory_iterator Dir(CorpusDir, EC), DirEnd; for (sys::fs::directory_iterator Dir(CorpusDir, EC), DirEnd;
Dir != DirEnd && !EC; Dir.increment(EC)) { Dir != DirEnd && !EC; Dir.increment(EC)) {
if (EC) { if (EC) {
ErrMsg() << "Failed to traverse corpus directory `" << CorpusDir ErrMsg() << "Failed to traverse corpus directory `" << CorpusDir
<< "`: " << EC.message() << '\n'; << "`: " << EC.message() << '\n';
return 1; return 1;
} }
const auto &Path = Dir->path(); const auto &Path = Dir->path();
if (EC = sys::fs::status(Path, Status)) { if (EC = sys::fs::status(Path, Status)) {
WarnMsg() << "Failed to access seed file `" << Path WarnMsg() << "Failed to access seed file `" << Path
<< "`: " << EC.message() << ". Skipping...\n"; << "`: " << EC.message() << ". Skipping...\n";
continue; continue;
} }
switch (Status.type()) { switch (Status.type()) {
case sys::fs::file_type::regular_file: case sys::fs::file_type::regular_file:
case sys::fs::file_type::symlink_file: case sys::fs::file_type::symlink_file:
case sys::fs::file_type::type_unknown: case sys::fs::file_type::type_unknown:
@ -310,7 +361,9 @@ int main(int argc, char *argv[]) {
default: default:
/* Ignore */ /* Ignore */
break; break;
} }
} }
EndTimer(/*ShowProgBar=*/false); EndTimer(/*ShowProgBar=*/false);
@ -336,12 +389,15 @@ int main(int argc, char *argv[]) {
AFLCoverageVector Cov; AFLCoverageVector Cov;
for (const auto &SeedFile : SeedFiles) { for (const auto &SeedFile : SeedFiles) {
// Execute seed // Execute seed
Cov.clear(); Cov.clear();
if (EC = getAFLCoverage(SeedFile, Cov)) { if (EC = getAFLCoverage(SeedFile, Cov)) {
ErrMsg() << "Failed to get coverage for seed " << SeedFile << ": " ErrMsg() << "Failed to get coverage for seed " << SeedFile << ": "
<< EC.message() << '\n'; << EC.message() << '\n';
return 1; return 1;
} }
// Create a variable to represent the seed // Create a variable to represent the seed
@ -350,18 +406,25 @@ int main(int argc, char *argv[]) {
// Record the set of seeds that cover a particular edge // Record the set of seeds that cover a particular edge
for (const auto &[Edge, Freq] : Cov) { for (const auto &[Edge, Freq] : Cov) {
if (EdgesOnly) { if (EdgesOnly) {
// Ignore edge frequency // Ignore edge frequency
SeedCoverage[Edge].insert(Var); SeedCoverage[Edge].insert(Var);
} else { } else {
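// Executing edge `E` `N` times means that it was also executed `1` to `N - 1` times, so record the seed under each of the `N` frequency slots // Executing edge `E` `N` times means that it was also executed `1` to `N - 1` times, so record the seed under each of the `N` frequency slots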
for (unsigned I = 0; I < Freq; ++I) for (unsigned I = 0; I < Freq; ++I)
SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(Var); SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(Var);
} }
} }
if ((++SeedCount % 10 == 0) && ShowProgBar) if ((++SeedCount % 10 == 0) && ShowProgBar)
ProgBar.update(SeedCount * 100 / NumSeeds, "Generating seed coverage"); ProgBar.update(SeedCount * 100 / NumSeeds, "Generating seed coverage");
} }
EndTimer(ShowProgBar); EndTimer(ShowProgBar);
@ -379,6 +442,7 @@ int main(int argc, char *argv[]) {
// (hard constraint) // (hard constraint)
std::vector<SeedID> Clauses; std::vector<SeedID> Clauses;
for (const auto &[_, Seeds] : SeedCoverage) { for (const auto &[_, Seeds] : SeedCoverage) {
if (Seeds.empty()) continue; if (Seeds.empty()) continue;
Clauses.clear(); Clauses.clear();
@ -390,6 +454,7 @@ int main(int argc, char *argv[]) {
if ((++SeedCount % 10 == 0) && ShowProgBar) if ((++SeedCount % 10 == 0) && ShowProgBar)
ProgBar.update(SeedCount * 100 / SeedCoverage.size(), ProgBar.update(SeedCount * 100 / SeedCoverage.size(),
"Generating clauses"); "Generating clauses");
} }
// Select the minimum number of seeds that cover a particular set of edges // Select the minimum number of seeds that cover a particular set of edges
@ -420,12 +485,16 @@ int main(int argc, char *argv[]) {
SmallString<32> OutputSeed; SmallString<32> OutputSeed;
if (Solved) { if (Solved) {
for (const auto &[Var, Seed] : SeedVars) for (const auto &[Var, Seed] : SeedVars)
if (Solver.getValue(Var) > 0) Solution.push_back(Seed); if (Solver.getValue(Var) > 0) Solution.push_back(Seed);
} else { } else {
ErrMsg() << "Failed to find an optimal solution for `" << CorpusDir ErrMsg() << "Failed to find an optimal solution for `" << CorpusDir
<< "`\n"; << "`\n";
return 1; return 1;
} }
SuccMsg() << "Minimized corpus size: " << Solution.size() << " seeds\n"; SuccMsg() << "Minimized corpus size: " << Solution.size() << " seeds\n";
@ -436,20 +505,26 @@ int main(int argc, char *argv[]) {
SeedCount = 0; SeedCount = 0;
for (const auto &Seed : Solution) { for (const auto &Seed : Solution) {
OutputSeed = OutputDir; OutputSeed = OutputDir;
sys::path::append(OutputSeed, sys::path::filename(Seed)); sys::path::append(OutputSeed, sys::path::filename(Seed));
if (EC = sys::fs::copy_file(Seed, OutputSeed)) { if (EC = sys::fs::copy_file(Seed, OutputSeed)) {
WarnMsg() << "Failed to copy `" << Seed << "` to `" << OutputDir WarnMsg() << "Failed to copy `" << Seed << "` to `" << OutputDir
<< "`: " << EC.message() << '\n'; << "`: " << EC.message() << '\n';
} }
if ((++SeedCount % 10 == 0) && ShowProgBar) if ((++SeedCount % 10 == 0) && ShowProgBar)
ProgBar.update(SeedCount * 100 / Solution.size(), "Copying seeds"); ProgBar.update(SeedCount * 100 / Solution.size(), "Copying seeds");
} }
EndTimer(ShowProgBar); EndTimer(ShowProgBar);
SuccMsg() << "Done!\n"; SuccMsg() << "Done!\n";
return 0; return 0;
} }

View File

@ -11,6 +11,7 @@
/// Display a progress bar in the terminal /// Display a progress bar in the terminal
class ProgressBar { class ProgressBar {
private: private:
const size_t BarWidth; const size_t BarWidth;
const std::string Fill; const std::string Fill;
@ -18,14 +19,17 @@ class ProgressBar {
public: public:
ProgressBar() : ProgressBar(60, "#", " ") { ProgressBar() : ProgressBar(60, "#", " ") {
} }
ProgressBar(size_t Width, const llvm::StringRef F, const llvm::StringRef R) ProgressBar(size_t Width, const llvm::StringRef F, const llvm::StringRef R)
: BarWidth(Width), Fill(F), Remainder(R) { : BarWidth(Width), Fill(F), Remainder(R) {
} }
void update(float Progress, const llvm::StringRef Status = "", void update(float Progress, const llvm::StringRef Status = "",
llvm::raw_ostream &OS = llvm::outs()) { llvm::raw_ostream &OS = llvm::outs()) {
// No need to write once progress is 100% // No need to write once progress is 100%
if (Progress > 100.0f) return; if (Progress > 100.0f) return;
@ -39,11 +43,17 @@ class ProgressBar {
const auto Completed = const auto Completed =
static_cast<size_t>(Progress * static_cast<float>(BarWidth) / 100.0); static_cast<size_t>(Progress * static_cast<float>(BarWidth) / 100.0);
for (size_t I = 0; I < BarWidth; ++I) { for (size_t I = 0; I < BarWidth; ++I) {
if (I <= Completed) { if (I <= Completed) {
OS << Fill; OS << Fill;
} else { } else {
OS << Remainder; OS << Remainder;
} }
} }
// End bar // End bar
@ -54,5 +64,8 @@ class ProgressBar {
// Write status text // Write status text
OS << " " << Status; OS << " " << Status;
} }
}; };