optimin nits

This commit is contained in:
vanhauser-thc
2021-07-21 09:55:22 +02:00
parent fa2b164429
commit 60cbe5b4be
8 changed files with 133 additions and 22 deletions

View File

@ -9,7 +9,10 @@ Want to stay in the loop on major new features? Join our mailing list by
sending a mail to <afl-users+subscribe@googlegroups.com>.
### Version ++3.15a (dev)
- added the very good grammar mutator "GramaTron" to the custom_mutators
- added the very good grammar mutator "GramaTron" to the
custom_mutators
- added optimin, a faster and better corpus minimizer by
Adrian Herrera. Thank you!
### Version ++3.14c (release)

View File

@ -10,27 +10,46 @@ heuristic and/or greedy algorithms to identify these functionally distinct
files. This means that minimized corpora are generally much smaller than those
produced by other tools.
## Usage
## Building
To build the `optimin` executable (when cloned from github):
To build `optimin`, just execute the `build_optimin.sh` script.
```bash
# Ensure EvalMaxSAT is available
git submodule init
git submodule update
## Running
mkdir build
cd build
Running `optimin` is the same as running `afl-cmin`:
# You may have to specify -DLLVM_DIR=`llvm-config --cmakedir` if you have a
# non-standard LLVM install (e.g., install via apt)
cmake ..
make -j
make install
```
Required parameters:
-i dir - input directory with starting corpus
-o dir - output directory for minimized files
Execution control settings:
-f file - location read by the fuzzed program (stdin)
-m megs - memory limit for child process (none MB)
-t msec - run time limit for child process (none)
-O - use binary-only instrumentation (FRIDA mode)
-Q - use binary-only instrumentation (QEMU mode)
-U - use unicorn-based instrumentation (unicorn mode)
Minimization settings:
-C - keep crashing inputs, reject everything else
-e - solve for edge coverage only, ignore hit counts
For additional tips, please consult README.md
Environment variables used:
AFL_ALLOW_TMP: allow unsafe use of input/output directories under {/var}/tmp
AFL_CRASH_EXITCODE: optional child exit code to be interpreted as crash
AFL_FORKSRV_INIT_TMOUT: time the fuzzer waits for the forkserver to come up
AFL_KEEP_TRACES: leave the temporary <out_dir>/.traces directory
AFL_KILL_SIGNAL: Signal delivered to child processes on timeout (default: SIGKILL)
AFL_NO_FORKSRV: run target via execve instead of using the forkserver
AFL_PATH: path for the afl-showmap binary if not found anywhere in PATH
AFL_PRINT_FILENAMES: If set, the filename currently processed will be printed to stdout
AFL_SKIP_BIN_CHECK: skip afl instrumentation checks for target binary
```
Otherwise, run the `build_optimin.sh` script. Running `optimin` is the same as
running `afl-cmin`.
Example: `optimin -i files -o seeds -- ./target @@`
### Weighted Minimizations

View File

@ -122,9 +122,10 @@ echo
echo "[+] EvalMaxSAT successfully prepared!"
echo "[+] Building OptiMin now."
mkdir -p build
cd build
cmake .. -DLLVM_DIR=`$LLVM_CONFIG --cmakedir`
make -j$CORES
cd build || exit 1
cmake .. -DLLVM_DIR=`$LLVM_CONFIG --cmakedir` || exit 1
make -j$CORES || exit 1
cd ..
echo
cp -fv build/src/optimin . || exit 1
echo "[+] OptiMin successfully built!"

View File

@ -33,16 +33,20 @@ namespace {
/// Seed weight holder: a default-constructed `Weight` carries the value 1.
class Weight {
 public:
  /// Use the default weight (see the member initializer below).
  Weight() = default;

  /// Deliberately non-explicit so that a plain integer converts to a `Weight`.
  Weight(uint32_t Val) : Value(Val) {
  }

  /// Read the stored weight back as an unsigned integer.
  operator unsigned() const {
    return Value;
  }

 private:
  // Fixed at construction; 1 unless a value was supplied.
  const unsigned Value = 1;
};
// -------------------------------------------------------------------------- //
@ -89,16 +93,27 @@ static std::string AFLShowmapPath;
static bool TargetArgsHasAtAt = false;
// Message helpers. Each lambda wraps a stream in a `WithColor` with the given
// highlight colour, writes a short prefix to it, and returns the result so the
// caller can append the message text with `<<` (e.g. `ErrMsg() << "..."`).

// Errors: written to `errs()` in the error colour, prefixed with "[-] ".
static const auto ErrMsg = [] {
return WithColor(errs(), HighlightColor::Error) << "[-] ";
};
// Warnings: written to `errs()` in the warning colour. Note that the prefix is
// the same "[-] " used for errors; only the colour differs.
static const auto WarnMsg = [] {
return WithColor(errs(), HighlightColor::Warning) << "[-] ";
};
// Success messages: written to `outs()` in the string colour, prefixed "[+] ".
static const auto SuccMsg = [] {
return WithColor(outs(), HighlightColor::String) << "[+] ";
};
// Status/progress messages: written to `outs()` in the remark colour,
// prefixed "[*] ".
static const auto StatMsg = [] {
return WithColor(outs(), HighlightColor::Remark) << "[*] ";
};
static cl::opt<std::string> CorpusDir("i", cl::desc("Input directory"),
@ -124,6 +139,7 @@ static cl::opt<std::string> Timeout(
// `-C` command-line flag. Per its help text: keep crashing inputs and reject
// everything else.
static cl::opt<bool> CrashMode(
"C", cl::desc("Keep crashing inputs, reject everything else"));
// `-Q` command-line flag: binary-only instrumentation (the README lists this
// flag as QEMU mode).
static cl::opt<bool> QemuMode("Q", cl::desc("Use binary-only instrumentation"));
} // anonymous namespace
// -------------------------------------------------------------------------- //
@ -131,24 +147,33 @@ static cl::opt<bool> QemuMode("Q", cl::desc("Use binary-only instrumentation"));
// -------------------------------------------------------------------------- //
/// Parse seed weights from `MB` and record them in `Weights`.
///
/// The buffer is read as one `<seed>,<weight>` record per line. Each line is
/// split at its first comma and the weight is parsed as a base-10 unsigned
/// integer. Records whose weight cannot be parsed produce a warning and are
/// skipped; the remaining records are still processed.
///
/// Blank lines are ignored, so an empty weights file is not reported as a
/// malformed record. Whitespace around the weight field (including the `\r`
/// left behind by CRLF line endings) is stripped before parsing.
///
/// \param MB      Buffer holding the contents of the weights file.
/// \param Weights Map that receives one entry per successfully parsed record
///                (inserted with `try_emplace`).
static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) {
  SmallVector<StringRef, 0> Lines;
  MB.getBuffer().trim().split(Lines, '\n');

  for (const auto &Line : Lines) {
    // Nothing to parse on an empty/whitespace-only line
    if (Line.trim().empty()) continue;

    const auto &[Seed, WeightStr] = Line.split(',');

    // Named so that it does not shadow the `Weight` class
    unsigned ParsedWeight = 0;

    if (to_integer(WeightStr.trim(), ParsedWeight, 10)) {
      Weights.try_emplace(Seed, ParsedWeight);
    } else {
      WarnMsg() << "Failed to read weight for `" << Seed << "`. Skipping...\n";
    }
  }
}
[[nodiscard]] static std::error_code getAFLCoverage(const StringRef Seed,
AFLCoverageVector &Cov) {
Optional<StringRef> Redirects[] = {None, None, None};
std::error_code EC;
@ -159,6 +184,7 @@ static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) {
// Prepare afl-showmap arguments
SmallVector<StringRef, 12> AFLShowmapArgs{
AFLShowmapPath, "-m", MemLimit, "-t", Timeout, "-q", "-o", OutputPath};
if (TargetArgsHasAtAt)
@ -180,8 +206,10 @@ static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) {
// Parse afl-showmap output
const auto CovOrErr = MemoryBuffer::getFile(OutputPath);
if (EC = CovOrErr.getError()) {
sys::fs::remove(OutputPath);
return EC;
}
SmallVector<StringRef, 0> Lines;
@ -191,21 +219,27 @@ static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) {
unsigned Freq = 0;
for (const auto &Line : Lines) {
const auto &[EdgeStr, FreqStr] = Line.split(':');
to_integer(EdgeStr, Edge, 10);
to_integer(FreqStr, Freq, 10);
Cov.push_back({Edge, Freq});
}
return sys::fs::remove(OutputPath);
}
/// Store the current `std::chrono::system_clock` time in `StartTime`.
///
/// \param ShowProgBar Accepted but not used by this function.
static inline void StartTimer(bool ShowProgBar) {
  (void)ShowProgBar;

  using Clock = std::chrono::system_clock;
  StartTime = Clock::now();
}
static inline void EndTimer(bool ShowProgBar) {
EndTime = std::chrono::system_clock::now();
Duration =
std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime);
@ -214,6 +248,7 @@ static inline void EndTimer(bool ShowProgBar) {
outs() << '\n';
else
outs() << Duration.count() << "s\n";
}
// -------------------------------------------------------------------------- //
@ -221,6 +256,7 @@ static inline void EndTimer(bool ShowProgBar) {
// -------------------------------------------------------------------------- //
int main(int argc, char *argv[]) {
WeightsMap Weights;
ProgressBar ProgBar;
std::error_code EC;
@ -234,8 +270,10 @@ int main(int argc, char *argv[]) {
cl::ParseCommandLineOptions(argc, argv, "Optimal corpus minimizer");
if (!sys::fs::is_directory(OutputDir)) {
ErrMsg() << "Invalid output directory `" << OutputDir << "`\n";
return 1;
}
for (const auto &Arg : TargetArgs)
@ -247,9 +285,12 @@ int main(int argc, char *argv[]) {
const auto AFLShowmapOrErr = sys::findProgramByName("afl-showmap");
if (AFLShowmapOrErr.getError()) {
ErrMsg() << "Failed to find afl-showmap. Check your PATH\n";
return 1;
}
AFLShowmapPath = *AFLShowmapOrErr;
// ------------------------------------------------------------------------ //
@ -260,19 +301,23 @@ int main(int argc, char *argv[]) {
// ------------------------------------------------------------------------ //
if (WeightsFile != "") {
StatMsg() << "Reading weights from `" << WeightsFile << "`... ";
StartTimer(/*ShowProgBar=*/false);
const auto WeightsOrErr = MemoryBuffer::getFile(WeightsFile);
if (EC = WeightsOrErr.getError()) {
ErrMsg() << "Failed to read weights from `" << WeightsFile
<< "`: " << EC.message() << '\n';
return 1;
}
GetWeights(*WeightsOrErr.get(), Weights);
EndTimer(/*ShowProgBar=*/false);
}
// ------------------------------------------------------------------------ //
@ -289,20 +334,26 @@ int main(int argc, char *argv[]) {
for (sys::fs::directory_iterator Dir(CorpusDir, EC), DirEnd;
Dir != DirEnd && !EC; Dir.increment(EC)) {
if (EC) {
ErrMsg() << "Failed to traverse corpus directory `" << CorpusDir
<< "`: " << EC.message() << '\n';
return 1;
}
const auto &Path = Dir->path();
if (EC = sys::fs::status(Path, Status)) {
WarnMsg() << "Failed to access seed file `" << Path
<< "`: " << EC.message() << ". Skipping...\n";
continue;
}
switch (Status.type()) {
case sys::fs::file_type::regular_file:
case sys::fs::file_type::symlink_file:
case sys::fs::file_type::type_unknown:
@ -310,7 +361,9 @@ int main(int argc, char *argv[]) {
default:
/* Ignore */
break;
}
}
EndTimer(/*ShowProgBar=*/false);
@ -336,12 +389,15 @@ int main(int argc, char *argv[]) {
AFLCoverageVector Cov;
for (const auto &SeedFile : SeedFiles) {
// Execute seed
Cov.clear();
if (EC = getAFLCoverage(SeedFile, Cov)) {
ErrMsg() << "Failed to get coverage for seed " << SeedFile << ": "
<< EC.message() << '\n';
return 1;
}
// Create a variable to represent the seed
@ -350,18 +406,25 @@ int main(int argc, char *argv[]) {
// Record the set of seeds that cover a particular edge
for (const auto &[Edge, Freq] : Cov) {
if (EdgesOnly) {
// Ignore edge frequency
SeedCoverage[Edge].insert(Var);
} else {
// Executing edge `E` `N` times implies that it was also executed `N - 1`
// times, `N - 2` times, and so on, so record the seed for every lower count
for (unsigned I = 0; I < Freq; ++I)
SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(Var);
}
}
if ((++SeedCount % 10 == 0) && ShowProgBar)
ProgBar.update(SeedCount * 100 / NumSeeds, "Generating seed coverage");
}
EndTimer(ShowProgBar);
@ -379,6 +442,7 @@ int main(int argc, char *argv[]) {
// (hard constraint)
std::vector<SeedID> Clauses;
for (const auto &[_, Seeds] : SeedCoverage) {
if (Seeds.empty()) continue;
Clauses.clear();
@ -390,6 +454,7 @@ int main(int argc, char *argv[]) {
if ((++SeedCount % 10 == 0) && ShowProgBar)
ProgBar.update(SeedCount * 100 / SeedCoverage.size(),
"Generating clauses");
}
// Select the minimum number of seeds that cover a particular set of edges
@ -420,12 +485,16 @@ int main(int argc, char *argv[]) {
SmallString<32> OutputSeed;
if (Solved) {
for (const auto &[Var, Seed] : SeedVars)
if (Solver.getValue(Var) > 0) Solution.push_back(Seed);
} else {
ErrMsg() << "Failed to find an optimal solution for `" << CorpusDir
<< "`\n";
return 1;
}
SuccMsg() << "Minimized corpus size: " << Solution.size() << " seeds\n";
@ -436,20 +505,26 @@ int main(int argc, char *argv[]) {
SeedCount = 0;
for (const auto &Seed : Solution) {
OutputSeed = OutputDir;
sys::path::append(OutputSeed, sys::path::filename(Seed));
if (EC = sys::fs::copy_file(Seed, OutputSeed)) {
WarnMsg() << "Failed to copy `" << Seed << "` to `" << OutputDir
<< "`: " << EC.message() << '\n';
}
if ((++SeedCount % 10 == 0) && ShowProgBar)
ProgBar.update(SeedCount * 100 / Solution.size(), "Copying seeds");
}
EndTimer(ShowProgBar);
SuccMsg() << "Done!\n";
return 0;
}

View File

@ -11,6 +11,7 @@
/// Display a progress bar in the terminal
class ProgressBar {
private:
const size_t BarWidth;
const std::string Fill;
@ -18,14 +19,17 @@ class ProgressBar {
public:
ProgressBar() : ProgressBar(60, "#", " ") {
}
ProgressBar(size_t Width, const llvm::StringRef F, const llvm::StringRef R)
: BarWidth(Width), Fill(F), Remainder(R) {
}
void update(float Progress, const llvm::StringRef Status = "",
llvm::raw_ostream &OS = llvm::outs()) {
// No need to write once progress is 100%
if (Progress > 100.0f) return;
@ -39,11 +43,17 @@ class ProgressBar {
const auto Completed =
static_cast<size_t>(Progress * static_cast<float>(BarWidth) / 100.0);
for (size_t I = 0; I < BarWidth; ++I) {
if (I <= Completed) {
OS << Fill;
} else {
OS << Remainder;
}
}
// End bar
@ -54,5 +64,8 @@ class ProgressBar {
// Write status text
OS << " " << Status;
}
};