From 864056fcaaeea0e156e650b7a0f6182e81db566a Mon Sep 17 00:00:00 2001 From: van Hauser Date: Sat, 13 Jul 2019 11:08:13 +0200 Subject: [PATCH 1/7] initial commit --- Makefile | 2 +- llvm_mode/LLVMInsTrim.so.cc | 197 ++++++++++++++++++++ llvm_mode/Makefile | 6 +- llvm_mode/MarkNodes.cc | 355 ++++++++++++++++++++++++++++++++++++ llvm_mode/MarkNodes.h | 11 ++ llvm_mode/afl-clang-fast.c | 8 +- 6 files changed, 572 insertions(+), 7 deletions(-) create mode 100644 llvm_mode/LLVMInsTrim.so.cc create mode 100644 llvm_mode/MarkNodes.cc create mode 100644 llvm_mode/MarkNodes.h diff --git a/Makefile b/Makefile index 601f29a7..0d0d6b79 100644 --- a/Makefile +++ b/Makefile @@ -147,7 +147,7 @@ install: all rm -f $${DESTDIR}$(BIN_PATH)/afl-as if [ -f afl-qemu-trace ]; then install -m 755 afl-qemu-trace $${DESTDIR}$(BIN_PATH); fi ifndef AFL_TRACE_PC - if [ -f afl-clang-fast -a -f afl-llvm-pass.so -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 afl-llvm-pass.so afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi + if [ -f afl-clang-fast -a -f libLLVMInsTrim.so -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 libLLVMInsTrim.so afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi else if [ -f afl-clang-fast -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi endif diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc new file mode 100644 index 00000000..2a13981d --- /dev/null +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -0,0 +1,197 @@ +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include + +#include "MarkNodes.h" + +using namespace llvm; + +static cl::opt MarkSetOpt("markset", cl::desc("MarkSet"), + cl::init(false)); +static cl::opt LoopHeadOpt("loophead", cl::desc("LoopHead"), + cl::init(false)); + +namespace { + struct InsTrim : public ModulePass { + private: + std::mt19937 generator; + int total_instr = 0; + + unsigned genLabel() { + return generator() % 65536; + } + + public: + static char ID; + InsTrim() : ModulePass(ID), generator(0) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } + + StringRef getPassName() const override { + return "InstTrim Instrumentation"; + } + + bool runOnModule(Module &M) override { + if (getenv("LOOPHEAD")) { + LoopHeadOpt = true; + MarkSetOpt = true; + } else if (getenv("MARKSET")) { + MarkSetOpt = true; + } + + LLVMContext &C = M.getContext(); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + + GlobalVariable *CovMapPtr = new GlobalVariable( + M, PointerType::getUnqual(Int8Ty), false, GlobalValue::ExternalLinkage, + nullptr, "__afl_area_ptr"); + + GlobalVariable *OldPrev = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", + 0, GlobalVariable::GeneralDynamicTLSModel, 0, false); + + unsigned total_rs = 0; + unsigned total_hs = 0; + + for (Function &F : M) { + if (!F.size()) { + continue; + } + + std::unordered_set MS; + if (!MarkSetOpt) { + for (auto &BB : F) { + MS.insert(&BB); + } + total_rs += F.size(); + } else { + auto Result = markNodes(&F); + auto RS = Result.first; + auto HS = Result.second; + + MS.insert(RS.begin(), RS.end()); + if (!LoopHeadOpt) { + MS.insert(HS.begin(), HS.end()); + total_rs += MS.size(); + } else { + DenseSet> EdgeSet; + DominatorTreeWrapperPass *DTWP = + &getAnalysis(F); + auto DT = &DTWP->getDomTree(); + + total_rs += RS.size(); + total_hs += HS.size(); + + for (BasicBlock *BB : HS) { + bool Inserted = false; + for (auto BI = pred_begin(BB), BE = pred_end(BB); + BI != BE; ++BI + ) { + auto Edge = BasicBlockEdge(*BI, BB); + if (Edge.isSingleEdge() && DT->dominates(Edge, BB)) { + EdgeSet.insert({*BI, BB}); + Inserted = true; + break; + } + } + if (!Inserted) { + MS.insert(BB); + total_rs += 1; + total_hs -= 1; + } + } + for (auto I = EdgeSet.begin(), E = EdgeSet.end(); I != E; ++I) { + auto PredBB = I->first; + auto SuccBB = I->second; + auto NewBB = SplitBlockPredecessors(SuccBB, {PredBB}, ".split", + DT, nullptr, false); + MS.insert(NewBB); + } + } + + auto *EBB = &F.getEntryBlock(); + if (succ_begin(EBB) == succ_end(EBB)) { + MS.insert(EBB); + total_rs += 1; + } + + for (BasicBlock &BB : F) { + if (MS.find(&BB) == MS.end()) { + continue; + } + IRBuilder<> IRB(&*BB.getFirstInsertionPt()); + IRB.CreateStore(ConstantInt::get(Int32Ty, genLabel()), OldPrev); + } + } + + for (BasicBlock &BB : F) { + auto PI = pred_begin(&BB); + auto PE = pred_end(&BB); + if (MarkSetOpt && MS.find(&BB) == MS.end()) { + continue; + } + + IRBuilder<> IRB(&*BB.getFirstInsertionPt()); + Value *L = NULL; + if (PI == PE) { + L = ConstantInt::get(Int32Ty, genLabel()); + } else { + auto *PN = PHINode::Create(Int32Ty, 0, "", &*BB.begin()); + DenseMap PredMap; + for (auto PI = pred_begin(&BB), PE = pred_end(&BB); + PI != PE; ++PI + ) { + BasicBlock *PBB = *PI; + auto It = PredMap.insert({PBB, genLabel()}); + unsigned Label = It.first->second; + PN->addIncoming(ConstantInt::get(Int32Ty, Label), PBB); + } + L = PN; + } + + LoadInst *PrevLoc = IRB.CreateLoad(OldPrev); + Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); + + LoadInst *MapPtr = IRB.CreateLoad(CovMapPtr); + Value *MapPtrIdx = IRB.CreateGEP(MapPtr, + IRB.CreateXor(PrevLocCasted, L)); + + LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); + Value *Incr = IRB.CreateAdd(Counter, ConstantInt::get(Int8Ty, 1)); + IRB.CreateStore(Incr, MapPtrIdx); + total_instr++; + } + } + + errs() << total_instr << " locations instrumented ("<< total_rs << "," << total_hs << ")\n"; + return false; + } + }; // end of struct InsTrim +} // end of anonymous namespace + +char InsTrim::ID = 0; + +static void registerAFLPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + PM.add(new InsTrim()); +} + +static RegisterStandardPasses RegisterAFLPass( + PassManagerBuilder::EP_OptimizerLast, registerAFLPass); + +static RegisterStandardPasses RegisterAFLPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass); diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index b6ab0c61..a66f18ab 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -67,7 +67,7 @@ ifeq "$(origin CC)" "default" endif ifndef AFL_TRACE_PC - PROGS = ../afl-clang-fast ../afl-llvm-pass.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so + PROGS = ../afl-clang-fast ../libLLVMInsTrim.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so else PROGS = ../afl-clang-fast ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so endif @@ -91,8 +91,8 @@ endif $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) ln -sf afl-clang-fast ../afl-clang-fast++ -../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps - $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) +../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps + $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=gnu++11 -shared $< -o $@ $(CLANG_LFL) # laf ../split-switches-pass.so: split-switches-pass.so.cc | test_deps diff --git a/llvm_mode/MarkNodes.cc b/llvm_mode/MarkNodes.cc new file mode 100644 index 00000000..3c2129ef --- /dev/null +++ b/llvm_mode/MarkNodes.cc @@ -0,0 +1,355 @@ +#include +#include +#include +#include +#include +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +DenseMap LMap; +std::vector Blocks; +std::set Marked , Markabove; +std::vector< std::vector > Succs , Preds; + +void reset(){ + LMap.clear(); + Blocks.clear(); + Marked.clear(); + Markabove.clear(); +} + +uint32_t start_point; + +void labelEachBlock(Function *F) { + // Fake single endpoint; + LMap[NULL] = Blocks.size(); + Blocks.push_back(NULL); + + // Assign the unique LabelID to each block; + for (auto I = F->begin(), E = F->end(); I != E; ++I) { + BasicBlock *BB = &*I; + LMap[BB] = Blocks.size(); + Blocks.push_back(BB); + } + + start_point = LMap[&F->getEntryBlock()]; +} + +void buildCFG(Function *F) { + Succs.resize( Blocks.size() ); + Preds.resize( Blocks.size() ); + for( size_t i = 0 ; i < Succs.size() ; i ++ ){ + Succs[ i ].clear(); + Preds[ i ].clear(); + } + + uint32_t FakeID = 0; + for (auto S = F->begin(), E = F->end(); S != E; ++S) { + BasicBlock *BB = &*S; + uint32_t MyID = LMap[BB]; + //if (succ_begin(BB) == succ_end(BB)) { + //Succs[MyID].push_back(FakeID); + //Marked.insert(MyID); + //} + for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + Succs[MyID].push_back(LMap[*I]); + } + } +} + +std::vector< std::vector > tSuccs; +std::vector tag , indfs; + +void DFStree(size_t now_id) { + if(tag[now_id]) return; + tag[now_id]=true; + indfs[now_id]=true; + for (auto succ: tSuccs[now_id]) { + if(tag[succ] and indfs[succ]) { + Marked.insert(succ); + Markabove.insert(succ); + continue; + } + Succs[now_id].push_back(succ); + Preds[succ].push_back(now_id); + DFStree(succ); + } + indfs[now_id]=false; +} +void turnCFGintoDAG(Function *F) { + tSuccs = Succs; + tag.resize(Blocks.size()); + indfs.resize(Blocks.size()); + for (size_t i = 0; i < Blocks.size(); ++ i) { + Succs[i].clear(); + tag[i]=false; + indfs[i]=false; + } + DFStree(start_point); + for (size_t i = 0; i < Blocks.size(); ++ i) + if( Succs[i].empty() ){ + Succs[i].push_back(0); + Preds[0].push_back(i); + } +} + +uint32_t timeStamp; +namespace DominatorTree{ + std::vector< std::vector > cov; + std::vector dfn, nfd, par, sdom, idom, mom, mn; + + bool Compare(uint32_t u, uint32_t v) { + return dfn[u] < dfn[v]; + } + uint32_t eval(uint32_t u) { + if( mom[u] == u ) return u; + uint32_t res = eval( mom[u] ); + if(Compare(sdom[mn[mom[u]]] , sdom[mn[u]])) { + mn[u] = mn[mom[u]]; + } + return mom[u] = res; + } + + void DFS(uint32_t now) { + timeStamp += 1; + dfn[now] = timeStamp; + nfd[timeStamp - 1] = now; + for( auto succ : Succs[now] ) { + if( dfn[succ] == 0 ) { + par[succ] = now; + DFS(succ); + } + } + } + + void DominatorTree(Function *F) { + if( Blocks.empty() ) return; + uint32_t s = start_point; + + // Initialization + mn.resize(Blocks.size()); + cov.resize(Blocks.size()); + dfn.resize(Blocks.size()); + nfd.resize(Blocks.size()); + par.resize(Blocks.size()); + mom.resize(Blocks.size()); + sdom.resize(Blocks.size()); + idom.resize(Blocks.size()); + + for( uint32_t i = 0 ; i < Blocks.size() ; i ++ ) { + dfn[i] = 0; + nfd[i] = Blocks.size(); + cov[i].clear(); + idom[i] = mom[i] = mn[i] = sdom[i] = i; + } + + timeStamp = 0; + DFS(s); + + for( uint32_t i = Blocks.size() - 1 ; i >= 1u ; i -- ) { + uint32_t now = nfd[i]; + if( now == Blocks.size() ) { + continue; + } + for( uint32_t pre : Preds[ now ] ) { + if( dfn[ pre ] ) { + eval(pre); + if( Compare(sdom[mn[pre]], sdom[now]) ) { + sdom[now] = sdom[mn[pre]]; + } + } + } + cov[sdom[now]].push_back(now); + mom[now] = par[now]; + for( uint32_t x : cov[par[now]] ) { + eval(x); + if( Compare(sdom[mn[x]], par[now]) ) { + idom[x] = mn[x]; + } else { + idom[x] = par[now]; + } + } + } + + for( uint32_t i = 1 ; i < Blocks.size() ; i += 1 ) { + uint32_t now = nfd[i]; + if( now == Blocks.size() ) { + continue; + } + if(idom[now] != sdom[now]) + idom[now] = idom[idom[now]]; + } + } +}; // End of DominatorTree + +std::vector Visited, InStack; +std::vector TopoOrder, InDeg; +std::vector< std::vector > t_Succ , t_Pred; + +void Go(uint32_t now, uint32_t tt) { + if( now == tt ) return; + Visited[now] = InStack[now] = timeStamp; + + for(uint32_t nxt : Succs[now]) { + if(Visited[nxt] == timeStamp and InStack[nxt] == timeStamp) { + Marked.insert(nxt); + } + t_Succ[now].push_back(nxt); + t_Pred[nxt].push_back(now); + InDeg[nxt] += 1; + if(Visited[nxt] == timeStamp) { + continue; + } + Go(nxt, tt); + } + + InStack[now] = 0; +} + +void TopologicalSort(uint32_t ss, uint32_t tt) { + timeStamp += 1; + + Go(ss, tt); + + TopoOrder.clear(); + std::queue wait; + wait.push(ss); + while( not wait.empty() ) { + uint32_t now = wait.front(); wait.pop(); + TopoOrder.push_back(now); + for(uint32_t nxt : t_Succ[now]) { + InDeg[nxt] -= 1; + if(InDeg[nxt] == 0u) { + wait.push(nxt); + } + } + } +} + +std::vector< std::set > NextMarked; +bool Indistinguish(uint32_t node1, uint32_t node2) { + if(NextMarked[node1].size() > NextMarked[node2].size()){ + uint32_t _swap = node1; + node1 = node2; + node2 = _swap; + } + for(uint32_t x : NextMarked[node1]) { + if( NextMarked[node2].find(x) != NextMarked[node2].end() ) { + return true; + } + } + return false; +} + +void MakeUniq(uint32_t now) { + bool StopFlag = false; + if (Marked.find(now) == Marked.end()) { + for(uint32_t pred1 : t_Pred[now]) { + for(uint32_t pred2 : t_Pred[now]) { + if(pred1 == pred2) continue; + if(Indistinguish(pred1, pred2)) { + Marked.insert(now); + StopFlag = true; + break; + } + } + if (StopFlag) { + break; + } + } + } + if(Marked.find(now) != Marked.end()) { + NextMarked[now].insert(now); + } else { + for(uint32_t pred : t_Pred[now]) { + for(uint32_t x : NextMarked[pred]) { + NextMarked[now].insert(x); + } + } + } +} + +void MarkSubGraph(uint32_t ss, uint32_t tt) { + TopologicalSort(ss, tt); + if(TopoOrder.empty()) return; + + for(uint32_t i : TopoOrder) { + NextMarked[i].clear(); + } + + NextMarked[TopoOrder[0]].insert(TopoOrder[0]); + for(uint32_t i = 1 ; i < TopoOrder.size() ; i += 1) { + MakeUniq(TopoOrder[i]); + } +} + +void MarkVertice(Function *F) { + uint32_t s = start_point; + + InDeg.resize(Blocks.size()); + Visited.resize(Blocks.size()); + InStack.resize(Blocks.size()); + t_Succ.resize(Blocks.size()); + t_Pred.resize(Blocks.size()); + NextMarked.resize(Blocks.size()); + + for( uint32_t i = 0 ; i < Blocks.size() ; i += 1 ) { + Visited[i] = InStack[i] = InDeg[i] = 0; + t_Succ[i].clear(); + t_Pred[i].clear(); + } + timeStamp = 0; + uint32_t t = 0; + //MarkSubGraph(s, t); + //return; + + while( s != t ) { + MarkSubGraph(DominatorTree::idom[t], t); + t = DominatorTree::idom[t]; + } + +} + +// return {marked nodes} +std::pair, + std::vector >markNodes(Function *F) { + assert(F->size() > 0 && "Function can not be empty"); + + reset(); + labelEachBlock(F); + buildCFG(F); + turnCFGintoDAG(F); + DominatorTree::DominatorTree(F); + MarkVertice(F); + + std::vector Result , ResultAbove; + for( uint32_t x : Markabove ) { + auto it = Marked.find( x ); + if( it != Marked.end() ) + Marked.erase( it ); + if( x ) + ResultAbove.push_back(Blocks[x]); + } + for( uint32_t x : Marked ) { + if (x == 0) { + continue; + } else { + Result.push_back(Blocks[x]); + } + } + + return { Result , ResultAbove }; +} diff --git a/llvm_mode/MarkNodes.h b/llvm_mode/MarkNodes.h new file mode 100644 index 00000000..e3bf3ce5 --- /dev/null +++ b/llvm_mode/MarkNodes.h @@ -0,0 +1,11 @@ +#ifndef __MARK_NODES__ +#define __MARK_NODES__ + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include + +std::pair, + std::vector> markNodes(llvm::Function *F); + +#endif diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c index 5bc4ae8c..2034f10a 100644 --- a/llvm_mode/afl-clang-fast.c +++ b/llvm_mode/afl-clang-fast.c @@ -32,6 +32,7 @@ #include #include #include +#include static u8* obj_path; /* Path to runtime libraries */ static u8** cc_params; /* Parameters passed to the real CC */ @@ -87,7 +88,7 @@ static void find_obj(u8* argv0) { return; } - FATAL("Unable to find 'afl-llvm-rt.o' or 'afl-llvm-pass.so'. Please set AFL_PATH"); + FATAL("Unable to find 'afl-llvm-rt.o' or 'libLLVMInsTrim.so'. Please set AFL_PATH"); } @@ -113,7 +114,7 @@ static void edit_params(u32 argc, char** argv) { } /* There are two ways to compile afl-clang-fast. In the traditional mode, we - use afl-llvm-pass.so to inject instrumentation. In the experimental + use libLLVMInsTrim.so to inject instrumentation. In the experimental 'trace-pc-guard' mode, we use native LLVM instrumentation callbacks instead. The latter is a very recent addition - see: @@ -150,7 +151,8 @@ static void edit_params(u32 argc, char** argv) { cc_params[cc_par_cnt++] = "-Xclang"; cc_params[cc_par_cnt++] = "-load"; cc_params[cc_par_cnt++] = "-Xclang"; - cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); + cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path); +// cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); #endif /* ^USE_TRACE_PC */ cc_params[cc_par_cnt++] = "-Qunused-arguments"; From 0f1313761686a61983a4deb2646a8265f8acc214 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20Ei=C3=9Ffeldt?= Date: Sat, 13 Jul 2019 23:40:34 +0200 Subject: [PATCH 2/7] compiles now with LLVM 8.0 --- llvm_mode/LLVMInsTrim.so.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 2a13981d..bba0b86c 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -118,7 +118,7 @@ namespace { auto PredBB = I->first; auto SuccBB = I->second; auto NewBB = SplitBlockPredecessors(SuccBB, {PredBB}, ".split", - DT, nullptr, false); + DT); MS.insert(NewBB); } } From c204efaaab59551bb325af64620626be40aae993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20Ei=C3=9Ffeldt?= Date: Sat, 13 Jul 2019 23:12:36 +0200 Subject: [PATCH 3/7] Compile fix for LLVM 3.8.0 --- llvm_mode/LLVMInsTrim.so.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index bba0b86c..0c75eb4e 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -40,7 +40,12 @@ namespace { AU.addRequired(); } - StringRef getPassName() const override { +#if LLVM_VERSION_MAJOR < 4 + const char * +#else + StringRef +#endif + getPassName() const override { return "InstTrim Instrumentation"; } From 98a696391183274fd683383d015646ac28e64182 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Sun, 14 Jul 2019 10:05:46 +0200 Subject: [PATCH 4/7] make fix --- llvm_mode/Makefile | 2 +- llvm_mode/MarkNodes.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index a66f18ab..2947dc9b 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -92,7 +92,7 @@ endif ln -sf afl-clang-fast ../afl-clang-fast++ ../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps - $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=gnu++11 -shared $< -o $@ $(CLANG_LFL) + $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=gnu++11 -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) # laf ../split-switches-pass.so: split-switches-pass.so.cc | test_deps diff --git a/llvm_mode/MarkNodes.cc b/llvm_mode/MarkNodes.cc index 3c2129ef..a156fccb 100644 --- a/llvm_mode/MarkNodes.cc +++ b/llvm_mode/MarkNodes.cc @@ -56,7 +56,7 @@ void buildCFG(Function *F) { Preds[ i ].clear(); } - uint32_t FakeID = 0; + //uint32_t FakeID = 0; for (auto S = F->begin(), E = F->end(); S != E; ++S) { BasicBlock *BB = &*S; uint32_t MyID = LMap[BB]; From 495f3b9a681af527018a92d3d0e3240568ac7997 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Sun, 14 Jul 2019 10:23:54 +0200 Subject: [PATCH 5/7] notZero added and first attempt at whitelist --- llvm_mode/LLVMInsTrim.so.cc | 159 +++++++++++++++++++++++-- llvm_mode/compare-transform-pass.so.cc | 3 +- llvm_mode/split-compares-pass.so.cc | 3 +- llvm_mode/split-switches-pass.so.cc | 6 +- 4 files changed, 157 insertions(+), 14 deletions(-) diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 0c75eb4e..62977e97 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -1,3 +1,5 @@ +#include + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/IR/CFG.h" @@ -10,8 +12,17 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include #include +#include +#include +#include + +#include "../config.h" +#include "../debug.h" #include "MarkNodes.h" @@ -24,6 +35,10 @@ static cl::opt LoopHeadOpt("loophead", cl::desc("LoopHead"), namespace { struct InsTrim : public ModulePass { + + protected: + std::list myWhitelist; + private: std::mt19937 generator; int total_instr = 0; @@ -34,7 +49,23 @@ namespace { public: static char ID; - InsTrim() : ModulePass(ID), generator(0) {} + InsTrim() : ModulePass(ID), generator(0) {//} + +// AFLCoverage() : ModulePass(ID) { + char* instWhiteListFilename = getenv("AFL_LLVM_WHITELIST"); + if (instWhiteListFilename) { + std::string line; + std::ifstream fileStream; + fileStream.open(instWhiteListFilename); + if (!fileStream) + report_fatal_error("Unable to open AFL_LLVM_WHITELIST"); + getline(fileStream, line); + while (fileStream) { + myWhitelist.push_back(line); + getline(fileStream, line); + } + } + } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); @@ -50,12 +81,33 @@ namespace { } bool runOnModule(Module &M) override { + char be_quiet = 0; + + if (isatty(2) && !getenv("AFL_QUIET")) { + SAYF(cCYA "LLVMInsTrim" VERSION cRST " by csienslab\n"); + } else be_quiet = 1; + +#if LLVM_VERSION_MAJOR < 9 + char* neverZero_counters_str; + if ((neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO")) != NULL) + OKF("LLVM neverZero activated (by hexcoder)\n"); +#endif + if (getenv("LOOPHEAD")) { LoopHeadOpt = true; MarkSetOpt = true; } else if (getenv("MARKSET")) { MarkSetOpt = true; } + +/* // I dont think this makes sense to port into LLVMInsTrim + char* inst_ratio_str = getenv("AFL_INST_RATIO"); + unsigned int inst_ratio = 100; + if (inst_ratio_str) { + if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || inst_ratio > 100) + FATAL("Bad value of AFL_INST_RATIO (must be between 1 and 100)"); + } +*/ LLVMContext &C = M.getContext(); IntegerType *Int8Ty = IntegerType::getInt8Ty(C); @@ -69,14 +121,59 @@ namespace { M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0, GlobalVariable::GeneralDynamicTLSModel, 0, false); - unsigned total_rs = 0; - unsigned total_hs = 0; + u64 total_rs = 0; + u64 total_hs = 0; for (Function &F : M) { if (!F.size()) { continue; } + if (!myWhitelist.empty()) { + bool instrumentBlock = false; + BasicBlock &BB = F.getEntryBlock(); + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + DebugLoc Loc = IP->getDebugLoc(); + StringRef instFilename; + + if ( Loc ) { + DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); + + unsigned int instLine = cDILoc->getLine(); + instFilename = cDILoc->getFilename(); + + if (instFilename.str().empty()) { + /* If the original location is empty, try using the inlined location */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); + } + } + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + for (std::list::iterator it = myWhitelist.begin(); it != myWhitelist.end(); ++it) { + if (instFilename.str().length() >= it->length()) { + if (instFilename.str().compare(instFilename.str().length() - it->length(), it->length(), *it) == 0) { + instrumentBlock = true; + break; + } + } + } + } + } + + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. */ + if (!instrumentBlock) { + if (!instFilename.str().empty()) + SAYF( "Not in whitelist, skipping %s ...\n", instFilename.str().c_str()); + continue; + } + } + std::unordered_set MS; if (!MarkSetOpt) { for (auto &BB : F) { @@ -94,8 +191,7 @@ namespace { total_rs += MS.size(); } else { DenseSet> EdgeSet; - DominatorTreeWrapperPass *DTWP = - &getAnalysis(F); + DominatorTreeWrapperPass *DTWP = &getAnalysis(F); auto DT = &DTWP->getDomTree(); total_rs += RS.size(); @@ -123,7 +219,11 @@ namespace { auto PredBB = I->first; auto SuccBB = I->second; auto NewBB = SplitBlockPredecessors(SuccBB, {PredBB}, ".split", - DT); + DT, nullptr, +#if LLVM_VERSION_MAJOR >= 8 + nullptr, +#endif + false); MS.insert(NewBB); } } @@ -168,21 +268,60 @@ namespace { L = PN; } + /* Load prev_loc */ LoadInst *PrevLoc = IRB.CreateLoad(OldPrev); + PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); + /* Load SHM pointer */ LoadInst *MapPtr = IRB.CreateLoad(CovMapPtr); - Value *MapPtrIdx = IRB.CreateGEP(MapPtr, - IRB.CreateXor(PrevLocCasted, L)); + MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, L)); + /* Update bitmap */ LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); + Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *Incr = IRB.CreateAdd(Counter, ConstantInt::get(Int8Ty, 1)); - IRB.CreateStore(Incr, MapPtrIdx); + +#if LLVM_VERSION_MAJOR < 9 + if (neverZero_counters_str != NULL) { // with llvm 9 we make this the default as the bug in llvm is then fixed +#else + #warning "neverZero implementation needs to be reviewed!" +#endif + /* hexcoder: Realize a counter that skips zero during overflow. + * Once this counter reaches its maximum value, it next increments to 1 + * + * Instead of + * Counter + 1 -> Counter + * we inject now this + * Counter + 1 -> {Counter, OverflowFlag} + * Counter + OverflowFlag -> Counter + */ + auto cf = IRB.CreateICmpEQ(Incr, ConstantInt::get(Int8Ty, 0)); + auto carry = IRB.CreateZExt(cf, Int8Ty); + Incr = IRB.CreateAdd(Incr, carry); +#if LLVM_VERSION_MAJOR < 9 + } +#endif + + IRB.CreateStore(Incr, MapPtrIdx)->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + /* Set prev_loc to cur_loc >> 1 */ + /* + StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc); + Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + */ + total_instr++; } } - errs() << total_instr << " locations instrumented ("<< total_rs << "," << total_hs << ")\n"; + OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n"/*", ratio %u%%)."*/, + total_instr, total_rs, total_hs, + getenv("AFL_HARDEN") ? "hardened" : + ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) ? + "ASAN/MSAN" : "non-hardened")/*, inst_ratio*/); return false; } }; // end of struct InsTrim diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc index d9a1f945..95435db7 100644 --- a/llvm_mode/compare-transform-pass.so.cc +++ b/llvm_mode/compare-transform-pass.so.cc @@ -304,7 +304,8 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, const bool CompareTransform::runOnModule(Module &M) { - llvm::errs() << "Running compare-transform-pass by laf.intel@gmail.com, extended by heiko@hexco.de\n"; + if (getenv("AFL_QUIET") == NULL) + llvm::errs() << "Running compare-transform-pass by laf.intel@gmail.com, extended by heiko@hexco.de\n"; transformCmps(M, true, true, true, true, true); verifyModule(M); diff --git a/llvm_mode/split-compares-pass.so.cc b/llvm_mode/split-compares-pass.so.cc index 2ea73aaa..c025628f 100644 --- a/llvm_mode/split-compares-pass.so.cc +++ b/llvm_mode/split-compares-pass.so.cc @@ -487,7 +487,8 @@ bool SplitComparesTransform::runOnModule(Module &M) { simplifySignedness(M); - errs() << "Split-compare-pass by laf.intel@gmail.com\n"; + if (getenv("AFL_QUIET") == NULL) + errs() << "Split-compare-pass by laf.intel@gmail.com\n"; switch (bitw) { case 64: diff --git a/llvm_mode/split-switches-pass.so.cc b/llvm_mode/split-switches-pass.so.cc index 4c28f34c..1ace3185 100644 --- a/llvm_mode/split-switches-pass.so.cc +++ b/llvm_mode/split-switches-pass.so.cc @@ -244,7 +244,8 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { /* If there is only the default destination or the condition checks 8 bit or less, don't bother with the code below. */ if (!SI->getNumCases() || bitw <= 8) { - errs() << "skip trivial switch..\n"; + if (getenv("AFL_QUIET") == NULL) + errs() << "skip trivial switch..\n"; continue; } @@ -302,7 +303,8 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { bool SplitSwitchesTransform::runOnModule(Module &M) { - llvm::errs() << "Running split-switches-pass by laf.intel@gmail.com\n"; + if (getenv("AFL_QUIET") == NULL) + llvm::errs() << "Running split-switches-pass by laf.intel@gmail.com\n"; splitSwitches(M); verifyModule(M); From e66402485342088e6fcaecfe2abbba291a48bda5 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Sun, 14 Jul 2019 10:50:13 +0200 Subject: [PATCH 6/7] whitelist features works now --- docs/PATCHES | 1 + docs/README | 6 +++++- llvm_mode/LLVMInsTrim.so.cc | 19 ++++++++++++++----- llvm_mode/README.llvm | 10 ++++++---- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/docs/PATCHES b/docs/PATCHES index 06da053e..f6ca9284 100644 --- a/docs/PATCHES +++ b/docs/PATCHES @@ -17,6 +17,7 @@ afl-qemu-optimize-entrypoint.diff by mh(at)mh-sec(dot)de afl-qemu-speed.diff by abiondo on github afl-qemu-optimize-map.diff by mh(at)mh-sec(dot)de ++ instrim (https://github.com/csienslab/instrim) was integrated + MOpt (github.com/puppet-meteor/MOpt-AFL) was imported + AFLfast additions (github.com/mboehme/aflfast) were incorporated. + Qemu 3.1 upgrade with enhancement patches (github.com/andreafioraldi/afl) diff --git a/docs/README b/docs/README index 3f19d328..54e3e4a4 100644 --- a/docs/README +++ b/docs/README @@ -19,9 +19,13 @@ american fuzzy lop plus plus C. Hollers afl-fuzz Python mutator module and llvm_mode whitelist support was added too (https://github.com/choller/afl) - The newest additions is the excellent MOpt mutator from + New is the excellent MOpt mutator from https://github.com/puppet-meteor/MOpt-AFL + Also newly integrated is instrim, a very effective CFG llvm_mode + instrumentation implementation which replaced the original afl one and is + from https://github.com/csienslab/instrim + A more thorough list is available in the PATCHES file. So all in all this is the best-of AFL that is currently out there :-) diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 62977e97..51640870 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -1,3 +1,6 @@ +#include +#include +#include #include #include "llvm/ADT/DenseMap.h" @@ -131,12 +134,16 @@ namespace { if (!myWhitelist.empty()) { bool instrumentBlock = false; - BasicBlock &BB = F.getEntryBlock(); - BasicBlock::iterator IP = BB.getFirstInsertionPt(); - IRBuilder<> IRB(&(*IP)); - DebugLoc Loc = IP->getDebugLoc(); + DebugLoc Loc; StringRef instFilename; + for (auto &BB : F) { + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + if (!Loc) + Loc = IP->getDebugLoc(); + } + if ( Loc ) { DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); @@ -169,7 +176,9 @@ namespace { * not whitelisted, so we skip instrumentation. */ if (!instrumentBlock) { if (!instFilename.str().empty()) - SAYF( "Not in whitelist, skipping %s ...\n", instFilename.str().c_str()); + SAYF(cYEL "[!] " cBRI "Not in whitelist, skipping %s ...\n", instFilename.str().c_str()); + else + SAYF(cYEL "[!] " cBRI "No filename information found, skipping it"); continue; } } diff --git a/llvm_mode/README.llvm b/llvm_mode/README.llvm index dc860e97..b4e05a7a 100644 --- a/llvm_mode/README.llvm +++ b/llvm_mode/README.llvm @@ -38,8 +38,8 @@ co-exists with the original code. The idea and much of the implementation comes from Laszlo Szekeres. -2) How to use -------------- +2) How to use this +------------------ In order to leverage this mechanism, you need to have clang installed on your system. You should also make sure that the llvm-config tool is in your path @@ -69,8 +69,10 @@ operating mode of AFL, e.g.: Be sure to also include CXX set to afl-clang-fast++ for C++ code. The tool honors roughly the same environmental variables as afl-gcc (see -../docs/env_variables.txt). This includes AFL_INST_RATIO, AFL_USE_ASAN, -AFL_HARDEN, and AFL_DONT_OPTIMIZE. +../docs/env_variables.txt). This includes AFL_USE_ASAN, +AFL_HARDEN, and AFL_DONT_OPTIMIZE. However AFL_INST_RATIO is not honored +as it does not serve a good purpose with the more effective instrim CFG +analysis. Note: if you want the LLVM helper to be installed on your system for all users, you need to build it before issuing 'make install' in the parent From 013a1731d590eaa1f3e4c58c69985f89b7a3d2f9 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Sun, 14 Jul 2019 19:48:28 +0200 Subject: [PATCH 7/7] set instrim as default and updated documentation --- docs/env_variables.txt | 13 ++++++++----- llvm_mode/LLVMInsTrim.so.cc | 6 +++--- llvm_mode/README.llvm | 27 +++++++++++++++++++++++---- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/docs/env_variables.txt b/docs/env_variables.txt index d854ea8d..8e2723d7 100644 --- a/docs/env_variables.txt +++ b/docs/env_variables.txt @@ -82,6 +82,9 @@ discussed in section #1, with the exception of: - TMPDIR and AFL_KEEP_ASSEMBLY, since no temporary assembly files are created. + - AFL_INST_RATIO, as we switched for instrim instrumentation which + is more effective but makes not much sense together with this option. + Then there are a few specific features that are only available in llvm_mode: LAF-INTEL @@ -108,16 +111,16 @@ Then there are a few specific features that are only available in llvm_mode: OTHER ===== - - Setting export AFL_LLVM_NOT_ZERO=1 during compilation will use counters + - Setting LOOPHEAD=1 optimized loops. afl-fuzz will only be able to + see the path the loop took, but not how many times it was called + (unless its a complex loop). + + - Setting AFL_LLVM_NOT_ZERO=1 during compilation will use counters that skip zero on overflow. This is the default for llvm >= 9, however for llvm versions below that this will increase an unnecessary slowdown due a performance issue that is only fixed in llvm 9+. This feature increases path discovery by a little bit. -Note that AFL_INST_RATIO will behave a bit differently than for afl-gcc, -because functions are *not* instrumented unconditionally - so low values -will have a more striking effect. For this tool, 0 is not a valid choice. - 3) Settings for afl-fuzz ------------------------ diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 51640870..8e9f7667 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -98,10 +98,10 @@ namespace { if (getenv("LOOPHEAD")) { LoopHeadOpt = true; - MarkSetOpt = true; - } else if (getenv("MARKSET")) { - MarkSetOpt = true; } + + // this is our default + MarkSetOpt = true; /* // I dont think this makes sense to port into LLVMInsTrim char* inst_ratio_str = getenv("AFL_INST_RATIO"); diff --git a/llvm_mode/README.llvm b/llvm_mode/README.llvm index b4e05a7a..77c406f8 100644 --- a/llvm_mode/README.llvm +++ b/llvm_mode/README.llvm @@ -78,13 +78,32 @@ Note: if you want the LLVM helper to be installed on your system for all users, you need to build it before issuing 'make install' in the parent directory. -3) Gotchas, feedback, bugs +3) Options + +Several options are present to make llvm_mode faster or help it rearrange +the code to make afl-fuzz path discovery easier. + +If you need just to instrument specific parts of the code, you can whitelist +which C/C++ files to actually intrument. See README.whitelist + +For splitting memcmp, strncmp, etc. please see README.laf-intel + +As the original afl llvm_mode implementation has been replaced with +then much more effective instrim (https://github.com/csienslab/instrim/) +there is an option for optimizing loops. This optimization shows which +part of the loop has been selected, but not how many time a loop has been +called in a row (unless its a complex loop and a block inside was +instrumented). If you want to enable this set the environment variable +LOOPHEAD=1 + + +4) Gotchas, feedback, bugs -------------------------- This is an early-stage mechanism, so field reports are welcome. You can send bug reports to . -4) Bonus feature #1: deferred instrumentation +5) Bonus feature #1: deferred instrumentation --------------------------------------------- AFL tries to optimize performance by executing the targeted binary just once, @@ -131,7 +150,7 @@ will keep working normally when compiled with a tool other than afl-clang-fast. Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will *not* generate a deferred-initialization binary) - and you should be all set! -5) Bonus feature #2: persistent mode +6) Bonus feature #2: persistent mode ------------------------------------ Some libraries provide APIs that are stateless, or whose state can be reset in @@ -171,7 +190,7 @@ PS. Because there are task switches still involved, the mode isn't as fast as faster than the normal fork() model, and compared to in-process fuzzing, should be a lot more robust. -6) Bonus feature #3: new 'trace-pc-guard' mode +8) Bonus feature #3: new 'trace-pc-guard' mode ---------------------------------------------- Recent versions of LLVM are shipping with a built-in execution tracing feature