Merge pull request #375 from risicle/ris-llvm-compare-transform-var-sized-dev

llvm_mode compare-transform-pass: add handling of sized comparisons with non-const size
This commit is contained in:
van Hauser
2020-05-28 22:47:13 +02:00
committed by GitHub
3 changed files with 121 additions and 63 deletions

View File

@ -304,17 +304,27 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
if (!(HasStr1 || HasStr2)) continue; if (!(HasStr1 || HasStr2)) continue;
if (isMemcmp || isStrncmp || isStrncasecmp) { if (isMemcmp || isStrncmp || isStrncasecmp) {
/* check if third operand is a constant integer /* check if third operand is a constant integer
* strlen("constStr") and sizeof() are treated as constant */ * strlen("constStr") and sizeof() are treated as constant */
Value * op2 = callInst->getArgOperand(2); Value * op2 = callInst->getArgOperand(2);
ConstantInt *ilen = dyn_cast<ConstantInt>(op2); ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
if (!ilen) continue; if (ilen) {
/* final precaution: if size of compare is larger than constant uint64_t len = ilen->getZExtValue();
* string skip it*/ // if len is zero this is a pointless call but allow real
uint64_t literalLength = HasStr1 ? Str1.size() : Str2.size(); // implementation to worry about that
if (literalLength + 1 < ilen->getZExtValue()) continue; if (!len) continue;
if (isMemcmp) {
// if size of compare is larger than constant string this is
// likely a bug but allow real implementation to worry about
// that
uint64_t literalLength = HasStr1 ? Str1.size() : Str2.size();
if (literalLength + 1 < ilen->getZExtValue()) continue;
}
} else if (isMemcmp)
// this *may* supply a len greater than the constant string at
// runtime so similarly we don't want to have to handle that
continue;
} }
calls.push_back(callInst); calls.push_back(callInst);
@ -341,7 +351,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
Value * VarStr; Value * VarStr;
bool HasStr1 = getConstantStringInfo(Str1P, Str1); bool HasStr1 = getConstantStringInfo(Str1P, Str1);
bool HasStr2 = getConstantStringInfo(Str2P, Str2); bool HasStr2 = getConstantStringInfo(Str2P, Str2);
uint64_t constLen, sizedLen; uint64_t constStrLen, constSizedLen, unrollLen;
bool isMemcmp = bool isMemcmp =
!callInst->getCalledFunction()->getName().compare(StringRef("memcmp")); !callInst->getCalledFunction()->getName().compare(StringRef("memcmp"));
bool isSizedcmp = isMemcmp || bool isSizedcmp = isMemcmp ||
@ -349,23 +359,13 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
StringRef("strncmp")) || StringRef("strncmp")) ||
!callInst->getCalledFunction()->getName().compare( !callInst->getCalledFunction()->getName().compare(
StringRef("strncasecmp")); StringRef("strncasecmp"));
Value *sizedValue = isSizedcmp ? callInst->getArgOperand(2) : NULL;
bool isConstSized = sizedValue && isa<ConstantInt>(sizedValue);
bool isCaseInsensitive = !callInst->getCalledFunction()->getName().compare( bool isCaseInsensitive = !callInst->getCalledFunction()->getName().compare(
StringRef("strcasecmp")) || StringRef("strcasecmp")) ||
!callInst->getCalledFunction()->getName().compare( !callInst->getCalledFunction()->getName().compare(
StringRef("strncasecmp")); StringRef("strncasecmp"));
if (isSizedcmp) {
Value * op2 = callInst->getArgOperand(2);
ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
sizedLen = ilen->getZExtValue();
} else {
sizedLen = 0;
}
if (!(HasStr1 || HasStr2)) { if (!(HasStr1 || HasStr2)) {
// do we have a saved local or global variable initialization? // do we have a saved local or global variable initialization?
@ -389,99 +389,133 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
} }
if (isConstSized) {
constSizedLen = dyn_cast<ConstantInt>(sizedValue)->getZExtValue();
}
if (HasStr1) { if (HasStr1) {
TmpConstStr = Str1.str(); TmpConstStr = Str1.str();
VarStr = Str2P; VarStr = Str2P;
constLen = isMemcmp ? sizedLen : TmpConstStr.length();
} else { } else {
TmpConstStr = Str2.str(); TmpConstStr = Str2.str();
VarStr = Str1P; VarStr = Str1P;
constLen = isMemcmp ? sizedLen : TmpConstStr.length();
} }
/* properly handle zero terminated C strings by adding the terminating 0 to // add null termination character implicit in c strings
* the StringRef (in comparison to std::string a StringRef has built-in
* runtime bounds checking, which makes debugging easier) */
TmpConstStr.append("\0", 1); TmpConstStr.append("\0", 1);
if (!sizedLen) constLen++;
// in the unusual case the const str has embedded null
// characters, the string comparison functions should terminate
// at the first null
if (!isMemcmp)
TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1);
constStrLen = TmpConstStr.length();
// prefer use of StringRef (in comparison to std::string a StringRef has
// built-in runtime bounds checking, which makes debugging easier)
ConstStr = StringRef(TmpConstStr); ConstStr = StringRef(TmpConstStr);
// fprintf(stderr, "issized: %d, const > sized ? %u > %u\n", isSizedcmp,
// constLen, sizedLen); if (isConstSized)
if (isSizedcmp && constLen > sizedLen && sizedLen) constLen = sizedLen; unrollLen = constSizedLen < constStrLen ? constSizedLen : constStrLen;
if (constLen > TmpConstStr.length()) constLen = TmpConstStr.length(); else
if (!constLen) constLen = TmpConstStr.length(); unrollLen = constStrLen;
if (!constLen) continue;
if (!be_quiet) if (!be_quiet)
errs() << callInst->getCalledFunction()->getName() << ": len " << constLen errs() << callInst->getCalledFunction()->getName() << ": unroll len " << unrollLen
<< ((isSizedcmp && !isConstSized) ? ", variable n" : "")
<< ": " << ConstStr << "\n"; << ": " << ConstStr << "\n";
/* split before the call instruction */ /* split before the call instruction */
BasicBlock *bb = callInst->getParent(); BasicBlock *bb = callInst->getParent();
BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(callInst)); BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(callInst));
BasicBlock *next_bb =
BasicBlock *next_lenchk_bb = NULL;
if (isSizedcmp && !isConstSized) {
next_lenchk_bb = BasicBlock::Create(C, "len_check", end_bb->getParent(), end_bb);
BranchInst::Create(end_bb, next_lenchk_bb);
}
BasicBlock *next_cmp_bb =
BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb); BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb);
BranchInst::Create(end_bb, next_bb); BranchInst::Create(end_bb, next_cmp_bb);
PHINode *PN = PHINode::Create(Int32Ty, constLen + 1, "cmp_phi"); PHINode *PN = PHINode::Create(Int32Ty, (next_lenchk_bb ? 2 : 1) * unrollLen + 1, "cmp_phi");
#if LLVM_VERSION_MAJOR < 8 #if LLVM_VERSION_MAJOR < 8
TerminatorInst *term = bb->getTerminator(); TerminatorInst *term = bb->getTerminator();
#else #else
Instruction *term = bb->getTerminator(); Instruction *term = bb->getTerminator();
#endif #endif
BranchInst::Create(next_bb, bb); BranchInst::Create(next_lenchk_bb ? next_lenchk_bb : next_cmp_bb, bb);
term->eraseFromParent(); term->eraseFromParent();
for (uint64_t i = 0; i < constLen; i++) { for (uint64_t i = 0; i < unrollLen; i++) {
BasicBlock * cur_bb = next_bb; BasicBlock *cur_cmp_bb = next_cmp_bb, *cur_lenchk_bb = next_lenchk_bb;
unsigned char c; unsigned char c;
if (cur_lenchk_bb) {
IRBuilder<> cur_lenchk_IRB(&*(cur_lenchk_bb->getFirstInsertionPt()));
Value *icmp = cur_lenchk_IRB.CreateICmpEQ(
sizedValue, ConstantInt::get(Int64Ty, i));
cur_lenchk_IRB.CreateCondBr(icmp, end_bb, cur_cmp_bb);
cur_lenchk_bb->getTerminator()->eraseFromParent();
PN->addIncoming(ConstantInt::get(Int32Ty, 0), cur_lenchk_bb);
}
if (isCaseInsensitive) if (isCaseInsensitive)
c = (unsigned char)(tolower((int)ConstStr[i]) & 0xff); c = (unsigned char)(tolower((int)ConstStr[i]) & 0xff);
else else
c = (unsigned char)ConstStr[i]; c = (unsigned char)ConstStr[i];
BasicBlock::iterator IP = next_bb->getFirstInsertionPt(); IRBuilder<> cur_cmp_IRB(&*(cur_cmp_bb->getFirstInsertionPt()));
IRBuilder<> IRB(&*IP);
Value *v = ConstantInt::get(Int64Ty, i); Value *v = ConstantInt::get(Int64Ty, i);
Value *ele = IRB.CreateInBoundsGEP(VarStr, v, "empty"); Value *ele = cur_cmp_IRB.CreateInBoundsGEP(VarStr, v, "empty");
Value *load = IRB.CreateLoad(ele); Value *load = cur_cmp_IRB.CreateLoad(ele);
if (isCaseInsensitive) { if (isCaseInsensitive) {
// load >= 'A' && load <= 'Z' ? load | 0x020 : load // load >= 'A' && load <= 'Z' ? load | 0x020 : load
load = IRB.CreateZExt(load, Int32Ty); load = cur_cmp_IRB.CreateZExt(load, Int32Ty);
std::vector<Value *> args; std::vector<Value *> args;
args.push_back(load); args.push_back(load);
load = IRB.CreateCall(tolowerFn, args, "tmp"); load = cur_cmp_IRB.CreateCall(tolowerFn, args, "tmp");
load = IRB.CreateTrunc(load, Int8Ty); load = cur_cmp_IRB.CreateTrunc(load, Int8Ty);
} }
Value *isub; Value *isub;
if (HasStr1) if (HasStr1)
isub = IRB.CreateSub(ConstantInt::get(Int8Ty, c), load); isub = cur_cmp_IRB.CreateSub(ConstantInt::get(Int8Ty, c), load);
else else
isub = IRB.CreateSub(load, ConstantInt::get(Int8Ty, c)); isub = cur_cmp_IRB.CreateSub(load, ConstantInt::get(Int8Ty, c));
Value *sext = IRB.CreateSExt(isub, Int32Ty); Value *sext = cur_cmp_IRB.CreateSExt(isub, Int32Ty);
PN->addIncoming(sext, cur_bb); PN->addIncoming(sext, cur_cmp_bb);
if (i < constLen - 1) { if (i < unrollLen - 1) {
next_bb = if (cur_lenchk_bb) {
next_lenchk_bb = BasicBlock::Create(C, "len_check", end_bb->getParent(), end_bb);
BranchInst::Create(end_bb, next_lenchk_bb);
}
next_cmp_bb =
BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb); BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb);
BranchInst::Create(end_bb, next_bb); BranchInst::Create(end_bb, next_cmp_bb);
Value *icmp = IRB.CreateICmpEQ(isub, ConstantInt::get(Int8Ty, 0)); Value *icmp = cur_cmp_IRB.CreateICmpEQ(isub, ConstantInt::get(Int8Ty, 0));
IRB.CreateCondBr(icmp, next_bb, end_bb); cur_cmp_IRB.CreateCondBr(icmp, next_lenchk_bb ? next_lenchk_bb : next_cmp_bb, end_bb);
cur_bb->getTerminator()->eraseFromParent(); cur_cmp_bb->getTerminator()->eraseFromParent();
} else { } else {

View File

@ -20,9 +20,19 @@ int main(int argc, char **argv) {
} }
if (strcmp(input, "LIBTOKENCAP") == 0) if (strcmp(input, "LIBTOKENCAP") == 0)
printf("your string was libtokencap\n"); printf("your string was LIBTOKENCAP\n");
else if (strcmp(input, "BUGMENOT") == 0) else if (strcmp(input, "BUGMENOT") == 0)
printf("your string was bugmenot\n"); printf("your string was BUGMENOT\n");
else if (strncmp(input, "BANANA", 3) == 0)
printf("your string started with BAN\n");
else if (strcmp(input, "APRI\0COT") == 0)
printf("your string was APRI\n");
else if (strcasecmp(input, "Kiwi") == 0)
printf("your string was Kiwi\n");
else if (strncasecmp(input, "avocado", 9) == 0)
printf("your string was avocado\n");
else if (strncasecmp(input, "Grapes", argc > 2 ? atoi(argv[2]) : 3) == 0)
printf("your string was a prefix of Grapes\n");
else if (strcmp(input, "BUFFEROVERFLOW") == 0) { else if (strcmp(input, "BUFFEROVERFLOW") == 0) {
buf = (char *)malloc(16); buf = (char *)malloc(16);

View File

@ -22,6 +22,20 @@ else
GREPAOPTION= GREPAOPTION=
fi fi
test_compcov_binary_functionality() {
RUN="../afl-showmap -o /dev/null -- $1"
$RUN 'LIBTOKENCAP' | grep 'your string was LIBTOKENCAP' \
&& $RUN 'BUGMENOT' | grep 'your string was BUGMENOT' \
&& $RUN 'BANANA' | grep 'your string started with BAN' \
&& $RUN 'APRI' | grep 'your string was APRI' \
&& $RUN 'kiWI' | grep 'your string was Kiwi' \
&& $RUN 'Avocado' | grep 'your string was avocado' \
&& $RUN 'GRAX' 3 | grep 'your string was a prefix of Grapes' \
&& $RUN 'LOCALVARIABLE' | grep 'local var memcmp works!' \
&& $RUN 'abc' | grep 'short local var memcmp works!' \
&& $RUN 'GLOBALVARIABLE' | grep 'global var memcmp works!'
} > /dev/null
ECHO="printf %b\\n" ECHO="printf %b\\n"
$ECHO \\101 2>&1 | grep -qE '^A' || { $ECHO \\101 2>&1 | grep -qE '^A' || {
ECHO= ECHO=
@ -259,7 +273,7 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
$ECHO "$RED[!] llvm_mode failed" $ECHO "$RED[!] llvm_mode failed"
CODE=1 CODE=1
} }
test -e test-compcov.harden && { test -e test-compcov.harden && test_compcov_binary_functionality ./test-compcov.harden && {
grep -Eq$GREPAOPTION 'stack_chk_fail|fstack-protector-all|fortified' test-compcov.harden > /dev/null 2>&1 && { grep -Eq$GREPAOPTION 'stack_chk_fail|fstack-protector-all|fortified' test-compcov.harden > /dev/null 2>&1 && {
$ECHO "$GREEN[+] llvm_mode hardened mode succeeded and is working" $ECHO "$GREEN[+] llvm_mode hardened mode succeeded and is working"
} || { } || {
@ -360,8 +374,8 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
} }
AFL_LLVM_INSTRUMENT=AFL AFL_LLVM_INSTRUMENT=AFL
AFL_DEBUG=1 AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast -o test-compcov.compcov test-compcov.c > test.out 2>&1 AFL_DEBUG=1 AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast -o test-compcov.compcov test-compcov.c > test.out 2>&1
test -e test-compcov.compcov && { test -e test-compcov.compcov && test_compcov_binary_functionality ./test-compcov.compcov && {
grep --binary-files=text -Eq " [ 12][0-9][0-9] location| [3-9][0-9] location" test.out && { grep --binary-files=text -Eq " [ 123][0-9][0-9] location| [3-9][0-9] location" test.out && {
$ECHO "$GREEN[+] llvm_mode laf-intel/compcov feature works correctly" $ECHO "$GREEN[+] llvm_mode laf-intel/compcov feature works correctly"
} || { } || {
$ECHO "$RED[!] llvm_mode laf-intel/compcov feature failed" $ECHO "$RED[!] llvm_mode laf-intel/compcov feature failed"
@ -374,7 +388,7 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
rm -f test-compcov.compcov test.out rm -f test-compcov.compcov test.out
echo foobar.c > whitelist.txt echo foobar.c > whitelist.txt
AFL_DEBUG=1 AFL_LLVM_WHITELIST=whitelist.txt ../afl-clang-fast -o test-compcov test-compcov.c > test.out 2>&1 AFL_DEBUG=1 AFL_LLVM_WHITELIST=whitelist.txt ../afl-clang-fast -o test-compcov test-compcov.c > test.out 2>&1
test -e test-compcov && { test -e test-compcov && test_compcov_binary_functionality ./test-compcov && {
grep -q "No instrumentation targets found" test.out && { grep -q "No instrumentation targets found" test.out && {
$ECHO "$GREEN[+] llvm_mode whitelist feature works correctly" $ECHO "$GREEN[+] llvm_mode whitelist feature works correctly"
} || { } || {
@ -513,7 +527,7 @@ test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && {
CODE=1 CODE=1
} }
test -e test-compcov.harden.gccpi && { test -e test-compcov.harden.gccpi && test_compcov_binary_functionality ./test-compcov.harden.gccpi && {
grep -Eq$GREPAOPTION 'stack_chk_fail|fstack-protector-all|fortified' test-compcov.harden.gccpi > /dev/null 2>&1 && { grep -Eq$GREPAOPTION 'stack_chk_fail|fstack-protector-all|fortified' test-compcov.harden.gccpi > /dev/null 2>&1 && {
$ECHO "$GREEN[+] gcc_plugin hardened mode succeeded and is working" $ECHO "$GREEN[+] gcc_plugin hardened mode succeeded and is working"
} || { } || {
@ -558,7 +572,7 @@ test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && {
# now for the special gcc_plugin things # now for the special gcc_plugin things
echo foobar.c > whitelist.txt echo foobar.c > whitelist.txt
AFL_GCC_WHITELIST=whitelist.txt ../afl-gcc-fast -o test-compcov test-compcov.c > /dev/null 2>&1 AFL_GCC_WHITELIST=whitelist.txt ../afl-gcc-fast -o test-compcov test-compcov.c > /dev/null 2>&1
test -e test-compcov && { test -e test-compcov && test_compcov_binary_functionality ./test-compcov && {
echo 1 | ../afl-showmap -m ${MEM_LIMIT} -o - -r -- ./test-compcov 2>&1 | grep -q "Captured 1 tuples" && { echo 1 | ../afl-showmap -m ${MEM_LIMIT} -o - -r -- ./test-compcov 2>&1 | grep -q "Captured 1 tuples" && {
$ECHO "$GREEN[+] gcc_plugin whitelist feature works correctly" $ECHO "$GREEN[+] gcc_plugin whitelist feature works correctly"
} || { } || {