update the code, readme
- add readme
- add required qlpack.yml
utils/autodict_ql/autodict_ql.py (new file, 188 lines)
@@ -0,0 +1,188 @@
#!/usr/bin/env python3
# Autodict-QL - Optimal token generation for fuzzing
# Runs the bundled CodeQL queries against a CodeQL database and post-processes
# the results into AFL++ dictionary tokens.

import os
import errno
import argparse
import subprocess


def ensure_dir(dir):
    # Create the token output directory if it does not exist yet.
    try:
        os.makedirs(dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def parse_args():
    parser = argparse.ArgumentParser(
        description="Helper - specify the working directory, the CodeQL database and "
        "the output folder in which the tokens for the project are saved. "
        "Example usage: python3 autodict_ql.py /path/to/automate /path/to/codeql-db tokens"
    )
    parser.add_argument("cur", help="Current (working) path")
    parser.add_argument("db", help="CodeQL database path")
    parser.add_argument("tokenpath", help="Destination directory for tokens")
    return parser.parse_args()


def static_analysis(file, file2, cur, db):
    # Run a single CodeQL query (file2) against the database and save the raw
    # query output into `file`.
    with open(cur + "/" + file, "w") as f:
        print(cur + "/" + file)
        stream = os.popen("codeql query run " + cur + "/" + file2 + " -d " + db)
        output = stream.read()
        f.write(output)


def copy_tokens(cur, tokenpath):
    # Collect the token files produced by the individual post-processing
    # scripts into the final token directory.
    subprocess.call(["cp " + cur + "/arrays-lits/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/strstr-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/strcmp-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/strncmp-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/local-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/memcmp-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/global-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/lits/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/arrays-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/strtool-strs/* " + cur + "/" + tokenpath + "/."], shell=True)


def codeql_analysis(cur, db):
    # Run every bundled CodeQL query, then hand the results to the
    # post-processing scripts.
    static_analysis("litout.out", "litool.ql", cur, db)
    static_analysis("strcmp-strings.out", "strcmp-str.ql", cur, db)
    static_analysis("strncmp-strings.out", "strncmp-str.ql", cur, db)
    static_analysis("strstr-strings.out", "strstr-str.ql", cur, db)
    static_analysis("memcmp-strings.out", "memcmp-str.ql", cur, db)
    static_analysis("global-values-strings.out", "globals-values.ql", cur, db)
    static_analysis("local-strings.out", "locals-strs.ql", cur, db)
    static_analysis("strtool-strings.out", "strtool.ql", cur, db)
    static_analysis("arrays.out", "array-literals.ql", cur, db)
    start_aflql(0, cur)


def start_aflql(tokenpath, cur):
    # Feed each query result into its post-processing script; every script
    # writes one token per file into its own output directory.
    command = [
        "python3",
        cur + "/litan.py",
        cur + "/lits/",
        cur + "/litout.out",
    ]
    worker1 = subprocess.Popen(command)
    print(worker1.communicate())

    command1 = [
        "python3",
        cur + "/strcmp-strings.py",
        cur + "/strcmp-strs/",
        cur + "/strcmp-strings.out",
    ]
    worker2 = subprocess.Popen(command1)
    print(worker2.communicate())

    command2 = [
        "python3",
        cur + "/strncmp-strings.py",
        cur + "/strncmp-strs/",
        cur + "/strncmp-strings.out",
    ]
    worker3 = subprocess.Popen(command2)
    print(worker3.communicate())

    command3 = [
        "python3",
        cur + "/array-lits.py",
        cur + "/arrays-lits/",
        cur + "/arrays.out",
    ]
    worker4 = subprocess.Popen(command3)
    print(worker4.communicate())

    command4 = [
        "python3",
        cur + "/array-strings.py",
        cur + "/arrays-strs/",
        cur + "/arrays.out",
    ]
    worker5 = subprocess.Popen(command4)
    print(worker5.communicate())

    command5 = [
        "python3",
        cur + "/memcmp-strings.py",
        cur + "/memcmp-strs/",
        cur + "/memcmp-strings.out",
    ]
    worker6 = subprocess.Popen(command5)
    print(worker6.communicate())

    command6 = [
        "python3",
        cur + "/globals-strings.py",
        cur + "/global-strs/",
        cur + "/global-values-strings.out",
    ]
    worker7 = subprocess.Popen(command6)
    print(worker7.communicate())

    command7 = [
        "python3",
        cur + "/strstr-strings.py",
        cur + "/strstr-strs/",
        cur + "/strstr-strings.out",
    ]
    worker8 = subprocess.Popen(command7)
    print(worker8.communicate())

    command8 = [
        "python3",
        cur + "/stan-strings.py",
        cur + "/strtool-strs/",
        cur + "/strtool-strings.out",
    ]
    worker9 = subprocess.Popen(command8)
    print(worker9.communicate())

    command9 = [
        "python3",
        cur + "/local-strings.py",
        cur + "/local-strs/",
        cur + "/local-strings.out",
    ]
    worker10 = subprocess.Popen(command9)
    print(worker10.communicate())


def main():
    args = parse_args()
    ensure_dir(args.tokenpath)
    codeql_analysis(args.cur, args.db)
    copy_tokens(args.cur, args.tokenpath)


if __name__ == "__main__":
    main()
utils/autodict_ql/build-codeql.sh (new file, 17 lines)
@@ -0,0 +1,17 @@
#!/bin/sh
cd ~ || exit 1
if [ -d "codeql-home" ]; then
  echo "codeql-home already exists, aborting."
  exit 1
fi
sudo apt install build-essential libtool-bin python3-dev automake git vim wget -y
mkdir codeql-home
cd codeql-home || exit 1
git clone https://github.com/github/codeql.git codeql-repo
git clone https://github.com/github/codeql-go.git
wget https://github.com/github/codeql-cli-binaries/releases/download/v2.4.6/codeql-linux64.zip
unzip codeql-linux64.zip
mv codeql codeql-cli
export PATH="$HOME/codeql-home/codeql-cli:$PATH"
codeql resolve languages
codeql resolve qlpacks
echo 'export PATH="$HOME/codeql-home/codeql-cli:$PATH"' >> ~/.bashrc
utils/autodict_ql/litan.py (new file, 86 lines)
@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# Autodict-QL - Optimal token generation for fuzzing
# Part of the AFL++ project
# Author : Microsvuln - Arash.vre@gmail.com
import os
import sys
import string
import errno
import argparse


def parse_args():
    parser = argparse.ArgumentParser(
        description="Helper - specify the CodeQL analysis output file and the corpus "
        "directory in which the tokens for the constants of the project are saved. "
        "Example usage: python3 litan.py outdir out.txt"
    )
    parser.add_argument("corpdir", help="The path of the corpus directory in which the token files are generated.")
    parser.add_argument("infile", help="The output file of the CodeQL analysis (e.g. litout.out); the analysis takes place on this file.")
    return parser.parse_args()


def ensure_dir(dir):
    # The corpus directory must not exist yet; refuse to reuse an existing one.
    try:
        os.makedirs(dir)
    except OSError as e:
        if e.errno == errno.EEXIST:
            print("[-] Directory exists, specify another directory")
            sys.exit(1)


def do_analysis1(corpdir, infile):
    # Clean every hex literal found in the CodeQL result table, convert it to a
    # 4-byte little-endian value and write one token per seed file.
    with open(infile, "rb") as f:
        lines = f.readlines()[1:]
    new_lst = []
    n = 1
    for i, num in enumerate(lines):
        if i != 0:
            new_lst.append(num)
            str1 = str(num)
            print("num is " + str1)
            # Strip table decoration, whitespace and punctuation around the literal.
            str1 = str1.rstrip("\n\n")
            str1 = str1.replace("|", "")
            str1 = str1.rstrip("\r\n")
            str1 = str1.rstrip("\n")
            str1 = str1.replace(" ", "")
            translator = str.maketrans("", "", string.punctuation)
            str1 = str1.translate(translator)
            str1 = str1[1:]
            str1 = str1[:-1]
            print("After cleanup : " + str1)
            if (str1 != "0") and (str1 != "ffffffff") and (str1 != "fffffffe") or (len(str1) == 4) or (len(str1) == 8):
                print("first : " + str1)
                if len(str1) > 8:
                    str1 = str1[:-1]
                elif len(str1) == 5:
                    str1 = "0"
                try:
                    with open(corpdir + "/lit-seed{0}".format(n), "w") as file:
                        str1 = str1.replace("0x", "")
                        print(str1)
                        str1 = int(str1, base=16)
                        str1 = str1.to_bytes(4, byteorder="little")
                        file.write(str(str1))
                    with open(corpdir + "/lit-seed{0}".format(n), "r") as q:
                        a = q.readline()
                        a = a[1:]
                        print("AFL++ Autodict-QL by Microsvuln : Writing Token : " + str(a))
                    with open(corpdir + "/lit-seed{0}".format(n), "w") as w1:
                        w1.write(str(a))
                        print("Done!")
                except Exception:
                    print("Error!")
                n = n + 1


def main():
    args = parse_args()
    ensure_dir(args.corpdir)
    do_analysis1(args.corpdir, args.infile)


if __name__ == "__main__":
    main()
utils/autodict_ql/qlpack.yml (new file, 3 lines)
@@ -0,0 +1,3 @@
name: automate
version: 0.0.0
libraryPathDependencies: codeql-cpp
utils/autodict_ql/readme.md (new file, 81 lines)
@@ -0,0 +1,81 @@
# Autodict-QL - Optimal Token Generation for Fuzzing

## What is this?

Autodict-QL is a plugin system that enables fast generation of tokens/dictionaries in a way that can easily be adapted by the user (unlike the LLVM passes, which are hard to modify). In other words, Autodict-QL is a scriptable feature that uses CodeQL (a powerful semantic code analysis engine) to fetch information from a code base.

Tokens are useful when you fuzz parsers. The AFL++ `-x` switch enables the use of dictionaries during your fuzzing campaign. If you are not familiar with dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries).
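To give a concrete picture of the on-disk layout, here is a minimal, illustrative Python sketch of the one-token-per-file format that the Autodict-QL helper scripts produce (the directory name `tokens/` and the token values below are made up for the example); the resulting directory is what you later pass to `afl-fuzz -x`:

```python
# Illustrative sketch only: write a few made-up tokens, one per file, into a
# tokens/ directory - the layout Autodict-QL generates and afl-fuzz -x consumes.
import os

example_tokens = [b"GET ", b"Content-Length:", b"\x7fELF"]  # hypothetical tokens

os.makedirs("tokens", exist_ok=True)
for i, tok in enumerate(example_tokens, start=1):
    with open(os.path.join("tokens", "seed{0}".format(i)), "wb") as f:
        f.write(tok)  # each file holds exactly one dictionary entry
```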
## Why CodeQL?

We built this plugin on top of the CodeQL engine because it offers scripting features, it is easier to work with, and it is independent of the LLVM toolchain. This means that users can write their own CodeQL scripts, or modify the provided ones, to improve or change the token generation algorithms based on different program analysis concepts.

## CodeQL scripts

Currently, we ship a set of default scripts for token generation. Every CodeQL script is provided as a standalone script because that makes it easier to modify or test.

The following CodeQL scripts are currently provided:

`strcmp-str.ql` is used to extract strings that are passed to the `strcmp` function.

`strncmp-str.ql` is used to extract the strings from the `strncmp` function.

`memcmp-str.ql` is used to extract the strings from the `memcmp` function.

`litool.ql` extracts magic numbers in hexadecimal format.
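The hexadecimal values returned by this query are then post-processed by `litan.py` (added in this commit) into fixed-size seeds. As a minimal sketch of that conversion (the value `0xcafebabe` is only an illustrative stand-in), each literal is parsed and stored in little-endian byte order:

```python
# Minimal sketch of the magic-number post-processing: parse one extracted hex
# literal and turn it into a 4-byte little-endian token, as litan.py does.
magic = "0xcafebabe"                      # illustrative value from the query output
value = int(magic.replace("0x", ""), 16)  # parse the hex string
token = value.to_bytes(4, byteorder="little")
print(token)                              # b'\xbe\xba\xfe\xca'
```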
`strtool.ql` extracts strings using a regex and dataflow concepts to capture string comparison functions. Even if `strcmp` is rewritten in a project as `Mystrcmp` or something like `strmycmp`, this script can still catch the arguments, and these are valuable tokens. A rough illustration of the name matching follows below.
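As a hedged illustration (not part of the tool itself), the sketch below checks a few made-up function names against the name pattern used by the comparison-function query shipped in this commit; QL's `regexpMatch` matches the whole name, so `re.fullmatch` is the closest Python equivalent:

```python
# Illustrative only: names such as Mystrcmp or strmycmp still match the pattern
# used by the CodeQL query, so their arguments are collected as tokens too.
import re

pattern = re.compile(r".*(str|mem|strn|b)*(cmp|str)*")

for name in ["strcmp", "Mystrcmp", "strmycmp", "memcmp"]:
    print(name, bool(pattern.fullmatch(name)))  # all of these match
```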
You can write other CodeQL scripts to extract additional, potentially effective tokens if you think they could be useful.
## Usage

Using Autodict-QL is fairly easy; the steps are described below.

1. First of all, you need to have CodeQL installed on the system. We make this easy with the `build-codeql.sh` script, which installs CodeQL completely and sets the required environment variables for your system. So:

` # chmod +x build-codeql.sh`

` # ./build-codeql.sh`

` # codeql `

Then you should get:

    Usage: codeql <command> <argument>...
    Create and query CodeQL databases, or work with the QL language.

    GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is
    not itself free software. Type codeql --license to see the license terms.

          --license              Show the license terms for the CodeQL toolchain.
    Common options:
      -h, --help                 Show this help text.
      -v, --verbose              Incrementally increase the number of progress messages printed.
      -q, --quiet                Incrementally decrease the number of progress messages printed.
    Some advanced options have been hidden; try --help -v for a fuller view.
    Commands:
      query     Compile and execute QL code.
      bqrs      Get information from .bqrs files.
      database  Create, analyze and process CodeQL databases.
      dataset   [Plumbing] Work with raw QL datasets.
      test      Execute QL unit tests.
      resolve   [Deep plumbing] Helper commands to resolve disk locations etc.
      execute   [Deep plumbing] Low-level commands that need special JVM options.
      version   Show the version of the CodeQL toolchain.
      generate  Generate formatted QL documentation.
      github    Commands useful for interacting with the GitHub API through CodeQL.
2. Compile your project with CodeQL: To use the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not hard:
    - First you need to create a CodeQL database of the project codebase. Suppose we want to compile libxml with CodeQL; go to the libxml directory and issue the following commands:
    - `./configure --disable-shared`
    - `codeql database create libxml-db --language=cpp --command=make`
    - Now you have the CodeQL database of the project :-)
3. To run Autodict-QL, the next step is to create a folder named `automate` in the project you want to fuzz.
    - `mkdir automate` (inside the libxml directory)
4. Then upgrade the CodeQL database you created in step 2 from inside the `automate` directory you created in step 3:
    - `codeql database upgrade ../libxml-db`
5. Everything is set! Now you should issue the following to get the tokens:
    - `python3 autodict-ql.py [CURRENT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]`
    - Example: `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens`
    - This will create the final `tokens` directory for you, and you are done; then pass the tokens path to AFL++'s `-x` flag.
6. Done!
@@ -3,8 +3,8 @@ import semmle.code.cpp.dataflow.DataFlow
 class StringLiteralNode extends DataFlow::Node {
   StringLiteralNode() { this.asExpr() instanceof StringLiteral }
 }
-class MemcmpArgNode extends DataFlow::Node {
-  MemcmpArgNode() {
+class CmpArgNode extends DataFlow::Node {
+  CmpArgNode() {
     exists(FunctionCall fc |
       fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and
       fc.getArgument(0) = this.asExpr()
@@ -17,7 +17,7 @@ class MemcmpArgNode extends DataFlow::Node {
   }
 }

-from StringLiteralNode src, MemcmpArgNode arg
+from StringLiteralNode src, CmpArgNode arg
 where
   DataFlow::localFlow(src, arg)