update the code, readme
- add readme
- add required qlpack.yml
utils/autodict_ql/autodict_ql.py (new file, 188 lines)
@@ -0,0 +1,188 @@
#!/usr/bin/env python3
# Autodict-QL - Optimal token generation for fuzzing
# Runs the bundled CodeQL queries against a CodeQL database and post-processes
# the results into AFL++ dictionary tokens.

import os
import errno
import argparse
import subprocess


def ensure_dir(dir):
    # Create the token output directory if it does not exist yet.
    try:
        os.makedirs(dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def parse_args():
    parser = argparse.ArgumentParser(
        description="Helper - specify the working directory, the CodeQL database and "
        "the output folder in which the tokens for the project are saved. "
        "Example usage: python3 autodict_ql.py /path/to/automate /path/to/codeql-db tokens"
    )
    parser.add_argument("cur", help="Current (working) path")
    parser.add_argument("db", help="CodeQL database path")
    parser.add_argument("tokenpath", help="Destination directory for tokens")
    return parser.parse_args()


def static_analysis(file, file2, cur, db):
    # Run a single CodeQL query (file2) against the database and save the raw
    # query output into `file`.
    with open(cur + "/" + file, "w") as f:
        print(cur + "/" + file)
        stream = os.popen("codeql query run " + cur + "/" + file2 + " -d " + db)
        output = stream.read()
        f.write(output)


def copy_tokens(cur, tokenpath):
    # Collect the token files produced by the individual post-processing
    # scripts into the final token directory.
    subprocess.call(["cp " + cur + "/arrays-lits/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/strstr-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/strcmp-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/strncmp-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/local-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/memcmp-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/global-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/lits/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/arrays-strs/* " + cur + "/" + tokenpath + "/."], shell=True)
    subprocess.call(["cp " + cur + "/strtool-strs/* " + cur + "/" + tokenpath + "/."], shell=True)


def codeql_analysis(cur, db):
    # Run every bundled CodeQL query, then hand the results to the
    # post-processing scripts.
    static_analysis("litout.out", "litool.ql", cur, db)
    static_analysis("strcmp-strings.out", "strcmp-str.ql", cur, db)
    static_analysis("strncmp-strings.out", "strncmp-str.ql", cur, db)
    static_analysis("strstr-strings.out", "strstr-str.ql", cur, db)
    static_analysis("memcmp-strings.out", "memcmp-str.ql", cur, db)
    static_analysis("global-values-strings.out", "globals-values.ql", cur, db)
    static_analysis("local-strings.out", "locals-strs.ql", cur, db)
    static_analysis("strtool-strings.out", "strtool.ql", cur, db)
    static_analysis("arrays.out", "array-literals.ql", cur, db)
    start_aflql(0, cur)


def start_aflql(tokenpath, cur):
    # Feed each query result into its post-processing script; every script
    # writes one token per file into its own output directory.
    command = [
        "python3",
        cur + "/litan.py",
        cur + "/lits/",
        cur + "/litout.out",
    ]
    worker1 = subprocess.Popen(command)
    print(worker1.communicate())

    command1 = [
        "python3",
        cur + "/strcmp-strings.py",
        cur + "/strcmp-strs/",
        cur + "/strcmp-strings.out",
    ]
    worker2 = subprocess.Popen(command1)
    print(worker2.communicate())

    command2 = [
        "python3",
        cur + "/strncmp-strings.py",
        cur + "/strncmp-strs/",
        cur + "/strncmp-strings.out",
    ]
    worker3 = subprocess.Popen(command2)
    print(worker3.communicate())

    command3 = [
        "python3",
        cur + "/array-lits.py",
        cur + "/arrays-lits/",
        cur + "/arrays.out",
    ]
    worker4 = subprocess.Popen(command3)
    print(worker4.communicate())

    command4 = [
        "python3",
        cur + "/array-strings.py",
        cur + "/arrays-strs/",
        cur + "/arrays.out",
    ]
    worker5 = subprocess.Popen(command4)
    print(worker5.communicate())

    command5 = [
        "python3",
        cur + "/memcmp-strings.py",
        cur + "/memcmp-strs/",
        cur + "/memcmp-strings.out",
    ]
    worker6 = subprocess.Popen(command5)
    print(worker6.communicate())

    command6 = [
        "python3",
        cur + "/globals-strings.py",
        cur + "/global-strs/",
        cur + "/global-values-strings.out",
    ]
    worker7 = subprocess.Popen(command6)
    print(worker7.communicate())

    command7 = [
        "python3",
        cur + "/strstr-strings.py",
        cur + "/strstr-strs/",
        cur + "/strstr-strings.out",
    ]
    worker8 = subprocess.Popen(command7)
    print(worker8.communicate())

    command8 = [
        "python3",
        cur + "/stan-strings.py",
        cur + "/strtool-strs/",
        cur + "/strtool-strings.out",
    ]
    worker9 = subprocess.Popen(command8)
    print(worker9.communicate())

    command9 = [
        "python3",
        cur + "/local-strings.py",
        cur + "/local-strs/",
        cur + "/local-strings.out",
    ]
    worker10 = subprocess.Popen(command9)
    print(worker10.communicate())


def main():
    args = parse_args()
    ensure_dir(args.tokenpath)
    codeql_analysis(args.cur, args.db)
    copy_tokens(args.cur, args.tokenpath)


if __name__ == "__main__":
    main()
utils/autodict_ql/build-codeql.sh (new file, 17 lines)
@@ -0,0 +1,17 @@
#!/bin/sh
cd ~ || exit 1
if [ -d "codeql-home" ]; then
  echo "codeql-home already exists, aborting."
  exit 1
fi
sudo apt install build-essential libtool-bin python3-dev automake git vim wget -y
mkdir codeql-home
cd codeql-home || exit 1
git clone https://github.com/github/codeql.git codeql-repo
git clone https://github.com/github/codeql-go.git
wget https://github.com/github/codeql-cli-binaries/releases/download/v2.4.6/codeql-linux64.zip
unzip codeql-linux64.zip
mv codeql codeql-cli
export PATH="$HOME/codeql-home/codeql-cli:$PATH"
codeql resolve languages
codeql resolve qlpacks
echo 'export PATH="$HOME/codeql-home/codeql-cli:$PATH"' >> ~/.bashrc
utils/autodict_ql/litan.py (new file, 86 lines)
@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# Autodict-QL - Optimal token generation for fuzzing
# Part of the AFL++ project
# Author : Microsvuln - Arash.vre@gmail.com
import os
import sys
import string
import errno
import argparse


def parse_args():
    parser = argparse.ArgumentParser(
        description="Helper - specify the CodeQL analysis output file and the corpus "
        "directory in which the tokens for the constants of the project are saved. "
        "Example usage: python3 litan.py outdir out.txt"
    )
    parser.add_argument("corpdir", help="The path of the corpus directory in which the token files are generated.")
    parser.add_argument("infile", help="The output file of the CodeQL analysis (e.g. litout.out); the analysis takes place on this file.")
    return parser.parse_args()


def ensure_dir(dir):
    # The corpus directory must not exist yet; refuse to reuse an existing one.
    try:
        os.makedirs(dir)
    except OSError as e:
        if e.errno == errno.EEXIST:
            print("[-] Directory exists, specify another directory")
            sys.exit(1)


def do_analysis1(corpdir, infile):
    # Clean every hex literal found in the CodeQL result table, convert it to a
    # 4-byte little-endian value and write one token per seed file.
    with open(infile, "rb") as f:
        lines = f.readlines()[1:]
    new_lst = []
    n = 1
    for i, num in enumerate(lines):
        if i != 0:
            new_lst.append(num)
            str1 = str(num)
            print("num is " + str1)
            # Strip table decoration, whitespace and punctuation around the literal.
            str1 = str1.rstrip("\n\n")
            str1 = str1.replace("|", "")
            str1 = str1.rstrip("\r\n")
            str1 = str1.rstrip("\n")
            str1 = str1.replace(" ", "")
            translator = str.maketrans("", "", string.punctuation)
            str1 = str1.translate(translator)
            str1 = str1[1:]
            str1 = str1[:-1]
            print("After cleanup : " + str1)
            if (str1 != "0") and (str1 != "ffffffff") and (str1 != "fffffffe") or (len(str1) == 4) or (len(str1) == 8):
                print("first : " + str1)
                if len(str1) > 8:
                    str1 = str1[:-1]
                elif len(str1) == 5:
                    str1 = "0"
                try:
                    with open(corpdir + "/lit-seed{0}".format(n), "w") as file:
                        str1 = str1.replace("0x", "")
                        print(str1)
                        str1 = int(str1, base=16)
                        str1 = str1.to_bytes(4, byteorder="little")
                        file.write(str(str1))
                    with open(corpdir + "/lit-seed{0}".format(n), "r") as q:
                        a = q.readline()
                        a = a[1:]
                        print("AFL++ Autodict-QL by Microsvuln : Writing Token : " + str(a))
                    with open(corpdir + "/lit-seed{0}".format(n), "w") as w1:
                        w1.write(str(a))
                        print("Done!")
                except Exception:
                    print("Error!")
                n = n + 1


def main():
    args = parse_args()
    ensure_dir(args.corpdir)
    do_analysis1(args.corpdir, args.infile)


if __name__ == "__main__":
    main()
utils/autodict_ql/qlpack.yml (new file, 3 lines)
@@ -0,0 +1,3 @@
name: automate
version: 0.0.0
libraryPathDependencies: codeql-cpp
utils/autodict_ql/readme.md (new file, 81 lines)
@@ -0,0 +1,81 @@
# Autodict-QL - Optimal Token Generation for Fuzzing

## What is this?

Autodict-QL is a plugin system that enables fast generation of tokens/dictionaries in a way that can easily be adapted by the user (unlike the LLVM passes, which are hard to modify). In other words, Autodict-QL is a scriptable feature that uses CodeQL (a powerful semantic code analysis engine) to fetch information from a code base.

Tokens are useful when you fuzz parsers. The AFL++ `-x` switch enables the use of dictionaries during your fuzzing campaign. If you are not familiar with dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries).
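To give a concrete picture of the on-disk layout, here is a minimal, illustrative Python sketch of the one-token-per-file format that the Autodict-QL helper scripts produce (the directory name `tokens/` and the token values below are made up for the example); the resulting directory is what you later pass to `afl-fuzz -x`:

```python
# Illustrative sketch only: write a few made-up tokens, one per file, into a
# tokens/ directory - the layout Autodict-QL generates and afl-fuzz -x consumes.
import os

example_tokens = [b"GET ", b"Content-Length:", b"\x7fELF"]  # hypothetical tokens

os.makedirs("tokens", exist_ok=True)
for i, tok in enumerate(example_tokens, start=1):
    with open(os.path.join("tokens", "seed{0}".format(i)), "wb") as f:
        f.write(tok)  # each file holds exactly one dictionary entry
```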
## Why CodeQL?

We built this plugin on top of the CodeQL engine because it offers scripting features, it is easier to work with, and it is independent of the LLVM toolchain. This means that users can write their own CodeQL scripts, or modify the provided ones, to improve or change the token generation algorithms based on different program analysis concepts.

## CodeQL scripts

Currently, we ship a set of default scripts for token generation. Every CodeQL script is provided as a standalone script because that makes it easier to modify or test.

The following CodeQL scripts are currently provided:

`strcmp-str.ql` is used to extract strings that are passed to the `strcmp` function.

`strncmp-str.ql` is used to extract the strings from the `strncmp` function.

`memcmp-str.ql` is used to extract the strings from the `memcmp` function.

`litool.ql` extracts magic numbers in hexadecimal format.
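The hexadecimal values returned by this query are then post-processed by `litan.py` (added in this commit) into fixed-size seeds. As a minimal sketch of that conversion (the value `0xcafebabe` is only an illustrative stand-in), each literal is parsed and stored in little-endian byte order:

```python
# Minimal sketch of the magic-number post-processing: parse one extracted hex
# literal and turn it into a 4-byte little-endian token, as litan.py does.
magic = "0xcafebabe"                      # illustrative value from the query output
value = int(magic.replace("0x", ""), 16)  # parse the hex string
token = value.to_bytes(4, byteorder="little")
print(token)                              # b'\xbe\xba\xfe\xca'
```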
`strtool.ql` extracts strings using a regex and dataflow concepts to capture string comparison functions. Even if `strcmp` is rewritten in a project as `Mystrcmp` or something like `strmycmp`, this script can still catch the arguments, and these are valuable tokens. A rough illustration of the name matching follows below.
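As a hedged illustration (not part of the tool itself), the sketch below checks a few made-up function names against the name pattern used by the comparison-function query shipped in this commit; QL's `regexpMatch` matches the whole name, so `re.fullmatch` is the closest Python equivalent:

```python
# Illustrative only: names such as Mystrcmp or strmycmp still match the pattern
# used by the CodeQL query, so their arguments are collected as tokens too.
import re

pattern = re.compile(r".*(str|mem|strn|b)*(cmp|str)*")

for name in ["strcmp", "Mystrcmp", "strmycmp", "memcmp"]:
    print(name, bool(pattern.fullmatch(name)))  # all of these match
```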
You can write other CodeQL scripts to extract additional, potentially effective tokens if you think they could be useful.
## Usage

Using Autodict-QL is fairly easy; the steps are described below.

1. First of all, you need to have CodeQL installed on the system. We make this easy with the `build-codeql.sh` script, which installs CodeQL completely and sets the required environment variables for your system. So:

` # chmod +x build-codeql.sh`

` # ./build-codeql.sh`

` # codeql `

Then you should get:

    Usage: codeql <command> <argument>...
    Create and query CodeQL databases, or work with the QL language.

    GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is
    not itself free software. Type codeql --license to see the license terms.

          --license              Show the license terms for the CodeQL toolchain.
    Common options:
      -h, --help                 Show this help text.
      -v, --verbose              Incrementally increase the number of progress messages printed.
      -q, --quiet                Incrementally decrease the number of progress messages printed.
    Some advanced options have been hidden; try --help -v for a fuller view.
    Commands:
      query     Compile and execute QL code.
      bqrs      Get information from .bqrs files.
      database  Create, analyze and process CodeQL databases.
      dataset   [Plumbing] Work with raw QL datasets.
      test      Execute QL unit tests.
      resolve   [Deep plumbing] Helper commands to resolve disk locations etc.
      execute   [Deep plumbing] Low-level commands that need special JVM options.
      version   Show the version of the CodeQL toolchain.
      generate  Generate formatted QL documentation.
      github    Commands useful for interacting with the GitHub API through CodeQL.
2. Compile your project with CodeQL: To use the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not hard:
    - First you need to create a CodeQL database of the project codebase. Suppose we want to compile libxml with CodeQL; go to the libxml directory and issue the following commands:
    - `./configure --disable-shared`
    - `codeql database create libxml-db --language=cpp --command=make`
    - Now you have the CodeQL database of the project :-)
3. To run Autodict-QL, the next step is to create a folder named `automate` in the project you want to fuzz.
    - `mkdir automate` (inside the libxml directory)
4. Then upgrade the CodeQL database you created in step 2 from inside the `automate` directory you created in step 3:
    - `codeql database upgrade ../libxml-db`
5. Everything is set! Now you should issue the following to get the tokens:
    - `python3 autodict-ql.py [CURRENT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]`
    - Example: `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens`
    - This will create the final `tokens` directory for you, and you are done; then pass the tokens path to AFL++'s `-x` flag.
6. Done!
@@ -3,8 +3,8 @@ import semmle.code.cpp.dataflow.DataFlow
 class StringLiteralNode extends DataFlow::Node {
   StringLiteralNode() { this.asExpr() instanceof StringLiteral }
 }
-class MemcmpArgNode extends DataFlow::Node {
-  MemcmpArgNode() {
+class CmpArgNode extends DataFlow::Node {
+  CmpArgNode() {
     exists(FunctionCall fc |
       fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and
       fc.getArgument(0) = this.asExpr()
@@ -17,7 +17,7 @@ class MemcmpArgNode extends DataFlow::Node {
   }
 }

-from StringLiteralNode src, MemcmpArgNode arg
+from StringLiteralNode src, CmpArgNode arg
 where
   DataFlow::localFlow(src, arg)