mirror of
https://github.com/AFLplusplus/AFLplusplus.git
synced 2025-06-15 11:28:08 +00:00
@ -349,12 +349,15 @@ uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) {
|
||||
* @param data pointer returned in afl_custom_init for this fuzz case
|
||||
* @param filename_new_queue File name of the new queue entry
|
||||
* @param filename_orig_queue File name of the original queue entry
|
||||
* @return if the file contents was modified return 1 (True), 0 (False)
|
||||
* otherwise
|
||||
*/
|
||||
/* Hook invoked by AFL++ when a new entry is added to the queue.
 * Gramatron performs no additional analysis here and never modifies the
 * file, so it always reports "not modified". */
uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
                                   const uint8_t *filename_new_queue,
                                   const uint8_t *filename_orig_queue) {

  /* Additional analysis on the original or new test case */
  return 0;

}
|
||||
|
||||
|
7
custom_mutators/gramatron/Makefile
Normal file
7
custom_mutators/gramatron/Makefile
Normal file
@ -0,0 +1,7 @@
|
||||
# Build the Gramatron custom mutator shared object (requires json-c).
all: gramatron.so

gramatron.so: gramfuzz.c gramfuzz.h gramfuzz-helpers.c gramfuzz-mutators.c gramfuzz-util.c hashmap.c hashmap.h test.c test.h utarray.h uthash.h
	$(CC) -O3 -g -fPIC -Wno-unused-result -Wl,--allow-multiple-definition -I../../include -o gramatron.so -shared -I. -I/prg/dev/include gramfuzz.c gramfuzz-helpers.c gramfuzz-mutators.c gramfuzz-util.c hashmap.c test.c -ljson-c

clean:
	rm -f gramatron.so
|
51
custom_mutators/gramatron/README.md
Normal file
51
custom_mutators/gramatron/README.md
Normal file
@ -0,0 +1,51 @@
|
||||
# GramaTron
|
||||
|
||||
Gramatron is a coverage-guided fuzzer that uses grammar automatons to perform
|
||||
grammar-aware fuzzing. Technical details about our framework are available in our
|
||||
[ISSTA'21 paper](https://nebelwelt.net/files/21ISSTA.pdf). The artifact to reproduce the
|
||||
experiments presented in our paper are present in `artifact/`. Instructions to run
|
||||
a sample campaign and incorporate new grammars is presented below:
|
||||
|
||||
# Compiling
|
||||
|
||||
- Install `json-c`
|
||||
```
|
||||
git clone https://github.com/json-c/json-c.git
|
||||
cd json-c && git reset --hard af8dd4a307e7b837f9fa2959549548ace4afe08b && sh autogen.sh && ./configure && make && make install
|
||||
```
|
||||
|
||||
Afterwards, you can just `make` Gramatron.
|
||||
|
||||
# Running
|
||||
|
||||
You have to set the grammar automaton file to use with `GRAMATRON_AUTOMATION`:
|
||||
|
||||
```
|
||||
export AFL_DISABLE_TRIM=1
|
||||
export AFL_CUSTOM_MUTATOR_ONLY=1
|
||||
export AFL_CUSTOM_MUTATOR_LIBRARY=./gramatron.so
|
||||
export GRAMATRON_AUTOMATION=grammars/ruby/source_automata.json
|
||||
afl-fuzz -i in -o out -- ./target
|
||||
```
|
||||
|
||||
# Adding and testing a new grammar
|
||||
|
||||
- Specify the grammar as a CFG in JSON format. Examples are the corresponding `source.json` files.
|
||||
- Run the automaton generation script (in `src/gramfuzz-mutator/preprocess`)
|
||||
which will place the generated automaton in the same folder.
|
||||
```
|
||||
./preprocess/prep_automaton.sh <grammar_file> <start_symbol> [stack_limit]
|
||||
|
||||
Eg. ./preprocess/prep_automaton.sh ~/grammars/ruby/source.json PROGRAM
|
||||
```
|
||||
- If the grammar has no self-embedding rules then you do not need to pass the
|
||||
stack limit parameter. However, if it does have self-embedding rules then you
|
||||
need to pass the stack limit parameter. We recommend starting with `5` and
|
||||
then increasing it if you need more complexity
|
||||
- To sanity-check that the automaton is generating inputs as expected you can use the `test` binary housed in `src/gramfuzz-mutator`
|
||||
```
|
||||
./test SanityCheck <automaton_file>
|
||||
|
||||
Eg. ./test SanityCheck ~/grammars/ruby/source_automata.json
|
||||
```
|
||||
|
336
custom_mutators/gramatron/gramfuzz-helpers.c
Normal file
336
custom_mutators/gramatron/gramfuzz-helpers.c
Normal file
@ -0,0 +1,336 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "afl-fuzz.h"
|
||||
#include "gramfuzz.h"
|
||||
|
||||
/*Slices from beginning till idx*/
|
||||
Array *slice(Array *input, int idx) {
|
||||
|
||||
// printf("\nSlice idx:%d", idx);
|
||||
terminal *origptr;
|
||||
terminal *term_ptr;
|
||||
Array * sliced = (Array *)malloc(sizeof(Array));
|
||||
initArray(sliced, input->size);
|
||||
// Populate dynamic array members
|
||||
if (idx == 0) { return sliced; }
|
||||
for (int x = 0; x < idx; x++) {
|
||||
|
||||
origptr = &input->start[x];
|
||||
insertArray(sliced, origptr->state, origptr->symbol, origptr->symbol_len,
|
||||
origptr->trigger_idx);
|
||||
|
||||
}
|
||||
|
||||
return sliced;
|
||||
|
||||
}
|
||||
|
||||
/* Slices from idx till end*/
|
||||
Array *slice_inverse(Array *input, int idx) {
|
||||
|
||||
// printf("\nSlice idx:%d", idx);
|
||||
terminal *origptr;
|
||||
terminal *term_ptr;
|
||||
Array * sliced = (Array *)malloc(sizeof(Array));
|
||||
initArray(sliced, input->size);
|
||||
for (int x = idx; x < input->used; x++) {
|
||||
|
||||
origptr = &input->start[x];
|
||||
insertArray(sliced, origptr->state, origptr->symbol, origptr->symbol_len,
|
||||
origptr->trigger_idx);
|
||||
|
||||
}
|
||||
|
||||
return sliced;
|
||||
|
||||
}
|
||||
|
||||
/*Carves with `start` included and `end` excluded*/
|
||||
Array *carve(Array *input, int start, int end) {
|
||||
|
||||
terminal *origptr;
|
||||
terminal *term_ptr;
|
||||
Array * sliced = (Array *)malloc(sizeof(Array));
|
||||
initArray(sliced, input->size);
|
||||
for (int x = start; x < end; x++) {
|
||||
|
||||
origptr = &input->start[x];
|
||||
insertArray(sliced, origptr->state, origptr->symbol, origptr->symbol_len,
|
||||
origptr->trigger_idx);
|
||||
|
||||
}
|
||||
|
||||
return sliced;
|
||||
|
||||
}
|
||||
|
||||
/*Concats prefix + feature *mult*/
|
||||
void concatPrefixFeature(Array *prefix, Array *feature) {
|
||||
|
||||
// XXX: Currently we have hardcoded the multiplication threshold for adding
|
||||
// the recursive feature. Might want to fix it to choose a random number upper
|
||||
// bounded by a static value instead.
|
||||
terminal *featureptr;
|
||||
int len = rand() % RECUR_THRESHOLD;
|
||||
for (int x = 0; x < len; x++) {
|
||||
|
||||
for (int y = 0; y < feature->used; y++) {
|
||||
|
||||
featureptr = &feature->start[y];
|
||||
insertArray(prefix, featureptr->state, featureptr->symbol,
|
||||
featureptr->symbol_len, featureptr->trigger_idx);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Benchmark variant of concatPrefixFeature: appends `feature` onto
 * `prefix` exactly 5 times (the fixed repetition count used when
 * measuring random-recursion performance). */
void concatPrefixFeatureBench(Array *prefix, Array *feature) {

  const int reps = 5;
  for (int rep = 0; rep < reps; rep++) {
    for (int i = 0; i < feature->used; i++) {
      terminal *src = &feature->start[i];
      insertArray(prefix, src->state, src->symbol, src->symbol_len,
                  src->trigger_idx);
    }
  }

}
|
||||
|
||||
Array *spliceGF(Array *orig, Array *toSplice, int idx) {
|
||||
|
||||
terminal *toSplicePtr;
|
||||
terminal *tempPtr;
|
||||
// Iterate through the splice candidate from the `idx` till end
|
||||
for (int x = idx; x < toSplice->used; x++) {
|
||||
|
||||
toSplicePtr = &toSplice->start[x];
|
||||
insertArray(orig, toSplicePtr->state, toSplicePtr->symbol,
|
||||
toSplicePtr->symbol_len, toSplicePtr->trigger_idx);
|
||||
|
||||
}
|
||||
|
||||
return orig;
|
||||
|
||||
}
|
||||
|
||||
/* Generate (or extend) a walk through the PDA. When `input` is NULL a new
 * empty walk is allocated and generation starts from the automaton's
 * initial state; otherwise generation continues from the global
 * `curr_state` (set by the caller). A random outgoing transition is taken
 * at each step until `final_state` is reached. */
Array *gen_input(state *pda, Array *input) {

  if (input == NULL) {
    input = calloc(1, sizeof *input);
    initArray(input, INIT_SIZE);
    curr_state = init_state;
  }

  while (curr_state != final_state) {
    state *st = pda + curr_state;
    // Pick a random outgoing transition.
    // NOTE(review): assumes every non-final state has trigger_len > 0,
    // otherwise this divides by zero -- confirm the automaton guarantees it.
    int choice = rand() % st->trigger_len;
    trigger *tr = st->ptr + choice;

    insertArray(input, curr_state, tr->term, tr->term_len, choice);
    curr_state = tr->dest;
  }

  return input;

}
|
||||
|
||||
/* Identical to gen_input, but additionally increments *mut_count once per
 * transition taken, so callers can measure how much generation happened. */
Array *gen_input_count(state *pda, Array *input, int *mut_count) {

  if (input == NULL) {
    input = calloc(1, sizeof *input);
    initArray(input, INIT_SIZE);
    curr_state = init_state;
  }

  while (curr_state != final_state) {
    *mut_count += 1;

    state *st = pda + curr_state;
    int choice = rand() % st->trigger_len;
    trigger *tr = st->ptr + choice;

    insertArray(input, curr_state, tr->term, tr->term_len, choice);
    curr_state = tr->dest;
  }

  return input;

}
|
||||
|
||||
/*Creates a candidate from walk with state hashmap and
 * recursion hashmap
 */

/* Wraps `input` in a Candidate and builds its statemap: one utarray per
 * PDA state listing the walk offsets at which that state occurs. The
 * statemap is later consulted to find compatible splice points.
 * Caller owns the returned Candidate (and its statemap). */
Candidate *gen_candidate(Array *input) {

  terminal *  term_ptr;
  IdxMap_new *idxmapPtr;
  // Declare the State Hash Table: an index list for each of the
  // `numstates` PDA states (numstates is a global set by create_pda)
  IdxMap_new *idxmapStart =
      (IdxMap_new *)malloc(sizeof(IdxMap_new) * numstates);
  for (int x = 0; x < numstates; x++) {

    idxmapPtr = &idxmapStart[x];
    utarray_new(idxmapPtr->nums, &ut_int_icd);

  }

  char *     trigger;
  int        state;
  char *     key;
  Candidate *candidate = (Candidate *)malloc(sizeof(Candidate));
  candidate->walk = input;
  int offset = 0, error;

  // Generate statemap for splicing: record each offset under the state
  // that the walk is in at that offset
  while (offset < input->used) {

    term_ptr = &input->start[offset];
    state = term_ptr->state;
    idxmapPtr = &idxmapStart[state];
    utarray_push_back(idxmapPtr->nums, &offset);
    offset += 1;

  }

  candidate->statemap = idxmapStart;
  return candidate;

}
|
||||
|
||||
/* Extract the state prefix (the text before the first '_') from a
 * transition trigger string. Returns a heap-allocated, NUL-terminated
 * copy that the caller must free. */
char *get_state(char *trigger) {

  int trigger_idx = 0;
  printf("\nTrigger:%s", trigger);
  // Allocate room for the whole trigger: the original fixed 10-byte
  // buffer overflowed for long state names or triggers without a '_'.
  char *state = (char *)malloc(strlen(trigger) + 1);
  // Stop at the separator -- or at the end of the string, so a missing
  // '_' can no longer run past the input.
  while (trigger[trigger_idx] != '_' && trigger[trigger_idx] != '\0') {

    state[trigger_idx] = trigger[trigger_idx];
    trigger_idx += 1;

  }

  printf("\nTrigger Idx:%d", trigger_idx);
  state[trigger_idx] = '\0';
  return state;

}
|
||||
|
||||
/* Debug helper: print the concatenation of all terminal symbols in
 * `input`, labelled with `prefix`, between "=====" separators. */
void print_repr(Array *input, char *prefix) {

  if (!input->used) {

    printf("\n=============");
    printf("\n%s:%s", prefix, "");
    printf("\n=============");
    return;

  }

  // inputlen is the running total of symbol lengths (maintained by
  // insertArray), so a heap buffer of inputlen + 1 always fits. The
  // original used a stack VLA of used * 100 bytes, which could overflow
  // the stack (large walks) or the buffer (symbols longer than ~100).
  char *geninput = (char *)malloc(input->inputlen + 1);
  if (!geninput) { return; }
  geninput[0] = '\0';

  size_t offset = 0;
  size_t pos = 0;  // write cursor avoids O(n^2) repeated strcat scans
  while (offset < input->used) {

    terminal *term_ptr = &input->start[offset];
    memcpy(geninput + pos, term_ptr->symbol, term_ptr->symbol_len);
    pos += term_ptr->symbol_len;
    offset += 1;

  }

  geninput[pos] = '\0';

  printf("\n=============");
  printf("\n%s:%s", prefix, geninput);
  printf("\n=============");
  free(geninput);

}
|
||||
|
||||
// int main(int argc, char*argv[]) {
|
||||
|
||||
// char *mode;
|
||||
// if (argc == 1) {
|
||||
|
||||
// printf("\nUsage: ./gramfuzzer <mode>");
|
||||
// return -1;
|
||||
// }
|
||||
// if (argc >= 2) {
|
||||
|
||||
// mode = argv[1];
|
||||
// printf("\nMode:%s", mode);
|
||||
// }
|
||||
// if (! strcmp(mode, "Generate")) {
|
||||
|
||||
// GenInputBenchmark();
|
||||
// }
|
||||
// else if (! strcmp(mode, "RandomMutation")) {
|
||||
|
||||
// RandomMutationBenchmark();
|
||||
// }
|
||||
// else if (! strcmp(mode, "Splice")) {
|
||||
|
||||
// SpliceMutationBenchmark();
|
||||
// }
|
||||
// else if (! strcmp(mode, "Recursive")) {
|
||||
|
||||
// RandomRecursiveBenchmark();
|
||||
// }
|
||||
// else {
|
||||
|
||||
// printf("\nUnrecognized mode");
|
||||
// return -1;
|
||||
// }
|
||||
// return 0;
|
||||
// }
|
||||
|
248
custom_mutators/gramatron/gramfuzz-mutators.c
Normal file
248
custom_mutators/gramatron/gramfuzz-mutators.c
Normal file
@ -0,0 +1,248 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "afl-fuzz.h"
|
||||
#include "gramfuzz.h"
|
||||
|
||||
/* Random mutation: cut the walk at a random point, keep the prefix, and
 * regenerate the rest of the walk from the state recorded at the cut.
 * Returns a newly generated walk; `input` is left untouched. */
Array *performRandomMutation(state *pda, Array *input) {

  // Choose the cut point and copy everything before it.
  int cut = rand() % input->used;
  Array *prefix = slice(input, cut);

  // Resume generation from the PDA state the walk was in at the cut.
  curr_state = input->start[cut].state;

  return gen_input(pda, prefix);

}
|
||||
|
||||
// Tries to perform splice operation between two automaton walks
UT_icd intpair_icd = {sizeof(intpair_t), NULL, NULL, NULL};

/* Splice `splicecand` into `originput`: find positions where both walks
 * are in the same PDA state (via the precomputed statemap of the
 * original), pick one such pair at random, and return
 * prefix-of-original + suffix-of-candidate. Caller owns the result. */
Array *performSpliceOne(Array *originput, IdxMap_new *statemap_orig,
                        Array *splicecand) {

  UT_array * stateptr, *pairs;
  intpair_t  ip;
  intpair_t *cand;

  terminal *term_ptr;
  Array *   prefix;
  int       state;

  // Initialize the dynamic array holding the splice indice pairs
  utarray_new(pairs, &intpair_icd);

  // Iterate through the splice candidate identifying potential splice points
  // and pushing pair (orig_idx, splice_idx) to a dynamic array.
  // For each matching state, one offset in the original is sampled at
  // random from its statemap entry.
  for (int x = 0; x < splicecand->used; x++) {

    term_ptr = &splicecand->start[x];
    stateptr = statemap_orig[term_ptr->state].nums;
    int length = utarray_len(stateptr);
    if (length) {

      int *splice_idx = (int *)utarray_eltptr(stateptr, rand() % length);
      ip.orig_idx = *splice_idx;
      ip.splice_idx = x;
      utarray_push_back(pairs, &ip);

    }

  }

  // Pick a random pair
  // NOTE(review): if no common state was found, `length` is 0 here and
  // rand() % length divides by zero -- verify callers guarantee overlap.
  int length = utarray_len(pairs);
  cand = (intpair_t *)utarray_eltptr(pairs, rand() % length);

  // Perform the splicing: original up to orig_idx, then the candidate
  // from splice_idx onwards
  prefix = slice(originput, cand->orig_idx);
  Array *spliced = spliceGF(prefix, splicecand, cand->splice_idx);

  utarray_free(pairs);

  return spliced;

}
|
||||
|
||||
/* For each PDA state, collect the walk offsets at which it occurs, and
 * return the per-state offset lists that have at least two entries --
 * i.e. the states usable as recursive features. `*recur_len` receives
 * the number of returned lists. Returns NULL when there are none. */
UT_array **get_dupes(Array *input, int *recur_len) {

  // Variables related to finding duplicates
  int         offset = 0;
  int         state;
  terminal *  term_ptr;
  IdxMap_new *idxMapPtr;
  UT_array ** recurIdx;

  // Declare the Recursive Map Table: one index list per PDA state
  // NOTE(review): idxmapStart and the utarrays of states occurring fewer
  // than twice are not freed on any path below -- looks like a leak;
  // confirm against the callers' lifetime expectations.
  IdxMap_new *idxmapStart =
      (IdxMap_new *)malloc(sizeof(IdxMap_new) * numstates);
  recurIdx = malloc(sizeof(UT_array *) * numstates);

  for (int x = 0; x < numstates; x++) {

    idxMapPtr = &idxmapStart[x];
    utarray_new(idxMapPtr->nums, &ut_int_icd);

  }

  // Obtain frequency distribution of states
  while (offset < input->used) {

    term_ptr = &input->start[offset];
    state = term_ptr->state;
    idxMapPtr = &idxmapStart[state];
    utarray_push_back(idxMapPtr->nums, &offset);
    offset += 1;

  }

  // Retrieve the duplicated states (>= 2 occurrences)
  offset = 0;
  while (offset < numstates) {

    idxMapPtr = &idxmapStart[offset];
    int length = utarray_len(idxMapPtr->nums);
    if (length >= 2) {

      recurIdx[*recur_len] = idxMapPtr->nums;
      *recur_len += 1;

    }

    offset += 1;

  }

  if (*recur_len) {

    return recurIdx;

  } else {

    // NOTE(review): recurIdx itself is not freed on this path either.
    return NULL;

  }

}
|
||||
|
||||
/* Recursive mutation: pick a state that occurs at least twice in the walk
 * (from `recur`, built by get_dupes), carve out the sub-walk between two
 * of its occurrences, append that feature to the prefix a random number
 * of times (concatPrefixFeature), then reattach the tail of the original
 * walk. Returns a new walk; caller owns it. */
Array *doMult(Array *input, UT_array **recur, int recurlen) {

  int       offset = 0;
  int       idx = rand() % (recurlen);
  UT_array *recurMap = recur[idx];
  UT_array *recurPtr;
  Array *   prefix;
  Array *   postfix;
  Array *   feature;

  // Choose two offsets of the chosen state to delimit the recursive feature
  int recurIndices = utarray_len(recurMap);
  int firstIdx = 0;
  int secondIdx = 0;
  getTwoIndices(recurMap, recurIndices, &firstIdx, &secondIdx);

  // Perform the recursive mut: everything before the first occurrence ...
  prefix = slice(input, firstIdx);
  // ... plus the feature between the two occurrences (order-normalised)
  if (firstIdx < secondIdx) {

    feature = carve(input, firstIdx, secondIdx);

  } else {

    feature = carve(input, secondIdx, firstIdx);

  }

  // Repeat the feature a random number of times onto the prefix
  concatPrefixFeature(prefix, feature);

  // GC allocated structures (the feature copy is no longer needed; its
  // symbol pointers are shared with the PDA so only the array is freed)
  free(feature->start);
  free(feature);
  // Reattach the rest of the original walk from secondIdx onwards
  return spliceGF(prefix, input, secondIdx);

}
|
||||
|
||||
/* Pick two distinct offsets from `recur` (a utarray of ints holding the
 * occurrence offsets of one state) by shuffling a copy of its contents
 * with Fisher-Yates and taking the first two. Results go to *firstIdx
 * and *secondIdx. */
void getTwoIndices(UT_array *recur, int recurlen, int *firstIdx,
                   int *secondIdx) {

  // Unroll the utarray into a plain C array.
  int indices[recurlen];
  int count = 0;
  for (int *it = (int *)utarray_front(recur); it != NULL;
       it = (int *)utarray_next(recur, it)) {
    indices[count] = *it;
    count += 1;
  }

  /* Fisher-Yates shuffle
   * (https://www.geeksforgeeks.org/shuffle-a-given-array-using-fisher-yates-shuffle-algorithm/)
   */
  for (int i = count - 1; i > 0; i--) {
    int j = rand() % (i + 1);
    int tmp = indices[i];
    indices[i] = indices[j];
    indices[j] = tmp;
  }

  // Take the first two entries of the shuffled order.
  *firstIdx = indices[0];
  *secondIdx = indices[1];

}
|
||||
|
||||
/* Exchange the values pointed to by a and b. */
void swap(int *a, int *b) {

  int tmp = *b;
  *b = *a;
  *a = tmp;

}
|
||||
|
268
custom_mutators/gramatron/gramfuzz-util.c
Normal file
268
custom_mutators/gramatron/gramfuzz-util.c
Normal file
@ -0,0 +1,268 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "afl-fuzz.h"
|
||||
#include "gramfuzz.h"
|
||||
#ifdef _GNU_SOURCE
|
||||
#undef _GNU_SOURCE
|
||||
#endif
|
||||
#define _GNU_SOURCE
|
||||
#include <sys/mman.h>
|
||||
|
||||
/* Dynamic Array for adding to the input repr
|
||||
* */
|
||||
void initArray(Array *a, size_t initialSize) {
|
||||
|
||||
a->start = (terminal *)calloc(1, sizeof(terminal) * initialSize);
|
||||
a->used = 0;
|
||||
a->size = initialSize;
|
||||
a->inputlen = 0;
|
||||
|
||||
}
|
||||
|
||||
/* Append a terminal (by value) to the walk `a`, growing the backing
 * store when full. `symbol` is stored as a pointer (not copied) -- it is
 * owned by the PDA triggers. inputlen accumulates the symbol lengths so
 * unparse_walk can size its output buffer. */
void insertArray(Array *a, int state, char *symbol, size_t symbol_len,
                 int trigger_idx) {

  // a->used is the number of used entries; it can reach a->size before
  // we must grow.
  if (a->used == a->size) {

    // Double the capacity. The original multiplied it by
    // sizeof(terminal), which over-allocates by orders of magnitude;
    // doubling keeps insertion amortised O(1) with sane memory use.
    size_t new_size = a->size ? a->size * 2 : 1;
    terminal *tmp = (terminal *)realloc(a->start, new_size * sizeof(terminal));
    if (!tmp) {

      // Don't clobber a->start on failure (would leak and then crash).
      fprintf(stderr, "\ninsertArray: realloc failed\n");
      exit(1);

    }

    a->start = tmp;
    a->size = new_size;

  }

  // Add the element
  terminal *term_ptr = &a->start[a->used];
  term_ptr->state = state;
  term_ptr->symbol = symbol;
  term_ptr->symbol_len = symbol_len;
  term_ptr->trigger_idx = trigger_idx;

  // Increment the counters
  a->used += 1;
  a->inputlen += symbol_len;

}
|
||||
|
||||
/* Release the storage owned by a walk array.
 * The terminal entries live inside the single allocation at a->start
 * (insertArray stores them by value), so only that block is freed. The
 * original called free() on &a->start[x] for every element -- undefined
 * behaviour, since those interior pointers were never returned by
 * malloc. The symbol strings are owned by the PDA triggers and are
 * intentionally not freed here. */
void freeArray(Array *a) {

  free(a->start);
  a->start = NULL;
  a->used = a->size = 0;
  a->inputlen = 0;

}
|
||||
|
||||
/* Dynamic array for adding indices of states/recursive features
|
||||
* Source:
|
||||
* https://stackoverflow.com/questions/3536153/c-dynamically-growing-array
|
||||
*/
|
||||
void initArrayIdx(IdxMap *a, size_t initialSize) {
|
||||
|
||||
a->array = (int *)malloc(initialSize * sizeof(int));
|
||||
a->used = 0;
|
||||
a->size = initialSize;
|
||||
|
||||
}
|
||||
|
||||
/* Append an index to the map, doubling the capacity when full. */
void insertArrayIdx(IdxMap *a, int idx) {

  if (a->used == a->size) {

    size_t grown = a->size * 2;
    int *resized = (int *)realloc(a->array, grown * sizeof(int));
    a->array = resized;
    a->size = grown;

  }

  a->array[a->used] = idx;
  a->used += 1;

}
|
||||
|
||||
/* Free the backing store of an index map and zero its bookkeeping. */
void freeArrayIdx(IdxMap *a) {

  free(a->array);
  a->array = NULL;
  a->used = 0;
  a->size = 0;

}
|
||||
|
||||
/* Dynamic array for adding potential splice points
|
||||
*/
|
||||
void initArraySplice(SpliceCandArray *a, size_t initialSize) {
|
||||
|
||||
a->start = (SpliceCand *)malloc(initialSize * sizeof(SpliceCand));
|
||||
a->used = 0;
|
||||
a->size = initialSize;
|
||||
|
||||
}
|
||||
|
||||
/* Append a (candidate, idx) splice point, growing the list when full. */
void insertArraySplice(SpliceCandArray *a, Candidate *candidate, int idx) {

  if (a->used == a->size) {

    // Double the capacity. The original multiplied it by
    // sizeof(SpliceCand), ballooning the allocation far faster than
    // needed; doubling is the conventional amortised O(1) policy.
    size_t new_size = a->size ? a->size * 2 : 1;
    SpliceCand *tmp =
        (SpliceCand *)realloc(a->start, new_size * sizeof(SpliceCand));
    if (!tmp) {

      fprintf(stderr, "\ninsertArraySplice: realloc failed\n");
      exit(1);

    }

    a->start = tmp;
    a->size = new_size;

  }

  // Add the element
  SpliceCand *candptr = &a->start[a->used];
  candptr->splice_cand = candidate;
  candptr->idx = idx;
  a->used += 1;

}
|
||||
|
||||
/* Release a splice-point list and reset its counters.
 * NOTE(review): the parameter is declared IdxMap*, not SpliceCandArray*,
 * and frees a->array -- presumably relying on matching layouts; confirm
 * how callers invoke this. */
void freeArraySplice(IdxMap *a) {

  free(a->array);
  a->array = NULL;
  a->used = 0;
  a->size = 0;

}
|
||||
|
||||
/* Iterative factorial of n; returns 1 for n <= 0. */
int fact(int n) {

  int result = 1;
  for (int i = 2; i <= n; i++) {
    result *= i;
  }

  return result;

}
|
||||
|
||||
/* Uses the walk to create the input in-memory */
|
||||
u8 *unparse_walk(Array *input) {
|
||||
|
||||
terminal *term_ptr;
|
||||
int offset = 0;
|
||||
u8 * unparsed = (u8 *)malloc(input->inputlen + 1);
|
||||
term_ptr = &input->start[offset];
|
||||
strcpy(unparsed, term_ptr->symbol);
|
||||
offset += 1;
|
||||
while (offset < input->used) {
|
||||
|
||||
term_ptr = &input->start[offset];
|
||||
strcat(unparsed, term_ptr->symbol);
|
||||
offset += 1;
|
||||
|
||||
}
|
||||
|
||||
return unparsed;
|
||||
|
||||
}
|
||||
|
||||
/*Dump the input representation into a file*/
/* Format: three size_t header fields (used, size, inputlen) followed by
 * the raw terminal array. The symbol pointers inside the terminals are
 * process-local; parse_input re-derives them from the PDA on load. */
void write_input(Array *input, u8 *fn) {

  FILE *fp;
  // If file already exists, then skip creating the file
  if (access(fn, F_OK) != -1) { return; }

  // "x" in the mode makes the open fail if the file appeared between the
  // access() check above and this call
  fp = fopen(fn, "wbx+");
  // If the input has already been flushed, then skip silently
  // NOTE(review): the comment says "skip silently" but the code prints
  // and exits -- confirm which behaviour is intended.
  if (fp == NULL) {

    fprintf(stderr, "\n File '%s' could not be open, exiting\n", fn);
    exit(1);

  }

  // Write the length parameters
  fwrite(&input->used, sizeof(size_t), 1, fp);
  fwrite(&input->size, sizeof(size_t), 1, fp);
  fwrite(&input->inputlen, sizeof(size_t), 1, fp);

  // Write the dynamic array to file
  // NOTE(review): fwrite return values are unchecked; a full disk leaves
  // a truncated file behind.
  fwrite(input->start, input->size * sizeof(terminal), 1, fp);
  fclose(fp);

}
|
||||
|
||||
/* Read a serialised walk (see write_input for the format) from `fp` and
 * rebuild its in-memory representation against `pda`. Returns a newly
 * allocated Array, or NULL on allocation failure or a truncated/corrupt
 * file. Caller owns the result. */
Array *parse_input(state *pda, FILE *fp) {

  Array *input = (Array *)calloc(1, sizeof(Array));
  if (!input) { return NULL; }

  // Read the length parameters, bailing out on a short read: the
  // original ignored the fread return values and could then allocate or
  // index with garbage sizes.
  if (fread(&input->used, sizeof(size_t), 1, fp) != 1 ||
      fread(&input->size, sizeof(size_t), 1, fp) != 1 ||
      fread(&input->inputlen, sizeof(size_t), 1, fp) != 1) {

    fprintf(stderr, "parse_input: truncated header\n");
    free(input);
    return NULL;

  }

  terminal *start_ptr = (terminal *)calloc(input->size, sizeof(terminal));
  if (!start_ptr) {

    fprintf(stderr, "alloc failed!\n");
    free(input);  // the original leaked `input` on this path
    return NULL;

  }

  // Read the dynamic array to memory
  if (fread(start_ptr, input->size * sizeof(terminal), 1, fp) != 1) {

    fprintf(stderr, "parse_input: truncated body\n");
    free(start_ptr);
    free(input);
    return NULL;

  }

  // The symbol pointers serialised in the file belonged to the writing
  // process and are stale here; re-derive each one from the PDA via the
  // stored state/trigger indices.
  int idx = 0;
  while (idx < input->used) {

    terminal *term = &start_ptr[idx];
    // Find the state
    state *state_ptr = pda + term->state;
    // Find the trigger and update the terminal address
    trigger *trig = (state_ptr->ptr) + term->trigger_idx;
    term->symbol = trig->term;
    idx += 1;

  }

  input->start = start_ptr;

  return input;

}
|
||||
|
||||
// Read the input representation into memory
|
||||
Array *read_input(state *pda, u8 *fn) {
|
||||
|
||||
FILE *fp;
|
||||
fp = fopen(fn, "rb");
|
||||
if (fp == NULL) {
|
||||
|
||||
fprintf(stderr, "\n File '%s' does not exist, exiting\n", fn);
|
||||
exit(1);
|
||||
|
||||
}
|
||||
|
||||
Array *res = parse_input(pda, fp);
|
||||
fclose(fp);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
429
custom_mutators/gramatron/gramfuzz.c
Normal file
429
custom_mutators/gramatron/gramfuzz.c
Normal file
@ -0,0 +1,429 @@
|
||||
// This simple example just creates random buffer <= 100 filled with 'A'
|
||||
// needs -I /path/to/AFLplusplus/include
|
||||
//#include "custom_mutator_helpers.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "afl-fuzz.h"
|
||||
#include "gramfuzz.h"
|
||||
|
||||
#define MUTATORS 4  // Specify the total number of mutators

// Per-instance state for the Gramatron custom mutator.
typedef struct my_mutator {

  afl_state_t *afl;  // Back-pointer to the owning AFL state

  u8 *   mutator_buf;     // Scratch output buffer (MAX_FILE bytes)
  u8 *   unparsed_input;  // Serialised form of the current walk
  Array *mutated_walk;    // Walk produced by the most recent mutation
  Array *orig_walk;       // Walk parsed from the current queue entry

  IdxMap_new *statemap;  // Keeps track of the statemap
  UT_array ** recurIdx;  // Per-state offset lists usable for recursion
  // Get_Dupes_Ret* getdupesret; // Recursive feature map
  int recurlen;  // Number of entries in recurIdx

  int mut_alloced;   // Non-zero when mutated_walk owns heap memory
  int orig_alloced;  // Non-zero when orig_walk owns heap memory
  int mut_idx;  // Signals the current mutator being used, used to cycle through
                // each mutator

  unsigned int seed;  // RNG seed handed in by afl_custom_init

} my_mutator_t;
|
||||
|
||||
/* Build the pushdown-automaton table from the JSON automaton file
 * produced by the Gramatron preprocessing scripts. Also sets the globals
 * init_state, final_state and numstates. Returns a heap-allocated array
 * of numstates entries, indexed directly by state number. */
state *create_pda(u8 *automaton_file) {

  struct json_object *parsed_json;
  state *             pda;
  json_object *       source_obj, *attr;
  int                 arraylen, ii, ii2, trigger_len, error;

  printf("\n[GF] Automaton file passed:%s", automaton_file);
  // NOTE(review): json_object_from_file returns NULL on a missing or
  // malformed file; that case is not checked and would crash below --
  // confirm the callers validate the path first.
  parsed_json = json_object_from_file(automaton_file);

  // Getting final state
  source_obj = json_object_object_get(parsed_json, "final_state");
  printf("\t\nFinal=%s\n", json_object_get_string(source_obj));
  final_state = atoi(json_object_get_string(source_obj));

  // Getting initial state
  source_obj = json_object_object_get(parsed_json, "init_state");
  init_state = atoi(json_object_get_string(source_obj));
  printf("\tInit=%s\n", json_object_get_string(source_obj));

  // Getting number of states (stored value + 1 so state numbers can be
  // used directly as array indices)
  source_obj = json_object_object_get(parsed_json, "numstates");
  numstates = atoi(json_object_get_string(source_obj)) + 1;
  printf("\tNumStates=%d\n", numstates);

  // Allocate state space for each pda state
  pda = (state *)calloc(atoi(json_object_get_string(source_obj)) + 1,
                        sizeof(state));

  // Getting PDA representation
  source_obj = json_object_object_get(parsed_json, "pda");
  enum json_type type;
  json_object_object_foreach(source_obj, key, val) {

    state *  state_ptr;
    trigger *trigger_ptr;
    int      offset;

    // Get the correct offset into the pda to store state information
    state_ptr = pda;
    offset = atoi(key);
    state_ptr += offset;
    // Store the state number
    state_ptr->state_name = offset;

    // Create trigger array of structs, one per outgoing transition
    trigger_len = json_object_array_length(val);
    state_ptr->trigger_len = trigger_len;
    trigger_ptr = (trigger *)calloc(trigger_len, sizeof(trigger));
    state_ptr->ptr = trigger_ptr;

    for (ii = 0; ii < trigger_len; ii++) {

      json_object *obj = json_object_array_get_idx(val, ii);
      // Each trigger is a [id, dest_state, terminal] triple
      attr = json_object_array_get_idx(obj, 0);
      (trigger_ptr)->id = strdup(json_object_get_string(attr));

      attr = json_object_array_get_idx(obj, 1);
      trigger_ptr->dest = atoi(json_object_get_string(attr));

      // Unescape "\n" terminals; everything else is duplicated verbatim
      attr = json_object_array_get_idx(obj, 2);
      if (!strcmp("\\n", json_object_get_string(attr))) {

        trigger_ptr->term = strdup("\n");

      } else {

        trigger_ptr->term = strdup(json_object_get_string(attr));

      }

      trigger_ptr->term_len = strlen(trigger_ptr->term);
      trigger_ptr++;

    }

  }

  // Delete the JSON object
  json_object_put(parsed_json);

  return pda;

}
|
||||
|
||||
/**
 * Initialize the Gramatron custom mutator.
 *
 * Builds the global PDA from the grammar automaton JSON file whose path is
 * taken from the GRAMATRON_AUTOMATION environment variable (the historical
 * spelling GRAMATRON_AUTOMATON is accepted as a fallback).
 *
 * @param afl  AFL state pointer; kept so the splice mutator can pick random
 *             queue entries later
 * @param seed RNG seed supplied by AFL, fed to srand()
 * @return newly allocated, zero-initialized mutator state, or NULL on
 *         allocation failure; exits the process if no automaton is configured
 */
my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {

  srand(seed);
  my_mutator_t *data = calloc(1, sizeof(my_mutator_t));
  if (!data) {

    perror("afl_custom_init alloc");
    return NULL;

  }

  if ((data->mutator_buf = malloc(MAX_FILE)) == NULL) {

    perror("mutator_buf alloc");
    free(data);  /* was leaked on this path before */
    return NULL;

  }

  data->afl = afl;
  data->seed = seed;

  /* calloc already zeroed these; kept explicit for readability */
  data->mut_alloced = 0;
  data->orig_alloced = 0;
  data->mut_idx = 0;
  data->recurlen = 0;

  /* Bug fix: the error message used to name AFL_GRAMATRON_AUTOMATON while
     the code only ever read GRAMATRON_AUTOMATION, so following the message
     could never work.  Read the documented name first, accept the
     alternative spelling, and report the variable that is actually read. */
  char *automaton_file = getenv("GRAMATRON_AUTOMATION");
  if (!automaton_file) { automaton_file = getenv("GRAMATRON_AUTOMATON"); }

  if (automaton_file) {

    pda = create_pda(automaton_file);

  } else {

    fprintf(stderr,
            "\nError: Gramatron needs an automaton json file set in "
            "GRAMATRON_AUTOMATION\n");
    exit(-1);

  }

  return data;

}
|
||||
|
||||
/**
 * Produce one mutated input from the current queue entry's automaton walk.
 *
 * Rotates through the mutation strategies via data->mut_idx on every call:
 * 0 = random mutation, 1 = recursive expansion (only if recursive features
 * exist), 2 = splice with a random queue entry, otherwise generate from
 * scratch.  The previous mutant and its unparsed form are freed here before
 * the new one is produced; *out_buf stays owned by this mutator and is valid
 * only until the next call.
 *
 * @param data         per-instance state from afl_custom_init
 * @param buf          original input bytes (unused: the walk cached in
 *                     data->orig_walk by afl_custom_queue_get is mutated)
 * @param buf_size     size of buf (unused)
 * @param out_buf      receives a pointer to the unparsed mutated input
 * @param add_buf      AFL-supplied splice candidate (unused, see below)
 * @param add_buf_size size of add_buf (unused)
 * @param max_size     maximum allowed output size (not enforced here)
 * @return length of the unparsed mutated input
 */
size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size,
                       u8 **out_buf, uint8_t *add_buf, size_t add_buf_size,
                       size_t max_size) {

  u8 *unparsed_input;  /* NOTE(review): declared but never used below */

  // Pick a mutator
  // int choice = rand() % MUTATORS;
  // data->mut_idx = 1;

  // GC old mutant: free the walk produced by the previous call, if any
  if (data->mut_alloced) {

    free(data->mutated_walk->start);
    free(data->mutated_walk);
    data->mut_alloced = 0;

  };

  // printf("\nChoice:%d", choice);

  if (data->mut_idx == 0) {  // Perform random mutation

    data->mutated_walk = performRandomMutation(pda, data->orig_walk);
    data->mut_alloced = 1;

  } else if (data->mut_idx == 1 &&

             data->recurlen) {  // Perform recursive mutation

    data->mutated_walk =
        doMult(data->orig_walk, data->recurIdx, data->recurlen);
    data->mut_alloced = 1;

  } else if (data->mut_idx == 2) {  // Perform splice mutation

    // we cannot use the supplied splice data so choose a new random file
    u32                 tid = rand() % data->afl->queued_paths;
    struct queue_entry *q = data->afl->queue_buf[tid];

    // Read the automaton walk (.aut sidecar file) for the splice candidate
    u8 *   automaton_fn = alloc_printf("%s.aut", q->fname);
    Array *spliceCandidate = read_input(pda, automaton_fn);

    if (spliceCandidate) {

      data->mutated_walk =
          performSpliceOne(data->orig_walk, data->statemap, spliceCandidate);
      data->mut_alloced = 1;
      free(spliceCandidate->start);
      free(spliceCandidate);

    } else {

      // Candidate had no readable .aut file: fall back to fresh generation
      data->mutated_walk = gen_input(pda, NULL);
      data->mut_alloced = 1;

    }

    ck_free(automaton_fn);

  } else {  // Generate an input from scratch

    data->mutated_walk = gen_input(pda, NULL);
    data->mut_alloced = 1;

  }

  // Cycle to the next mutator
  if (data->mut_idx == MUTATORS - 1)
    data->mut_idx =
        0;  // Wrap around if we have reached end of the mutator list
  else
    data->mut_idx += 1;

  // Unparse the mutated automaton walk into the byte buffer handed to AFL
  if (data->unparsed_input) { free(data->unparsed_input); }
  data->unparsed_input = unparse_walk(data->mutated_walk);
  *out_buf = data->unparsed_input;

  return data->mutated_walk->inputlen;

}
|
||||
|
||||
/**
 * Create the automaton-based representation for the corresponding input
 *
 * For entries produced by this mutator (filename_orig_queue set), the walk
 * of the last mutant is written to "<filename_new_queue>.aut".  For initial
 * seeds (filename_orig_queue NULL) a fresh walk is generated, its .aut
 * sidecar written, and the placeholder queue file is replaced with the
 * unparsed input.
 *
 * @param data pointer returned in afl_custom_init for this fuzz case
 * @param filename_new_queue File name of the new queue entry
 * @param filename_orig_queue File name of the original queue entry
 * @return always 1: the queue file contents may have been rewritten
 */
u8 afl_custom_queue_new_entry(my_mutator_t * data,
                              const uint8_t *filename_new_queue,
                              const uint8_t *filename_orig_queue) {

  // get the filename
  u8 *   automaton_fn, *unparsed_input;
  Array *new_input;
  s32    fd;

  automaton_fn = alloc_printf("%s.aut", filename_new_queue);
  // Check if this method is being called during initialization

  // fprintf(stderr, "new: %s, old: %s, auto: %s\n",
  // filename_new_queue,filename_orig_queue,automaton_fn);

  if (filename_orig_queue) {

    // Entry came from fuzzing: persist the walk of the last mutant
    write_input(data->mutated_walk, automaton_fn);

  } else {

    // Initial seed: generate a fresh walk and overwrite the placeholder
    new_input = gen_input(pda, NULL);
    write_input(new_input, automaton_fn);

    // Update the placeholder file
    if (unlink(filename_new_queue)) {

      PFATAL("Unable to delete '%s'", filename_new_queue);

    }

    unparsed_input = unparse_walk(new_input);
    fd = open(filename_new_queue, O_WRONLY | O_CREAT | O_TRUNC,
              S_IRUSR | S_IWUSR);
    if (fd < 0) { PFATAL("Failed to update file '%s'", filename_new_queue); }
    // NOTE(review): short writes are not retried; `written` is unchecked
    int written = write(fd, unparsed_input, new_input->inputlen + 1);
    close(fd);

    free(new_input->start);
    free(new_input);
    free(unparsed_input);

  }

  ck_free(automaton_fn);

  return 1;

}
|
||||
|
||||
/**
 * Get the corresponding tree representation for the candidate that is to be
 * mutated
 *
 * Loads the automaton walk from "<filename>.aut" into data->orig_walk, then
 * rebuilds two derived structures used by the mutators: the statemap (for
 * each PDA state, the walk offsets that visit it) and the recursion map
 * (the states visited at least twice, i.e. recursive features).
 *
 * @param[in] data pointer returned in afl_custom_init for this fuzz case
 * @param filename File name of the test case in the queue entry
 * @return Return True(1) if the fuzzer will fuzz the queue entry, and
 * False(0) otherwise.
 */
uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) {

  // get the filename
  u8 *        automaton_fn = alloc_printf("%s.aut", filename);
  IdxMap_new *statemap_ptr;
  terminal *  term_ptr;
  int         state;

  // TODO: I don't think we need to update pointers when reading back
  // Probably build two different versions of read_input one for flushing
  // inputs to disk and the other that

  // Free the walk cached for the previous queue entry, if any
  if (data->orig_alloced) {

    free(data->orig_walk->start);
    free(data->orig_walk);
    data->orig_alloced = 0;

  }

  // Free the previous statemap (one utarray per PDA state)
  if (data->statemap) {

    for (int x = 0; x < numstates; x++) {

      utarray_free(data->statemap[x].nums);

    }

    free(data->statemap);

  }

  // Free the previous recursion map; its utarrays are owned by the statemap
  if (data->recurIdx) {

    data->recurlen = 0;
    free(data->recurIdx);

  }

  data->orig_walk = read_input(pda, automaton_fn);
  data->orig_alloced = 1;

  // Create statemap for the fuzz candidate
  IdxMap_new *statemap_start =
      (IdxMap_new *)malloc(sizeof(IdxMap_new) * numstates);
  for (int x = 0; x < numstates; x++) {

    statemap_ptr = &statemap_start[x];
    utarray_new(statemap_ptr->nums, &ut_int_icd);

  }

  // Record, for every terminal in the walk, which state emitted it
  int offset = 0;
  while (offset < data->orig_walk->used) {

    term_ptr = &data->orig_walk->start[offset];
    state = term_ptr->state;
    statemap_ptr = &statemap_start[state];
    utarray_push_back(statemap_ptr->nums, &offset);
    offset += 1;

  }

  data->statemap = statemap_start;

  // Create recursive feature map (if it exists)
  data->recurIdx = malloc(sizeof(UT_array *) * numstates);
  // Retrieve the duplicated states: visited >= 2 times means recursion
  offset = 0;
  while (offset < numstates) {

    statemap_ptr = &data->statemap[offset];
    int length = utarray_len(statemap_ptr->nums);
    if (length >= 2) {

      data->recurIdx[data->recurlen] = statemap_ptr->nums;
      data->recurlen += 1;

    }

    offset += 1;

  }

  // data->getdupesret = get_dupes(data->orig_walk, &data->recurlen);

  ck_free(automaton_fn);
  return 1;

}
|
||||
|
||||
/**
|
||||
* Deinitialize everything
|
||||
*
|
||||
* @param data The data ptr from afl_custom_init
|
||||
*/
|
||||
|
||||
void afl_custom_deinit(my_mutator_t *data) {
|
||||
|
||||
free(data->mutator_buf);
|
||||
free(data);
|
||||
|
||||
}
|
||||
|
253
custom_mutators/gramatron/gramfuzz.h
Normal file
253
custom_mutators/gramatron/gramfuzz.h
Normal file
@ -0,0 +1,253 @@
|
||||
#ifndef _GRAMFUZZ_H
|
||||
|
||||
#define _GRAMFUZZ_H
|
||||
|
||||
#include <json-c/json.h>
|
||||
#include <unistd.h>
|
||||
#include "hashmap.h"
|
||||
#include "uthash.h"
|
||||
#include "utarray.h"
|
||||
|
||||
#define INIT_INPUTS 100 // No. of initial inputs to be generated
|
||||
|
||||
// Set this as `numstates` + 1 where `numstates` is retrieved from gen automata
|
||||
// json #define STATES 63
|
||||
|
||||
#define INIT_SIZE 100 // Initial size of the dynamic array holding the input
|
||||
|
||||
#define SPLICE_CORPUS 10000
|
||||
#define RECUR_THRESHOLD 6
|
||||
#define SIZE_THRESHOLD 2048
|
||||
|
||||
#define FLUSH_INTERVAL \
|
||||
3600 // Inputs that gave new coverage will be dumped every FLUSH_INTERVAL
|
||||
// seconds
|
||||
|
||||
/* A single outgoing PDA transition ("trigger") from a state. */
typedef struct trigger {

  char * id;        /* rule identifier, strdup'd from the automaton JSON */
  int    dest;      /* index of the destination state in the pda array */
  char * term;      /* terminal string emitted when this trigger fires */
  size_t term_len;  /* strlen(term), cached at load time */

} trigger;

/* One state of the grammar automaton (PDA). */
typedef struct state {

  int      state_name;   // Integer State name
  int      trigger_len;  // Number of triggers associated with this state
  trigger *ptr;          // Pointer to beginning of the list of triggers

} state;

/* One step of a walk: the terminal emitted plus where it came from. */
typedef struct terminal {

  int    state;        /* state that emitted this terminal */
  int    trigger_idx;  /* index of the chosen trigger within that state */
  size_t symbol_len;   /* length of symbol */
  char * symbol;       /* the emitted terminal text */

} terminal;

/* Frequency counter bucket used by the hashmap helpers. */
typedef struct buckethash {

  int freq;

} buckethash;

/* NOTE(review): these are tentative definitions in a header with no
   `extern`, so every including .c gets a definition; the build relies on
   -Wl,--allow-multiple-definition (see Makefile) — confirm before reuse. */
int init_state;
int curr_state;
int final_state;
int numstates;  /* number of PDA states + 1; sizes statemap/recursion maps */

/*****************
/ DYNAMIC ARRAY FOR WALKS
*****************/

/* Growable array of `terminal` steps describing one automaton walk. */
typedef struct {

  size_t    used;      /* number of terminals currently stored */
  size_t    size;      /* allocated capacity in terminals */
  size_t    inputlen;  /* total length in bytes of the unparsed input */
  terminal *start;     /* backing storage */

} Array;
|
||||
|
||||
/*****************
/ DYNAMIC ARRAY FOR STATEMAPS/RECURSION MAPS
*****************/

/* Growable int array (legacy variant; newer code uses IdxMap_new). */
typedef struct {

  int *  array;
  size_t used;
  size_t size;

} IdxMap;

/* Per-state list of walk offsets, backed by a uthash utarray. */
typedef struct {

  UT_array *nums;

} IdxMap_new;

/* Return bundle for get_dupes(): the statemap plus recursive indices. */
typedef struct {

  IdxMap_new *idxmap;
  UT_array ** recurIdx;

} Get_Dupes_Ret;

/* Candidate Struct */
typedef struct {

  Array *     walk;      /* the automaton walk for this candidate */
  IdxMap_new *statemap;  /* precomputed state -> walk-offset map */

} Candidate;

/* Splice Mutation helpers*/
typedef struct {

  Candidate *splice_cand;  /* candidate to splice with */
  int        idx;          /* walk offset chosen as the splice point */

} SpliceCand;

/* Growable array of splice candidates. */
typedef struct {

  SpliceCand *start;
  size_t      used;
  size_t      size;

} SpliceCandArray;

// Initialize dynamic array for potential splice points
// NOTE(review): file-scope definition in a header; relies on
// --allow-multiple-definition like the globals above.
SpliceCand potential[SPLICE_CORPUS];

/* Pair of walk offsets: splice point in the original and in the donor. */
typedef struct {

  int orig_idx;
  int splice_idx;

} intpair_t;
|
||||
|
||||
// Initialize dynamic array for potential splice points
|
||||
// SpliceCand potential[SPLICE_CORPUS];
|
||||
// IdxMap_new* rcuridx[STATES];
|
||||
|
||||
/* Prototypes*/
|
||||
Array * slice(Array *, int);
|
||||
state * create_pda(u8 *);
|
||||
Array * gen_input(state *, Array *);
|
||||
Array * gen_input_count(state *, Array *, int *);
|
||||
int updatebucket(map_t, int);
|
||||
void itoa(int, char *, int);
|
||||
void strrreverse(char *, char *);
|
||||
void dbg_hashmap(map_t);
|
||||
void print_repr(Array *, char *);
|
||||
int isSatisfied(map_t);
|
||||
char * get_state(char *);
|
||||
Candidate *gen_candidate(Array *);
|
||||
|
||||
Array *spliceGF(Array *, Array *, int);
|
||||
Array *performSpliceOne(Array *, IdxMap_new *, Array *);
|
||||
/* Mutation Methods*/
|
||||
Array * performRandomMutation(state *, Array *);
|
||||
Array * performRandomMutationCount(state *, Array *, int *);
|
||||
Array * performSpliceMutationBench(state *, Array *, Candidate **);
|
||||
UT_array **get_dupes(Array *, int *);
|
||||
Array * doMult(Array *, UT_array **, int);
|
||||
Array * doMultBench(Array *, UT_array **, int);
|
||||
|
||||
/* Benchmarks*/
|
||||
void SpaceBenchmark(char *);
|
||||
void GenInputBenchmark(char *, char *);
|
||||
void RandomMutationBenchmark(char *, char *);
|
||||
void MutationAggrBenchmark(char *, char *);
|
||||
void SpliceMutationBenchmark(char *, char *);
|
||||
void SpliceMutationBenchmarkOne(char *, char *);
|
||||
void RandomRecursiveBenchmark(char *, char *);
|
||||
|
||||
/* Testers */
|
||||
void SanityCheck(char *);
|
||||
|
||||
/*Helpers*/
|
||||
void initArray(Array *, size_t);
|
||||
void insertArray(Array *, int, char *, size_t, int);
|
||||
void freeArray(Array *);
|
||||
void initArrayIdx(IdxMap *, size_t);
|
||||
void insertArrayIdx(IdxMap *, int);
|
||||
void freeArrayIdx(IdxMap *);
|
||||
void initArraySplice(SpliceCandArray *, size_t);
|
||||
void insertArraySplice(SpliceCandArray *, Candidate *, int);
|
||||
void freeArraySplice(IdxMap *);
|
||||
void getTwoIndices(UT_array *, int, int *, int *);
|
||||
void swap(int *, int *);
|
||||
Array *slice_inverse(Array *, int);
|
||||
void concatPrefixFeature(Array *, Array *);
|
||||
void concatPrefixFeatureBench(Array *, Array *);
|
||||
Array *carve(Array *, int, int);
|
||||
int fact(int);
|
||||
|
||||
void add_to_corpus(struct json_object *, Array *);
|
||||
struct json_object *term_to_json(terminal *);
|
||||
|
||||
/* Gramatron specific prototypes */
|
||||
u8 * unparse_walk(Array *);
|
||||
Array *performSpliceGF(state *, Array *, afl_state_t *);
|
||||
void dump_input(u8 *, char *, int *);
|
||||
void write_input(Array *, u8 *);
|
||||
Array *read_input(state *, u8 *);
|
||||
state *pda;
|
||||
|
||||
// // AFL-specific struct
|
||||
// typedef uint8_t u8;
|
||||
// typedef uint16_t u16;
|
||||
// typedef uint32_t u32;
|
||||
// #ifdef __x86_64__
|
||||
// typedef unsigned long long u64;
|
||||
// #else
|
||||
// typedef uint64_t u64;
|
||||
// #endif /* ^__x86_64__ */
|
||||
//
|
||||
// struct queue_entry {
|
||||
|
||||
// Array* walk; /* Pointer to the automaton walk*/
|
||||
// u32 walk_len; /* Number of tokens in the input*/
|
||||
// Candidate* cand; /* Preprocessed info about the
|
||||
// candidate to allow for faster mutations*/
|
||||
//
|
||||
// u8* fname; /* File name for the test case */
|
||||
// u32 len; /* Input length */
|
||||
// UT_array** recur_idx; /* Keeps track of recursive feature
|
||||
// indices*/
|
||||
//
|
||||
// u32 recur_len; /* The number of recursive features*/
|
||||
//
|
||||
// u8 cal_failed, /* Calibration failed? */
|
||||
// trim_done, /* Trimmed? */
|
||||
// was_fuzzed, /* Had any fuzzing done yet? */
|
||||
// passed_det, /* Deterministic stages passed? */
|
||||
// has_new_cov, /* Triggers new coverage? */
|
||||
// var_behavior, /* Variable behavior? */
|
||||
// favored, /* Currently favored? */
|
||||
// fs_redundant; /* Marked as redundant in the fs? */
|
||||
//
|
||||
// u32 bitmap_size, /* Number of bits set in bitmap */
|
||||
// exec_cksum; /* Checksum of the execution trace */
|
||||
//
|
||||
// u64 exec_us, /* Execution time (us) */
|
||||
// handicap, /* Number of queue cycles behind */
|
||||
// depth; /* Path depth */
|
||||
//
|
||||
// u8* trace_mini; /* Trace bytes, if kept */
|
||||
// u32 tc_ref; /* Trace bytes ref count */
|
||||
//
|
||||
// struct queue_entry *next, /* Next element, if any */
|
||||
// *next_100; /* 100 elements ahead */
|
||||
//
|
||||
// };
|
||||
|
||||
#endif
|
||||
|
606
custom_mutators/gramatron/grammars/js/source.json
Normal file
606
custom_mutators/gramatron/grammars/js/source.json
Normal file
@ -0,0 +1,606 @@
|
||||
{
|
||||
"ARGLIST": [
|
||||
"EXPR ',' ARGLIST",
|
||||
"EXPR",
|
||||
"EXPR ',' ARGLIST",
|
||||
"EXPR"
|
||||
],
|
||||
"ARGS": [
|
||||
"'()'",
|
||||
"'(' ARGLIST ')'",
|
||||
"'()'",
|
||||
"'(' ARGLIST ')'"
|
||||
],
|
||||
"ARITHMETICOPERATION": [
|
||||
"EXPR '/' EXPR",
|
||||
"EXPR '*' EXPR",
|
||||
"EXPR '+' EXPR",
|
||||
"EXPR '-' EXPR",
|
||||
"EXPR '%' EXPR",
|
||||
"EXPR '**' EXPR",
|
||||
"EXPR '++'"
|
||||
],
|
||||
"ARRAY": [
|
||||
"'[' ARRAYCONTENT ']'",
|
||||
"'[]'"
|
||||
],
|
||||
"ARRAYCONTENT": [
|
||||
"EXPR ',' ARRAYCONTENT",
|
||||
"EXPR"
|
||||
],
|
||||
"BOOLEAN": [
|
||||
"'true'",
|
||||
"'false'"
|
||||
],
|
||||
"BYTEWISEOPERATION": [
|
||||
"EXPR '&' EXPR",
|
||||
"EXPR '|' EXPR"
|
||||
],
|
||||
"COMPARISONOPERATION": [
|
||||
"EXPR '<' EXPR"
|
||||
],
|
||||
"DECIMALDIGITS": [
|
||||
"'20'",
|
||||
"'1234'",
|
||||
"'66'",
|
||||
"'234_9'",
|
||||
"'99999999999999999999'"
|
||||
],
|
||||
"DECIMALNUMBER": [
|
||||
"DECIMALDIGITS"
|
||||
],
|
||||
"EXPR": [
|
||||
"'(' EXPR ')'",
|
||||
"VAR",
|
||||
"'delete' SP EXPR",
|
||||
"'new' SP IDENTIFIER ARGS",
|
||||
"LITERAL",
|
||||
"IDENTIFIER",
|
||||
"METHODCALL",
|
||||
"'(' ARITHMETICOPERATION ')'",
|
||||
"'(' COMPARISONOPERATION ')'",
|
||||
"'(' BYTEWISEOPERATION ')'",
|
||||
"'(' LOGICALOPERATION ')'"
|
||||
],
|
||||
"IDENTIFIER": [
|
||||
"'Object'",
|
||||
"VAR",
|
||||
"'Function'",
|
||||
"'main'",
|
||||
"'opt'",
|
||||
"'Boolean'",
|
||||
"'Symbol'",
|
||||
"'JSON'",
|
||||
"'Error'",
|
||||
"'EvalError'",
|
||||
"'RangeError'",
|
||||
"'ReferenceError'",
|
||||
"'SyntaxError'",
|
||||
"'TypeError'",
|
||||
"'URIError'",
|
||||
"'this'",
|
||||
"'Number'",
|
||||
"'Math'",
|
||||
"'Date'",
|
||||
"'String'",
|
||||
"'RegExp'",
|
||||
"'Array'",
|
||||
"'Int8Array'",
|
||||
"'Uint8Array'",
|
||||
"'Uint8ClampedArray'",
|
||||
"'Int16Array'",
|
||||
"'Uint16Array'",
|
||||
"'Int32Array'",
|
||||
"'Uint32Array'",
|
||||
"'Float32Array'",
|
||||
"'Float64Array'",
|
||||
"'DataView'",
|
||||
"'ArrayBuffer'",
|
||||
"'Map'",
|
||||
"'Set'",
|
||||
"'WeakMap'",
|
||||
"'WeakSet'",
|
||||
"'Promise'",
|
||||
"'AsyncFunction'",
|
||||
"'asyncGenerator'",
|
||||
"'Reflect'",
|
||||
"'Proxy'",
|
||||
"'Intl'",
|
||||
"'Intl.Collator'",
|
||||
"'Intl.DateTimeFormat'",
|
||||
"'Intl.NumberFormat'",
|
||||
"'Intl.PluralRules'",
|
||||
"'WebAssembly'",
|
||||
"'WebAssembly.Module'",
|
||||
"'WebAssembly.Instance'",
|
||||
"'WebAssembly.Memory'",
|
||||
"'WebAssembly.Table'",
|
||||
"'WebAssembly.CompileError'",
|
||||
"'WebAssembly.LinkError'",
|
||||
"'WebAssembly.RuntimeError'",
|
||||
"'arguments'",
|
||||
"'Infinity'",
|
||||
"'NaN'",
|
||||
"'undefined'",
|
||||
"'null'",
|
||||
"'console'",
|
||||
"' '"
|
||||
],
|
||||
"IDENTIFIERLIST": [
|
||||
"IDENTIFIER ',' IDENTIFIERLIST",
|
||||
"'(' IDENTIFIERLIST '),' IDENTIFIERLIST",
|
||||
"IDENTIFIER"
|
||||
],
|
||||
"JSBLOCK": [
|
||||
"JSSTATEMENT",
|
||||
"JSSTATEMENT JSBLOCK"
|
||||
],
|
||||
"JSSTATEMENT": [
|
||||
"STATEMENT NEWLINE"
|
||||
],
|
||||
"LITERAL": [
|
||||
"'null'",
|
||||
"BOOLEAN",
|
||||
"NUMBER",
|
||||
"ARRAY"
|
||||
],
|
||||
"LOGICALOPERATION": [
|
||||
"EXPR '&&' EXPR",
|
||||
"EXPR '||' EXPR"
|
||||
],
|
||||
"METHODCALL": [
|
||||
"OBJECT PROPERTY METHODCALL1"
|
||||
],
|
||||
"METHODCALL1": [
|
||||
"'.' METHOD_NAME ARGS METHODCALL1",
|
||||
"' '"
|
||||
],
|
||||
"METHOD_NAME": [
|
||||
"IDENTIFIER",
|
||||
"'print'",
|
||||
"'eval'",
|
||||
"'uneval'",
|
||||
"'isFinite'",
|
||||
"'isNaN'",
|
||||
"'parseFloat'",
|
||||
"'parseInt'",
|
||||
"'decodeURI'",
|
||||
"'decodeURIComponent'",
|
||||
"'encodeURI'",
|
||||
"'encodeURIComponent'",
|
||||
"'escape'",
|
||||
"'unescape'",
|
||||
"'assign'",
|
||||
"'create'",
|
||||
"'defineProperty'",
|
||||
"'defineProperties'",
|
||||
"'entries'",
|
||||
"'freeze'",
|
||||
"'getOwnPropertyDescriptor'",
|
||||
"'getOwnPropertyDescriptors'",
|
||||
"'getOwnPropertyNames'",
|
||||
"'getOwnPropertySymbols'",
|
||||
"'getPrototypeOf'",
|
||||
"'is'",
|
||||
"'isExtensible'",
|
||||
"'isFrozen'",
|
||||
"'isSealed'",
|
||||
"'keys'",
|
||||
"'preventExtensions'",
|
||||
"'seal'",
|
||||
"'setPrototypeOf'",
|
||||
"'values'",
|
||||
"'__defineGetter__'",
|
||||
"'__defineSetter__'",
|
||||
"'__lookupGetter__'",
|
||||
"'__lookupSetter__'",
|
||||
"'hasOwnProperty'",
|
||||
"'isPrototypeOf'",
|
||||
"'propertyIsEnumerable'",
|
||||
"'toSource'",
|
||||
"'toLocaleString'",
|
||||
"'toString'",
|
||||
"'unwatch'",
|
||||
"'valueOf'",
|
||||
"'watch'",
|
||||
"'apply'",
|
||||
"'bind'",
|
||||
"'call'",
|
||||
"'isGenerator'",
|
||||
"'valueOf'",
|
||||
"'for'",
|
||||
"'keyFor'",
|
||||
"'stringify'",
|
||||
"'isInteger'",
|
||||
"'isSafeInteger'",
|
||||
"'toInteger'",
|
||||
"'toExponential'",
|
||||
"'toFixed'",
|
||||
"'toLocaleString'",
|
||||
"'toPrecision'",
|
||||
"'abs'",
|
||||
"'acos'",
|
||||
"'acosh'",
|
||||
"'asin'",
|
||||
"'asinh'",
|
||||
"'atan'",
|
||||
"'atanh'",
|
||||
"'atan2'",
|
||||
"'cbrt'",
|
||||
"'ceil'",
|
||||
"'clz32'",
|
||||
"'cos'",
|
||||
"'cosh'",
|
||||
"'exp'",
|
||||
"'expm1'",
|
||||
"'floor'",
|
||||
"'fround'",
|
||||
"'hypot'",
|
||||
"'imul'",
|
||||
"'log'",
|
||||
"'log1p'",
|
||||
"'log10'",
|
||||
"'log2'",
|
||||
"'max'",
|
||||
"'min'",
|
||||
"'pow'",
|
||||
"'random'",
|
||||
"'round'",
|
||||
"'sign'",
|
||||
"'sin'",
|
||||
"'sinh'",
|
||||
"'sqrt'",
|
||||
"'tan'",
|
||||
"'tanh'",
|
||||
"'trunc'",
|
||||
"'now'",
|
||||
"'parse'",
|
||||
"'UTC'",
|
||||
"'getDate'",
|
||||
"'getDay'",
|
||||
"'getFullYear'",
|
||||
"'getHours'",
|
||||
"'getMilliseconds'",
|
||||
"'getMinutes'",
|
||||
"'getMonth'",
|
||||
"'getSeconds'",
|
||||
"'getTime'",
|
||||
"'getTimezoneOffset'",
|
||||
"'getUTCDate'",
|
||||
"'getUTCDay'",
|
||||
"'getUTCFullYear'",
|
||||
"'getUTCHours'",
|
||||
"'getUTCMilliseconds'",
|
||||
"'getUTCMinutes'",
|
||||
"'getUTCMonth'",
|
||||
"'getUTCSeconds'",
|
||||
"'getYear'",
|
||||
"'setDate'",
|
||||
"'setFullYear'",
|
||||
"'setHours'",
|
||||
"'setMilliseconds'",
|
||||
"'setMinutes'",
|
||||
"'setMonth'",
|
||||
"'setSeconds'",
|
||||
"'setTime'",
|
||||
"'setUTCDate'",
|
||||
"'setUTCFullYear'",
|
||||
"'setUTCHours'",
|
||||
"'setUTCMilliseconds'",
|
||||
"'setUTCMinutes'",
|
||||
"'setUTCMonth'",
|
||||
"'setUTCSeconds'",
|
||||
"'setYear'",
|
||||
"'toDateString'",
|
||||
"'toISOString'",
|
||||
"'toJSON'",
|
||||
"'toGMTString'",
|
||||
"'toLocaleDateString'",
|
||||
"'toLocaleFormat'",
|
||||
"'toLocaleString'",
|
||||
"'toLocaleTimeString'",
|
||||
"'toTimeString'",
|
||||
"'toUTCString'",
|
||||
"'indexOf'",
|
||||
"'substring'",
|
||||
"'charAt'",
|
||||
"'strcmp'",
|
||||
"'fromCharCode'",
|
||||
"'fromCodePoint'",
|
||||
"'raw'",
|
||||
"'charCodeAt'",
|
||||
"'slice'",
|
||||
"'codePointAt'",
|
||||
"'concat'",
|
||||
"'includes'",
|
||||
"'endsWith'",
|
||||
"'lastIndexOf'",
|
||||
"'localeCompare'",
|
||||
"'match'",
|
||||
"'normalize'",
|
||||
"'padEnd'",
|
||||
"'padStart'",
|
||||
"'quote'",
|
||||
"'repeat'",
|
||||
"'replace'",
|
||||
"'search'",
|
||||
"'split'",
|
||||
"'startsWith'",
|
||||
"'substr'",
|
||||
"'toLocaleLowerCase'",
|
||||
"'toLocaleUpperCase'",
|
||||
"'toLowerCase'",
|
||||
"'toUpperCase'",
|
||||
"'trim'",
|
||||
"'trimleft'",
|
||||
"'trimright'",
|
||||
"'anchor'",
|
||||
"'big'",
|
||||
"'blink'",
|
||||
"'bold'",
|
||||
"'fixed'",
|
||||
"'fontcolor'",
|
||||
"'fontsize'",
|
||||
"'italics'",
|
||||
"'link'",
|
||||
"'small'",
|
||||
"'strike'",
|
||||
"'sub'",
|
||||
"'sup'",
|
||||
"'compile'",
|
||||
"'exec'",
|
||||
"'test'",
|
||||
"'from'",
|
||||
"'isArray'",
|
||||
"'of'",
|
||||
"'copyWithin'",
|
||||
"'fill'",
|
||||
"'pop'",
|
||||
"'push'",
|
||||
"'reverse'",
|
||||
"'shift'",
|
||||
"'sort'",
|
||||
"'splice'",
|
||||
"'unshift'",
|
||||
"'concat'",
|
||||
"'join'",
|
||||
"'every'",
|
||||
"'filter'",
|
||||
"'findIndex'",
|
||||
"'forEach'",
|
||||
"'map'",
|
||||
"'reduce'",
|
||||
"'reduceRight'",
|
||||
"'some'",
|
||||
"'move'",
|
||||
"'getInt8'",
|
||||
"'getUint8'",
|
||||
"'getInt16'",
|
||||
"'getUint16'",
|
||||
"'getInt32'",
|
||||
"'getUint32'",
|
||||
"'getFloat32'",
|
||||
"'getFloat64'",
|
||||
"'setInt8'",
|
||||
"'setUint8'",
|
||||
"'setInt16'",
|
||||
"'setUint16'",
|
||||
"'setInt32'",
|
||||
"'setUint32'",
|
||||
"'setFloat32'",
|
||||
"'setFloat64'",
|
||||
"'isView'",
|
||||
"'transfer'",
|
||||
"'clear'",
|
||||
"'get'",
|
||||
"'has'",
|
||||
"'set'",
|
||||
"'add'",
|
||||
"'splat'",
|
||||
"'check'",
|
||||
"'extractLane'",
|
||||
"'replaceLane'",
|
||||
"'load'",
|
||||
"'load1'",
|
||||
"'load2'",
|
||||
"'load3'",
|
||||
"'store'",
|
||||
"'store1'",
|
||||
"'store2'",
|
||||
"'store3'",
|
||||
"'addSaturate'",
|
||||
"'div'",
|
||||
"'mul'",
|
||||
"'neg'",
|
||||
"'reciprocalApproximation'",
|
||||
"'reciprocalSqrtApproximation'",
|
||||
"'subSaturate'",
|
||||
"'shuffle'",
|
||||
"'swizzle'",
|
||||
"'maxNum'",
|
||||
"'minNum'",
|
||||
"'select'",
|
||||
"'equal'",
|
||||
"'notEqual'",
|
||||
"'lessThan'",
|
||||
"'lessThanOrEqual'",
|
||||
"'greaterThan'",
|
||||
"'greaterThanOrEqual'",
|
||||
"'and'",
|
||||
"'or'",
|
||||
"'xor'",
|
||||
"'not'",
|
||||
"'shiftLeftByScalar'",
|
||||
"'shiftRightByScalar'",
|
||||
"'allTrue'",
|
||||
"'anyTrue'",
|
||||
"'fromFloat32x4'",
|
||||
"'fromFloat32x4Bits'",
|
||||
"'fromFloat64x2Bits'",
|
||||
"'fromInt32x4'",
|
||||
"'fromInt32x4Bits'",
|
||||
"'fromInt16x8Bits'",
|
||||
"'fromInt8x16Bits'",
|
||||
"'fromUint32x4'",
|
||||
"'fromUint32x4Bits'",
|
||||
"'fromUint16x8Bits'",
|
||||
"'fromUint8x16Bits'",
|
||||
"'neg'",
|
||||
"'compareExchange'",
|
||||
"'exchange'",
|
||||
"'wait'",
|
||||
"'wake'",
|
||||
"'isLockFree'",
|
||||
"'all'",
|
||||
"'race'",
|
||||
"'reject'",
|
||||
"'resolve'",
|
||||
"'catch'",
|
||||
"'then'",
|
||||
"'finally'",
|
||||
"'next'",
|
||||
"'throw'",
|
||||
"'close'",
|
||||
"'send'",
|
||||
"'apply'",
|
||||
"'construct'",
|
||||
"'deleteProperty'",
|
||||
"'ownKeys'",
|
||||
"'getCanonicalLocales'",
|
||||
"'supportedLocalesOf'",
|
||||
"'resolvedOptions'",
|
||||
"'formatToParts'",
|
||||
"'resolvedOptions'",
|
||||
"'instantiate'",
|
||||
"'instantiateStreaming'",
|
||||
"'compileStreaming'",
|
||||
"'validate'",
|
||||
"'customSections'",
|
||||
"'exports'",
|
||||
"'imports'",
|
||||
"'grow'",
|
||||
"'super'",
|
||||
"'in'",
|
||||
"'instanceof'",
|
||||
"' '"
|
||||
],
|
||||
"NEWLINE": [
|
||||
"'\\n'"
|
||||
],
|
||||
"NUMBER": [
|
||||
"'1/2'",
|
||||
"'1E2'",
|
||||
"'1E02'",
|
||||
"'1E+02'",
|
||||
"'-1'",
|
||||
"'-1.00'",
|
||||
"'-1/2'",
|
||||
"'-1E2'",
|
||||
"'-1E02'",
|
||||
"'-1E+02'",
|
||||
"'1/0'",
|
||||
"'0/0'",
|
||||
"'-2147483648/-1'",
|
||||
"'-9223372036854775808/-1'",
|
||||
"'-0'",
|
||||
"'-0.0'",
|
||||
"'+0'"
|
||||
],
|
||||
"OBJECT": [
|
||||
"IDENTIFIER"
|
||||
],
|
||||
"PROGRAM": [
|
||||
"JSBLOCK"
|
||||
],
|
||||
"PROPERTY": [
|
||||
"'.length' PROPERTY",
|
||||
"'.prototype' PROPERTY",
|
||||
"'.constructor' PROPERTY",
|
||||
"'.__proto__' PROPERTY",
|
||||
"'.__noSuchMethod__' PROPERTY",
|
||||
"'.__count__' PROPERTY",
|
||||
"'.__parent__' PROPERTY",
|
||||
"'.arguments' PROPERTY",
|
||||
"'.arity' PROPERTY",
|
||||
"'.caller' PROPERTY",
|
||||
"'.name' PROPERTY",
|
||||
"'.displayName' PROPERTY",
|
||||
"'.iterator' PROPERTY",
|
||||
"'.asyncIterator' PROPERTY",
|
||||
"'.match' PROPERTY",
|
||||
"'.replace' PROPERTY",
|
||||
"'.search' PROPERTY",
|
||||
"'.split' PROPERTY",
|
||||
"'.hasInstance' PROPERTY",
|
||||
"'.isConcatSpreadable' PROPERTY",
|
||||
"'.unscopables' PROPERTY",
|
||||
"'.species' PROPERTY",
|
||||
"'.toPrimitive' PROPERTY",
|
||||
"'.toStringTag' PROPERTY",
|
||||
"'.fileName' PROPERTY",
|
||||
"'.lineNumber' PROPERTY",
|
||||
"'.columnNumber' PROPERTY",
|
||||
"'.message' PROPERTY",
|
||||
"'.name' PROPERTY",
|
||||
"'.EPSILON' PROPERTY",
|
||||
"'.MAX_SAFE_INTEGER' PROPERTY",
|
||||
"'.MAX_VALUE' PROPERTY",
|
||||
"'.MIN_SAFE_INTEGER' PROPERTY",
|
||||
"'.MIN_VALUE' PROPERTY",
|
||||
"'.NaN' PROPERTY",
|
||||
"'.NEGATIVE_INFINITY' PROPERTY",
|
||||
"'.POSITIVE_INFINITY' PROPERTY",
|
||||
"'.E' PROPERTY",
|
||||
"'.LN2' PROPERTY",
|
||||
"'.LN10' PROPERTY",
|
||||
"'.LOG2E' PROPERTY",
|
||||
"'.LOG10E' PROPERTY",
|
||||
"'.PI' PROPERTY",
|
||||
"'.SQRT1_2' PROPERTY",
|
||||
"'.SQRT2' PROPERTY",
|
||||
"'.flags' PROPERTY",
|
||||
"'.global' PROPERTY",
|
||||
"'.ignoreCase' PROPERTY",
|
||||
"'.multiline' PROPERTY",
|
||||
"'.source' PROPERTY",
|
||||
"'.sticky' PROPERTY",
|
||||
"'.unicode' PROPERTY",
|
||||
"'.buffer' PROPERTY",
|
||||
"'.byteLength' PROPERTY",
|
||||
"'.byteOffset' PROPERTY",
|
||||
"'.BYTES_PER_ELEMENT' PROPERTY",
|
||||
"'.compare' PROPERTY",
|
||||
"'.format' PROPERTY",
|
||||
"'.callee' PROPERTY",
|
||||
"'.caller' PROPERTY",
|
||||
"'.memory' PROPERTY",
|
||||
"'.exports' PROPERTY",
|
||||
"' '"
|
||||
],
|
||||
"SP": [
|
||||
"' '"
|
||||
],
|
||||
"STATEMENT": [
|
||||
"EXPR ';'",
|
||||
"'var' SP VAR '=' EXPR ';'",
|
||||
"'let' SP VAR '=' EXPR ';'",
|
||||
"VAR '=' EXPR ';'",
|
||||
"VAR PROPERTY '=' EXPR ';'",
|
||||
"VAR '[' DECIMALNUMBER ']' '=' EXPR ';'",
|
||||
"'const' SP VAR '=' EXPR ';'",
|
||||
"'typeof' SP EXPR ';'",
|
||||
"'void' SP EXPR ';'",
|
||||
"'return' SP EXPR ';'",
|
||||
"VAR ':'"
|
||||
],
|
||||
"VAR": [
|
||||
"'a'",
|
||||
"'b'",
|
||||
"'c'",
|
||||
"'d'",
|
||||
"'e'",
|
||||
"'f'",
|
||||
"'g'",
|
||||
"'h'"
|
||||
]
|
||||
}
|
File diff suppressed because one or more lines are too long
8707
custom_mutators/gramatron/grammars/php/source.json
Normal file
8707
custom_mutators/gramatron/grammars/php/source.json
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
1195
custom_mutators/gramatron/grammars/ruby/source.json
Normal file
1195
custom_mutators/gramatron/grammars/ruby/source.json
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
434
custom_mutators/gramatron/hashmap.c
Normal file
434
custom_mutators/gramatron/hashmap.c
Normal file
@ -0,0 +1,434 @@
|
||||
/*
|
||||
* Generic map implementation.
|
||||
*/
|
||||
#include "hashmap.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define INITIAL_SIZE (256)
|
||||
#define MAX_CHAIN_LENGTH (8)
|
||||
|
||||
/* We need to keep keys and values */
/* One slot of the open-addressed table. */
typedef struct _hashmap_element {

  char *key;     /* caller-owned key string */
  int   in_use;  /* nonzero when this slot holds a live entry */
  any_t data;    /* opaque value stored by the caller */

} hashmap_element;

/* A hashmap has some maximum size and current size,
 * as well as the data to hold. */
typedef struct _hashmap_map {

  int              table_size;  /* allocated slot count */
  int              size;        /* live entry count */
  hashmap_element *data;        /* slot array of length table_size */

} hashmap_map;
|
||||
|
||||
/*
|
||||
* Return an empty hashmap, or NULL on failure.
|
||||
*/
|
||||
map_t hashmap_new() {
|
||||
|
||||
hashmap_map *m = (hashmap_map *)malloc(sizeof(hashmap_map));
|
||||
if (!m) goto err;
|
||||
|
||||
m->data = (hashmap_element *)calloc(INITIAL_SIZE, sizeof(hashmap_element));
|
||||
if (!m->data) goto err;
|
||||
|
||||
m->table_size = INITIAL_SIZE;
|
||||
m->size = 0;
|
||||
|
||||
return m;
|
||||
err:
|
||||
if (m) hashmap_free(m);
|
||||
return NULL;
|
||||
|
||||
}
|
||||
|
||||
/* The implementation here was originally done by Gary S. Brown. I have
|
||||
borrowed the tables directly, and made some minor changes to the
|
||||
crc32-function (including changing the interface). //ylo */
|
||||
|
||||
/* ============================================================= */
|
||||
/* COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or */
|
||||
/* code or tables extracted from it, as desired without restriction. */
|
||||
/* */
|
||||
/* First, the polynomial itself and its table of feedback terms. The */
|
||||
/* polynomial is */
|
||||
/* X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 */
|
||||
/* */
|
||||
/* Note that we take it "backwards" and put the highest-order term in */
|
||||
/* the lowest-order bit. The X^32 term is "implied"; the LSB is the */
|
||||
/* X^31 term, etc. The X^0 term (usually shown as "+1") results in */
|
||||
/* the MSB being 1. */
|
||||
/* */
|
||||
/* Note that the usual hardware shift register implementation, which */
|
||||
/* is what we're using (we're merely optimizing it by doing eight-bit */
|
||||
/* chunks at a time) shifts bits into the lowest-order term. In our */
|
||||
/* implementation, that means shifting towards the right. Why do we */
|
||||
/* do it this way? Because the calculated CRC must be transmitted in */
|
||||
/* order from highest-order term to lowest-order term. UARTs transmit */
|
||||
/* characters in order from LSB to MSB. By storing the CRC this way, */
|
||||
/* we hand it to the UART in the order low-byte to high-byte; the UART */
|
||||
/* sends each low-bit to hight-bit; and the result is transmission bit */
|
||||
/* by bit from highest- to lowest-order term without requiring any bit */
|
||||
/* shuffling on our part. Reception works similarly. */
|
||||
/* */
|
||||
/* The feedback terms table consists of 256, 32-bit entries. Notes: */
|
||||
/* */
|
||||
/* The table can be generated at runtime if desired; code to do so */
|
||||
/* is shown later. It might not be obvious, but the feedback */
|
||||
/* terms simply represent the results of eight shift/xor opera- */
|
||||
/* tions for all combinations of data and CRC register values. */
|
||||
/* */
|
||||
/* The values must be right-shifted by eight bits by the "updcrc" */
|
||||
/* logic; the shift must be unsigned (bring in zeroes). On some */
|
||||
/* hardware you could probably optimize the shift in assembler by */
|
||||
/* using byte-swap instructions. */
|
||||
/* polynomial $edb88320 */
|
||||
/* */
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
||||
/* CRC-32 feedback-term table for the reflected polynomial 0xedb88320,
   taken from Gary S. Brown's public-domain implementation (see the
   comment block above).  Read-only, hence static const. */
static const unsigned long crc32_tab[] = {

    0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
    0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
    0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
    0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
    0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
    0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
    0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
    0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
    0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
    0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
    0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
    0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
    0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
    0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
    0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
    0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
    0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
    0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
    0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
    0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
    0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
    0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
    0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
    0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
    0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
    0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
    0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
    0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
    0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
    0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
    0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
    0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
    0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
    0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
    0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
    0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
    0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
    0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
    0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
    0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
    0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
    0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
    0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
    0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
    0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
    0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
    0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
    0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
    0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
    0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
    0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
    0x2d02ef8dL};

/* Return a 32-bit CRC of the contents of the buffer (initial value 0,
   no final XOR -- this is NOT the zlib/PKZIP variant). */
unsigned long crc32(const unsigned char *s, unsigned int len) {

  unsigned long        crc = 0;
  const unsigned char *end = s + len;

  /* Table-driven update, one byte per step. */
  while (s < end) {

    crc = crc32_tab[(crc ^ *s++) & 0xff] ^ (crc >> 8);

  }

  return crc;

}
|
||||
|
||||
/*
 * Hashing function for a string: CRC-32 of the key, scrambled further with
 * two classic integer mixers, reduced to a bucket index in [0, table_size).
 */
unsigned int hashmap_hash_int(hashmap_map *m, char *keystring) {

  unsigned long key = crc32((unsigned char *)(keystring), strlen(keystring));

  /* Robert Jenkins' 32 bit Mix Function */
  /* NOTE(review): written for 32-bit values; on LP64 `unsigned long` is
     64 bits, so the mixing differs from the 32-bit original -- harmless
     for bucket selection, but worth confirming if hashes are persisted. */
  key += (key << 12);
  key ^= (key >> 22);
  key += (key << 4);
  key ^= (key >> 9);
  key += (key << 10);
  key ^= (key >> 2);
  key += (key << 7);
  key ^= (key >> 12);

  /* Knuth's Multiplicative Method */
  key = (key >> 3) * 2654435761;

  /* table_size is always a power-of-two multiple of INITIAL_SIZE, but the
     reduction is a plain modulo, so any positive size works. */
  return key % m->table_size;

}
|
||||
|
||||
/*
 * Return the integer of the location in data
 * to store the point to the item, or MAP_FULL.
 *
 * Returns either the slot of an existing entry with the same key (so a
 * subsequent put overwrites it) or the first empty slot on the probe path.
 * MAP_FULL is returned when the map is at least half full, or when
 * MAX_CHAIN_LENGTH consecutive slots are occupied by other keys; the
 * caller reacts by rehashing into a larger table.
 */
int hashmap_hash(map_t in, char *key) {

  int curr;
  int i;

  /* Cast the hashmap */
  hashmap_map *m = (hashmap_map *)in;

  /* If full, return immediately -- growing at 50% load keeps probe
     chains short. */
  if (m->size >= (m->table_size / 2)) return MAP_FULL;

  /* Find the best index */
  curr = hashmap_hash_int(m, key);

  /* Linear probing */
  for (i = 0; i < MAX_CHAIN_LENGTH; i++) {

    /* Empty slot: this is where a new entry for `key` belongs. */
    if (m->data[curr].in_use == 0) return curr;

    /* Same key already stored: return its slot for overwrite. */
    if (m->data[curr].in_use == 1 && (strcmp(m->data[curr].key, key) == 0))
      return curr;

    curr = (curr + 1) % m->table_size;

  }

  return MAP_FULL;

}
|
||||
|
||||
/*
 * Doubles the size of the hashmap, and rehashes all the elements
 *
 * Returns MAP_OK on success or MAP_OMEM if the larger slot array cannot
 * be allocated (the map is left untouched in that case).
 */
int hashmap_rehash(map_t in) {

  int              i;
  int              old_size;
  hashmap_element *curr;

  /* Setup the new elements */
  hashmap_map *    m = (hashmap_map *)in;
  hashmap_element *temp =
      (hashmap_element *)calloc(2 * m->table_size, sizeof(hashmap_element));
  if (!temp) return MAP_OMEM;

  /* Update the array */
  curr = m->data;
  m->data = temp;

  /* Update the size; size is reset to 0 because hashmap_put below
     re-counts every re-inserted entry. */
  old_size = m->table_size;
  m->table_size = 2 * m->table_size;
  m->size = 0;

  /* Rehash the elements */
  for (i = 0; i < old_size; i++) {

    int status;

    if (curr[i].in_use == 0) continue;

    status = hashmap_put(m, curr[i].key, curr[i].data);
    /* NOTE(review): on a mid-rehash failure this returns without freeing
       `curr`, leaking the old array and losing the not-yet-moved entries.
       In practice put into a half-empty double-sized table should not
       fail, but confirm if hardening is needed. */
    if (status != MAP_OK) return status;

  }

  /* All entries moved; release the old slot array. */
  free(curr);

  return MAP_OK;

}
|
||||
|
||||
/*
|
||||
* Add a pointer to the hashmap with some key
|
||||
*/
|
||||
int hashmap_put(map_t in, char *key, any_t value) {
|
||||
|
||||
int index;
|
||||
hashmap_map *m;
|
||||
|
||||
/* Cast the hashmap */
|
||||
m = (hashmap_map *)in;
|
||||
|
||||
/* Find a place to put our value */
|
||||
index = hashmap_hash(in, key);
|
||||
while (index == MAP_FULL) {
|
||||
|
||||
if (hashmap_rehash(in) == MAP_OMEM) { return MAP_OMEM; }
|
||||
index = hashmap_hash(in, key);
|
||||
|
||||
}
|
||||
|
||||
/* Set the data */
|
||||
m->data[index].data = value;
|
||||
m->data[index].key = key;
|
||||
m->data[index].in_use = 1;
|
||||
m->size++;
|
||||
|
||||
return MAP_OK;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Get your pointer out of the hashmap with a key
|
||||
*/
|
||||
int hashmap_get(map_t in, char *key, any_t *arg) {
|
||||
|
||||
int curr;
|
||||
int i;
|
||||
hashmap_map *m;
|
||||
|
||||
/* Cast the hashmap */
|
||||
m = (hashmap_map *)in;
|
||||
|
||||
/* Find data location */
|
||||
curr = hashmap_hash_int(m, key);
|
||||
|
||||
/* Linear probing, if necessary */
|
||||
for (i = 0; i < MAX_CHAIN_LENGTH; i++) {
|
||||
|
||||
int in_use = m->data[curr].in_use;
|
||||
if (in_use == 1) {
|
||||
|
||||
if (strcmp(m->data[curr].key, key) == 0) {
|
||||
|
||||
*arg = (m->data[curr].data);
|
||||
return MAP_OK;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
curr = (curr + 1) % m->table_size;
|
||||
|
||||
}
|
||||
|
||||
*arg = NULL;
|
||||
|
||||
/* Not found */
|
||||
return MAP_MISSING;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate the function parameter over each element in the hashmap. The
|
||||
* additional any_t argument is passed to the function as its first
|
||||
* argument and the hashmap element is the second.
|
||||
*/
|
||||
int hashmap_iterate(map_t in, PFany f, any_t item) {
|
||||
|
||||
int i;
|
||||
|
||||
/* Cast the hashmap */
|
||||
hashmap_map *m = (hashmap_map *)in;
|
||||
|
||||
/* On empty hashmap, return immediately */
|
||||
if (hashmap_length(m) <= 0) return MAP_MISSING;
|
||||
|
||||
/* Linear probing */
|
||||
for (i = 0; i < m->table_size; i++)
|
||||
if (m->data[i].in_use != 0) {
|
||||
|
||||
any_t data = (any_t)(m->data[i].data);
|
||||
int status = f(item, data);
|
||||
if (status != MAP_OK) { return status; }
|
||||
|
||||
}
|
||||
|
||||
return MAP_OK;
|
||||
|
||||
}
|
||||
|
||||
/*
 * Remove an element with that key from the map.
 *
 * Only the slot is cleared: the key string and the stored value are NOT
 * freed -- the caller owns both.  Returns MAP_OK when the key was found
 * and removed, MAP_MISSING otherwise.
 */
int hashmap_remove(map_t in, char *key) {

  int          i;
  int          curr;
  hashmap_map *m;

  /* Cast the hashmap */
  m = (hashmap_map *)in;

  /* Find key */
  curr = hashmap_hash_int(m, key);

  /* Linear probing, if necessary */
  for (i = 0; i < MAX_CHAIN_LENGTH; i++) {

    int in_use = m->data[curr].in_use;
    if (in_use == 1) {

      if (strcmp(m->data[curr].key, key) == 0) {

        /* Blank out the fields */
        m->data[curr].in_use = 0;
        m->data[curr].data = NULL;
        m->data[curr].key = NULL;

        /* Reduce the size */
        m->size--;
        return MAP_OK;

      }

    }

    curr = (curr + 1) % m->table_size;

  }

  /* Data not found */
  return MAP_MISSING;

}
|
||||
|
||||
/* Deallocate the hashmap */
|
||||
void hashmap_free(map_t in) {
|
||||
|
||||
hashmap_map *m = (hashmap_map *)in;
|
||||
free(m->data);
|
||||
free(m);
|
||||
|
||||
}
|
||||
|
||||
/* Return the length of the hashmap */
|
||||
int hashmap_length(map_t in) {
|
||||
|
||||
hashmap_map *m = (hashmap_map *)in;
|
||||
if (m != NULL)
|
||||
return m->size;
|
||||
else
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
83
custom_mutators/gramatron/hashmap.h
Normal file
83
custom_mutators/gramatron/hashmap.h
Normal file
@ -0,0 +1,83 @@
|
||||
/*
 * Generic hashmap manipulation functions
 *
 * Originally by Elliot C Back -
 * http://elliottback.com/wp/hashmap-implementation-in-c/
 *
 * Modified by Pete Warden to fix a serious performance problem, support strings
 * as keys and removed thread synchronization - http://petewarden.typepad.com
 */
#ifndef __HASHMAP_H__
#define __HASHMAP_H__

/* Status codes returned by the hashmap functions. */
#define MAP_MISSING -3 /* No such element */
#define MAP_FULL -2    /* Hashmap is full */
#define MAP_OMEM -1    /* Out of Memory */
#define MAP_OK 0       /* OK */

/*
 * any_t is a pointer. This allows you to put arbitrary structures in
 * the hashmap.
 */
typedef void *any_t;

/*
 * PFany is a pointer to a function that can take two any_t arguments
 * and return an integer. Returns status code..
 */
typedef int (*PFany)(any_t, any_t);

/*
 * map_t is a pointer to an internally maintained data structure.
 * Clients of this package do not need to know how hashmaps are
 * represented.  They see and manipulate only map_t's.
 */
typedef any_t map_t;

/*
 * Return an empty hashmap. Returns NULL if empty.
 */
extern map_t hashmap_new();

/*
 * Iteratively call f with argument (item, data) for
 * each element data in the hashmap. The function must
 * return a map status code. If it returns anything other
 * than MAP_OK the traversal is terminated. f must
 * not reenter any hashmap functions, or deadlock may arise.
 */
extern int hashmap_iterate(map_t in, PFany f, any_t item);

/*
 * Add an element to the hashmap. Return MAP_OK or MAP_OMEM.
 * The key pointer is stored, not copied; it must outlive the entry.
 */
extern int hashmap_put(map_t in, char *key, any_t value);

/*
 * Get an element from the hashmap. Return MAP_OK or MAP_MISSING.
 */
extern int hashmap_get(map_t in, char *key, any_t *arg);

/*
 * Remove an element from the hashmap. Return MAP_OK or MAP_MISSING.
 * Does not free the key or value.
 */
extern int hashmap_remove(map_t in, char *key);

/*
 * Get any element. Return MAP_OK or MAP_MISSING.
 * remove - should the element be removed from the hashmap
 *
 * NOTE(review): declared here but no definition is visible in hashmap.c --
 * confirm it exists before calling, or linking will fail.
 */
extern int hashmap_get_one(map_t in, any_t *arg, int remove);

/*
 * Free the hashmap.  Does not free keys or values.
 */
extern void hashmap_free(map_t in);

/*
 * Get the current size of a hashmap
 */
extern int hashmap_length(map_t in);

#endif
|
||||
|
275
custom_mutators/gramatron/preprocess/construct_automata.py
Normal file
275
custom_mutators/gramatron/preprocess/construct_automata.py
Normal file
@ -0,0 +1,275 @@
|
||||
import sys
import json
import re
from collections import defaultdict
# import pygraphviz as pgv

# Module-level state shared by the worklist algorithm below.
gram_data = None    # parsed GNF grammar (JSON dict), set in main()
state_count = 1     # next fresh FSA state number to hand out
pda = []            # accumulated transition dicts (shape: see get_template())
worklist = []       # [state, stack] pairs still awaiting expansion
state_stacks = {}   # state name -> stack contents when that state was created

# === If user provides upper bound on the stack size during FSA creation ===
# Specifies the upper bound to which the stack is allowed to grow
# If for any generated state, the stack size is >= stack_limit then this
# state is not expanded further.
stack_limit = None
# Holds the set of unexpanded rules owing to the user-passed stack constraint limit
unexpanded_rules = set()
|
||||
|
||||
def main(grammar, limit):
    """Convert a GNF grammar (JSON file) into a pushdown automaton.

    Expands the grammar breadth-first from its start symbol, then writes
    ``<name>_automata.json`` (postprocessed form for the C module) and
    ``<name>_transition.json`` (raw transitions).  When ``limit`` is set the
    FSA is an under-approximation and skipped rules go to
    ``<name>_disallowed.json``.
    """
    global worklist, gram_data, stack_limit
    current = '0'
    stack_limit = limit
    if stack_limit:
        print ('[X] Operating in bounded stack mode')

    with open(grammar, 'r') as fd:
        gram_data = json.load(fd)
    start_symbol = gram_data["Start"][0]
    # Seed the worklist with the initial state and the start symbol on the stack.
    worklist.append([current, [start_symbol]])
    filename = (grammar.split('/')[-1]).split('.')[0]

    while worklist:
        # Take an element from the worklist and expand it (prep_transitions
        # pushes newly created states back onto the worklist).
        element = worklist.pop(0)
        prep_transitions(element)

    # NOTE(review): pda_file/graph_file are computed but unused below.
    pda_file = filename + '_transition.json'
    graph_file = filename + '.png'
    # XXX Commented out because visualization of current version of PHP causes segfault
    # Create the graph and dump the transitions to a file
    # create_graph(filename)
    transformed = postprocess()
    with open(filename + '_automata.json', 'w+') as fd:
        json.dump(transformed, fd)
    with open(filename + '_transition.json', 'w+') as fd:
        json.dump(pda, fd)
    if not unexpanded_rules:
        print ('[X] No unexpanded rules, absolute FSA formed')
        exit(0)
    else:
        print ('[X] Certain rules were not expanded due to stack size limit. Inexact approximation has been created and the disallowed rules have been put in {}_disallowed.json'.format(filename))
        print ('[X] Number of unexpanded rules:', len(unexpanded_rules))
        with open(filename + '_disallowed.json', 'w+') as fd:
            json.dump(list(unexpanded_rules), fd)
|
||||
|
||||
def create_graph(filename):
    '''
    Creates a DOT representation of the PDA and renders it to <filename>.png.

    NOTE(review): depends on pygraphviz, whose import is commented out at the
    top of this module -- calling this as-is raises NameError on `pgv`.  The
    only call site (in main) is also commented out.
    '''
    global pda
    G = pgv.AGraph(strict = False, directed = True)
    for transition in pda:
        print ('Transition:', transition)
        G.add_edge(transition['source'], transition['dest'],
                label = 'Term:{}'.format(transition['terminal']))
    G.layout(prog = 'dot')
    print ('Do it up 2')
    G.draw(filename + '.png')
|
||||
|
||||
def prep_transitions(element):
    '''
    Generates transitions for one worklist element.

    ``element`` is ``[state, stack]``: the FSA state to expand and the list of
    pending nonterminals (stack top first).  For each production of the top
    nonterminal this either adds a forward edge to a fresh state (pushed onto
    the worklist) or, when an identical stack already exists, a backward edge
    to that earlier state.  Mutates the module globals pda, worklist,
    state_stacks, state_count and unexpanded_rules.
    '''
    global gram_data, state_count, pda, worklist, state_stacks, stack_limit, unexpanded_rules
    state = element[0]
    try:
        nonterminal = element[1][0]
    except IndexError:
        # Final state was encountered, pop from worklist without doing anything
        return
    rules = gram_data[nonterminal]
    count = 1
    for rule in rules:
        isRecursive = False
        terminal, ss, termIsRegex = tokenize(rule)
        transition = get_template()
        transition['trigger'] = '_'.join([state, str(count)])
        transition['source'] = state
        transition['dest'] = str(state_count)
        transition['ss'] = ss
        transition['terminal'] = terminal
        transition['rule'] = "{} -> {}".format(nonterminal, rule )
        if termIsRegex:
            transition['termIsRegex'] = True

        # Creating a state stack for the new state: copy the current state's
        # stack, pop the nonterminal just expanded, push the rule's stack
        # symbols (ss) in order.
        try:
            state_stack = state_stacks[state][:]
        except:
            state_stack = []
        if len(state_stack):
            state_stack.pop(0)
        if ss:
            for symbol in ss[::-1]:
                state_stack.insert(0, symbol)
        transition['stack'] = state_stack

        # Check if a recursive transition state being created, if so make a backward
        # edge and don't add anything to the worklist
        # NOTE(review): stacks are compared as sorted multisets, not as
        # ordered stacks -- confirm this equivalence is intended.
        if state_stacks:
            for state_element, stack in state_stacks.items():
                if sorted(stack) == sorted(state_stack):
                    transition['dest'] = state_element
                    pda.append(transition)
                    count += 1
                    isRecursive = True
                    break
        # If a recursive transition exercised don't add the same transition as a new
        # edge, continue onto the next transitions
        if isRecursive:
            continue

        # If the generated state has a stack size > stack_limit then that state is abandoned
        # and not added to the FSA or the worklist for further expansion
        if stack_limit:
            if (len(transition['stack']) > stack_limit):
                unexpanded_rules.add(transition['rule'])
                continue

        # Create transitions for the non-recursive relations and add to the worklist
        pda.append(transition)
        worklist.append([transition['dest'], transition['stack']])
        state_stacks[transition['dest']] = state_stack
        state_count += 1
        count += 1
|
||||
|
||||
def tokenize(rule):
    """Split a GNF rule into ``(terminal, stack_symbols, is_regex)``.

    A rule looks like ``'term' A B`` or ``r'regex' A B``: a quoted terminal,
    an optional ``r`` regex prefix, and trailing stack symbols.  Returns the
    unquoted terminal text, the list of stack symbols (``None`` when there
    are none), and whether the ``r`` prefix was present.  Raises
    AssertionError for rules not in GNF form.
    """
    match = re.compile("([r])*\'([\s\S]+)\'([\s\S]*)").match(rule)
    prefix, body, rest = match.group(1), match.group(2), match.group(3)
    if not body:
        raise AssertionError("Rule is not in GNF form")
    symbols = rest.split() if rest else None
    return body, symbols, bool(prefix)
|
||||
|
||||
def get_template():
    """Return a fresh, empty transition record.

    A new dict (with a new 'stack' list) is built on every call so that
    mutable fields are never shared between transitions.
    """
    return {
        'trigger': None,
        'source': None,
        'dest': None,
        'termIsRegex': False,
        'terminal': None,
        'stack': [],
    }
|
||||
|
||||
def postprocess():
    '''
    Creates a representation to be passed on to the C-module.

    Groups the global transition list by source state into
    ``{init_state, final_state, numstates, pda}``.  When a stack limit was
    imposed, transitions into unexpanded (blocked) states are culled first.
    '''
    global pda
    final_struct = {}
    memoized = defaultdict(list)
    # Supporting data structures for if stack limit is imposed
    culled_pda = []
    culled_final = []
    num_transitions = 0  # Keep track of number of transitions

    states, final, initial = _get_states()

    print (initial)
    # NOTE(review): `initial` is the *joined* names of all source-only
    # states, so this also fails if the single initial state's name is
    # longer than one character.
    assert len(initial) == 1, 'More than one init state found'

    # Cull transitions to states which were not expanded owing to the stack limit
    if stack_limit:

        blocklist = []
        for final_state in final:
            for transition in pda:
                # A "final" state reached with a non-empty stack is really an
                # unexpanded state: block it instead of keeping it.
                if (transition["dest"] == final_state) and (len(transition["stack"]) > 0):
                    blocklist.append(transition["dest"])
                    continue
                else:
                    culled_pda.append(transition)

        culled_final = [state for state in final if state not in blocklist]

        assert len(culled_final) == 1, 'More than one final state found'

        for transition in culled_pda:
            state = transition["source"]
            if transition["dest"] in blocklist:
                continue
            num_transitions += 1
            memoized[state].append([transition["trigger"], transition["dest"],
                transition["terminal"]])
        final_struct["init_state"] = initial
        final_struct["final_state"] = culled_final[0]
        # The reason we do this is because when states are culled, the indexing is
        # still relative to the actual number of states hence we keep numstates recorded
        # as the original number of states
        print ('[X] Actual Number of states:', len(memoized.keys()))
        print ('[X] Number of transitions:', num_transitions)
        print ('[X] Original Number of states:', len(states))
        final_struct["numstates"] = len(states)
        final_struct["pda"] = memoized
        return final_struct

    # Running FSA construction in exact approximation mode and postprocessing it like so
    for transition in pda:
        state = transition["source"]
        memoized[state].append([transition["trigger"], transition["dest"],
            transition["terminal"]])

    final_struct["init_state"] = initial
    final_struct["final_state"] = final[0]
    print ('[X] Actual Number of states:', len(memoized.keys()))
    final_struct["numstates"] = len(memoized.keys())
    final_struct["pda"] = memoized
    return final_struct
|
||||
|
||||
|
||||
def _get_states():
    """Derive state sets from the global transition list `pda`.

    Returns ``(all_states, final_states, initial_state)``: every state seen,
    the states appearing only as destinations, and the state(s) appearing
    only as sources joined into a single string.
    """
    global pda
    sources = {transition["source"] for transition in pda}
    dests = {transition["dest"] for transition in pda}
    return (
        list(sources | dests),
        list(dests - sources),
        ''.join(sources - dests),
    )
|
||||
|
||||
if __name__ == '__main__':
    import argparse
    # CLI entry point: --gf <grammar file> [--limit <n>] builds the PDA.
    parser = argparse.ArgumentParser(description = 'Script to convert GNF grammar to PDA')
    parser.add_argument(
            '--gf',
            type = str,
            help = 'Location of GNF grammar')
    parser.add_argument(
            '--limit',
            type = int,
            default = None,
            help = 'Specify the upper bound for the stack size')
    args = parser.parse_args()
    main(args.gf, args.limit)
|
289
custom_mutators/gramatron/preprocess/gnf_converter.py
Normal file
289
custom_mutators/gramatron/preprocess/gnf_converter.py
Normal file
@ -0,0 +1,289 @@
|
||||
import sys
import re
import copy
import json
from string import ascii_uppercase
from itertools import combinations
from collections import defaultdict

# Shared state for minting fresh nonterminal names (consumed via
# get_nonterminal(), defined elsewhere in this file).
NONTERMINALSET = []
COUNT = 1
|
||||
|
||||
def main(grammar_file, out, start):
    """Convert a grammar (.g4 or preprocessed .json) to GNF and dump it.

    Pipeline: preprocess -> remove unit productions -> split mixed
    terminal/nonterminal rules -> break long rules -> GNF.  Intermediate
    results are written to debug_*.json files in the working directory;
    the final grammar (with ``Start`` set) goes to ``out``.
    """
    grammar = None
    # If grammar file is a preprocessed NT file, then skip preprocessing
    if '.json' in grammar_file:
        with open(grammar_file, 'r') as fd:
            grammar = json.load(fd)
    elif '.g4' in grammar_file:
        with open(grammar_file, 'r') as fd:
            data = fd.readlines()
        grammar = preprocess(data)
    else:
        # NOTE(review): raising a plain string is a TypeError in Python 3;
        # this should be e.g. `raise ValueError(...)` (also note the typo
        # "Unknwown").
        raise('Unknwown file format passed. Accepts (.g4/.json)')

    with open('debug_preprocess.json', 'w+') as fd:
        json.dump(grammar, fd)
    grammar = remove_unit(grammar)  # eliminates unit productions
    with open('debug_unit.json', 'w+') as fd:
        json.dump(grammar, fd)
    grammar = remove_mixed(grammar)  # eliminate terminals existing with non-terminals
    with open('debug_mixed.json', 'w+') as fd:
        json.dump(grammar, fd)
    grammar = break_rules(grammar)  # eliminate rules with more than two non-terminals
    with open('debug_break.json', 'w+') as fd:
        json.dump(grammar, fd)
    grammar = gnf(grammar)

    # Dump GNF form of the grammar with only reachable rules
    # reachable_grammar = get_reachable(grammar, start)
    # with open('debug_gnf_reachable.json', 'w+') as fd:
    #     json.dump(reachable_grammar, fd)
    with open('debug_gnf.json', 'w+') as fd:
        json.dump(grammar, fd)

    grammar["Start"] = [start]
    with open(out, 'w+') as fd:
        json.dump(grammar, fd)
|
||||
def get_reachable(grammar, start):
    '''
    Returns a grammar without dead rules: BFS from `start`, keeping only
    nonterminals reachable through some production.
    '''
    # NOTE(review): reachable_nt is never used; a nonterminal can also be
    # queued multiple times before being processed (harmless, just extra work).
    reachable_nt = set()
    worklist = list()
    processed = set()
    reachable_grammar = dict()
    worklist.append(start)

    while worklist:
        nt = worklist.pop(0)
        processed.add(nt)
        reachable_grammar[nt] = grammar[nt]
        rules = grammar[nt]
        for rule in rules:
            tokens = gettokens(rule)
            for token in tokens:
                # Queue every not-yet-processed nonterminal referenced by the rule.
                if not isTerminal(token):
                    if token not in processed:
                        worklist.append(token)
    return reachable_grammar
|
||||
|
||||
|
||||
def gettokens(rule):
    """Tokenize a production rule, keeping quoted terminals intact.

    Splits on whitespace except inside single- or double-quoted spans;
    quoted tokens are returned with their quotes preserved.
    """
    token_re = re.compile("([^\s\"\']+)|\"([^\"]*)\"|\'([^\']*)\'")
    return [matched.group(0) for matched in token_re.finditer(rule)]
|
||||
|
||||
def gnf(grammar):
    """Convert the grammar to Greibach Normal Form.

    Repeatedly substitutes each rule's leading nonterminal with that
    nonterminal's productions until every rule starts with a terminal
    (fixed-point iteration over the whole grammar).
    """
    old_grammar = copy.deepcopy(grammar)
    new_grammar = defaultdict(list)
    isgnf = False
    while not isgnf:
        # Expansion pass: inline the leading nonterminal of every rule.
        for lhs, rules in old_grammar.items():
            for rule in rules:
                tokens = gettokens(rule)
                if len(tokens) == 1 and isTerminal(rule):
                    new_grammar[lhs].append(rule)
                    continue
                startoken = tokens[0]
                endrule = tokens[1:]
                if not isTerminal(startoken):
                    # Replace the leading nonterminal by each of its productions.
                    newrules = []
                    extendrules = old_grammar[startoken]
                    for extension in extendrules:
                        temprule = endrule[:]
                        temprule.insert(0, extension)
                        newrules.append(temprule)
                    for newnew in newrules:
                        new_grammar[lhs].append(' '.join(newnew))
                else:
                    new_grammar[lhs].append(rule)
        # Check pass: are all rules now terminal-leading?
        isgnf = True
        for lhs, rules in new_grammar.items():
            for rule in rules:
                # if "\' \'" or isTerminal(rule):
                tokens = gettokens(rule)
                if len(tokens) == 1 and isTerminal(rule):
                    continue
                startoken = tokens[0]
                if not isTerminal(startoken):
                    isgnf = False
                    break
        if not isgnf:
            # Not converged yet: start another expansion round.
            old_grammar = copy.deepcopy(new_grammar)
            new_grammar = defaultdict(list)
    return new_grammar
|
||||
|
||||
|
||||
def preprocess(data):
    """Parse ANTLR-style (.g4) lines into a {nonterminal: [rules]} dict.

    Productions are separated by blank lines; the first line of each holds
    ``NT: rule`` and subsequent lines hold ``rule |`` alternatives.

    NOTE(review): relies on strip_chars(), which is defined elsewhere in
    this file.  Also, a trailing production not followed by a blank line is
    silently dropped -- confirm input files always end with one.
    """
    productions = []
    production = []
    for line in data:
        if line != '\n':
            production.append(line)
        else:
            productions.append(production)
            production = []
    final_rule_set = {}
    for production in productions:
        rules = []
        init = production[0]
        nonterminal = init.split(':')[0]
        rules.append(strip_chars(init.split(':')[1]).strip('| '))
        for production_rule in production[1:]:
            rules.append(strip_chars(production_rule.split('|')[0]))
        final_rule_set[nonterminal] = rules
    return final_rule_set
|
||||
|
||||
def remove_unit(grammar):
    """Eliminate unit productions (``A -> B`` where B is a nonterminal).

    Each unit production is replaced by the productions of its target;
    the substitution is repeated until no unit production remains.
    """
    nounitproductions = False
    old_grammar = copy.deepcopy(grammar)
    new_grammar = defaultdict(list)
    while not nounitproductions:
        for lhs, rules in old_grammar.items():
            for rhs in rules:
                # Checking if the rule is a unit production rule
                if len(gettokens(rhs)) == 1:
                    if not isTerminal(rhs):
                        # Inline the target nonterminal's productions.
                        new_grammar[lhs].extend([rule for rule in old_grammar[rhs]])
                    else:
                        new_grammar[lhs].append(rhs)
                else:
                    new_grammar[lhs].append(rhs)
        # Checking there are no unit productions left in the grammar
        nounitproductions = True
        for lhs, rules in new_grammar.items():
            for rhs in rules:
                if len(gettokens(rhs)) == 1:
                    if not isTerminal(rhs):
                        nounitproductions = False
                        break
            if not nounitproductions:
                break
        # Unit productions are still there in the grammar -- repeat the process
        if not nounitproductions:
            old_grammar = copy.deepcopy(new_grammar)
            new_grammar = defaultdict(list)
    return new_grammar
|
||||
|
||||
def isTerminal(rule):
    '''
    Return True when the rule string starts with a single-quoted
    terminal (i.e. matches '...' at the beginning of the string),
    False otherwise.
    '''
    # re.match anchors at the start only; trailing text is ignored.
    return re.match("\'(.*?)\'", rule) is not None
|
||||
|
||||
def remove_mixed(grammar):
    '''
    Remove rules where there are terminals mixed in with non-terminals:
    each terminal appearing alongside other tokens is replaced by a
    (possibly shared) fresh nonterminal that derives only that terminal.
    '''
    result = defaultdict(list)
    for lhs, rhs_list in grammar.items():
        for rhs in rhs_list:
            tokens = gettokens(rhs)
            # Single-token rules are already in the desired shape.
            if len(tokens) == 1:
                result[lhs].append(rhs)
                continue
            rebuilt = []
            for token in tokens:
                if not isTerminal(token):
                    rebuilt.append(token)
                    continue
                # Reuse an existing nonterminal that derives this
                # terminal, or mint a new one.
                existing = terminal_exist(token, result)
                if existing:
                    rebuilt.append(existing)
                else:
                    fresh = get_nonterminal()
                    result[fresh].append(token)
                    rebuilt.append(fresh)
            result[lhs].append(' '.join(rebuilt))
    return result
|
||||
|
||||
def break_rules(grammar):
    '''
    Binarize the grammar: any non-terminal rule with more than two
    tokens is split by factoring everything but the last token into a
    (possibly shared) helper nonterminal.  The pass repeats until every
    rule has at most two tokens.
    '''
    result = defaultdict(list)
    work = copy.deepcopy(grammar)
    binary = False
    while not binary:
        for lhs, rhs_list in work.items():
            for rhs in rhs_list:
                tokens = gettokens(rhs)
                if len(tokens) > 2 and (not isTerminal(rhs)):
                    prefix = ' '.join(tokens[:-1])
                    # Reuse a helper nonterminal already deriving this
                    # prefix, otherwise create one for it.
                    helper = terminal_exist(prefix, result)
                    if not helper:
                        helper = get_nonterminal()
                        result[helper].append(prefix)
                    result[lhs].append(' '.join([helper, tokens[-1]]))
                else:
                    result[lhs].append(rhs)
        # A single splitting pass may leave rules still longer than two
        # tokens (the factored prefix itself, for example); re-scan.
        binary = True
        for lhs, rhs_list in result.items():
            if any(len(gettokens(rhs)) > 2 and (not isTerminal(rhs))
                   for rhs in rhs_list):
                binary = False
                break
        if not binary:
            work = copy.deepcopy(result)
            result = defaultdict(list)
    return result
|
||||
|
||||
def strip_chars(rule):
    '''Trim surrounding whitespace (spaces, tabs, newlines) from a rule string.'''
    return rule.strip(' \t\n')
|
||||
|
||||
def get_nonterminal():
    '''
    Pop the next unused nonterminal name from the global pool,
    refilling the pool first when it has run dry.
    '''
    global NONTERMINALSET
    if not NONTERMINALSET:
        _repopulate()
    return NONTERMINALSET.pop(0)
|
||||
|
||||
def _repopulate():
    '''
    Refill the global nonterminal-name pool with every COUNT-letter
    combination of uppercase ASCII letters, then bump COUNT so the next
    refill produces longer names.

    NOTE(review): combinations() yields nothing once COUNT exceeds 26,
    which would leave the pool empty -- assumes grammars never need
    that many generated nonterminals; confirm for very large grammars.
    '''
    global COUNT
    global NONTERMINALSET
    NONTERMINALSET = [''.join(letters)
                      for letters in combinations(ascii_uppercase, COUNT)]
    COUNT += 1
||||
|
||||
def terminal_exist(token, grammar):
    '''
    Return the first nonterminal whose rule list contains `token`
    verbatim, or None when no such nonterminal exists.
    '''
    return next(
        (lhs for lhs, rhs_list in grammar.items() if token in rhs_list),
        None)
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    import argparse

    # Command line: grammar file in, GNF JSON out, plus the start token.
    parser = argparse.ArgumentParser(
        description='Script to convert grammar to GNF form')
    parser.add_argument('--gf', type=str, required=True,
                        help='Location of grammar file')
    parser.add_argument('--out', type=str, required=True,
                        help='Location of output file')
    parser.add_argument('--start', type=str, required=True,
                        help='Start token')
    args = parser.parse_args()

    main(args.gf, args.out, args.start)
|
38
custom_mutators/gramatron/preprocess/prep_automaton.sh
Executable file
38
custom_mutators/gramatron/preprocess/prep_automaton.sh
Executable file
@ -0,0 +1,38 @@
|
||||
#!/bin/bash

# This script creates a FSA describing the input grammar *.g4
#
# Usage: ./prep_automaton.sh <grammar_file> <start> [stack_limit]

# Need the grammar file and the start token; the stack limit is
# optional.  (The previous test `[ ! "$#" -lt 4 ]` only rejected four
# or more arguments, so an empty command line slipped through.)
if [ "$#" -lt 2 ] || [ "$#" -gt 3 ]; then
  echo "Usage: ./prep_automaton.sh <grammar_file> <start> [stack_limit]"
  exit 1
fi

GRAMMAR_FILE="$1"
GRAMMAR_DIR="$(dirname "$GRAMMAR_FILE")"
START="$2"
STACK_LIMIT="$3"

# Get filename
FILE=$(basename -- "$GRAMMAR_FILE")
echo "File:$FILE"
FILENAME="${FILE%.*}"
echo "Name:$FILENAME"

# Create the GNF form of the grammar
CMD="python3 gnf_converter.py --gf $GRAMMAR_FILE --out ${FILENAME}.json --start $START"
$CMD

# Generate grammar automaton
# Check if user provided a stack limit (both branches use python3; the
# original mixed `python` and `python3` for the same script)
if [ -z "${STACK_LIMIT}" ]; then
  CMD="python3 construct_automata.py --gf ${FILENAME}.json"
else
  CMD="python3 construct_automata.py --gf ${FILENAME}.json --limit ${STACK_LIMIT}"
fi
echo $CMD
$CMD

# Move PDA to the source dir of the grammar
echo "Copying ${FILENAME}_automata.json to $GRAMMAR_DIR"
mv "${FILENAME}_automata.json" "$GRAMMAR_DIR/"
|
154
custom_mutators/gramatron/test.c
Normal file
154
custom_mutators/gramatron/test.c
Normal file
@ -0,0 +1,154 @@
|
||||
/* This is the testing module for Gramatron
|
||||
*/
|
||||
#include "afl-fuzz.h"
|
||||
#include "gramfuzz.h"
|
||||
|
||||
#define NUMINPUTS 50
|
||||
|
||||
/* Build the PDA (push-down automaton) state table from the JSON
 * automaton description produced by the preprocessing scripts.
 *
 * Side effects: sets the globals init_state, final_state and numstates.
 * Returns a calloc'd array of numstates states; each state owns a
 * calloc'd trigger array whose id/term strings are strdup'd.  The
 * caller keeps ownership of everything returned.
 *
 * Exits the process on parse failure or out-of-memory (this is a
 * standalone test harness, so aborting is acceptable).
 */
state *create_pda(u8 *automaton_file) {

  struct json_object *parsed_json;
  state *             pda;
  json_object *       source_obj, *attr;
  int                 ii, trigger_len;

  printf("\n[GF] Automaton file passed:%s", automaton_file);
  parsed_json = json_object_from_file(automaton_file);
  if (!parsed_json) {

    fprintf(stderr, "\n[GF] Failed to parse automaton file %s\n",
            automaton_file);
    exit(1);

  }

  // Getting final state
  source_obj = json_object_object_get(parsed_json, "final_state");
  printf("\t\nFinal=%s\n", json_object_get_string(source_obj));
  final_state = atoi(json_object_get_string(source_obj));

  // Getting initial state
  source_obj = json_object_object_get(parsed_json, "init_state");
  init_state = atoi(json_object_get_string(source_obj));
  printf("\tInit=%s\n", json_object_get_string(source_obj));

  // Getting number of states.  NOTE(review): the +1 mirrors the
  // original sizing -- state ids appear to start at 1; confirm against
  // the automaton generator.
  source_obj = json_object_object_get(parsed_json, "numstates");
  numstates = atoi(json_object_get_string(source_obj)) + 1;
  printf("\tNumStates=%d\n", numstates);

  // Allocate state space for each pda state
  pda = (state *)calloc(numstates, sizeof(state));
  if (!pda) {

    fprintf(stderr, "\n[GF] Out of memory allocating %d states\n", numstates);
    exit(1);

  }

  // Getting PDA representation
  source_obj = json_object_object_get(parsed_json, "pda");
  json_object_object_foreach(source_obj, key, val) {

    state *  state_ptr;
    trigger *trigger_ptr;
    int      offset;

    // Get the correct offset into the pda to store state information
    state_ptr = pda;
    offset = atoi(key);
    state_ptr += offset;

    // Store state string
    state_ptr->state_name = offset;

    // Create trigger array of structs
    trigger_len = json_object_array_length(val);
    state_ptr->trigger_len = trigger_len;
    trigger_ptr = (trigger *)calloc(trigger_len, sizeof(trigger));
    if (!trigger_ptr) {

      fprintf(stderr, "\n[GF] Out of memory allocating %d triggers\n",
              trigger_len);
      exit(1);

    }
    state_ptr->ptr = trigger_ptr;
    printf("\nName:%d Trigger:%d", offset, trigger_len);

    for (ii = 0; ii < trigger_len; ii++) {

      json_object *obj = json_object_array_get_idx(val, ii);

      // Element [0]: trigger id
      attr = json_object_array_get_idx(obj, 0);
      trigger_ptr->id = strdup(json_object_get_string(attr));

      // Element [1]: destination state
      attr = json_object_array_get_idx(obj, 1);
      trigger_ptr->dest = atoi(json_object_get_string(attr));

      // Element [2]: emitted terminal ("\n" is stored escaped in JSON)
      attr = json_object_array_get_idx(obj, 2);
      if (!strcmp("\\n", json_object_get_string(attr))) {

        trigger_ptr->term = strdup("\n");

      } else {

        trigger_ptr->term = strdup(json_object_get_string(attr));

      }

      trigger_ptr->term_len = strlen(trigger_ptr->term);
      trigger_ptr++;

    }

  }

  // Delete the JSON object
  json_object_put(parsed_json);

  return pda;

}
|
||||
|
||||
void SanityCheck(char *automaton_path) {
|
||||
|
||||
state * pda = create_pda(automaton_path);
|
||||
int count = 0, state;
|
||||
Get_Dupes_Ret *getdupesret;
|
||||
IdxMap_new * statemap;
|
||||
IdxMap_new * statemap_ptr;
|
||||
terminal * term_ptr;
|
||||
|
||||
while (count < NUMINPUTS) {
|
||||
|
||||
// Perform input generation
|
||||
Array *generated = gen_input(pda, NULL);
|
||||
print_repr(generated, "Gen");
|
||||
count += 1;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Entry point of the standalone Gramatron tester.
 *
 * Usage: ./test <mode> <automaton_path>
 * The only supported mode is "SanityCheck".  Returns 0 on success,
 * -1 on bad usage or unrecognized mode.
 */
int main(int argc, char *argv[]) {

  char *mode;
  char *automaton_path;

  // Fixed seed so test runs are reproducible
  srand(1337);

  if (argc == 3) {

    mode = argv[1];
    automaton_path = strdup(argv[2]);
    printf("\nMode:%s Path:%s", mode, automaton_path);

  } else {

    printf("\nUsage: ./test <mode> <automaton_path>");
    return -1;

  }

  if (!strcmp(mode, "SanityCheck")) {

    SanityCheck(automaton_path);

  } else {

    printf("\nUnrecognized mode");
    free(automaton_path);
    return -1;

  }

  free(automaton_path);
  return 0;

}
|
||||
|
57
custom_mutators/gramatron/test.h
Normal file
57
custom_mutators/gramatron/test.h
Normal file
@ -0,0 +1,57 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <json-c/json.h>
|
||||
#include <unistd.h>
|
||||
#include "hashmap.h"
|
||||
#include "uthash.h"
|
||||
#include "utarray.h"
|
||||
|
||||
#define INIT_SIZE 100 // Initial size of the dynamic array holding the input
|
||||
|
||||
typedef struct terminal {
|
||||
|
||||
int state;
|
||||
int trigger_idx;
|
||||
size_t symbol_len;
|
||||
char * symbol;
|
||||
|
||||
} terminal;
|
||||
|
||||
typedef struct trigger {
|
||||
|
||||
char * id;
|
||||
int dest;
|
||||
char * term;
|
||||
size_t term_len;
|
||||
|
||||
} trigger;
|
||||
|
||||
typedef struct state {
|
||||
|
||||
int state_name; // Integer State name
|
||||
int trigger_len; // Number of triggers associated with this state
|
||||
trigger *ptr; // Pointer to beginning of the list of triggers
|
||||
|
||||
} state;
|
||||
|
||||
typedef struct {
|
||||
|
||||
size_t used;
|
||||
size_t size;
|
||||
size_t inputlen;
|
||||
terminal *start;
|
||||
|
||||
} Array;
|
||||
|
||||
int init_state;
|
||||
int curr_state;
|
||||
int final_state;
|
||||
|
||||
state *create_pda(char *);
|
||||
Array *gen_input(state *, Array *);
|
||||
void print_repr(Array *, char *);
|
||||
void initArray(Array *, size_t);
|
||||
void insertArray(Array *, int, char *, size_t, int);
|
||||
|
392
custom_mutators/gramatron/utarray.h
Normal file
392
custom_mutators/gramatron/utarray.h
Normal file
@ -0,0 +1,392 @@
|
||||
/*
|
||||
Copyright (c) 2008-2018, Troy D. Hanson http://troydhanson.github.com/uthash/
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* a dynamic array implementation using macros
|
||||
*/
|
||||
#ifndef UTARRAY_H
|
||||
#define UTARRAY_H
|
||||
|
||||
#define UTARRAY_VERSION 2.1.0
|
||||
|
||||
#include <stddef.h> /* size_t */
|
||||
#include <string.h> /* memset, etc */
|
||||
#include <stdlib.h> /* exit */
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define UTARRAY_UNUSED __attribute__((__unused__))
|
||||
#else
|
||||
#define UTARRAY_UNUSED
|
||||
#endif
|
||||
|
||||
#ifdef oom
|
||||
#error \
|
||||
"The name of macro 'oom' has been changed to 'utarray_oom'. Please update your code."
|
||||
#define utarray_oom() oom()
|
||||
#endif
|
||||
|
||||
#ifndef utarray_oom
|
||||
#define utarray_oom() exit(-1)
|
||||
#endif
|
||||
|
||||
typedef void(ctor_f)(void *dst, const void *src);
|
||||
typedef void(dtor_f)(void *elt);
|
||||
typedef void(init_f)(void *elt);
|
||||
typedef struct {
|
||||
|
||||
size_t sz;
|
||||
init_f *init;
|
||||
ctor_f *copy;
|
||||
dtor_f *dtor;
|
||||
|
||||
} UT_icd;
|
||||
|
||||
typedef struct {
|
||||
|
||||
unsigned i, n; /* i: index of next available slot, n: num slots */
|
||||
UT_icd icd; /* initializer, copy and destructor functions */
|
||||
char * d; /* n slots of size icd->sz*/
|
||||
|
||||
} UT_array;
|
||||
|
||||
#define utarray_init(a, _icd) \
|
||||
do { \
|
||||
\
|
||||
memset(a, 0, sizeof(UT_array)); \
|
||||
(a)->icd = *(_icd); \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_done(a) \
|
||||
do { \
|
||||
\
|
||||
if ((a)->n) { \
|
||||
\
|
||||
if ((a)->icd.dtor) { \
|
||||
\
|
||||
unsigned _ut_i; \
|
||||
for (_ut_i = 0; _ut_i < (a)->i; _ut_i++) { \
|
||||
\
|
||||
(a)->icd.dtor(utarray_eltptr(a, _ut_i)); \
|
||||
\
|
||||
} \
|
||||
\
|
||||
} \
|
||||
free((a)->d); \
|
||||
\
|
||||
} \
|
||||
(a)->n = 0; \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_new(a, _icd) \
|
||||
do { \
|
||||
\
|
||||
(a) = (UT_array *)malloc(sizeof(UT_array)); \
|
||||
if ((a) == NULL) { utarray_oom(); } \
|
||||
utarray_init(a, _icd); \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_free(a) \
|
||||
do { \
|
||||
\
|
||||
utarray_done(a); \
|
||||
free(a); \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_reserve(a, by) \
|
||||
do { \
|
||||
\
|
||||
if (((a)->i + (by)) > (a)->n) { \
|
||||
\
|
||||
char *utarray_tmp; \
|
||||
while (((a)->i + (by)) > (a)->n) { \
|
||||
\
|
||||
(a)->n = ((a)->n ? (2 * (a)->n) : 8); \
|
||||
\
|
||||
} \
|
||||
utarray_tmp = (char *)realloc((a)->d, (a)->n * (a)->icd.sz); \
|
||||
if (utarray_tmp == NULL) { utarray_oom(); } \
|
||||
(a)->d = utarray_tmp; \
|
||||
\
|
||||
} \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_push_back(a, p) \
|
||||
do { \
|
||||
\
|
||||
utarray_reserve(a, 1); \
|
||||
if ((a)->icd.copy) { \
|
||||
\
|
||||
(a)->icd.copy(_utarray_eltptr(a, (a)->i++), p); \
|
||||
\
|
||||
} else { \
|
||||
\
|
||||
memcpy(_utarray_eltptr(a, (a)->i++), p, (a)->icd.sz); \
|
||||
\
|
||||
}; \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_pop_back(a) \
|
||||
do { \
|
||||
\
|
||||
if ((a)->icd.dtor) { \
|
||||
\
|
||||
(a)->icd.dtor(_utarray_eltptr(a, --((a)->i))); \
|
||||
\
|
||||
} else { \
|
||||
\
|
||||
(a)->i--; \
|
||||
\
|
||||
} \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_extend_back(a) \
|
||||
do { \
|
||||
\
|
||||
utarray_reserve(a, 1); \
|
||||
if ((a)->icd.init) { \
|
||||
\
|
||||
(a)->icd.init(_utarray_eltptr(a, (a)->i)); \
|
||||
\
|
||||
} else { \
|
||||
\
|
||||
memset(_utarray_eltptr(a, (a)->i), 0, (a)->icd.sz); \
|
||||
\
|
||||
} \
|
||||
(a)->i++; \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_len(a) ((a)->i)
|
||||
|
||||
#define utarray_eltptr(a, j) (((j) < (a)->i) ? _utarray_eltptr(a, j) : NULL)
|
||||
#define _utarray_eltptr(a, j) ((a)->d + ((a)->icd.sz * (j)))
|
||||
|
||||
#define utarray_insert(a, p, j) \
|
||||
do { \
|
||||
\
|
||||
if ((j) > (a)->i) utarray_resize(a, j); \
|
||||
utarray_reserve(a, 1); \
|
||||
if ((j) < (a)->i) { \
|
||||
\
|
||||
memmove(_utarray_eltptr(a, (j) + 1), _utarray_eltptr(a, j), \
|
||||
((a)->i - (j)) * ((a)->icd.sz)); \
|
||||
\
|
||||
} \
|
||||
if ((a)->icd.copy) { \
|
||||
\
|
||||
(a)->icd.copy(_utarray_eltptr(a, j), p); \
|
||||
\
|
||||
} else { \
|
||||
\
|
||||
memcpy(_utarray_eltptr(a, j), p, (a)->icd.sz); \
|
||||
\
|
||||
}; \
|
||||
(a)->i++; \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_inserta(a, w, j) \
|
||||
do { \
|
||||
\
|
||||
if (utarray_len(w) == 0) break; \
|
||||
if ((j) > (a)->i) utarray_resize(a, j); \
|
||||
utarray_reserve(a, utarray_len(w)); \
|
||||
if ((j) < (a)->i) { \
|
||||
\
|
||||
memmove(_utarray_eltptr(a, (j) + utarray_len(w)), _utarray_eltptr(a, j), \
|
||||
((a)->i - (j)) * ((a)->icd.sz)); \
|
||||
\
|
||||
} \
|
||||
if ((a)->icd.copy) { \
|
||||
\
|
||||
unsigned _ut_i; \
|
||||
for (_ut_i = 0; _ut_i < (w)->i; _ut_i++) { \
|
||||
\
|
||||
(a)->icd.copy(_utarray_eltptr(a, (j) + _ut_i), \
|
||||
_utarray_eltptr(w, _ut_i)); \
|
||||
\
|
||||
} \
|
||||
\
|
||||
} else { \
|
||||
\
|
||||
memcpy(_utarray_eltptr(a, j), _utarray_eltptr(w, 0), \
|
||||
utarray_len(w) * ((a)->icd.sz)); \
|
||||
\
|
||||
} \
|
||||
(a)->i += utarray_len(w); \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_resize(dst, num) \
|
||||
do { \
|
||||
\
|
||||
unsigned _ut_i; \
|
||||
if ((dst)->i > (unsigned)(num)) { \
|
||||
\
|
||||
if ((dst)->icd.dtor) { \
|
||||
\
|
||||
for (_ut_i = (num); _ut_i < (dst)->i; ++_ut_i) { \
|
||||
\
|
||||
(dst)->icd.dtor(_utarray_eltptr(dst, _ut_i)); \
|
||||
\
|
||||
} \
|
||||
\
|
||||
} \
|
||||
\
|
||||
} else if ((dst)->i < (unsigned)(num)) { \
|
||||
\
|
||||
utarray_reserve(dst, (num) - (dst)->i); \
|
||||
if ((dst)->icd.init) { \
|
||||
\
|
||||
for (_ut_i = (dst)->i; _ut_i < (unsigned)(num); ++_ut_i) { \
|
||||
\
|
||||
(dst)->icd.init(_utarray_eltptr(dst, _ut_i)); \
|
||||
\
|
||||
} \
|
||||
\
|
||||
} else { \
|
||||
\
|
||||
memset(_utarray_eltptr(dst, (dst)->i), 0, \
|
||||
(dst)->icd.sz *((num) - (dst)->i)); \
|
||||
\
|
||||
} \
|
||||
\
|
||||
} \
|
||||
(dst)->i = (num); \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_concat(dst, src) \
|
||||
do { \
|
||||
\
|
||||
utarray_inserta(dst, src, utarray_len(dst)); \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_erase(a, pos, len) \
|
||||
do { \
|
||||
\
|
||||
if ((a)->icd.dtor) { \
|
||||
\
|
||||
unsigned _ut_i; \
|
||||
for (_ut_i = 0; _ut_i < (len); _ut_i++) { \
|
||||
\
|
||||
(a)->icd.dtor(utarray_eltptr(a, (pos) + _ut_i)); \
|
||||
\
|
||||
} \
|
||||
\
|
||||
} \
|
||||
if ((a)->i > ((pos) + (len))) { \
|
||||
\
|
||||
memmove(_utarray_eltptr(a, pos), _utarray_eltptr(a, (pos) + (len)), \
|
||||
((a)->i - ((pos) + (len))) * (a)->icd.sz); \
|
||||
\
|
||||
} \
|
||||
(a)->i -= (len); \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_renew(a, u) \
|
||||
do { \
|
||||
\
|
||||
if (a) \
|
||||
utarray_clear(a); \
|
||||
else \
|
||||
utarray_new(a, u); \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_clear(a) \
|
||||
do { \
|
||||
\
|
||||
if ((a)->i > 0) { \
|
||||
\
|
||||
if ((a)->icd.dtor) { \
|
||||
\
|
||||
unsigned _ut_i; \
|
||||
for (_ut_i = 0; _ut_i < (a)->i; _ut_i++) { \
|
||||
\
|
||||
(a)->icd.dtor(_utarray_eltptr(a, _ut_i)); \
|
||||
\
|
||||
} \
|
||||
\
|
||||
} \
|
||||
(a)->i = 0; \
|
||||
\
|
||||
} \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_sort(a, cmp) \
|
||||
do { \
|
||||
\
|
||||
qsort((a)->d, (a)->i, (a)->icd.sz, cmp); \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define utarray_find(a, v, cmp) bsearch((v), (a)->d, (a)->i, (a)->icd.sz, cmp)
|
||||
|
||||
#define utarray_front(a) (((a)->i) ? (_utarray_eltptr(a, 0)) : NULL)
|
||||
#define utarray_next(a, e) \
|
||||
(((e) == NULL) ? utarray_front(a) \
|
||||
: (((a)->i != utarray_eltidx(a, e) + 1) \
|
||||
? _utarray_eltptr(a, utarray_eltidx(a, e) + 1) \
|
||||
: NULL))
|
||||
#define utarray_prev(a, e) \
|
||||
(((e) == NULL) ? utarray_back(a) \
|
||||
: ((utarray_eltidx(a, e) != 0) \
|
||||
? _utarray_eltptr(a, utarray_eltidx(a, e) - 1) \
|
||||
: NULL))
|
||||
#define utarray_back(a) (((a)->i) ? (_utarray_eltptr(a, (a)->i - 1)) : NULL)
|
||||
#define utarray_eltidx(a, e) (((char *)(e) - (a)->d) / (a)->icd.sz)
|
||||
|
||||
/* last we pre-define a few icd for common utarrays of ints and strings */
|
||||
static void utarray_str_cpy(void *dst, const void *src) {
|
||||
|
||||
char **_src = (char **)src, **_dst = (char **)dst;
|
||||
*_dst = (*_src == NULL) ? NULL : strdup(*_src);
|
||||
|
||||
}
|
||||
|
||||
static void utarray_str_dtor(void *elt) {
|
||||
|
||||
char **eltc = (char **)elt;
|
||||
if (*eltc != NULL) free(*eltc);
|
||||
|
||||
}
|
||||
|
||||
static const UT_icd ut_str_icd UTARRAY_UNUSED = {
|
||||
|
||||
sizeof(char *), NULL, utarray_str_cpy, utarray_str_dtor};
|
||||
static const UT_icd ut_int_icd UTARRAY_UNUSED = {sizeof(int), NULL, NULL, NULL};
|
||||
static const UT_icd ut_ptr_icd UTARRAY_UNUSED = {sizeof(void *), NULL, NULL,
|
||||
NULL};
|
||||
|
||||
#endif /* UTARRAY_H */
|
||||
|
1611
custom_mutators/gramatron/uthash.h
Normal file
1611
custom_mutators/gramatron/uthash.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -65,7 +65,7 @@ my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {
|
||||
/* When a new queue entry is added we check if there are new dictionary
|
||||
entries to add to honggfuzz structure */
|
||||
|
||||
void afl_custom_queue_new_entry(my_mutator_t * data,
|
||||
uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
|
||||
const uint8_t *filename_new_queue,
|
||||
const uint8_t *filename_orig_queue) {
|
||||
|
||||
@ -97,6 +97,8 @@ void afl_custom_queue_new_entry(my_mutator_t * data,
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
/* we could set only_printable if is_ascii is set ... let's see
|
||||
|
@ -78,7 +78,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {
|
||||
/* When a new queue entry is added we check if there are new dictionary
|
||||
entries to add to honggfuzz structure */
|
||||
#if 0
|
||||
extern "C" void afl_custom_queue_new_entry(my_mutator_t * data,
|
||||
extern "C" uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
|
||||
const uint8_t *filename_new_queue,
|
||||
const uint8_t *filename_orig_queue) {
|
||||
|
||||
@ -110,6 +110,8 @@ extern "C" void afl_custom_queue_new_entry(my_mutator_t * data,
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -53,7 +53,9 @@ pub trait RawCustomMutator {
|
||||
1
|
||||
}
|
||||
|
||||
fn queue_new_entry(&mut self, filename_new_queue: &Path, _filename_orig_queue: Option<&Path>) {}
|
||||
fn queue_new_entry(&mut self, filename_new_queue: &Path, _filename_orig_queue: Option<&Path>) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn queue_get(&mut self, filename: &Path) -> bool {
|
||||
true
|
||||
@ -246,7 +248,7 @@ pub mod wrappers {
|
||||
data: *mut c_void,
|
||||
filename_new_queue: *const c_char,
|
||||
filename_orig_queue: *const c_char,
|
||||
) {
|
||||
) -> bool {
|
||||
match catch_unwind(|| {
|
||||
let mut context = FFIContext::<M>::from(data);
|
||||
if filename_new_queue.is_null() {
|
||||
|
@ -101,7 +101,7 @@ my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {
|
||||
|
||||
/* When a new queue entry is added we run this input with the symcc
|
||||
instrumented binary */
|
||||
void afl_custom_queue_new_entry(my_mutator_t * data,
|
||||
uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
|
||||
const uint8_t *filename_new_queue,
|
||||
const uint8_t *filename_orig_queue) {
|
||||
|
||||
@ -232,6 +232,8 @@ void afl_custom_queue_new_entry(my_mutator_t * data,
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
uint32_t afl_custom_fuzz_count(my_mutator_t *data, const u8 *buf,
|
||||
|
@ -47,7 +47,7 @@ int afl_custom_post_trim(void *data, unsigned char success);
|
||||
size_t afl_custom_havoc_mutation(void *data, unsigned char *buf, size_t buf_size, unsigned char **out_buf, size_t max_size);
|
||||
unsigned char afl_custom_havoc_mutation_probability(void *data);
|
||||
unsigned char afl_custom_queue_get(void *data, const unsigned char *filename);
|
||||
void afl_custom_queue_new_entry(void *data, const unsigned char *filename_new_queue, const unsigned int *filename_orig_queue);
|
||||
u8 afl_custom_queue_new_entry(void *data, const unsigned char *filename_new_queue, const unsigned int *filename_orig_queue);
|
||||
const char* afl_custom_introspection(my_mutator_t *data);
|
||||
void afl_custom_deinit(void *data);
|
||||
```
|
||||
@ -88,7 +88,7 @@ def queue_get(filename):
|
||||
return True
|
||||
|
||||
def queue_new_entry(filename_new_queue, filename_orig_queue):
|
||||
pass
|
||||
return False
|
||||
|
||||
def introspection():
|
||||
return string
|
||||
@ -156,6 +156,7 @@ def deinit(): # optional for Python
|
||||
- `queue_new_entry` (optional):
|
||||
|
||||
This methods is called after adding a new test case to the queue.
|
||||
If the contents of the file was changed return True, False otherwise.
|
||||
|
||||
- `introspection` (optional):
|
||||
|
||||
|
@ -972,7 +972,7 @@ struct custom_mutator {
|
||||
* @param filename_orig_queue File name of the original queue entry. This
|
||||
* argument can be NULL while initializing the fuzzer
|
||||
*/
|
||||
void (*afl_custom_queue_new_entry)(void *data, const u8 *filename_new_queue,
|
||||
u8 (*afl_custom_queue_new_entry)(void *data, const u8 *filename_new_queue,
|
||||
const u8 *filename_orig_queue);
|
||||
/**
|
||||
* Deinitialize the custom mutator.
|
||||
@ -1005,6 +1005,8 @@ void setup_custom_mutators(afl_state_t *);
|
||||
void destroy_custom_mutators(afl_state_t *);
|
||||
u8 trim_case_custom(afl_state_t *, struct queue_entry *q, u8 *in_buf,
|
||||
struct custom_mutator *mutator);
|
||||
void run_afl_custom_queue_new_entry(afl_state_t *, struct queue_entry *, u8 *,
|
||||
u8 *);
|
||||
|
||||
/* Python */
|
||||
#ifdef USE_PYTHON
|
||||
@ -1021,7 +1023,7 @@ size_t havoc_mutation_py(void *, u8 *, size_t, u8 **, size_t);
|
||||
u8 havoc_mutation_probability_py(void *);
|
||||
u8 queue_get_py(void *, const u8 *);
|
||||
const char *introspection_py(void *);
|
||||
void queue_new_entry_py(void *, const u8 *, const u8 *);
|
||||
u8 queue_new_entry_py(void *, const u8 *, const u8 *);
|
||||
void deinit_py(void *);
|
||||
|
||||
#endif
|
||||
|
@ -881,11 +881,7 @@ void perform_dry_run(afl_state_t *afl) {
|
||||
|
||||
u32 read_len = MIN(q->len, (u32)MAX_FILE);
|
||||
use_mem = afl_realloc(AFL_BUF_PARAM(in), read_len);
|
||||
if (read(fd, use_mem, read_len) != (ssize_t)read_len) {
|
||||
|
||||
FATAL("Short read from '%s'", q->fname);
|
||||
|
||||
}
|
||||
ck_read(fd, use_mem, read_len, q->fname);
|
||||
|
||||
close(fd);
|
||||
|
||||
@ -1350,6 +1346,12 @@ void pivot_inputs(afl_state_t *afl) {
|
||||
|
||||
if (q->passed_det) { mark_as_det_done(afl, q); }
|
||||
|
||||
if (afl->custom_mutators_count) {
|
||||
|
||||
run_afl_custom_queue_new_entry(afl, q, q->fname, NULL);
|
||||
|
||||
}
|
||||
|
||||
++id;
|
||||
|
||||
}
|
||||
|
@ -31,6 +31,45 @@ struct custom_mutator *load_custom_mutator(afl_state_t *, const char *);
|
||||
struct custom_mutator *load_custom_mutator_py(afl_state_t *, char *);
|
||||
#endif
|
||||
|
||||
void run_afl_custom_queue_new_entry(afl_state_t *afl, struct queue_entry *q,
|
||||
u8 *fname, u8 *mother_fname) {
|
||||
|
||||
if (afl->custom_mutators_count) {
|
||||
|
||||
u8 updated = 0;
|
||||
|
||||
LIST_FOREACH(&afl->custom_mutator_list, struct custom_mutator, {
|
||||
|
||||
if (el->afl_custom_queue_new_entry) {
|
||||
|
||||
if (el->afl_custom_queue_new_entry(el->data, fname, mother_fname)) {
|
||||
|
||||
updated = 1;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
if (updated) {
|
||||
|
||||
struct stat st;
|
||||
if (stat(fname, &st)) { PFATAL("File %s is gone!", fname); }
|
||||
if (!st.st_size) {
|
||||
|
||||
FATAL("File %s became empty in custom mutator!", fname);
|
||||
|
||||
}
|
||||
|
||||
q->len = st.st_size;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void setup_custom_mutators(afl_state_t *afl) {
|
||||
|
||||
/* Try mutator library first */
|
||||
|
@ -813,7 +813,7 @@ u8 queue_get_py(void *py_mutator, const u8 *filename) {
|
||||
|
||||
}
|
||||
|
||||
void queue_new_entry_py(void *py_mutator, const u8 *filename_new_queue,
|
||||
u8 queue_new_entry_py(void *py_mutator, const u8 *filename_new_queue,
|
||||
const u8 *filename_orig_queue) {
|
||||
|
||||
PyObject *py_args, *py_value;
|
||||
@ -861,7 +861,21 @@ void queue_new_entry_py(void *py_mutator, const u8 *filename_new_queue,
|
||||
py_args);
|
||||
Py_DECREF(py_args);
|
||||
|
||||
if (py_value == NULL) {
|
||||
if (py_value != NULL) {
|
||||
|
||||
int ret = PyObject_IsTrue(py_value);
|
||||
Py_DECREF(py_value);
|
||||
|
||||
if (ret == -1) {
|
||||
|
||||
PyErr_Print();
|
||||
FATAL("Failed to convert return value");
|
||||
|
||||
}
|
||||
|
||||
return (u8)ret & 0xFF;
|
||||
|
||||
} else {
|
||||
|
||||
PyErr_Print();
|
||||
FATAL("Call failed");
|
||||
|
@ -474,25 +474,13 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
|
||||
|
||||
if (afl->custom_mutators_count) {
|
||||
|
||||
LIST_FOREACH(&afl->custom_mutator_list, struct custom_mutator, {
|
||||
|
||||
if (el->afl_custom_queue_new_entry) {
|
||||
|
||||
u8 *fname_orig = NULL;
|
||||
|
||||
/* At the initialization stage, queue_cur is NULL */
|
||||
if (afl->queue_cur && !afl->syncing_party) {
|
||||
|
||||
fname_orig = afl->queue_cur->fname;
|
||||
run_afl_custom_queue_new_entry(afl, q, fname, afl->queue_cur->fname);
|
||||
|
||||
}
|
||||
|
||||
el->afl_custom_queue_new_entry(el->data, fname, fname_orig);
|
||||
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
/* only redqueen currently uses is_ascii */
|
||||
|
Reference in New Issue
Block a user