Merge pull request #1034 from AFLplusplus/grammatron

Grammatron
This commit is contained in:
van Hauser
2021-07-20 08:57:37 +02:00
committed by GitHub
33 changed files with 15536 additions and 43 deletions

View File

@ -349,12 +349,15 @@ uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) {
* @param data pointer returned in afl_custom_init for this fuzz case
* @param filename_new_queue File name of the new queue entry
* @param filename_orig_queue File name of the original queue entry
* @return if the file contents was modified return 1 (True), 0 (False)
* otherwise
*/
void afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
/* Additional analysis on the original or new test case */
// This example hook does not touch the queue file, so it reports 0
// ("file contents not modified").
return 0;
}

View File

@ -0,0 +1,7 @@
# Default target: build the custom mutator shared library.
all: gramatron.so
# The library is built in a single compiler invocation from all sources and
# linked against json-c.
# NOTE(review): -I/prg/dev/include looks like a developer-local include path;
# confirm whether it is needed.
# NOTE(review): -Wl,--allow-multiple-definition is presumably needed because
# gramfuzz.h defines global variables that end up in several objects; confirm.
gramatron.so: gramfuzz.c gramfuzz.h gramfuzz-helpers.c gramfuzz-mutators.c gramfuzz-util.c hashmap.c hashmap.h test.c test.h utarray.h uthash.h
$(CC) -O3 -g -fPIC -Wno-unused-result -Wl,--allow-multiple-definition -I../../include -o gramatron.so -shared -I. -I/prg/dev/include gramfuzz.c gramfuzz-helpers.c gramfuzz-mutators.c gramfuzz-util.c hashmap.c test.c -ljson-c
# Remove the built library.
clean:
rm -f gramatron.so

View File

@ -0,0 +1,51 @@
# Gramatron
Gramatron is a coverage-guided fuzzer that uses grammar automatons to perform
grammar-aware fuzzing. Technical details about our framework are available in our
[ISSTA'21 paper](https://nebelwelt.net/files/21ISSTA.pdf). The artifacts to reproduce the
experiments presented in our paper are available in `artifact/`. Instructions to run
a sample campaign and incorporate new grammars are presented below:
# Compiling
- Install `json-c`
```
git clone https://github.com/json-c/json-c.git
cd json-c && git reset --hard af8dd4a307e7b837f9fa2959549548ace4afe08b && sh autogen.sh && ./configure && make && make install
```
Afterwards you can just run `make` to build Gramatron.
# Running
You have to set the automaton file to use with `GRAMATRON_AUTOMATION`:
```
export AFL_DISABLE_TRIM=1
export AFL_CUSTOM_MUTATOR_ONLY=1
export AFL_CUSTOM_MUTATOR_LIBRARY=./gramatron.so
export GRAMATRON_AUTOMATION=grammars/ruby/source_automata.json
afl-fuzz -i in -o out -- ./target
```
# Adding and testing a new grammar
- Specify the grammar as a CFG in JSON format. See the corresponding `source.json` files for examples.
- Run the automaton generation script (in `src/gramfuzz-mutator/preprocess`)
which will place the generated automaton in the same folder.
```
./preprocess/prep_automaton.sh <grammar_file> <start_symbol> [stack_limit]
Eg. ./preprocess/prep_automaton.sh ~/grammars/ruby/source.json PROGRAM
```
- If the grammar has no self-embedding rules then you do not need to pass the
stack limit parameter. However, if it does have self-embedding rules then you
need to pass the stack limit parameter. We recommend starting with `5` and
then increasing it if you need more complexity
- To sanity-check that the automaton is generating inputs as expected you can use the `test` binary housed in `src/gramfuzz-mutator`
```
./test SanityCheck <automaton_file>
Eg. ./test SanityCheck ~/grammars/ruby/source_automata.json
```

View File

@ -0,0 +1,336 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "afl-fuzz.h"
#include "gramfuzz.h"
/* Copies the leading part of a walk.
 *
 * Returns a freshly allocated Array holding elements [0, idx) of `input`.
 * The copy is created with the same capacity as `input`; the symbol pointers
 * are shared with the source, not duplicated. The caller owns the result. */
Array *slice(Array *input, int idx) {
  Array *head = (Array *)malloc(sizeof(Array));
  initArray(head, input->size);

  // Nothing is copied when idx is zero (or negative)
  int pos = 0;
  while (pos < idx) {
    terminal *src = &input->start[pos];
    insertArray(head, src->state, src->symbol, src->symbol_len,
                src->trigger_idx);
    pos++;
  }

  return head;
}
/* Copies the trailing part of a walk.
 *
 * Returns a freshly allocated Array holding elements [idx, input->used) of
 * `input`, created with the same capacity as `input`. Symbol pointers are
 * shared with the source. The caller owns the result. */
Array *slice_inverse(Array *input, int idx) {
  Array *tail = (Array *)malloc(sizeof(Array));
  initArray(tail, input->size);

  int pos = idx;
  while (pos < input->used) {
    terminal *src = &input->start[pos];
    insertArray(tail, src->state, src->symbol, src->symbol_len,
                src->trigger_idx);
    pos++;
  }

  return tail;
}
/* Copies a middle section of a walk.
 *
 * Returns a freshly allocated Array holding elements [start, end) of `input`
 * (`start` included, `end` excluded), created with the same capacity as
 * `input`. Symbol pointers are shared with the source. The caller owns the
 * result. */
Array *carve(Array *input, int start, int end) {
  Array *section = (Array *)malloc(sizeof(Array));
  initArray(section, input->size);

  int pos = start;
  while (pos < end) {
    terminal *src = &input->start[pos];
    insertArray(section, src->state, src->symbol, src->symbol_len,
                src->trigger_idx);
    pos++;
  }

  return section;
}
/* Appends `feature` to `prefix` a random number of times.
 *
 * The repetition count is drawn once as rand() % RECUR_THRESHOLD, so it lies
 * in [0, RECUR_THRESHOLD - 1]; a count of zero leaves `prefix` unchanged.
 * `prefix` is modified in place. */
void concatPrefixFeature(Array *prefix, Array *feature) {
  int repeats = rand() % RECUR_THRESHOLD;

  while (repeats-- > 0) {
    int pos = 0;
    while (pos < feature->used) {
      terminal *src = &feature->start[pos];
      insertArray(prefix, src->state, src->symbol, src->symbol_len,
                  src->trigger_idx);
      pos++;
    }
  }
}
/* Benchmark variant of concatPrefixFeature: appends `feature` to `prefix` a
 * fixed five times instead of a random number of times. `prefix` is modified
 * in place. */
void concatPrefixFeatureBench(Array *prefix, Array *feature) {
  // 5 is the number of times we compare performing random recursion.
  int repeats = 5;

  while (repeats-- > 0) {
    int pos = 0;
    while (pos < feature->used) {
      terminal *src = &feature->start[pos];
      insertArray(prefix, src->state, src->symbol, src->symbol_len,
                  src->trigger_idx);
      pos++;
    }
  }
}
/* Appends elements [idx, toSplice->used) of `toSplice` to `orig`.
 *
 * `orig` is extended in place and also returned for convenience; `toSplice`
 * is only read. */
Array *spliceGF(Array *orig, Array *toSplice, int idx) {
  int pos = idx;

  while (pos < toSplice->used) {
    terminal *src = &toSplice->start[pos];
    insertArray(orig, src->state, src->symbol, src->symbol_len,
                src->trigger_idx);
    pos++;
  }

  return orig;
}
/* Extends (or creates) an automaton walk by taking random transitions until
 * the final state is reached.
 *
 * @param pda    state table indexed by state number
 * @param input  walk to extend, or NULL to start a new walk from init_state.
 *               When non-NULL, the walk continues from the global
 *               `curr_state`, which the caller must have set beforehand.
 * @return the extended walk (newly allocated when `input` was NULL)
 *
 * The walk stops early when the current state lies outside the state table
 * or has no outgoing triggers, because choosing a transition there would be
 * an out-of-bounds read or a modulo by zero. */
Array *gen_input(state *pda, Array *input) {
  // Generating an input for the first time
  if (input == NULL) {
    input = (Array *)calloc(1, sizeof(Array));
    if (input == NULL) {
      perror("gen_input alloc");
      exit(1);
    }
    initArray(input, INIT_SIZE);
    curr_state = init_state;
  }

  while (curr_state != final_state) {
    // Refuse to index outside the state table
    if (curr_state < 0 || curr_state >= numstates) { break; }

    // Retrieving the state from the pda
    state *state_ptr = pda + curr_state;

    // A dead-end state has nothing to pick from
    if (state_ptr->trigger_len <= 0) { break; }

    // Get a random trigger
    int      randval = rand() % (state_ptr->trigger_len);
    trigger *trigger_ptr = (state_ptr->ptr) + randval;

    // Insert into the dynamic array
    insertArray(input, curr_state, trigger_ptr->term, trigger_ptr->term_len,
                randval);
    curr_state = trigger_ptr->dest;
  }

  return input;
}
/* Same as gen_input, but also counts the transitions taken.
 *
 * @param pda        state table indexed by state number
 * @param input      walk to extend, or NULL to start a new walk from
 *                   init_state. When non-NULL, the walk continues from the
 *                   global `curr_state`.
 * @param mut_count  incremented once per transition appended to the walk
 * @return the extended walk (newly allocated when `input` was NULL)
 *
 * The walk stops early when the current state lies outside the state table
 * or has no outgoing triggers, because choosing a transition there would be
 * an out-of-bounds read or a modulo by zero. */
Array *gen_input_count(state *pda, Array *input, int *mut_count) {
  // Generating an input for the first time
  if (input == NULL) {
    input = (Array *)calloc(1, sizeof(Array));
    if (input == NULL) {
      perror("gen_input_count alloc");
      exit(1);
    }
    initArray(input, INIT_SIZE);
    curr_state = init_state;
  }

  while (curr_state != final_state) {
    // Refuse to index outside the state table
    if (curr_state < 0 || curr_state >= numstates) { break; }

    // Retrieving the state from the pda
    state *state_ptr = pda + curr_state;

    // A dead-end state has nothing to pick from
    if (state_ptr->trigger_len <= 0) { break; }

    *mut_count += 1;

    // Get a random trigger
    int      randval = rand() % (state_ptr->trigger_len);
    trigger *trigger_ptr = (state_ptr->ptr) + randval;

    // Insert into the dynamic array
    insertArray(input, curr_state, trigger_ptr->term, trigger_ptr->term_len,
                randval);
    curr_state = trigger_ptr->dest;
  }

  return input;
}
/* Wraps a walk in a Candidate together with its state map.
 *
 * The state map has one integer utarray per automaton state (`numstates`
 * entries); entry s lists, in increasing order, every offset in the walk
 * whose terminal was emitted from state s. The Candidate refers to `input`
 * directly (no copy). The caller owns the returned Candidate and its map. */
Candidate *gen_candidate(Array *input) {
  // One (initially empty) offset list per state
  IdxMap_new *statemap = (IdxMap_new *)malloc(sizeof(IdxMap_new) * numstates);
  for (int s = 0; s < numstates; s++) {
    utarray_new(statemap[s].nums, &ut_int_icd);
  }

  Candidate *result = (Candidate *)malloc(sizeof(Candidate));
  result->walk = input;

  // Record each walk offset under the state it was generated from
  for (int pos = 0; pos < input->used; pos++) {
    int from_state = input->start[pos].state;
    utarray_push_back(statemap[from_state].nums, &pos);
  }

  result->statemap = statemap;
  return result;
}
/* Extracts the state name from a transition identifier.
 *
 * The state name is the part of `trigger` before the first '_'. If the string
 * contains no '_', the whole string is returned. Debug output is printed to
 * stdout.
 *
 * @param trigger  NUL-terminated transition identifier
 * @return newly allocated NUL-terminated copy of the state name (caller
 *         frees), or NULL if the allocation fails */
char *get_state(char *trigger) {
  printf("\nTrigger:%s", trigger);

  // Measure the prefix first so the buffer is always large enough; also stop
  // at the terminator so a missing '_' cannot run past the end of the string.
  int trigger_idx = 0;
  while (trigger[trigger_idx] != '_' && trigger[trigger_idx] != '\0') {
    trigger_idx += 1;
  }

  char *state = (char *)malloc((size_t)trigger_idx + 1);
  if (state == NULL) { return NULL; }

  memcpy(state, trigger, (size_t)trigger_idx);
  state[trigger_idx] = '\0';

  printf("\nTrigger Idx:%d", trigger_idx);
  return state;
}
/* Prints the concatenated symbols of a walk to stdout, framed by separator
 * lines and labelled with `prefix`.
 *
 * The symbols are streamed directly instead of being assembled in a stack
 * buffer, so neither an empty walk nor long symbols or long walks can
 * overflow anything. */
void print_repr(Array *input, char *prefix) {
  printf("\n=============");
  printf("\n%s:", prefix);

  for (size_t offset = 0; offset < input->used; offset++) {
    const char *symbol = input->start[offset].symbol;
    if (symbol != NULL) { fputs(symbol, stdout); }
  }

  printf("\n=============");
}
// int main(int argc, char*argv[]) {
// char *mode;
// if (argc == 1) {
// printf("\nUsage: ./gramfuzzer <mode>");
// return -1;
// }
// if (argc >= 2) {
// mode = argv[1];
// printf("\nMode:%s", mode);
// }
// if (! strcmp(mode, "Generate")) {
// GenInputBenchmark();
// }
// else if (! strcmp(mode, "RandomMutation")) {
// RandomMutationBenchmark();
// }
// else if (! strcmp(mode, "Splice")) {
// SpliceMutationBenchmark();
// }
// else if (! strcmp(mode, "Recursive")) {
// RandomRecursiveBenchmark();
// }
// else {
// printf("\nUnrecognized mode");
// return -1;
// }
// return 0;
// }

View File

@ -0,0 +1,248 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "afl-fuzz.h"
#include "gramfuzz.h"
/* Random mutation: keeps a random-length prefix of `input` and regenerates
 * the remainder of the walk from the state at the cut point.
 *
 * @param pda    state table indexed by state number
 * @param input  walk to mutate (not modified)
 * @return newly allocated mutated walk; the caller owns it
 *
 * An empty (or NULL) walk has no cut point, so a walk is generated from
 * scratch instead of computing rand() % 0. */
Array *performRandomMutation(state *pda, Array *input) {
  if (input == NULL || input->used == 0) { return gen_input(pda, NULL); }

  // Get offset at which to generate new input and slice it
  int    idx = rand() % input->used;
  Array *sliced = slice(input, idx);

  // Resume generation from the state the removed element was emitted from
  curr_state = input->start[idx].state;

  return gen_input(pda, sliced);
}
// utarray element descriptor for (orig_idx, splice_idx) pairs
UT_icd intpair_icd = {sizeof(intpair_t), NULL, NULL, NULL};

/* Tries to perform a splice operation between two automaton walks.
 *
 * For every element of `splicecand` whose state also occurs in the original
 * walk, one matching offset of the original is chosen at random and recorded
 * as a candidate pair. One pair is then picked at random and the result is
 * originput[0, orig_idx) followed by splicecand[splice_idx, end).
 *
 * @param originput      walk being fuzzed (not modified)
 * @param statemap_orig  per-state offset lists for `originput`
 * @param splicecand     walk to take the tail from (not modified)
 * @return newly allocated walk owned by the caller. When the two walks share
 *         no state, a plain copy of `originput` is returned. */
Array *performSpliceOne(Array *originput, IdxMap_new *statemap_orig,
                        Array *splicecand) {
  UT_array *pairs;

  // Initialize the dynamic array holding the splice index pairs
  utarray_new(pairs, &intpair_icd);

  // Iterate through the splice candidate identifying potential splice points
  // and pushing pair (orig_idx, splice_idx) to a dynamic array
  for (int x = 0; x < splicecand->used; x++) {
    terminal *term_ptr = &splicecand->start[x];
    UT_array *stateptr = statemap_orig[term_ptr->state].nums;
    int       length = utarray_len(stateptr);

    if (length) {
      // utarray_eltptr is a macro and may evaluate its index argument more
      // than once, so draw the random index into a variable first.
      int       pick = rand() % length;
      int *     splice_idx = (int *)utarray_eltptr(stateptr, pick);
      intpair_t ip;
      ip.orig_idx = *splice_idx;
      ip.splice_idx = x;
      utarray_push_back(pairs, &ip);
    }
  }

  int npairs = utarray_len(pairs);
  if (npairs == 0) {
    // No common state: nothing to splice, hand back an unmodified copy
    utarray_free(pairs);
    return slice(originput, (int)originput->used);
  }

  // Pick a random pair
  int        chosen = rand() % npairs;
  intpair_t *cand = (intpair_t *)utarray_eltptr(pairs, chosen);

  // Perform the splicing
  Array *prefix = slice(originput, cand->orig_idx);
  Array *spliced = spliceGF(prefix, splicecand, cand->splice_idx);

  utarray_free(pairs);
  return spliced;
}
/* Finds the states that occur at least twice in a walk.
 *
 * @param input      walk to analyse (not modified)
 * @param recur_len  in/out counter; it is incremented once per duplicated
 *                   state and is also the index at which the next result is
 *                   stored. Callers are presumably expected to pass a counter
 *                   initialised to 0 (TODO confirm against callers).
 * @return array of `numstates` slots whose first *recur_len entries are
 *         integer utarrays listing the walk offsets of one duplicated state
 *         each, or NULL when no state is duplicated or an allocation fails.
 *         The caller owns the returned array and the utarrays stored in it.
 *
 * Everything that is not handed to the caller is released here: the temporary
 * per-state table, the offset lists of non-duplicated states, and the result
 * array itself on the NULL path. */
UT_array **get_dupes(Array *input, int *recur_len) {
  // Temporary per-state offset lists
  IdxMap_new *idxmapStart =
      (IdxMap_new *)malloc(sizeof(IdxMap_new) * numstates);
  UT_array **recurIdx = malloc(sizeof(UT_array *) * numstates);

  if (idxmapStart == NULL || recurIdx == NULL) {
    free(idxmapStart);
    free(recurIdx);
    return NULL;
  }

  for (int x = 0; x < numstates; x++) {
    utarray_new(idxmapStart[x].nums, &ut_int_icd);
  }

  // Obtain frequency distribution of states
  for (int offset = 0; offset < input->used; offset++) {
    int state = input->start[offset].state;
    utarray_push_back(idxmapStart[state].nums, &offset);
  }

  // Retrieve the duplicated states; drop the lists nobody will look at
  for (int x = 0; x < numstates; x++) {
    UT_array *nums = idxmapStart[x].nums;
    if (utarray_len(nums) >= 2) {
      recurIdx[*recur_len] = nums;
      *recur_len += 1;
    } else {
      utarray_free(nums);
    }
  }

  // The surviving lists are referenced from recurIdx; the table that held
  // them is no longer needed.
  free(idxmapStart);

  if (*recur_len) { return recurIdx; }

  free(recurIdx);
  return NULL;
}
/* Recursive-feature mutation.
 *
 * Picks one duplicated state at random from `recur`, picks two of its
 * occurrences in the walk, and treats the elements between them as a
 * repeatable "feature". The result is:
 *   input[0, firstIdx) + feature repeated a random number of times
 *   + input[secondIdx, end)
 *
 * @param input     walk being fuzzed (not modified)
 * @param recur     offset lists of duplicated states
 * @param recurlen  number of valid entries in `recur`; must be non-zero
 * @return newly allocated walk owned by the caller */
Array *doMult(Array *input, UT_array **recur, int recurlen) {
  UT_array *occurrences = recur[rand() % (recurlen)];

  // Choose two indices to get the recursive feature
  int firstIdx = 0;
  int secondIdx = 0;
  getTwoIndices(occurrences, utarray_len(occurrences), &firstIdx, &secondIdx);

  // Perform the recursive mut
  Array *prefix = slice(input, firstIdx);

  // The feature always runs from the smaller to the larger offset
  int    feature_start = (firstIdx < secondIdx) ? firstIdx : secondIdx;
  int    feature_end = (firstIdx < secondIdx) ? secondIdx : firstIdx;
  Array *feature = carve(input, feature_start, feature_end);

  concatPrefixFeature(prefix, feature);

  // The feature was only a scratch copy
  free(feature->start);
  free(feature);

  return spliceGF(prefix, input, secondIdx);
}
/* Picks two entries of an integer utarray at random.
 *
 * The entries of `recur` are copied into a local array, the copy is shuffled
 * (Fisher-Yates, see
 * https://www.geeksforgeeks.org/shuffle-a-given-array-using-fisher-yates-shuffle-algorithm/)
 * and the first two shuffled values are reported.
 *
 * @param recur      integer utarray to pick from
 * @param recurlen   number of entries in `recur`; callers in this file pass
 *                   utarray_len(recur) for arrays with at least two entries
 * @param firstIdx   receives the first picked value
 * @param secondIdx  receives the second picked value */
void getTwoIndices(UT_array *recur, int recurlen, int *firstIdx,
                   int *secondIdx) {
  int shuffled[recurlen];
  int count = 0;

  // Unroll into an array
  int *elem = (int *)utarray_front(recur);
  while (elem != NULL) {
    shuffled[count] = *elem;
    count += 1;
    elem = (int *)utarray_next(recur, elem);
  }

  // Walk from the back, exchanging each slot with a random slot at or before
  // it
  for (int last = count - 1; last > 0; last--) {
    int pick = rand() % (last + 1);
    int held = shuffled[last];
    shuffled[last] = shuffled[pick];
    shuffled[pick] = held;
  }

  // Get the first two indices
  *firstIdx = shuffled[0];
  *secondIdx = shuffled[1];
}
/* Exchanges the two integers pointed to by `a` and `b`. Passing the same
 * address for both is allowed and leaves the value unchanged. */
void swap(int *a, int *b) {
  const int from_a = *a;
  const int from_b = *b;

  *a = from_b;
  *b = from_a;
}

View File

@ -0,0 +1,268 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "afl-fuzz.h"
#include "gramfuzz.h"
#ifdef _GNU_SOURCE
#undef _GNU_SOURCE
#endif
#define _GNU_SOURCE
#include <sys/mman.h>
/* Dynamic array holding the input representation (an automaton walk).
 *
 * initArray prepares `a` as an empty array with room for `initialSize`
 * zero-filled terminals. */
void initArray(Array *a, size_t initialSize) {
  a->used = 0;
  a->inputlen = 0;
  a->size = initialSize;
  a->start = (terminal *)calloc(1, initialSize * sizeof(terminal));
}
/* Appends one terminal to the walk, growing the storage when it is full.
 *
 * @param a            array to append to
 * @param state        automaton state the terminal was emitted from
 * @param symbol       terminal text; the pointer is stored, not copied
 * @param symbol_len   length of `symbol`, added to a->inputlen
 * @param trigger_idx  index of the chosen trigger within `state`
 *
 * Capacity doubles on growth (an empty array grows to one slot). The newly
 * added tail is zeroed because write_input() dumps all `size` slots to disk,
 * not just the used ones. The process exits if the array cannot grow. */
void insertArray(Array *a, int state, char *symbol, size_t symbol_len,
                 int trigger_idx) {
  if (a->used >= a->size) {
    size_t    new_size = a->size ? a->size * 2 : 1;
    terminal *grown = NULL;

    // Only attempt the realloc when neither the doubling nor the byte count
    // overflowed
    if (new_size > a->size && new_size <= (size_t)-1 / sizeof(terminal)) {
      grown = (terminal *)realloc(a->start, new_size * sizeof(terminal));
    }

    if (grown == NULL) {
      fprintf(stderr, "\n insertArray: could not grow array, exiting\n");
      exit(1);
    }

    memset(grown + a->size, 0, (new_size - a->size) * sizeof(terminal));
    a->start = grown;
    a->size = new_size;
  }

  // Add the element
  terminal *term_ptr = &a->start[a->used];
  term_ptr->state = state;
  term_ptr->symbol = symbol;
  term_ptr->symbol_len = symbol_len;
  term_ptr->trigger_idx = trigger_idx;

  // Account for the new element
  a->used += 1;
  a->inputlen += symbol_len;
}
/* Releases the storage of a walk and resets it to the empty state.
 *
 * Only the terminal buffer is freed: the elements live inside that single
 * allocation and must not be passed to free() one by one, and the symbol
 * pointers stored in them are not owned by the array. The Array struct itself
 * is left for the caller to release. */
void freeArray(Array *a) {
  free(a->start);
  a->start = NULL;
  a->used = a->size = 0;
  a->inputlen = 0;
}
/* Dynamic array for adding indices of states/recursive features
 * Source:
 * https://stackoverflow.com/questions/3536153/c-dynamically-growing-array
 *
 * initArrayIdx prepares `a` as an empty index list with room for
 * `initialSize` integers (contents uninitialised).
 */
void initArrayIdx(IdxMap *a, size_t initialSize) {
  a->size = initialSize;
  a->used = 0;
  a->array = (int *)malloc(initialSize * sizeof(int));
}
/* Appends `idx` to the index list, doubling the capacity when it is full.
 *
 * A list created with capacity zero grows to one slot (doubling zero would
 * never make room). The process exits if the list cannot grow. */
void insertArrayIdx(IdxMap *a, int idx) {
  if (a->used >= a->size) {
    size_t new_size = a->size ? a->size * 2 : 1;
    int *  grown = NULL;

    // Only attempt the realloc when neither the doubling nor the byte count
    // overflowed
    if (new_size > a->size && new_size <= (size_t)-1 / sizeof(int)) {
      grown = (int *)realloc(a->array, new_size * sizeof(int));
    }

    if (grown == NULL) {
      fprintf(stderr, "\n insertArrayIdx: could not grow array, exiting\n");
      exit(1);
    }

    a->array = grown;
    a->size = new_size;
  }

  a->array[a->used++] = idx;
}
/* Releases the storage of an index list and resets it to the empty state.
 * The IdxMap struct itself is left for the caller to release. */
void freeArrayIdx(IdxMap *a) {
  free(a->array);
  a->array = NULL;
  a->size = 0;
  a->used = 0;
}
/* Dynamic array for adding potential splice points.
 *
 * initArraySplice prepares `a` as an empty list with room for `initialSize`
 * splice candidates (contents uninitialised).
 */
void initArraySplice(SpliceCandArray *a, size_t initialSize) {
  a->size = initialSize;
  a->used = 0;
  a->start = (SpliceCand *)malloc(initialSize * sizeof(SpliceCand));
}
/* Appends a (candidate, idx) splice point, growing the storage when full.
 *
 * @param a          list to append to
 * @param candidate  candidate the splice point belongs to (pointer stored)
 * @param idx        offset of the splice point
 *
 * Capacity doubles on growth (an empty list grows to one slot). The process
 * exits if the list cannot grow. */
void insertArraySplice(SpliceCandArray *a, Candidate *candidate, int idx) {
  if (a->used >= a->size) {
    size_t      new_size = a->size ? a->size * 2 : 1;
    SpliceCand *grown = NULL;

    // Only attempt the realloc when neither the doubling nor the byte count
    // overflowed
    if (new_size > a->size && new_size <= (size_t)-1 / sizeof(SpliceCand)) {
      grown = (SpliceCand *)realloc(a->start, new_size * sizeof(SpliceCand));
    }

    if (grown == NULL) {
      fprintf(stderr, "\n insertArraySplice: could not grow array, exiting\n");
      exit(1);
    }

    a->start = grown;
    a->size = new_size;
  }

  // Add the element
  SpliceCand *candptr = &a->start[a->used];
  candptr->splice_cand = candidate;
  candptr->idx = idx;
  a->used += 1;
}
/* Releases the buffer of `a` and resets it to the empty state.
 *
 * NOTE(review): despite the name this takes an IdxMap, not a SpliceCandArray,
 * and frees `a->array`; presumably a copy of freeArrayIdx. Confirm the
 * intended parameter type before using it on splice lists. */
void freeArraySplice(IdxMap *a) {
  free(a->array);
  a->array = NULL;
  a->size = 0;
  a->used = 0;
}
/* Returns n! as an int. Values of n below 2 (including negatives) yield 1.
 * The result is only meaningful while it fits in an int. */
int fact(int n) {
  int product = 1;

  while (n > 1) {
    product *= n;
    n -= 1;
  }

  return product;
}
/* Uses the walk to create the input in-memory.
 *
 * Concatenates the symbols of all used terminals into one NUL-terminated
 * buffer of at most input->inputlen bytes (plus the terminator).
 *
 * @param input  walk to serialise (not modified)
 * @return newly allocated buffer owned by the caller, or NULL if the
 *         allocation fails. An empty walk yields an empty string.
 *
 * Symbols are appended at a running offset instead of with strcat, and the
 * copy is clamped to the allocated size so a walk whose recorded length
 * disagrees with its symbols cannot write past the buffer. */
u8 *unparse_walk(Array *input) {
  u8 *unparsed = (u8 *)malloc(input->inputlen + 1);
  if (unparsed == NULL) { return NULL; }

  size_t written = 0;
  for (size_t offset = 0; offset < input->used; offset++) {
    const char *symbol = input->start[offset].symbol;
    if (symbol == NULL) { continue; }

    size_t len = strlen(symbol);
    size_t room = input->inputlen - written;
    if (len > room) { len = room; }

    memcpy(unparsed + written, symbol, len);
    written += len;
  }

  unparsed[written] = '\0';
  return unparsed;
}
/* Dump the input representation into a file.
 *
 * File layout: `used`, `size`, `inputlen` (each a size_t) followed by all
 * `size` terminal slots of the array.
 *
 * @param input  walk to store
 * @param fn     destination path; an existing file is left untouched
 *
 * The file is created exclusively. If it appears between the existence check
 * and the open, that is treated like "already exists" and skipped silently.
 * Any other open failure, or a failed/short write, terminates the process;
 * a partially written file is removed first. */
void write_input(Array *input, u8 *fn) {
  const char *path = (const char *)fn;

  // If file already exists, then skip creating the file
  if (access(path, F_OK) != -1) { return; }

  FILE *fp = fopen(path, "wbx");
  if (fp == NULL) {
    // Somebody else flushed this input in the meantime: skip silently
    if (access(path, F_OK) != -1) { return; }
    fprintf(stderr, "\n File '%s' could not be open, exiting\n", fn);
    exit(1);
  }

  // Write the length parameters
  int ok = fwrite(&input->used, sizeof(size_t), 1, fp) == 1 &&
           fwrite(&input->size, sizeof(size_t), 1, fp) == 1 &&
           fwrite(&input->inputlen, sizeof(size_t), 1, fp) == 1;

  // Write the dynamic array to file
  if (ok && input->size) {
    ok = fwrite(input->start, sizeof(terminal), input->size, fp) ==
         input->size;
  }

  // fclose flushes, so its result is part of the write
  if (fclose(fp) != 0) { ok = 0; }

  if (!ok) {
    unlink(path);
    fprintf(stderr, "\n File '%s' could not be written, exiting\n", fn);
    exit(1);
  }
}
/* Reads a walk previously stored by write_input from an open stream.
 *
 * The stored symbol pointers are meaningless in this process, so each used
 * terminal's symbol is re-resolved through `pda` from its state and trigger
 * index.
 *
 * @param pda  state table indexed by state number
 * @param fp   stream positioned at the start of a stored walk
 * @return newly allocated walk owned by the caller, or NULL when an
 *         allocation fails, the file is truncated, or its contents are
 *         inconsistent (used > size, state or trigger index out of range) */
Array *parse_input(state *pda, FILE *fp) {
  Array *input = (Array *)calloc(1, sizeof(Array));
  if (!input) {
    fprintf(stderr, "alloc failed!\n");
    return NULL;
  }

  // Read the length parameters
  if (fread(&input->used, sizeof(size_t), 1, fp) != 1 ||
      fread(&input->size, sizeof(size_t), 1, fp) != 1 ||
      fread(&input->inputlen, sizeof(size_t), 1, fp) != 1 ||
      input->used > input->size) {
    fprintf(stderr, "corrupt automaton walk file!\n");
    free(input);
    return NULL;
  }

  // calloc checks count * size for overflow; allocate at least one slot so a
  // zero-capacity walk is not mistaken for an allocation failure
  terminal *start_ptr =
      (terminal *)calloc(input->size ? input->size : 1, sizeof(terminal));
  if (!start_ptr) {
    fprintf(stderr, "alloc failed!\n");
    free(input);
    return NULL;
  }

  // Read the dynamic array to memory
  if (input->size &&
      fread(start_ptr, sizeof(terminal), input->size, fp) != input->size) {
    fprintf(stderr, "corrupt automaton walk file!\n");
    free(start_ptr);
    free(input);
    return NULL;
  }

  // Update the pointers to the terminals since they would have changed
  for (size_t idx = 0; idx < input->used; idx++) {
    terminal *term = &start_ptr[idx];

    // Find the state
    if (term->state < 0 || term->state >= numstates) {
      fprintf(stderr, "corrupt automaton walk file!\n");
      free(start_ptr);
      free(input);
      return NULL;
    }
    state *state_ptr = pda + term->state;

    // Find the trigger and update the terminal address
    if (term->trigger_idx < 0 || term->trigger_idx >= state_ptr->trigger_len) {
      fprintf(stderr, "corrupt automaton walk file!\n");
      free(start_ptr);
      free(input);
      return NULL;
    }
    term->symbol = state_ptr->ptr[term->trigger_idx].term;
  }

  input->start = start_ptr;
  return input;
}
/* Reads the stored input representation at path `fn` into memory.
 *
 * Terminates the process if the file cannot be opened. Otherwise returns
 * whatever parse_input produces for the file's contents (which may be NULL);
 * the caller owns the result. */
Array *read_input(state *pda, u8 *fn) {
  FILE *walk_file = fopen(fn, "rb");

  if (!walk_file) {
    fprintf(stderr, "\n File '%s' does not exist, exiting\n", fn);
    exit(1);
  }

  Array *walk = parse_input(pda, walk_file);
  fclose(walk_file);

  return walk;
}

View File

@ -0,0 +1,429 @@
// This simple example just creates random buffer <= 100 filled with 'A'
// needs -I /path/to/AFLplusplus/include
//#include "custom_mutator_helpers.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "afl-fuzz.h"
#include "gramfuzz.h"
#define MUTATORS 4 // Specify the total number of mutators
// Per-instance state of the custom mutator; allocated zero-filled in
// afl_custom_init and released in afl_custom_deinit.
typedef struct my_mutator {
// AFL++ state handed to afl_custom_init; used to pick splice partners from
// the queue
afl_state_t *afl;
// Scratch buffer of MAX_FILE bytes allocated in afl_custom_init
u8 * mutator_buf;
// Text of the most recent mutant as returned by unparse_walk; freed and
// replaced on every afl_custom_fuzz call
u8 * unparsed_input;
// Walk produced by the most recent afl_custom_fuzz call (valid while
// mut_alloced is set)
Array *mutated_walk;
// Walk of the queue entry currently being fuzzed, loaded in
// afl_custom_queue_get (valid while orig_alloced is set)
Array *orig_walk;
IdxMap_new *statemap; // Keeps track of the statemap
// Offset lists of the states occurring at least twice in orig_walk; the
// entries point into statemap and are not owned separately
UT_array ** recurIdx;
// Get_Dupes_Ret* getdupesret; // Recursive feature map
// Number of valid entries in recurIdx
int recurlen;
// Non-zero while mutated_walk holds an allocation that must be freed
int mut_alloced;
// Non-zero while orig_walk holds an allocation that must be freed
int orig_alloced;
int mut_idx; // Signals the current mutator being used, used to cycle through
// each mutator
// Seed passed to afl_custom_init (also given to srand there)
unsigned int seed;
} my_mutator_t;
state *create_pda(u8 *automaton_file) {
struct json_object *parsed_json;
state * pda;
json_object * source_obj, *attr;
int arraylen, ii, ii2, trigger_len, error;
printf("\n[GF] Automaton file passed:%s", automaton_file);
// parsed_json =
// json_object_from_file("./gramfuzz/php_gnf_processed_full.json");
parsed_json = json_object_from_file(automaton_file);
// Getting final state
source_obj = json_object_object_get(parsed_json, "final_state");
printf("\t\nFinal=%s\n", json_object_get_string(source_obj));
final_state = atoi(json_object_get_string(source_obj));
// Getting initial state
source_obj = json_object_object_get(parsed_json, "init_state");
init_state = atoi(json_object_get_string(source_obj));
printf("\tInit=%s\n", json_object_get_string(source_obj));
// Getting number of states
source_obj = json_object_object_get(parsed_json, "numstates");
numstates = atoi(json_object_get_string(source_obj)) + 1;
printf("\tNumStates=%d\n", numstates);
// Allocate state space for each pda state
pda = (state *)calloc(atoi(json_object_get_string(source_obj)) + 1,
sizeof(state));
// Getting PDA representation
source_obj = json_object_object_get(parsed_json, "pda");
enum json_type type;
json_object_object_foreach(source_obj, key, val) {
state * state_ptr;
trigger *trigger_ptr;
int offset;
// Get the correct offset into the pda to store state information
state_ptr = pda;
offset = atoi(key);
state_ptr += offset;
// Store state string
state_ptr->state_name = offset;
// Create trigger array of structs
trigger_len = json_object_array_length(val);
state_ptr->trigger_len = trigger_len;
trigger_ptr = (trigger *)calloc(trigger_len, sizeof(trigger));
state_ptr->ptr = trigger_ptr;
for (ii = 0; ii < trigger_len; ii++) {
json_object *obj = json_object_array_get_idx(val, ii);
// Get all the trigger trigger attributes
attr = json_object_array_get_idx(obj, 0);
(trigger_ptr)->id = strdup(json_object_get_string(attr));
attr = json_object_array_get_idx(obj, 1);
trigger_ptr->dest = atoi(json_object_get_string(attr));
attr = json_object_array_get_idx(obj, 2);
if (!strcmp("\\n", json_object_get_string(attr))) {
trigger_ptr->term = strdup("\n");
} else {
trigger_ptr->term = strdup(json_object_get_string(attr));
}
trigger_ptr->term_len = strlen(trigger_ptr->term);
trigger_ptr++;
}
}
// Delete the JSON object
json_object_put(parsed_json);
return pda;
}
/**
 * Initialize the custom mutator: seed the RNG, allocate the mutator state and
 * load the automaton named by the GRAMATRON_AUTOMATION environment variable
 * into the global `pda`.
 *
 * @param afl   AFL++ state, stored for later queue access
 * @param seed  seed for rand(), also stored in the mutator state
 * @return the mutator state, or NULL if an allocation fails. The process
 *         exits if GRAMATRON_AUTOMATION is not set.
 */
my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {
  srand(seed);

  // calloc leaves every counter, flag and pointer of the state zeroed
  my_mutator_t *data = calloc(1, sizeof(my_mutator_t));
  if (!data) {
    perror("afl_custom_init alloc");
    return NULL;
  }

  if ((data->mutator_buf = malloc(MAX_FILE)) == NULL) {
    perror("mutator_buf alloc");
    free(data);
    return NULL;
  }

  data->afl = afl;
  data->seed = seed;

  char *automaton_file = getenv("GRAMATRON_AUTOMATION");
  if (!automaton_file) {
    fprintf(stderr,
            "\nError: GrammaTron needs an automation json file set in "
            "GRAMATRON_AUTOMATION\n");
    exit(-1);
  }

  pda = create_pda(automaton_file);

  return data;
}
/**
 * Produce one mutant of the current queue entry.
 *
 * The mutators are used round-robin, one per call, selected by data->mut_idx:
 *   0 - random mutation of the original walk
 *   1 - recursive-feature mutation (a fresh walk is generated instead when
 *       the original walk has no duplicated state)
 *   2 - splice with the stored walk of a random queue entry
 *   3 - fresh walk generated from scratch
 *
 * The byte buffers passed in by AFL++ are not used; mutation works on the
 * walk loaded in afl_custom_queue_get.
 *
 * @param data     mutator state
 * @param out_buf  receives the textual form of the mutated walk; the buffer
 *                 stays owned by the mutator and is replaced on the next call
 * @return length of the mutant in bytes (the walk's inputlen)
 */
size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size,
                       u8 **out_buf, uint8_t *add_buf, size_t add_buf_size,
                       size_t max_size) {
  // GC old mutant
  if (data->mut_alloced) {
    free(data->mutated_walk->start);
    free(data->mutated_walk);
    data->mut_alloced = 0;
  }

  switch (data->mut_idx) {
    case 0:  // Perform random mutation
      data->mutated_walk = performRandomMutation(pda, data->orig_walk);
      break;

    case 1:  // Perform recursive mutation, if there is a feature to repeat
      if (data->recurlen) {
        data->mutated_walk =
            doMult(data->orig_walk, data->recurIdx, data->recurlen);
      } else {
        data->mutated_walk = gen_input(pda, NULL);
      }
      break;

    case 2: {  // Perform splice mutation
      // we cannot use the supplied splice data so choose a new random file
      u32                 tid = rand() % data->afl->queued_paths;
      struct queue_entry *q = data->afl->queue_buf[tid];

      // Read the input representation for the splice candidate
      u8 *   automaton_fn = alloc_printf("%s.aut", q->fname);
      Array *spliceCandidate = read_input(pda, automaton_fn);

      if (spliceCandidate) {
        data->mutated_walk =
            performSpliceOne(data->orig_walk, data->statemap, spliceCandidate);
        free(spliceCandidate->start);
        free(spliceCandidate);
      } else {
        data->mutated_walk = gen_input(pda, NULL);
      }

      ck_free(automaton_fn);
      break;
    }

    default:  // Generate an input from scratch
      data->mutated_walk = gen_input(pda, NULL);
      break;
  }

  data->mut_alloced = 1;

  // Cycle to the next mutator, wrapping around at the end of the list
  data->mut_idx = (data->mut_idx == MUTATORS - 1) ? 0 : data->mut_idx + 1;

  // Unparse the mutated automaton walk
  free(data->unparsed_input);
  data->unparsed_input = unparse_walk(data->mutated_walk);
  *out_buf = data->unparsed_input;

  return data->mutated_walk->inputlen;
}
/**
 * Create the automaton-based representation for the corresponding input.
 *
 * The walk is stored next to the queue file as "<filename_new_queue>.aut".
 * When the entry derives from an existing one, the walk of the most recent
 * mutant is stored. When there is no original entry (initialization), a fresh
 * walk is generated and the placeholder queue file is replaced with its text.
 *
 * @param data pointer returned in afl_custom_init for this fuzz case
 * @param filename_new_queue File name of the new queue entry
 * @param filename_orig_queue File name of the original queue entry
 * @return always 1
 */
u8 afl_custom_queue_new_entry(my_mutator_t * data,
                              const uint8_t *filename_new_queue,
                              const uint8_t *filename_orig_queue) {
  u8 *automaton_fn = alloc_printf("%s.aut", filename_new_queue);

  if (filename_orig_queue) {
    // NOTE(review): assumes the new entry was produced by afl_custom_fuzz so
    // that data->mutated_walk is set; confirm for entries from other sources.
    write_input(data->mutated_walk, automaton_fn);

  } else {
    // Called during initialization: replace the seed with a generated input
    Array *new_input = gen_input(pda, NULL);
    write_input(new_input, automaton_fn);

    // Update the placeholder file
    if (unlink(filename_new_queue)) {
      PFATAL("Unable to delete '%s'", filename_new_queue);
    }

    u8 *unparsed_input = unparse_walk(new_input);
    s32 fd = open(filename_new_queue, O_WRONLY | O_CREAT | O_TRUNC,
                  S_IRUSR | S_IWUSR);
    if (fd < 0) { PFATAL("Failed to update file '%s'", filename_new_queue); }

    // The terminating NUL is written along with the text
    size_t  to_write = new_input->inputlen + 1;
    ssize_t written = write(fd, unparsed_input, to_write);
    if (written < 0 || (size_t)written != to_write) {
      PFATAL("Short write to '%s'", filename_new_queue);
    }

    close(fd);

    free(new_input->start);
    free(new_input);
    free(unparsed_input);
  }

  ck_free(automaton_fn);

  return 1;
}
/* Frees the per-queue-entry data (original walk, statemap, recursion list)
 * and resets the corresponding fields. */
static void release_queue_state(my_mutator_t *data) {
  if (data->orig_alloced) {
    free(data->orig_walk->start);
    free(data->orig_walk);
  }
  data->orig_walk = NULL;
  data->orig_alloced = 0;

  if (data->statemap) {
    for (int x = 0; x < numstates; x++) {
      utarray_free(data->statemap[x].nums);
    }
    free(data->statemap);
    data->statemap = NULL;
  }

  // The entries of recurIdx point into statemap, so only the list is freed
  free(data->recurIdx);
  data->recurIdx = NULL;
  data->recurlen = 0;
}

/**
 * Get the corresponding tree representation for the candidate that is to be
 * mutated
 *
 * Loads the walk stored as "<filename>.aut", then builds the statemap (walk
 * offsets per state) and the list of states that occur at least twice.
 *
 * @param[in] data pointer returned in afl_custom_init for this fuzz case
 * @param filename File name of the test case in the queue entry
 * @return Return True(1) if the fuzzer will fuzz the queue entry, and
 *         False(0) otherwise. 0 is returned when the stored walk cannot be
 *         loaded, refers to a state outside the automaton, or memory runs
 *         out.
 */
uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) {
  // Drop whatever belonged to the previous queue entry
  release_queue_state(data);

  u8 *automaton_fn = alloc_printf("%s.aut", filename);
  data->orig_walk = read_input(pda, automaton_fn);
  ck_free(automaton_fn);

  if (data->orig_walk == NULL) { return 0; }
  data->orig_alloced = 1;

  // Create statemap for the fuzz candidate
  IdxMap_new *statemap_start =
      (IdxMap_new *)malloc(sizeof(IdxMap_new) * numstates);
  data->recurIdx = malloc(sizeof(UT_array *) * numstates);
  if (statemap_start == NULL || data->recurIdx == NULL) {
    free(statemap_start);
    release_queue_state(data);
    return 0;
  }

  for (int x = 0; x < numstates; x++) {
    utarray_new(statemap_start[x].nums, &ut_int_icd);
  }
  data->statemap = statemap_start;

  for (int offset = 0; offset < data->orig_walk->used; offset++) {
    int state = data->orig_walk->start[offset].state;

    // A state outside the table means the stored walk does not belong to
    // this automaton
    if (state < 0 || state >= numstates) {
      release_queue_state(data);
      return 0;
    }

    utarray_push_back(statemap_start[state].nums, &offset);
  }

  // Create recursive feature map (if it exists): every state that was
  // visited at least twice
  for (int x = 0; x < numstates; x++) {
    UT_array *nums = data->statemap[x].nums;
    if (utarray_len(nums) >= 2) {
      data->recurIdx[data->recurlen] = nums;
      data->recurlen += 1;
    }
  }

  return 1;
}
/**
 * Deinitialize everything
 *
 * Releases the mutator state together with everything it still owns: the
 * last mutated walk, the current original walk, the statemap, the recursion
 * list, the last unparsed input and the scratch buffer.
 *
 * @param data The data ptr from afl_custom_init
 */
void afl_custom_deinit(my_mutator_t *data) {
  if (data == NULL) { return; }

  if (data->mut_alloced) {
    free(data->mutated_walk->start);
    free(data->mutated_walk);
  }

  if (data->orig_alloced) {
    free(data->orig_walk->start);
    free(data->orig_walk);
  }

  if (data->statemap) {
    for (int x = 0; x < numstates; x++) {
      utarray_free(data->statemap[x].nums);
    }
    free(data->statemap);
  }

  // The entries of recurIdx point into statemap, so only the list is freed
  free(data->recurIdx);

  free(data->unparsed_input);
  free(data->mutator_buf);
  free(data);
}

View File

@ -0,0 +1,253 @@
#ifndef _GRAMFUZZ_H
#define _GRAMFUZZ_H
#include <json-c/json.h>
#include <unistd.h>
#include "hashmap.h"
#include "uthash.h"
#include "utarray.h"
#define INIT_INPUTS 100 // No. of initial inputs to be generated
// Set this as `numstates` + 1 where `numstates` is retrieved from gen automata
// json #define STATES 63
#define INIT_SIZE 100 // Initial size of the dynamic array holding the input
#define SPLICE_CORPUS 10000
#define RECUR_THRESHOLD 6
#define SIZE_THRESHOLD 2048
#define FLUSH_INTERVAL \
3600 // Inputs that gave new coverage will be dumped every FLUSH_INTERVAL
// seconds
// One outgoing transition of an automaton state, filled in by create_pda
// from a 3-element JSON array.
typedef struct trigger {
// Copy of the first array element (transition identifier)
char * id;
// Destination state number (second array element)
int dest;
// Terminal text emitted by the transition (third array element)
char * term;
// strlen(term)
size_t term_len;
} trigger;
// One automaton state; the state table is an array of these indexed by state
// number.
typedef struct state {
int state_name; // Integer State name
int trigger_len; // Number of triggers associated with this state
trigger *ptr; // Pointer to beginning of the list of triggers
} state;
// One step of a walk, as stored by insertArray.
typedef struct terminal {
// State the step was taken from
int state;
// Index of the chosen trigger within that state's trigger list
int trigger_idx;
// Length of `symbol`
size_t symbol_len;
// Terminal text; the pointer is stored as given, not copied
char * symbol;
} terminal;
// NOTE(review): not used by any code visible here; presumably a frequency
// bucket for the hashmap-based helpers - confirm.
typedef struct buckethash {
int freq;
} buckethash;
// Automaton-wide globals. init_state, final_state and numstates are set by
// create_pda; curr_state is the position of the walk generator.
// NOTE(review): these are definitions in a header, so every translation unit
// that includes it gets its own copy - consider extern declarations with a
// single definition.
int init_state;
int curr_state;
int final_state;
int numstates;
/*****************
/ DYNAMIC ARRAY FOR WALKS
*****************/
typedef struct {
size_t used;
size_t size;
size_t inputlen;
terminal *start;
} Array;
/*****************
/ DYNAMIC ARRAY FOR STATEMAPS/RECURSION MAPS
*****************/
typedef struct {
int * array;
size_t used;
size_t size;
} IdxMap;
typedef struct {
UT_array *nums;
} IdxMap_new;
typedef struct {
IdxMap_new *idxmap;
UT_array ** recurIdx;
} Get_Dupes_Ret;
/* Candidate Struct */
typedef struct {
Array * walk;
IdxMap_new *statemap;
} Candidate;
/* Splice Mutation helpers*/
/* A candidate paired with an integer index.
   NOTE(review): `idx` presumably is the splice position inside the
   candidate's walk — confirm against the splice mutator. */
typedef struct {
Candidate *splice_cand;
int idx;
} SpliceCand;
/* Growable array of SpliceCand entries.
   NOTE(review): `used`/`size` presumably are element count and capacity —
   confirm against initArraySplice/insertArraySplice. */
typedef struct {
SpliceCand *start;  // backing storage
size_t used;
size_t size;
} SpliceCandArray;
// Fixed-size (SPLICE_CORPUS entries) table of potential splice points.
// NOTE(review): this is an object definition inside a header, so every
// translation unit including it carries its own tentative definition.
SpliceCand potential[SPLICE_CORPUS];
/* Pair of integer indices.
   NOTE(review): presumably positions in the original and the splice walk
   respectively — confirm against the users of this type. */
typedef struct {
int orig_idx;
int splice_idx;
} intpair_t;
// Initialize dynamic array for potential splice points
// SpliceCand potential[SPLICE_CORPUS];
// IdxMap_new* rcuridx[STATES];
/* Prototypes*/
/* Declarations only; parameter names are omitted throughout.
   NOTE(review): `u8` is used below but is not declared by any header this
   file includes itself — presumably the including .c file pulls in the AFL++
   type definitions first; confirm include order. */
Array * slice(Array *, int);
state * create_pda(u8 *);
Array * gen_input(state *, Array *);
Array * gen_input_count(state *, Array *, int *);
int updatebucket(map_t, int);
void itoa(int, char *, int);
void strrreverse(char *, char *);
void dbg_hashmap(map_t);
void print_repr(Array *, char *);
int isSatisfied(map_t);
char * get_state(char *);
Candidate *gen_candidate(Array *);
Array *spliceGF(Array *, Array *, int);
Array *performSpliceOne(Array *, IdxMap_new *, Array *);
/* Mutation Methods*/
Array * performRandomMutation(state *, Array *);
Array * performRandomMutationCount(state *, Array *, int *);
Array * performSpliceMutationBench(state *, Array *, Candidate **);
UT_array **get_dupes(Array *, int *);
Array * doMult(Array *, UT_array **, int);
Array * doMultBench(Array *, UT_array **, int);
/* Benchmarks*/
void SpaceBenchmark(char *);
void GenInputBenchmark(char *, char *);
void RandomMutationBenchmark(char *, char *);
void MutationAggrBenchmark(char *, char *);
void SpliceMutationBenchmark(char *, char *);
void SpliceMutationBenchmarkOne(char *, char *);
void RandomRecursiveBenchmark(char *, char *);
/* Testers */
void SanityCheck(char *);
/*Helpers*/
void initArray(Array *, size_t);
void insertArray(Array *, int, char *, size_t, int);
void freeArray(Array *);
void initArrayIdx(IdxMap *, size_t);
void insertArrayIdx(IdxMap *, int);
void freeArrayIdx(IdxMap *);
void initArraySplice(SpliceCandArray *, size_t);
void insertArraySplice(SpliceCandArray *, Candidate *, int);
// NOTE(review): takes IdxMap * while its init/insert counterparts take
// SpliceCandArray * — looks like a copy/paste slip; confirm against the
// definition before changing it.
void freeArraySplice(IdxMap *);
void getTwoIndices(UT_array *, int, int *, int *);
void swap(int *, int *);
Array *slice_inverse(Array *, int);
void concatPrefixFeature(Array *, Array *);
void concatPrefixFeatureBench(Array *, Array *);
Array *carve(Array *, int, int);
int fact(int);
void add_to_corpus(struct json_object *, Array *);
struct json_object *term_to_json(terminal *);
/* Gramatron specific prototypes */
// NOTE(review): `u8` and `afl_state_t` are not declared by this header's own
// includes; presumably the AFL++ headers are included before this one.
u8 * unparse_walk(Array *);
Array *performSpliceGF(state *, Array *, afl_state_t *);
void dump_input(u8 *, char *, int *);
void write_input(Array *, u8 *);
Array *read_input(state *, u8 *);
// NOTE(review): object definition (not an `extern` declaration) in a header;
// each including translation unit gets its own tentative definition.
state *pda;
// // AFL-specific struct
// typedef uint8_t u8;
// typedef uint16_t u16;
// typedef uint32_t u32;
// #ifdef __x86_64__
// typedef unsigned long long u64;
// #else
// typedef uint64_t u64;
// #endif /* ^__x86_64__ */
//
// struct queue_entry {
// Array* walk; /* Pointer to the automaton walk*/
// u32 walk_len; /* Number of tokens in the input*/
// Candidate* cand; /* Preprocessed info about the
// candidate to allow for faster mutations*/
//
// u8* fname; /* File name for the test case */
// u32 len; /* Input length */
// UT_array** recur_idx; /* Keeps track of recursive feature
// indices*/
//
// u32 recur_len; /* The number of recursive features*/
//
// u8 cal_failed, /* Calibration failed? */
// trim_done, /* Trimmed? */
// was_fuzzed, /* Had any fuzzing done yet? */
// passed_det, /* Deterministic stages passed? */
// has_new_cov, /* Triggers new coverage? */
// var_behavior, /* Variable behavior? */
// favored, /* Currently favored? */
// fs_redundant; /* Marked as redundant in the fs? */
//
// u32 bitmap_size, /* Number of bits set in bitmap */
// exec_cksum; /* Checksum of the execution trace */
//
// u64 exec_us, /* Execution time (us) */
// handicap, /* Number of queue cycles behind */
// depth; /* Path depth */
//
// u8* trace_mini; /* Trace bytes, if kept */
// u32 tc_ref; /* Trace bytes ref count */
//
// struct queue_entry *next, /* Next element, if any */
// *next_100; /* 100 elements ahead */
//
// };
#endif

View File

@ -0,0 +1,606 @@
{
"ARGLIST": [
"EXPR ',' ARGLIST",
"EXPR",
"EXPR ',' ARGLIST",
"EXPR"
],
"ARGS": [
"'()'",
"'(' ARGLIST ')'",
"'()'",
"'(' ARGLIST ')'"
],
"ARITHMETICOPERATION": [
"EXPR '/' EXPR",
"EXPR '*' EXPR",
"EXPR '+' EXPR",
"EXPR '-' EXPR",
"EXPR '%' EXPR",
"EXPR '**' EXPR",
"EXPR '++'"
],
"ARRAY": [
"'[' ARRAYCONTENT ']'",
"'[]'"
],
"ARRAYCONTENT": [
"EXPR ',' ARRAYCONTENT",
"EXPR"
],
"BOOLEAN": [
"'true'",
"'false'"
],
"BYTEWISEOPERATION": [
"EXPR '&' EXPR",
"EXPR '|' EXPR"
],
"COMPARISONOPERATION": [
"EXPR '<' EXPR"
],
"DECIMALDIGITS": [
"'20'",
"'1234'",
"'66'",
"'234_9'",
"'99999999999999999999'"
],
"DECIMALNUMBER": [
"DECIMALDIGITS"
],
"EXPR": [
"'(' EXPR ')'",
"VAR",
"'delete' SP EXPR",
"'new' SP IDENTIFIER ARGS",
"LITERAL",
"IDENTIFIER",
"METHODCALL",
"'(' ARITHMETICOPERATION ')'",
"'(' COMPARISONOPERATION ')'",
"'(' BYTEWISEOPERATION ')'",
"'(' LOGICALOPERATION ')'"
],
"IDENTIFIER": [
"'Object'",
"VAR",
"'Function'",
"'main'",
"'opt'",
"'Boolean'",
"'Symbol'",
"'JSON'",
"'Error'",
"'EvalError'",
"'RangeError'",
"'ReferenceError'",
"'SyntaxError'",
"'TypeError'",
"'URIError'",
"'this'",
"'Number'",
"'Math'",
"'Date'",
"'String'",
"'RegExp'",
"'Array'",
"'Int8Array'",
"'Uint8Array'",
"'Uint8ClampedArray'",
"'Int16Array'",
"'Uint16Array'",
"'Int32Array'",
"'Uint32Array'",
"'Float32Array'",
"'Float64Array'",
"'DataView'",
"'ArrayBuffer'",
"'Map'",
"'Set'",
"'WeakMap'",
"'WeakSet'",
"'Promise'",
"'AsyncFunction'",
"'asyncGenerator'",
"'Reflect'",
"'Proxy'",
"'Intl'",
"'Intl.Collator'",
"'Intl.DateTimeFormat'",
"'Intl.NumberFormat'",
"'Intl.PluralRules'",
"'WebAssembly'",
"'WebAssembly.Module'",
"'WebAssembly.Instance'",
"'WebAssembly.Memory'",
"'WebAssembly.Table'",
"'WebAssembly.CompileError'",
"'WebAssembly.LinkError'",
"'WebAssembly.RuntimeError'",
"'arguments'",
"'Infinity'",
"'NaN'",
"'undefined'",
"'null'",
"'console'",
"' '"
],
"IDENTIFIERLIST": [
"IDENTIFIER ',' IDENTIFIERLIST",
"'(' IDENTIFIERLIST '),' IDENTIFIERLIST",
"IDENTIFIER"
],
"JSBLOCK": [
"JSSTATEMENT",
"JSSTATEMENT JSBLOCK"
],
"JSSTATEMENT": [
"STATEMENT NEWLINE"
],
"LITERAL": [
"'null'",
"BOOLEAN",
"NUMBER",
"ARRAY"
],
"LOGICALOPERATION": [
"EXPR '&&' EXPR",
"EXPR '||' EXPR"
],
"METHODCALL": [
"OBJECT PROPERTY METHODCALL1"
],
"METHODCALL1": [
"'.' METHOD_NAME ARGS METHODCALL1",
"' '"
],
"METHOD_NAME": [
"IDENTIFIER",
"'print'",
"'eval'",
"'uneval'",
"'isFinite'",
"'isNaN'",
"'parseFloat'",
"'parseInt'",
"'decodeURI'",
"'decodeURIComponent'",
"'encodeURI'",
"'encodeURIComponent'",
"'escape'",
"'unescape'",
"'assign'",
"'create'",
"'defineProperty'",
"'defineProperties'",
"'entries'",
"'freeze'",
"'getOwnPropertyDescriptor'",
"'getOwnPropertyDescriptors'",
"'getOwnPropertyNames'",
"'getOwnPropertySymbols'",
"'getPrototypeOf'",
"'is'",
"'isExtensible'",
"'isFrozen'",
"'isSealed'",
"'keys'",
"'preventExtensions'",
"'seal'",
"'setPrototypeOf'",
"'values'",
"'__defineGetter__'",
"'__defineSetter__'",
"'__lookupGetter__'",
"'__lookupSetter__'",
"'hasOwnProperty'",
"'isPrototypeOf'",
"'propertyIsEnumerable'",
"'toSource'",
"'toLocaleString'",
"'toString'",
"'unwatch'",
"'valueOf'",
"'watch'",
"'apply'",
"'bind'",
"'call'",
"'isGenerator'",
"'valueOf'",
"'for'",
"'keyFor'",
"'stringify'",
"'isInteger'",
"'isSafeInteger'",
"'toInteger'",
"'toExponential'",
"'toFixed'",
"'toLocaleString'",
"'toPrecision'",
"'abs'",
"'acos'",
"'acosh'",
"'asin'",
"'asinh'",
"'atan'",
"'atanh'",
"'atan2'",
"'cbrt'",
"'ceil'",
"'clz32'",
"'cos'",
"'cosh'",
"'exp'",
"'expm1'",
"'floor'",
"'fround'",
"'hypot'",
"'imul'",
"'log'",
"'log1p'",
"'log10'",
"'log2'",
"'max'",
"'min'",
"'pow'",
"'random'",
"'round'",
"'sign'",
"'sin'",
"'sinh'",
"'sqrt'",
"'tan'",
"'tanh'",
"'trunc'",
"'now'",
"'parse'",
"'UTC'",
"'getDate'",
"'getDay'",
"'getFullYear'",
"'getHours'",
"'getMilliseconds'",
"'getMinutes'",
"'getMonth'",
"'getSeconds'",
"'getTime'",
"'getTimezoneOffset'",
"'getUTCDate'",
"'getUTCDay'",
"'getUTCFullYear'",
"'getUTCHours'",
"'getUTCMilliseconds'",
"'getUTCMinutes'",
"'getUTCMonth'",
"'getUTCSeconds'",
"'getYear'",
"'setDate'",
"'setFullYear'",
"'setHours'",
"'setMilliseconds'",
"'setMinutes'",
"'setMonth'",
"'setSeconds'",
"'setTime'",
"'setUTCDate'",
"'setUTCFullYear'",
"'setUTCHours'",
"'setUTCMilliseconds'",
"'setUTCMinutes'",
"'setUTCMonth'",
"'setUTCSeconds'",
"'setYear'",
"'toDateString'",
"'toISOString'",
"'toJSON'",
"'toGMTString'",
"'toLocaleDateString'",
"'toLocaleFormat'",
"'toLocaleString'",
"'toLocaleTimeString'",
"'toTimeString'",
"'toUTCString'",
"'indexOf'",
"'substring'",
"'charAt'",
"'strcmp'",
"'fromCharCode'",
"'fromCodePoint'",
"'raw'",
"'charCodeAt'",
"'slice'",
"'codePointAt'",
"'concat'",
"'includes'",
"'endsWith'",
"'lastIndexOf'",
"'localeCompare'",
"'match'",
"'normalize'",
"'padEnd'",
"'padStart'",
"'quote'",
"'repeat'",
"'replace'",
"'search'",
"'split'",
"'startsWith'",
"'substr'",
"'toLocaleLowerCase'",
"'toLocaleUpperCase'",
"'toLowerCase'",
"'toUpperCase'",
"'trim'",
"'trimleft'",
"'trimright'",
"'anchor'",
"'big'",
"'blink'",
"'bold'",
"'fixed'",
"'fontcolor'",
"'fontsize'",
"'italics'",
"'link'",
"'small'",
"'strike'",
"'sub'",
"'sup'",
"'compile'",
"'exec'",
"'test'",
"'from'",
"'isArray'",
"'of'",
"'copyWithin'",
"'fill'",
"'pop'",
"'push'",
"'reverse'",
"'shift'",
"'sort'",
"'splice'",
"'unshift'",
"'concat'",
"'join'",
"'every'",
"'filter'",
"'findIndex'",
"'forEach'",
"'map'",
"'reduce'",
"'reduceRight'",
"'some'",
"'move'",
"'getInt8'",
"'getUint8'",
"'getInt16'",
"'getUint16'",
"'getInt32'",
"'getUint32'",
"'getFloat32'",
"'getFloat64'",
"'setInt8'",
"'setUint8'",
"'setInt16'",
"'setUint16'",
"'setInt32'",
"'setUint32'",
"'setFloat32'",
"'setFloat64'",
"'isView'",
"'transfer'",
"'clear'",
"'get'",
"'has'",
"'set'",
"'add'",
"'splat'",
"'check'",
"'extractLane'",
"'replaceLane'",
"'load'",
"'load1'",
"'load2'",
"'load3'",
"'store'",
"'store1'",
"'store2'",
"'store3'",
"'addSaturate'",
"'div'",
"'mul'",
"'neg'",
"'reciprocalApproximation'",
"'reciprocalSqrtApproximation'",
"'subSaturate'",
"'shuffle'",
"'swizzle'",
"'maxNum'",
"'minNum'",
"'select'",
"'equal'",
"'notEqual'",
"'lessThan'",
"'lessThanOrEqual'",
"'greaterThan'",
"'greaterThanOrEqual'",
"'and'",
"'or'",
"'xor'",
"'not'",
"'shiftLeftByScalar'",
"'shiftRightByScalar'",
"'allTrue'",
"'anyTrue'",
"'fromFloat32x4'",
"'fromFloat32x4Bits'",
"'fromFloat64x2Bits'",
"'fromInt32x4'",
"'fromInt32x4Bits'",
"'fromInt16x8Bits'",
"'fromInt8x16Bits'",
"'fromUint32x4'",
"'fromUint32x4Bits'",
"'fromUint16x8Bits'",
"'fromUint8x16Bits'",
"'neg'",
"'compareExchange'",
"'exchange'",
"'wait'",
"'wake'",
"'isLockFree'",
"'all'",
"'race'",
"'reject'",
"'resolve'",
"'catch'",
"'then'",
"'finally'",
"'next'",
"'throw'",
"'close'",
"'send'",
"'apply'",
"'construct'",
"'deleteProperty'",
"'ownKeys'",
"'getCanonicalLocales'",
"'supportedLocalesOf'",
"'resolvedOptions'",
"'formatToParts'",
"'resolvedOptions'",
"'instantiate'",
"'instantiateStreaming'",
"'compileStreaming'",
"'validate'",
"'customSections'",
"'exports'",
"'imports'",
"'grow'",
"'super'",
"'in'",
"'instanceof'",
"' '"
],
"NEWLINE": [
"'\\n'"
],
"NUMBER": [
"'1/2'",
"'1E2'",
"'1E02'",
"'1E+02'",
"'-1'",
"'-1.00'",
"'-1/2'",
"'-1E2'",
"'-1E02'",
"'-1E+02'",
"'1/0'",
"'0/0'",
"'-2147483648/-1'",
"'-9223372036854775808/-1'",
"'-0'",
"'-0.0'",
"'+0'"
],
"OBJECT": [
"IDENTIFIER"
],
"PROGRAM": [
"JSBLOCK"
],
"PROPERTY": [
"'.length' PROPERTY",
"'.prototype' PROPERTY",
"'.constructor' PROPERTY",
"'.__proto__' PROPERTY",
"'.__noSuchMethod__' PROPERTY",
"'.__count__' PROPERTY",
"'.__parent__' PROPERTY",
"'.arguments' PROPERTY",
"'.arity' PROPERTY",
"'.caller' PROPERTY",
"'.name' PROPERTY",
"'.displayName' PROPERTY",
"'.iterator' PROPERTY",
"'.asyncIterator' PROPERTY",
"'.match' PROPERTY",
"'.replace' PROPERTY",
"'.search' PROPERTY",
"'.split' PROPERTY",
"'.hasInstance' PROPERTY",
"'.isConcatSpreadable' PROPERTY",
"'.unscopables' PROPERTY",
"'.species' PROPERTY",
"'.toPrimitive' PROPERTY",
"'.toStringTag' PROPERTY",
"'.fileName' PROPERTY",
"'.lineNumber' PROPERTY",
"'.columnNumber' PROPERTY",
"'.message' PROPERTY",
"'.name' PROPERTY",
"'.EPSILON' PROPERTY",
"'.MAX_SAFE_INTEGER' PROPERTY",
"'.MAX_VALUE' PROPERTY",
"'.MIN_SAFE_INTEGER' PROPERTY",
"'.MIN_VALUE' PROPERTY",
"'.NaN' PROPERTY",
"'.NEGATIVE_INFINITY' PROPERTY",
"'.POSITIVE_INFINITY' PROPERTY",
"'.E' PROPERTY",
"'.LN2' PROPERTY",
"'.LN10' PROPERTY",
"'.LOG2E' PROPERTY",
"'.LOG10E' PROPERTY",
"'.PI' PROPERTY",
"'.SQRT1_2' PROPERTY",
"'.SQRT2' PROPERTY",
"'.flags' PROPERTY",
"'.global' PROPERTY",
"'.ignoreCase' PROPERTY",
"'.multiline' PROPERTY",
"'.source' PROPERTY",
"'.sticky' PROPERTY",
"'.unicode' PROPERTY",
"'.buffer' PROPERTY",
"'.byteLength' PROPERTY",
"'.byteOffset' PROPERTY",
"'.BYTES_PER_ELEMENT' PROPERTY",
"'.compare' PROPERTY",
"'.format' PROPERTY",
"'.callee' PROPERTY",
"'.caller' PROPERTY",
"'.memory' PROPERTY",
"'.exports' PROPERTY",
"' '"
],
"SP": [
"' '"
],
"STATEMENT": [
"EXPR ';'",
"'var' SP VAR '=' EXPR ';'",
"'let' SP VAR '=' EXPR ';'",
"VAR '=' EXPR ';'",
"VAR PROPERTY '=' EXPR ';'",
"VAR '[' DECIMALNUMBER ']' '=' EXPR ';'",
"'const' SP VAR '=' EXPR ';'",
"'typeof' SP EXPR ';'",
"'void' SP EXPR ';'",
"'return' SP EXPR ';'",
"VAR ':'"
],
"VAR": [
"'a'",
"'b'",
"'c'",
"'d'",
"'e'",
"'f'",
"'g'",
"'h'"
]
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,434 @@
/*
* Generic map implementation.
*/
#include "hashmap.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define INITIAL_SIZE (256)
#define MAX_CHAIN_LENGTH (8)
/* We need to keep keys and values */
/* One slot of the open-addressed table. */
typedef struct _hashmap_element {
char *key;   /* key string; hashmap_put stores the caller's pointer, no copy */
int in_use;  /* 1 while the slot holds an entry, 0 when it is free */
any_t data;  /* value pointer supplied by the caller */
} hashmap_element;
/* A hashmap has some maximum size and current size,
* as well as the data to hold. */
typedef struct _hashmap_map {
int table_size;         /* number of slots allocated in `data` */
int size;               /* entry counter maintained by put/remove/rehash */
hashmap_element *data;  /* slot array of `table_size` elements */
} hashmap_map;
/*
 * Allocate a fresh hashmap with INITIAL_SIZE zeroed slots and no entries.
 * Returns the new map, or NULL if either allocation fails.
 */
map_t hashmap_new() {

  hashmap_map *map = (hashmap_map *)malloc(sizeof(hashmap_map));
  if (map == NULL) { return NULL; }

  map->data = (hashmap_element *)calloc(INITIAL_SIZE, sizeof(hashmap_element));
  if (map->data == NULL) {

    /* Slot array could not be allocated: release the half-built map. */
    hashmap_free(map);
    return NULL;

  }

  map->table_size = INITIAL_SIZE;
  map->size = 0;
  return map;

}
/* The implementation here was originally done by Gary S. Brown. I have
borrowed the tables directly, and made some minor changes to the
crc32-function (including changing the interface). //ylo */
/* ============================================================= */
/* COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or */
/* code or tables extracted from it, as desired without restriction. */
/* */
/* First, the polynomial itself and its table of feedback terms. The */
/* polynomial is */
/* X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 */
/* */
/* Note that we take it "backwards" and put the highest-order term in */
/* the lowest-order bit. The X^32 term is "implied"; the LSB is the */
/* X^31 term, etc. The X^0 term (usually shown as "+1") results in */
/* the MSB being 1. */
/* */
/* Note that the usual hardware shift register implementation, which */
/* is what we're using (we're merely optimizing it by doing eight-bit */
/* chunks at a time) shifts bits into the lowest-order term. In our */
/* implementation, that means shifting towards the right. Why do we */
/* do it this way? Because the calculated CRC must be transmitted in */
/* order from highest-order term to lowest-order term. UARTs transmit */
/* characters in order from LSB to MSB. By storing the CRC this way, */
/* we hand it to the UART in the order low-byte to high-byte; the UART */
/* sends each low-bit to high-bit; and the result is transmission bit  */
/* by bit from highest- to lowest-order term without requiring any bit */
/* shuffling on our part. Reception works similarly. */
/* */
/* The feedback terms table consists of 256, 32-bit entries. Notes: */
/* */
/* The table can be generated at runtime if desired; code to do so */
/* is shown later. It might not be obvious, but the feedback */
/* terms simply represent the results of eight shift/xor opera- */
/* tions for all combinations of data and CRC register values. */
/* */
/* The values must be right-shifted by eight bits by the "updcrc" */
/* logic; the shift must be unsigned (bring in zeroes). On some */
/* hardware you could probably optimize the shift in assembler by */
/* using byte-swap instructions. */
/* polynomial $edb88320 */
/* */
/* -------------------------------------------------------------------- */
static unsigned long crc32_tab[] = {
0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
0x2d02ef8dL};
/* Return a 32-bit CRC of the contents of the buffer. */
unsigned long crc32(const unsigned char *s, unsigned int len) {
unsigned int i;
unsigned long crc32val;
crc32val = 0;
for (i = 0; i < len; i++) {
crc32val = crc32_tab[(crc32val ^ s[i]) & 0xff] ^ (crc32val >> 8);
}
return crc32val;
}
/*
 * Map a NUL-terminated key string to a slot index in [0, m->table_size).
 * The string is run through crc32(), mixed, multiplied and finally reduced
 * modulo the table size.
 */
unsigned int hashmap_hash_int(hashmap_map *m, char *keystring) {

  unsigned long h = crc32((unsigned char *)(keystring), strlen(keystring));

  /* Robert Jenkins' 32 bit Mix Function */
  h = h + (h << 12);
  h = h ^ (h >> 22);
  h = h + (h << 4);
  h = h ^ (h >> 9);
  h = h + (h << 10);
  h = h ^ (h >> 2);
  h = h + (h << 7);
  h = h ^ (h >> 12);

  /* Knuth's Multiplicative Method */
  h = (h >> 3) * 2654435761;

  return h % m->table_size;

}
/*
 * Return the index of the slot in which `key` should be stored, or MAP_FULL.
 *
 * MAP_FULL is returned when the table is at least half full, or when none of
 * the MAX_CHAIN_LENGTH probed slots is free or already holds `key`.
 *
 * The whole probe chain is inspected for an existing entry with the same key
 * before a free slot is handed out. hashmap_remove() can leave a free slot
 * in front of a live entry; returning that free slot straight away would
 * make a later put of the same key create a duplicate entry instead of
 * updating the existing one.
 */
int hashmap_hash(map_t in, char *key) {

  int curr;
  int i;
  int first_free = MAP_FULL; /* first free slot seen while probing, if any */

  /* Cast the hashmap */
  hashmap_map *m = (hashmap_map *)in;

  /* If full, return immediately */
  if (m->size >= (m->table_size / 2)) return MAP_FULL;

  /* Find the best index */
  curr = hashmap_hash_int(m, key);

  /* Linear probing */
  for (i = 0; i < MAX_CHAIN_LENGTH; i++) {

    if (m->data[curr].in_use == 0) {

      /* Remember the earliest free slot but keep looking for the key. */
      if (first_free == MAP_FULL) first_free = curr;

    } else if (m->data[curr].in_use == 1 &&

               (strcmp(m->data[curr].key, key) == 0)) {

      return curr;

    }

    curr = (curr + 1) % m->table_size;

  }

  /* Key not present: use the free slot if one was found, else MAP_FULL. */
  return first_free;

}
/*
 * Doubles the size of the hashmap, and rehashes all the elements.
 *
 * Returns MAP_OK on success. Returns MAP_OMEM if the new slot array cannot be
 * allocated (the map is left untouched in that case), or the failing status
 * of hashmap_put() if re-inserting an element fails. In the latter case the
 * old slot array is released as well; it used to be leaked because the only
 * pointer to it is local to this function.
 */
int hashmap_rehash(map_t in) {

  int              i;
  int              old_size;
  hashmap_element *curr;

  /* Setup the new elements */
  hashmap_map *    m = (hashmap_map *)in;
  hashmap_element *temp =
      (hashmap_element *)calloc(2 * m->table_size, sizeof(hashmap_element));
  if (!temp) return MAP_OMEM;

  /* Update the array */
  curr = m->data;
  m->data = temp;

  /* Update the size */
  old_size = m->table_size;
  m->table_size = 2 * m->table_size;
  m->size = 0; /* hashmap_put() below counts the entries back in */

  /* Rehash the elements */
  for (i = 0; i < old_size; i++) {

    int status;

    if (curr[i].in_use == 0) continue;

    status = hashmap_put(m, curr[i].key, curr[i].data);
    if (status != MAP_OK) {

      /* Entries not yet moved are unreachable either way; do not also leak
         the array that held them. */
      free(curr);
      return status;

    }

  }

  free(curr);

  return MAP_OK;

}
/*
 * Add a pointer to the hashmap with some key.
 *
 * The key pointer itself is stored (the string is not copied), so it must
 * stay valid for as long as the entry lives. If an entry with an equal key
 * already exists, its value and key pointer are replaced.
 *
 * Returns MAP_OK on success or MAP_OMEM if growing the table failed.
 */
int hashmap_put(map_t in, char *key, any_t value) {

  int          index;
  hashmap_map *m;

  /* Cast the hashmap */
  m = (hashmap_map *)in;

  /* Find a place to put our value, growing the table until one exists */
  index = hashmap_hash(in, key);
  while (index == MAP_FULL) {

    if (hashmap_rehash(in) == MAP_OMEM) { return MAP_OMEM; }
    index = hashmap_hash(in, key);

  }

  /* hashmap_hash() also returns the slot of an existing entry with the same
     key. Only a previously free slot adds an entry; counting overwrites would
     inflate hashmap_length() and trigger needless rehashes. */
  if (m->data[index].in_use == 0) { m->size++; }

  /* Set the data */
  m->data[index].data = value;
  m->data[index].key = key;
  m->data[index].in_use = 1;

  return MAP_OK;

}
/*
 * Look up `key` and hand the stored pointer back through `arg`.
 *
 * Returns MAP_OK with *arg set to the stored value when the key is found
 * within MAX_CHAIN_LENGTH probes; otherwise sets *arg to NULL and returns
 * MAP_MISSING.
 */
int hashmap_get(map_t in, char *key, any_t *arg) {

  hashmap_map *m = (hashmap_map *)in;
  int          slot = hashmap_hash_int(m, key);
  int          probes_left;

  /* Walk the probe chain starting at the key's home slot. */
  for (probes_left = MAX_CHAIN_LENGTH; probes_left > 0; probes_left--) {

    hashmap_element *entry = &m->data[slot];

    if (entry->in_use == 1 && strcmp(entry->key, key) == 0) {

      *arg = entry->data;
      return MAP_OK;

    }

    slot = (slot + 1) % m->table_size;

  }

  /* Not found */
  *arg = NULL;
  return MAP_MISSING;

}
/*
 * Call `f(item, data)` for the data pointer of every occupied slot, in slot
 * order. `item` is passed through unchanged as the first argument.
 *
 * Returns MAP_MISSING when the map has no entries, the first status other
 * than MAP_OK returned by `f` (which stops the traversal), or MAP_OK once
 * every slot has been visited.
 */
int hashmap_iterate(map_t in, PFany f, any_t item) {

  hashmap_map *m = (hashmap_map *)in;
  int          slot = 0;

  /* Nothing to visit */
  if (hashmap_length(m) <= 0) { return MAP_MISSING; }

  while (slot < m->table_size) {

    if (m->data[slot].in_use != 0) {

      int rc = f(item, (any_t)(m->data[slot].data));
      if (rc != MAP_OK) { return rc; }

    }

    slot++;

  }

  return MAP_OK;

}
/*
 * Delete the entry stored under `key`.
 *
 * Returns MAP_OK after clearing the matching slot and decrementing the entry
 * counter, or MAP_MISSING if the key is not found within MAX_CHAIN_LENGTH
 * probes.
 */
int hashmap_remove(map_t in, char *key) {

  hashmap_map *m = (hashmap_map *)in;
  int          slot = hashmap_hash_int(m, key);
  int          probes_left;

  /* Walk the probe chain starting at the key's home slot. */
  for (probes_left = MAX_CHAIN_LENGTH; probes_left > 0; probes_left--) {

    hashmap_element *entry = &m->data[slot];

    if (entry->in_use == 1 && strcmp(entry->key, key) == 0) {

      /* Blank out the slot and account for the removed entry */
      entry->in_use = 0;
      entry->data = NULL;
      entry->key = NULL;
      m->size--;
      return MAP_OK;

    }

    slot = (slot + 1) % m->table_size;

  }

  /* Data not found */
  return MAP_MISSING;

}
/* Deallocate the hashmap.
 *
 * Frees the slot array and the map structure. Keys and values are not freed;
 * the map only stores the pointers it was given. Passing NULL is a no-op,
 * matching free() and the NULL tolerance of hashmap_length(). */
void hashmap_free(map_t in) {

  hashmap_map *m = (hashmap_map *)in;

  if (m == NULL) { return; }

  free(m->data);
  free(m);

}
/* Report the map's entry counter; a NULL map counts as empty (0). */
int hashmap_length(map_t in) {

  hashmap_map *m = (hashmap_map *)in;

  return (m != NULL) ? m->size : 0;

}

View File

@ -0,0 +1,83 @@
/*
* Generic hashmap manipulation functions
*
* Originally by Elliot C Back -
* http://elliottback.com/wp/hashmap-implementation-in-c/
*
* Modified by Pete Warden to fix a serious performance problem, support strings
* as keys and removed thread synchronization - http://petewarden.typepad.com
*/
#ifndef __HASHMAP_H__
#define __HASHMAP_H__
#define MAP_MISSING -3 /* No such element */
#define MAP_FULL -2 /* Hashmap is full */
#define MAP_OMEM -1 /* Out of Memory */
#define MAP_OK 0 /* OK */
/*
* any_t is a pointer. This allows you to put arbitrary structures in
* the hashmap.
*/
typedef void *any_t;
/*
* PFany is a pointer to a function that can take two any_t arguments
* and return an integer status code.
*/
typedef int (*PFany)(any_t, any_t);
/*
* map_t is a pointer to an internally maintained data structure.
* Clients of this package do not need to know how hashmaps are
* represented. They see and manipulate only map_t's.
*/
typedef any_t map_t;
/*
* Return an empty hashmap. Returns NULL on failure.
*/
extern map_t hashmap_new();
/*
* Iteratively call f with argument (item, data) for
* each element data in the hashmap. The function must
* return a map status code. If it returns anything other
* than MAP_OK the traversal is terminated. f must
* not reenter any hashmap functions, or deadlock may arise.
*/
extern int hashmap_iterate(map_t in, PFany f, any_t item);
/*
* Add an element to the hashmap. Return MAP_OK or MAP_OMEM.
*/
extern int hashmap_put(map_t in, char *key, any_t value);
/*
* Get an element from the hashmap. Return MAP_OK or MAP_MISSING.
*/
extern int hashmap_get(map_t in, char *key, any_t *arg);
/*
* Remove an element from the hashmap. Return MAP_OK or MAP_MISSING.
*/
extern int hashmap_remove(map_t in, char *key);
/*
* Get any element. Return MAP_OK or MAP_MISSING.
* remove - should the element be removed from the hashmap
*/
extern int hashmap_get_one(map_t in, any_t *arg, int remove);
/*
* Free the hashmap
*/
extern void hashmap_free(map_t in);
/*
* Get the current size of a hashmap
*/
extern int hashmap_length(map_t in);
#endif

View File

@ -0,0 +1,275 @@
import sys
import json
import re
from collections import defaultdict
# import pygraphviz as pgv
# Parsed grammar JSON (nonterminal -> list of rules); loaded in main()
gram_data = None
# Numeric id (as an int) that the next newly created state will receive
state_count = 1
# List of every transition dict generated so far
pda = []
# Queue of [state, stack] pairs that still have to be expanded
worklist = []
# Maps a state id to the stack recorded for it when the state was created
state_stacks = {}
# === If user provides upper bound on the stack size during FSA creation ===
# Specifies the upper bound to which the stack is allowed to grow
# If for any generated state, the stack size is > stack_limit then this
# state is not expanded further.
stack_limit = None
# Holds the set of unexpanded rules owing to the user-passed stack constraint limit
unexpanded_rules = set()
def main(grammar, limit):
    '''
    Builds the automaton for a grammar file and dumps the results as JSON.

    grammar: path to the grammar JSON file; its "Start" entry names the
             start symbol
    limit:   upper bound on the stack size, or a falsy value to build the
             automaton without a bound

    Writes <name>_automata.json and <name>_transition.json into the current
    working directory, where <name> is the grammar file's basename up to its
    first '.'. If rules were left unexpanded because of the stack limit they
    are written to <name>_disallowed.json; otherwise the process exits with
    status 0.
    '''
    global worklist, gram_data, stack_limit
    current = '0'
    stack_limit = limit
    if stack_limit:
        print ('[X] Operating in bounded stack mode')

    with open(grammar, 'r') as fd:
        gram_data = json.load(fd)
    start_symbol = gram_data["Start"][0]
    worklist.append([current, [start_symbol]])
    filename = (grammar.split('/')[-1]).split('.')[0]

    # Expand states in FIFO order until nothing is left to process
    while worklist:
        prep_transitions(worklist.pop(0))

    # XXX Graph creation (create_graph) stays disabled because visualization
    # of current version of PHP causes segfault
    transformed = postprocess()
    with open(filename + '_automata.json', 'w+') as fd:
        json.dump(transformed, fd)
    with open(filename + '_transition.json', 'w+') as fd:
        json.dump(pda, fd)
    if not unexpanded_rules:
        print ('[X] No unexpanded rules, absolute FSA formed')
        # sys.exit instead of the site-provided exit(), which is meant for the
        # interactive shell and is absent when the site module is not loaded
        sys.exit(0)
    else:
        print ('[X] Certain rules were not expanded due to stack size limit. Inexact approximation has been created and the disallowed rules have been put in {}_disallowed.json'.format(filename))
        print ('[X] Number of unexpanded rules:', len(unexpanded_rules))
        with open(filename + '_disallowed.json', 'w+') as fd:
            json.dump(list(unexpanded_rules), fd)
def create_graph(filename):
    '''
    Draws the PDA transitions as a directed graph and saves it as
    <filename>.png

    NOTE: this relies on the name `pgv`; the only import of pygraphviz in
    this file is commented out, so calling this function as-is raises
    NameError.
    '''
    global pda
    graph = pgv.AGraph(strict = False, directed = True)
    for edge in pda:
        print ('Transition:', edge)
        src = edge['source']
        dst = edge['dest']
        edge_label = 'Term:{}'.format(edge['terminal'])
        graph.add_edge(src, dst, label = edge_label)
    graph.layout(prog = 'dot')
    print ('Do it up 2')
    graph.draw(filename + '.png')
def prep_transitions(element):
    '''
    Generates the transitions leaving one worklist element

    element: [state, stack] pair taken from the worklist. The first stack
             symbol is the nonterminal to expand; an empty stack marks a
             final state and produces no transitions.

    For every rule of that nonterminal a transition is built. It is either
    - redirected to an already known state whose recorded stack holds the
      same symbols (backward edge, nothing added to the worklist),
    - dropped and recorded in unexpanded_rules when its stack exceeds
      stack_limit, or
    - appended as an edge to a brand-new state that is queued for expansion.
    '''
    global gram_data, state_count, pda, worklist, state_stacks, stack_limit, unexpanded_rules
    state = element[0]
    try:
        nonterminal = element[1][0]
    except IndexError:
        # Final state was encountered, pop from worklist without doing anything
        return
    rules = gram_data[nonterminal]
    count = 1
    for rule in rules:
        terminal, ss, termIsRegex = tokenize(rule)
        transition = get_template()
        transition['trigger'] = '_'.join([state, str(count)])
        transition['source'] = state
        transition['dest'] = str(state_count)
        transition['ss'] = ss
        transition['terminal'] = terminal
        transition['rule'] = "{} -> {}".format(nonterminal, rule )
        if termIsRegex:
            transition['termIsRegex'] = True

        # Creating a state stack for the new state: copy the source state's
        # stack (empty if none was recorded), replace its top symbol with the
        # rule's stack symbols. Only a missing entry is tolerated here; a bare
        # `except` would also hide unrelated errors.
        state_stack = list(state_stacks.get(state, []))
        if state_stack:
            state_stack.pop(0)
        if ss:
            state_stack[:0] = ss
        transition['stack'] = state_stack

        # Check if a recursive transition state being created, if so make a backward
        # edge and don't add anything to the worklist. State ids are strings,
        # so None is a safe "no match" marker.
        wanted = sorted(state_stack)
        recursive_dest = next(
            (known for known, stack in state_stacks.items()
             if sorted(stack) == wanted),
            None)
        if recursive_dest is not None:
            transition['dest'] = recursive_dest
            pda.append(transition)
            count += 1
            continue

        # If the generated state has a stack size > stack_limit then that state is abandoned
        # and not added to the FSA or the worklist for further expansion
        if stack_limit:
            if (len(transition['stack']) > stack_limit):
                unexpanded_rules.add(transition['rule'])
                continue

        # Create transitions for the non-recursive relations and add to the worklist
        pda.append(transition)
        worklist.append([transition['dest'], transition['stack']])
        state_stacks[transition['dest']] = state_stack
        state_count += 1
        count += 1
def tokenize(rule):
    '''
    Gets the terminal and the corresponding stack symbols from a rule in GNF form

    rule: rule text that starts with a quoted terminal, optionally prefixed
          with `r` to mark the terminal as a regex, followed by zero or more
          whitespace-separated stack symbols

    Returns (terminal, ss, termIsRegex) where ss is the list of stack symbols
    or None when nothing follows the terminal.
    Raises AssertionError when the rule does not start with a quoted terminal.
    '''
    # Raw string so that \s and \S reach the regex engine untouched
    pattern = re.compile(r"([r])*'([\s\S]+)'([\s\S]*)")
    match = pattern.match(rule)
    if match is None:
        # Without this check a malformed rule failed later with an
        # AttributeError on None instead of the intended error
        raise AssertionError("Rule is not in GNF form")
    termIsRegex = bool(match.group(1))
    terminal = match.group(2)
    ss = match.group(3).split() if match.group(3) else None
    return terminal, ss, termIsRegex
def get_template():
    '''
    Returns a fresh transition dict with every field set to its default
    '''
    return dict(
        trigger = None,
        source = None,
        dest = None,
        termIsRegex = False,
        terminal = None,
        stack = [],
    )
def postprocess():
    '''
    Creates a representation to be passed on to the C-module

    Reads the module-level ``pda`` transition list and ``stack_limit`` and
    returns a dict with the keys:
      init_state  -- the initial state as returned by ``_get_states``
      final_state -- the single accepted final state
      numstates   -- number of states (see the note in the limited branch)
      pda         -- {source state: [[trigger, dest, terminal], ...]}

    Fails an assertion when the automaton does not have exactly one usable
    initial/final state.
    '''
    global pda
    final_struct = {}
    memoized = defaultdict(list)
    states, final, initial = _get_states()
    print (initial)
    # ``initial`` is the concatenation of all source-only state names, so a
    # length of one presumably means "one single-character state" -- TODO
    # confirm that state names of the initial state are always one character.
    assert len(initial) == 1, 'More than one init state found'

    # Cull transitions to states which were not expanded owing to the stack limit
    if stack_limit:
        final_states = set(final)
        # A sink state that is entered with symbols still on the stack is
        # not a real final state: it is a state that was never expanded.
        blocklist = set()
        for transition in pda:
            if transition["dest"] in final_states and len(transition["stack"]) > 0:
                blocklist.add(transition["dest"])

        culled_final = [state for state in final if state not in blocklist]
        assert len(culled_final) == 1, 'More than one final state found'

        # Every surviving transition is emitted exactly once. (Walking the
        # transition list once per final state, as was done before, emitted
        # each of them len(final) times.)
        num_transitions = 0
        for transition in pda:
            if transition["dest"] in blocklist:
                continue
            num_transitions += 1
            memoized[transition["source"]].append(
                [transition["trigger"], transition["dest"], transition["terminal"]])

        final_struct["init_state"] = initial
        final_struct["final_state"] = culled_final[0]
        # The reason we do this is because when states are culled, the indexing is
        # still relative to the actual number of states hence we keep numstates recorded
        # as the original number of states
        print ('[X] Actual Number of states:', len(memoized.keys()))
        print ('[X] Number of transitions:', num_transitions)
        print ('[X] Original Number of states:', len(states))
        final_struct["numstates"] = len(states)
        final_struct["pda"] = memoized
        return final_struct

    # Running FSA construction in exact approximation mode and postprocessing it like so
    for transition in pda:
        memoized[transition["source"]].append(
            [transition["trigger"], transition["dest"], transition["terminal"]])
    final_struct["init_state"] = initial
    final_struct["final_state"] = final[0]
    print ('[X] Actual Number of states:', len(memoized.keys()))
    final_struct["numstates"] = len(memoized.keys())
    final_struct["pda"] = memoized
    return final_struct
def _get_states():
    '''
    Classify the states referenced by the module-level ``pda`` transitions.

    Returns a 3-tuple:
      * list of every state that occurs as a source or a destination
      * list of states that only occur as a destination (no outgoing edge)
      * the names of states that only occur as a source, joined into one string
    '''
    global pda
    sources = {transition["source"] for transition in pda}
    dests = {transition["dest"] for transition in pda}
    every_state = sources | dests
    sink_states = dests - sources
    entry_states = sources - dests
    return list(every_state), list(sink_states), str(''.join(list(entry_states)))
if __name__ == '__main__':
    import argparse

    # Command line entry point: the GNF grammar location and an optional
    # stack-size bound are handed straight to main().
    cli = argparse.ArgumentParser(description = 'Script to convert GNF grammar to PDA')
    cli.add_argument('--gf', type = str, help = 'Location of GNF grammar')
    cli.add_argument('--limit', type = int, default = None,
                     help = 'Specify the upper bound for the stack size')
    options = cli.parse_args()
    main(options.gf, options.limit)

View File

@ -0,0 +1,289 @@
import sys
import re
import copy
import json
from string import ascii_uppercase
from itertools import combinations
from collections import defaultdict
NONTERMINALSET = []
COUNT = 1
def main(grammar_file, out, start):
    '''
    Convert a grammar to GNF form and write the result to ``out`` as JSON.

    grammar_file -- path containing ``.json`` (already preprocessed grammar)
                    or ``.g4`` (raw grammar that is preprocessed first)
    out          -- path of the JSON file receiving the final grammar
    start        -- start symbol, stored under the ``"Start"`` key

    The intermediate grammar after each stage is dumped to a
    ``debug_*.json`` file in the current working directory.

    Raises ValueError when the file name has neither extension.
    '''
    grammar = None
    # If grammar file is a preprocessed NT file, then skip preprocessing
    if '.json' in grammar_file:
        with open(grammar_file, 'r') as fd:
            grammar = json.load(fd)
    elif '.g4' in grammar_file:
        with open(grammar_file, 'r') as fd:
            data = fd.readlines()
        grammar = preprocess(data)
    else:
        # Only exception instances/classes can be raised; raising a bare
        # string is itself a TypeError.
        raise ValueError('Unknown file format passed. Accepts (.g4/.json)')

    with open('debug_preprocess.json', 'w+') as fd:
        json.dump(grammar, fd)
    grammar = remove_unit(grammar) # eliminates unit productions
    with open('debug_unit.json', 'w+') as fd:
        json.dump(grammar, fd)
    grammar = remove_mixed(grammar) # eliminate terminals existing with non-terminals
    with open('debug_mixed.json', 'w+') as fd:
        json.dump(grammar, fd)
    grammar = break_rules(grammar) # eliminate rules with more than two non-terminals
    with open('debug_break.json', 'w+') as fd:
        json.dump(grammar, fd)
    grammar = gnf(grammar)

    # Dump GNF form of the grammar with only reachable rules
    # reachable_grammar = get_reachable(grammar, start)
    # with open('debug_gnf_reachable.json', 'w+') as fd:
    #     json.dump(reachable_grammar, fd)
    with open('debug_gnf.json', 'w+') as fd:
        json.dump(grammar, fd)

    grammar["Start"] = [start]
    with open(out, 'w+') as fd:
        json.dump(grammar, fd)
def get_reachable(grammar, start):
    '''
    Returns a grammar without dead rules

    Walks the grammar breadth-first from ``start`` and copies the rule list
    of every non-terminal that is encountered into the returned dict.
    '''
    reachable_grammar = dict()
    processed = set()
    pending = [start]
    while pending:
        nt = pending.pop(0)
        processed.add(nt)
        rules = grammar[nt]
        reachable_grammar[nt] = rules
        for rule in rules:
            # Queue every non-terminal of the rule that was not handled yet
            pending.extend(token for token in gettokens(rule)
                           if not isTerminal(token) and token not in processed)
    return reachable_grammar
def gettokens(rule):
    '''
    Split a rule into tokens: double-quoted strings, single-quoted strings
    (both kept with their quotes) and runs of other non-blank characters.
    '''
    pattern = re.compile("([^\s\"\']+)|\"([^\"]*)\"|\'([^\']*)\'")
    tokens = []
    for found in pattern.finditer(rule):
        tokens.append(found.group(0))
    return tokens
def gnf(grammar):
    '''
    Rewrite the grammar until every rule starts with a terminal.

    In each pass a rule that starts with a non-terminal is replaced by one
    rule per production of that non-terminal (the production takes the place
    of the leading symbol); all other rules are copied unchanged. Passes are
    repeated until no rule with a leading non-terminal remains.

    Returns the rewritten grammar as a ``defaultdict(list)``.
    '''
    current = copy.deepcopy(grammar)
    while True:
        expanded = defaultdict(list)
        for lhs, rules in current.items():
            for rule in rules:
                tokens = gettokens(rule)
                if len(tokens) == 1 and isTerminal(rule):
                    expanded[lhs].append(rule)
                    continue
                head, tail = tokens[0], tokens[1:]
                if isTerminal(head):
                    expanded[lhs].append(rule)
                    continue
                # Substitute each production of the leading non-terminal
                for extension in current[head]:
                    expanded[lhs].append(' '.join([extension] + tail))

        # Another pass is needed while some rule still leads with a non-terminal
        finished = True
        for rules in expanded.values():
            for rule in rules:
                tokens = gettokens(rule)
                if len(tokens) == 1 and isTerminal(rule):
                    continue
                if not isTerminal(tokens[0]):
                    finished = False
                    break
        if finished:
            return expanded
        current = copy.deepcopy(expanded)
def preprocess(data):
    '''
    Turn the lines of a grammar file into a ``{nonterminal: [rule, ...]}`` dict.

    Productions are separated by blank lines. The first line of a production
    has the form ``nonterminal: rule``; every further line adds one more
    alternative, taken as the text in front of the first ``|`` on that line.

    Raises IndexError when the first line of a production has no ``:``.
    '''
    productions = []
    production = []
    for line in data:
        if line.strip():
            production.append(line)
        elif production:
            # Blank line closes the current production; runs of blank lines
            # must not create empty productions.
            productions.append(production)
            production = []
    # The final production has no closing blank line when the file does not
    # end with one -- keep it instead of silently dropping it.
    if production:
        productions.append(production)

    final_rule_set = {}
    for production in productions:
        # Split on the first colon only so that a ':' inside the rule itself
        # (e.g. a quoted ':' terminal) does not truncate the rule.
        head = production[0].split(':', 1)
        nonterminal = head[0]
        rules = [strip_chars(head[1]).strip('| ')]
        for production_rule in production[1:]:
            rules.append(strip_chars(production_rule.split('|')[0]))
        final_rule_set[nonterminal] = rules
    return final_rule_set
def remove_unit(grammar):
    '''
    Eliminate unit productions (rules consisting of one non-terminal).

    Each pass replaces a unit production by the rules of the non-terminal it
    names and copies every other rule unchanged; passes repeat until the
    grammar contains no unit production. Returns a ``defaultdict(list)``.
    '''
    current = copy.deepcopy(grammar)
    while True:
        rewritten = defaultdict(list)
        for lhs, rules in current.items():
            for rhs in rules:
                # Checking if the rule is a unit production rule
                if len(gettokens(rhs)) == 1 and not isTerminal(rhs):
                    rewritten[lhs].extend(current[rhs])
                else:
                    rewritten[lhs].append(rhs)

        # Checking there are no unit productions left in the grammar
        unit_left = any(
            len(gettokens(rhs)) == 1 and not isTerminal(rhs)
            for rules in rewritten.values()
            for rhs in rules)
        if not unit_left:
            return rewritten
        # Unit productions are still there in the grammar -- repeat the process
        current = copy.deepcopy(rewritten)
def isTerminal(rule):
    '''
    Tell whether ``rule`` begins with a single-quoted terminal.

    Only the start of the string is examined, so a rule with further symbols
    after the quoted terminal also counts as a terminal.
    '''
    return re.match("\'(.*?)\'", rule) is not None
def remove_mixed(grammar):
    '''
    Remove rules where there are terminals mixed in with non-terminals

    In every rule with more than one token each terminal is replaced by a
    non-terminal that produces it: an existing one when ``terminal_exist``
    finds it in the grammar built so far, a freshly generated one otherwise.
    Single-token rules are copied unchanged.
    '''
    new_grammar = defaultdict(list)
    for lhs, rules in grammar.items():
        for rhs in rules:
            tokens = gettokens(rhs)
            if len(tokens) == 1:
                new_grammar[lhs].append(rhs)
                continue
            regen_rule = []
            for token in tokens:
                if not isTerminal(token):
                    regen_rule.append(token)
                    continue
                # Reuse the non-terminal standing for this terminal, or make one
                stand_in = terminal_exist(token, new_grammar)
                if not stand_in:
                    stand_in = get_nonterminal()
                    new_grammar[stand_in].append(token)
                regen_rule.append(stand_in)
            new_grammar[lhs].append(' '.join(regen_rule))
    return new_grammar
def break_rules(grammar):
    '''
    Shorten rules until none has more than two tokens.

    A rule with more than two tokens (and no leading terminal) becomes
    ``<helper> <last token>``, where ``<helper>`` is a non-terminal producing
    all tokens but the last one -- an existing one if ``terminal_exist``
    finds it, otherwise a newly generated one. Passes repeat until no such
    rule is left. Returns a ``defaultdict(list)``.
    '''
    current = copy.deepcopy(grammar)
    while True:
        shortened = defaultdict(list)
        for lhs, rules in current.items():
            for rhs in rules:
                tokens = gettokens(rhs)
                if len(tokens) <= 2 or isTerminal(rhs):
                    shortened[lhs].append(rhs)
                    continue
                prefix = ' '.join(tokens[:-1])
                helper = terminal_exist(prefix, shortened)
                if not helper:
                    helper = get_nonterminal()
                    shortened[helper].append(prefix)
                shortened[lhs].append(' '.join([helper, tokens[-1]]))

        # Look for rules that are still too long
        finished = True
        for rules in shortened.values():
            for rhs in rules:
                tokens = gettokens(rhs)
                if len(tokens) > 2 and (not isTerminal(rhs)):
                    finished = False
                    break
        if finished:
            return shortened
        current = copy.deepcopy(shortened)
def strip_chars(rule):
    '''Return ``rule`` without leading/trailing newlines, tabs and spaces.'''
    unwanted = '\n\t '
    return rule.strip(unwanted)
def get_nonterminal():
    '''
    Hand out the next unused generated non-terminal name, refilling the
    module-level pool first when it has run empty.
    '''
    global NONTERMINALSET
    if not NONTERMINALSET:
        _repopulate()
    return NONTERMINALSET.pop(0)
def _repopulate():
    '''
    Refill the pool of generated non-terminal names with every
    ``COUNT``-letter combination of the uppercase alphabet, then raise
    ``COUNT`` so the next refill produces longer names.
    '''
    global COUNT
    global NONTERMINALSET
    fresh_names = []
    for letters in combinations(ascii_uppercase, COUNT):
        fresh_names.append(''.join(letters))
    NONTERMINALSET = fresh_names
    COUNT += 1
def terminal_exist(token, grammar):
    '''
    Return the first non-terminal of ``grammar`` whose rule list contains
    ``token``, or ``None`` when no rule list contains it.
    '''
    owners = (nonterminal for nonterminal, rules in grammar.items()
              if token in rules)
    return next(owners, None)
if __name__ == '__main__':
    import argparse

    # Command line entry point: all three options are mandatory and are
    # forwarded unchanged to main().
    cli = argparse.ArgumentParser(description = 'Script to convert grammar to GNF form')
    cli.add_argument('--gf', type = str, required = True,
                     help = 'Location of grammar file')
    cli.add_argument('--out', type = str, required = True,
                     help = 'Location of output file')
    cli.add_argument('--start', type = str, required = True,
                     help = 'Start token')
    options = cli.parse_args()
    main(options.gf, options.out, options.start)

View File

@ -0,0 +1,38 @@
#!/bin/bash
# This script creates a FSA describing the input grammar *.g4
#
# Usage: ./prep_pda.sh <grammar_file> <start> [stack_limit]
#
# It must be run from the directory holding gnf_converter.py and
# construct_automata.py. The intermediate <name>.json is written to the
# current directory and the resulting <name>_automata.json is moved next to
# the grammar file.

# The grammar file and the start symbol are mandatory, the stack limit is not
if [ "$#" -lt 2 ] || [ "$#" -gt 3 ]; then
  echo "Usage: ./prep_pda.sh <grammar_file> <start> [stack_limit]"
  exit 1
fi

GRAMMAR_FILE="$1"
GRAMMAR_DIR="$(dirname -- "$GRAMMAR_FILE")"
START="$2"
STACK_LIMIT="$3"

# Get filename
FILE="$(basename -- "$GRAMMAR_FILE")"
echo "File:$FILE"
FILENAME="${FILE%.*}"
echo "Name:$FILENAME"

# Create the GNF form of the grammar
# (commands are kept in arrays so that paths with spaces survive intact)
CMD=(python3 gnf_converter.py --gf "$GRAMMAR_FILE" --out "${FILENAME}.json" --start "$START")
"${CMD[@]}" || exit 1

# Generate grammar automaton
CMD=(python3 construct_automata.py --gf "${FILENAME}.json")
# Check if user provided a stack limit
if [ -n "${STACK_LIMIT}" ]; then
  CMD+=(--limit "${STACK_LIMIT}")
fi
echo "${CMD[*]}"
"${CMD[@]}" || exit 1

# Move PDA to the source dir of the grammar
echo "Copying ${FILENAME}_automata.json to $GRAMMAR_DIR"
mv -- "${FILENAME}_automata.json" "$GRAMMAR_DIR/"

View File

@ -0,0 +1,154 @@
/* This is the testing module for Gramatron
*/
#include "afl-fuzz.h"
#include "gramfuzz.h"
#define NUMINPUTS 50
/* Free a PDA that may be only partially populated: the strings owned by
   every trigger, each state's trigger array and the state table itself. */
static void discard_pda(state *pda, int nstates) {

  if (!pda) { return; }

  for (int s = 0; s < nstates; s++) {

    for (int t = 0; t < pda[s].trigger_len; t++) {

      free(pda[s].ptr[t].id);
      free(pda[s].ptr[t].term);

    }

    free(pda[s].ptr);

  }

  free(pda);

}

/* Fill one state's trigger table from its JSON array. Every array element
   must itself be an array of at least [id, dest, terminal].
   Returns 0 on success, -1 on malformed input or allocation failure. */
static int fill_state(state *state_ptr, struct json_object *triggers) {

  int      trigger_len;
  trigger *trigger_ptr;

  if (!json_object_is_type(triggers, json_type_array)) { return -1; }

  trigger_len = (int)json_object_array_length(triggers);

  // Allocate at least one slot so that a NULL result always means failure
  trigger_ptr = calloc(trigger_len ? trigger_len : 1, sizeof(trigger));
  if (!trigger_ptr) { return -1; }

  state_ptr->trigger_len = trigger_len;
  state_ptr->ptr = trigger_ptr;

  printf("\nName:%d Trigger:%d", state_ptr->state_name, trigger_len);

  for (int ii = 0; ii < trigger_len; ii++, trigger_ptr++) {

    struct json_object *obj = json_object_array_get_idx(triggers, ii);
    const char *        id, *dest, *term;

    if (!obj || !json_object_is_type(obj, json_type_array) ||
        json_object_array_length(obj) < 3) {

      return -1;

    }

    // Get all the trigger attributes (NULL for a JSON null entry)
    id = json_object_get_string(json_object_array_get_idx(obj, 0));
    dest = json_object_get_string(json_object_array_get_idx(obj, 1));
    term = json_object_get_string(json_object_array_get_idx(obj, 2));
    if (!id || !dest || !term) { return -1; }

    trigger_ptr->id = strdup(id);
    trigger_ptr->dest = atoi(dest);

    // The two characters backslash + 'n' stand for a real newline
    trigger_ptr->term = strdup(strcmp("\\n", term) ? term : "\n");

    if (!trigger_ptr->id || !trigger_ptr->term) { return -1; }

    trigger_ptr->term_len = strlen(trigger_ptr->term);

  }

  return 0;

}

/* Copy every entry of the "pda" JSON object into the state table. The key
   of an entry is the numeric state name and doubles as the table index.
   Returns 0 on success, -1 on malformed input or allocation failure. */
static int load_states(state *pda, int nstates, struct json_object *pda_obj) {

  json_object_object_foreach(pda_obj, key, val) {

    // Get the correct offset into the pda to store state information
    int offset = atoi(key);

    // Reject indices outside the table and states that were already filled
    if (offset < 0 || offset >= nstates || pda[offset].ptr) { return -1; }

    // Store state name
    pda[offset].state_name = offset;

    if (fill_state(&pda[offset], val)) { return -1; }

  }

  return 0;

}

/* Build the in-memory automaton from the JSON file `automaton_file`.

   Sets the globals final_state, init_state and numstates from the members
   of the same name (numstates is the file's value plus one) and returns a
   calloc'ed table of numstates states indexed by state name.

   Returns NULL (after printing a message to stderr) when the file cannot
   be parsed, is missing a member, references a state outside the table or
   memory runs out. */
state *create_pda(u8 *automaton_file) {

  struct json_object *parsed_json, *source_obj;
  const char *        text;
  state *             pda = NULL;
  int                 nstates = 0;
  int                 ok = 0;

  printf("\n[GF] Automaton file passed:%s", automaton_file);

  parsed_json = json_object_from_file((const char *)automaton_file);
  if (!parsed_json) {

    fprintf(stderr, "\n[GF] Unable to parse automaton file:%s\n",
            automaton_file);
    return NULL;

  }

  // Getting final state (json_object_get_string yields NULL for a missing
  // member, which would otherwise be handed to atoi)
  source_obj = json_object_object_get(parsed_json, "final_state");
  text = json_object_get_string(source_obj);
  if (!text) { goto done; }
  printf("\t\nFinal=%s\n", text);
  final_state = atoi(text);

  // Getting initial state
  source_obj = json_object_object_get(parsed_json, "init_state");
  text = json_object_get_string(source_obj);
  if (!text) { goto done; }
  init_state = atoi(text);
  printf("\tInit=%s\n", text);

  // Getting number of states
  source_obj = json_object_object_get(parsed_json, "numstates");
  text = json_object_get_string(source_obj);
  if (!text || atoi(text) < 0) { goto done; }
  nstates = atoi(text) + 1;
  numstates = nstates;
  printf("\tNumStates=%d\n", numstates);

  // Allocate state space for each pda state
  pda = calloc(nstates, sizeof(state));
  if (!pda) { goto done; }

  // Getting PDA representation
  source_obj = json_object_object_get(parsed_json, "pda");
  if (!source_obj || !json_object_is_type(source_obj, json_type_object)) {

    goto done;

  }

  if (load_states(pda, nstates, source_obj) == 0) { ok = 1; }

done:
  if (!ok) {

    fprintf(stderr, "\n[GF] Malformed automaton file:%s\n", automaton_file);
    discard_pda(pda, nstates);
    pda = NULL;

  }

  // Delete the JSON object
  json_object_put(parsed_json);

  return pda;

}
void SanityCheck(char *automaton_path) {
state * pda = create_pda(automaton_path);
int count = 0, state;
Get_Dupes_Ret *getdupesret;
IdxMap_new * statemap;
IdxMap_new * statemap_ptr;
terminal * term_ptr;
while (count < NUMINPUTS) {
// Perform input generation
Array *generated = gen_input(pda, NULL);
print_repr(generated, "Gen");
count += 1;
}
}
/* Test driver: ./test <mode> <automaton_path>
   The only recognised mode is "SanityCheck". Returns 0 on success and -1
   on a usage error or an unknown mode. */
int main(int argc, char *argv[]) {

  char *mode;
  char *automaton_path;

  // Constant seed -- presumably so that the random choices made during
  // input generation are repeatable between runs.
  srand(1337);

  if (argc != 3) {

    printf("\nUsage: ./test <mode> <automaton_path>");
    return -1;

  }

  mode = argv[1];
  // argv outlives every use of the path, so no private copy is needed
  automaton_path = argv[2];
  printf("\nMode:%s Path:%s", mode, automaton_path);

  if (strcmp(mode, "SanityCheck")) {

    printf("\nUnrecognized mode");
    return -1;

  }

  SanityCheck(automaton_path);

  return 0;

}

View File

@ -0,0 +1,57 @@
/* Declarations used by the Gramatron testing module.
   The include guard keeps the struct definitions below from being seen
   twice when this header is pulled in more than once. */
#ifndef GRAMATRON_TEST_H
#define GRAMATRON_TEST_H

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <json-c/json.h>
#include <unistd.h>
#include "hashmap.h"
#include "uthash.h"
#include "utarray.h"

#define INIT_SIZE 100  // Initial size of the dynamic array holding the input

/* One symbol of a generated input.
   NOTE(review): field meanings are inferred from their names -- confirm
   against gen_input/insertArray. */
typedef struct terminal {

  int    state;        // presumably the state the symbol was emitted from
  int    trigger_idx;  // presumably the index of the trigger that was taken
  size_t symbol_len;   // presumably the length of `symbol`
  char * symbol;

} terminal;

/* One outgoing transition of a state. */
typedef struct trigger {

  char * id;        // Trigger identifier string
  int    dest;      // Destination state
  char * term;      // Terminal attached to the transition
  size_t term_len;  // Length of `term`

} trigger;

typedef struct state {

  int      state_name;   // Integer State name
  int      trigger_len;  // Number of triggers associated with this state
  trigger *ptr;          // Pointer to beginning of the list of triggers

} state;

/* Growable array of terminals holding one input.
   NOTE(review): `used`/`size` look like element count and capacity and
   `inputlen` like the byte length of the input -- confirm against
   initArray/insertArray. */
typedef struct {

  size_t    used;
  size_t    size;
  size_t    inputlen;
  terminal *start;

} Array;

/* NOTE(review): these are tentative definitions in a header; every
   translation unit including it gets its own candidate definition. */
int init_state;
int curr_state;
int final_state;

/* NOTE(review): test.c defines create_pda with a `u8 *` parameter. */
state *create_pda(char *);
Array *gen_input(state *, Array *);
void   print_repr(Array *, char *);
void   initArray(Array *, size_t);
void   insertArray(Array *, int, char *, size_t, int);

#endif /* GRAMATRON_TEST_H */

View File

@ -0,0 +1,392 @@
/*
Copyright (c) 2008-2018, Troy D. Hanson http://troydhanson.github.com/uthash/
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* a dynamic array implementation using macros
*/
#ifndef UTARRAY_H
#define UTARRAY_H
#define UTARRAY_VERSION 2.1.0
#include <stddef.h> /* size_t */
#include <string.h> /* memset, etc */
#include <stdlib.h> /* exit */
#ifdef __GNUC__
#define UTARRAY_UNUSED __attribute__((__unused__))
#else
#define UTARRAY_UNUSED
#endif
#ifdef oom
#error \
"The name of macro 'oom' has been changed to 'utarray_oom'. Please update your code."
#define utarray_oom() oom()
#endif
#ifndef utarray_oom
#define utarray_oom() exit(-1)
#endif
/* Per-element callbacks: copy-construct `src` into `dst`, destroy an
element, and initialise a fresh element. */
typedef void(ctor_f)(void *dst, const void *src);
typedef void(dtor_f)(void *elt);
typedef void(init_f)(void *elt);
/* Element descriptor of an array. A NULL callback selects the default used
by the macros below: memset-to-zero instead of init, memcpy instead of
copy, and nothing instead of dtor. */
typedef struct {
size_t sz; /* size of one element in bytes */
init_f *init;
ctor_f *copy;
dtor_f *dtor;
} UT_icd;
/* The dynamic array itself: element storage `d` with `n` allocated slots of
which the first `i` are in use. */
typedef struct {
unsigned i, n; /* i: index of next available slot, n: num slots */
UT_icd icd; /* initializer, copy and destructor functions */
char * d; /* n slots of size icd->sz*/
} UT_array;
/* Initialise caller-provided storage `a` as an empty array: zero the whole
UT_array and copy the element descriptor `*_icd` into it. */
#define utarray_init(a, _icd) \
do { \
\
memset(a, 0, sizeof(UT_array)); \
(a)->icd = *(_icd); \
\
} while (0)
/* Release the contents of `a` (not `a` itself): when slots were allocated,
run the destructor, if any, on each element in use and free the storage;
the slot count is reset to 0 in every case. */
#define utarray_done(a) \
do { \
\
if ((a)->n) { \
\
if ((a)->icd.dtor) { \
\
unsigned _ut_i; \
for (_ut_i = 0; _ut_i < (a)->i; _ut_i++) { \
\
(a)->icd.dtor(utarray_eltptr(a, _ut_i)); \
\
} \
\
} \
free((a)->d); \
\
} \
(a)->n = 0; \
\
} while (0)
/* Heap-allocate a UT_array, store it in `a` and initialise it with `_icd`.
Calls utarray_oom() when malloc fails. */
#define utarray_new(a, _icd) \
do { \
\
(a) = (UT_array *)malloc(sizeof(UT_array)); \
if ((a) == NULL) { utarray_oom(); } \
utarray_init(a, _icd); \
\
} while (0)
/* Counterpart of utarray_new: release the contents of `a`, then free the
UT_array itself. */
#define utarray_free(a) \
do { \
\
utarray_done(a); \
free(a); \
\
} while (0)
/* Make room for `by` more elements. When the current slots do not suffice
the slot count is doubled (starting from 8) until they do and the storage
is realloc'ed once; utarray_oom() is called when realloc fails. */
#define utarray_reserve(a, by) \
do { \
\
if (((a)->i + (by)) > (a)->n) { \
\
char *utarray_tmp; \
while (((a)->i + (by)) > (a)->n) { \
\
(a)->n = ((a)->n ? (2 * (a)->n) : 8); \
\
} \
utarray_tmp = (char *)realloc((a)->d, (a)->n * (a)->icd.sz); \
if (utarray_tmp == NULL) { utarray_oom(); } \
(a)->d = utarray_tmp; \
\
} \
\
} while (0)
/* Append a copy of the element `p` points to, using the copy callback when
one is set and memcpy otherwise. */
#define utarray_push_back(a, p) \
do { \
\
utarray_reserve(a, 1); \
if ((a)->icd.copy) { \
\
(a)->icd.copy(_utarray_eltptr(a, (a)->i++), p); \
\
} else { \
\
memcpy(_utarray_eltptr(a, (a)->i++), p, (a)->icd.sz); \
\
}; \
\
} while (0)
/* Remove the last element, running the destructor on it when one is set.
The element count is decremented without checking that it is non-zero. */
#define utarray_pop_back(a) \
do { \
\
if ((a)->icd.dtor) { \
\
(a)->icd.dtor(_utarray_eltptr(a, --((a)->i))); \
\
} else { \
\
(a)->i--; \
\
} \
\
} while (0)
/* Append one new element that is set up by the init callback or, without
one, filled with zero bytes. */
#define utarray_extend_back(a) \
do { \
\
utarray_reserve(a, 1); \
if ((a)->icd.init) { \
\
(a)->icd.init(_utarray_eltptr(a, (a)->i)); \
\
} else { \
\
memset(_utarray_eltptr(a, (a)->i), 0, (a)->icd.sz); \
\
} \
(a)->i++; \
\
} while (0)
/* Number of elements in use. */
#define utarray_len(a) ((a)->i)
/* Pointer to element `j`, or NULL when `j` is not below the element count. */
#define utarray_eltptr(a, j) (((j) < (a)->i) ? _utarray_eltptr(a, j) : NULL)
/* Pointer to slot `j` without any bounds check. */
#define _utarray_eltptr(a, j) ((a)->d + ((a)->icd.sz * (j)))
/* Insert a copy of the element `p` points to at index `j`. An index past
the end first grows the array to `j` elements; elements from `j` onwards
are shifted up by one slot before the new element is copied in. */
#define utarray_insert(a, p, j) \
do { \
\
if ((j) > (a)->i) utarray_resize(a, j); \
utarray_reserve(a, 1); \
if ((j) < (a)->i) { \
\
memmove(_utarray_eltptr(a, (j) + 1), _utarray_eltptr(a, j), \
((a)->i - (j)) * ((a)->icd.sz)); \
\
} \
if ((a)->icd.copy) { \
\
(a)->icd.copy(_utarray_eltptr(a, j), p); \
\
} else { \
\
memcpy(_utarray_eltptr(a, j), p, (a)->icd.sz); \
\
}; \
(a)->i++; \
\
} while (0)
/* Insert copies of all elements of array `w` into `a` at index `j`; does
nothing when `w` is empty. As with utarray_insert, an index past the end
grows `a` first and existing elements from `j` onwards are shifted up. */
#define utarray_inserta(a, w, j) \
do { \
\
if (utarray_len(w) == 0) break; \
if ((j) > (a)->i) utarray_resize(a, j); \
utarray_reserve(a, utarray_len(w)); \
if ((j) < (a)->i) { \
\
memmove(_utarray_eltptr(a, (j) + utarray_len(w)), _utarray_eltptr(a, j), \
((a)->i - (j)) * ((a)->icd.sz)); \
\
} \
if ((a)->icd.copy) { \
\
unsigned _ut_i; \
for (_ut_i = 0; _ut_i < (w)->i; _ut_i++) { \
\
(a)->icd.copy(_utarray_eltptr(a, (j) + _ut_i), \
_utarray_eltptr(w, _ut_i)); \
\
} \
\
} else { \
\
memcpy(_utarray_eltptr(a, j), _utarray_eltptr(w, 0), \
utarray_len(w) * ((a)->icd.sz)); \
\
} \
(a)->i += utarray_len(w); \
\
} while (0)
/* Set the element count of `dst` to `num`. Shrinking runs the destructor,
if any, on the elements that are cut off; growing reserves space and sets
up the new elements with the init callback or zero bytes. */
#define utarray_resize(dst, num) \
do { \
\
unsigned _ut_i; \
if ((dst)->i > (unsigned)(num)) { \
\
if ((dst)->icd.dtor) { \
\
for (_ut_i = (num); _ut_i < (dst)->i; ++_ut_i) { \
\
(dst)->icd.dtor(_utarray_eltptr(dst, _ut_i)); \
\
} \
\
} \
\
} else if ((dst)->i < (unsigned)(num)) { \
\
utarray_reserve(dst, (num) - (dst)->i); \
if ((dst)->icd.init) { \
\
for (_ut_i = (dst)->i; _ut_i < (unsigned)(num); ++_ut_i) { \
\
(dst)->icd.init(_utarray_eltptr(dst, _ut_i)); \
\
} \
\
} else { \
\
memset(_utarray_eltptr(dst, (dst)->i), 0, \
(dst)->icd.sz *((num) - (dst)->i)); \
\
} \
\
} \
(dst)->i = (num); \
\
} while (0)
/* Append copies of all elements of `src` to the end of `dst`. */
#define utarray_concat(dst, src) \
do { \
\
utarray_inserta(dst, src, utarray_len(dst)); \
\
} while (0)
/* Remove `len` elements starting at index `pos`: run the destructor, if
any, on each of them, move the elements behind the gap down and reduce the
element count by `len`. The range is not checked against the element
count. */
#define utarray_erase(a, pos, len) \
do { \
\
if ((a)->icd.dtor) { \
\
unsigned _ut_i; \
for (_ut_i = 0; _ut_i < (len); _ut_i++) { \
\
(a)->icd.dtor(utarray_eltptr(a, (pos) + _ut_i)); \
\
} \
\
} \
if ((a)->i > ((pos) + (len))) { \
\
memmove(_utarray_eltptr(a, pos), _utarray_eltptr(a, (pos) + (len)), \
((a)->i - ((pos) + (len))) * (a)->icd.sz); \
\
} \
(a)->i -= (len); \
\
} while (0)
/* Give back an empty array in `a`: clear the existing one when `a` is
non-NULL, otherwise allocate a new one with element descriptor `u`. */
#define utarray_renew(a, u) \
do { \
\
if (a) \
utarray_clear(a); \
else \
utarray_new(a, u); \
\
} while (0)
/* Remove all elements, running the destructor on each when one is set.
The allocated storage is kept for reuse. */
#define utarray_clear(a) \
do { \
\
if ((a)->i > 0) { \
\
if ((a)->icd.dtor) { \
\
unsigned _ut_i; \
for (_ut_i = 0; _ut_i < (a)->i; _ut_i++) { \
\
(a)->icd.dtor(_utarray_eltptr(a, _ut_i)); \
\
} \
\
} \
(a)->i = 0; \
\
} \
\
} while (0)
/* Sort the elements in place with qsort and the comparison function `cmp`. */
#define utarray_sort(a, cmp) \
do { \
\
qsort((a)->d, (a)->i, (a)->icd.sz, cmp); \
\
} while (0)
/* Look up `v` with bsearch and `cmp`; as bsearch requires, the array has to
be sorted consistently with `cmp` beforehand. */
#define utarray_find(a, v, cmp) bsearch((v), (a)->d, (a)->i, (a)->icd.sz, cmp)
/* First element, or NULL for an empty array. */
#define utarray_front(a) (((a)->i) ? (_utarray_eltptr(a, 0)) : NULL)
/* Element following `e`; a NULL `e` yields the first element and the last
element yields NULL, which allows iterating from a NULL start. */
#define utarray_next(a, e) \
(((e) == NULL) ? utarray_front(a) \
: (((a)->i != utarray_eltidx(a, e) + 1) \
? _utarray_eltptr(a, utarray_eltidx(a, e) + 1) \
: NULL))
/* Element preceding `e`; a NULL `e` yields the last element and the first
element yields NULL. */
#define utarray_prev(a, e) \
(((e) == NULL) ? utarray_back(a) \
: ((utarray_eltidx(a, e) != 0) \
? _utarray_eltptr(a, utarray_eltidx(a, e) - 1) \
: NULL))
/* Last element, or NULL for an empty array. */
#define utarray_back(a) (((a)->i) ? (_utarray_eltptr(a, (a)->i - 1)) : NULL)
/* Index of the element that `e` points to. */
#define utarray_eltidx(a, e) (((char *)(e) - (a)->d) / (a)->icd.sz)
/* last we pre-define a few icd for common utarrays of ints and strings */
/* Copy callback for arrays of strings: `src` and `dst` both point to a
   `char *` slot. The destination receives a strdup'ed duplicate of the
   source string, or NULL when the source slot holds NULL. */
static void utarray_str_cpy(void *dst, const void *src) {

  char *const *from = (char *const *)src;
  char **      to = (char **)dst;

  if (*from == NULL) {

    *to = NULL;

  } else {

    *to = strdup(*from);

  }

}
/* Destructor callback for arrays of strings: `elt` points to a `char *`
   slot whose string is freed (freeing a NULL string is a no-op). */
static void utarray_str_dtor(void *elt) {

  char **slot = (char **)elt;

  free(*slot);

}
/* Ready-made element descriptors: strings are duplicated on insertion and
freed on removal; ints and pointers are plain values copied bytewise. */
static const UT_icd ut_str_icd UTARRAY_UNUSED = {
sizeof(char *), NULL, utarray_str_cpy, utarray_str_dtor};
static const UT_icd ut_int_icd UTARRAY_UNUSED = {sizeof(int), NULL, NULL, NULL};
static const UT_icd ut_ptr_icd UTARRAY_UNUSED = {sizeof(void *), NULL, NULL,
NULL};
#endif /* UTARRAY_H */

File diff suppressed because it is too large Load Diff

View File

@ -65,9 +65,9 @@ my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {
/* When a new queue entry is added we check if there are new dictionary
entries to add to honggfuzz structure */
void afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
if (run.global->mutate.dictionaryCnt >= 1024) return;
@ -97,6 +97,8 @@ void afl_custom_queue_new_entry(my_mutator_t * data,
}
return 0;
}
/* we could set only_printable if is_ascii is set ... let's see

View File

@ -78,9 +78,9 @@ extern "C" my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {
/* When a new queue entry is added we check if there are new dictionary
entries to add to honggfuzz structure */
#if 0
extern "C" void afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
extern "C" uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
while (data->extras_cnt < afl_struct->extras_cnt) {
@ -110,6 +110,8 @@ extern "C" void afl_custom_queue_new_entry(my_mutator_t * data,
}
return 0;
}
#endif

View File

@ -53,7 +53,9 @@ pub trait RawCustomMutator {
1
}
fn queue_new_entry(&mut self, filename_new_queue: &Path, _filename_orig_queue: Option<&Path>) {}
fn queue_new_entry(&mut self, filename_new_queue: &Path, _filename_orig_queue: Option<&Path>) -> bool {
false
}
fn queue_get(&mut self, filename: &Path) -> bool {
true
@ -246,7 +248,7 @@ pub mod wrappers {
data: *mut c_void,
filename_new_queue: *const c_char,
filename_orig_queue: *const c_char,
) {
) -> bool {
match catch_unwind(|| {
let mut context = FFIContext::<M>::from(data);
if filename_new_queue.is_null() {

View File

@ -101,9 +101,9 @@ my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) {
/* When a new queue entry is added we run this input with the symcc
instrumented binary */
void afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
uint8_t afl_custom_queue_new_entry(my_mutator_t * data,
const uint8_t *filename_new_queue,
const uint8_t *filename_orig_queue) {
int pipefd[2];
struct stat st;
@ -232,6 +232,8 @@ void afl_custom_queue_new_entry(my_mutator_t * data,
}
return 0;
}
uint32_t afl_custom_fuzz_count(my_mutator_t *data, const u8 *buf,

View File

@ -47,7 +47,7 @@ int afl_custom_post_trim(void *data, unsigned char success);
size_t afl_custom_havoc_mutation(void *data, unsigned char *buf, size_t buf_size, unsigned char **out_buf, size_t max_size);
unsigned char afl_custom_havoc_mutation_probability(void *data);
unsigned char afl_custom_queue_get(void *data, const unsigned char *filename);
void afl_custom_queue_new_entry(void *data, const unsigned char *filename_new_queue, const unsigned int *filename_orig_queue);
u8 afl_custom_queue_new_entry(void *data, const unsigned char *filename_new_queue, const unsigned char *filename_orig_queue);
const char* afl_custom_introspection(my_mutator_t *data);
void afl_custom_deinit(void *data);
```
@ -88,7 +88,7 @@ def queue_get(filename):
return True
def queue_new_entry(filename_new_queue, filename_orig_queue):
pass
return False
def introspection():
return string
@ -156,6 +156,7 @@ def deinit(): # optional for Python
- `queue_new_entry` (optional):
This method is called after adding a new test case to the queue.
If the contents of the file were changed, return True; otherwise return False.
- `introspection` (optional):

View File

@ -972,8 +972,8 @@ struct custom_mutator {
* @param filename_orig_queue File name of the original queue entry. This
* argument can be NULL while initializing the fuzzer
*/
void (*afl_custom_queue_new_entry)(void *data, const u8 *filename_new_queue,
const u8 *filename_orig_queue);
u8 (*afl_custom_queue_new_entry)(void *data, const u8 *filename_new_queue,
const u8 *filename_orig_queue);
/**
* Deinitialize the custom mutator.
*
@ -1005,6 +1005,8 @@ void setup_custom_mutators(afl_state_t *);
void destroy_custom_mutators(afl_state_t *);
u8 trim_case_custom(afl_state_t *, struct queue_entry *q, u8 *in_buf,
struct custom_mutator *mutator);
void run_afl_custom_queue_new_entry(afl_state_t *, struct queue_entry *, u8 *,
u8 *);
/* Python */
#ifdef USE_PYTHON
@ -1021,7 +1023,7 @@ size_t havoc_mutation_py(void *, u8 *, size_t, u8 **, size_t);
u8 havoc_mutation_probability_py(void *);
u8 queue_get_py(void *, const u8 *);
const char *introspection_py(void *);
void queue_new_entry_py(void *, const u8 *, const u8 *);
u8 queue_new_entry_py(void *, const u8 *, const u8 *);
void deinit_py(void *);
#endif

View File

@ -881,11 +881,7 @@ void perform_dry_run(afl_state_t *afl) {
u32 read_len = MIN(q->len, (u32)MAX_FILE);
use_mem = afl_realloc(AFL_BUF_PARAM(in), read_len);
if (read(fd, use_mem, read_len) != (ssize_t)read_len) {
FATAL("Short read from '%s'", q->fname);
}
ck_read(fd, use_mem, read_len, q->fname);
close(fd);
@ -1350,6 +1346,12 @@ void pivot_inputs(afl_state_t *afl) {
if (q->passed_det) { mark_as_det_done(afl, q); }
if (afl->custom_mutators_count) {
run_afl_custom_queue_new_entry(afl, q, q->fname, NULL);
}
++id;
}

View File

@ -31,6 +31,45 @@ struct custom_mutator *load_custom_mutator(afl_state_t *, const char *);
struct custom_mutator *load_custom_mutator_py(afl_state_t *, char *);
#endif
/* Tell every custom mutator that implements afl_custom_queue_new_entry
   about the new queue file `fname` (derived from `mother_fname`). When at
   least one of them returns non-zero, the file is stat'ed again and its
   size stored in q->len; a vanished or empty file is fatal. */
void run_afl_custom_queue_new_entry(afl_state_t *afl, struct queue_entry *q,
                                    u8 *fname, u8 *mother_fname) {

  if (!afl->custom_mutators_count) { return; }

  u8 modified = 0;

  LIST_FOREACH(&afl->custom_mutator_list, struct custom_mutator, {

    if (el->afl_custom_queue_new_entry &&
        el->afl_custom_queue_new_entry(el->data, fname, mother_fname)) {

      modified = 1;

    }

  });

  if (!modified) { return; }

  struct stat st;

  if (stat(fname, &st)) { PFATAL("File %s is gone!", fname); }

  if (!st.st_size) {

    FATAL("File %s became empty in custom mutator!", fname);

  }

  q->len = st.st_size;

}
void setup_custom_mutators(afl_state_t *afl) {
/* Try mutator library first */

View File

@ -813,8 +813,8 @@ u8 queue_get_py(void *py_mutator, const u8 *filename) {
}
void queue_new_entry_py(void *py_mutator, const u8 *filename_new_queue,
const u8 *filename_orig_queue) {
u8 queue_new_entry_py(void *py_mutator, const u8 *filename_new_queue,
const u8 *filename_orig_queue) {
PyObject *py_args, *py_value;
@ -861,7 +861,21 @@ void queue_new_entry_py(void *py_mutator, const u8 *filename_new_queue,
py_args);
Py_DECREF(py_args);
if (py_value == NULL) {
if (py_value != NULL) {
int ret = PyObject_IsTrue(py_value);
Py_DECREF(py_value);
if (ret == -1) {
PyErr_Print();
FATAL("Failed to convert return value");
}
return (u8)ret & 0xFF;
} else {
PyErr_Print();
FATAL("Call failed");

View File

@ -474,24 +474,12 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
if (afl->custom_mutators_count) {
LIST_FOREACH(&afl->custom_mutator_list, struct custom_mutator, {
/* At the initialization stage, queue_cur is NULL */
if (afl->queue_cur && !afl->syncing_party) {
if (el->afl_custom_queue_new_entry) {
run_afl_custom_queue_new_entry(afl, q, fname, afl->queue_cur->fname);
u8 *fname_orig = NULL;
/* At the initialization stage, queue_cur is NULL */
if (afl->queue_cur && !afl->syncing_party) {
fname_orig = afl->queue_cur->fname;
}
el->afl_custom_queue_new_entry(el->data, fname, fname_orig);
}
});
}
}