fixes
custom_mutators/gramatron/preprocess/construct_automata.py (new file, 275 lines)
@@ -0,0 +1,275 @@
import sys
import json
import re
from collections import defaultdict

# import pygraphviz as pgv

gram_data = None
state_count = 1
pda = []
worklist = []
state_stacks = {}

# === If user provides upper bound on the stack size during FSA creation ===
# Specifies the upper bound to which the stack is allowed to grow.
# If, for any generated state, the stack size is >= stack_limit, then this
# state is not expanded further.
stack_limit = None
# Holds the set of unexpanded rules owing to the user-passed stack constraint limit
unexpanded_rules = set()
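
# Illustrative sketch (hypothetical grammar, inferred from how main() and
# tokenize() below read the JSON): each nonterminal maps to a list of GNF
# rules of the form "'terminal' STACK SYMBOLS", and "Start" names the start
# symbol.
_EXAMPLE_GNF_GRAMMAR = {
    "Start": ["PROGRAM"],
    "PROGRAM": ["'a' PROGRAM", "'b'"],
}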

def main(grammar, limit):
    global worklist, gram_data, stack_limit
    current = '0'
    stack_limit = limit
    if stack_limit:
        print('[X] Operating in bounded stack mode')

    with open(grammar, 'r') as fd:
        gram_data = json.load(fd)
    start_symbol = gram_data["Start"][0]
    worklist.append([current, [start_symbol]])
    # print (grammar)
    filename = (grammar.split('/')[-1]).split('.')[0]

    while worklist:
        # Take an element from the worklist
        # print ('================')
        # print ('Worklist:', worklist)
        element = worklist.pop(0)
        prep_transitions(element)

    pda_file = filename + '_transition.json'
    graph_file = filename + '.png'
    # print ('XXXXXXXXXXXXXXXX')
    # print ('PDA file:%s Png graph file:%s' % (pda_file, graph_file))
    # XXX Commented out because visualization of the current version of PHP causes a segfault
    # Create the graph and dump the transitions to a file
    # create_graph(filename)
    transformed = postprocess()
    with open(filename + '_automata.json', 'w+') as fd:
        json.dump(transformed, fd)
    with open(filename + '_transition.json', 'w+') as fd:
        json.dump(pda, fd)
    if not unexpanded_rules:
        print('[X] No unexpanded rules, absolute FSA formed')
        sys.exit(0)
    else:
        print('[X] Certain rules were not expanded due to the stack size limit. '
              'An inexact approximation has been created and the disallowed rules '
              'have been put in {}_disallowed.json'.format(filename))
        print('[X] Number of unexpanded rules:', len(unexpanded_rules))
        with open(filename + '_disallowed.json', 'w+') as fd:
            json.dump(list(unexpanded_rules), fd)

def create_graph(filename):
    '''
    Creates a DOT representation of the PDA
    '''
    global pda
    G = pgv.AGraph(strict=False, directed=True)
    for transition in pda:
        print('Transition:', transition)
        G.add_edge(transition['source'], transition['dest'],
                   label='Term:{}'.format(transition['terminal']))
    G.layout(prog='dot')
    print('Do it up 2')
    G.draw(filename + '.png')

def prep_transitions(element):
    '''
    Generates transitions
    '''
    global gram_data, state_count, pda, worklist, state_stacks, stack_limit, unexpanded_rules
    state = element[0]
    try:
        nonterminal = element[1][0]
    except IndexError:
        # Final state was encountered, pop from worklist without doing anything
        return
    rules = gram_data[nonterminal]
    count = 1
    for rule in rules:
        isRecursive = False
        # print ('Current state:', state)
        terminal, ss, termIsRegex = tokenize(rule)
        transition = get_template()
        transition['trigger'] = '_'.join([state, str(count)])
        transition['source'] = state
        transition['dest'] = str(state_count)
        transition['ss'] = ss
        transition['terminal'] = terminal
        transition['rule'] = "{} -> {}".format(nonterminal, rule)
        if termIsRegex:
            transition['termIsRegex'] = True

        # Creating a state stack for the new state
        try:
            state_stack = state_stacks[state][:]
        except KeyError:
            state_stack = []
        if len(state_stack):
            state_stack.pop(0)
        if ss:
            for symbol in ss[::-1]:
                state_stack.insert(0, symbol)
        transition['stack'] = state_stack

        # Check if a recursive transition state is being created; if so, make a backward
        # edge and don't add anything to the worklist
        # print (state_stacks)
        if state_stacks:
            for state_element, stack in state_stacks.items():
                # print ('Stack:', sorted(stack))
                # print ('State stack:', sorted(state_stack))
                if sorted(stack) == sorted(state_stack):
                    transition['dest'] = state_element
                    # print ('Recursive:', transition)
                    pda.append(transition)
                    count += 1
                    isRecursive = True
                    break
        # If a recursive transition was exercised, don't add the same transition as a new
        # edge; continue onto the next transitions
        if isRecursive:
            continue

        # If the generated state has a stack size > stack_limit then that state is abandoned
        # and not added to the FSA or the worklist for further expansion
        if stack_limit:
            if len(transition['stack']) > stack_limit:
                unexpanded_rules.add(transition['rule'])
                continue

        # Create transitions for the non-recursive relations and add to the worklist
        # print ('Normal:', transition)
        # print ('State2:', state)
        pda.append(transition)
        worklist.append([transition['dest'], transition['stack']])
        state_stacks[transition['dest']] = state_stack
        state_count += 1
        count += 1
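
# For illustration only (hypothetical values, based on the _EXAMPLE_GNF_GRAMMAR
# sketch above): expanding the rule PROGRAM -> 'a' PROGRAM from state '0' would
# yield a transition roughly like
#   {'trigger': '0_1', 'source': '0', 'dest': '1', 'ss': ['PROGRAM'],
#    'terminal': 'a', 'rule': "PROGRAM -> 'a' PROGRAM",
#    'termIsRegex': False, 'stack': ['PROGRAM']}
# and the corresponding worklist entry would be ['1', ['PROGRAM']].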

def tokenize(rule):
    '''
    Gets the terminal and the corresponding stack symbols from a rule in GNF form
    '''
    pattern = re.compile(r"([r])*\'([\s\S]+)\'([\s\S]*)")
    terminal = None
    ss = None
    termIsRegex = False
    match = pattern.match(rule)
    if match.group(1):
        termIsRegex = True
    if match.group(2):
        terminal = match.group(2)
    else:
        raise AssertionError("Rule is not in GNF form")

    if match.group(3):
        ss = (match.group(3)).split()

    return terminal, ss, termIsRegex
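
# Illustrative sketch (hypothetical helper, not called anywhere): expected
# behaviour of tokenize() on GNF rules, assuming the quoting convention used
# above (an "r" prefix marks a regex terminal, stack symbols follow the
# closing quote).
def _tokenize_examples():
    assert tokenize("'a' A B") == ('a', ['A', 'B'], False)
    assert tokenize("r'[0-9]+'") == ('[0-9]+', None, True)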

def get_template():
    transition_template = {
        'trigger': None,
        'source': None,
        'dest': None,
        'termIsRegex': False,
        'terminal': None,
        'stack': []
    }
    return transition_template

def postprocess():
    '''
    Creates a representation to be passed on to the C-module
    '''
    global pda
    final_struct = {}
    memoized = defaultdict(list)
    # Supporting data structures for if a stack limit is imposed
    culled_pda = []
    culled_final = []
    num_transitions = 0  # Keep track of the number of transitions

    states, final, initial = _get_states()

    print(initial)
    assert len(initial) == 1, 'More than one init state found'

    # Cull transitions to states which were not expanded owing to the stack limit
    if stack_limit:

        blocklist = []
        for final_state in final:
            for transition in pda:
                if (transition["dest"] == final_state) and (len(transition["stack"]) > 0):
                    blocklist.append(transition["dest"])
                    continue
                else:
                    culled_pda.append(transition)

        culled_final = [state for state in final if state not in blocklist]

        assert len(culled_final) == 1, 'More than one final state found'

        for transition in culled_pda:
            state = transition["source"]
            if transition["dest"] in blocklist:
                continue
            num_transitions += 1
            memoized[state].append([transition["trigger"], transition["dest"],
                                    transition["terminal"]])
        final_struct["init_state"] = initial
        final_struct["final_state"] = culled_final[0]
        # The reason we do this is because, when states are culled, the indexing is
        # still relative to the actual number of states, hence we keep numstates recorded
        # as the original number of states
        print('[X] Actual Number of states:', len(memoized.keys()))
        print('[X] Number of transitions:', num_transitions)
        print('[X] Original Number of states:', len(states))
        final_struct["numstates"] = len(states)
        final_struct["pda"] = memoized
        return final_struct

    # Running FSA construction in exact approximation mode and postprocessing it like so
    for transition in pda:
        state = transition["source"]
        memoized[state].append([transition["trigger"], transition["dest"],
                                transition["terminal"]])

    final_struct["init_state"] = initial
    final_struct["final_state"] = final[0]
    print('[X] Actual Number of states:', len(memoized.keys()))
    final_struct["numstates"] = len(memoized.keys())
    final_struct["pda"] = memoized
    return final_struct
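
# For illustration only (hypothetical values): the structure returned above has
# the shape
#   {"init_state": "0", "final_state": "1", "numstates": 2,
#    "pda": {"0": [["0_1", "1", "a"], ...], ...}}
# where each entry under "pda" is [trigger, destination state, terminal].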

def _get_states():
    source = set()
    dest = set()
    global pda
    for transition in pda:
        source.add(transition["source"])
        dest.add(transition["dest"])
    source_copy = source.copy()
    source_copy.update(dest)
    return list(source_copy), list(dest.difference(source)), str(''.join(list(source.difference(dest))))

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Script to convert GNF grammar to PDA')
    parser.add_argument(
        '--gf',
        type=str,
        help='Location of GNF grammar')
    parser.add_argument(
        '--limit',
        type=int,
        default=None,
        help='Specify the upper bound for the stack size')
    args = parser.parse_args()
    main(args.gf, args.limit)
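
# Example invocation (hypothetical grammar file name, inferred from the
# argparse options above):
#   python construct_automata.py --gf php_gnf.json --limit 5
# This writes php_gnf_automata.json and php_gnf_transition.json, and, if some
# rules could not be expanded within the stack limit, php_gnf_disallowed.json.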