mirror of
https://github.com/AFLplusplus/AFLplusplus.git
synced 2025-06-07 15:51:32 +00:00
276 lines
9.5 KiB
Python
276 lines
9.5 KiB
Python
import sys
|
|
import json
|
|
import re
|
|
from collections import defaultdict
|
|
# import pygraphviz as pgv
|
|
|
|
gram_data = None
|
|
state_count = 1
|
|
pda = []
|
|
worklist = []
|
|
state_stacks = {}
|
|
|
|
# === If user provides upper bound on the stack size during FSA creation ===
|
|
# Specifies the upper bound to which the stack is allowed to grow
|
|
# If for any generated state, the stack size is >= stack_limit then this
|
|
# state is not expanded further.
|
|
stack_limit = None
|
|
# Holds the set of unexpanded rules owing to the user-passed stack constraint limit
|
|
unexpanded_rules = set()
|
|
|
|
def main(grammar, limit):
|
|
global worklist, gram_data, stack_limit
|
|
current = '0'
|
|
stack_limit = limit
|
|
if stack_limit:
|
|
print ('[X] Operating in bounded stack mode')
|
|
|
|
with open(grammar, 'r') as fd:
|
|
gram_data = json.load(fd)
|
|
start_symbol = gram_data["Start"][0]
|
|
worklist.append([current, [start_symbol]])
|
|
# print (grammar)
|
|
filename = (grammar.split('/')[-1]).split('.')[0]
|
|
|
|
|
|
while worklist:
|
|
# Take an element from the worklist
|
|
# print ('================')
|
|
# print ('Worklist:', worklist)
|
|
element = worklist.pop(0)
|
|
prep_transitions(element)
|
|
|
|
pda_file = filename + '_transition.json'
|
|
graph_file = filename + '.png'
|
|
# print ('XXXXXXXXXXXXXXXX')
|
|
# print ('PDA file:%s Png graph file:%s' % (pda_file, graph_file))
|
|
# XXX Commented out because visualization of current version of PHP causes segfault
|
|
# Create the graph and dump the transitions to a file
|
|
# create_graph(filename)
|
|
transformed = postprocess()
|
|
with open(filename + '_automata.json', 'w+') as fd:
|
|
json.dump(transformed, fd)
|
|
with open(filename + '_transition.json', 'w+') as fd:
|
|
json.dump(pda, fd)
|
|
if not unexpanded_rules:
|
|
print ('[X] No unexpanded rules, absolute FSA formed')
|
|
exit(0)
|
|
else:
|
|
print ('[X] Certain rules were not expanded due to stack size limit. Inexact approximation has been created and the disallowed rules have been put in {}_disallowed.json'.format(filename))
|
|
print ('[X] Number of unexpanded rules:', len(unexpanded_rules))
|
|
with open(filename + '_disallowed.json', 'w+') as fd:
|
|
json.dump(list(unexpanded_rules), fd)
|
|
|
|
def create_graph(filename):
|
|
'''
|
|
Creates a DOT representation of the PDA
|
|
'''
|
|
global pda
|
|
G = pgv.AGraph(strict = False, directed = True)
|
|
for transition in pda:
|
|
print ('Transition:', transition)
|
|
G.add_edge(transition['source'], transition['dest'],
|
|
label = 'Term:{}'.format(transition['terminal']))
|
|
G.layout(prog = 'dot')
|
|
print ('Do it up 2')
|
|
G.draw(filename + '.png')
|
|
|
|
def prep_transitions(element):
|
|
'''
|
|
Generates transitions
|
|
'''
|
|
global gram_data, state_count, pda, worklist, state_stacks, stack_limit, unexpanded_rules
|
|
state = element[0]
|
|
try:
|
|
nonterminal = element[1][0]
|
|
except IndexError:
|
|
# Final state was encountered, pop from worklist without doing anything
|
|
return
|
|
rules = gram_data[nonterminal]
|
|
count = 1
|
|
for rule in rules:
|
|
isRecursive = False
|
|
# print ('Current state:', state)
|
|
terminal, ss, termIsRegex = tokenize(rule)
|
|
transition = get_template()
|
|
transition['trigger'] = '_'.join([state, str(count)])
|
|
transition['source'] = state
|
|
transition['dest'] = str(state_count)
|
|
transition['ss'] = ss
|
|
transition['terminal'] = terminal
|
|
transition['rule'] = "{} -> {}".format(nonterminal, rule )
|
|
if termIsRegex:
|
|
transition['termIsRegex'] = True
|
|
|
|
# Creating a state stack for the new state
|
|
try:
|
|
state_stack = state_stacks[state][:]
|
|
except:
|
|
state_stack = []
|
|
if len(state_stack):
|
|
state_stack.pop(0)
|
|
if ss:
|
|
for symbol in ss[::-1]:
|
|
state_stack.insert(0, symbol)
|
|
transition['stack'] = state_stack
|
|
|
|
# Check if a recursive transition state being created, if so make a backward
|
|
# edge and don't add anything to the worklist
|
|
# print (state_stacks)
|
|
if state_stacks:
|
|
for state_element, stack in state_stacks.items():
|
|
# print ('Stack:', sorted(stack))
|
|
# print ('State stack:', sorted(state_stack))
|
|
if sorted(stack) == sorted(state_stack):
|
|
transition['dest'] = state_element
|
|
# print ('Recursive:', transition)
|
|
pda.append(transition)
|
|
count += 1
|
|
isRecursive = True
|
|
break
|
|
# If a recursive transition exercised don't add the same transition as a new
|
|
# edge, continue onto the next transitions
|
|
if isRecursive:
|
|
continue
|
|
|
|
# If the generated state has a stack size > stack_limit then that state is abandoned
|
|
# and not added to the FSA or the worklist for further expansion
|
|
if stack_limit:
|
|
if (len(transition['stack']) > stack_limit):
|
|
unexpanded_rules.add(transition['rule'])
|
|
continue
|
|
|
|
# Create transitions for the non-recursive relations and add to the worklist
|
|
# print ('Normal:', transition)
|
|
# print ('State2:', state)
|
|
pda.append(transition)
|
|
worklist.append([transition['dest'], transition['stack']])
|
|
state_stacks[transition['dest']] = state_stack
|
|
state_count += 1
|
|
count += 1
|
|
|
|
def tokenize(rule):
|
|
'''
|
|
Gets the terminal and the corresponding stack symbols from a rule in GNF form
|
|
'''
|
|
pattern = re.compile("([r])*\'([\s\S]+)\'([\s\S]*)")
|
|
terminal = None
|
|
ss = None
|
|
termIsRegex = False
|
|
match = pattern.match(rule)
|
|
if match.group(1):
|
|
termIsRegex = True
|
|
if match.group(2):
|
|
terminal = match.group(2)
|
|
else:
|
|
raise AssertionError("Rule is not in GNF form")
|
|
|
|
if match.group(3):
|
|
ss = (match.group(3)).split()
|
|
|
|
return terminal, ss, termIsRegex
|
|
|
|
def get_template():
|
|
transition_template = {
|
|
'trigger':None,
|
|
'source': None,
|
|
'dest': None,
|
|
'termIsRegex': False,
|
|
'terminal' : None,
|
|
'stack': []
|
|
}
|
|
return transition_template
|
|
|
|
def postprocess():
|
|
'''
|
|
Creates a representation to be passed on to the C-module
|
|
'''
|
|
global pda
|
|
final_struct = {}
|
|
memoized = defaultdict(list)
|
|
# Supporting data structures for if stack limit is imposed
|
|
culled_pda = []
|
|
culled_final = []
|
|
num_transitions = 0 # Keep track of number of transitions
|
|
|
|
|
|
states, final, initial = _get_states()
|
|
|
|
print (initial)
|
|
assert len(initial) == 1, 'More than one init state found'
|
|
|
|
# Cull transitions to states which were not expanded owing to the stack limit
|
|
if stack_limit:
|
|
|
|
blocklist = []
|
|
for final_state in final:
|
|
for transition in pda:
|
|
if (transition["dest"] == final_state) and (len(transition["stack"]) > 0):
|
|
blocklist.append(transition["dest"])
|
|
continue
|
|
else:
|
|
culled_pda.append(transition)
|
|
|
|
culled_final = [state for state in final if state not in blocklist]
|
|
|
|
assert len(culled_final) == 1, 'More than one final state found'
|
|
|
|
for transition in culled_pda:
|
|
state = transition["source"]
|
|
if transition["dest"] in blocklist:
|
|
continue
|
|
num_transitions += 1
|
|
memoized[state].append([transition["trigger"], transition["dest"],
|
|
transition["terminal"]])
|
|
final_struct["init_state"] = initial
|
|
final_struct["final_state"] = culled_final[0]
|
|
# The reason we do this is because when states are culled, the indexing is
|
|
# still relative to the actual number of states hence we keep numstates recorded
|
|
# as the original number of states
|
|
print ('[X] Actual Number of states:', len(memoized.keys()))
|
|
print ('[X] Number of transitions:', num_transitions)
|
|
print ('[X] Original Number of states:', len(states))
|
|
final_struct["numstates"] = len(states)
|
|
final_struct["pda"] = memoized
|
|
return final_struct
|
|
|
|
# Running FSA construction in exact approximation mode and postprocessing it like so
|
|
for transition in pda:
|
|
state = transition["source"]
|
|
memoized[state].append([transition["trigger"], transition["dest"],
|
|
transition["terminal"]])
|
|
|
|
final_struct["init_state"] = initial
|
|
final_struct["final_state"] = final[0]
|
|
print ('[X] Actual Number of states:', len(memoized.keys()))
|
|
final_struct["numstates"] = len(memoized.keys())
|
|
final_struct["pda"] = memoized
|
|
return final_struct
|
|
|
|
|
|
def _get_states():
|
|
source = set()
|
|
dest = set()
|
|
global pda
|
|
for transition in pda:
|
|
source.add(transition["source"])
|
|
dest.add(transition["dest"])
|
|
source_copy = source.copy()
|
|
source_copy.update(dest)
|
|
return list(source_copy), list(dest.difference(source)), str(''.join(list(source.difference(dest))))
|
|
|
|
if __name__ == '__main__':
|
|
import argparse
|
|
parser = argparse.ArgumentParser(description = 'Script to convert GNF grammar to PDA')
|
|
parser.add_argument(
|
|
'--gf',
|
|
type = str,
|
|
help = 'Location of GNF grammar')
|
|
parser.add_argument(
|
|
'--limit',
|
|
type = int,
|
|
default = None,
|
|
help = 'Specify the upper bound for the stack size')
|
|
args = parser.parse_args()
|
|
main(args.gf, args.limit)
|