tree-sitter/test/fuzz/gen-dict.py
Andrew Helwer 89edb2ddca
fix(fuzzer): only use rule strings for fuzz dictionary
The `find_literals` function can also pick up tokens in `precedences`
2023-07-10 19:12:24 -04:00

31 lines
796 B
Python

import json
import sys
def find_literals(literals, node):
    '''Recursively collect STRING literal values from a grammar fragment.

    literals -- set that receives the 'value' of every STRING node found;
                it is mutated in place and nothing is returned.
    node     -- any value decoded from the grammar JSON. Dicts and lists
                are descended into; every other type is ignored.
    '''
    if isinstance(node, dict):
        if node.get('type') == 'STRING' and 'value' in node:
            literals.add(node['value'])
        # Only the values can contain nested nodes. values() is available
        # on both Python 2 and Python 3, unlike the Python-2-only
        # iteritems().
        for child in node.values():
            find_literals(literals, child)
    elif isinstance(node, list):
        for child in node:
            find_literals(literals, child)
def main():
    '''Generate a libFuzzer / AFL dictionary from a tree-sitter grammar.json

    Reads the grammar file named by sys.argv[1], gathers every STRING
    literal found under its 'rules' key, and prints one dictionary entry
    per distinct non-empty literal in sorted order. Each entry is the
    literal's UTF-8 bytes written as two-digit hex escapes inside double
    quotes.
    '''
    # Binary mode leaves encoding detection to the json module rather than
    # the locale's default codec (json accepts bytes on Python 2 and on
    # Python 3.6+).
    with open(sys.argv[1], 'rb') as f:
        grammar = json.load(f)

    literals = set()
    find_literals(literals, grammar['rules'])

    for lit in sorted(literals):
        if lit:
            # bytearray yields integer byte values on both Python 2 and 3,
            # so no ord() call is needed (ord() on an int fails on 3).
            escaped = ''.join(
                '\\x%02x' % byte for byte in bytearray(lit.encode('utf-8')))
            # Single-argument print(...) parses on Python 2 and Python 3.
            print('"%s"' % escaped)
# Run the generator only when this file is executed as a script; importing
# it as a module does not call main().
if __name__ == '__main__':
    main()