diff --git a/script/build-fuzzers b/script/build-fuzzers index bff43c8b..1d07cba7 100755 --- a/script/build-fuzzers +++ b/script/build-fuzzers @@ -6,14 +6,8 @@ if [[ "$(uname -s)" != Linux ]]; then exit 1 fi -if [[ -z "$LIB_FUZZER_PATH" ]]; then - echo "LIB_FUZZER_PATH not set" - exit 1 -fi - CC=${CC:-clang} CXX=${CXX:-clang++} -LINK=${LINK:-clang++} default_fuzz_flags="-fsanitize=fuzzer,address,undefined" @@ -21,7 +15,7 @@ CFLAGS=${CFLAGS:-"$default_fuzz_flags"} CXXFLAGS=${CXXFLAGS:-"$default_fuzz_flags"} export CFLAGS -make +make CC="$CC" CXX="$CXX" if [ -z "$@" ]; then languages=$(ls test/fixtures/grammars) @@ -29,7 +23,13 @@ else languages="$@" fi +mkdir -p test/fuzz/out + for lang in ${languages[@]}; do + # skip typescript + if [[ $lang == "typescript" ]]; then + continue + fi echo "Building $lang fuzzer..." lang_dir="test/fixtures/grammars/$lang" @@ -54,7 +54,7 @@ for lang in ${languages[@]}; do highlights_filename="${lang_dir}/queries/highlights.scm" if [ -e "${highlights_filename}" ]; then ts_lang_query_filename="${lang}.scm" - cp "${highlights_filename}" "out/${ts_lang_query_filename}" + cp "${highlights_filename}" "test/fuzz/out/${ts_lang_query_filename}" else ts_lang_query_filename="" fi @@ -62,11 +62,16 @@ for lang in ${languages[@]}; do # FIXME: We should extract the grammar name from grammar.js. Use the name of # the directory instead. Also, the grammar name needs to be a valid C # identifier so replace any '-' characters - ts_lang="tree_sitter_$(echo $lang | tr -- - _)" + ts_lang="tree_sitter_$(echo "$lang" | tr -- - _)" $CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_LANG="$ts_lang" -D TS_LANG_QUERY_FILENAME="\"${ts_lang_query_filename}\"" \ "test/fuzz/fuzzer.cc" "${objects[@]}" \ - libtree-sitter.a "$LIB_FUZZER_PATH" \ - -o "out/${lang}_fuzzer" + libtree-sitter.a \ + -o "test/fuzz/out/${lang}_fuzzer" - python test/fuzz/gen-dict.py "${lang_dir}/src/grammar.json" > "out/$lang.dict" + jq ' + [ .. + | if .type? == "STRING" or (.type? 
== "ALIAS" and .named? == false) then .value else empty end + | select(test("\\S") and length == utf8bytelength) + ] | unique | .[] + ' "${lang_dir}/src/grammar.json" | sort > "test/fuzz/out/${lang}.dict" done diff --git a/script/run-fuzzer b/script/run-fuzzer index ddd481d9..ae73958b 100755 --- a/script/run-fuzzer +++ b/script/run-fuzzer @@ -6,7 +6,13 @@ root=$(dirname "$0")/.. export ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1:symbolize=1" export UBSAN="print_stacktrace=1:halt_on_error=1:symbolize=1" -declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=256" ["recover"]="-timeout=10 -rss_limit_mb=256" ) +# In CI (CI set and non-empty) cap each run at 120s of total fuzzing time; otherwise no total-time limit is set here. + +if [ -z "${CI:-}" ]; then + declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=2048" ["recover"]="-timeout=10 -rss_limit_mb=2048" ) +else + declare -A mode_config=( ["halt"]="-max_total_time=120 -timeout=1 -rss_limit_mb=2048" ["recover"]="-max_total_time=120 -timeout=10 -rss_limit_mb=2048" ) +fi run_fuzzer() { if [ "$#" -lt 2 ]; then @@ -21,7 +27,7 @@ run_fuzzer() { # Treat remainder of arguments as libFuzzer arguments # Fuzzing logs and testcases are always written to `pwd`, so `cd` there first - results="${root}/out/fuzz-results/${lang}_${mode}" + results="${root}/test/fuzz/out/fuzz-results/${lang}" mkdir -p "${results}" cd "${results}" @@ -29,7 +35,8 @@ run_fuzzer() { # then be loaded on subsequent fuzzing runs mkdir -p corpus - "../../${lang}_fuzzer_${mode}" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_${mode}_" -max_len=2048 ${mode_config[$mode]} "./corpus" "$@" + pwd + "../../${lang}_fuzzer" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_" -max_len=2048 ${mode_config[$mode]} "./corpus" "$@" } reproduce() { @@ -46,7 +53,7 @@ reproduce() { shift # Treat remainder of arguments as libFuzzer arguments - "${root}/out/${lang}_fuzzer_${mode}" ${mode_config[$mode]} -runs=1 "${testcase}" "$@" + "${root}/test/fuzz/out/${lang}_fuzzer" ${mode_config[$mode]} -runs=1 "${testcase}" "$@" } script=$(basename "$0") diff --git a/test/fuzz/fuzzer.cc b/test/fuzz/fuzzer.cc index 
ef800883..3b933746 100644 --- a/test/fuzz/fuzzer.cc +++ b/test/fuzz/fuzzer.cc @@ -47,7 +47,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { TSTree *tree = ts_parser_parse_string(parser, NULL, str, size); TSNode root_node = ts_tree_root_node(tree); - if (lang_query) { + if (lang_query != nullptr) { { TSQueryCursor *cursor = ts_query_cursor_new(); diff --git a/test/fuzz/gen-dict.py b/test/fuzz/gen-dict.py deleted file mode 100644 index c9845671..00000000 --- a/test/fuzz/gen-dict.py +++ /dev/null @@ -1,31 +0,0 @@ -import json -import sys - -def find_literals(literals, node): - '''Recursively find STRING literals in the grammar definition''' - - if type(node) is dict: - if 'type' in node and node['type'] == 'STRING' and 'value' in node: - literals.add(node['value']) - - for key, value in node.iteritems(): - find_literals(literals, value) - - elif type(node) is list: - for item in node: - find_literals(literals, item) - -def main(): - '''Generate a libFuzzer / AFL dictionary from a tree-sitter grammar.json''' - with open(sys.argv[1]) as f: - grammar = json.load(f) - - literals = set() - find_literals(literals, grammar['rules']) - - for lit in sorted(literals): - if lit: - print '"%s"' % ''.join(['\\x%02x' % ord(b) for b in lit.encode('utf-8')]) - -if __name__ == '__main__': - main()