diff --git a/script/build-fuzzers b/script/build-fuzzers
index d4786daf..2c48f6c9 100755
--- a/script/build-fuzzers
+++ b/script/build-fuzzers
@@ -54,10 +54,14 @@ for lang in ${languages[@]}; do
   $CC $CFLAGS -g -O0 "-I${lang_dir}/src" "${lang_dir}/src/parser.c" -c -o "${lang_dir}/src/parser.o"
   objects+=("${lang_dir}/src/parser.o")
 
-  $CXX $CXXFLAGS -std=c++11 -Iinclude -D TSLANG="tree_sitter_$lang" \
-    "test/fuzz/fuzzer.cc" "${objects[@]}" \
-    out/Fuzz/obj.target/libruntime.a "$LIB_FUZZER_PATH" \
-    -o "out/${lang}_fuzzer"
+  # Flat list of (TS_HALT_ON_ERROR value, fuzzer name suffix) pairs; i indexes the first element of each pair.
+  modes=(true halt false recover)
+  for i in 0 2; do
+    $CXX $CXXFLAGS -std=c++11 -Iinclude -D TS_HALT_ON_ERROR="${modes[i]}" -D TS_LANG="tree_sitter_$lang" \
+      "test/fuzz/fuzzer.cc" "${objects[@]}" \
+      out/Fuzz/obj.target/libruntime.a "$LIB_FUZZER_PATH" \
+      -o "out/${lang}_fuzzer_${modes[i+1]}"
+  done
 
   python test/fuzz/gen-dict.py "${lang_dir}/src/grammar.json" > "out/$lang.dict"
 done
diff --git a/script/reproduce b/script/reproduce
deleted file mode 100755
index ee7b3a23..00000000
--- a/script/reproduce
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-if [ "$#" -lt 2 ]; then
-  echo "usage: $0 "
-  exit 1
-fi
-
-lang="$1"
-testcase="$2"
-
-out="out"
-ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" UBSAN="print_stacktrace=1:halt_on_error=1" \
-  "${out}/${lang}_fuzzer" "$testcase" -timeout=1 -runs=100
diff --git a/script/reproduce b/script/reproduce
new file mode 120000
index 00000000..1c28442a
--- /dev/null
+++ b/script/reproduce
@@ -0,0 +1 @@
+run-fuzzer
\ No newline at end of file
diff --git a/script/run-fuzzer b/script/run-fuzzer
index 4cdbaa26..349d2416 100755
--- a/script/run-fuzzer
+++ b/script/run-fuzzer
@@ -1,23 +1,58 @@
 #!/bin/bash
 
-set -ex
+set -eux
 
-if [ "$#" -lt 1 ]; then
-  echo "usage: $0 "
-  exit 1
+root=$(dirname "$0")/..
+# The sanitizer runtimes read their settings from ASAN_OPTIONS and UBSAN_OPTIONS.
+export ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1:symbolize=1"
+export UBSAN_OPTIONS="print_stacktrace=1:halt_on_error=1:symbolize=1"
+
+# libFuzzer limits for each fuzzer variant, keyed by the mode argument (halt|recover).
+declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=256" ["recover"]="-timeout=10 -rss_limit_mb=256" )
+
+# Fuzz one language in one mode; any further arguments are passed through to libFuzzer.
+run_fuzzer() {
+  if [ "$#" -lt 2 ]; then
+    echo "usage: $0 <language> (halt|recover) <libFuzzer args...>"
+    exit 1
+  fi
+
+  lang="$1"
+  shift
+  mode="$1"
+  shift
+  # Treat remainder of arguments as libFuzzer arguments
+
+  # Fuzzing logs and testcases are always written to `pwd`, so `cd` there first
+  results="${root}/out/fuzz-results/${lang}_${mode}"
+  mkdir -p "${results}"
+  cd "${results}"
+
+  # Create a corpus directory, so new discoveries are stored on disk. These will
+  # then be loaded on subsequent fuzzing runs
+  mkdir -p corpus
+
+  "../../${lang}_fuzzer_${mode}" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_${mode}_" -max_len=2048 ${mode_config[$mode]} "./corpus" "$@"
+}
+
+# Run a single saved testcase once through the fuzzer built for the given language and mode.
+reproduce() {
+  if [ ! "$#" == 3 ]; then
+    echo "usage: $0 <language> (halt|recover) <testcase>"
+    exit 1
+  fi
+
+  lang="$1"
+  mode="$2"
+  testcase="$3"
+
+  "${root}/out/${lang}_fuzzer_${mode}" ${mode_config[$mode]} -runs=1 "${testcase}"
+}
+
+# Dispatch on the invoked name: script/reproduce is a symlink to this script.
+script=$(basename "$0")
+if [ "$script" == "run-fuzzer" ]; then
+  run_fuzzer "$@"
+elif [ "$script" == "reproduce" ]; then
+  reproduce "$@"
 fi
-
-lang="$1"
-shift # Treat remainder of arguments as libFuzzer arguments
-
-# Fuzzing logs and testcases are always written to `pwd`, so `cd` there first
-mkdir -p "fuzz-results/${lang}"
-cd "fuzz-results/${lang}"
-
-# Create a corpus directory, so new discoveries are stored on disk. These will
-# then be loaded on subsequent fuzzing runs
-mkdir -p corpus
-
-out="../../out"
-ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" UBSAN="print_stacktrace=1:halt_on_error=1" \
-  "${out}/${lang}_fuzzer" "-dict=${out}/${lang}.dict" "-artifact_prefix=${lang}_" -max_len=128 -timeout=1 "./corpus" "$@"
diff --git a/test/fuzz/README.md b/test/fuzz/README.md
index ba19b10b..a032e601 100644
--- a/test/fuzz/README.md
+++ b/test/fuzz/README.md
@@ -29,15 +29,15 @@ This will generate a separate fuzzer for each grammar defined in `test/fixtures/
 
 The `run-fuzzer` script handles running an individual fuzzer with a sensible default set of arguments:
 ```
-./script/run-fuzzer 
+./script/run-fuzzer <language> (halt|recover) <libFuzzer args...>
 ```
 
-which will log information to stdout. Failing testcases and a fuzz corpus will be saved to `fuzz-results/`. The most important extra `libFuzzer` options are `-jobs` and `-workers` which allow parallel fuzzing. This is can done with, e.g.:
+which will log information to stdout. Failing testcases and a fuzz corpus will be saved to `out/fuzz-results/<language>_<mode>`. The most important extra `libFuzzer` options are `-jobs` and `-workers`, which allow parallel fuzzing. This can be done with, e.g.:
 ```
-./script/run-fuzzer -jobs=32 -workers=32
+./script/run-fuzzer <language> halt -jobs=32 -workers=32
 ```
 
 The testcase can be used to reproduce the crash by running:
 ```
-./script/reproduce 
+./script/reproduce <language> (halt|recover) <path-to-testcase>
 ```
diff --git a/test/fuzz/fuzzer.cc b/test/fuzz/fuzzer.cc
index c1b5095e..7b1aea5b 100644
--- a/test/fuzz/fuzzer.cc
+++ b/test/fuzz/fuzzer.cc
@@ -7,17 +7,17 @@ TSLogger logger = {
   .log = test_log,
 };
 
-extern "C" const TSLanguage *TSLANG();
+extern "C" const TSLanguage *TS_LANG();
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   const char *str = reinterpret_cast(data);
 
   TSDocument *document = ts_document_new();
-  ts_document_set_language(document, TSLANG());
+  ts_document_set_language(document, TS_LANG());
   ts_document_set_input_string_with_length(document, str, size);
 
   TSParseOptions options = {};
-  options.halt_on_error = false;
+  options.halt_on_error = TS_HALT_ON_ERROR;
   ts_document_parse_with_options(document, options);
 
   TSNode root_node = ts_document_root_node(document);