Build fuzzer in 'halt' and 'recover' modes

Build each language fuzzer in two modes (halt_on_error=true and halt_on_error=false) and use different timeouts for each fuzzer. Also merge the run-fuzzer and reproduce scripts so they use identical values of ASAN_OPTIONS/UBSAN_OPTIONS/etc.
2018-03-02 09:10:52 -08:00 · 2018-03-02 09:10:52 -08:00 · bc192d95ca
commit bc192d95ca
parent d3ac345644
5 changed files with 63 additions and 44 deletions
--- a/script/build-fuzzers
+++ b/script/build-fuzzers
@@ -54,10 +54,13 @@ for lang in ${languages[@]}; do
  $CC $CFLAGS -g -O0 "-I${lang_dir}/src" "${lang_dir}/src/parser.c" -c -o "${lang_dir}/src/parser.o"
  objects+=("${lang_dir}/src/parser.o")

-  $CXX $CXXFLAGS -std=c++11 -Iinclude -D TSLANG="tree_sitter_$lang" \
-    "test/fuzz/fuzzer.cc" "${objects[@]}" \
-    out/Fuzz/obj.target/libruntime.a "$LIB_FUZZER_PATH" \
-    -o "out/${lang}_fuzzer"
+  modes=(true halt false recover)
+  for i in 0 2; do
+    $CXX $CXXFLAGS -std=c++11 -Iinclude -D TS_HALT_ON_ERROR="${modes[i]}" -D TS_LANG="tree_sitter_$lang" \
+      "test/fuzz/fuzzer.cc" "${objects[@]}" \
+      out/Fuzz/obj.target/libruntime.a "$LIB_FUZZER_PATH" \
+      -o "out/${lang}_fuzzer_${modes[i+1]}"
+  done

  python test/fuzz/gen-dict.py "${lang_dir}/src/grammar.json" > "out/$lang.dict"
 done
--- a/script/reproduce
+++ b/script/reproduce
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-if [ "$#" -lt 2 ]; then
-  echo "usage: $0 <language> <testcase>"
-  exit 1
-fi
-
-lang="$1"
-testcase="$2"
-
-out="out"
-ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" UBSAN="print_stacktrace=1:halt_on_error=1" \
-  "${out}/${lang}_fuzzer" "$testcase" -timeout=1 -runs=100
--- a/script/reproduce
+++ b/script/reproduce
@@ -0,0 +1 @@
+run-fuzzer
--- a/script/run-fuzzer
+++ b/script/run-fuzzer
@@ -1,23 +1,53 @@
 #!/bin/bash

-set -ex
+set -eux

-if [ "$#" -lt 1 ]; then
-  echo "usage: $0 <language> <libFuzzer args...>"
-  exit 1
+root=$(dirname "$0")/..
+export ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1:symbolize=1"
+export UBSAN="print_stacktrace=1:halt_on_error=1:symbolize=1"
+
+declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=256" ["recover"]="-timeout=10 -rss_limit_mb=256" )
+
+run_fuzzer() {
+  if [ "$#" -lt 2 ]; then
+    echo "usage: $0 <language> <halt|recover> <libFuzzer args...>"
+    exit 1
+  fi
+
+  lang="$1"
+  shift
+  mode="$1"
+  shift
+  # Treat remainder of arguments as libFuzzer arguments
+
+  # Fuzzing logs and testcases are always written to `pwd`, so `cd` there first
+  results="${root}/out/fuzz-results/${lang}_${mode}"
+  mkdir -p "${results}"
+  cd "${results}"
+
+  # Create a corpus directory, so new discoveries are stored on disk. These will
+  # then be loaded on subsequent fuzzing runs
+  mkdir -p corpus
+
+  "../../${lang}_fuzzer_${mode}" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_${mode}_" -max_len=2048 ${mode_config[$mode]} "./corpus" "$@"
+}
+
+reproduce() {
+  if [ ! "$#" == 3 ]; then
+    echo "usage: $0 <language> (halt|recover) <testcase>"
+    exit 1
+  fi
+
+  lang="$1"
+  mode="$2"
+  testcase="$3"
+
+  "${root}/out/${lang}_fuzzer_${mode}" ${mode_config[$mode]} -runs=1 "${testcase}"
+}
+
+script=$(basename "$0")
+if [ "$script" == "run-fuzzer" ]; then
+  run_fuzzer "$@"
+elif [ "$script" == "reproduce" ]; then
+  reproduce "$@"
 fi
-
-lang="$1"
-shift # Treat remainder of arguments as libFuzzer arguments
-
-# Fuzzing logs and testcases are always written to `pwd`, so `cd` there first
-mkdir -p "fuzz-results/${lang}"
-cd "fuzz-results/${lang}"
-
-# Create a corpus directory, so new discoveries are stored on disk. These will
-# then be loaded on subsequent fuzzing runs
-mkdir -p corpus
-
-out="../../out"
-ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" UBSAN="print_stacktrace=1:halt_on_error=1" \
-  "${out}/${lang}_fuzzer" "-dict=${out}/${lang}.dict" "-artifact_prefix=${lang}_" -max_len=128 -timeout=1 "./corpus" "$@"
--- a/test/fuzz/README.md
+++ b/test/fuzz/README.md
@@ -29,15 +29,15 @@ This will generate a separate fuzzer for each grammar defined in `test/fixtures/

 The `run-fuzzer` script handles running an individual fuzzer with a sensible default set of arguments:
 ```
-./script/run-fuzzer <grammar-name> <extra libFuzzer arguments...>
+./script/run-fuzzer <grammar-name> (halt|recover) <extra libFuzzer arguments...>
 ```

 which will log information to stdout. Failing testcases and a fuzz corpus will be saved to `fuzz-results/<grammar-name>`. The most important extra `libFuzzer` options are `-jobs` and `-workers` which allow parallel fuzzing. This is can done with, e.g.:
 ```
-./script/run-fuzzer <grammer-name> -jobs=32 -workers=32
+./script/run-fuzzer <grammer-name> halt -jobs=32 -workers=32
 ```

 The testcase can be used to reproduce the crash by running:
 ```
-./script/reproduce <grammar-name> <path-to-testcase>
+./script/reproduce <grammar-name> (halt|recover) <path-to-testcase>
 ```
--- a/test/fuzz/fuzzer.cc
+++ b/test/fuzz/fuzzer.cc
@@ -7,17 +7,17 @@ TSLogger logger = {
  .log = test_log,
 };

-extern "C" const TSLanguage *TSLANG();
+extern "C" const TSLanguage *TS_LANG();

 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  const char *str = reinterpret_cast<const char *>(data);

  TSDocument *document = ts_document_new();
-  ts_document_set_language(document, TSLANG());
+  ts_document_set_language(document, TS_LANG());
  ts_document_set_input_string_with_length(document, str, size);

  TSParseOptions options = {};
-  options.halt_on_error = false;
+  options.halt_on_error = TS_HALT_ON_ERROR;
  ts_document_parse_with_options(document, options);

  TSNode root_node = ts_document_root_node(document);