Add libFuzzer support

This adds support for fuzzing tree-sitter grammars with libFuzzer. This
currently only works on Linux because of linking issues on macOS. Briefly, the
AddressSanitizer library is dynamically linked into the fuzzer binary and
cannot be found at runtime if built with a compiler that wasn't provided by
Xcode(?). The runtime library is statically linked on Linux so this isn't a
problem.
This commit is contained in:
Phil Turnbull 2017-07-14 10:42:01 -07:00
parent 69500c9dd7
commit 798ef5e4dc
8 changed files with 205 additions and 0 deletions

60
script/build-fuzzers Executable file
View file

@ -0,0 +1,60 @@
#!/bin/bash
#
# Build a libFuzzer binary and a fuzzing dictionary for tree-sitter grammars.
#
# Usage: script/build-fuzzers [language...]
#   With no arguments, every entry under test/fixtures/grammars is built.
#
# Environment:
#   LIB_FUZZER_PATH  (required) passed verbatim to the final link step
#   CC, CXX, LINK    toolchain overrides; default to clang / clang++ / clang++
#
# Outputs, per language: out/<lang>_fuzzer and out/<lang>.dict

set -e

if [[ "$(uname -s)" != Linux ]]; then
  echo "Fuzzing is only supported on Linux" >&2
  exit 1
fi

if [[ -z "$LIB_FUZZER_PATH" ]]; then
  echo "LIB_FUZZER_PATH not set" >&2
  exit 1
fi

CC=${CC:-clang}
CXX=${CXX:-clang++}
LINK=${LINK:-clang++}

CC=$CC CXX=$CXX LINK=$LINK ./script/configure

export BUILDTYPE=Fuzz
make runtime

# Sanitizer/coverage instrumentation applied to every grammar object and to
# the fuzzer driver itself.
c_flags=(-fsanitize=address,undefined -fsanitize-coverage=trace-pc-guard)
cxx_flags=(-fsanitize=address,undefined -fsanitize-coverage=trace-pc-guard)

grammars_dir="test/fixtures/grammars"

# Decide which languages to build. Test the argument count rather than
# `-z "$@"`, which is a malformed test as soon as two or more languages are
# given.
languages=()
if [[ "$#" -eq 0 ]]; then
  if [[ ! -d "$grammars_dir" ]]; then
    echo "$grammars_dir not found" >&2
    exit 1
  fi
  # Glob instead of parsing `ls` so names are never re-split.
  for grammar_path in "$grammars_dir"/*; do
    [[ -e "$grammar_path" ]] || continue  # unmatched glob stays literal
    languages+=("${grammar_path##*/}")
  done
else
  languages=("$@")
fi

for lang in "${languages[@]}"; do
  echo "Building $lang fuzzer..."
  lang_dir="${grammars_dir}/$lang"

  # The following assumes each language is implemented as src/parser.c plus an
  # optional scanner in src/scanner.{c,cc}
  objects=()

  # $CC / $CXX are deliberately left unquoted below so that values such as
  # "ccache clang" still split into a command plus arguments.
  lang_scanner="${lang_dir}/src/scanner"
  if [[ -e "${lang_scanner}.cc" ]]; then
    $CXX "${cxx_flags[@]}" -g -O1 "-I${lang_dir}/src" \
      -c "${lang_scanner}.cc" -o "${lang_scanner}.o"
    objects+=("${lang_scanner}.o")
  elif [[ -e "${lang_scanner}.c" ]]; then
    $CC "${c_flags[@]}" -std=c99 -g -O1 "-I${lang_dir}/src" \
      -c "${lang_scanner}.c" -o "${lang_scanner}.o"
    objects+=("${lang_scanner}.o")
  fi

  # Compiling with -O0 speeds up the build dramatically
  $CC "${c_flags[@]}" -g -O0 "-I${lang_dir}/src" \
    "${lang_dir}/src/parser.c" -c -o "${lang_dir}/src/parser.o"
  objects+=("${lang_dir}/src/parser.o")

  # Link the driver, the grammar objects, the runtime and libFuzzer. TSLANG
  # is defined as the grammar's tree_sitter_<lang> symbol name for fuzzer.cc.
  $CXX "${cxx_flags[@]}" -std=c++11 -Iinclude -D TSLANG="tree_sitter_$lang" \
    "test/fuzz/fuzzer.cc" "${objects[@]}" \
    out/Fuzz/obj.target/libruntime.a "$LIB_FUZZER_PATH" \
    -o "out/${lang}_fuzzer"

  # Generate the dictionary file from the grammar definition.
  python test/fuzz/gen-dict.py "${lang_dir}/src/grammar.json" > "out/$lang.dict"
done

15
script/reproduce Executable file
View file

@ -0,0 +1,15 @@
#!/bin/bash
#
# Re-run a previously built fuzzer binary against a single testcase file.
#
# Usage: script/reproduce <language> <testcase>
# Requires out/<language>_fuzzer to exist.

set -ex

if [ "$#" -lt 2 ]; then
  echo "usage: $0 <language> <testcase>" >&2
  exit 1
fi

lang="$1"
testcase="$2"
out="out"

# The UBSan runtime reads its flags from UBSAN_OPTIONS; a variable named
# plain UBSAN is ignored, so print_stacktrace/halt_on_error would not apply.
ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" \
UBSAN_OPTIONS="print_stacktrace=1:halt_on_error=1" \
  "${out}/${lang}_fuzzer" "$testcase" -timeout=1 -runs=100

23
script/run-fuzzer Executable file
View file

@ -0,0 +1,23 @@
#!/bin/bash
#
# Run a previously built fuzzer binary for one language.
#
# Usage: script/run-fuzzer <language> <libFuzzer args...>
# Requires out/<language>_fuzzer and out/<language>.dict to exist. Results
# are written under fuzz-results/<language>/.

set -ex

if [ "$#" -lt 1 ]; then
  echo "usage: $0 <language> <libFuzzer args...>" >&2
  exit 1
fi

lang="$1"
shift  # Treat remainder of arguments as libFuzzer arguments

# Fuzzing logs and testcases are always written to `pwd`, so `cd` there first
mkdir -p "fuzz-results/${lang}"
cd "fuzz-results/${lang}"

# Create a corpus directory, so new discoveries are stored on disk. These will
# then be loaded on subsequent fuzzing runs
mkdir -p corpus

# Relative to fuzz-results/<language>, which is now the working directory.
out="../../out"

# The UBSan runtime reads its flags from UBSAN_OPTIONS; a variable named
# plain UBSAN is ignored, so print_stacktrace/halt_on_error would not apply.
ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" \
UBSAN_OPTIONS="print_stacktrace=1:halt_on_error=1" \
  "${out}/${lang}_fuzzer" "-dict=${out}/${lang}.dict" \
  "-artifact_prefix=${lang}_" -max_len=128 -timeout=1 "./corpus" "$@"