Merge pull request #91 from tree-sitter/libFuzzer
Add support for fuzzing with libFuzzer
This commit is contained in:
commit
34279257f9
9 changed files with 208 additions and 0 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -1,6 +1,8 @@
|
|||
# Compiled binaries
|
||||
out
|
||||
|
||||
fuzz-results
|
||||
|
||||
# Generated build config files
|
||||
gyp-mac-tool
|
||||
Makefile
|
||||
|
|
|
|||
|
|
@ -136,6 +136,10 @@
|
|||
'OTHER_CPLUSPLUSFLAGS': ['-fsanitize=address'],
|
||||
},
|
||||
},
|
||||
'Fuzz': {
|
||||
'cflags': [ '-g', '-fsanitize=address,undefined', '-fsanitize-coverage=trace-pc-guard' ],
|
||||
'ldflags': [ '-g', '-fsanitize=address,undefined', '-fsanitize-coverage=trace-pc-guard' ],
|
||||
},
|
||||
'Release': {
|
||||
'cflags': [ '-O2', '-fno-strict-aliasing' ],
|
||||
'cflags!': [ '-O3', '-fstrict-aliasing' ],
|
||||
|
|
|
|||
60
script/build-fuzzers
Executable file
60
script/build-fuzzers
Executable file
|
|
@ -0,0 +1,60 @@
|
|||
#!/bin/bash
#
# Build a libFuzzer-based fuzzer (plus a fuzzing dictionary) for each grammar
# under test/fixtures/grammars.
#
# Usage: script/build-fuzzers [language...]
#   With no arguments, every entry in test/fixtures/grammars is built.
#
# Environment:
#   LIB_FUZZER_PATH  path to the libFuzzer static library (required)
#   CC, CXX, LINK    compilers/linker to use (default: clang, clang++, clang++)
set -e

if [[ "$(uname -s)" != Linux ]]; then
  echo "Fuzzing is only supported on Linux" >&2
  exit 1
fi

if [[ -z "$LIB_FUZZER_PATH" ]]; then
  echo "LIB_FUZZER_PATH not set" >&2
  exit 1
fi

CC=${CC:-clang}
CXX=${CXX:-clang++}
LINK=${LINK:-clang++}

CC="$CC" CXX="$CXX" LINK="$LINK" ./script/configure

# Build the runtime library using the 'Fuzz' build configuration.
export BUILDTYPE=Fuzz
make runtime

CFLAGS="-fsanitize=address,undefined -fsanitize-coverage=trace-pc-guard"
CXXFLAGS="-fsanitize=address,undefined -fsanitize-coverage=trace-pc-guard"

# Test the argument count rather than `-z "$@"`: the latter expands to several
# words when more than one language is given, which is a malformed `[` test.
if [ "$#" -eq 0 ]; then
  languages=$(ls test/fixtures/grammars)
else
  languages="$*"
fi

# $languages is a whitespace-separated word list; it is intentionally unquoted
# so that it splits into one word per language.
for lang in $languages; do
  echo "Building $lang fuzzer..."
  lang_dir="test/fixtures/grammars/$lang"

  # The following assumes each language is implemented as src/parser.c plus an
  # optional scanner in src/scanner.{c,cc}
  objects=()

  lang_scanner="${lang_dir}/src/scanner"
  if [ -e "${lang_scanner}.cc" ]; then
    $CXX $CXXFLAGS -g -O1 "-I${lang_dir}/src" -c "${lang_scanner}.cc" -o "${lang_scanner}.o"
    objects+=("${lang_scanner}.o")
  elif [ -e "${lang_scanner}.c" ]; then
    $CC $CFLAGS -std=c99 -g -O1 "-I${lang_dir}/src" -c "${lang_scanner}.c" -o "${lang_scanner}.o"
    objects+=("${lang_scanner}.o")
  fi

  # Compiling with -O0 speeds up the build dramatically
  $CC $CFLAGS -g -O0 "-I${lang_dir}/src" "${lang_dir}/src/parser.c" -c -o "${lang_dir}/src/parser.o"
  objects+=("${lang_dir}/src/parser.o")

  # Link the fuzz target, the grammar objects, the runtime and libFuzzer into
  # a single executable. TSLANG selects the grammar's entry-point function.
  $CXX $CXXFLAGS -std=c++11 -Iinclude -D TSLANG="tree_sitter_$lang" \
    "test/fuzz/fuzzer.cc" "${objects[@]}" \
    out/Fuzz/obj.target/libruntime.a "$LIB_FUZZER_PATH" \
    -o "out/${lang}_fuzzer"

  # Write the dictionary of grammar literals next to the fuzzer binary.
  python test/fuzz/gen-dict.py "${lang_dir}/src/grammar.json" > "out/$lang.dict"
done
|
||||
|
|
@ -26,3 +26,6 @@ fetch_grammar 'json' 'origin/master'
|
|||
fetch_grammar 'c' 'origin/master'
|
||||
fetch_grammar 'cpp' 'origin/master'
|
||||
fetch_grammar 'python' 'origin/master'
|
||||
fetch_grammar 'go' 'origin/master'
|
||||
fetch_grammar 'ruby' 'origin/master'
|
||||
fetch_grammar 'typescript' 'origin/master'
|
||||
|
|
|
|||
15
script/reproduce
Executable file
15
script/reproduce
Executable file
|
|
@ -0,0 +1,15 @@
|
|||
#!/bin/bash
#
# Re-run a previously built fuzzer against a single saved testcase.
#
# Usage: script/reproduce <language> <testcase>

set -ex

if [ "$#" -lt 2 ]; then
  echo "usage: $0 <language> <testcase>"
  exit 1
fi

lang="$1"
testcase="$2"

out="out"

# UndefinedBehaviorSanitizer reads its runtime flags from UBSAN_OPTIONS; a
# variable named plain UBSAN is ignored, so the flags would never take effect.
ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" UBSAN_OPTIONS="print_stacktrace=1:halt_on_error=1" \
  "${out}/${lang}_fuzzer" "$testcase" -timeout=1 -runs=100
|
||||
23
script/run-fuzzer
Executable file
23
script/run-fuzzer
Executable file
|
|
@ -0,0 +1,23 @@
|
|||
#!/bin/bash
#
# Run the fuzzer for one language with a default set of libFuzzer arguments.
#
# Usage: script/run-fuzzer <language> <libFuzzer args...>

set -ex

if [ "$#" -lt 1 ]; then
  echo "usage: $0 <language> <libFuzzer args...>"
  exit 1
fi

lang="$1"
shift # Treat remainder of arguments as libFuzzer arguments

# Fuzzing logs and testcases are always written to `pwd`, so `cd` there first
mkdir -p "fuzz-results/${lang}"
cd "fuzz-results/${lang}"

# Create a corpus directory, so new discoveries are stored on disk. These will
# then be loaded on subsequent fuzzing runs
mkdir -p corpus

# Relative to fuzz-results/<lang>, which is now the working directory.
out="../../out"

# UndefinedBehaviorSanitizer reads its runtime flags from UBSAN_OPTIONS; a
# variable named plain UBSAN is ignored, so the flags would never take effect.
ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" UBSAN_OPTIONS="print_stacktrace=1:halt_on_error=1" \
  "${out}/${lang}_fuzzer" "-dict=${out}/${lang}.dict" "-artifact_prefix=${lang}_" -max_len=128 -timeout=1 "./corpus" "$@"
|
||||
43
test/fuzz/README.md
Normal file
43
test/fuzz/README.md
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
# Fuzzing tree-sitter
|
||||
|
||||
The tree-sitter fuzzing support requires 1) the `libFuzzer` runtime library and 2) a recent version of clang
|
||||
|
||||
## libFuzzer
|
||||
|
||||
The main fuzzing logic is implemented by `libFuzzer` which is part of the LLVM project but is not shipped by distros. It will need to be built from source but does not require building the _whole_ LLVM project. LLVM can be downloaded from llvm.org using SVN or [llvm-mirror](https://github.com/llvm-mirror/llvm) using git. `libFuzzer` can be built as, e.g.:
|
||||
|
||||
```
|
||||
cd ~/src
|
||||
git clone https://github.com/llvm-mirror/llvm
|
||||
cd llvm/lib/Fuzzer
|
||||
./build.sh
|
||||
```
|
||||
|
||||
## clang
|
||||
|
||||
Using libFuzzer requires a reasonably new version of `clang` and will probably _not_ work with your system-installed version. The easiest way to get started is to use the version provided by the Chromium team. Instructions are available at [libFuzzer.info](http://libfuzzer.info).
|
||||
|
||||
The fuzzers can then be built with:
|
||||
```
|
||||
export CLANG_DIR=$HOME/src/third_party/llvm-build/Release+Asserts/bin
|
||||
CC="$CLANG_DIR/clang" CXX="$CLANG_DIR/clang++" LINK="$CLANG_DIR/clang++" \
|
||||
LIB_FUZZER_PATH=$HOME/src/llvm/lib/Fuzzer/libFuzzer.a \
|
||||
./script/build-fuzzers
|
||||
```
|
||||
|
||||
This will generate a separate fuzzer for each grammar defined in `test/fixtures/grammars`. Each fuzzer is instrumented with [AddressSanitizer](https://clang.llvm.org/docs/AddressSanitizer.html) and [UndefinedBehaviorSanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html). Individual fuzzers can be built with, for example, `./script/build-fuzzers python ruby`.
|
||||
|
||||
The `run-fuzzer` script handles running an individual fuzzer with a sensible default set of arguments:
|
||||
```
|
||||
./script/run-fuzzer <grammar-name> <extra libFuzzer arguments...>
|
||||
```
|
||||
|
||||
which will log information to stdout. Failing testcases and a fuzz corpus will be saved to `fuzz-results/<grammar-name>`. The most important extra `libFuzzer` options are `-jobs` and `-workers`, which allow parallel fuzzing. This can be done with, e.g.:
|
||||
```
|
||||
./script/run-fuzzer <grammar-name> -jobs=32 -workers=32
|
||||
```
|
||||
|
||||
The testcase can be used to reproduce the crash by running:
|
||||
```
|
||||
./script/reproduce <grammar-name> <path-to-testcase>
|
||||
```
|
||||
27
test/fuzz/fuzzer.cc
Normal file
27
test/fuzz/fuzzer.cc
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
#include <string.h>
|
||||
#include "tree_sitter/runtime.h"
|
||||
|
||||
// Logging callback that deliberately does nothing with the message it is given.
void test_log(void *payload, TSLogType type, const char *string) {
  // Intentionally empty.
}
|
||||
|
||||
// Logger whose `log` callback is the no-op test_log; no other field is set
// explicitly.
// NOTE(review): nothing else in this file references `logger` -- confirm
// whether it was meant to be attached to the document.
TSLogger logger = { .log = test_log };
|
||||
|
||||
extern "C" const TSLanguage *TSLANG();
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||
const char *str = reinterpret_cast<const char *>(data);
|
||||
|
||||
TSDocument *document = ts_document_new();
|
||||
ts_document_set_language(document, TSLANG());
|
||||
ts_document_set_input_string_with_length(document, str, size);
|
||||
|
||||
TSParseOptions options = {};
|
||||
options.halt_on_error = false;
|
||||
ts_document_parse_with_options(document, options);
|
||||
|
||||
TSNode root_node = ts_document_root_node(document);
|
||||
ts_document_free(document);
|
||||
|
||||
return 0;
|
||||
}
|
||||
31
test/fuzz/gen-dict.py
Normal file
31
test/fuzz/gen-dict.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
import json
|
||||
import sys
|
||||
|
||||
def find_literals(literals, node):
    '''Recursively find STRING literals in the grammar definition.

    Walks nested dicts and lists. Every dict whose 'type' is 'STRING' and
    that has a 'value' key contributes that value to `literals`. Anything
    that is neither a dict nor a list is ignored.

    literals -- set that is updated in place with the values found
    node     -- the (sub)tree to search, as produced by json.load
    '''
    if isinstance(node, dict):
        if node.get('type') == 'STRING' and 'value' in node:
            literals.add(node['value'])

        # values() works on both Python 2 and 3; iteritems() is Python 2 only.
        for value in node.values():
            find_literals(literals, value)

    elif isinstance(node, list):
        for item in node:
            find_literals(literals, item)
|
||||
|
||||
def _escape_literal(lit):
    '''Return `lit` escaped for use inside a quoted dictionary entry.

    The literal is encoded as UTF-8 and processed byte by byte: ASCII letters
    and digits are kept as-is, every other byte becomes a two-digit \\xNN
    escape. Working on bytes keeps each escape to exactly two hex digits,
    which escaping whole code points cannot guarantee above U+00FF.
    '''
    escaped = []
    # bytearray iterates as integers on both Python 2 and Python 3.
    for byte in bytearray(lit.encode('utf-8')):
        char = chr(byte)
        if byte < 0x80 and char.isalnum():
            escaped.append(char)
        else:
            escaped.append('\\x%02x' % byte)
    return ''.join(escaped)


def main():
    '''Generate a libFuzzer / AFL dictionary from a tree-sitter grammar.json

    Reads the grammar file named by the first command-line argument and
    prints one quoted, escaped entry per distinct non-empty STRING literal,
    in sorted order. Exits with a usage message if no argument is given.
    '''
    import io

    if len(sys.argv) < 2:
        sys.exit('usage: %s <grammar.json>' % sys.argv[0])

    # Decode explicitly as UTF-8 rather than relying on the locale default.
    with io.open(sys.argv[1], encoding='utf-8') as f:
        grammar = json.load(f)

    literals = set()
    find_literals(literals, grammar)

    for lit in sorted(literals):
        if lit:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('"%s"' % _escape_literal(lit))


if __name__ == '__main__':
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue