Merge pull request #2411 from amaanq/fuzz-and-more
update fuzz script, some minor fixes & improvements, add asan to ci
This commit is contained in:
commit
8204d63da5
12 changed files with 54 additions and 57 deletions
13
.github/workflows/sanitize.yml
vendored
13
.github/workflows/sanitize.yml
vendored
|
|
@ -1,4 +1,4 @@
|
|||
name: Sunitize
|
||||
name: Sanitize
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
|
|
@ -9,7 +9,7 @@ on:
|
|||
|
||||
jobs:
|
||||
check_undefined_behaviour:
|
||||
name: Undefined behaviour checks
|
||||
name: Sanitizer checks
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter
|
||||
|
|
@ -38,3 +38,12 @@ jobs:
|
|||
CFLAGS: -fsanitize=undefined
|
||||
RUSTFLAGS: -lubsan
|
||||
run: cargo test -- --test-threads 1
|
||||
|
||||
- name: Run main tests with address sanitizer (ASAN)
|
||||
env:
|
||||
CFLAGS: -fsanitize=address
|
||||
RUSTFLAGS: -Zsanitizer=address
|
||||
run: |
|
||||
rustup install nightly
|
||||
rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu
|
||||
cargo +nightly test -Z build-std --target x86_64-unknown-linux-gnu -- --test-threads 1
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ version = "0.4.19"
|
|||
features = ["std"]
|
||||
|
||||
[dev-dependencies]
|
||||
proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" }
|
||||
tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" }
|
||||
|
||||
rand = "0.8.5"
|
||||
tempfile = "3.6.0"
|
||||
|
|
|
|||
|
|
@ -61,7 +61,8 @@ pub fn generate_parser_in_directory(
|
|||
None => {
|
||||
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
|
||||
grammar_json = load_grammar_file(&grammar_js_path)?;
|
||||
fs::write(&src_path.join("grammar.json"), &grammar_json)?;
|
||||
fs::write(&src_path.join("grammar.json"), &grammar_json)
|
||||
.with_context(|| format!("Failed to write grammar.json to {:?}", src_path))?;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,9 +14,9 @@ use crate::{
|
|||
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
|
||||
util,
|
||||
};
|
||||
use proc_macro::test_with_seed;
|
||||
use std::{env, fs};
|
||||
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
|
||||
use tree_sitter_proc_macro::test_with_seed;
|
||||
|
||||
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
|
||||
fn test_corpus_for_bash(seed: usize) {
|
||||
|
|
|
|||
|
|
@ -86,6 +86,9 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) ->
|
|||
|
||||
pub fn get_test_grammar(name: &str) -> (String, Option<PathBuf>) {
|
||||
let dir = fixtures_dir().join("test_grammars").join(name);
|
||||
let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap();
|
||||
let grammar = fs::read_to_string(&dir.join("grammar.json")).expect(&format!(
|
||||
"Can't find grammar.json for test grammar {}",
|
||||
name
|
||||
));
|
||||
(grammar, Some(dir))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,12 +8,12 @@ use crate::{
|
|||
generate::generate_parser_for_grammar,
|
||||
parse::{perform_edit, Edit},
|
||||
};
|
||||
use proc_macro::retry;
|
||||
use std::{
|
||||
sync::atomic::{AtomicUsize, Ordering},
|
||||
thread, time,
|
||||
};
|
||||
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
|
||||
use tree_sitter_proc_macro::retry;
|
||||
|
||||
#[test]
|
||||
fn test_parsing_simple_string() {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
|
|
|||
|
|
@ -6,14 +6,8 @@ if [[ "$(uname -s)" != Linux ]]; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
if [[ -z "$LIB_FUZZER_PATH" ]]; then
|
||||
echo "LIB_FUZZER_PATH not set"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CC=${CC:-clang}
|
||||
CXX=${CXX:-clang++}
|
||||
LINK=${LINK:-clang++}
|
||||
|
||||
default_fuzz_flags="-fsanitize=fuzzer,address,undefined"
|
||||
|
||||
|
|
@ -21,7 +15,7 @@ CFLAGS=${CFLAGS:-"$default_fuzz_flags"}
|
|||
CXXFLAGS=${CXXFLAGS:-"$default_fuzz_flags"}
|
||||
|
||||
export CFLAGS
|
||||
make
|
||||
make CC="$CC" CXX="$CXX"
|
||||
|
||||
if [ -z "$@" ]; then
|
||||
languages=$(ls test/fixtures/grammars)
|
||||
|
|
@ -29,7 +23,13 @@ else
|
|||
languages="$@"
|
||||
fi
|
||||
|
||||
mkdir -p test/fuzz/out
|
||||
|
||||
for lang in ${languages[@]}; do
|
||||
# skip typescript
|
||||
if [[ $lang == "typescript" ]]; then
|
||||
continue
|
||||
fi
|
||||
echo "Building $lang fuzzer..."
|
||||
lang_dir="test/fixtures/grammars/$lang"
|
||||
|
||||
|
|
@ -54,7 +54,7 @@ for lang in ${languages[@]}; do
|
|||
highlights_filename="${lang_dir}/queries/highlights.scm"
|
||||
if [ -e "${highlights_filename}" ]; then
|
||||
ts_lang_query_filename="${lang}.scm"
|
||||
cp "${highlights_filename}" "out/${ts_lang_query_filename}"
|
||||
cp "${highlights_filename}" "test/fuzz/out/${ts_lang_query_filename}"
|
||||
else
|
||||
ts_lang_query_filename=""
|
||||
fi
|
||||
|
|
@ -62,11 +62,16 @@ for lang in ${languages[@]}; do
|
|||
# FIXME: We should extract the grammar name from grammar.js. Use the name of
|
||||
# the directory instead. Also, the grammar name needs to be a valid C
|
||||
# identifier so replace any '-' characters
|
||||
ts_lang="tree_sitter_$(echo $lang | tr -- - _)"
|
||||
ts_lang="tree_sitter_$(echo "$lang" | tr -- - _)"
|
||||
$CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_LANG="$ts_lang" -D TS_LANG_QUERY_FILENAME="\"${ts_lang_query_filename}\"" \
|
||||
"test/fuzz/fuzzer.cc" "${objects[@]}" \
|
||||
libtree-sitter.a "$LIB_FUZZER_PATH" \
|
||||
-o "out/${lang}_fuzzer"
|
||||
libtree-sitter.a \
|
||||
-o "test/fuzz/out/${lang}_fuzzer"
|
||||
|
||||
python test/fuzz/gen-dict.py "${lang_dir}/src/grammar.json" > "out/$lang.dict"
|
||||
jq '
|
||||
[ ..
|
||||
| if .type? == "STRING" or (.type? == "ALIAS" and .named? == false) then .value else empty end
|
||||
| select(test("\\S") and length == utf8bytelength)
|
||||
] | unique | .[]
|
||||
' | sort
|
||||
done
|
||||
|
|
|
|||
|
|
@ -6,7 +6,13 @@ root=$(dirname "$0")/..
|
|||
export ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1:symbolize=1"
|
||||
export UBSAN="print_stacktrace=1:halt_on_error=1:symbolize=1"
|
||||
|
||||
declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=256" ["recover"]="-timeout=10 -rss_limit_mb=256" )
|
||||
# check if CI env var exists
|
||||
|
||||
if [ -z "${CI:-}" ]; then
|
||||
declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=2048" ["recover"]="-timeout=10 -rss_limit_mb=2048" )
|
||||
else
|
||||
declare -A mode_config=( ["halt"]="-max_total_time=120 -timeout=1 -rss_limit_mb=2048" ["recover"]="-time=120 -timeout=10 -rss_limit_mb=2048" )
|
||||
fi
|
||||
|
||||
run_fuzzer() {
|
||||
if [ "$#" -lt 2 ]; then
|
||||
|
|
@ -21,7 +27,7 @@ run_fuzzer() {
|
|||
# Treat remainder of arguments as libFuzzer arguments
|
||||
|
||||
# Fuzzing logs and testcases are always written to `pwd`, so `cd` there first
|
||||
results="${root}/out/fuzz-results/${lang}_${mode}"
|
||||
results="${root}/test/fuzz/out/fuzz-results/${lang}"
|
||||
mkdir -p "${results}"
|
||||
cd "${results}"
|
||||
|
||||
|
|
@ -29,7 +35,8 @@ run_fuzzer() {
|
|||
# then be loaded on subsequent fuzzing runs
|
||||
mkdir -p corpus
|
||||
|
||||
"../../${lang}_fuzzer_${mode}" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_${mode}_" -max_len=2048 ${mode_config[$mode]} "./corpus" "$@"
|
||||
pwd
|
||||
"../../${lang}_fuzzer" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_" -max_len=2048 "${mode_config[$mode]}" "./corpus" "$@"
|
||||
}
|
||||
|
||||
reproduce() {
|
||||
|
|
@ -46,7 +53,7 @@ reproduce() {
|
|||
shift
|
||||
# Treat remainder of arguments as libFuzzer arguments
|
||||
|
||||
"${root}/out/${lang}_fuzzer_${mode}" ${mode_config[$mode]} -runs=1 "${testcase}" "$@"
|
||||
"${root}/test/fuzz/out/${lang}_fuzzer" "${mode_config[$mode]}" -runs=1 "${testcase}" "$@"
|
||||
}
|
||||
|
||||
script=$(basename "$0")
|
||||
|
|
|
|||
|
|
@ -84,7 +84,11 @@ pub extern "C" fn ts_tagger_add_language(
|
|||
let tagger = unwrap_mut_ptr(this);
|
||||
let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) };
|
||||
let tags_query = unsafe { slice::from_raw_parts(tags_query, tags_query_len as usize) };
|
||||
let locals_query = unsafe { slice::from_raw_parts(locals_query, locals_query_len as usize) };
|
||||
let locals_query = if locals_query != std::ptr::null() {
|
||||
unsafe { slice::from_raw_parts(locals_query, locals_query_len as usize) }
|
||||
} else {
|
||||
&[]
|
||||
};
|
||||
let tags_query = match str::from_utf8(tags_query) {
|
||||
Ok(e) => e,
|
||||
Err(_) => return TSTagsError::InvalidUtf8,
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
TSTree *tree = ts_parser_parse_string(parser, NULL, str, size);
|
||||
TSNode root_node = ts_tree_root_node(tree);
|
||||
|
||||
if (lang_query) {
|
||||
if (lang_query != nullptr) {
|
||||
{
|
||||
TSQueryCursor *cursor = ts_query_cursor_new();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,31 +0,0 @@
|
|||
import json
|
||||
import sys
|
||||
|
||||
def find_literals(literals, node):
|
||||
'''Recursively find STRING literals in the grammar definition'''
|
||||
|
||||
if type(node) is dict:
|
||||
if 'type' in node and node['type'] == 'STRING' and 'value' in node:
|
||||
literals.add(node['value'])
|
||||
|
||||
for key, value in node.iteritems():
|
||||
find_literals(literals, value)
|
||||
|
||||
elif type(node) is list:
|
||||
for item in node:
|
||||
find_literals(literals, item)
|
||||
|
||||
def main():
|
||||
'''Generate a libFuzzer / AFL dictionary from a tree-sitter grammar.json'''
|
||||
with open(sys.argv[1]) as f:
|
||||
grammar = json.load(f)
|
||||
|
||||
literals = set()
|
||||
find_literals(literals, grammar['rules'])
|
||||
|
||||
for lit in sorted(literals):
|
||||
if lit:
|
||||
print '"%s"' % ''.join(['\\x%02x' % ord(b) for b in lit.encode('utf-8')])
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue