From f9e5696bcbbac7af1ce4d1fb7f6f1d1e0509b1cb Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Wed, 19 Jul 2023 22:20:01 -0400 Subject: [PATCH 1/7] ci: rework fuzzer script --- script/build-fuzzers | 29 +++++++++++++++++------------ script/run-fuzzer | 15 +++++++++++---- test/fuzz/fuzzer.cc | 2 +- test/fuzz/gen-dict.py | 31 ------------------------------- 4 files changed, 29 insertions(+), 48 deletions(-) delete mode 100644 test/fuzz/gen-dict.py diff --git a/script/build-fuzzers b/script/build-fuzzers index bff43c8b..1d07cba7 100755 --- a/script/build-fuzzers +++ b/script/build-fuzzers @@ -6,14 +6,8 @@ if [[ "$(uname -s)" != Linux ]]; then exit 1 fi -if [[ -z "$LIB_FUZZER_PATH" ]]; then - echo "LIB_FUZZER_PATH not set" - exit 1 -fi - CC=${CC:-clang} CXX=${CXX:-clang++} -LINK=${LINK:-clang++} default_fuzz_flags="-fsanitize=fuzzer,address,undefined" @@ -21,7 +15,7 @@ CFLAGS=${CFLAGS:-"$default_fuzz_flags"} CXXFLAGS=${CXXFLAGS:-"$default_fuzz_flags"} export CFLAGS -make +make CC="$CC" CXX="$CXX" if [ -z "$@" ]; then languages=$(ls test/fixtures/grammars) @@ -29,7 +23,13 @@ else languages="$@" fi +mkdir -p test/fuzz/out + for lang in ${languages[@]}; do + # skip typescript + if [[ $lang == "typescript" ]]; then + continue + fi echo "Building $lang fuzzer..." lang_dir="test/fixtures/grammars/$lang" @@ -54,7 +54,7 @@ for lang in ${languages[@]}; do highlights_filename="${lang_dir}/queries/highlights.scm" if [ -e "${highlights_filename}" ]; then ts_lang_query_filename="${lang}.scm" - cp "${highlights_filename}" "out/${ts_lang_query_filename}" + cp "${highlights_filename}" "test/fuzz/out/${ts_lang_query_filename}" else ts_lang_query_filename="" fi @@ -62,11 +62,16 @@ for lang in ${languages[@]}; do # FIXME: We should extract the grammar name from grammar.js. Use the name of # the directory instead. 
Also, the grammar name needs to be a valid C # identifier so replace any '-' characters - ts_lang="tree_sitter_$(echo $lang | tr -- - _)" + ts_lang="tree_sitter_$(echo "$lang" | tr -- - _)" $CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_LANG="$ts_lang" -D TS_LANG_QUERY_FILENAME="\"${ts_lang_query_filename}\"" \ "test/fuzz/fuzzer.cc" "${objects[@]}" \ - libtree-sitter.a "$LIB_FUZZER_PATH" \ - -o "out/${lang}_fuzzer" + libtree-sitter.a \ + -o "test/fuzz/out/${lang}_fuzzer" - python test/fuzz/gen-dict.py "${lang_dir}/src/grammar.json" > "out/$lang.dict" + jq ' + [ .. + | if .type? == "STRING" or (.type? == "ALIAS" and .named? == false) then .value else empty end + | select(test("\\S") and length == utf8bytelength) + ] | unique | .[] + ' "${lang_dir}/src/grammar.json" | sort > "test/fuzz/out/${lang}.dict" done diff --git a/script/run-fuzzer b/script/run-fuzzer index ddd481d9..ae73958b 100755 --- a/script/run-fuzzer +++ b/script/run-fuzzer @@ -6,7 +6,13 @@ root=$(dirname "$0")/.. export ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1:symbolize=1" export UBSAN="print_stacktrace=1:halt_on_error=1:symbolize=1" -declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=256" ["recover"]="-timeout=10 -rss_limit_mb=256" ) +# When the CI env var is set, cap each run at 120s (-max_total_time) so the job terminates + +if [ -z "${CI:-}" ]; then + declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=2048" ["recover"]="-timeout=10 -rss_limit_mb=2048" ) +else + declare -A mode_config=( ["halt"]="-max_total_time=120 -timeout=1 -rss_limit_mb=2048" ["recover"]="-max_total_time=120 -timeout=10 -rss_limit_mb=2048" ) +fi run_fuzzer() { if [ "$#" -lt 2 ]; then @@ -21,7 +27,7 @@ run_fuzzer() { # Treat remainder of arguments as libFuzzer arguments # Fuzzing logs and testcases are always written to `pwd`, so `cd` there first - results="${root}/out/fuzz-results/${lang}_${mode}" + results="${root}/test/fuzz/out/fuzz-results/${lang}" mkdir -p "${results}" cd "${results}" @@ -29,7 +35,8 @@ run_fuzzer() { # then be loaded on subsequent fuzzing runs mkdir -p corpus - "../../${lang}_fuzzer_${mode}" 
"-dict=../../${lang}.dict" "-artifact_prefix=${lang}_${mode}_" -max_len=2048 ${mode_config[$mode]} "./corpus" "$@" + pwd + "../../${lang}_fuzzer" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_" -max_len=2048 ${mode_config[$mode]} "./corpus" "$@" } reproduce() { @@ -46,7 +53,7 @@ reproduce() { shift # Treat remainder of arguments as libFuzzer arguments - "${root}/out/${lang}_fuzzer_${mode}" ${mode_config[$mode]} -runs=1 "${testcase}" "$@" + "${root}/test/fuzz/out/${lang}_fuzzer" ${mode_config[$mode]} -runs=1 "${testcase}" "$@" } script=$(basename "$0") diff --git a/test/fuzz/fuzzer.cc b/test/fuzz/fuzzer.cc index ef800883..3b933746 100644 --- a/test/fuzz/fuzzer.cc +++ b/test/fuzz/fuzzer.cc @@ -47,7 +47,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { TSTree *tree = ts_parser_parse_string(parser, NULL, str, size); TSNode root_node = ts_tree_root_node(tree); - if (lang_query) { + if (lang_query != nullptr) { { TSQueryCursor *cursor = ts_query_cursor_new(); diff --git a/test/fuzz/gen-dict.py b/test/fuzz/gen-dict.py deleted file mode 100644 index c9845671..00000000 --- a/test/fuzz/gen-dict.py +++ /dev/null @@ -1,31 +0,0 @@ -import json -import sys - -def find_literals(literals, node): - '''Recursively find STRING literals in the grammar definition''' - - if type(node) is dict: - if 'type' in node and node['type'] == 'STRING' and 'value' in node: - literals.add(node['value']) - - for key, value in node.iteritems(): - find_literals(literals, value) - - elif type(node) is list: - for item in node: - find_literals(literals, item) - -def main(): - '''Generate a libFuzzer / AFL dictionary from a tree-sitter grammar.json''' - with open(sys.argv[1]) as f: - grammar = json.load(f) - - literals = set() - find_literals(literals, grammar['rules']) - - for lit in sorted(literals): - if lit: - print '"%s"' % ''.join(['\\x%02x' % ord(b) for b in lit.encode('utf-8')]) - -if __name__ == '__main__': - main() From 
9a057398514a950d0b4e6f9d865579f91c71bf93 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Wed, 19 Jul 2023 22:19:06 -0400 Subject: [PATCH 2/7] fix: typo --- .github/workflows/sanitize.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sanitize.yml b/.github/workflows/sanitize.yml index 6348f222..3d470a84 100644 --- a/.github/workflows/sanitize.yml +++ b/.github/workflows/sanitize.yml @@ -1,4 +1,4 @@ -name: Sunitize +name: Sanitize env: CARGO_TERM_COLOR: always From c521e9c18e0590cd18f51df6954cb35f30a78804 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Wed, 19 Jul 2023 22:19:22 -0400 Subject: [PATCH 3/7] chore: improve error message in some spots loading `grammar.json` --- cli/src/generate/mod.rs | 3 ++- cli/src/tests/helpers/fixtures.rs | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 4838828b..206b74f9 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -61,7 +61,8 @@ pub fn generate_parser_in_directory( None => { let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into()); grammar_json = load_grammar_file(&grammar_js_path)?; - fs::write(&src_path.join("grammar.json"), &grammar_json)?; + fs::write(&src_path.join("grammar.json"), &grammar_json) + .with_context(|| format!("Failed to write grammar.json to {:?}", src_path))?; } } diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 7d04b24a..46c674cd 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -86,6 +86,9 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> pub fn get_test_grammar(name: &str) -> (String, Option) { let dir = fixtures_dir().join("test_grammars").join(name); - let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap(); + let grammar = fs::read_to_string(&dir.join("grammar.json")).expect(&format!( + "Can't find 
grammar.json for test grammar {}", + name + )); (grammar, Some(dir)) } From 75e1bcf70a0a3de5ff32bc2b6672bfc6d1033207 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Wed, 19 Jul 2023 22:19:43 -0400 Subject: [PATCH 4/7] refactor!: rename proc_macro to avoid conflicts when building std with `-Z build-std` --- cli/Cargo.toml | 2 +- cli/src/tests/corpus_test.rs | 2 +- cli/src/tests/parser_test.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index c50a29ed..7bc2e2ea 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -72,7 +72,7 @@ version = "0.4.19" features = ["std"] [dev-dependencies] -proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" } +tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" } rand = "0.8.5" tempfile = "3.6.0" diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 0bd1db88..7166d249 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -14,9 +14,9 @@ use crate::{ test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry}, util, }; -use proc_macro::test_with_seed; use std::{env, fs}; use tree_sitter::{LogType, Node, Parser, Point, Range, Tree}; +use tree_sitter_proc_macro::test_with_seed; #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_bash(seed: usize) { diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index af5f36f5..01063359 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -8,12 +8,12 @@ use crate::{ generate::generate_parser_for_grammar, parse::{perform_edit, Edit}, }; -use proc_macro::retry; use std::{ sync::atomic::{AtomicUsize, Ordering}, thread, time, }; use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range}; +use tree_sitter_proc_macro::retry; #[test] fn test_parsing_simple_string() { From 
a76701103e15b449fd915edd519a0bd254def2b0 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Wed, 19 Jul 2023 22:20:30 -0400 Subject: [PATCH 5/7] fix(asan): only use `slice::from_raw_parts` if locals_query is not null --- tags/src/c_lib.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 088cc7bc..c8f39d2c 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -84,7 +84,11 @@ pub extern "C" fn ts_tagger_add_language( let tagger = unwrap_mut_ptr(this); let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; let tags_query = unsafe { slice::from_raw_parts(tags_query, tags_query_len as usize) }; - let locals_query = unsafe { slice::from_raw_parts(locals_query, locals_query_len as usize) }; + let locals_query = if !locals_query.is_null() { + unsafe { slice::from_raw_parts(locals_query, locals_query_len as usize) } + } else { + &[] + }; let tags_query = match str::from_utf8(tags_query) { Ok(e) => e, Err(_) => return TSTagsError::InvalidUtf8, From 56870fdda2f10b3202823f8c300d7a34342c5503 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Wed, 19 Jul 2023 22:19:55 -0400 Subject: [PATCH 6/7] chore: remove unneeded include --- lib/include/tree_sitter/api.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index dc7704d5..1e67f217 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -5,7 +5,6 @@ extern "C" { #endif -#include #include #include #include From 211e13d56670950da94ad628f72cb88c7c9b5f74 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Mon, 24 Jul 2023 00:48:12 -0400 Subject: [PATCH 7/7] ci: add asan --- .github/workflows/sanitize.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sanitize.yml b/.github/workflows/sanitize.yml index 3d470a84..bb085eae 100644 --- a/.github/workflows/sanitize.yml +++ b/.github/workflows/sanitize.yml @@ 
-9,7 +9,7 @@ on: jobs: check_undefined_behaviour: - name: Undefined behaviour checks + name: Sanitizer checks runs-on: ubuntu-latest env: TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter @@ -38,3 +38,12 @@ jobs: CFLAGS: -fsanitize=undefined RUSTFLAGS: -lubsan run: cargo test -- --test-threads 1 + + - name: Run main tests with address sanitizer (ASAN) + env: + CFLAGS: -fsanitize=address + RUSTFLAGS: -Zsanitizer=address + run: | + rustup install nightly + rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu + cargo +nightly test -Z build-std --target x86_64-unknown-linux-gnu -- --test-threads 1