Merge branch 'master' into wasm-language

This commit is contained in:
Max Brunsfeld 2023-10-27 11:57:04 +01:00
commit f4e2f68f14
161 changed files with 10293 additions and 4253 deletions

View file

@ -6,14 +6,8 @@ if [[ "$(uname -s)" != Linux ]]; then
exit 1
fi
if [[ -z "$LIB_FUZZER_PATH" ]]; then
echo "LIB_FUZZER_PATH not set"
exit 1
fi
CC=${CC:-clang}
CXX=${CXX:-clang++}
LINK=${LINK:-clang++}
default_fuzz_flags="-fsanitize=fuzzer,address,undefined"
@ -21,7 +15,7 @@ CFLAGS=${CFLAGS:-"$default_fuzz_flags"}
CXXFLAGS=${CXXFLAGS:-"$default_fuzz_flags"}
export CFLAGS
make
make CC="$CC" CXX="$CXX"
if [ -z "$@" ]; then
languages=$(ls test/fixtures/grammars)
@ -29,7 +23,13 @@ else
languages="$@"
fi
mkdir -p test/fuzz/out
for lang in ${languages[@]}; do
# skip typescript
if [[ $lang == "typescript" ]]; then
continue
fi
echo "Building $lang fuzzer..."
lang_dir="test/fixtures/grammars/$lang"
@ -54,7 +54,7 @@ for lang in ${languages[@]}; do
highlights_filename="${lang_dir}/queries/highlights.scm"
if [ -e "${highlights_filename}" ]; then
ts_lang_query_filename="${lang}.scm"
cp "${highlights_filename}" "out/${ts_lang_query_filename}"
cp "${highlights_filename}" "test/fuzz/out/${ts_lang_query_filename}"
else
ts_lang_query_filename=""
fi
@ -62,11 +62,16 @@ for lang in ${languages[@]}; do
# FIXME: We should extract the grammar name from grammar.js. Use the name of
# the directory instead. Also, the grammar name needs to be a valid C
# identifier so replace any '-' characters
ts_lang="tree_sitter_$(echo $lang | tr -- - _)"
ts_lang="tree_sitter_$(echo "$lang" | tr -- - _)"
$CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_LANG="$ts_lang" -D TS_LANG_QUERY_FILENAME="\"${ts_lang_query_filename}\"" \
"test/fuzz/fuzzer.cc" "${objects[@]}" \
libtree-sitter.a "$LIB_FUZZER_PATH" \
-o "out/${lang}_fuzzer"
libtree-sitter.a \
-o "test/fuzz/out/${lang}_fuzzer"
python test/fuzz/gen-dict.py "${lang_dir}/src/grammar.json" > "out/$lang.dict"
jq '
[ ..
| if .type? == "STRING" or (.type? == "ALIAS" and .named? == false) then .value else empty end
| select(test("\\S") and length == utf8bytelength)
] | unique | .[]
' | sort
done

View file

@ -51,6 +51,10 @@ while [[ $# > 0 ]]; do
force_docker=1
;;
-v|--verbose)
emscripten_flags="-s VERBOSE=1 -v $emscripten_flags"
;;
*)
usage
echo "Unrecognized argument '$1'"
@ -71,7 +75,11 @@ elif which docker > /dev/null; then
emscripten/emsdk:$emscripen_version \
emcc"
else
echo 'You must have either `docker` or `emcc` on your PATH to run this script'
if [[ "$force_docker" == "1" ]]; then
echo 'You must have `docker` on your PATH to run this script with --docker'
else
echo 'You must have either `docker` or `emcc` on your PATH to run this script'
fi
exit 1
fi
@ -83,10 +91,10 @@ runtime_methods='stringToUTF16','AsciiToString'
# in the `target/scratch` directory
$emcc \
-s WASM=1 \
-s TOTAL_MEMORY=33554432 \
-s INITIAL_MEMORY=33554432 \
-s ALLOW_MEMORY_GROWTH=1 \
-s MAIN_MODULE=2 \
-s NO_FILESYSTEM=1 \
-s FILESYSTEM=0 \
-s NODEJS_CATCH_EXIT=0 \
-s NODEJS_CATCH_REJECTION=0 \
-s EXPORTED_FUNCTIONS=@${web_dir}/exports.json \

View file

@ -28,7 +28,7 @@ fetch_grammar embedded-template master
fetch_grammar go master
fetch_grammar html master
fetch_grammar java master
fetch_grammar javascript partial-order-precedences
fetch_grammar javascript master
fetch_grammar jsdoc master
fetch_grammar json master
fetch_grammar php master

View file

@ -7,7 +7,7 @@ call:fetch_grammar embedded-template master
call:fetch_grammar go master
call:fetch_grammar html master
call:fetch_grammar java master
call:fetch_grammar javascript partial-order-precedences
call:fetch_grammar javascript master
call:fetch_grammar jsdoc master
call:fetch_grammar json master
call:fetch_grammar php master

View file

@ -2,14 +2,41 @@
output_path=lib/binding_rust/bindings.rs
header_path='lib/include/tree_sitter/api.h'
# Type names collected for bindgen's `--no-copy` option. They are joined with
# '|' below so the whole list can be passed as one alternation value.
no_derive_copy=(
  TSInput TSLanguage TSLogger TSLookaheadIterator
  TSParser TSTree
  TSQuery TSQueryCursor TSQueryCapture TSQueryMatch TSQueryPredicateStep
)
# Print every name followed by '|', then strip the trailing separator.
no_copy=$(printf '%s|' "${no_derive_copy[@]}")
no_copy=${no_copy%|}
# Abort unless the installed bindgen CLI is at least as new as the version
# recorded in the existing output file. The recorded version is the sixth
# space-separated field of that file's first line; the installed version is the
# second field of `bindgen --version`.
file_version=$(head -n1 "$output_path" | cut -d' ' -f6)
tool_version=$(bindgen --version | cut -d' ' -f2)
# `sort -V` orders version strings, so the last line is the greater of the two.
higher_version=$(printf '%s\n%s\n' "$file_version" "$tool_version" | sort -V | tail -n1)
[[ "$higher_version" == "$tool_version" ]] || {
  {
    echo "Latest used bindgen version was $file_version"
    echo "Currently installed bindgen CLI version is $tool_version"
    echo
    echo "It's needed to upgrade bindgen CLI first with \`cargo install bindgen-cli\`"
  } >&2
  exit 1
}
bindgen \
--no-layout-tests \
--whitelist-type '^TS.*' \
--whitelist-function '^ts_.*' \
--opaque-type FILE \
--blocklist-type FILE \
--allowlist-type '^TS.*' \
--allowlist-function '^ts_.*' \
--allowlist-var "^TREE_SITTER.*" \
--blocklist-type '^__.*' \
--no-prepend-enum-name \
--no-copy "$no_copy" \
--blocklist-function ts_tree_print_dot_graph \
--size_t-is-usize \
$header_path \

View file

@ -2,12 +2,18 @@
set -e
cargo build --release
root_dir=$PWD
if [ "$CI" == true ]; then
set -x
tree_sitter="$TREE_SITTER"
else
cargo build --release
tree_sitter=${root_dir}/target/release/tree-sitter
fi
filter_grammar_name=$1
root_dir=$PWD
tree_sitter=${root_dir}/target/release/tree-sitter
grammars_dir=${root_dir}/test/fixtures/grammars
grammar_files=$(find $grammars_dir -name grammar.js | grep -v node_modules)

View file

@ -2,7 +2,15 @@
set -e
cargo build --release
root_dir=$PWD
if [ "$CI" == true ]; then
set -x
tree_sitter="$TREE_SITTER"
else
cargo build --release
tree_sitter=${root_dir}/target/release/tree-sitter
fi
build_wasm_args=
if [[ $1 == "--docker" ]]; then
@ -12,8 +20,6 @@ fi
filter_grammar_name=$1
root_dir=$PWD
tree_sitter=${root_dir}/target/release/tree-sitter
grammars_dir=${root_dir}/test/fixtures/grammars
grammar_files=$(find $grammars_dir -name grammar.js | grep -v node_modules)

View file

@ -6,7 +6,13 @@ root=$(dirname "$0")/..
export ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1:symbolize=1"
export UBSAN="print_stacktrace=1:halt_on_error=1:symbolize=1"
declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=256" ["recover"]="-timeout=10 -rss_limit_mb=256" )
# Choose the libFuzzer flags for each fuzzing mode ("halt" / "recover").
# When CI is unset or empty, runs are open-ended. When CI is set, every mode
# is additionally capped at 120 seconds of total fuzzing time.
if [ -z "${CI:-}" ]; then
  declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=2048" ["recover"]="-timeout=10 -rss_limit_mb=2048" )
else
  # libFuzzer has no `-time` option (it only warns about the unknown flag and
  # keeps going), so the cap must be spelled `-max_total_time` in both modes.
  declare -A mode_config=( ["halt"]="-max_total_time=120 -timeout=1 -rss_limit_mb=2048" ["recover"]="-max_total_time=120 -timeout=10 -rss_limit_mb=2048" )
fi
run_fuzzer() {
if [ "$#" -lt 2 ]; then
@ -21,7 +27,7 @@ run_fuzzer() {
# Treat remainder of arguments as libFuzzer arguments
# Fuzzing logs and testcases are always written to `pwd`, so `cd` there first
results="${root}/out/fuzz-results/${lang}_${mode}"
results="${root}/test/fuzz/out/fuzz-results/${lang}"
mkdir -p "${results}"
cd "${results}"
@ -29,7 +35,8 @@ run_fuzzer() {
# then be loaded on subsequent fuzzing runs
mkdir -p corpus
"../../${lang}_fuzzer_${mode}" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_${mode}_" -max_len=2048 ${mode_config[$mode]} "./corpus" "$@"
pwd
"../../${lang}_fuzzer" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_" -max_len=2048 "${mode_config[$mode]}" "./corpus" "$@"
}
reproduce() {
@ -46,7 +53,7 @@ reproduce() {
shift
# Treat remainder of arguments as libFuzzer arguments
"${root}/out/${lang}_fuzzer_${mode}" ${mode_config[$mode]} -runs=1 "${testcase}" "$@"
"${root}/test/fuzz/out/${lang}_fuzzer" "${mode_config[$mode]}" -runs=1 "${testcase}" "$@"
}
script=$(basename "$0")

View file

@ -6,7 +6,7 @@ function usage {
cat <<-EOF
USAGE
$0 [-adDg] [-s SEED] [-l LANGUAGE] [-e EXAMPLE] [-t TRIAL]
$0 [-adDg] [-s SEED] [-l LANGUAGE] [-e EXAMPLE]
OPTIONS