Fuzz the highlights query alongside the parser.

script/build-fuzzers: when a grammar ships queries/highlights.scm, copy it to
  out/<lang>.scm and pass its name to the compiler as TS_LANG_QUERY_FILENAME
  (an empty string otherwise).
test/fuzz/fuzzer.cc: LLVMFuzzerInitialize loads that query from the directory
  of the fuzzing binary; each input then drains both the match iterator and
  the capture iterator over the parsed tree.
test/fuzz/gen-dict.py: emit every UTF-8 byte of each literal as a \xNN escape.

NOTE(review): line breaks, indentation, the two #include targets and the
template arguments were reconstructed from usage; confirm against upstream.

diff --git a/script/build-fuzzers b/script/build-fuzzers
index 2a44b10c..f4ddb293 100755
--- a/script/build-fuzzers
+++ b/script/build-fuzzers
@@ -51,13 +51,21 @@ for lang in ${languages[@]}; do
   $CC $CFLAGS -g -O0 "-I${lang_dir}/src" "${lang_dir}/src/parser.c" -c -o "${lang_dir}/src/parser.o"
   objects+=("${lang_dir}/src/parser.o")
 
+  highlights_filename="${lang_dir}/queries/highlights.scm"
+  if [ -e "${highlights_filename}" ]; then
+    ts_lang_query_filename="${lang}.scm"
+    cp "${highlights_filename}" "out/${ts_lang_query_filename}"
+  else
+    ts_lang_query_filename=""
+  fi
+
   modes=(true halt false recover)
   for i in 0 2; do
     # FIXME: We should extract the grammar name from grammar.js. Use the name of
     # the directory instead. Also, the grammar name needs to be a valid C
     # identifier so replace any '-' characters
     ts_lang="tree_sitter_$(echo $lang | tr -- - _)"
-    $CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_HALT_ON_ERROR="${modes[i]}" -D TS_LANG="$ts_lang" \
+    $CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_HALT_ON_ERROR="${modes[i]}" -D TS_LANG="$ts_lang" -D TS_LANG_QUERY_FILENAME="\"${ts_lang_query_filename}\"" \
       "test/fuzz/fuzzer.cc" "${objects[@]}" \
       libtree-sitter.a "$LIB_FUZZER_PATH" \
       -o "out/${lang}_fuzzer_${modes[i+1]}"
diff --git a/test/fuzz/fuzzer.cc b/test/fuzz/fuzzer.cc
index 8d6f9cef..2d01b2fd 100644
--- a/test/fuzz/fuzzer.cc
+++ b/test/fuzz/fuzzer.cc
@@ -1,8 +1,40 @@
 #include <cassert>
+#include <fstream>
 #include "tree_sitter/api.h"
 
 extern "C" const TSLanguage *TS_LANG();
 
+static TSQuery *lang_query;
+
+extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
+  if(TS_LANG_QUERY_FILENAME[0]) {
+    // The query filename is relative to the fuzzing binary. Convert it
+    // to an absolute path first
+    auto binary_filename = std::string((*argv)[0]);
+    auto binary_directory = binary_filename.substr(0, binary_filename.find_last_of("\\/"));
+    auto lang_query_filename = binary_directory + "/" + TS_LANG_QUERY_FILENAME;
+
+    auto f = std::ifstream(lang_query_filename);
+    assert(f.good());
+    std::string lang_query_source((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
+
+    uint32_t error_offset = 0;
+    TSQueryError error_type = TSQueryErrorNone;
+
+    lang_query = ts_query_new(
+      TS_LANG(),
+      lang_query_source.c_str(),
+      lang_query_source.size(),
+      &error_offset,
+      &error_type
+    );
+
+    assert(lang_query);
+  }
+
+  return 0;
+}
+
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   const char *str = reinterpret_cast<const char *>(data);
 
@@ -17,6 +49,31 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   TSTree *tree = ts_parser_parse_string(parser, NULL, str, size);
   TSNode root_node = ts_tree_root_node(tree);
 
+  if (lang_query) {
+    {
+      TSQueryCursor *cursor = ts_query_cursor_new();
+
+      ts_query_cursor_exec(cursor, lang_query, root_node);
+      TSQueryMatch match;
+      while (ts_query_cursor_next_match(cursor, &match)) {
+      }
+
+      ts_query_cursor_delete(cursor);
+    }
+
+    {
+      TSQueryCursor *cursor = ts_query_cursor_new();
+
+      ts_query_cursor_exec(cursor, lang_query, root_node);
+      TSQueryMatch match;
+      uint32_t capture_index;
+      while (ts_query_cursor_next_capture(cursor, &match, &capture_index)) {
+      }
+
+      ts_query_cursor_delete(cursor);
+    }
+  }
+
   ts_tree_delete(tree);
   ts_parser_delete(parser);
 
diff --git a/test/fuzz/gen-dict.py b/test/fuzz/gen-dict.py
index a9e07838..f8cf834e 100644
--- a/test/fuzz/gen-dict.py
+++ b/test/fuzz/gen-dict.py
@@ -25,7 +25,7 @@ def main():
 
   for lit in sorted(literals):
     if lit:
-      print '"%s"' % ''.join([(c if c.isalnum() else '\\x%02x' % ord(c)) for c in lit])
+      print '"%s"' % ''.join(['\\x%02x' % ord(b) for b in lit.encode('utf-8')])
 
 if __name__ == '__main__':
   main()