From 3d8f6665816e860225700dd105fe686180f7c237 Mon Sep 17 00:00:00 2001 From: breandan Date: Sun, 16 May 2021 21:38:05 -0400 Subject: [PATCH 1/9] Add link to Kotlin parser --- docs/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.md b/docs/index.md index b5538c55..a773437b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -69,6 +69,7 @@ Parsers for these languages are in development: * [Go mod](https://github.com/camdencheek/tree-sitter-go-mod) * [Haskell](https://github.com/tree-sitter/tree-sitter-haskell) * [Julia](https://github.com/tree-sitter/tree-sitter-julia) +* [Kotlin](https://github.com/fwcd/tree-sitter-kotlin) * [Nix](https://github.com/cstrahan/tree-sitter-nix) * [Perl](https://github.com/ganezdragon/tree-sitter-perl) * [Scala](https://github.com/tree-sitter/tree-sitter-scala) From cc519b3121cbbd704de98039bc71e3e6c440f8d4 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. Oberhumer" Date: Wed, 19 May 2021 12:49:57 +0200 Subject: [PATCH 2/9] cli: Improve const-correctness of the generated parsers (part 2 of 2). This is a follow-up to my previous commit 1badd131f91. I've made this an extra patch as it requires a minor API change in tree_sitter/parser.h. This commit moves the remaining generated tables into the read-only segment. 
Before: $ for f in bash c cpp go html java javascript jsdoc json php python ruby rust; do \ gcc -o $f.o -O2 -Ilib/include -c test/fixtures/grammars/$f/src/parser.c; \ done $ size --totals *.o text data bss dec hex filename 5353477 24472 0 5377949 520f9d (TOTALS) After: $ for f in bash c cpp go html java javascript jsdoc json php python ruby rust; do \ gcc -o $f.o -O2 -Ilib/include -c test/fixtures/grammars/$f/src/parser.c; \ done $ size --totals *.o 5378147 0 0 5378147 521063 (TOTALS) --- cli/src/generate/render.rs | 4 ++-- lib/include/tree_sitter/parser.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 9d22f4a3..a987a0a3 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -355,7 +355,7 @@ impl Generator { } fn add_symbol_names_list(&mut self) { - add_line!(self, "static const char *ts_symbol_names[] = {{"); + add_line!(self, "static const char * const ts_symbol_names[] = {{"); indent!(self); for symbol in self.parse_table.symbols.iter() { let name = self.sanitize_string( @@ -417,7 +417,7 @@ impl Generator { } fn add_field_name_names_list(&mut self) { - add_line!(self, "static const char *ts_field_names[] = {{"); + add_line!(self, "static const char * const ts_field_names[] = {{"); indent!(self); add_line!(self, "[0] = NULL,"); for field_name in &self.field_names { diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index a3a87bd1..cbbc7b4e 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -102,8 +102,8 @@ struct TSLanguage { const uint16_t *small_parse_table; const uint32_t *small_parse_table_map; const TSParseActionEntry *parse_actions; - const char **symbol_names; - const char **field_names; + const char * const *symbol_names; + const char * const *field_names; const TSFieldMapSlice *field_map_slices; const TSFieldMapEntry *field_map_entries; const TSSymbolMetadata *symbol_metadata; From 
3c0152a3312d1f708bcf2b6f0e494e4892277979 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Sun, 16 May 2021 17:55:58 +0300 Subject: [PATCH 3/9] chore(fmt): Apply 'cargo fmt' to the whole code base --- cli/src/generate/binding_files.rs | 6 +++- cli/src/generate/render.rs | 45 ++++++++++++------------- cli/src/main.rs | 7 +++- cli/src/parse.rs | 3 +- cli/src/tests/helpers/random.rs | 2 +- cli/src/tests/helpers/scope_sequence.rs | 5 ++- 6 files changed, 39 insertions(+), 29 deletions(-) diff --git a/cli/src/generate/binding_files.rs b/cli/src/generate/binding_files.rs index ed56fcde..0a55e35a 100644 --- a/cli/src/generate/binding_files.rs +++ b/cli/src/generate/binding_files.rs @@ -95,7 +95,11 @@ pub fn generate_binding_files(repo_path: &Path, language_name: &str) -> Result<( write_file(&package_json_path, package_json_str)?; } } else { - generate_file(&package_json_path, PACKAGE_JSON_TEMPLATE, dashed_language_name)?; + generate_file( + &package_json_path, + PACKAGE_JSON_TEMPLATE, + dashed_language_name, + )?; } // Remove files from old node binding paths. 
diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index a987a0a3..78a07a22 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -543,7 +543,10 @@ impl Generator { let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::>(); alias_ids_by_symbol.sort_unstable_by_key(|e| e.0); - add_line!(self, "static const uint16_t ts_non_terminal_alias_map[] = {{"); + add_line!( + self, + "static const uint16_t ts_non_terminal_alias_map[] = {{" + ); indent!(self); for (symbol, alias_ids) in alias_ids_by_symbol { let symbol_id = &self.symbol_ids[symbol]; @@ -962,7 +965,10 @@ impl Generator { } fn add_lex_modes_list(&mut self) { - add_line!(self, "static const TSLexMode ts_lex_modes[STATE_COUNT] = {{"); + add_line!( + self, + "static const TSLexMode ts_lex_modes[STATE_COUNT] = {{" + ); indent!(self); for (i, state) in self.parse_table.states.iter().enumerate() { if state.is_end_of_non_terminal_extra() { @@ -1200,7 +1206,10 @@ impl Generator { add_line!(self, "}};"); add_line!(self, ""); - add_line!(self, "static const uint32_t ts_small_parse_table_map[] = {{"); + add_line!( + self, + "static const uint32_t ts_small_parse_table_map[] = {{" + ); indent!(self); for i in self.large_state_count..self.parse_table.states.len() { add_line!( @@ -1219,7 +1228,10 @@ impl Generator { } fn add_parse_action_list(&mut self, parse_table_entries: Vec<(usize, ParseTableEntry)>) { - add_line!(self, "static const TSParseActionEntry ts_parse_actions[] = {{"); + add_line!( + self, + "static const TSParseActionEntry ts_parse_actions[] = {{" + ); indent!(self); for (i, entry) in parse_table_entries { add!( @@ -1334,14 +1346,8 @@ impl Generator { // Parse table add_line!(self, ".parse_table = &ts_parse_table[0][0],"); if self.large_state_count < self.parse_table.states.len() { - add_line!( - self, - ".small_parse_table = ts_small_parse_table," - ); - add_line!( - self, - ".small_parse_table_map = ts_small_parse_table_map," - ); + add_line!(self, 
".small_parse_table = ts_small_parse_table,"); + add_line!(self, ".small_parse_table_map = ts_small_parse_table_map,"); } add_line!(self, ".parse_actions = ts_parse_actions,"); @@ -1349,23 +1355,14 @@ impl Generator { add_line!(self, ".symbol_names = ts_symbol_names,"); if !self.field_names.is_empty() { add_line!(self, ".field_names = ts_field_names,"); - add_line!( - self, - ".field_map_slices = ts_field_map_slices," - ); - add_line!( - self, - ".field_map_entries = ts_field_map_entries," - ); + add_line!(self, ".field_map_slices = ts_field_map_slices,"); + add_line!(self, ".field_map_entries = ts_field_map_entries,"); } add_line!(self, ".symbol_metadata = ts_symbol_metadata,"); add_line!(self, ".public_symbol_map = ts_symbol_map,"); add_line!(self, ".alias_map = ts_non_terminal_alias_map,"); if !self.parse_table.production_infos.is_empty() { - add_line!( - self, - ".alias_sequences = &ts_alias_sequences[0][0]," - ); + add_line!(self, ".alias_sequences = &ts_alias_sequences[0][0],"); } // Lexing diff --git a/cli/src/main.rs b/cli/src/main.rs index e7600b30..a2d0a7da 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -122,7 +122,12 @@ fn run() -> error::Result<()> { .takes_value(true) .help("Only run corpus test cases whose name includes the given string"), ) - .arg(Arg::with_name("update").long("update").short("u").help("Update all syntax trees in corpus files with current parser output")) + .arg( + Arg::with_name("update") + .long("update") + .short("u") + .help("Update all syntax trees in corpus files with current parser output"), + ) .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")), ) diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 5266b19f..e843663e 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -197,7 +197,8 @@ pub fn parse_file_at_path( did_visit_children = true; let start = node.start_byte(); let end = node.end_byte(); - let value = 
std::str::from_utf8(&source_code[start..end]).expect("has a string"); + let value = + std::str::from_utf8(&source_code[start..end]).expect("has a string"); write!(&mut stdout, "{}", html_escape::encode_text(value))?; } } diff --git a/cli/src/tests/helpers/random.rs b/cli/src/tests/helpers/random.rs index b0490e7e..4fc8f989 100644 --- a/cli/src/tests/helpers/random.rs +++ b/cli/src/tests/helpers/random.rs @@ -13,7 +13,7 @@ impl Rand { } pub fn unsigned(&mut self, max: usize) -> usize { - self.0.gen_range(0 .. max + 1) + self.0.gen_range(0..max + 1) } pub fn words(&mut self, max_count: usize) -> Vec { diff --git a/cli/src/tests/helpers/scope_sequence.rs b/cli/src/tests/helpers/scope_sequence.rs index 685fe91f..2f904025 100644 --- a/cli/src/tests/helpers/scope_sequence.rs +++ b/cli/src/tests/helpers/scope_sequence.rs @@ -47,7 +47,10 @@ impl ScopeSequence { if self.0.len() != text.len() { panic!( "Inconsistent scope sequence: {:?}", - self.0.iter().zip(text.iter().map(|c| *c as char)).collect::>() + self.0 + .iter() + .zip(text.iter().map(|c| *c as char)) + .collect::>() ); } From 96ad90a646c5d284a67dd897375727b515f58430 Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Fri, 30 Apr 2021 22:22:36 +0300 Subject: [PATCH 4/9] CI: Add 'cargo fmt -- --check' to ci.yml The step is placed right after Rust installation to fail faster. 
--- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 65e051d4..a4b3d145 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,6 +52,9 @@ jobs: toolchain: stable profile: minimal + - name: Check Rust code formatting + run: cargo fmt -- --check + - name: Install emscripten uses: mymindstorm/setup-emsdk@v7 with: From 399b5e4daff56e8252e1ebadcaf6570d21729eb2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 May 2021 13:36:57 -0700 Subject: [PATCH 5/9] Remove interior mutability for parent-node caching in Tree In Rust binding, mark Tree as Sync --- lib/binding_rust/lib.rs | 3 ++- lib/src/node.c | 11 +---------- lib/src/tree.c | 43 ----------------------------------------- lib/src/tree.h | 5 ----- 4 files changed, 3 insertions(+), 59 deletions(-) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 7a8c7e24..22a0c2c1 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1942,7 +1942,8 @@ impl error::Error for QueryError {} unsafe impl Send for Language {} unsafe impl Send for Parser {} unsafe impl Send for Query {} -unsafe impl Send for Tree {} unsafe impl Send for QueryCursor {} +unsafe impl Send for Tree {} unsafe impl Sync for Language {} unsafe impl Sync for Query {} +unsafe impl Sync for Tree {} diff --git a/lib/src/node.c b/lib/src/node.c index 8498f9c5..9c244be7 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -150,9 +150,6 @@ static inline TSNode ts_node__child( while (ts_node_child_iterator_next(&iterator, &child)) { if (ts_node__is_relevant(child, include_anonymous)) { if (index == child_index) { - if (ts_node__is_relevant(self, true)) { - ts_tree_set_cached_parent(self.tree, &child, &self); - } return child; } index++; @@ -355,7 +352,6 @@ static inline TSNode ts_node__descendant_for_byte_range( node = child; if (ts_node__is_relevant(node, include_anonymous)) { - ts_tree_set_cached_parent(self.tree, &child, 
&last_visible_node); last_visible_node = node; } did_descend = true; @@ -395,7 +391,6 @@ static inline TSNode ts_node__descendant_for_point_range( node = child; if (ts_node__is_relevant(node, include_anonymous)) { - ts_tree_set_cached_parent(self.tree, &child, &last_visible_node); last_visible_node = node; } did_descend = true; @@ -464,10 +459,7 @@ bool ts_node_has_error(TSNode self) { } TSNode ts_node_parent(TSNode self) { - TSNode node = ts_tree_get_cached_parent(self.tree, &self); - if (node.id) return node; - - node = ts_tree_root_node(self.tree); + TSNode node = ts_tree_root_node(self.tree); uint32_t end_byte = ts_node_end_byte(self); if (node.id == self.id) return ts_node__null(); @@ -486,7 +478,6 @@ TSNode ts_node_parent(TSNode self) { if (iterator.position.bytes >= end_byte) { node = child; if (ts_node__is_relevant(child, true)) { - ts_tree_set_cached_parent(self.tree, &node, &last_visible_node); last_visible_node = node; } did_descend = true; diff --git a/lib/src/tree.c b/lib/src/tree.c index 391fa7f5..f2cc85ef 100644 --- a/lib/src/tree.c +++ b/lib/src/tree.c @@ -5,8 +5,6 @@ #include "./tree_cursor.h" #include "./tree.h" -static const unsigned PARENT_CACHE_CAPACITY = 32; - TSTree *ts_tree_new( Subtree root, const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count @@ -14,9 +12,6 @@ TSTree *ts_tree_new( TSTree *result = ts_malloc(sizeof(TSTree)); result->root = root; result->language = language; - result->parent_cache = NULL; - result->parent_cache_start = 0; - result->parent_cache_size = 0; result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange)); memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange)); result->included_range_count = included_range_count; @@ -35,7 +30,6 @@ void ts_tree_delete(TSTree *self) { ts_subtree_release(&pool, self->root); ts_subtree_pool_delete(&pool); ts_free(self->included_ranges); - if (self->parent_cache) ts_free(self->parent_cache); 
ts_free(self); } @@ -78,8 +72,6 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { SubtreePool pool = ts_subtree_pool_new(0); self->root = ts_subtree_edit(self->root, edit, &pool); - self->parent_cache_start = 0; - self->parent_cache_size = 0; ts_subtree_pool_delete(&pool); } @@ -111,38 +103,3 @@ TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uin void ts_tree_print_dot_graph(const TSTree *self, FILE *file) { ts_subtree_print_dot_graph(self->root, self->language, file); } - -TSNode ts_tree_get_cached_parent(const TSTree *self, const TSNode *node) { - for (uint32_t i = 0; i < self->parent_cache_size; i++) { - uint32_t index = (self->parent_cache_start + i) % PARENT_CACHE_CAPACITY; - ParentCacheEntry *entry = &self->parent_cache[index]; - if (entry->child == node->id) { - return ts_node_new(self, entry->parent, entry->position, entry->alias_symbol); - } - } - return ts_node_new(NULL, NULL, length_zero(), 0); -} - -void ts_tree_set_cached_parent(const TSTree *_self, const TSNode *node, const TSNode *parent) { - TSTree *self = (TSTree *)_self; - if (!self->parent_cache) { - self->parent_cache = ts_calloc(PARENT_CACHE_CAPACITY, sizeof(ParentCacheEntry)); - } - - uint32_t index = (self->parent_cache_start + self->parent_cache_size) % PARENT_CACHE_CAPACITY; - self->parent_cache[index] = (ParentCacheEntry) { - .child = node->id, - .parent = (const Subtree *)parent->id, - .position = { - parent->context[0], - {parent->context[1], parent->context[2]} - }, - .alias_symbol = parent->context[3], - }; - - if (self->parent_cache_size == PARENT_CACHE_CAPACITY) { - self->parent_cache_start++; - } else { - self->parent_cache_size++; - } -} diff --git a/lib/src/tree.h b/lib/src/tree.h index 92a7e641..0334b824 100644 --- a/lib/src/tree.h +++ b/lib/src/tree.h @@ -15,17 +15,12 @@ typedef struct { struct TSTree { Subtree root; const TSLanguage *language; - ParentCacheEntry *parent_cache; - uint32_t parent_cache_start; - uint32_t parent_cache_size; 
TSRange *included_ranges; unsigned included_range_count; }; TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned); TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol); -TSNode ts_tree_get_cached_parent(const TSTree *, const TSNode *); -void ts_tree_set_cached_parent(const TSTree *, const TSNode *, const TSNode *); #ifdef __cplusplus } From 5664b77535a96071223fbe3daff0ae851104a139 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 May 2021 14:29:25 -0700 Subject: [PATCH 6/9] rust: 0.19.5 --- Cargo.lock | 2 +- lib/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 26bfdb1a..06eaabb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,7 +634,7 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tree-sitter" -version = "0.19.4" +version = "0.19.5" dependencies = [ "cc", "lazy_static", diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 8e6fdced..dadc0367 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.19.4" +version = "0.19.5" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" From 242e089379782fe02f6a49e84bdd61cea5e7b66b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 May 2021 14:29:39 -0700 Subject: [PATCH 7/9] web: 0.19.4 --- lib/binding_web/package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/binding_web/package.json b/lib/binding_web/package.json index a3d11b55..8b1765d4 100644 --- a/lib/binding_web/package.json +++ b/lib/binding_web/package.json @@ -1,6 +1,6 @@ { "name": "web-tree-sitter", - "version": "0.19.3", + "version": "0.19.4", "description": "Tree-sitter bindings for the web", "main": "tree-sitter.js", "types": "tree-sitter-web.d.ts", @@ -30,4 +30,4 @@ "mocha": "^6.1.4", "terser": "^3.17.0" } -} +} \ No newline at 
end of file From 9d0eedc01f2a38feafee506f4b9ea6bb0562cb2f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 May 2021 14:33:25 -0700 Subject: [PATCH 8/9] Remove stray entry from wasm exported-symbol list --- lib/binding_web/exports.json | 1 - lib/binding_web/package.json | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/binding_web/exports.json b/lib/binding_web/exports.json index 67787b25..01f93434 100644 --- a/lib/binding_web/exports.json +++ b/lib/binding_web/exports.json @@ -26,7 +26,6 @@ "_memcpy", "_strlen", "_towupper", - "abort", "_ts_init", "_ts_language_field_count", diff --git a/lib/binding_web/package.json b/lib/binding_web/package.json index 8b1765d4..b63a4072 100644 --- a/lib/binding_web/package.json +++ b/lib/binding_web/package.json @@ -30,4 +30,4 @@ "mocha": "^6.1.4", "terser": "^3.17.0" } -} \ No newline at end of file +} From 8d8690538ef0029885c7ef1f163b0e32f256a5aa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 May 2021 15:02:46 -0700 Subject: [PATCH 9/9] 0.19.5 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 06eaabb7..f735619c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -644,7 +644,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.19.4" +version = "0.19.5" dependencies = [ "ansi_term 0.12.1", "atty", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index a1884d2c..693ac817 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.19.4" +version = "0.19.5" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index 1d0fe136..2c5e364a 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.19.4", + "version": 
"0.19.5", "author": "Max Brunsfeld", "license": "MIT", "repository": {