From a003e5f6bd2c13685281beaef42b932929e1bc54 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld
Date: Fri, 20 Mar 2020 11:35:11 -0700
Subject: [PATCH] generate: Avoid duplicate string tokens in unique symbol map

---
 cli/src/generate/render.rs  | 17 ++++++++++++++++-
 cli/src/tests/query_test.rs | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs
index 824c3bcf..e8c59d07 100644
--- a/cli/src/generate/render.rs
+++ b/cli/src/generate/render.rs
@@ -325,12 +325,13 @@ impl Generator {
         add_line!(self, "static TSSymbol ts_symbol_map[] = {{");
         indent!(self);
         for symbol in &self.parse_table.symbols {
+            let mut mapping = symbol;
+
             // There can be multiple symbols in the grammar that have the same name and kind,
             // due to simple aliases. When that happens, ensure that they map to the same
             // public-facing symbol. If one of the symbols is not aliased, choose that one
             // to be the public-facing symbol. Otherwise, pick the symbol with the lowest
             // numeric value.
-            let mut mapping = symbol;
             if let Some(alias) = self.simple_aliases.get(symbol) {
                 let kind = alias.kind();
                 for other_symbol in &self.parse_table.symbols {
@@ -344,6 +345,20 @@ impl Generator {
                     }
                 }
             }
+            // Two anonymous tokens with different flags but the same string value
+            // should be represented with the same symbol in the public API. Examples:
+            // * "<" and token(prec(1, "<"))
+            // * "(" and token.immediate("(")
+            else if symbol.is_terminal() {
+                let metadata = self.metadata_for_symbol(*symbol);
+                for other_symbol in &self.parse_table.symbols {
+                    let other_metadata = self.metadata_for_symbol(*other_symbol);
+                    if other_metadata == metadata {
+                        mapping = other_symbol;
+                        break;
+                    }
+                }
+            }
 
             add_line!(
                 self,
diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs
index 0daa4d5a..f69074a8 100644
--- a/cli/src/tests/query_test.rs
+++ b/cli/src/tests/query_test.rs
@@ -491,7 +491,7 @@ fn test_query_matches_with_wildcard_at_the_root() {
 }
 
 #[test]
-fn test_query_with_immediate_siblings() {
+fn test_query_matches_with_immediate_siblings() {
     allocations::record(|| {
         let language = get_language("python");
 
@@ -677,6 +677,41 @@ fn test_query_matches_in_language_with_simple_aliases() {
     });
 }
 
+#[test]
+fn test_query_matches_with_different_tokens_with_the_same_string_value() {
+    allocations::record(|| {
+        let language = get_language("rust");
+        let query = Query::new(
+            language,
+            r#"
+                "<" @less
+                ">" @greater
+            "#,
+        )
+        .unwrap();
+
+        // In Rust, there are two '<' tokens: one for the binary operator,
+        // and one with higher precedence for generics.
+        let source = "const A: B = d < e || f > g;";
+
+        let mut parser = Parser::new();
+        parser.set_language(language).unwrap();
+        let tree = parser.parse(&source, None).unwrap();
+        let mut cursor = QueryCursor::new();
+        let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
+
+        assert_eq!(
+            collect_matches(matches, &query, source),
+            &[
+                (0, vec![("less", "<")]),
+                (1, vec![("greater", ">")]),
+                (0, vec![("less", "<")]),
+                (1, vec![("greater", ">")]),
+            ]
+        );
+    });
+}
+
 #[test]
 fn test_query_matches_with_too_many_permutations_to_track() {
     allocations::record(|| {