From 683fe442e49bb2c8b9e37c6a9b49ec5f7a50c2ac Mon Sep 17 00:00:00 2001 From: Andrew Hlynskyi Date: Wed, 23 Aug 2023 15:14:32 +0300 Subject: [PATCH 1/2] fix(gen): cycle between aliases and anonymous symbols An example of an error cycle in a `parser.c`: ``` static const TSSymbol ts_symbol_map[] = { ... [anon_sym_RBRACE] = anon_sym_RBRACE2, [anon_sym_RBRACE2] = anon_sym_RBRACE, ... }; ``` --- cli/src/generate/render.rs | 76 +++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 197ce364..f92bf312 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -152,49 +152,51 @@ impl Generator { self.symbol_ids[&Symbol::end()].clone(), ); - self.symbol_map = self - .parse_table - .symbols - .iter() - .map(|symbol| { - let mut mapping = symbol; + self.symbol_map = HashMap::new(); - // There can be multiple symbols in the grammar that have the same name and kind, - // due to simple aliases. When that happens, ensure that they map to the same - // public-facing symbol. If one of the symbols is not aliased, choose that one - // to be the public-facing symbol. Otherwise, pick the symbol with the lowest - // numeric value. - if let Some(alias) = self.default_aliases.get(symbol) { - let kind = alias.kind(); - for other_symbol in &self.parse_table.symbols { - if let Some(other_alias) = self.default_aliases.get(other_symbol) { - if other_symbol < mapping && other_alias == alias { - mapping = other_symbol; + for symbol in self.parse_table.symbols.iter() { + let mut mapping = symbol; + + // There can be multiple symbols in the grammar that have the same name and kind, + // due to simple aliases. When that happens, ensure that they map to the same + // public-facing symbol. If one of the symbols is not aliased, choose that one + // to be the public-facing symbol. Otherwise, pick the symbol with the lowest + // numeric value. + if let Some(alias) = self.default_aliases.get(symbol) { + let kind = alias.kind(); + for other_symbol in &self.parse_table.symbols { + if let Some(other_alias) = self.default_aliases.get(other_symbol) { + if other_symbol < mapping && other_alias == alias { + mapping = other_symbol; + } + } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { + mapping = other_symbol; + break; + } + } + } + // Two anonymous tokens with different flags but the same string value + // should be represented with the same symbol in the public API. Examples: + // * "<" and token(prec(1, "<")) + // * "(" and token.immediate("(") + else if symbol.is_terminal() { + let metadata = self.metadata_for_symbol(*symbol); + for other_symbol in &self.parse_table.symbols { + let other_metadata = self.metadata_for_symbol(*other_symbol); + if other_metadata == metadata { + if let Some(mapped) = self.symbol_map.get(other_symbol) { + if mapped == symbol { + break; } - } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { - mapping = other_symbol; - break; - } - } - } - // Two anonymous tokens with different flags but the same string value - // should be represented with the same symbol in the public API. Examples: - // * "<" and token(prec(1, "<")) - // * "(" and token.immediate("(") - else if symbol.is_terminal() { - let metadata = self.metadata_for_symbol(*symbol); - for other_symbol in &self.parse_table.symbols { - let other_metadata = self.metadata_for_symbol(*other_symbol); - if other_metadata == metadata { - mapping = other_symbol; - break; } + mapping = other_symbol; + break; } } + } - (*symbol, *mapping) - }) - .collect(); + self.symbol_map.insert(*symbol, *mapping); + } for production_info in &self.parse_table.production_infos { // Build a list of all field names From 1dbb986515b32308a6f0b0e42cf4c1eef56367f6 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Wed, 23 Aug 2023 08:56:14 -0400 Subject: [PATCH 2/2] chore: add a test for an aliased anonymous symbol with flags --- cli/src/tests/query_test.rs | 90 ++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index c0994d31..94d5ca97 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1,10 +1,13 @@ use super::helpers::{ allocations, - fixtures::get_language, + fixtures::{get_language, get_test_language}, query_helpers::{assert_query_matches, Match, Pattern}, ITERATION_COUNT, }; -use crate::tests::helpers::query_helpers::{collect_captures, collect_matches}; +use crate::{ + generate::generate_parser_for_grammar, + tests::helpers::query_helpers::{collect_captures, collect_matches}, +}; use indoc::indoc; use lazy_static::lazy_static; use rand::{prelude::StdRng, SeedableRng}; @@ -4812,3 +4815,86 @@ fn test_query_max_start_depth_more() { } }); } + +#[test] +fn test_grammar_with_aliased_literal_query() { + // module.exports = grammar({ + // name: 'test', + // + // rules: { + // source: $ => repeat(choice($.compound_statement, $.expansion)), + // + // compound_statement: $ => seq(alias(token(prec(-1, '}')), '}')), + // + // expansion: $ => seq('}'), + // }, + // }); + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test", + "rules": { + "source": { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "compound_statement" + }, + { + "type": "SYMBOL", + "name": "expansion" + } + ] + } + }, + "compound_statement": { + "type": "SEQ", + "members": [ + { + "type": "ALIAS", + "content": { + "type": "TOKEN", + "content": { + "type": "PREC", + "value": -1, + "content": { + "type": "STRING", + "value": "}" + } + } + }, + "named": false, + "value": "}" + } + ] + }, + "expansion": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "}" + } + ] + } + } + } + "#, + ) + .unwrap(); + + let language = get_test_language(&parser_name, &parser_code, None); + + let query = Query::new( + language, + r#" + (compound_statement "}" @bracket1) + (expansion "}" @bracket2) + "#, + ); + + assert!(query.is_ok()); +}