diff --git a/src/compiler/build_tables/parse_table_builder.cc b/src/compiler/build_tables/parse_table_builder.cc index 26dae5b7..2531754b 100644 --- a/src/compiler/build_tables/parse_table_builder.cc +++ b/src/compiler/build_tables/parse_table_builder.cc @@ -45,6 +45,7 @@ struct ParseStateQueueEntry { class ParseTableBuilderImpl : public ParseTableBuilder { const SyntaxGrammar grammar; const LexicalGrammar lexical_grammar; + const std::unordered_map &simple_aliases; unordered_map state_ids_by_item_set; vector item_sets_by_state_id; deque parse_state_queue; @@ -56,9 +57,13 @@ class ParseTableBuilderImpl : public ParseTableBuilder { set> logged_conflict_tokens; public: - ParseTableBuilderImpl(const SyntaxGrammar &syntax_grammar, const LexicalGrammar &lexical_grammar) - : grammar(syntax_grammar), + ParseTableBuilderImpl( + const SyntaxGrammar &syntax_grammar, + const LexicalGrammar &lexical_grammar, + const std::unordered_map &simple_aliases + ) : grammar(syntax_grammar), lexical_grammar(lexical_grammar), + simple_aliases(simple_aliases), item_set_builder(syntax_grammar, lexical_grammar) {} BuildResult build() { @@ -403,12 +408,12 @@ class ParseTableBuilderImpl : public ParseTableBuilder { } void eliminate_unit_reductions() { - set aliased_symbols; + set aliased_symbols; for (auto &variable : grammar.variables) { for (auto &production : variable.productions) { for (auto &step : production) { if (!step.alias.value.empty()) { - aliased_symbols.insert(step.symbol.index); + aliased_symbols.insert(step.symbol); } } } @@ -430,7 +435,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder { if (action.type == ParseActionTypeReduce && action.consumed_symbol_count == 1 && action.alias_sequence_id == 0 && - !aliased_symbols.count(action.symbol.index) && + !simple_aliases.count(action.symbol) && + !aliased_symbols.count(action.symbol) && grammar.variables[action.symbol.index].type != VariableTypeNamed && (unit_reduction_symbol == -1 || unit_reduction_symbol == action.symbol.index) ) { @@ -887,9 +893,14 @@ class ParseTableBuilderImpl : public ParseTableBuilder { unique_ptr ParseTableBuilder::create( const SyntaxGrammar &syntax_grammar, - const LexicalGrammar &lexical_grammar + const LexicalGrammar &lexical_grammar, + const std::unordered_map &simple_aliases ) { - return unique_ptr(new ParseTableBuilderImpl(syntax_grammar, lexical_grammar)); + return unique_ptr(new ParseTableBuilderImpl( + syntax_grammar, + lexical_grammar, + simple_aliases + )); } ParseTableBuilder::BuildResult ParseTableBuilder::build() { diff --git a/src/compiler/build_tables/parse_table_builder.h b/src/compiler/build_tables/parse_table_builder.h index cb642d6c..bfc8641f 100644 --- a/src/compiler/build_tables/parse_table_builder.h +++ b/src/compiler/build_tables/parse_table_builder.h @@ -2,6 +2,7 @@ #define COMPILER_BUILD_TABLES_PARSE_TABLE_BUILDER_H_ #include +#include #include "compiler/parse_table.h" #include "compiler/compile_error.h" @@ -16,7 +17,11 @@ namespace build_tables { class ParseTableBuilder { public: - static std::unique_ptr create(const SyntaxGrammar &, const LexicalGrammar &); + static std::unique_ptr create( + const SyntaxGrammar &, + const LexicalGrammar &, + const std::unordered_map & + ); struct BuildResult { ParseTable parse_table; diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc index 9fe3f7ef..83bdbcc2 100644 --- a/src/compiler/compile.cc +++ b/src/compiler/compile.cc @@ -22,8 +22,7 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) ParseGrammarResult parse_result = parse_grammar(string(input)); if (!parse_result.error_message.empty()) { - return { nullptr, strdup(parse_result.error_message.c_str()), - TSCompileErrorTypeInvalidGrammar }; + return {nullptr, strdup(parse_result.error_message.c_str()), TSCompileErrorTypeInvalidGrammar}; } auto prepare_grammar_result = prepare_grammar::prepare_grammar(parse_result.grammar); @@ -35,7 +34,11 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) return {nullptr, strdup(error.message.c_str()), error.type}; } - auto builder = build_tables::ParseTableBuilder::create(syntax_grammar, lexical_grammar); + auto builder = build_tables::ParseTableBuilder::create( + syntax_grammar, + lexical_grammar, + simple_aliases + ); auto build_tables_result = builder->build(); error = build_tables_result.error; if (error.type != 0) { @@ -54,7 +57,7 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) ); set_log_file(nullptr); - return { strdup(code.c_str()), nullptr, TSCompileErrorTypeNone }; + return {strdup(code.c_str()), nullptr, TSCompileErrorTypeNone}; } } // namespace tree_sitter diff --git a/test/fixtures/test_grammars/aliased_unit_reductions/corpus.txt b/test/fixtures/test_grammars/aliased_unit_reductions/corpus.txt index 80217b76..d9be3f85 100644 --- a/test/fixtures/test_grammars/aliased_unit_reductions/corpus.txt +++ b/test/fixtures/test_grammars/aliased_unit_reductions/corpus.txt @@ -2,11 +2,12 @@ Aliases on rules that are unit reductions ========================================== -one two three; +one two three four; --- (statement (identifier) (b_prime (identifier)) - (c_prime (identifier))) + (c_prime (identifier)) + (identifier)) diff --git a/test/fixtures/test_grammars/aliased_unit_reductions/grammar.json b/test/fixtures/test_grammars/aliased_unit_reductions/grammar.json index d2e4153f..34080b7e 100644 --- a/test/fixtures/test_grammars/aliased_unit_reductions/grammar.json +++ b/test/fixtures/test_grammars/aliased_unit_reductions/grammar.json @@ -10,6 +10,9 @@ "type": "SEQ", "members": [ {"type": "SYMBOL", "name": "_a"}, + + // The `_b` rule is always aliased to `b_prime`, so it is internally treated + // as a simple alias. { "type": "ALIAS", "named": true, @@ -19,6 +22,9 @@ "name": "_b" } }, + + // The `_c` rule is used without an alias in addition to being aliased to `c_prime`, + // so it is not a simple alias. { "type": "ALIAS", "named": true, @@ -28,6 +34,11 @@ "name": "_c" } }, + { + "type": "SYMBOL", + "name": "_c" + }, + { "type": "STRING", "value": ";" @@ -57,7 +68,7 @@ "_c": { "type": "SYMBOL", - "name": "_B" + "name": "_C" }, "_C": {