Account for simple aliases in unit reduction elimination

This commit is contained in:
Max Brunsfeld 2018-08-30 09:40:00 -07:00
parent 5372a81947
commit 7fc64ed25a
5 changed files with 46 additions and 15 deletions

View file

@ -45,6 +45,7 @@ struct ParseStateQueueEntry {
class ParseTableBuilderImpl : public ParseTableBuilder {
const SyntaxGrammar grammar;
const LexicalGrammar lexical_grammar;
const std::unordered_map<rules::Symbol, rules::Alias> &simple_aliases;
unordered_map<ParseItemSet, ParseStateId> state_ids_by_item_set;
vector<const ParseItemSet *> item_sets_by_state_id;
deque<ParseStateQueueEntry> parse_state_queue;
@ -56,9 +57,13 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
set<std::pair<Symbol, Symbol>> logged_conflict_tokens;
public:
ParseTableBuilderImpl(const SyntaxGrammar &syntax_grammar, const LexicalGrammar &lexical_grammar)
: grammar(syntax_grammar),
ParseTableBuilderImpl(
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
const std::unordered_map<rules::Symbol, rules::Alias> &simple_aliases
) : grammar(syntax_grammar),
lexical_grammar(lexical_grammar),
simple_aliases(simple_aliases),
item_set_builder(syntax_grammar, lexical_grammar) {}
BuildResult build() {
@ -403,12 +408,12 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
}
void eliminate_unit_reductions() {
set<Symbol::Index> aliased_symbols;
set<Symbol> aliased_symbols;
for (auto &variable : grammar.variables) {
for (auto &production : variable.productions) {
for (auto &step : production) {
if (!step.alias.value.empty()) {
aliased_symbols.insert(step.symbol.index);
aliased_symbols.insert(step.symbol);
}
}
}
@ -430,7 +435,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
if (action.type == ParseActionTypeReduce &&
action.consumed_symbol_count == 1 &&
action.alias_sequence_id == 0 &&
!aliased_symbols.count(action.symbol.index) &&
!simple_aliases.count(action.symbol) &&
!aliased_symbols.count(action.symbol) &&
grammar.variables[action.symbol.index].type != VariableTypeNamed &&
(unit_reduction_symbol == -1 || unit_reduction_symbol == action.symbol.index)
) {
@ -887,9 +893,14 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
unique_ptr<ParseTableBuilder> ParseTableBuilder::create(
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar
const LexicalGrammar &lexical_grammar,
const std::unordered_map<rules::Symbol, rules::Alias> &simple_aliases
) {
return unique_ptr<ParseTableBuilder>(new ParseTableBuilderImpl(syntax_grammar, lexical_grammar));
return unique_ptr<ParseTableBuilder>(new ParseTableBuilderImpl(
syntax_grammar,
lexical_grammar,
simple_aliases
));
}
ParseTableBuilder::BuildResult ParseTableBuilder::build() {

View file

@ -2,6 +2,7 @@
#define COMPILER_BUILD_TABLES_PARSE_TABLE_BUILDER_H_
#include <memory>
#include <unordered_map>
#include "compiler/parse_table.h"
#include "compiler/compile_error.h"
@ -16,7 +17,11 @@ namespace build_tables {
class ParseTableBuilder {
public:
static std::unique_ptr<ParseTableBuilder> create(const SyntaxGrammar &, const LexicalGrammar &);
static std::unique_ptr<ParseTableBuilder> create(
const SyntaxGrammar &,
const LexicalGrammar &,
const std::unordered_map<rules::Symbol, rules::Alias> &
);
struct BuildResult {
ParseTable parse_table;

View file

@ -22,8 +22,7 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file)
ParseGrammarResult parse_result = parse_grammar(string(input));
if (!parse_result.error_message.empty()) {
return { nullptr, strdup(parse_result.error_message.c_str()),
TSCompileErrorTypeInvalidGrammar };
return {nullptr, strdup(parse_result.error_message.c_str()), TSCompileErrorTypeInvalidGrammar};
}
auto prepare_grammar_result = prepare_grammar::prepare_grammar(parse_result.grammar);
@ -35,7 +34,11 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file)
return {nullptr, strdup(error.message.c_str()), error.type};
}
auto builder = build_tables::ParseTableBuilder::create(syntax_grammar, lexical_grammar);
auto builder = build_tables::ParseTableBuilder::create(
syntax_grammar,
lexical_grammar,
simple_aliases
);
auto build_tables_result = builder->build();
error = build_tables_result.error;
if (error.type != 0) {
@ -54,7 +57,7 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file)
);
set_log_file(nullptr);
return { strdup(code.c_str()), nullptr, TSCompileErrorTypeNone };
return {strdup(code.c_str()), nullptr, TSCompileErrorTypeNone};
}
} // namespace tree_sitter

View file

@ -2,11 +2,12 @@
Aliases on rules that are unit reductions
==========================================
one two three;
one two three four;
---
(statement
(identifier)
(b_prime (identifier))
(c_prime (identifier)))
(c_prime (identifier))
(identifier))

View file

@ -10,6 +10,9 @@
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_a"},
// The `_b` rule is always aliased to `b_prime`, so it is internally treated
// as a simple alias.
{
"type": "ALIAS",
"named": true,
@ -19,6 +22,9 @@
"name": "_b"
}
},
// The `_c` rule is used without an alias in addition to being aliased to `c_prime`,
// so it is not a simple alias.
{
"type": "ALIAS",
"named": true,
@ -28,6 +34,11 @@
"name": "_c"
}
},
{
"type": "SYMBOL",
"name": "_c"
},
{
"type": "STRING",
"value": ";"
@ -57,7 +68,7 @@
"_c": {
"type": "SYMBOL",
"name": "_B"
"name": "_C"
},
"_C": {