Merge pull request #80 from tree-sitter/fix-another-parse-state-merging-problem
Fix another parse state merging problem
This commit is contained in:
commit
d6cd0ca4d0
4 changed files with 168 additions and 55 deletions
|
|
@ -413,26 +413,22 @@ class ParseTableBuilder {
|
|||
ParseState &state = parse_table.states[i];
|
||||
ParseState &other = parse_table.states[j];
|
||||
|
||||
if (state.nonterminal_entries != other.nonterminal_entries)
|
||||
return false;
|
||||
if (state.nonterminal_entries != other.nonterminal_entries) return false;
|
||||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
auto &incompatible_tokens = incompatible_tokens_by_index[lookahead.index];
|
||||
|
||||
const auto &other_entry = other.terminal_entries.find(lookahead);
|
||||
|
||||
if (other_entry == other.terminal_entries.end()) {
|
||||
if (entry.second.actions.back().type != ParseActionTypeReduce) return false;
|
||||
if (!has_entry(other, entry.second)) return false;
|
||||
|
||||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
for (const Symbol &incompatible_token : incompatible_tokens) {
|
||||
for (const Symbol &incompatible_token : incompatible_tokens_by_index[lookahead.index]) {
|
||||
if (other.terminal_entries.count(incompatible_token)) return false;
|
||||
}
|
||||
}
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(other, entry.second))
|
||||
return false;
|
||||
} else if (entry.second != other_entry->second) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -442,26 +438,25 @@ class ParseTableBuilder {
|
|||
|
||||
for (auto &entry : other.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
auto &incompatible_tokens = incompatible_tokens_by_index[lookahead.index];
|
||||
|
||||
if (!state.terminal_entries.count(lookahead)) {
|
||||
if (entry.second.actions.back().type != ParseActionTypeReduce) return false;
|
||||
if (!has_entry(state, entry.second)) return false;
|
||||
|
||||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
for (const Symbol &incompatible_token : incompatible_tokens) {
|
||||
for (const Symbol &incompatible_token : incompatible_tokens_by_index[lookahead.index]) {
|
||||
if (state.terminal_entries.count(incompatible_token)) return false;
|
||||
}
|
||||
}
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(state, entry.second))
|
||||
return false;
|
||||
|
||||
symbols_to_merge.insert(lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &lookahead : symbols_to_merge)
|
||||
for (const Symbol &lookahead : symbols_to_merge) {
|
||||
state.terminal_entries[lookahead] = other.terminal_entries.find(lookahead)->second;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,9 @@ bool LexConflictManager::resolve(const LexItemSet &item_set,
|
|||
}
|
||||
return true;
|
||||
} else {
|
||||
for (const LexItem &item : item_set.entries) {
|
||||
possible_homonyms[item.lhs.index].insert(old_action.symbol.index);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -70,7 +70,8 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
LexTable lex_table;
|
||||
const LexicalGrammar grammar;
|
||||
vector<Rule> separator_rules;
|
||||
CharacterSet first_separator_characters;
|
||||
CharacterSet separator_start_characters;
|
||||
CharacterSet token_start_characters;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<LexItemSet, LexStateId> lex_state_ids;
|
||||
|
||||
|
|
@ -78,13 +79,26 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
vector<bool> shadowed_token_indices;
|
||||
|
||||
LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) {
|
||||
StartingCharacterAggregator starting_character_aggregator;
|
||||
StartingCharacterAggregator separator_character_aggregator;
|
||||
for (const auto &rule : grammar.separators) {
|
||||
separator_rules.push_back(Repeat{rule});
|
||||
starting_character_aggregator.apply(rule);
|
||||
separator_character_aggregator.apply(rule);
|
||||
}
|
||||
separator_rules.push_back(Blank{});
|
||||
first_separator_characters = starting_character_aggregator.result;
|
||||
separator_start_characters = separator_character_aggregator.result;
|
||||
|
||||
StartingCharacterAggregator token_start_character_aggregator;
|
||||
for (const auto &variable : grammar.variables) {
|
||||
token_start_character_aggregator.apply(variable.rule);
|
||||
}
|
||||
token_start_characters = token_start_character_aggregator.result;
|
||||
token_start_characters
|
||||
.exclude('a', 'z')
|
||||
.exclude('A', 'Z')
|
||||
.exclude('0', '9')
|
||||
.exclude('_')
|
||||
.exclude('$');
|
||||
|
||||
shadowed_token_indices.resize(grammar.variables.size());
|
||||
}
|
||||
|
||||
|
|
@ -100,24 +114,21 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
}
|
||||
|
||||
bool detect_conflict(Symbol::Index left, Symbol::Index right) {
|
||||
clear();
|
||||
StartingCharacterAggregator left_starting_characters;
|
||||
StartingCharacterAggregator right_starting_characters;
|
||||
left_starting_characters.apply(grammar.variables[left].rule);
|
||||
right_starting_characters.apply(grammar.variables[right].rule);
|
||||
if (!left_starting_characters.result.intersects(right_starting_characters.result) &&
|
||||
!left_starting_characters.result.intersects(separator_start_characters) &&
|
||||
!right_starting_characters.result.intersects(separator_start_characters)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
clear();
|
||||
map<Symbol, ParseTableEntry> terminals;
|
||||
terminals[Symbol::terminal(left)];
|
||||
terminals[Symbol::terminal(right)];
|
||||
|
||||
if (grammar.variables[left].is_string && grammar.variables[right].is_string) {
|
||||
StartingCharacterAggregator left_starting_characters;
|
||||
left_starting_characters.apply(grammar.variables[left].rule);
|
||||
StartingCharacterAggregator right_starting_characters;
|
||||
right_starting_characters.apply(grammar.variables[right].rule);
|
||||
if (!(left_starting_characters.result == right_starting_characters.result)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
add_lex_state(item_set_for_terminals(terminals));
|
||||
|
||||
return shadowed_token_indices[right];
|
||||
}
|
||||
|
||||
|
|
@ -148,25 +159,27 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
const LexItemSet::Transition &transition = pair.second;
|
||||
|
||||
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
|
||||
auto current_action = lex_table.states[state_id].accept_action;
|
||||
if (current_action.is_present()) {
|
||||
bool prefer_advancing = conflict_manager.resolve(transition.destination, action, current_action);
|
||||
bool matches_accepted_token = false;
|
||||
AcceptTokenAction &accept_action = lex_table.states[state_id].accept_action;
|
||||
if (accept_action.is_present()) {
|
||||
bool prefer_advancing = conflict_manager.resolve(transition.destination, action, accept_action);
|
||||
bool can_advance_for_accepted_token = false;
|
||||
for (const LexItem &item : transition.destination.entries) {
|
||||
if (item.lhs == current_action.symbol) {
|
||||
matches_accepted_token = true;
|
||||
} else if (!transition.in_main_token && !item.lhs.is_built_in() && !prefer_advancing) {
|
||||
if (item.lhs == accept_action.symbol) {
|
||||
can_advance_for_accepted_token = true;
|
||||
} else if (!prefer_advancing && !transition.in_main_token && !item.lhs.is_built_in()) {
|
||||
shadowed_token_indices[item.lhs.index] = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!matches_accepted_token && characters.intersects(first_separator_characters)) {
|
||||
shadowed_token_indices[current_action.symbol.index] = true;
|
||||
if (!can_advance_for_accepted_token) {
|
||||
if (characters.intersects(separator_start_characters) ||
|
||||
(grammar.variables[accept_action.symbol.index].is_string &&
|
||||
characters.intersects(token_start_characters))) {
|
||||
shadowed_token_indices[accept_action.symbol.index] = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!prefer_advancing) {
|
||||
continue;
|
||||
}
|
||||
if (!prefer_advancing) continue;
|
||||
}
|
||||
|
||||
action.state_index = add_lex_state(transition.destination);
|
||||
|
|
@ -181,15 +194,11 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
AcceptTokenAction action(item.lhs, completion_status.precedence.max,
|
||||
item.lhs.is_built_in() ||
|
||||
grammar.variables[item.lhs.index].is_string);
|
||||
|
||||
auto current_action = lex_table.states[state_id].accept_action;
|
||||
if (current_action.is_present()) {
|
||||
if (!conflict_manager.resolve(action, current_action)) {
|
||||
continue;
|
||||
}
|
||||
AcceptTokenAction &existing_action = lex_table.states[state_id].accept_action;
|
||||
if (!existing_action.is_present() ||
|
||||
conflict_manager.resolve(action, existing_action)) {
|
||||
lex_table.states[state_id].accept_action = action;
|
||||
}
|
||||
|
||||
lex_table.states[state_id].accept_action = action;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
106
test/compiler/build_tables/lex_table_builder_test.cc
Normal file
106
test/compiler/build_tables/lex_table_builder_test.cc
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/build_tables/lex_table_builder.h"
|
||||
|
||||
using namespace build_tables;
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("LexTableBuilder::detect_conflict", []() {
|
||||
vector<Rule> separators({
|
||||
CharacterSet({ ' ', '\t' }),
|
||||
});
|
||||
|
||||
it("returns false for tokens that don't match the same string", [&]() {
|
||||
auto builder = LexTableBuilder::create(LexicalGrammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"token_1",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
CharacterSet({ 'a' }),
|
||||
CharacterSet({ 'b' }),
|
||||
CharacterSet({ 'c' }),
|
||||
}),
|
||||
false
|
||||
},
|
||||
LexicalVariable{
|
||||
"token_2",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
CharacterSet({ 'b' }),
|
||||
CharacterSet({ 'c' }),
|
||||
CharacterSet({ 'd' }),
|
||||
}),
|
||||
false
|
||||
},
|
||||
},
|
||||
separators
|
||||
});
|
||||
|
||||
AssertThat(builder->detect_conflict(0, 1), IsFalse());
|
||||
AssertThat(builder->detect_conflict(1, 0), IsFalse());
|
||||
});
|
||||
|
||||
it("returns true when one token matches a string that the other matches, "
|
||||
"plus some addition content that begins with a separator character", [&]() {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"token_1",
|
||||
VariableTypeNamed,
|
||||
Rule::repeat(CharacterSet().include_all().exclude('\n')), // regex: /.+/
|
||||
false
|
||||
},
|
||||
LexicalVariable{
|
||||
"token_2",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ CharacterSet({ 'a' }), CharacterSet({ 'b' }), CharacterSet({ 'c' }) }), // string: 'abc'
|
||||
true
|
||||
},
|
||||
},
|
||||
separators
|
||||
};
|
||||
|
||||
auto builder = LexTableBuilder::create(grammar);
|
||||
AssertThat(builder->detect_conflict(0, 1), IsTrue());
|
||||
AssertThat(builder->detect_conflict(1, 0), IsFalse());
|
||||
|
||||
grammar.variables[1].is_string = false;
|
||||
AssertThat(builder->detect_conflict(0, 1), IsTrue());
|
||||
AssertThat(builder->detect_conflict(1, 0), IsFalse());
|
||||
});
|
||||
|
||||
it("returns true when one token matches a string that the other matches, "
|
||||
"plus some addition content that matches another one-character token", [&]() {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"token_1",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
CharacterSet({ '>' }),
|
||||
CharacterSet({ '>' }),
|
||||
}),
|
||||
true
|
||||
},
|
||||
LexicalVariable{
|
||||
"token_2",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
CharacterSet({ '>' }),
|
||||
}),
|
||||
true
|
||||
},
|
||||
},
|
||||
separators
|
||||
};
|
||||
|
||||
auto builder = LexTableBuilder::create(grammar);
|
||||
AssertThat(builder->detect_conflict(0, 1), IsTrue());
|
||||
AssertThat(builder->detect_conflict(1, 0), IsFalse());
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
Loading…
Add table
Add a link
Reference in a new issue