Use LexTableBuilder to detect conflicts between tokens more correctly
This commit is contained in:
parent
abf8a4f2c2
commit
64e9230071
11 changed files with 203 additions and 309 deletions
|
|
@ -11,13 +11,12 @@
|
|||
'externals/json-parser',
|
||||
],
|
||||
'sources': [
|
||||
'src/compiler/build_tables/build_lex_table.cc',
|
||||
'src/compiler/build_tables/build_parse_table.cc',
|
||||
'src/compiler/build_tables/build_tables.cc',
|
||||
'src/compiler/build_tables/compatible_tokens.cc',
|
||||
'src/compiler/build_tables/lex_item.cc',
|
||||
'src/compiler/build_tables/lex_item_transitions.cc',
|
||||
'src/compiler/build_tables/lex_conflict_manager.cc',
|
||||
'src/compiler/build_tables/lex_table_builder.cc',
|
||||
'src/compiler/build_tables/lookahead_set.cc',
|
||||
'src/compiler/build_tables/parse_item.cc',
|
||||
'src/compiler/build_tables/parse_item_set_builder.cc',
|
||||
|
|
|
|||
|
|
@ -1,38 +0,0 @@
|
|||
#include "spec_helper.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/build_tables/compatible_tokens.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
#include "compiler/rules.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
// Spec for the recovery-token computation performed by get_compatible_tokens.
describe("recovery_tokens(rule)", []() {
  it("includes rules that can only begin and end with an explicit set of characters", [&]() {
    LexicalGrammar grammar;

    // A single space is the only separator in this grammar.
    grammar.separators = { character({ ' ' }) };

    // var0 is built from an excluding character set; var1 wraps one between
    // two explicit character sets.
    grammar.variables = {
      LexicalVariable{"var0", VariableTypeNamed, character({}, false), false},
      LexicalVariable{"var1", VariableTypeNamed, seq({
        character({ 'a', 'b' }),
        character({}, false),
        character({ 'c', 'd' }),
      }), false},
    };

    // Only var1 (terminal index 1) is expected to be a recovery token.
    const set<Symbol> expected_recovery_tokens({ Symbol(1, Symbol::Terminal) });

    AssertThat(
      get_compatible_tokens(grammar).recovery_tokens,
      Equals<set<Symbol>>(expected_recovery_tokens)
    );
  });
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -20,6 +20,10 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
|||
Symbol sym4(3, Symbol::Terminal);
|
||||
LexItemSet item_set({ LexItem(sym4, blank() )});
|
||||
|
||||
before_each([&]() {
|
||||
conflict_manager = LexConflictManager();
|
||||
});
|
||||
|
||||
it("favors advance actions over empty accept token actions", [&]() {
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(2, {0, 0}, true), AcceptTokenAction());
|
||||
AssertThat(update, IsTrue());
|
||||
|
|
@ -65,6 +69,7 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
|||
describe("advance/accept-token conflicts", [&]() {
|
||||
describe("when the token to accept has higher precedence", [&]() {
|
||||
it("prefers the accept-token action", [&]() {
|
||||
AssertThat(conflict_manager.possible_extensions, IsEmpty());
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
|
||||
AssertThat(update, IsFalse());
|
||||
AssertThat(conflict_manager.possible_extensions, IsEmpty());
|
||||
|
|
@ -72,13 +77,9 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
|||
});
|
||||
|
||||
describe("when the token to accept does not have a higher precedence", [&]() {
|
||||
it("favors the advance action", [&]() {
|
||||
it("favors the advance action and adds the in-progress tokens as possible extensions of the discarded token", [&]() {
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 2, true));
|
||||
AssertThat(update, IsTrue());
|
||||
});
|
||||
|
||||
it("adds the in-progress tokens as possible extensions of the discarded token", [&]() {
|
||||
conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
|
||||
AssertThat(conflict_manager.possible_extensions[sym3.index], Contains(sym4.index));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,18 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
#define COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
|
||||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalGrammar;
|
||||
struct ParseTable;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
LexTable build_lex_table(ParseTable *, const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
|
|
@ -12,7 +12,7 @@
|
|||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/build_tables/compatible_tokens.h"
|
||||
#include "compiler/build_tables/lex_table_builder.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -40,7 +40,7 @@ class ParseTableBuilder {
|
|||
set<string> conflicts;
|
||||
ParseItemSetBuilder item_set_builder;
|
||||
set<const Production *> fragile_productions;
|
||||
CompatibleTokensResult compatible_tokens;
|
||||
vector<set<Symbol::Index>> incompatible_token_indices_by_index;
|
||||
bool allow_any_conflict;
|
||||
|
||||
public:
|
||||
|
|
@ -49,7 +49,6 @@ class ParseTableBuilder {
|
|||
: grammar(grammar),
|
||||
lexical_grammar(lex_grammar),
|
||||
item_set_builder(grammar, lex_grammar),
|
||||
compatible_tokens(get_compatible_tokens(lex_grammar)),
|
||||
allow_any_conflict(false) {}
|
||||
|
||||
pair<ParseTable, CompileError> build() {
|
||||
|
|
@ -76,7 +75,7 @@ class ParseTableBuilder {
|
|||
return { parse_table, error };
|
||||
}
|
||||
|
||||
update_unmergable_token_pairs();
|
||||
compute_unmergable_token_pairs();
|
||||
|
||||
build_error_parse_state();
|
||||
|
||||
|
|
@ -112,8 +111,18 @@ class ParseTableBuilder {
|
|||
void build_error_parse_state() {
|
||||
ParseState error_state;
|
||||
|
||||
for (const Symbol symbol : compatible_tokens.recovery_tokens) {
|
||||
add_out_of_context_parse_state(&error_state, symbol);
|
||||
for (Symbol::Index i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
bool has_non_reciprocal_conflict = false;
|
||||
for (Symbol::Index incompatible_index : incompatible_token_indices_by_index[i]) {
|
||||
if (!incompatible_token_indices_by_index[incompatible_index].count(i)) {
|
||||
has_non_reciprocal_conflict = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_non_reciprocal_conflict) {
|
||||
add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::Terminal));
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &symbol : grammar.extra_tokens) {
|
||||
|
|
@ -294,20 +303,29 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
void update_unmergable_token_pairs() {
|
||||
for (const ParseState &state : parse_table.states) {
|
||||
for (Symbol::Index token_index = 0, token_count = lexical_grammar.variables.size(); token_index < token_count; token_index++) {
|
||||
Symbol token(token_index, Symbol::Terminal);
|
||||
if (state.terminal_entries.count(token)) {
|
||||
auto &incompatible_token_indices = compatible_tokens.unmergeable_pairs[token_index];
|
||||
auto iter = incompatible_token_indices.begin();
|
||||
while (iter != incompatible_token_indices.end()) {
|
||||
if (state.terminal_entries.count(Symbol(*iter, Symbol::NonTerminal))) {
|
||||
iter = incompatible_token_indices.erase(iter);
|
||||
} else {
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
void compute_unmergable_token_pairs() {
|
||||
incompatible_token_indices_by_index.resize(lexical_grammar.variables.size());
|
||||
|
||||
// First, assume that all tokens are mutually incompatible.
|
||||
for (Symbol::Index i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
auto &incompatible_indices = incompatible_token_indices_by_index[i];
|
||||
for (Symbol::Index j = 0; j < n; j++) {
|
||||
if (j != i) incompatible_indices.insert(j);
|
||||
}
|
||||
}
|
||||
|
||||
// For the remaining possibly-incompatible pairs of tokens, check if they
|
||||
// are actually incompatible by actually generating lexical states that
|
||||
// contain them both.
|
||||
auto lex_table_builder = LexTableBuilder::create(lexical_grammar);
|
||||
for (Symbol::Index i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
auto &incompatible_indices = incompatible_token_indices_by_index[i];
|
||||
auto iter = incompatible_indices.begin();
|
||||
while (iter != incompatible_indices.end()) {
|
||||
if (lex_table_builder->detect_conflict(i, *iter)) {
|
||||
++iter;
|
||||
} else {
|
||||
iter = incompatible_indices.erase(iter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -403,17 +421,15 @@ class ParseTableBuilder {
|
|||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
auto &incompatible_token_indices = compatible_tokens.unmergeable_pairs[lookahead.index];
|
||||
auto &incompatible_token_indices = incompatible_token_indices_by_index[lookahead.index];
|
||||
|
||||
const auto &other_entry = other.terminal_entries.find(lookahead);
|
||||
if (other_entry == other.terminal_entries.end()) {
|
||||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
if (!compatible_tokens.recovery_tokens.count(lookahead))
|
||||
return false;
|
||||
for (Symbol::Index incompatible_index : incompatible_token_indices) {
|
||||
if (other.terminal_entries.count(Symbol(incompatible_index, Symbol::Terminal))) {
|
||||
return false;
|
||||
}
|
||||
Symbol incompatible_symbol(incompatible_index, Symbol::Terminal);
|
||||
if (other.terminal_entries.count(incompatible_symbol)) return false;
|
||||
}
|
||||
}
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
|
|
@ -430,16 +446,14 @@ class ParseTableBuilder {
|
|||
for (auto &entry : other.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
auto &incompatible_token_indices = compatible_tokens.unmergeable_pairs[lookahead.index];
|
||||
auto &incompatible_token_indices = incompatible_token_indices_by_index[lookahead.index];
|
||||
|
||||
if (!state.terminal_entries.count(lookahead)) {
|
||||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
if (!compatible_tokens.recovery_tokens.count(lookahead))
|
||||
return false;
|
||||
for (Symbol::Index incompatible_index : incompatible_token_indices) {
|
||||
if (state.terminal_entries.count(Symbol(incompatible_index, Symbol::Terminal))) {
|
||||
return false;
|
||||
}
|
||||
Symbol incompatible_symbol(incompatible_index, Symbol::Terminal);
|
||||
if (state.terminal_entries.count(incompatible_symbol)) return false;
|
||||
}
|
||||
}
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "compiler/build_tables/build_tables.h"
|
||||
#include <tuple>
|
||||
#include "compiler/build_tables/build_lex_table.h"
|
||||
#include "compiler/build_tables/lex_table_builder.h"
|
||||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
|
|
@ -16,12 +16,12 @@ using std::make_tuple;
|
|||
|
||||
tuple<ParseTable, LexTable, CompileError> build_tables(
|
||||
const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lex_grammar
|
||||
const LexicalGrammar &lexical_grammar
|
||||
) {
|
||||
auto parse_table_result = build_parse_table(grammar, lex_grammar);
|
||||
auto parse_table_result = build_parse_table(grammar, lexical_grammar);
|
||||
ParseTable parse_table = parse_table_result.first;
|
||||
const CompileError error = parse_table_result.second;
|
||||
LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
|
||||
LexTable lex_table = LexTableBuilder::create(lexical_grammar)->build(&parse_table);
|
||||
return make_tuple(parse_table, lex_table, error);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,136 +0,0 @@
|
|||
#include "compiler/build_tables/compatible_tokens.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using rules::Symbol;
|
||||
using std::set;
|
||||
|
||||
template <bool left, bool right>
|
||||
class CharacterAggregator : public rules::RuleFn<void> {
|
||||
void apply_to(const rules::Seq *rule) {
|
||||
if (left) apply(rule->left);
|
||||
if (right) apply(rule->right);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Choice *rule) {
|
||||
for (const rule_ptr &element : rule->elements) {
|
||||
apply(element);
|
||||
}
|
||||
}
|
||||
|
||||
void apply_to(const rules::Repeat *rule) {
|
||||
apply(rule->content);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Metadata *rule) {
|
||||
apply(rule->rule);
|
||||
}
|
||||
|
||||
void apply_to(const rules::CharacterSet *rule) {
|
||||
result.add_set(*rule);
|
||||
}
|
||||
|
||||
public:
|
||||
rules::CharacterSet result;
|
||||
};
|
||||
|
||||
template <bool left, bool right>
|
||||
class CharacterIntersector : public rules::RuleFn<bool> {
|
||||
bool apply_to(const rules::Seq *rule) {
|
||||
bool result = false;
|
||||
if (left) result = apply(rule->left);
|
||||
if (right && !result) result = apply(rule->right);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool apply_to(const rules::Choice *rule) {
|
||||
for (const rule_ptr &element : rule->elements) {
|
||||
if (apply(element)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool apply_to(const rules::Repeat *rule) {
|
||||
return apply(rule->content);
|
||||
}
|
||||
|
||||
bool apply_to(const rules::Metadata *rule) {
|
||||
return apply(rule->rule);
|
||||
}
|
||||
|
||||
bool apply_to(const rules::CharacterSet *rule) {
|
||||
return character_set->intersects(*rule);
|
||||
}
|
||||
|
||||
public:
|
||||
rules::CharacterSet *character_set;
|
||||
|
||||
CharacterIntersector(rules::CharacterSet *set) : character_set {set} {}
|
||||
};
|
||||
|
||||
using FirstCharacters = CharacterAggregator<true, false>;
|
||||
using LastCharacters = CharacterAggregator<false, true>;
|
||||
using AllCharacters = CharacterAggregator<true, true>;
|
||||
using FirstCharactersIntersector = CharacterIntersector<true, false>;
|
||||
|
||||
// Computes, for every lexical variable in `grammar`:
//  - whether it is a recovery token: either it shares no characters with the
//    separators, or both its first and last character sets are bounded
//    (not includes_all) and disjoint from the separator characters;
//  - which earlier tokens it cannot be merged with: a token that contains
//    separator characters is paired (symmetrically) with every earlier token
//    whose leading characters intersect its own leading characters.
CompatibleTokensResult get_compatible_tokens(const LexicalGrammar &grammar) {
  const size_t token_count = grammar.variables.size();

  CompatibleTokensResult result;
  result.unmergeable_pairs.resize(token_count);

  // Gather every character that appears anywhere in any separator rule.
  AllCharacters separator_aggregator;
  for (const rule_ptr &separator : grammar.separators) {
    separator_aggregator.apply(separator);
  }
  const rules::CharacterSet &separator_chars = separator_aggregator.result;

  for (size_t i = 0; i < token_count; i++) {
    rule_ptr rule = grammar.variables[i].rule;

    FirstCharacters first_characters;
    first_characters.apply(rule);

    LastCharacters last_characters;
    last_characters.apply(rule);

    AllCharacters all_characters;
    all_characters.apply(rule);

    bool has_distinct_start = !(
      first_characters.result.includes_all ||
      first_characters.result.intersects(separator_chars)
    );

    bool has_distinct_end = !(
      last_characters.result.includes_all ||
      last_characters.result.intersects(separator_chars)
    );

    bool has_separators = all_characters.result.intersects(separator_chars);

    if (!has_separators || (has_distinct_start && has_distinct_end)) {
      result.recovery_tokens.insert(Symbol(i, Symbol::Terminal));
    }

    // Only tokens containing separator characters produce unmergeable pairs.
    if (!has_separators) continue;

    for (size_t j = 0; j < i; j++) {
      FirstCharactersIntersector intersector(&first_characters.result);
      if (!intersector.apply(grammar.variables[j].rule)) continue;

      // Record the pair in both directions.
      result.unmergeable_pairs[i].insert(j);
      result.unmergeable_pairs[j].insert(i);
    }
  }

  return result;
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_
|
||||
#define COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_
|
||||
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
struct CompatibleTokensResult {
|
||||
std::set<rules::Symbol> recovery_tokens;
|
||||
std::vector<std::unordered_set<rules::Symbol::Index>> unmergeable_pairs;
|
||||
};
|
||||
|
||||
CompatibleTokensResult get_compatible_tokens(const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_
|
||||
|
|
@ -10,11 +10,10 @@ namespace build_tables {
|
|||
bool LexConflictManager::resolve(const LexItemSet &item_set,
|
||||
const AdvanceAction &new_action,
|
||||
const AcceptTokenAction &old_action) {
|
||||
if (!old_action.is_present())
|
||||
return true;
|
||||
if (new_action.precedence_range.max >= old_action.precedence) {
|
||||
for (const LexItem &item : item_set.entries)
|
||||
for (const LexItem &item : item_set.entries) {
|
||||
possible_extensions[old_action.symbol.index].insert(item.lhs.index);
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
|
@ -23,30 +22,26 @@ bool LexConflictManager::resolve(const LexItemSet &item_set,
|
|||
|
||||
bool LexConflictManager::resolve(const AcceptTokenAction &new_action,
|
||||
const AcceptTokenAction &old_action) {
|
||||
if (!old_action.is_present())
|
||||
return true;
|
||||
|
||||
int old_precedence = old_action.precedence;
|
||||
int new_precedence = new_action.precedence;
|
||||
|
||||
bool result;
|
||||
if (new_precedence > old_precedence)
|
||||
if (new_action.precedence > old_action.precedence) {
|
||||
result = true;
|
||||
else if (new_precedence < old_precedence)
|
||||
} else if (new_action.precedence < old_action.precedence) {
|
||||
result = false;
|
||||
else if (new_action.is_string && !old_action.is_string)
|
||||
} else if (new_action.is_string && !old_action.is_string) {
|
||||
result = true;
|
||||
else if (old_action.is_string && !new_action.is_string)
|
||||
} else if (old_action.is_string && !new_action.is_string) {
|
||||
result = false;
|
||||
else if (new_action.symbol.index < old_action.symbol.index)
|
||||
} else if (new_action.symbol.index < old_action.symbol.index) {
|
||||
result = true;
|
||||
else
|
||||
} else {
|
||||
result = false;
|
||||
}
|
||||
|
||||
if (result)
|
||||
if (result) {
|
||||
possible_homonyms[old_action.symbol.index].insert(new_action.symbol.index);
|
||||
else
|
||||
} else {
|
||||
possible_homonyms[new_action.symbol.index].insert(old_action.symbol.index);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#include "compiler/build_tables/build_lex_table.h"
|
||||
#include "compiler/build_tables/lex_table_builder.h"
|
||||
#include <climits>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
|
@ -16,15 +16,18 @@
|
|||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::map;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::unordered_map;
|
||||
using std::unique_ptr;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::CharacterSet;
|
||||
|
|
@ -33,37 +36,74 @@ using rules::Symbol;
|
|||
using rules::Metadata;
|
||||
using rules::Seq;
|
||||
|
||||
class LexTableBuilder {
|
||||
// Rule visitor that accumulates, via CharacterSet::add_set, the character sets
// reached when only the left operand of every `Seq` is followed. Choice,
// Repeat and Metadata rules are descended into fully.
class StartingCharacterAggregator : public rules::RuleFn<void> {
  // Only the left side of a sequence is visited.
  void apply_to(const rules::Seq *sequence) {
    apply(sequence->left);
  }

  // Every alternative of a choice is visited.
  void apply_to(const rules::Choice *choice) {
    for (const rule_ptr &alternative : choice->elements) {
      apply(alternative);
    }
  }

  // Visit the repeated content.
  void apply_to(const rules::Repeat *repetition) {
    apply(repetition->content);
  }

  // Metadata is transparent: visit the wrapped rule.
  void apply_to(const rules::Metadata *metadata) {
    apply(metadata->rule);
  }

  // Leaf case: fold the character set into the accumulated result.
  void apply_to(const rules::CharacterSet *characters) {
    result.add_set(*characters);
  }

 public:
  // Accumulated character set; read it after calling apply().
  CharacterSet result;
};
|
||||
|
||||
class LexTableBuilderImpl : public LexTableBuilder {
|
||||
LexTable lex_table;
|
||||
ParseTable *parse_table;
|
||||
const LexicalGrammar lex_grammar;
|
||||
const LexicalGrammar grammar;
|
||||
vector<rule_ptr> separator_rules;
|
||||
CharacterSet first_separator_characters;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<LexItemSet, LexStateId> lex_state_ids;
|
||||
|
||||
public:
|
||||
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
|
||||
: parse_table(parse_table), lex_grammar(lex_grammar) {
|
||||
for (const rule_ptr &rule : lex_grammar.separators)
|
||||
vector<bool> shadowed_token_indices;
|
||||
|
||||
LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) {
|
||||
StartingCharacterAggregator starting_character_aggregator;
|
||||
for (const rule_ptr &rule : grammar.separators) {
|
||||
separator_rules.push_back(Repeat::build(rule));
|
||||
starting_character_aggregator.apply(rule);
|
||||
}
|
||||
separator_rules.push_back(Blank::build());
|
||||
first_separator_characters = starting_character_aggregator.result;
|
||||
shadowed_token_indices.resize(grammar.variables.size());
|
||||
}
|
||||
|
||||
LexTable build() {
|
||||
for (ParseState &parse_state : parse_table->states)
|
||||
add_lex_state_for_parse_state(&parse_state);
|
||||
|
||||
mark_fragile_tokens();
|
||||
remove_duplicate_lex_states();
|
||||
|
||||
LexTable build(ParseTable *parse_table) {
|
||||
for (ParseState &parse_state : parse_table->states) {
|
||||
parse_state.lex_state_id = add_lex_state(
|
||||
item_set_for_terminals(parse_state.terminal_entries)
|
||||
);
|
||||
}
|
||||
mark_fragile_tokens(parse_table);
|
||||
remove_duplicate_lex_states(parse_table);
|
||||
return lex_table;
|
||||
}
|
||||
|
||||
private:
|
||||
void add_lex_state_for_parse_state(ParseState *parse_state) {
|
||||
parse_state->lex_state_id = add_lex_state(
|
||||
item_set_for_terminals(parse_state->terminal_entries)
|
||||
);
|
||||
// Builds a throwaway lex state containing just the `left` and `right` tokens
// and reports whether `right` was flagged in shadowed_token_indices while
// that state (and the states reachable from it) were being generated.
// Any previously built lex states are discarded first via clear().
bool detect_conflict(Symbol::Index left, Symbol::Index right) {
  clear();

  const Symbol left_token(left, Symbol::Terminal);
  const Symbol right_token(right, Symbol::Terminal);

  // Only the keys matter here; operator[] default-constructs the entries.
  map<Symbol, ParseTableEntry> token_entries;
  token_entries[left_token];
  token_entries[right_token];

  add_lex_state(item_set_for_terminals(token_entries));

  const bool right_is_shadowed = shadowed_token_indices[right];
  return right_is_shadowed;
}
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
|
|
@ -80,6 +120,13 @@ class LexTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
// Resets the builder's per-run state: drops all generated lex states and
// their item-set lookup, and marks every token as not shadowed.
void clear() {
  shadowed_token_indices.assign(grammar.variables.size(), false);
  lex_state_ids.clear();
  lex_table.states.clear();
}
|
||||
|
||||
private:
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const auto &pair : item_set.transitions()) {
|
||||
const CharacterSet &characters = pair.first;
|
||||
|
|
@ -87,11 +134,28 @@ class LexTableBuilder {
|
|||
|
||||
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
|
||||
auto current_action = lex_table.states[state_id].accept_action;
|
||||
if (conflict_manager.resolve(transition.destination, action,
|
||||
current_action)) {
|
||||
action.state_index = add_lex_state(transition.destination);
|
||||
lex_table.states[state_id].advance_actions[characters] = action;
|
||||
if (current_action.is_present()) {
|
||||
bool prefer_advancing = conflict_manager.resolve(transition.destination, action, current_action);
|
||||
bool matches_accepted_token = false;
|
||||
for (const LexItem &item : transition.destination.entries) {
|
||||
if (item.lhs == current_action.symbol) {
|
||||
matches_accepted_token = true;
|
||||
} else if (!transition.in_main_token && !item.lhs.is_built_in() && !prefer_advancing) {
|
||||
shadowed_token_indices[item.lhs.index] = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!matches_accepted_token && characters.intersects(first_separator_characters)) {
|
||||
shadowed_token_indices[current_action.symbol.index] = true;
|
||||
}
|
||||
|
||||
if (!prefer_advancing) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
action.state_index = add_lex_state(transition.destination);
|
||||
lex_table.states[state_id].advance_actions[characters] = action;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -101,16 +165,21 @@ class LexTableBuilder {
|
|||
if (completion_status.is_done) {
|
||||
AcceptTokenAction action(item.lhs, completion_status.precedence.max,
|
||||
item.lhs.is_built_in() ||
|
||||
lex_grammar.variables[item.lhs.index].is_string);
|
||||
grammar.variables[item.lhs.index].is_string);
|
||||
|
||||
auto current_action = lex_table.states[state_id].accept_action;
|
||||
if (conflict_manager.resolve(action, current_action))
|
||||
lex_table.states[state_id].accept_action = action;
|
||||
if (current_action.is_present()) {
|
||||
if (!conflict_manager.resolve(action, current_action)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
lex_table.states[state_id].accept_action = action;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mark_fragile_tokens() {
|
||||
void mark_fragile_tokens(ParseTable *parse_table) {
|
||||
for (ParseState &state : parse_table->states) {
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol symbol = entry.first;
|
||||
|
|
@ -138,7 +207,7 @@ class LexTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
void remove_duplicate_lex_states() {
|
||||
void remove_duplicate_lex_states(ParseTable *parse_table) {
|
||||
for (LexState &state : lex_table.states) {
|
||||
state.accept_action.is_string = false;
|
||||
state.accept_action.precedence = 0;
|
||||
|
|
@ -229,7 +298,7 @@ class LexTableBuilder {
|
|||
if (symbol == rules::END_OF_INPUT())
|
||||
return { CharacterSet().include(0).copy() };
|
||||
|
||||
rule_ptr rule = lex_grammar.variables[symbol.index].rule;
|
||||
rule_ptr rule = grammar.variables[symbol.index].rule;
|
||||
|
||||
auto choice = rule->as<Choice>();
|
||||
if (choice)
|
||||
|
|
@ -239,8 +308,16 @@ class LexTableBuilder {
|
|||
}
|
||||
};
|
||||
|
||||
LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {
|
||||
return LexTableBuilder(table, grammar).build();
|
||||
// Factory: the concrete builder is always a LexTableBuilderImpl, handed back
// through the public LexTableBuilder interface.
unique_ptr<LexTableBuilder> LexTableBuilder::create(const LexicalGrammar &grammar) {
  unique_ptr<LexTableBuilder> builder(new LexTableBuilderImpl(grammar));
  return builder;
}
|
||||
|
||||
// Forwards to the implementation class; every LexTableBuilder produced by
// create() is a LexTableBuilderImpl, so the downcast is valid.
LexTable LexTableBuilder::build(ParseTable *parse_table) {
  auto *impl = static_cast<LexTableBuilderImpl *>(this);
  return impl->build(parse_table);
}
|
||||
|
||||
// Forwards to the implementation class; every LexTableBuilder produced by
// create() is a LexTableBuilderImpl, so the downcast is valid.
bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right) {
  auto *impl = static_cast<LexTableBuilderImpl *>(this);
  return impl->detect_conflict(left, right);
}
|
||||
|
||||
} // namespace build_tables
|
||||
26
src/compiler/build_tables/lex_table_builder.h
Normal file
26
src/compiler/build_tables/lex_table_builder.h
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_
|
||||
#define COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_
|
||||
|
||||
#include <memory>
|
||||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct ParseTable;
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
class LexTableBuilder {
|
||||
public:
|
||||
static std::unique_ptr<LexTableBuilder> create(const LexicalGrammar &);
|
||||
LexTable build(ParseTable *);
|
||||
bool detect_conflict(rules::Symbol::Index, rules::Symbol::Index);
|
||||
protected:
|
||||
LexTableBuilder() = default;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_
|
||||
Loading…
Add table
Add a link
Reference in a new issue