Move incompatible token map into LexTableBuilder

This commit is contained in:
Max Brunsfeld 2017-08-31 15:40:43 -07:00
parent f8649824fa
commit 9d668c5004
10 changed files with 78 additions and 254 deletions

View file

@ -11,7 +11,6 @@
'externals/json-parser',
],
'sources': [
'src/compiler/build_tables/build_tables.cc',
'src/compiler/build_tables/lex_item.cc',
'src/compiler/build_tables/lex_item_transitions.cc',
'src/compiler/build_tables/lex_conflict_manager.cc',

View file

@ -1,35 +0,0 @@
#include "compiler/build_tables/build_tables.h"
#include <tuple>
#include "compiler/build_tables/lex_table_builder.h"
#include "compiler/build_tables/parse_table_builder.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
namespace build_tables {
using std::tuple;
using std::make_tuple;
// Builds both tables for a grammar pair and returns them together with the
// error reported by the parse table builder.
//
// The lex table builder is created first because the parse table builder is
// handed a non-owning pointer to it. The lex table is built last, from the
// finished parse table, and it is built whether or not an error was reported.
tuple<ParseTable, LexTable, CompileError> build_tables(
  const SyntaxGrammar &syntax_grammar,
  const LexicalGrammar &lexical_grammar
) {
  auto lex_builder = LexTableBuilder::create(lexical_grammar);
  auto parse_builder = ParseTableBuilder::create(syntax_grammar, lexical_grammar, lex_builder.get());

  // `first` is the parse table, `second` is the compile error.
  auto parse_result = parse_builder->build();
  ParseTable parse_table = parse_result.first;

  // The lex table builder receives the parse table by pointer.
  LexTable lex_table = lex_builder->build(&parse_table);

  return make_tuple(parse_table, lex_table, parse_result.second);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -1,24 +0,0 @@
#ifndef COMPILER_BUILD_TABLES_BUILD_TABLES_H_
#define COMPILER_BUILD_TABLES_BUILD_TABLES_H_
#include <tuple>
#include "compiler/parse_table.h"
#include "compiler/lex_table.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
struct SyntaxGrammar;
struct LexicalGrammar;
namespace build_tables {
// Builds the parse table and the lex table for a syntax grammar and its
// lexical grammar. The result is (parse table, lex table, compile error);
// callers should inspect the error element before using the tables.
std::tuple<ParseTable, LexTable, CompileError> build_tables(
const SyntaxGrammar &,
const LexicalGrammar &
);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_BUILD_TABLES_H_

View file

@ -9,6 +9,7 @@
#include <vector>
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/parse_table.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rule.h"
@ -76,13 +77,18 @@ class LexTableBuilderImpl : public LexTableBuilder {
unordered_map<LexItemSet, LexStateId> lex_state_ids;
map<Symbol::Index, CharacterSet> following_characters_by_token_index;
vector<set<Symbol>> incompatible_tokens_by_token_index;
CharacterSet separator_start_characters;
CharacterSet current_conflict_detection_following_characters;
Symbol::Index current_conflict_detection_token_index;
bool current_conflict_value;
public:
LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) {
LexTableBuilderImpl(const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
const vector<set<Symbol::Index>> &following_tokens_by_token_index) :
grammar(lexical_grammar),
incompatible_tokens_by_token_index(lexical_grammar.variables.size()) {
StartingCharacterAggregator separator_character_aggregator;
for (const auto &rule : grammar.separators) {
separator_rules.push_back(Repeat{rule});
@ -91,6 +97,26 @@ class LexTableBuilderImpl : public LexTableBuilder {
separator_rules.push_back(Blank{});
separator_start_characters = separator_character_aggregator.result;
clear();
for (unsigned i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol token = Symbol::terminal(i);
auto &incompatible_indices = incompatible_tokens_by_token_index[i];
for (unsigned j = 0; j < n; j++) {
if (i == j) continue;
if (detect_conflict(i, j, following_tokens_by_token_index)) {
incompatible_indices.insert(Symbol::terminal(j));
}
}
for (const ExternalToken &external_token : syntax_grammar.external_tokens) {
if (external_token.corresponding_internal_token == token) {
for (unsigned j = 0; j < syntax_grammar.external_tokens.size(); j++) {
incompatible_indices.insert(Symbol::external(j));
}
}
}
}
}
LexTable build(ParseTable *parse_table) {
@ -104,8 +130,12 @@ class LexTableBuilderImpl : public LexTableBuilder {
return lex_table;
}
// Returns the set of symbols recorded as incompatible with the token at
// `index`. The set is populated in the constructor and returned by reference,
// so it stays valid only as long as this builder does.
// `index` is used as a plain vector subscript with no bounds check; the vector
// is sized to lexical_grammar.variables.size().
const set<Symbol> &get_incompatible_tokens(Symbol::Index index) const {
return incompatible_tokens_by_token_index[index];
}
bool detect_conflict(Symbol::Index left, Symbol::Index right,
const vector<set<Symbol::Index>> &following_terminals_by_terminal_index) {
const vector<set<Symbol::Index>> &following_tokens_by_token_index) {
StartingCharacterAggregator left_starting_characters;
StartingCharacterAggregator right_starting_characters;
left_starting_characters.apply(grammar.variables[left].rule);
@ -119,7 +149,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
auto following_characters_entry = following_characters_by_token_index.find(right);
if (following_characters_entry == following_characters_by_token_index.end()) {
StartingCharacterAggregator aggregator;
for (auto following_token_index : following_terminals_by_terminal_index[right]) {
for (auto following_token_index : following_tokens_by_token_index[right]) {
aggregator.apply(grammar.variables[following_token_index].rule);
}
following_characters_entry =
@ -369,17 +399,22 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
};
unique_ptr<LexTableBuilder> LexTableBuilder::create(const LexicalGrammar &grammar) {
return unique_ptr<LexTableBuilder>(new LexTableBuilderImpl(grammar));
unique_ptr<LexTableBuilder> LexTableBuilder::create(const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
const vector<set<Symbol::Index>> &following_tokens) {
return unique_ptr<LexTableBuilder>(new LexTableBuilderImpl(
syntax_grammar,
lexical_grammar,
following_tokens
));
}
// Public entry point that forwards to LexTableBuilderImpl::build.
// The downcast is unchecked: it relies on every LexTableBuilder instance
// actually being a LexTableBuilderImpl (instances are obtained through
// LexTableBuilder::create, which allocates the Impl type).
LexTable LexTableBuilder::build(ParseTable *parse_table) {
return static_cast<LexTableBuilderImpl *>(this)->build(parse_table);
}
bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right,
const vector<set<Symbol::Index>> &following_terminals) {
return static_cast<LexTableBuilderImpl *>(this)->detect_conflict(left, right, following_terminals);
const set<Symbol> &LexTableBuilder::get_incompatible_tokens(Symbol::Index token) const {
return static_cast<const LexTableBuilderImpl *>(this)->get_incompatible_tokens(token);
}
} // namespace build_tables

View file

@ -9,19 +9,19 @@
namespace tree_sitter {
struct ParseTable;
struct SyntaxGrammar;
struct LexicalGrammar;
namespace build_tables {
class LexTableBuilder {
public:
static std::unique_ptr<LexTableBuilder> create(const LexicalGrammar &);
static std::unique_ptr<LexTableBuilder> create(const SyntaxGrammar &,
const LexicalGrammar &,
const std::vector<std::set<rules::Symbol::Index>> &);
LexTable build(ParseTable *);
bool detect_conflict(
rules::Symbol::Index,
rules::Symbol::Index,
const std::vector<std::set<rules::Symbol::Index>> &following_terminals_by_terminal_index
);
const std::set<rules::Symbol> &get_incompatible_tokens(rules::Symbol::Index) const;
protected:
LexTableBuilder() = default;
};

View file

@ -19,9 +19,10 @@ namespace build_tables {
using std::deque;
using std::find;
using std::pair;
using std::vector;
using std::set;
using std::tuple;
using std::make_tuple;
using std::map;
using std::move;
using std::string;
@ -49,26 +50,20 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
deque<ParseStateQueueEntry> parse_state_queue;
ParseTable parse_table;
ParseItemSetBuilder item_set_builder;
LexTableBuilder *lex_table_builder;
unique_ptr<LexTableBuilder> lex_table_builder;
set<ParseAction> fragile_reductions;
vector<set<Symbol>> incompatible_tokens_by_token_index;
vector<set<Symbol::Index>> following_tokens_by_token_index;
bool processing_recovery_states;
public:
ParseTableBuilderImpl(
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
LexTableBuilder *lex_table_builder
) : grammar(syntax_grammar),
ParseTableBuilderImpl(const SyntaxGrammar &syntax_grammar, const LexicalGrammar &lexical_grammar)
: grammar(syntax_grammar),
lexical_grammar(lexical_grammar),
item_set_builder(syntax_grammar, lexical_grammar),
lex_table_builder(lex_table_builder),
incompatible_tokens_by_token_index(lexical_grammar.variables.size()),
following_tokens_by_token_index(lexical_grammar.variables.size()),
processing_recovery_states(false) {}
pair<ParseTable, CompileError> build() {
tuple<ParseTable, LexTable, CompileError> build() {
// Ensure that the empty alias sequence has index 0.
parse_table.alias_sequences.push_back({});
@ -90,9 +85,13 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
}});
CompileError error = process_part_state_queue();
if (error) return {parse_table, error};
if (error) return make_tuple(parse_table, LexTable(), error);
compute_unmergable_token_pairs();
lex_table_builder = LexTableBuilder::create(
grammar,
lexical_grammar,
following_tokens_by_token_index
);
processing_recovery_states = true;
build_error_parse_state(error_state_id);
@ -100,7 +99,9 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
mark_fragile_actions();
remove_duplicate_parse_states();
return {parse_table, CompileError::none()};
auto lex_table = lex_table_builder->build(&parse_table);
return make_tuple(parse_table, lex_table, CompileError::none());
}
private:
@ -131,9 +132,9 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
Symbol token = Symbol::terminal(i);
bool has_non_reciprocal_conflict = false;
for (Symbol incompatible_token : incompatible_tokens_by_token_index[i]) {
for (Symbol incompatible_token : lex_table_builder->get_incompatible_tokens(i)) {
if (incompatible_token.is_terminal() &&
!incompatible_tokens_by_token_index[incompatible_token.index].count(token)) {
!lex_table_builder->get_incompatible_tokens(incompatible_token.index).count(token)) {
has_non_reciprocal_conflict = true;
break;
}
@ -355,28 +356,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
return false;
}
void compute_unmergable_token_pairs() {
for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
Symbol token = Symbol::terminal(i);
auto &incompatible_indices = incompatible_tokens_by_token_index[i];
for (unsigned j = 0; j < n; j++) {
if (i == j) continue;
if (lex_table_builder->detect_conflict(i, j, following_tokens_by_token_index)) {
incompatible_indices.insert(Symbol::terminal(j));
}
}
for (const ExternalToken &external_token : grammar.external_tokens) {
if (external_token.corresponding_internal_token == token) {
for (unsigned j = 0; j < grammar.external_tokens.size(); j++) {
incompatible_indices.insert(Symbol::external(j));
}
}
}
}
}
void remove_duplicate_parse_states() {
unordered_map<size_t, set<ParseStateId>> state_indices_by_signature;
@ -474,7 +453,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
if (left_entry.second.actions.back().type != ParseActionTypeReduce) return false;
if (!has_actions(right_state, left_entry.second)) return false;
if (!lookahead.is_built_in()) {
for (const Symbol &incompatible_token : incompatible_tokens_by_token_index[lookahead.index]) {
for (const Symbol &incompatible_token : lex_table_builder->get_incompatible_tokens(lookahead.index)) {
if (right_state.terminal_entries.count(incompatible_token)) return false;
}
}
@ -492,7 +471,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
if (right_entry.second.actions.back().type != ParseActionTypeReduce) return false;
if (!has_actions(left_state, right_entry.second)) return false;
if (!lookahead.is_built_in()) {
for (const Symbol &incompatible_token : incompatible_tokens_by_token_index[lookahead.index]) {
for (const Symbol &incompatible_token : lex_table_builder->get_incompatible_tokens(lookahead.index)) {
if (left_state.terminal_entries.count(incompatible_token)) return false;
}
}
@ -805,15 +784,12 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
unique_ptr<ParseTableBuilder> ParseTableBuilder::create(
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
LexTableBuilder *lex_table_builder
const LexicalGrammar &lexical_grammar
) {
return unique_ptr<ParseTableBuilder>(
new ParseTableBuilderImpl(syntax_grammar, lexical_grammar, lex_table_builder)
);
return unique_ptr<ParseTableBuilder>(new ParseTableBuilderImpl(syntax_grammar, lexical_grammar));
}
pair<ParseTable, CompileError> ParseTableBuilder::build() {
tuple<ParseTable, LexTable, CompileError> ParseTableBuilder::build() {
return static_cast<ParseTableBuilderImpl *>(this)->build();
}

View file

@ -8,21 +8,17 @@
namespace tree_sitter {
struct ParseTable;
struct LexTable;
struct SyntaxGrammar;
struct LexicalGrammar;
namespace build_tables {
class LexTableBuilder;
class ParseTableBuilder {
public:
static std::unique_ptr<ParseTableBuilder> create(
const SyntaxGrammar &,
const LexicalGrammar &,
LexTableBuilder *
);
std::pair<ParseTable, CompileError> build();
static std::unique_ptr<ParseTableBuilder> create(const SyntaxGrammar &, const LexicalGrammar &);
std::tuple<ParseTable, LexTable, CompileError> build();
protected:
ParseTableBuilder() = default;
};

View file

@ -1,6 +1,6 @@
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/prepare_grammar.h"
#include "compiler/build_tables/build_tables.h"
#include "compiler/build_tables/parse_table_builder.h"
#include "compiler/generate_code/c_code.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
@ -30,8 +30,8 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) {
return { nullptr, strdup(error.message.c_str()), error.type };
}
auto table_build_result =
build_tables::build_tables(syntax_grammar, lexical_grammar);
auto builder = build_tables::ParseTableBuilder::create(syntax_grammar, lexical_grammar);
auto table_build_result = builder->build();
const ParseTable &parse_table = get<0>(table_build_result);
const LexTable &lex_table = get<1>(table_build_result);
error = get<2>(table_build_result);

View file

@ -1,122 +0,0 @@
#include "test_helper.h"
#include "compiler/lexical_grammar.h"
#include "compiler/build_tables/lex_table_builder.h"
using namespace build_tables;
using namespace rules;
START_TEST
describe("LexTableBuilder::detect_conflict", []() {
vector<Rule> separators({
CharacterSet({ ' ', '\t' }),
});
// Case 1: token_0 is the sequence 'a','b','c' and token_1 is the sequence
// 'b','c','d'. No conflict is expected in either direction.
it("returns false for tokens that don't match the same string", [&]() {
auto builder = LexTableBuilder::create(LexicalGrammar{
{
LexicalVariable{
"token_0",
VariableTypeNamed,
Rule::seq({
CharacterSet({ 'a' }),
CharacterSet({ 'b' }),
CharacterSet({ 'c' }),
}),
false
},
LexicalVariable{
"token_1",
VariableTypeNamed,
Rule::seq({
CharacterSet({ 'b' }),
CharacterSet({ 'c' }),
CharacterSet({ 'd' }),
}),
false
},
},
separators
});
// The third argument holds one (empty) following-token set per token.
// detect_conflict is directional, so both argument orders are checked.
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsFalse());
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
});
// Case 2: token_0 is a repetition of any character except '\n', and token_1
// is the sequence 'a','b','c'. With no following tokens supplied, a conflict
// is expected for (token_0, token_1) and not for the reverse order.
it("returns true when the left token can match a string that the right token matches, "
"plus a separator character", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"token_0",
VariableTypeNamed,
Rule::repeat(CharacterSet().include_all().exclude('\n')), // regex: /.+/
false
},
LexicalVariable{
"token_1",
VariableTypeNamed,
Rule::seq({ CharacterSet({ 'a' }), CharacterSet({ 'b' }), CharacterSet({ 'c' }) }), // string: 'abc'
true
},
},
separators
};
auto builder = LexTableBuilder::create(grammar);
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
// The same expectations are repeated after clearing token_1's is_string flag.
// NOTE(review): `builder` was created before this mutation. Whether it sees
// the change depends on whether the builder keeps a reference to `grammar`
// or its own copy; if it copies, these two assertions repeat the ones above.
// Confirm against the builder implementation.
grammar.variables[1].is_string = false;
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
});
// Case 3: token_0 is '>' followed by '=', token_1 is '>', and token_2 is '='.
// Whether token_0 conflicts with token_1 depends on which tokens are declared
// as able to follow token_1.
it("returns true when the left token matches a string that the right token matches, "
"plus the first character of some token that can follow the right token", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"token_0",
VariableTypeNamed,
Rule::seq({
CharacterSet({ '>' }),
CharacterSet({ '=' }),
}),
true
},
LexicalVariable{
"token_1",
VariableTypeNamed,
Rule::seq({
CharacterSet({ '>' }),
}),
true
},
LexicalVariable{
"token_2",
VariableTypeNamed,
Rule::seq({
CharacterSet({ '=' }),
}),
true
},
},
separators
};
// If no tokens can follow token_1, then there's no conflict
auto builder = LexTableBuilder::create(grammar);
// One following-token set per token, all initially empty.
vector<set<Symbol::Index>> following_tokens_by_token_index(3);
AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsFalse());
AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse());
// If token_2 can follow token_1, then token_0 conflicts with token_1
// A fresh builder is created before re-checking with the updated sets.
builder = LexTableBuilder::create(grammar);
following_tokens_by_token_index[1].insert(2);
AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsTrue());
AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse());
});
});
END_TEST

View file

@ -39,7 +39,6 @@
'sources': [
'test/compiler/build_tables/lex_conflict_manager_test.cc',
'test/compiler/build_tables/lex_item_test.cc',
'test/compiler/build_tables/lex_table_builder_test.cc',
'test/compiler/build_tables/parse_item_set_builder_test.cc',
'test/compiler/build_tables/rule_can_be_blank_test.cc',
'test/compiler/prepare_grammar/expand_repeats_test.cc',