Move incompatible token map into LexTableBuilder

This commit is contained in:
Max Brunsfeld 2017-08-31 15:40:43 -07:00
parent f8649824fa
commit 9d668c5004
10 changed files with 78 additions and 254 deletions

View file

@ -1,35 +0,0 @@
#include "compiler/build_tables/build_tables.h"
#include <tuple>
#include "compiler/build_tables/lex_table_builder.h"
#include "compiler/build_tables/parse_table_builder.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
namespace build_tables {
using std::tuple;
using std::make_tuple;
// Builds both tables for a prepared grammar pair.
// Returns (parse table, lex table, error reported by the parse table builder).
tuple<ParseTable, LexTable, CompileError> build_tables(
  const SyntaxGrammar &syntax_grammar,
  const LexicalGrammar &lexical_grammar
) {
  // The parse table builder only receives a raw pointer to the lex table
  // builder, so the owning handle must outlive the parse table build below.
  auto lexer_builder = LexTableBuilder::create(lexical_grammar);
  auto parser_builder = ParseTableBuilder::create(
    syntax_grammar, lexical_grammar, lexer_builder.get()
  );

  auto parse_outcome = parser_builder->build();
  ParseTable parse_table = parse_outcome.first;
  const CompileError error = parse_outcome.second;

  // The lex table is built whether or not `error` is set; the builder is
  // handed a pointer to the local parse table.
  LexTable lex_table = lexer_builder->build(&parse_table);

  return make_tuple(parse_table, lex_table, error);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -1,24 +0,0 @@
#ifndef COMPILER_BUILD_TABLES_BUILD_TABLES_H_
#define COMPILER_BUILD_TABLES_BUILD_TABLES_H_
#include <tuple>
#include "compiler/parse_table.h"
#include "compiler/lex_table.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
struct SyntaxGrammar;
struct LexicalGrammar;
namespace build_tables {
// Builds the tables for the given syntax grammar and lexical grammar.
// The returned tuple holds, in order: the ParseTable, the LexTable, and a
// CompileError.
std::tuple<ParseTable, LexTable, CompileError> build_tables(
const SyntaxGrammar &,
const LexicalGrammar &
);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_BUILD_TABLES_H_

View file

@ -9,6 +9,7 @@
#include <vector>
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/parse_table.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rule.h"
@ -76,13 +77,18 @@ class LexTableBuilderImpl : public LexTableBuilder {
unordered_map<LexItemSet, LexStateId> lex_state_ids;
map<Symbol::Index, CharacterSet> following_characters_by_token_index;
vector<set<Symbol>> incompatible_tokens_by_token_index;
CharacterSet separator_start_characters;
CharacterSet current_conflict_detection_following_characters;
Symbol::Index current_conflict_detection_token_index;
bool current_conflict_value;
public:
LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) {
LexTableBuilderImpl(const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
const vector<set<Symbol::Index>> &following_tokens_by_token_index) :
grammar(lexical_grammar),
incompatible_tokens_by_token_index(lexical_grammar.variables.size()) {
StartingCharacterAggregator separator_character_aggregator;
for (const auto &rule : grammar.separators) {
separator_rules.push_back(Repeat{rule});
@ -91,6 +97,26 @@ class LexTableBuilderImpl : public LexTableBuilder {
separator_rules.push_back(Blank{});
separator_start_characters = separator_character_aggregator.result;
clear();
for (unsigned i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol token = Symbol::terminal(i);
auto &incompatible_indices = incompatible_tokens_by_token_index[i];
for (unsigned j = 0; j < n; j++) {
if (i == j) continue;
if (detect_conflict(i, j, following_tokens_by_token_index)) {
incompatible_indices.insert(Symbol::terminal(j));
}
}
for (const ExternalToken &external_token : syntax_grammar.external_tokens) {
if (external_token.corresponding_internal_token == token) {
for (unsigned j = 0; j < syntax_grammar.external_tokens.size(); j++) {
incompatible_indices.insert(Symbol::external(j));
}
}
}
}
}
LexTable build(ParseTable *parse_table) {
@ -104,8 +130,12 @@ class LexTableBuilderImpl : public LexTableBuilder {
return lex_table;
}
// Returns the set of symbols stored in incompatible_tokens_by_token_index for
// the token at `index`. The index is used directly as a vector subscript with
// no bounds check, and the result is a reference into this builder's storage.
const set<Symbol> &get_incompatible_tokens(Symbol::Index index) const {
return incompatible_tokens_by_token_index[index];
}
bool detect_conflict(Symbol::Index left, Symbol::Index right,
const vector<set<Symbol::Index>> &following_terminals_by_terminal_index) {
const vector<set<Symbol::Index>> &following_tokens_by_token_index) {
StartingCharacterAggregator left_starting_characters;
StartingCharacterAggregator right_starting_characters;
left_starting_characters.apply(grammar.variables[left].rule);
@ -119,7 +149,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
auto following_characters_entry = following_characters_by_token_index.find(right);
if (following_characters_entry == following_characters_by_token_index.end()) {
StartingCharacterAggregator aggregator;
for (auto following_token_index : following_terminals_by_terminal_index[right]) {
for (auto following_token_index : following_tokens_by_token_index[right]) {
aggregator.apply(grammar.variables[following_token_index].rule);
}
following_characters_entry =
@ -369,17 +399,22 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
};
unique_ptr<LexTableBuilder> LexTableBuilder::create(const LexicalGrammar &grammar) {
return unique_ptr<LexTableBuilder>(new LexTableBuilderImpl(grammar));
unique_ptr<LexTableBuilder> LexTableBuilder::create(const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
const vector<set<Symbol::Index>> &following_tokens) {
return unique_ptr<LexTableBuilder>(new LexTableBuilderImpl(
syntax_grammar,
lexical_grammar,
following_tokens
));
}
// Forwards to LexTableBuilderImpl::build. The static_cast is unchecked, so
// `this` must really be a LexTableBuilderImpl.
// NOTE(review): presumably guaranteed because instances are only obtained
// through LexTableBuilder::create — confirm no other subclass exists.
LexTable LexTableBuilder::build(ParseTable *parse_table) {
return static_cast<LexTableBuilderImpl *>(this)->build(parse_table);
}
bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right,
const vector<set<Symbol::Index>> &following_terminals) {
return static_cast<LexTableBuilderImpl *>(this)->detect_conflict(left, right, following_terminals);
// Forwards to LexTableBuilderImpl::get_incompatible_tokens and returns its
// reference unchanged. The static_cast is unchecked, so `this` must really be
// a LexTableBuilderImpl.
// NOTE(review): presumably guaranteed because instances are only obtained
// through LexTableBuilder::create — confirm no other subclass exists.
const set<Symbol> &LexTableBuilder::get_incompatible_tokens(Symbol::Index token) const {
return static_cast<const LexTableBuilderImpl *>(this)->get_incompatible_tokens(token);
}
} // namespace build_tables

View file

@ -9,19 +9,19 @@
namespace tree_sitter {
struct ParseTable;
struct SyntaxGrammar;
struct LexicalGrammar;
namespace build_tables {
class LexTableBuilder {
public:
static std::unique_ptr<LexTableBuilder> create(const LexicalGrammar &);
static std::unique_ptr<LexTableBuilder> create(const SyntaxGrammar &,
const LexicalGrammar &,
const std::vector<std::set<rules::Symbol::Index>> &);
LexTable build(ParseTable *);
bool detect_conflict(
rules::Symbol::Index,
rules::Symbol::Index,
const std::vector<std::set<rules::Symbol::Index>> &following_terminals_by_terminal_index
);
const std::set<rules::Symbol> &get_incompatible_tokens(rules::Symbol::Index) const;
protected:
LexTableBuilder() = default;
};

View file

@ -19,9 +19,10 @@ namespace build_tables {
using std::deque;
using std::find;
using std::pair;
using std::vector;
using std::set;
using std::tuple;
using std::make_tuple;
using std::map;
using std::move;
using std::string;
@ -49,26 +50,20 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
deque<ParseStateQueueEntry> parse_state_queue;
ParseTable parse_table;
ParseItemSetBuilder item_set_builder;
LexTableBuilder *lex_table_builder;
unique_ptr<LexTableBuilder> lex_table_builder;
set<ParseAction> fragile_reductions;
vector<set<Symbol>> incompatible_tokens_by_token_index;
vector<set<Symbol::Index>> following_tokens_by_token_index;
bool processing_recovery_states;
public:
ParseTableBuilderImpl(
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
LexTableBuilder *lex_table_builder
) : grammar(syntax_grammar),
ParseTableBuilderImpl(const SyntaxGrammar &syntax_grammar, const LexicalGrammar &lexical_grammar)
: grammar(syntax_grammar),
lexical_grammar(lexical_grammar),
item_set_builder(syntax_grammar, lexical_grammar),
lex_table_builder(lex_table_builder),
incompatible_tokens_by_token_index(lexical_grammar.variables.size()),
following_tokens_by_token_index(lexical_grammar.variables.size()),
processing_recovery_states(false) {}
pair<ParseTable, CompileError> build() {
tuple<ParseTable, LexTable, CompileError> build() {
// Ensure that the empty rename sequence has index 0.
parse_table.alias_sequences.push_back({});
@ -90,9 +85,13 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
}});
CompileError error = process_part_state_queue();
if (error) return {parse_table, error};
if (error) return make_tuple(parse_table, LexTable(), error);
compute_unmergable_token_pairs();
lex_table_builder = LexTableBuilder::create(
grammar,
lexical_grammar,
following_tokens_by_token_index
);
processing_recovery_states = true;
build_error_parse_state(error_state_id);
@ -100,7 +99,9 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
mark_fragile_actions();
remove_duplicate_parse_states();
return {parse_table, CompileError::none()};
auto lex_table = lex_table_builder->build(&parse_table);
return make_tuple(parse_table, lex_table, CompileError::none());
}
private:
@ -131,9 +132,9 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
Symbol token = Symbol::terminal(i);
bool has_non_reciprocal_conflict = false;
for (Symbol incompatible_token : incompatible_tokens_by_token_index[i]) {
for (Symbol incompatible_token : lex_table_builder->get_incompatible_tokens(i)) {
if (incompatible_token.is_terminal() &&
!incompatible_tokens_by_token_index[incompatible_token.index].count(token)) {
!lex_table_builder->get_incompatible_tokens(incompatible_token.index).count(token)) {
has_non_reciprocal_conflict = true;
break;
}
@ -355,28 +356,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
return false;
}
void compute_unmergable_token_pairs() {
for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
Symbol token = Symbol::terminal(i);
auto &incompatible_indices = incompatible_tokens_by_token_index[i];
for (unsigned j = 0; j < n; j++) {
if (i == j) continue;
if (lex_table_builder->detect_conflict(i, j, following_tokens_by_token_index)) {
incompatible_indices.insert(Symbol::terminal(j));
}
}
for (const ExternalToken &external_token : grammar.external_tokens) {
if (external_token.corresponding_internal_token == token) {
for (unsigned j = 0; j < grammar.external_tokens.size(); j++) {
incompatible_indices.insert(Symbol::external(j));
}
}
}
}
}
void remove_duplicate_parse_states() {
unordered_map<size_t, set<ParseStateId>> state_indices_by_signature;
@ -474,7 +453,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
if (left_entry.second.actions.back().type != ParseActionTypeReduce) return false;
if (!has_actions(right_state, left_entry.second)) return false;
if (!lookahead.is_built_in()) {
for (const Symbol &incompatible_token : incompatible_tokens_by_token_index[lookahead.index]) {
for (const Symbol &incompatible_token : lex_table_builder->get_incompatible_tokens(lookahead.index)) {
if (right_state.terminal_entries.count(incompatible_token)) return false;
}
}
@ -492,7 +471,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
if (right_entry.second.actions.back().type != ParseActionTypeReduce) return false;
if (!has_actions(left_state, right_entry.second)) return false;
if (!lookahead.is_built_in()) {
for (const Symbol &incompatible_token : incompatible_tokens_by_token_index[lookahead.index]) {
for (const Symbol &incompatible_token : lex_table_builder->get_incompatible_tokens(lookahead.index)) {
if (left_state.terminal_entries.count(incompatible_token)) return false;
}
}
@ -805,15 +784,12 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
unique_ptr<ParseTableBuilder> ParseTableBuilder::create(
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
LexTableBuilder *lex_table_builder
const LexicalGrammar &lexical_grammar
) {
return unique_ptr<ParseTableBuilder>(
new ParseTableBuilderImpl(syntax_grammar, lexical_grammar, lex_table_builder)
);
return unique_ptr<ParseTableBuilder>(new ParseTableBuilderImpl(syntax_grammar, lexical_grammar));
}
pair<ParseTable, CompileError> ParseTableBuilder::build() {
tuple<ParseTable, LexTable, CompileError> ParseTableBuilder::build() {
return static_cast<ParseTableBuilderImpl *>(this)->build();
}

View file

@ -8,21 +8,17 @@
namespace tree_sitter {
struct ParseTable;
struct LexTable;
struct SyntaxGrammar;
struct LexicalGrammar;
namespace build_tables {
class LexTableBuilder;
class ParseTableBuilder {
public:
static std::unique_ptr<ParseTableBuilder> create(
const SyntaxGrammar &,
const LexicalGrammar &,
LexTableBuilder *
);
std::pair<ParseTable, CompileError> build();
static std::unique_ptr<ParseTableBuilder> create(const SyntaxGrammar &, const LexicalGrammar &);
std::tuple<ParseTable, LexTable, CompileError> build();
protected:
ParseTableBuilder() = default;
};

View file

@ -1,6 +1,6 @@
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/prepare_grammar.h"
#include "compiler/build_tables/build_tables.h"
#include "compiler/build_tables/parse_table_builder.h"
#include "compiler/generate_code/c_code.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
@ -30,8 +30,8 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) {
return { nullptr, strdup(error.message.c_str()), error.type };
}
auto table_build_result =
build_tables::build_tables(syntax_grammar, lexical_grammar);
auto builder = build_tables::ParseTableBuilder::create(syntax_grammar, lexical_grammar);
auto table_build_result = builder->build();
const ParseTable &parse_table = get<0>(table_build_result);
const LexTable &lex_table = get<1>(table_build_result);
error = get<2>(table_build_result);