Don't include reduce actions for nonterminal lookaheads
This commit is contained in:
parent
c9dcb29c6f
commit
8d9c261e3a
9 changed files with 158 additions and 110 deletions
|
|
@ -15,12 +15,12 @@
|
|||
'src/compiler/build_tables/build_parse_table.cc',
|
||||
'src/compiler/build_tables/build_tables.cc',
|
||||
'src/compiler/build_tables/recovery_tokens.cc',
|
||||
'src/compiler/build_tables/item_set_closure.cc',
|
||||
'src/compiler/build_tables/lex_item.cc',
|
||||
'src/compiler/build_tables/lex_item_transitions.cc',
|
||||
'src/compiler/build_tables/lex_conflict_manager.cc',
|
||||
'src/compiler/build_tables/lookahead_set.cc',
|
||||
'src/compiler/build_tables/parse_item.cc',
|
||||
'src/compiler/build_tables/parse_item_set_builder.cc',
|
||||
'src/compiler/build_tables/parse_conflict_manager.cc',
|
||||
'src/compiler/build_tables/rule_can_be_blank.cc',
|
||||
'src/compiler/compile.cc',
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "spec_helper.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
|
|
@ -9,7 +9,7 @@ using namespace rules;
|
|||
|
||||
START_TEST
|
||||
|
||||
describe("item_set_closure", []() {
|
||||
describe("ParseItemSetBuilder", []() {
|
||||
it("adds items at the beginnings of referenced rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
|
|
@ -39,12 +39,15 @@ describe("item_set_closure", []() {
|
|||
return grammar.variables[variable_index].productions[production_index];
|
||||
};
|
||||
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({
|
||||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, true) }),
|
||||
}
|
||||
}), grammar);
|
||||
});
|
||||
|
||||
ParseItemSetBuilder item_set_builder(grammar);
|
||||
item_set_builder.apply_transitive_closure(&item_set);
|
||||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
|
|
@ -87,12 +90,15 @@ describe("item_set_closure", []() {
|
|||
return grammar.variables[variable_index].productions[production_index];
|
||||
};
|
||||
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({
|
||||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, true) }),
|
||||
}
|
||||
}), grammar);
|
||||
});
|
||||
|
||||
ParseItemSetBuilder item_set_builder(grammar);
|
||||
item_set_builder.apply_transitive_closure(&item_set);
|
||||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
|
|
@ -9,7 +9,7 @@
|
|||
#include "compiler/build_tables/parse_conflict_manager.h"
|
||||
#include "compiler/build_tables/remove_duplicate_states.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
|
@ -40,6 +40,7 @@ class ParseTableBuilder {
|
|||
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
|
||||
ParseTable parse_table;
|
||||
set<string> conflicts;
|
||||
ParseItemSetBuilder item_set_builder;
|
||||
set<const Production *> fragile_productions;
|
||||
bool allow_any_conflict;
|
||||
|
||||
|
|
@ -48,6 +49,7 @@ class ParseTableBuilder {
|
|||
const LexicalGrammar &lex_grammar)
|
||||
: grammar(grammar),
|
||||
lexical_grammar(lex_grammar),
|
||||
item_set_builder(grammar),
|
||||
allow_any_conflict(false) {}
|
||||
|
||||
pair<ParseTable, CompileError> build() {
|
||||
|
|
@ -88,11 +90,11 @@ class ParseTableBuilder {
|
|||
CompileError process_part_state_queue() {
|
||||
while (!item_sets_to_process.empty()) {
|
||||
auto pair = item_sets_to_process.back();
|
||||
ParseItemSet item_set = item_set_closure(pair.first, grammar);
|
||||
|
||||
ParseItemSet &item_set = pair.first;
|
||||
ParseStateId state_id = pair.second;
|
||||
item_sets_to_process.pop_back();
|
||||
|
||||
item_set_builder.apply_transitive_closure(&item_set);
|
||||
add_reduce_actions(item_set, state_id);
|
||||
add_shift_actions(item_set, state_id);
|
||||
add_shift_extra_actions(state_id);
|
||||
|
|
@ -143,7 +145,7 @@ class ParseTableBuilder {
|
|||
ParseStateId state_id = parse_table.add_state();
|
||||
|
||||
parse_state_ids[item_set] = state_id;
|
||||
item_sets_to_process.push_back({ item_set, state_id });
|
||||
item_sets_to_process.push_back({ std::move(item_set), state_id });
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
|
|
|
|||
|
|
@ -1,80 +0,0 @@
|
|||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::shared_ptr;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::NONE;
|
||||
|
||||
// Returns the closure of `input_item_set` under `grammar`: the input entries
// plus, for every item whose next symbol is a non-terminal, an item at the
// start of each of that non-terminal's productions, repeated until no entry
// gains new lookahead symbols.
ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
                              const SyntaxGrammar &grammar) {
  ParseItemSet closure;

  // Explicit work stack standing in for recursion, seeded with a copy of
  // every (item, lookaheads) entry from the input set.
  vector<pair<ParseItem, LookaheadSet>> pending(
    input_item_set.entries.begin(), input_item_set.entries.end());

  while (!pending.empty()) {
    pair<ParseItem, LookaheadSet> entry = pending.back();
    pending.pop_back();
    ParseItem &item = entry.first;
    LookaheadSet &lookaheads = entry.second;

    // Merge this entry into the closure. When insert_all returns false the
    // entry is not expanded again.
    bool gained_lookaheads = closure.entries[item].insert_all(lookaheads);
    if (!gained_lookaheads)
      continue;

    // Only items that expect a regular (non-token, non-built-in) symbol next
    // contribute further items.
    Symbol expected = item.next_symbol();
    bool expands = !(expected == NONE()) && !expected.is_token &&
                   !expected.is_built_in();
    if (!expands)
      continue;

    // Lookaheads for the new items: inherited from this item when the
    // expected symbol ends the production; otherwise the symbol following it
    // together with every symbol reachable as the first symbol of one of its
    // productions, transitively.
    LookaheadSet derived_lookaheads;
    size_t following_step = item.step_index + 1;
    if (following_step == item.production->size()) {
      derived_lookaheads = lookaheads;
    } else {
      vector<Symbol> frontier(1, item.production->at(following_step).symbol);
      do {
        Symbol candidate = frontier.back();
        frontier.pop_back();

        // A symbol that was already inserted has had its productions visited.
        if (derived_lookaheads.insert(candidate)) {
          for (const Production &production : grammar.productions(candidate)) {
            if (!production.empty())
              frontier.push_back(production[0].symbol);
          }
        }
      } while (!frontier.empty());
    }

    // Queue an item at the start of each production of the expected symbol.
    for (const Production &production : grammar.productions(expected)) {
      pending.push_back(pair<ParseItem, LookaheadSet>(
        ParseItem(expected, production, 0), derived_lookaheads));
    }
  }

  return closure;
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
|
||||
#define COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
|
||||
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
ParseItemSet item_set_closure(const ParseItemSet &, const SyntaxGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
|
||||
|
|
@ -28,6 +28,8 @@ bool LookaheadSet::contains(const Symbol &symbol) const {
|
|||
}
|
||||
|
||||
bool LookaheadSet::insert_all(const LookaheadSet &other) {
|
||||
if (!other.entries.get())
|
||||
return false;
|
||||
if (!entries.get())
|
||||
entries = make_shared<set<Symbol>>();
|
||||
size_t previous_size = entries->size();
|
||||
|
|
|
|||
109
src/compiler/build_tables/parse_item_set_builder.cc
Normal file
109
src/compiler/build_tables/parse_item_set_builder.cc
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::map;
|
||||
using std::get;
|
||||
using std::tuple;
|
||||
using std::make_tuple;
|
||||
using std::shared_ptr;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::NONE;
|
||||
|
||||
// Builds, for every variable of `grammar`, the set of token symbols that can
// be reached by repeatedly following the first symbol of its productions.
// Empty productions contribute nothing.
static map<Symbol, LookaheadSet> build_first_sets(const SyntaxGrammar &grammar) {
  map<Symbol, LookaheadSet> first_sets;
  vector<Symbol> pending;
  set<Symbol> visited;

  const size_t variable_count = grammar.variables.size();
  for (size_t i = 0; i < variable_count; i++) {
    Symbol variable(i);
    LookaheadSet tokens;

    // Depth-first walk over leading symbols, starting from this variable.
    visited.clear();
    pending.assign(1, variable);
    do {
      Symbol current = pending.back();
      pending.pop_back();

      if (current.is_token) {
        tokens.insert(current);
        continue;
      }

      // Expand each non-terminal only once per variable so that left-recursive
      // rules terminate.
      bool first_visit = visited.insert(current).second;
      if (!first_visit)
        continue;

      for (const Production &production : grammar.productions(current)) {
        if (production.empty())
          continue;
        pending.push_back(production[0].symbol);
      }
    } while (!pending.empty());

    first_sets.insert({variable, tokens});
  }

  return first_sets;
}
|
||||
|
||||
// Stores the address of `grammar` (it is dereferenced later by
// apply_transitive_closure, so the grammar must outlive this builder) and
// precomputes the first-token set of every grammar variable.
ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar)
    : grammar(&grammar),
      first_sets(build_first_sets(grammar)) {}
|
||||
|
||||
// Expands `*item_set` in place into its transitive closure: for every item
// whose next symbol is a non-terminal, an item at the start of each of that
// non-terminal's productions is merged into the set, repeatedly, until no
// entry gains new lookahead symbols.
//
// `item_set` must be non-null. The builder's internal work stack is reset at
// the start of every call.
void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
  // Seed the work stack with the entries already in the set. They are flagged
  // so they are expanded without being re-inserted into the set.
  items_to_process.clear();
  for (const auto &entry : item_set->entries) {
    items_to_process.push_back(make_tuple(entry.first, entry.second, true));
  }

  while (!items_to_process.empty()) {
    ParseItem item = get<0>(items_to_process.back());
    LookaheadSet lookahead_symbols = get<1>(items_to_process.back());
    bool from_original_set = get<2>(items_to_process.back());
    items_to_process.pop_back();

    // Add the parse-item and lookahead symbols to the item set.
    // If insert_all reports that nothing was added, skip to the next item.
    if (!from_original_set &&
        !item_set->entries[item].insert_all(lookahead_symbols))
      continue;

    // If the next symbol in the production is not a non-terminal, skip to the
    // next item.
    Symbol next_symbol = item.next_symbol();
    if (next_symbol == NONE() || next_symbol.is_token ||
        next_symbol.is_built_in())
      continue;

    // If the next symbol is the last symbol in the item's production, then the
    // lookahead symbols for the new items are the same as for the current
    // item. Otherwise they are the tokens that can begin the symbol that
    // follows it: the symbol itself when it is a token, or its precomputed
    // first set when it is a non-terminal.
    LookaheadSet next_lookahead_symbols;
    size_t next_step = item.step_index + 1;
    if (next_step == item.production->size()) {
      next_lookahead_symbols = lookahead_symbols;
    } else {
      Symbol symbol_after_next = item.production->at(next_step).symbol;
      if (symbol_after_next.is_token) {
        next_lookahead_symbols.insert(symbol_after_next);
      } else {
        // Check the lookup before dereferencing: a non-token symbol without a
        // first-set entry (e.g. one that is not a grammar variable) would
        // otherwise dereference end(). Such a symbol contributes no
        // lookaheads.
        auto first_set_entry = first_sets.find(symbol_after_next);
        if (first_set_entry != first_sets.end())
          next_lookahead_symbols = first_set_entry->second;
      }
    }

    // Add each of the next symbol's productions to be processed recursively.
    for (const Production &production : grammar->productions(next_symbol)) {
      items_to_process.push_back(make_tuple(
        ParseItem(next_symbol, production, 0),
        next_lookahead_symbols,
        false
      ));
    }
  }
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
27
src/compiler/build_tables/parse_item_set_builder.h
Normal file
27
src/compiler/build_tables/parse_item_set_builder.h
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_
|
||||
#define COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_
|
||||
|
||||
#include <map>
#include <tuple>
#include <vector>
#include "compiler/build_tables/parse_item.h"
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
class ParseItemSetBuilder {
|
||||
const SyntaxGrammar *grammar;
|
||||
std::map<rules::Symbol, LookaheadSet> first_sets;
|
||||
std::vector<std::tuple<ParseItem, LookaheadSet, bool>> items_to_process;
|
||||
|
||||
public:
|
||||
ParseItemSetBuilder(const SyntaxGrammar &);
|
||||
void apply_transitive_closure(ParseItemSet *);
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_
|
||||
|
|
@ -1086,7 +1086,7 @@ static void parser__advance(Parser *self, StackVersion version,
|
|||
return;
|
||||
}
|
||||
|
||||
parser__handle_error(self, version, lookahead->symbol);
|
||||
parser__handle_error(self, version, lookahead->first_leaf.symbol);
|
||||
|
||||
if (ts_stack_is_halted(self->stack, version)) {
|
||||
ts_tree_release(lookahead);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue