Store productions' end rule ids in the vector
This commit is contained in:
parent
1ba8701ada
commit
8ac4b9fc17
12 changed files with 74 additions and 81 deletions
|
|
@ -176,11 +176,12 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
bool item_is_done(const ParseItem &item) {
|
||||
return item.consumed_symbol_count == grammar.productions(item.lhs)[item.production_index].size();
|
||||
return item.consumed_symbol_count ==
|
||||
grammar.productions(item.lhs)[item.production_index].symbol_count();
|
||||
}
|
||||
|
||||
int item_precedence(const ParseItem &item) {
|
||||
return grammar.productions(item.lhs)[item.production_index].precedence_at(item.consumed_symbol_count - 1);
|
||||
return grammar.productions(item.lhs)[item.production_index][item.consumed_symbol_count - 1].precedence;
|
||||
}
|
||||
|
||||
void record_conflict(const Symbol &sym, const ParseAction &left,
|
||||
|
|
|
|||
|
|
@ -35,18 +35,18 @@ void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
|
|||
continue;
|
||||
|
||||
const Production &item_production = grammar.productions(item.lhs)[item.production_index];
|
||||
if (item_production.size() <= item.consumed_symbol_count)
|
||||
if (item.consumed_symbol_count >= item_production.symbol_count())
|
||||
continue;
|
||||
|
||||
Symbol symbol = item_production.symbol_at(item.consumed_symbol_count);
|
||||
Symbol symbol = item_production[item.consumed_symbol_count].symbol;
|
||||
if (symbol.is_token() || symbol.is_built_in())
|
||||
continue;
|
||||
|
||||
set<Symbol> next_lookahead_symbols;
|
||||
if (item.consumed_symbol_count + 1 >= item_production.size()) {
|
||||
if (item.consumed_symbol_count + 1 >= item_production.symbol_count()) {
|
||||
next_lookahead_symbols = lookahead_symbols;
|
||||
} else {
|
||||
vector<Symbol> symbols_to_process({ item_production.symbol_at(item.consumed_symbol_count + 1) });
|
||||
vector<Symbol> symbols_to_process({ item_production[item.consumed_symbol_count + 1].symbol });
|
||||
|
||||
while (!symbols_to_process.empty()) {
|
||||
Symbol following_symbol = symbols_to_process.back();
|
||||
|
|
@ -55,14 +55,14 @@ void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
|
|||
continue;
|
||||
|
||||
for (const auto &production : grammar.productions(following_symbol))
|
||||
symbols_to_process.push_back(production.symbol_at(0));
|
||||
symbols_to_process.push_back(production[0].symbol);
|
||||
}
|
||||
}
|
||||
|
||||
size_t i = 0;
|
||||
for (const Production &production : grammar.productions(symbol)) {
|
||||
items_to_process.push_back({
|
||||
ParseItem(symbol, i, production.rule_id_at(0), 0),
|
||||
ParseItem(symbol, i, production[0].rule_id, 0),
|
||||
next_lookahead_symbols
|
||||
});
|
||||
i++;
|
||||
|
|
|
|||
|
|
@ -23,11 +23,11 @@ map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
|
|||
const ParseItem &item = pair.first;
|
||||
const set<Symbol> &lookahead_symbols = pair.second;
|
||||
const Production &production = grammar.productions(item.lhs)[item.production_index];
|
||||
if (production.size() <= item.consumed_symbol_count)
|
||||
if (item.consumed_symbol_count >= production.symbol_count())
|
||||
continue;
|
||||
|
||||
const Symbol &symbol = production.symbol_at(item.consumed_symbol_count);
|
||||
int rule_id = production.rule_id_at(item.consumed_symbol_count + 1);
|
||||
const Symbol &symbol = production[item.consumed_symbol_count].symbol;
|
||||
int rule_id = production[item.consumed_symbol_count + 1].rule_id;
|
||||
ParseItem new_item(item.lhs, item.production_index, rule_id, item.consumed_symbol_count + 1);
|
||||
|
||||
result[symbol][new_item].insert(lookahead_symbols.begin(), lookahead_symbols.end());
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
|
|
@ -63,16 +64,16 @@ class FlattenRule : public rules::RuleFn<void> {
|
|||
Production flatten_rule(const rule_ptr &rule) {
|
||||
FlattenRule flattener;
|
||||
flattener.apply(rule);
|
||||
return Production(flattener.entries, 0);
|
||||
int end_precedence = flattener.entries.back().precedence;
|
||||
flattener.entries.push_back({ rules::NONE(), end_precedence, 0 });
|
||||
return Production(flattener.entries);
|
||||
}
|
||||
|
||||
struct ProductionSlice {
|
||||
vector<ProductionEntry>::const_iterator start;
|
||||
vector<ProductionEntry>::const_iterator end;
|
||||
int end_precedence;
|
||||
|
||||
bool operator==(const ProductionSlice &other) const {
|
||||
if (end_precedence != other.end_precedence) return false;
|
||||
if (end - start != other.end - other.start) return false;
|
||||
for (auto iter1 = start, iter2 = other.start; iter1 != end; ++iter1, ++iter2)
|
||||
if (!(iter1->symbol == iter2->symbol) || iter1->precedence != iter2->precedence)
|
||||
|
|
@ -82,11 +83,10 @@ struct ProductionSlice {
|
|||
};
|
||||
|
||||
void assign_rule_ids(Production *production, vector<ProductionSlice> *unique_slices) {
|
||||
auto &entries = production->entries;
|
||||
auto end = entries.end();
|
||||
auto end = production->entries.end();
|
||||
|
||||
for (auto iter = entries.begin(); iter != end; ++iter) {
|
||||
ProductionSlice slice{iter, end, 0};
|
||||
for (auto iter = production->entries.begin(); iter != end; ++iter) {
|
||||
ProductionSlice slice{iter, end};
|
||||
auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice);
|
||||
if (existing_id == unique_slices->end()) {
|
||||
unique_slices->push_back(slice);
|
||||
|
|
@ -95,15 +95,6 @@ void assign_rule_ids(Production *production, vector<ProductionSlice> *unique_sli
|
|||
iter->rule_id = existing_id - unique_slices->cbegin();
|
||||
}
|
||||
}
|
||||
|
||||
ProductionSlice slice{end, end, production->precedence_at(production->size() - 1)};
|
||||
auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice);
|
||||
if (existing_id == unique_slices->end()) {
|
||||
unique_slices->push_back(slice);
|
||||
production->end_rule_id = unique_slices->size() - 1;
|
||||
} else {
|
||||
production->end_rule_id = existing_id - unique_slices->cbegin();
|
||||
}
|
||||
}
|
||||
|
||||
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
|
||||
|
|
@ -126,7 +117,7 @@ SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
|
|||
if (rules.empty()) {
|
||||
rules.push_back({
|
||||
"START",
|
||||
{ Production({ {rules::Symbol(0, rules::SymbolOptionToken), 0, 0} }, 0) }
|
||||
{ Production({ {rules::Symbol(0, rules::SymbolOptionToken), 0, 0} }) }
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); }
|
|||
Symbol ERROR() { return Symbol(-2, SymbolOptionToken); }
|
||||
Symbol START() { return Symbol(-3); }
|
||||
Symbol DOCUMENT() { return Symbol(-4); }
|
||||
Symbol NONE() { return Symbol(-5); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ Symbol ERROR();
|
|||
Symbol START();
|
||||
Symbol END_OF_INPUT();
|
||||
Symbol DOCUMENT();
|
||||
Symbol NONE();
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ using std::vector;
|
|||
using std::set;
|
||||
|
||||
static const vector<Production> START_PRODUCTIONS({
|
||||
Production({ {rules::Symbol(0), 0, -1} }, 2)
|
||||
Production({ {rules::Symbol(0), 0, -1}, { rules::NONE(), 0, -2} })
|
||||
});
|
||||
|
||||
static const vector<Production> NO_PRODUCTIONS({});
|
||||
|
|
@ -24,29 +24,14 @@ bool ProductionEntry::operator==(const ProductionEntry &other) const {
|
|||
rule_id == other.rule_id;
|
||||
}
|
||||
|
||||
Production::Production(const vector<ProductionEntry> &entries, int last_rule_id) :
|
||||
entries(entries), end_rule_id(last_rule_id) {}
|
||||
Production::Production(const vector<ProductionEntry> &entries) : entries(entries) {}
|
||||
|
||||
int Production::precedence_at(size_t index) const {
|
||||
if (index >= size())
|
||||
return 0;
|
||||
else
|
||||
return entries[index].precedence;
|
||||
size_t Production::symbol_count() const {
|
||||
return entries.size() - 1;
|
||||
}
|
||||
|
||||
int Production::rule_id_at(size_t index) const {
|
||||
if (index >= size())
|
||||
return end_rule_id;
|
||||
else
|
||||
return entries[index].rule_id;
|
||||
}
|
||||
|
||||
const rules::Symbol &Production::symbol_at(size_t index) const {
|
||||
return entries[index].symbol;
|
||||
}
|
||||
|
||||
size_t Production::size() const {
|
||||
return entries.size();
|
||||
const ProductionEntry &Production::operator[](int i) const {
|
||||
return entries[i];
|
||||
}
|
||||
|
||||
SyntaxGrammar::SyntaxGrammar() {}
|
||||
|
|
@ -87,8 +72,7 @@ std::ostream &operator<<(std::ostream &stream, const Production &production) {
|
|||
stream << entry;
|
||||
started = true;
|
||||
}
|
||||
return stream << string(") end_rule_id: ") <<
|
||||
to_string(production.end_rule_id) << string(")");
|
||||
return stream << string(")");
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -18,15 +18,12 @@ struct ProductionEntry {
|
|||
bool operator==(const ProductionEntry &) const;
|
||||
};
|
||||
|
||||
class Production {
|
||||
public:
|
||||
struct Production {
|
||||
Production();
|
||||
Production(const std::vector<ProductionEntry> &);
|
||||
size_t symbol_count() const;
|
||||
const ProductionEntry &operator[](int) const;
|
||||
std::vector<ProductionEntry> entries;
|
||||
int end_rule_id;
|
||||
Production(const std::vector<ProductionEntry> &, int);
|
||||
size_t size() const;
|
||||
const rules::Symbol &symbol_at(size_t) const;
|
||||
int precedence_at(size_t) const;
|
||||
int rule_id_at(size_t) const;
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &, const ProductionEntry &);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue