Store productions' end rule ids in the vector
This commit is contained in:
parent
1ba8701ada
commit
8ac4b9fc17
12 changed files with 74 additions and 81 deletions
|
|
@ -15,20 +15,32 @@ describe("build_parse_table", []() {
|
|||
{
|
||||
"rule0",
|
||||
{
|
||||
Production({ {Symbol(1), 0, 1} }, 0),
|
||||
Production({ {Symbol(2), 0, 2} }, 0)
|
||||
Production({
|
||||
{Symbol(1), 0, 1},
|
||||
{rules::NONE(), 0, 5}
|
||||
}),
|
||||
Production({
|
||||
{Symbol(2), 0, 2},
|
||||
{rules::NONE(), 0, 6}
|
||||
})
|
||||
}
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
{
|
||||
Production({ {Symbol(0, SymbolOptionToken), 0, 3} }, 0)
|
||||
Production({
|
||||
{Symbol(0, SymbolOptionToken), 0, 3},
|
||||
{rules::NONE(), 0, 7}
|
||||
})
|
||||
}
|
||||
},
|
||||
{
|
||||
"rule2",
|
||||
{
|
||||
Production({ {Symbol(1, SymbolOptionToken), 0, 4} }, 0)
|
||||
Production({
|
||||
{Symbol(1, SymbolOptionToken), 0, 4},
|
||||
{rules::NONE(), 0, 8}
|
||||
})
|
||||
}
|
||||
},
|
||||
}, {}, { Symbol(2, SymbolOptionToken) });
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
using namespace build_tables;
|
||||
using namespace rules;
|
||||
|
|
@ -15,8 +16,9 @@ describe("item_set_closure", []() {
|
|||
{
|
||||
Production({
|
||||
{Symbol(1), 0, 100},
|
||||
{Symbol(11, SymbolOptionToken), 0, 101}
|
||||
}, 107),
|
||||
{Symbol(11, SymbolOptionToken), 0, 101},
|
||||
{rules::NONE(), 0, 107}
|
||||
}),
|
||||
}
|
||||
},
|
||||
{
|
||||
|
|
@ -24,11 +26,13 @@ describe("item_set_closure", []() {
|
|||
{
|
||||
Production({
|
||||
{Symbol(12, SymbolOptionToken), 0, 102},
|
||||
{Symbol(13, SymbolOptionToken), 0, 103}
|
||||
}, 108),
|
||||
{Symbol(13, SymbolOptionToken), 0, 103},
|
||||
{rules::NONE(), 0, 108}
|
||||
}),
|
||||
Production({
|
||||
{Symbol(2), 0, 104},
|
||||
}, 109)
|
||||
{rules::NONE(), 0, 109}
|
||||
})
|
||||
}
|
||||
},
|
||||
{
|
||||
|
|
@ -36,8 +40,9 @@ describe("item_set_closure", []() {
|
|||
{
|
||||
Production({
|
||||
{Symbol(14, SymbolOptionToken), 0, 105},
|
||||
{Symbol(15, SymbolOptionToken), 0, 106}
|
||||
}, 110)
|
||||
{Symbol(15, SymbolOptionToken), 0, 106},
|
||||
{rules::NONE(), 0, 110}
|
||||
})
|
||||
}
|
||||
},
|
||||
}, {}, set<Symbol>());
|
||||
|
|
|
|||
|
|
@ -52,14 +52,14 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
|
|||
{Symbol(13, SymbolOptionToken), 0, 103},
|
||||
{Symbol(1), 0, 104},
|
||||
{Symbol(14, SymbolOptionToken), 0, 105},
|
||||
}, 1)
|
||||
})
|
||||
},
|
||||
},
|
||||
{
|
||||
"B", {
|
||||
Production({
|
||||
{Symbol(15, SymbolOptionToken), 0, 106},
|
||||
}, 2)
|
||||
})
|
||||
},
|
||||
}
|
||||
}, {}, set<Symbol>());
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -48,15 +49,15 @@ describe("flatten_grammar", []() {
|
|||
AssertThat(
|
||||
get_symbol_lists(0),
|
||||
Equals(vector<vector<Symbol>>({
|
||||
{ Symbol(1), Symbol(2), Symbol(4) },
|
||||
{ Symbol(1), Symbol(3), Symbol(4) }
|
||||
{ Symbol(1), Symbol(2), Symbol(4), rules::NONE() },
|
||||
{ Symbol(1), Symbol(3), Symbol(4), rules::NONE() }
|
||||
})));
|
||||
|
||||
AssertThat(
|
||||
get_symbol_lists(1),
|
||||
Equals(vector<vector<Symbol>>({
|
||||
{ Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7) },
|
||||
{ Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7) }
|
||||
{ Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7), rules::NONE() },
|
||||
{ Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7), rules::NONE() }
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
@ -74,15 +75,15 @@ describe("flatten_grammar", []() {
|
|||
AssertThat(
|
||||
get_precedence_lists(0),
|
||||
Equals(vector<vector<int>>({
|
||||
{ 0, 0, 0 },
|
||||
{ 0, 0, 0 }
|
||||
{ 0, 0, 0, 0 },
|
||||
{ 0, 0, 0, 0 }
|
||||
})));
|
||||
|
||||
AssertThat(
|
||||
get_precedence_lists(1),
|
||||
Equals(vector<vector<int>>({
|
||||
{ 0, 0, 50, 100, 50, 0 },
|
||||
{ 0, 0, 50, 50, 0 }
|
||||
{ 0, 0, 50, 100, 50, 0, 0 },
|
||||
{ 0, 0, 50, 50, 0, 0 }
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
@ -90,7 +91,7 @@ describe("flatten_grammar", []() {
|
|||
SyntaxGrammar grammar = flatten_grammar(input_grammar);
|
||||
|
||||
auto rule_id = [&](int rule_index, int production_index, int symbol_index) {
|
||||
return grammar.rules[rule_index].second[production_index].rule_id_at(symbol_index);
|
||||
return grammar.rules[rule_index].second[production_index][symbol_index].rule_id;
|
||||
};
|
||||
|
||||
// Rule 1: last symbol is the same for both productions.
|
||||
|
|
|
|||
|
|
@ -176,11 +176,12 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
bool item_is_done(const ParseItem &item) {
|
||||
return item.consumed_symbol_count == grammar.productions(item.lhs)[item.production_index].size();
|
||||
return item.consumed_symbol_count ==
|
||||
grammar.productions(item.lhs)[item.production_index].symbol_count();
|
||||
}
|
||||
|
||||
int item_precedence(const ParseItem &item) {
|
||||
return grammar.productions(item.lhs)[item.production_index].precedence_at(item.consumed_symbol_count - 1);
|
||||
return grammar.productions(item.lhs)[item.production_index][item.consumed_symbol_count - 1].precedence;
|
||||
}
|
||||
|
||||
void record_conflict(const Symbol &sym, const ParseAction &left,
|
||||
|
|
|
|||
|
|
@ -35,18 +35,18 @@ void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
|
|||
continue;
|
||||
|
||||
const Production &item_production = grammar.productions(item.lhs)[item.production_index];
|
||||
if (item_production.size() <= item.consumed_symbol_count)
|
||||
if (item.consumed_symbol_count >= item_production.symbol_count())
|
||||
continue;
|
||||
|
||||
Symbol symbol = item_production.symbol_at(item.consumed_symbol_count);
|
||||
Symbol symbol = item_production[item.consumed_symbol_count].symbol;
|
||||
if (symbol.is_token() || symbol.is_built_in())
|
||||
continue;
|
||||
|
||||
set<Symbol> next_lookahead_symbols;
|
||||
if (item.consumed_symbol_count + 1 >= item_production.size()) {
|
||||
if (item.consumed_symbol_count + 1 >= item_production.symbol_count()) {
|
||||
next_lookahead_symbols = lookahead_symbols;
|
||||
} else {
|
||||
vector<Symbol> symbols_to_process({ item_production.symbol_at(item.consumed_symbol_count + 1) });
|
||||
vector<Symbol> symbols_to_process({ item_production[item.consumed_symbol_count + 1].symbol });
|
||||
|
||||
while (!symbols_to_process.empty()) {
|
||||
Symbol following_symbol = symbols_to_process.back();
|
||||
|
|
@ -55,14 +55,14 @@ void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
|
|||
continue;
|
||||
|
||||
for (const auto &production : grammar.productions(following_symbol))
|
||||
symbols_to_process.push_back(production.symbol_at(0));
|
||||
symbols_to_process.push_back(production[0].symbol);
|
||||
}
|
||||
}
|
||||
|
||||
size_t i = 0;
|
||||
for (const Production &production : grammar.productions(symbol)) {
|
||||
items_to_process.push_back({
|
||||
ParseItem(symbol, i, production.rule_id_at(0), 0),
|
||||
ParseItem(symbol, i, production[0].rule_id, 0),
|
||||
next_lookahead_symbols
|
||||
});
|
||||
i++;
|
||||
|
|
|
|||
|
|
@ -23,11 +23,11 @@ map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
|
|||
const ParseItem &item = pair.first;
|
||||
const set<Symbol> &lookahead_symbols = pair.second;
|
||||
const Production &production = grammar.productions(item.lhs)[item.production_index];
|
||||
if (production.size() <= item.consumed_symbol_count)
|
||||
if (item.consumed_symbol_count >= production.symbol_count())
|
||||
continue;
|
||||
|
||||
const Symbol &symbol = production.symbol_at(item.consumed_symbol_count);
|
||||
int rule_id = production.rule_id_at(item.consumed_symbol_count + 1);
|
||||
const Symbol &symbol = production[item.consumed_symbol_count].symbol;
|
||||
int rule_id = production[item.consumed_symbol_count + 1].rule_id;
|
||||
ParseItem new_item(item.lhs, item.production_index, rule_id, item.consumed_symbol_count + 1);
|
||||
|
||||
result[symbol][new_item].insert(lookahead_symbols.begin(), lookahead_symbols.end());
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
|
|
@ -63,16 +64,16 @@ class FlattenRule : public rules::RuleFn<void> {
|
|||
Production flatten_rule(const rule_ptr &rule) {
|
||||
FlattenRule flattener;
|
||||
flattener.apply(rule);
|
||||
return Production(flattener.entries, 0);
|
||||
int end_precedence = flattener.entries.back().precedence;
|
||||
flattener.entries.push_back({ rules::NONE(), end_precedence, 0 });
|
||||
return Production(flattener.entries);
|
||||
}
|
||||
|
||||
struct ProductionSlice {
|
||||
vector<ProductionEntry>::const_iterator start;
|
||||
vector<ProductionEntry>::const_iterator end;
|
||||
int end_precedence;
|
||||
|
||||
bool operator==(const ProductionSlice &other) const {
|
||||
if (end_precedence != other.end_precedence) return false;
|
||||
if (end - start != other.end - other.start) return false;
|
||||
for (auto iter1 = start, iter2 = other.start; iter1 != end; ++iter1, ++iter2)
|
||||
if (!(iter1->symbol == iter2->symbol) || iter1->precedence != iter2->precedence)
|
||||
|
|
@ -82,11 +83,10 @@ struct ProductionSlice {
|
|||
};
|
||||
|
||||
void assign_rule_ids(Production *production, vector<ProductionSlice> *unique_slices) {
|
||||
auto &entries = production->entries;
|
||||
auto end = entries.end();
|
||||
auto end = production->entries.end();
|
||||
|
||||
for (auto iter = entries.begin(); iter != end; ++iter) {
|
||||
ProductionSlice slice{iter, end, 0};
|
||||
for (auto iter = production->entries.begin(); iter != end; ++iter) {
|
||||
ProductionSlice slice{iter, end};
|
||||
auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice);
|
||||
if (existing_id == unique_slices->end()) {
|
||||
unique_slices->push_back(slice);
|
||||
|
|
@ -95,15 +95,6 @@ void assign_rule_ids(Production *production, vector<ProductionSlice> *unique_sli
|
|||
iter->rule_id = existing_id - unique_slices->cbegin();
|
||||
}
|
||||
}
|
||||
|
||||
ProductionSlice slice{end, end, production->precedence_at(production->size() - 1)};
|
||||
auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice);
|
||||
if (existing_id == unique_slices->end()) {
|
||||
unique_slices->push_back(slice);
|
||||
production->end_rule_id = unique_slices->size() - 1;
|
||||
} else {
|
||||
production->end_rule_id = existing_id - unique_slices->cbegin();
|
||||
}
|
||||
}
|
||||
|
||||
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
|
||||
|
|
@ -126,7 +117,7 @@ SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
|
|||
if (rules.empty()) {
|
||||
rules.push_back({
|
||||
"START",
|
||||
{ Production({ {rules::Symbol(0, rules::SymbolOptionToken), 0, 0} }, 0) }
|
||||
{ Production({ {rules::Symbol(0, rules::SymbolOptionToken), 0, 0} }) }
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); }
|
|||
Symbol ERROR() { return Symbol(-2, SymbolOptionToken); }
|
||||
Symbol START() { return Symbol(-3); }
|
||||
Symbol DOCUMENT() { return Symbol(-4); }
|
||||
Symbol NONE() { return Symbol(-5); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ Symbol ERROR();
|
|||
Symbol START();
|
||||
Symbol END_OF_INPUT();
|
||||
Symbol DOCUMENT();
|
||||
Symbol NONE();
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ using std::vector;
|
|||
using std::set;
|
||||
|
||||
static const vector<Production> START_PRODUCTIONS({
|
||||
Production({ {rules::Symbol(0), 0, -1} }, 2)
|
||||
Production({ {rules::Symbol(0), 0, -1}, { rules::NONE(), 0, -2} })
|
||||
});
|
||||
|
||||
static const vector<Production> NO_PRODUCTIONS({});
|
||||
|
|
@ -24,29 +24,14 @@ bool ProductionEntry::operator==(const ProductionEntry &other) const {
|
|||
rule_id == other.rule_id;
|
||||
}
|
||||
|
||||
Production::Production(const vector<ProductionEntry> &entries, int last_rule_id) :
|
||||
entries(entries), end_rule_id(last_rule_id) {}
|
||||
Production::Production(const vector<ProductionEntry> &entries) : entries(entries) {}
|
||||
|
||||
int Production::precedence_at(size_t index) const {
|
||||
if (index >= size())
|
||||
return 0;
|
||||
else
|
||||
return entries[index].precedence;
|
||||
size_t Production::symbol_count() const {
|
||||
return entries.size() - 1;
|
||||
}
|
||||
|
||||
int Production::rule_id_at(size_t index) const {
|
||||
if (index >= size())
|
||||
return end_rule_id;
|
||||
else
|
||||
return entries[index].rule_id;
|
||||
}
|
||||
|
||||
const rules::Symbol &Production::symbol_at(size_t index) const {
|
||||
return entries[index].symbol;
|
||||
}
|
||||
|
||||
size_t Production::size() const {
|
||||
return entries.size();
|
||||
const ProductionEntry &Production::operator[](int i) const {
|
||||
return entries[i];
|
||||
}
|
||||
|
||||
SyntaxGrammar::SyntaxGrammar() {}
|
||||
|
|
@ -87,8 +72,7 @@ std::ostream &operator<<(std::ostream &stream, const Production &production) {
|
|||
stream << entry;
|
||||
started = true;
|
||||
}
|
||||
return stream << string(") end_rule_id: ") <<
|
||||
to_string(production.end_rule_id) << string(")");
|
||||
return stream << string(")");
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -18,15 +18,12 @@ struct ProductionEntry {
|
|||
bool operator==(const ProductionEntry &) const;
|
||||
};
|
||||
|
||||
class Production {
|
||||
public:
|
||||
struct Production {
|
||||
Production();
|
||||
Production(const std::vector<ProductionEntry> &);
|
||||
size_t symbol_count() const;
|
||||
const ProductionEntry &operator[](int) const;
|
||||
std::vector<ProductionEntry> entries;
|
||||
int end_rule_id;
|
||||
Production(const std::vector<ProductionEntry> &, int);
|
||||
size_t size() const;
|
||||
const rules::Symbol &symbol_at(size_t) const;
|
||||
int precedence_at(size_t) const;
|
||||
int rule_id_at(size_t) const;
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &, const ProductionEntry &);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue