Store productions' end rule ids in the vector

This commit is contained in:
Max Brunsfeld 2015-01-27 19:56:49 -08:00
parent 1ba8701ada
commit 8ac4b9fc17
12 changed files with 74 additions and 81 deletions

View file

@ -15,20 +15,32 @@ describe("build_parse_table", []() {
{
"rule0",
{
Production({ {Symbol(1), 0, 1} }, 0),
Production({ {Symbol(2), 0, 2} }, 0)
Production({
{Symbol(1), 0, 1},
{rules::NONE(), 0, 5}
}),
Production({
{Symbol(2), 0, 2},
{rules::NONE(), 0, 6}
})
}
},
{
"rule1",
{
Production({ {Symbol(0, SymbolOptionToken), 0, 3} }, 0)
Production({
{Symbol(0, SymbolOptionToken), 0, 3},
{rules::NONE(), 0, 7}
})
}
},
{
"rule2",
{
Production({ {Symbol(1, SymbolOptionToken), 0, 4} }, 0)
Production({
{Symbol(1, SymbolOptionToken), 0, 4},
{rules::NONE(), 0, 8}
})
}
},
}, {}, { Symbol(2, SymbolOptionToken) });

View file

@ -2,6 +2,7 @@
#include "compiler/syntax_grammar.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/rules/built_in_symbols.h"
using namespace build_tables;
using namespace rules;
@ -15,8 +16,9 @@ describe("item_set_closure", []() {
{
Production({
{Symbol(1), 0, 100},
{Symbol(11, SymbolOptionToken), 0, 101}
}, 107),
{Symbol(11, SymbolOptionToken), 0, 101},
{rules::NONE(), 0, 107}
}),
}
},
{
@ -24,11 +26,13 @@ describe("item_set_closure", []() {
{
Production({
{Symbol(12, SymbolOptionToken), 0, 102},
{Symbol(13, SymbolOptionToken), 0, 103}
}, 108),
{Symbol(13, SymbolOptionToken), 0, 103},
{rules::NONE(), 0, 108}
}),
Production({
{Symbol(2), 0, 104},
}, 109)
{rules::NONE(), 0, 109}
})
}
},
{
@ -36,8 +40,9 @@ describe("item_set_closure", []() {
{
Production({
{Symbol(14, SymbolOptionToken), 0, 105},
{Symbol(15, SymbolOptionToken), 0, 106}
}, 110)
{Symbol(15, SymbolOptionToken), 0, 106},
{rules::NONE(), 0, 110}
})
}
},
}, {}, set<Symbol>());

View file

@ -52,14 +52,14 @@ describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
{Symbol(13, SymbolOptionToken), 0, 103},
{Symbol(1), 0, 104},
{Symbol(14, SymbolOptionToken), 0, 105},
}, 1)
})
},
},
{
"B", {
Production({
{Symbol(15, SymbolOptionToken), 0, 106},
}, 2)
})
},
}
}, {}, set<Symbol>());

View file

@ -3,6 +3,7 @@
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/helpers/containers.h"
#include "compiler/rules/built_in_symbols.h"
START_TEST
@ -48,15 +49,15 @@ describe("flatten_grammar", []() {
AssertThat(
get_symbol_lists(0),
Equals(vector<vector<Symbol>>({
{ Symbol(1), Symbol(2), Symbol(4) },
{ Symbol(1), Symbol(3), Symbol(4) }
{ Symbol(1), Symbol(2), Symbol(4), rules::NONE() },
{ Symbol(1), Symbol(3), Symbol(4), rules::NONE() }
})));
AssertThat(
get_symbol_lists(1),
Equals(vector<vector<Symbol>>({
{ Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7) },
{ Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7) }
{ Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7), rules::NONE() },
{ Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7), rules::NONE() }
})));
});
@ -74,15 +75,15 @@ describe("flatten_grammar", []() {
AssertThat(
get_precedence_lists(0),
Equals(vector<vector<int>>({
{ 0, 0, 0 },
{ 0, 0, 0 }
{ 0, 0, 0, 0 },
{ 0, 0, 0, 0 }
})));
AssertThat(
get_precedence_lists(1),
Equals(vector<vector<int>>({
{ 0, 0, 50, 100, 50, 0 },
{ 0, 0, 50, 50, 0 }
{ 0, 0, 50, 100, 50, 0, 0 },
{ 0, 0, 50, 50, 0, 0 }
})));
});
@ -90,7 +91,7 @@ describe("flatten_grammar", []() {
SyntaxGrammar grammar = flatten_grammar(input_grammar);
auto rule_id = [&](int rule_index, int production_index, int symbol_index) {
return grammar.rules[rule_index].second[production_index].rule_id_at(symbol_index);
return grammar.rules[rule_index].second[production_index][symbol_index].rule_id;
};
// Rule 1: last symbol is the same for both productions.

View file

@ -176,11 +176,12 @@ class ParseTableBuilder {
}
bool item_is_done(const ParseItem &item) {
return item.consumed_symbol_count == grammar.productions(item.lhs)[item.production_index].size();
return item.consumed_symbol_count ==
grammar.productions(item.lhs)[item.production_index].symbol_count();
}
int item_precedence(const ParseItem &item) {
return grammar.productions(item.lhs)[item.production_index].precedence_at(item.consumed_symbol_count - 1);
return grammar.productions(item.lhs)[item.production_index][item.consumed_symbol_count - 1].precedence;
}
void record_conflict(const Symbol &sym, const ParseAction &left,

View file

@ -35,18 +35,18 @@ void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
continue;
const Production &item_production = grammar.productions(item.lhs)[item.production_index];
if (item_production.size() <= item.consumed_symbol_count)
if (item.consumed_symbol_count >= item_production.symbol_count())
continue;
Symbol symbol = item_production.symbol_at(item.consumed_symbol_count);
Symbol symbol = item_production[item.consumed_symbol_count].symbol;
if (symbol.is_token() || symbol.is_built_in())
continue;
set<Symbol> next_lookahead_symbols;
if (item.consumed_symbol_count + 1 >= item_production.size()) {
if (item.consumed_symbol_count + 1 >= item_production.symbol_count()) {
next_lookahead_symbols = lookahead_symbols;
} else {
vector<Symbol> symbols_to_process({ item_production.symbol_at(item.consumed_symbol_count + 1) });
vector<Symbol> symbols_to_process({ item_production[item.consumed_symbol_count + 1].symbol });
while (!symbols_to_process.empty()) {
Symbol following_symbol = symbols_to_process.back();
@ -55,14 +55,14 @@ void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
continue;
for (const auto &production : grammar.productions(following_symbol))
symbols_to_process.push_back(production.symbol_at(0));
symbols_to_process.push_back(production[0].symbol);
}
}
size_t i = 0;
for (const Production &production : grammar.productions(symbol)) {
items_to_process.push_back({
ParseItem(symbol, i, production.rule_id_at(0), 0),
ParseItem(symbol, i, production[0].rule_id, 0),
next_lookahead_symbols
});
i++;

View file

@ -23,11 +23,11 @@ map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
const ParseItem &item = pair.first;
const set<Symbol> &lookahead_symbols = pair.second;
const Production &production = grammar.productions(item.lhs)[item.production_index];
if (production.size() <= item.consumed_symbol_count)
if (item.consumed_symbol_count >= production.symbol_count())
continue;
const Symbol &symbol = production.symbol_at(item.consumed_symbol_count);
int rule_id = production.rule_id_at(item.consumed_symbol_count + 1);
const Symbol &symbol = production[item.consumed_symbol_count].symbol;
int rule_id = production[item.consumed_symbol_count + 1].rule_id;
ParseItem new_item(item.lhs, item.production_index, rule_id, item.consumed_symbol_count + 1);
result[symbol][new_item].insert(lookahead_symbols.begin(), lookahead_symbols.end());

View file

@ -5,6 +5,7 @@
#include "compiler/rules/seq.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/built_in_symbols.h"
#include <string>
#include <algorithm>
@ -63,16 +64,16 @@ class FlattenRule : public rules::RuleFn<void> {
Production flatten_rule(const rule_ptr &rule) {
FlattenRule flattener;
flattener.apply(rule);
return Production(flattener.entries, 0);
int end_precedence = flattener.entries.back().precedence;
flattener.entries.push_back({ rules::NONE(), end_precedence, 0 });
return Production(flattener.entries);
}
struct ProductionSlice {
vector<ProductionEntry>::const_iterator start;
vector<ProductionEntry>::const_iterator end;
int end_precedence;
bool operator==(const ProductionSlice &other) const {
if (end_precedence != other.end_precedence) return false;
if (end - start != other.end - other.start) return false;
for (auto iter1 = start, iter2 = other.start; iter1 != end; ++iter1, ++iter2)
if (!(iter1->symbol == iter2->symbol) || iter1->precedence != iter2->precedence)
@ -82,11 +83,10 @@ struct ProductionSlice {
};
void assign_rule_ids(Production *production, vector<ProductionSlice> *unique_slices) {
auto &entries = production->entries;
auto end = entries.end();
auto end = production->entries.end();
for (auto iter = entries.begin(); iter != end; ++iter) {
ProductionSlice slice{iter, end, 0};
for (auto iter = production->entries.begin(); iter != end; ++iter) {
ProductionSlice slice{iter, end};
auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice);
if (existing_id == unique_slices->end()) {
unique_slices->push_back(slice);
@ -95,15 +95,6 @@ void assign_rule_ids(Production *production, vector<ProductionSlice> *unique_sli
iter->rule_id = existing_id - unique_slices->cbegin();
}
}
ProductionSlice slice{end, end, production->precedence_at(production->size() - 1)};
auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice);
if (existing_id == unique_slices->end()) {
unique_slices->push_back(slice);
production->end_rule_id = unique_slices->size() - 1;
} else {
production->end_rule_id = existing_id - unique_slices->cbegin();
}
}
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
@ -126,7 +117,7 @@ SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
if (rules.empty()) {
rules.push_back({
"START",
{ Production({ {rules::Symbol(0, rules::SymbolOptionToken), 0, 0} }, 0) }
{ Production({ {rules::Symbol(0, rules::SymbolOptionToken), 0, 0} }) }
});
}

View file

@ -7,6 +7,7 @@ Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); }
Symbol ERROR() { return Symbol(-2, SymbolOptionToken); }
Symbol START() { return Symbol(-3); }
Symbol DOCUMENT() { return Symbol(-4); }
// Built-in symbol with index -5 and no token option. Productions use it as
// the symbol of their final entry, after the symbols that are consumed.
Symbol NONE() { return Symbol(-5); }
} // namespace rules
} // namespace tree_sitter

View file

@ -10,6 +10,7 @@ Symbol ERROR();
Symbol START();
Symbol END_OF_INPUT();
Symbol DOCUMENT();
// Built-in symbol placed in the final entry of a production.
Symbol NONE();
} // namespace rules
} // namespace tree_sitter

View file

@ -14,7 +14,7 @@ using std::vector;
using std::set;
static const vector<Production> START_PRODUCTIONS({
Production({ {rules::Symbol(0), 0, -1} }, 2)
Production({ {rules::Symbol(0), 0, -1}, { rules::NONE(), 0, -2} })
});
static const vector<Production> NO_PRODUCTIONS({});
@ -24,29 +24,14 @@ bool ProductionEntry::operator==(const ProductionEntry &other) const {
rule_id == other.rule_id;
}
Production::Production(const vector<ProductionEntry> &entries, int last_rule_id) :
entries(entries), end_rule_id(last_rule_id) {}
Production::Production(const vector<ProductionEntry> &entries) : entries(entries) {}
int Production::precedence_at(size_t index) const {
if (index >= size())
return 0;
else
return entries[index].precedence;
// Returns the number of entries that precede the production's final entry.
// Callers compare a parse item's consumed-symbol count against this value to
// decide whether the item is complete, and they index one position past the
// last counted entry to read the final entry's rule id.
//
// An empty entry list yields 0; without the check the unsigned subtraction
// would wrap around to SIZE_MAX and every "is this item done?" comparison
// would be false, leading to out-of-range indexing.
size_t Production::symbol_count() const {
  return entries.empty() ? 0 : entries.size() - 1;
}
int Production::rule_id_at(size_t index) const {
if (index >= size())
return end_rule_id;
else
return entries[index].rule_id;
}
const rules::Symbol &Production::symbol_at(size_t index) const {
return entries[index].symbol;
}
size_t Production::size() const {
return entries.size();
// Returns the entry at position `i`. No bounds checking is performed, so the
// caller must pass an index in [0, entries.size()). For a non-empty
// production, index symbol_count() refers to the last entry in the vector.
const ProductionEntry &Production::operator[](int i) const {
return entries[i];
}
SyntaxGrammar::SyntaxGrammar() {}
@ -87,8 +72,7 @@ std::ostream &operator<<(std::ostream &stream, const Production &production) {
stream << entry;
started = true;
}
return stream << string(") end_rule_id: ") <<
to_string(production.end_rule_id) << string(")");
return stream << string(")");
}
} // namespace tree_sitter

View file

@ -18,15 +18,12 @@ struct ProductionEntry {
bool operator==(const ProductionEntry &) const;
};
class Production {
public:
struct Production {
Production();
Production(const std::vector<ProductionEntry> &);
size_t symbol_count() const;
const ProductionEntry &operator[](int) const;
std::vector<ProductionEntry> entries;
int end_rule_id;
Production(const std::vector<ProductionEntry> &, int);
size_t size() const;
const rules::Symbol &symbol_at(size_t) const;
int precedence_at(size_t) const;
int rule_id_at(size_t) const;
};
std::ostream &operator<<(std::ostream &, const ProductionEntry &);