Handle inlined rules that contain other inlined rules
This commit is contained in:
parent
f33421c53e
commit
7d9d8bce79
6 changed files with 230 additions and 140 deletions
|
|
@ -21,61 +21,58 @@ using std::unordered_map;
|
|||
using std::vector;
|
||||
using rules::Symbol;
|
||||
|
||||
static vector<Production> inline_production(const ParseItem &item, const SyntaxGrammar &grammar) {
|
||||
vector<Production> result;
|
||||
auto &inlined_step = item.production->at(item.step_index);
|
||||
auto &productions_to_insert = grammar.variables[inlined_step.symbol.index].productions;
|
||||
for (const Production &production_to_insert : productions_to_insert) {
|
||||
auto begin = item.production->steps.begin();
|
||||
auto end = item.production->steps.end();
|
||||
auto step = begin + item.step_index;
|
||||
struct FollowSetInfo {
|
||||
LookaheadSet lookaheads;
|
||||
bool propagates_lookaheads;
|
||||
};
|
||||
|
||||
Production production{{begin, step}, item.production->dynamic_precedence};
|
||||
for (auto &step : production_to_insert) {
|
||||
production.steps.push_back(step);
|
||||
if (!inlined_step.name_replacement.empty()) {
|
||||
production.steps.back().name_replacement = inlined_step.name_replacement;
|
||||
}
|
||||
}
|
||||
production.back().precedence = inlined_step.precedence;
|
||||
production.back().associativity = inlined_step.associativity;
|
||||
production.steps.insert(
|
||||
production.steps.end(),
|
||||
step + 1,
|
||||
end
|
||||
);
|
||||
struct NonTerminalQueueEntry {
|
||||
Symbol::Index non_terminal;
|
||||
LookaheadSet lookaheads;
|
||||
bool propagates_lookaheads;
|
||||
};
|
||||
|
||||
if (find(result.begin(), result.end(), production) == result.end()) {
|
||||
result.push_back(move(production));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
bool ParseItemSetBuilder::ParseItemSetComponent::operator==(
|
||||
const ParseItemSetBuilder::ParseItemSetComponent &other) const {
|
||||
return item == other.item &&
|
||||
lookaheads == other.lookaheads &&
|
||||
propagates_lookaheads == other.propagates_lookaheads;
|
||||
}
|
||||
|
||||
ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lexical_grammar) : grammar{grammar} {
|
||||
vector<Symbol> symbols_to_process;
|
||||
set<Symbol::Index> processed_non_terminals;
|
||||
// Appends `item` to the end of `vector` unless an element comparing equal to
// it is already present. Uses a linear search, so existing order is preserved.
template <typename T>
inline void find_or_push(std::vector<T> &vector, const T &item) {
  const auto existing = std::find(vector.begin(), vector.end(), item);
  if (existing != vector.end()) return;
  vector.push_back(item);
}
|
||||
|
||||
ParseItemSetBuilder::ParseItemSetBuilder(
|
||||
const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lexical_grammar
|
||||
) : grammar{grammar} {
|
||||
|
||||
// Populate the FIRST and LAST set of each terminal, which just contains the terminal itself.
|
||||
for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol = Symbol::terminal(i);
|
||||
first_sets.insert({symbol, LookaheadSet({symbol})});
|
||||
last_sets.insert({symbol, LookaheadSet({symbol})});
|
||||
}
|
||||
|
||||
for (size_t i = 0, n = grammar.external_tokens.size(); i < n; i++) {
|
||||
Symbol symbol = Symbol::external(i);
|
||||
first_sets.insert({symbol, LookaheadSet({symbol})});
|
||||
last_sets.insert({symbol, LookaheadSet({symbol})});
|
||||
}
|
||||
|
||||
// Populate the FIRST and LAST set of each non-terminal by recursively expanding non-terminals.
|
||||
vector<Symbol> symbols_to_process;
|
||||
set<Symbol::Index> processed_non_terminals;
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol = Symbol::non_terminal(i);
|
||||
LookaheadSet &first_set = first_sets[symbol];
|
||||
LookaheadSet &last_set = last_sets[symbol];
|
||||
|
||||
LookaheadSet first_set;
|
||||
processed_non_terminals.clear();
|
||||
symbols_to_process.clear();
|
||||
symbols_to_process.push_back(symbol);
|
||||
symbols_to_process.assign({symbol});
|
||||
while (!symbols_to_process.empty()) {
|
||||
Symbol current_symbol = symbols_to_process.back();
|
||||
symbols_to_process.pop_back();
|
||||
|
|
@ -91,12 +88,8 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
}
|
||||
}
|
||||
|
||||
first_sets.insert({symbol, first_set});
|
||||
|
||||
LookaheadSet last_set;
|
||||
processed_non_terminals.clear();
|
||||
symbols_to_process.clear();
|
||||
symbols_to_process.push_back(symbol);
|
||||
symbols_to_process.assign({symbol});
|
||||
while (!symbols_to_process.empty()) {
|
||||
Symbol current_symbol = symbols_to_process.back();
|
||||
symbols_to_process.pop_back();
|
||||
|
|
@ -111,41 +104,27 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
last_sets.insert({symbol, last_set});
|
||||
}
|
||||
|
||||
struct NonTerminalQueueEntry {
|
||||
Symbol::Index non_terminal;
|
||||
LookaheadSet lookaheads;
|
||||
bool propagates_lookaheads;
|
||||
};
|
||||
// Populate a cache of which ParseItems will be created when a given non-terminal is expanded.
|
||||
vector<NonTerminalQueueEntry> non_terminal_queue;
|
||||
for (Symbol::Index i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
|
||||
vector<NonTerminalQueueEntry> non_terminal_queue_entry;
|
||||
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol = Symbol::non_terminal(i);
|
||||
|
||||
unordered_map<Symbol::Index, pair<LookaheadSet, bool>> cached_lookaheads_by_non_terminal;
|
||||
|
||||
non_terminal_queue_entry.clear();
|
||||
non_terminal_queue_entry.push_back({
|
||||
symbol.index,
|
||||
LookaheadSet(),
|
||||
true
|
||||
});
|
||||
|
||||
while (!non_terminal_queue_entry.empty()) {
|
||||
NonTerminalQueueEntry queue_entry = non_terminal_queue_entry.back();
|
||||
non_terminal_queue_entry.pop_back();
|
||||
// Compute the follow set of each *other* non-terminal that the current non-terminal can
|
||||
// start with.
|
||||
unordered_map<Symbol::Index, FollowSetInfo> follow_set_info_by_non_terminal;
|
||||
non_terminal_queue.assign({{i, LookaheadSet(), true}});
|
||||
while (!non_terminal_queue.empty()) {
|
||||
NonTerminalQueueEntry queue_entry = non_terminal_queue.back();
|
||||
non_terminal_queue.pop_back();
|
||||
|
||||
bool queue_entry_is_new;
|
||||
auto &cache_entry = cached_lookaheads_by_non_terminal[queue_entry.non_terminal];
|
||||
auto &follow_set_info = follow_set_info_by_non_terminal[queue_entry.non_terminal];
|
||||
if (queue_entry.propagates_lookaheads) {
|
||||
queue_entry_is_new = !cache_entry.second;
|
||||
cache_entry.second = true;
|
||||
queue_entry_is_new = !follow_set_info.propagates_lookaheads;
|
||||
follow_set_info.propagates_lookaheads = true;
|
||||
} else {
|
||||
queue_entry_is_new = cache_entry.first.insert_all(queue_entry.lookaheads);
|
||||
queue_entry_is_new = follow_set_info.lookaheads.insert_all(queue_entry.lookaheads);
|
||||
}
|
||||
|
||||
if (queue_entry_is_new) {
|
||||
|
|
@ -165,7 +144,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
propagates_lookaheads = false;
|
||||
}
|
||||
|
||||
non_terminal_queue_entry.push_back({
|
||||
non_terminal_queue.push_back({
|
||||
next_symbol.index,
|
||||
next_lookaheads,
|
||||
propagates_lookaheads
|
||||
|
|
@ -174,91 +153,135 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
}
|
||||
}
|
||||
|
||||
for (auto &pair : cached_lookaheads_by_non_terminal) {
|
||||
for (const Production &production : grammar.variables[pair.first].productions) {
|
||||
Symbol lhs = Symbol::non_terminal(pair.first);
|
||||
ParseItem item(lhs, production, 0);
|
||||
// Use these follow sets to populate the cache of ParseItems for non-terminal `i`.
|
||||
for (auto &pair : follow_set_info_by_non_terminal) {
|
||||
Symbol non_terminal = Symbol::non_terminal(pair.first);
|
||||
|
||||
for (const Production &production : grammar.variables[non_terminal.index].productions) {
|
||||
ParseItem item(non_terminal, production, 0);
|
||||
|
||||
if (grammar.variables_to_inline.count(item.next_symbol())) {
|
||||
vector<Production> &inlined_productions = inlined_productions_by_original_production[item];
|
||||
if (inlined_productions.empty()) {
|
||||
inlined_productions = inline_production(item, grammar);
|
||||
}
|
||||
|
||||
for (const Production &inlined_production : inlined_productions) {
|
||||
ParseItemSetComponent component{
|
||||
ParseItem(lhs, inlined_production, 0),
|
||||
pair.second.first,
|
||||
pair.second.second
|
||||
};
|
||||
|
||||
if (find(component_cache[i].begin(), component_cache[i].end(), component) == component_cache[i].end()) {
|
||||
component_cache[i].push_back(component);
|
||||
}
|
||||
}
|
||||
} else if (!grammar.variables_to_inline.count(lhs)) {
|
||||
ParseItemSetComponent component{
|
||||
ParseItem(lhs, production, 0),
|
||||
pair.second.first,
|
||||
pair.second.second
|
||||
};
|
||||
|
||||
if (find(component_cache[i].begin(), component_cache[i].end(), component) == component_cache[i].end()) {
|
||||
component_cache[i].push_back(component);
|
||||
for (const Production &inlined_production : inline_production(item)) {
|
||||
find_or_push(transitive_closure_component_cache[i], {
|
||||
ParseItem(non_terminal, inlined_production, 0),
|
||||
pair.second.lookaheads,
|
||||
pair.second.propagates_lookaheads
|
||||
});
|
||||
}
|
||||
} else if (!grammar.variables_to_inline.count(non_terminal)) {
|
||||
find_or_push(transitive_closure_component_cache[i], {
|
||||
item,
|
||||
pair.second.lookaheads,
|
||||
pair.second.propagates_lookaheads
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns every production obtained by replacing the step at `item.step_index`
// of `item.production` with one of the productions of that step's symbol.
//
// If one of those productions itself begins with a symbol listed in
// `grammar.variables_to_inline`, it is first expanded by a recursive call, so
// nested inlined rules are fully substituted.
//
// The result is memoized in `inlined_productions_by_original_production` and
// returned by reference to that cache entry. An empty cache entry is treated
// as "not computed yet".
const vector<Production> &ParseItemSetBuilder::inline_production(const ParseItem &item) {
  // The cache is a std::map, whose element references stay valid when other
  // keys are inserted; the recursive calls below insert into the same map.
  vector<Production> &result = inlined_productions_by_original_production[item];
  if (!result.empty()) return result;

  // Start from the productions of the symbol being inlined.
  auto &inlined_step = item.production->at(item.step_index);
  vector<const Production *> productions_to_insert;
  for (auto &production : grammar.variables[inlined_step.symbol.index].productions) {
    productions_to_insert.push_back(&production);
  }

  // Replace each candidate whose first step is itself an inlined symbol with
  // the expansions of that candidate. Newly inserted entries are revisited by
  // this loop because `iter` is left pointing at the last one inserted.
  for (auto iter = productions_to_insert.begin(); iter != productions_to_insert.end();) {
    const Production *candidate = *iter;

    if (!candidate->empty() && grammar.variables_to_inline.count(candidate->steps.front().symbol)) {
      iter = productions_to_insert.erase(iter);
      for (auto &nested_production : inline_production(ParseItem(inlined_step.symbol, *candidate, 0))) {
        iter = productions_to_insert.insert(iter, &nested_production);
      }
    } else {
      ++iter;
    }
  }

  for (const Production *production_to_insert : productions_to_insert) {
    auto begin = item.production->steps.begin();
    auto end = item.production->steps.end();
    auto inlined_position = begin + item.step_index;

    // Steps before the inlined symbol are copied unchanged.
    Production production{{begin, inlined_position}, item.production->dynamic_precedence};

    // The inlined symbol's steps take its place, inheriting the name
    // replacement of the step they substitute when one is set.
    for (auto &inserted_step : *production_to_insert) {
      production.steps.push_back(inserted_step);
      if (!inlined_step.name_replacement.empty()) {
        production.steps.back().name_replacement = inlined_step.name_replacement;
      }
    }

    // The last step so far takes over the precedence and associativity of the
    // replaced step. When the prefix and the inserted production are both
    // empty there is no such step, and calling back() would be undefined.
    if (!production.steps.empty()) {
      production.back().precedence = inlined_step.precedence;
      production.back().associativity = inlined_step.associativity;
    }

    // Steps after the inlined symbol are copied unchanged.
    production.steps.insert(production.steps.end(), inlined_position + 1, end);

    // Different expansions can yield the same production; keep one copy.
    if (find(result.begin(), result.end(), production) == result.end()) {
      result.push_back(move(production));
    }
  }

  return result;
}
|
||||
|
||||
void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
|
||||
for (auto iter = item_set->entries.begin(), end = item_set->entries.end(); iter != end;) {
|
||||
const ParseItem &item = iter->first;
|
||||
const LookaheadSet &lookaheads = iter->second;
|
||||
if (item.lhs() != rules::START() && item.step_index == 0) {
|
||||
++iter;
|
||||
continue;
|
||||
}
|
||||
|
||||
const Symbol &next_symbol = item.next_symbol();
|
||||
if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) {
|
||||
++iter;
|
||||
continue;
|
||||
}
|
||||
// Items whose `step_index` is 0 are not part of the item set's "kernel"; they have been
|
||||
// added in previous iterations of this loop, and they don't need to be further processed.
|
||||
if (item.lhs() == rules::START() || item.step_index > 0) {
|
||||
|
||||
LookaheadSet next_lookaheads;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item.production->size()) {
|
||||
next_lookaheads = lookaheads;
|
||||
} else {
|
||||
Symbol symbol_after_next = item.production->at(next_step).symbol;
|
||||
next_lookaheads = first_sets.find(symbol_after_next)->second;
|
||||
}
|
||||
// Kernel items whose next symbol is a non-terminal are expanded using the pre-computed
|
||||
// parse item cache.
|
||||
const Symbol &next_symbol = item.next_symbol();
|
||||
if (next_symbol.is_non_terminal() && !next_symbol.is_built_in()) {
|
||||
|
||||
for (const ParseItemSetComponent &component : component_cache[next_symbol.index]) {
|
||||
LookaheadSet &current_lookaheads = item_set->entries[component.item];
|
||||
current_lookaheads.insert_all(component.lookaheads);
|
||||
if (component.propagates_lookaheads) current_lookaheads.insert_all(next_lookaheads);
|
||||
}
|
||||
LookaheadSet next_lookaheads;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item.production->size()) {
|
||||
next_lookaheads = lookaheads;
|
||||
} else {
|
||||
Symbol symbol_after_next = item.production->at(next_step).symbol;
|
||||
next_lookaheads = first_sets.find(symbol_after_next)->second;
|
||||
}
|
||||
|
||||
if (grammar.variables_to_inline.count(next_symbol)) {
|
||||
vector<Production> &inlined_productions = inlined_productions_by_original_production[item];
|
||||
if (inlined_productions.empty()) {
|
||||
inlined_productions = inline_production(item, grammar);
|
||||
}
|
||||
|
||||
for (const Production &inlined_production : inlined_productions) {
|
||||
item_set->entries.insert({
|
||||
ParseItem(item.lhs(), inlined_production, item.step_index),
|
||||
lookaheads
|
||||
});
|
||||
for (const auto &component : transitive_closure_component_cache[next_symbol.index]) {
|
||||
LookaheadSet &current_lookaheads = item_set->entries[component.item];
|
||||
current_lookaheads.insert_all(component.lookaheads);
|
||||
if (component.propagates_lookaheads) {
|
||||
current_lookaheads.insert_all(next_lookaheads);
|
||||
}
|
||||
}
|
||||
|
||||
if (grammar.variables_to_inline.count(next_symbol)) {
|
||||
for (const Production &inlined_production : inline_production(item)) {
|
||||
item_set->entries.insert({
|
||||
ParseItem(item.lhs(), inlined_production, item.step_index),
|
||||
lookaheads
|
||||
});
|
||||
}
|
||||
|
||||
iter = item_set->entries.erase(iter);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (grammar.variables_to_inline.count(item.lhs())) {
|
||||
iter = item_set->entries.erase(iter);
|
||||
} else {
|
||||
++iter;
|
||||
continue;
|
||||
}
|
||||
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -18,19 +18,15 @@ class ParseItemSetBuilder {
|
|||
ParseItem item;
|
||||
LookaheadSet lookaheads;
|
||||
bool propagates_lookaheads;
|
||||
|
||||
inline bool operator==(const ParseItemSetComponent &other) {
|
||||
return item == other.item &&
|
||||
lookaheads == other.lookaheads &&
|
||||
propagates_lookaheads == other.propagates_lookaheads;
|
||||
}
|
||||
bool operator==(const ParseItemSetComponent &) const;
|
||||
};
|
||||
|
||||
const SyntaxGrammar &grammar;
|
||||
std::map<rules::Symbol, LookaheadSet> first_sets;
|
||||
std::map<rules::Symbol, LookaheadSet> last_sets;
|
||||
std::map<rules::Symbol::Index, std::vector<ParseItemSetComponent>> component_cache;
|
||||
std::map<rules::Symbol::Index, std::vector<ParseItemSetComponent>> transitive_closure_component_cache;
|
||||
std::map<ParseItem, std::vector<Production>> inlined_productions_by_original_production;
|
||||
const std::vector<Production> &inline_production(const ParseItem &);
|
||||
|
||||
public:
|
||||
ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ ParseAction::ParseAction()
|
|||
: production(nullptr),
|
||||
consumed_symbol_count(0),
|
||||
symbol(rules::NONE()),
|
||||
dynamic_precedence(0),
|
||||
type(ParseActionTypeError),
|
||||
extra(false),
|
||||
fragile(false),
|
||||
|
|
@ -72,6 +73,7 @@ bool ParseAction::operator==(const ParseAction &other) const {
|
|||
state_index == other.state_index &&
|
||||
production == other.production &&
|
||||
consumed_symbol_count == other.consumed_symbol_count &&
|
||||
dynamic_precedence == other.dynamic_precedence &&
|
||||
rename_sequence_id == other.rename_sequence_id;
|
||||
}
|
||||
|
||||
|
|
@ -90,6 +92,8 @@ bool ParseAction::operator<(const ParseAction &other) const {
|
|||
if (other.production < production) return false;
|
||||
if (consumed_symbol_count < other.consumed_symbol_count) return true;
|
||||
if (other.consumed_symbol_count < consumed_symbol_count) return false;
|
||||
if (dynamic_precedence < other.dynamic_precedence) return true;
|
||||
if (other.dynamic_precedence < dynamic_precedence) return false;
|
||||
return rename_sequence_id < other.rename_sequence_id;
|
||||
}
|
||||
|
||||
|
|
|
|||
12
test/fixtures/test_grammars/nested_inlined_rules/corpus.txt
vendored
Normal file
12
test/fixtures/test_grammars/nested_inlined_rules/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
==================================
|
||||
Statements
|
||||
==================================
|
||||
|
||||
return 1;
|
||||
return 2;
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(return_statement (number))
|
||||
(return_statement (number)))
|
||||
54
test/fixtures/test_grammars/nested_inlined_rules/grammar.json
vendored
Normal file
54
test/fixtures/test_grammars/nested_inlined_rules/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
{
|
||||
"name": "nested_inlined_rules",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"inline": [
|
||||
"top_level_item",
|
||||
"statement"
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"program": {
|
||||
"type": "REPEAT1",
|
||||
"content": {"type": "SYMBOL", "name": "top_level_item"}
|
||||
},
|
||||
|
||||
"top_level_item": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "statement"},
|
||||
{"type": "STRING", "value": "!"}
|
||||
]
|
||||
},
|
||||
|
||||
"statement": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression_statement"},
|
||||
{"type": "SYMBOL", "name": "return_statement"}
|
||||
]
|
||||
},
|
||||
|
||||
"return_statement": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "return"},
|
||||
{"type": "SYMBOL", "name": "number"},
|
||||
{"type": "STRING", "value": ";"}
|
||||
]
|
||||
},
|
||||
|
||||
"expression_statement": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "number"},
|
||||
{"type": "STRING", "value": ";"}
|
||||
]
|
||||
},
|
||||
|
||||
"number": {"type": "PATTERN", "value": "\\d+"}
|
||||
}
|
||||
}
|
||||
1
test/fixtures/test_grammars/nested_inlined_rules/readme.md
vendored
Normal file
1
test/fixtures/test_grammars/nested_inlined_rules/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
This grammar demonstrates that you can have an inlined rule that contains another inlined rule.
|
||||
Loading…
Add table
Add a link
Reference in a new issue