Include non-terminal lookahead symbols for reduction actions

This is necessary for re-using the right subtree after an edit.
This commit is contained in:
Max Brunsfeld 2014-10-10 12:06:16 -07:00
parent 4dcc712a8c
commit 3bcb221379
9 changed files with 19707 additions and 2755 deletions

View file

@ -13,7 +13,7 @@
'src/compiler/build_tables/build_lex_table.cc',
'src/compiler/build_tables/build_parse_table.cc',
'src/compiler/build_tables/build_tables.cc',
'src/compiler/build_tables/first_set.cc',
'src/compiler/build_tables/first_symbols.cc',
'src/compiler/build_tables/get_metadata.cc',
'src/compiler/build_tables/item.cc',
'src/compiler/build_tables/item_set_closure.cc',

View file

@ -1,6 +1,6 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/build_tables/first_set.h"
#include "compiler/build_tables/first_symbols.h"
#include "compiler/rules/metadata.h"
using namespace build_tables;
@ -8,32 +8,32 @@ using namespace rules;
START_TEST
describe("first_set", []() {
describe("first_symbols", []() {
const SyntaxGrammar null_grammar;
describe("for a sequence AB", [&]() {
it("ignores B when A cannot be blank", [&]() {
auto rule = seq({ i_token(0), i_token(1) });
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
Symbol(0, SymbolOptionToken),
})));
});
it("includes FIRST(B) when A can be blank", [&]() {
it("includes first_symbols(B) when A can be blank", [&]() {
auto rule = seq({
choice({
i_token(0),
blank() }),
i_token(1) });
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
Symbol(0, SymbolOptionToken),
Symbol(1, SymbolOptionToken)
})));
});
it("includes FIRST(A's right hand side) when A is a non-terminal", [&]() {
it("includes first_symbols(A's right hand side) when A is a non-terminal", [&]() {
auto rule = choice({
seq({
i_token(0),
@ -47,13 +47,14 @@ describe("first_set", []() {
i_token(4) }) }
}, {});
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
Symbol(0),
Symbol(0, SymbolOptionToken),
Symbol(2, SymbolOptionToken),
})));
});
it("includes FIRST(B) when A is a non-terminal and its expansion can be blank", [&]() {
it("includes first_symbols(B) when A is a non-terminal and its expansion can be blank", [&]() {
auto rule = seq({
i_sym(0),
i_token(1) });
@ -64,7 +65,8 @@ describe("first_set", []() {
blank() }) }
}, {});
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
Symbol(0),
Symbol(0, SymbolOptionToken),
Symbol(1, SymbolOptionToken),
})));
@ -82,7 +84,8 @@ describe("first_set", []() {
auto rule = i_sym(0);
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
Symbol(0),
Symbol(11, SymbolOptionToken)
})));
});
@ -91,7 +94,7 @@ describe("first_set", []() {
it("ignores metadata rules", [&]() {
auto rule = make_shared<Metadata>(i_token(3), map<rules::MetadataKey, int>());
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
Symbol(3, SymbolOptionToken),
})));
});

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -484,6 +484,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_6] = SHIFT(56),
},
[6] = {
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_value, 1),
[ts_aux_sym_3] = REDUCE(ts_sym_value, 1),
[ts_aux_sym_6] = REDUCE(ts_sym_value, 1),
},
@ -515,6 +516,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_6] = SHIFT(14),
},
[11] = {
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_array, 2),
[ts_aux_sym_3] = REDUCE(ts_sym_array, 2),
[ts_aux_sym_6] = REDUCE(ts_sym_array, 2),
},
@ -535,6 +537,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_5] = SHIFT(8),
},
[14] = {
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_array, 3),
[ts_aux_sym_3] = REDUCE(ts_sym_array, 3),
[ts_aux_sym_6] = REDUCE(ts_sym_array, 3),
},
@ -547,6 +550,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 3),
},
[17] = {
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_array, 4),
[ts_aux_sym_3] = REDUCE(ts_sym_array, 4),
[ts_aux_sym_6] = REDUCE(ts_sym_array, 4),
},
@ -559,6 +563,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_2] = SHIFT(21),
},
[20] = {
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_object, 2),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 2),
[ts_aux_sym_6] = REDUCE(ts_sym_object, 2),
},
@ -580,6 +585,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_4] = SHIFT(50),
},
[23] = {
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_value, 1),
[ts_aux_sym_3] = REDUCE(ts_sym_value, 1),
[ts_aux_sym_4] = REDUCE(ts_sym_value, 1),
},
@ -608,6 +614,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_6] = SHIFT(29),
},
[27] = {
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_array, 2),
[ts_aux_sym_3] = REDUCE(ts_sym_array, 2),
[ts_aux_sym_4] = REDUCE(ts_sym_array, 2),
},
@ -615,10 +622,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_6] = SHIFT(30),
},
[29] = {
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_array, 3),
[ts_aux_sym_3] = REDUCE(ts_sym_array, 3),
[ts_aux_sym_4] = REDUCE(ts_sym_array, 3),
},
[30] = {
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_array, 4),
[ts_aux_sym_3] = REDUCE(ts_sym_array, 4),
[ts_aux_sym_4] = REDUCE(ts_sym_array, 4),
},
@ -631,6 +640,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_2] = SHIFT(34),
},
[33] = {
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_object, 2),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 2),
[ts_aux_sym_4] = REDUCE(ts_sym_object, 2),
},
@ -659,6 +669,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_sym_string] = SHIFT(40),
},
[38] = {
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_object, 5),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 5),
[ts_aux_sym_4] = REDUCE(ts_sym_object, 5),
},
@ -694,6 +705,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 3),
},
[45] = {
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_object, 6),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 6),
[ts_aux_sym_4] = REDUCE(ts_sym_object, 6),
},
@ -701,10 +713,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_4] = SHIFT(48),
},
[47] = {
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_object, 3),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 3),
[ts_aux_sym_4] = REDUCE(ts_sym_object, 3),
},
[48] = {
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_object, 4),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 4),
[ts_aux_sym_4] = REDUCE(ts_sym_object, 4),
},
@ -712,10 +726,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_4] = SHIFT(51),
},
[50] = {
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_object, 5),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 5),
[ts_aux_sym_6] = REDUCE(ts_sym_object, 5),
},
[51] = {
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_object, 6),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 6),
[ts_aux_sym_6] = REDUCE(ts_sym_object, 6),
},
@ -723,10 +739,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_aux_sym_4] = SHIFT(54),
},
[53] = {
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_object, 3),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 3),
[ts_aux_sym_6] = REDUCE(ts_sym_object, 3),
},
[54] = {
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_object, 4),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 4),
[ts_aux_sym_6] = REDUCE(ts_sym_object, 4),
},

View file

@ -11,7 +11,6 @@
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/first_set.h"
namespace tree_sitter {
namespace build_tables {

View file

@ -1,4 +1,4 @@
#include "compiler/build_tables/first_set.h"
#include "compiler/build_tables/first_symbols.h"
#include "tree_sitter/compiler.h"
#include "compiler/prepared_grammar.h"
#include "compiler/build_tables/rule_can_be_blank.h"
@ -14,21 +14,25 @@ namespace build_tables {
using std::set;
using rules::Symbol;
class FirstSet : public rules::RuleFn<set<Symbol> > {
class FirstSymbols : public rules::RuleFn<set<Symbol> > {
const SyntaxGrammar *grammar;
set<Symbol> visited_symbols;
public:
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
explicit FirstSymbols(const SyntaxGrammar *grammar) : grammar(grammar) {}
set<Symbol> apply_to(const Symbol *rule) {
auto insertion_result = visited_symbols.insert(*rule);
if (insertion_result.second) {
return (rule->is_token()) ? set<Symbol>({ *rule })
: apply(grammar->rule(*rule));
} else {
if (!insertion_result.second)
return set<Symbol>();
set<Symbol> result({ *rule });
if (!rule->is_token()) {
set<Symbol> &&symbols = apply(grammar->rule(*rule));
result.insert(symbols.begin(), symbols.end());
}
return result;
}
set<Symbol> apply_to(const rules::Metadata *rule) {
@ -54,8 +58,8 @@ class FirstSet : public rules::RuleFn<set<Symbol> > {
}
};
set<Symbol> first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
return FirstSet(&grammar).apply(rule);
set<Symbol> first_symbols(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
return FirstSymbols(&grammar).apply(rule);
}
} // namespace build_tables

View file

@ -1,5 +1,5 @@
#ifndef COMPILER_BUILD_TABLES_FIRST_SET_H_
#define COMPILER_BUILD_TABLES_FIRST_SET_H_
#ifndef COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
#define COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
#include <set>
#include "compiler/build_tables/parse_item.h"
@ -16,10 +16,10 @@ namespace build_tables {
* the beginning of a string derivable from a given rule,
* in a given grammar.
*/
std::set<rules::Symbol> first_set(const rules::rule_ptr &rule,
std::set<rules::Symbol> first_symbols(const rules::rule_ptr &rule,
const SyntaxGrammar &grammar);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_FIRST_SET_H_
#endif // COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_

View file

@ -3,7 +3,7 @@
#include <vector>
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/build_tables/first_set.h"
#include "compiler/build_tables/first_symbols.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/build_tables/item.h"
@ -47,7 +47,7 @@ const ParseItemSet item_set_closure(const ParseItem &starting_item,
if (symbol.is_token() || symbol.is_built_in())
continue;
set<Symbol> next_lookahead_symbols = first_set(next_rule, grammar);
set<Symbol> next_lookahead_symbols = first_symbols(next_rule, grammar);
if (rule_can_be_blank(next_rule, grammar))
next_lookahead_symbols.insert(lookahead_symbols.begin(),
lookahead_symbols.end());