Include non-terminal lookahead symbols for reduction actions
This is necessary for re-using the right subtree after an edit
This commit is contained in:
parent
4dcc712a8c
commit
3bcb221379
9 changed files with 19707 additions and 2755 deletions
|
|
@ -13,7 +13,7 @@
|
|||
'src/compiler/build_tables/build_lex_table.cc',
|
||||
'src/compiler/build_tables/build_parse_table.cc',
|
||||
'src/compiler/build_tables/build_tables.cc',
|
||||
'src/compiler/build_tables/first_set.cc',
|
||||
'src/compiler/build_tables/first_symbols.cc',
|
||||
'src/compiler/build_tables/get_metadata.cc',
|
||||
'src/compiler/build_tables/item.cc',
|
||||
'src/compiler/build_tables/item_set_closure.cc',
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/build_tables/first_set.h"
|
||||
#include "compiler/build_tables/first_symbols.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
using namespace build_tables;
|
||||
|
|
@ -8,32 +8,32 @@ using namespace rules;
|
|||
|
||||
START_TEST
|
||||
|
||||
describe("first_set", []() {
|
||||
describe("first_symbols", []() {
|
||||
const SyntaxGrammar null_grammar;
|
||||
|
||||
describe("for a sequence AB", [&]() {
|
||||
it("ignores B when A cannot be blank", [&]() {
|
||||
auto rule = seq({ i_token(0), i_token(1) });
|
||||
|
||||
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(0, SymbolOptionToken),
|
||||
})));
|
||||
});
|
||||
|
||||
it("includes FIRST(B) when A can be blank", [&]() {
|
||||
it("includes first_symbols(B) when A can be blank", [&]() {
|
||||
auto rule = seq({
|
||||
choice({
|
||||
i_token(0),
|
||||
blank() }),
|
||||
i_token(1) });
|
||||
|
||||
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(0, SymbolOptionToken),
|
||||
Symbol(1, SymbolOptionToken)
|
||||
})));
|
||||
});
|
||||
|
||||
it("includes FIRST(A's right hand side) when A is a non-terminal", [&]() {
|
||||
it("includes first_symbols(A's right hand side) when A is a non-terminal", [&]() {
|
||||
auto rule = choice({
|
||||
seq({
|
||||
i_token(0),
|
||||
|
|
@ -47,13 +47,14 @@ describe("first_set", []() {
|
|||
i_token(4) }) }
|
||||
}, {});
|
||||
|
||||
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(0, SymbolOptionToken),
|
||||
Symbol(2, SymbolOptionToken),
|
||||
})));
|
||||
});
|
||||
|
||||
it("includes FIRST(B) when A is a non-terminal and its expansion can be blank", [&]() {
|
||||
it("includes first_symbols(B) when A is a non-terminal and its expansion can be blank", [&]() {
|
||||
auto rule = seq({
|
||||
i_sym(0),
|
||||
i_token(1) });
|
||||
|
|
@ -64,7 +65,8 @@ describe("first_set", []() {
|
|||
blank() }) }
|
||||
}, {});
|
||||
|
||||
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(0, SymbolOptionToken),
|
||||
Symbol(1, SymbolOptionToken),
|
||||
})));
|
||||
|
|
@ -82,7 +84,8 @@ describe("first_set", []() {
|
|||
|
||||
auto rule = i_sym(0);
|
||||
|
||||
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(11, SymbolOptionToken)
|
||||
})));
|
||||
});
|
||||
|
|
@ -91,7 +94,7 @@ describe("first_set", []() {
|
|||
it("ignores metadata rules", [&]() {
|
||||
auto rule = make_shared<Metadata>(i_token(3), map<rules::MetadataKey, int>());
|
||||
|
||||
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(3, SymbolOptionToken),
|
||||
})));
|
||||
});
|
||||
5523
spec/fixtures/parsers/golang.c
vendored
5523
spec/fixtures/parsers/golang.c
vendored
File diff suppressed because it is too large
Load diff
16859
spec/fixtures/parsers/javascript.c
vendored
16859
spec/fixtures/parsers/javascript.c
vendored
File diff suppressed because it is too large
Load diff
18
spec/fixtures/parsers/json.c
vendored
18
spec/fixtures/parsers/json.c
vendored
|
|
@ -484,6 +484,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_6] = SHIFT(56),
|
||||
},
|
||||
[6] = {
|
||||
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_value, 1),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_value, 1),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_value, 1),
|
||||
},
|
||||
|
|
@ -515,6 +516,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_6] = SHIFT(14),
|
||||
},
|
||||
[11] = {
|
||||
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_array, 2),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_array, 2),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_array, 2),
|
||||
},
|
||||
|
|
@ -535,6 +537,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_5] = SHIFT(8),
|
||||
},
|
||||
[14] = {
|
||||
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_array, 3),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_array, 3),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_array, 3),
|
||||
},
|
||||
|
|
@ -547,6 +550,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 3),
|
||||
},
|
||||
[17] = {
|
||||
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_array, 4),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_array, 4),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_array, 4),
|
||||
},
|
||||
|
|
@ -559,6 +563,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_2] = SHIFT(21),
|
||||
},
|
||||
[20] = {
|
||||
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_object, 2),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 2),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_object, 2),
|
||||
},
|
||||
|
|
@ -580,6 +585,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_4] = SHIFT(50),
|
||||
},
|
||||
[23] = {
|
||||
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_value, 1),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_value, 1),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_value, 1),
|
||||
},
|
||||
|
|
@ -608,6 +614,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_6] = SHIFT(29),
|
||||
},
|
||||
[27] = {
|
||||
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_array, 2),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_array, 2),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_array, 2),
|
||||
},
|
||||
|
|
@ -615,10 +622,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_6] = SHIFT(30),
|
||||
},
|
||||
[29] = {
|
||||
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_array, 3),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_array, 3),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_array, 3),
|
||||
},
|
||||
[30] = {
|
||||
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_array, 4),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_array, 4),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_array, 4),
|
||||
},
|
||||
|
|
@ -631,6 +640,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_2] = SHIFT(34),
|
||||
},
|
||||
[33] = {
|
||||
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_object, 2),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 2),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_object, 2),
|
||||
},
|
||||
|
|
@ -659,6 +669,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_sym_string] = SHIFT(40),
|
||||
},
|
||||
[38] = {
|
||||
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_object, 5),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 5),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_object, 5),
|
||||
},
|
||||
|
|
@ -694,6 +705,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 3),
|
||||
},
|
||||
[45] = {
|
||||
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_object, 6),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 6),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_object, 6),
|
||||
},
|
||||
|
|
@ -701,10 +713,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_4] = SHIFT(48),
|
||||
},
|
||||
[47] = {
|
||||
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_object, 3),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 3),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_object, 3),
|
||||
},
|
||||
[48] = {
|
||||
[ts_aux_sym_object_repeat0] = REDUCE(ts_sym_object, 4),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 4),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_object, 4),
|
||||
},
|
||||
|
|
@ -712,10 +726,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_4] = SHIFT(51),
|
||||
},
|
||||
[50] = {
|
||||
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_object, 5),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 5),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_object, 5),
|
||||
},
|
||||
[51] = {
|
||||
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_object, 6),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 6),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_object, 6),
|
||||
},
|
||||
|
|
@ -723,10 +739,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_aux_sym_4] = SHIFT(54),
|
||||
},
|
||||
[53] = {
|
||||
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_object, 3),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 3),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_object, 3),
|
||||
},
|
||||
[54] = {
|
||||
[ts_aux_sym_array_repeat0] = REDUCE(ts_sym_object, 4),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 4),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_object, 4),
|
||||
},
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/build_tables/first_set.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#include "compiler/build_tables/first_set.h"
|
||||
#include "compiler/build_tables/first_symbols.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
|
|
@ -14,21 +14,25 @@ namespace build_tables {
|
|||
using std::set;
|
||||
using rules::Symbol;
|
||||
|
||||
class FirstSet : public rules::RuleFn<set<Symbol> > {
|
||||
class FirstSymbols : public rules::RuleFn<set<Symbol> > {
|
||||
const SyntaxGrammar *grammar;
|
||||
set<Symbol> visited_symbols;
|
||||
|
||||
public:
|
||||
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
|
||||
explicit FirstSymbols(const SyntaxGrammar *grammar) : grammar(grammar) {}
|
||||
|
||||
set<Symbol> apply_to(const Symbol *rule) {
|
||||
auto insertion_result = visited_symbols.insert(*rule);
|
||||
if (insertion_result.second) {
|
||||
return (rule->is_token()) ? set<Symbol>({ *rule })
|
||||
: apply(grammar->rule(*rule));
|
||||
} else {
|
||||
if (!insertion_result.second)
|
||||
return set<Symbol>();
|
||||
|
||||
set<Symbol> result({ *rule });
|
||||
if (!rule->is_token()) {
|
||||
set<Symbol> &&symbols = apply(grammar->rule(*rule));
|
||||
result.insert(symbols.begin(), symbols.end());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
set<Symbol> apply_to(const rules::Metadata *rule) {
|
||||
|
|
@ -54,8 +58,8 @@ class FirstSet : public rules::RuleFn<set<Symbol> > {
|
|||
}
|
||||
};
|
||||
|
||||
set<Symbol> first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
|
||||
return FirstSet(&grammar).apply(rule);
|
||||
set<Symbol> first_symbols(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
|
||||
return FirstSymbols(&grammar).apply(rule);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_FIRST_SET_H_
|
||||
#define COMPILER_BUILD_TABLES_FIRST_SET_H_
|
||||
#ifndef COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
|
||||
#define COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
|
||||
|
||||
#include <set>
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
|
|
@ -16,10 +16,10 @@ namespace build_tables {
|
|||
* the beginning of a string derivable from a given rule,
|
||||
* in a given grammar.
|
||||
*/
|
||||
std::set<rules::Symbol> first_set(const rules::rule_ptr &rule,
|
||||
std::set<rules::Symbol> first_symbols(const rules::rule_ptr &rule,
|
||||
const SyntaxGrammar &grammar);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_FIRST_SET_H_
|
||||
#endif // COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
#include <vector>
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/build_tables/first_set.h"
|
||||
#include "compiler/build_tables/first_symbols.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/build_tables/item.h"
|
||||
|
|
@ -47,7 +47,7 @@ const ParseItemSet item_set_closure(const ParseItem &starting_item,
|
|||
if (symbol.is_token() || symbol.is_built_in())
|
||||
continue;
|
||||
|
||||
set<Symbol> next_lookahead_symbols = first_set(next_rule, grammar);
|
||||
set<Symbol> next_lookahead_symbols = first_symbols(next_rule, grammar);
|
||||
if (rule_can_be_blank(next_rule, grammar))
|
||||
next_lookahead_symbols.insert(lookahead_symbols.begin(),
|
||||
lookahead_symbols.end());
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue