Dedup auxiliary repeat rules from different source rules
This commit is contained in:
parent
6431a5ee75
commit
fd97b8a237
5 changed files with 4516 additions and 13058 deletions
|
|
@ -70,8 +70,9 @@ describe("expand_repeats", []() {
|
|||
it("does not create redundant auxiliary rules", [&]() {
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", choice({
|
||||
seq({ i_token(1), repeat(i_token(3)) }),
|
||||
seq({ i_token(2), repeat(i_token(3)) }) }) },
|
||||
seq({ i_token(1), repeat(i_token(4)) }),
|
||||
seq({ i_token(2), repeat(i_token(4)) }) }) },
|
||||
{ "rule1", seq({ i_token(3), repeat(i_token(4)) }) },
|
||||
}, {}, set<Symbol>());
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
|
@ -80,11 +81,12 @@ describe("expand_repeats", []() {
|
|||
{ "rule0", choice({
|
||||
seq({ i_token(1), choice({ i_aux_sym(0), blank() }) }),
|
||||
seq({ i_token(2), choice({ i_aux_sym(0), blank() }) }) }) },
|
||||
{ "rule1", seq({ i_token(3), choice({ i_aux_sym(0), blank() }) }) },
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", seq({
|
||||
i_token(3),
|
||||
i_token(4),
|
||||
choice({ i_aux_sym(0), blank() }) }) },
|
||||
})));
|
||||
});
|
||||
|
|
|
|||
410
spec/fixtures/parsers/golang.c
vendored
410
spec/fixtures/parsers/golang.c
vendored
File diff suppressed because it is too large
Load diff
17048
spec/fixtures/parsers/javascript.c
vendored
17048
spec/fixtures/parsers/javascript.c
vendored
File diff suppressed because it is too large
Load diff
84
spec/fixtures/parsers/json.c
vendored
84
spec/fixtures/parsers/json.c
vendored
|
|
@ -12,8 +12,8 @@ enum {
|
|||
sym_null,
|
||||
sym_true,
|
||||
sym_false,
|
||||
aux_sym_object_repeat0,
|
||||
aux_sym_array_repeat0,
|
||||
aux_sym_object_repeat1,
|
||||
aux_sym_array_repeat1,
|
||||
aux_sym_STR_LBRACE,
|
||||
aux_sym_STR_COLON,
|
||||
aux_sym_STR_COMMA,
|
||||
|
|
@ -34,8 +34,8 @@ static const char *ts_symbol_names[] = {
|
|||
[sym_null] = "null",
|
||||
[sym_true] = "true",
|
||||
[sym_false] = "false",
|
||||
[aux_sym_object_repeat0] = "object_repeat0",
|
||||
[aux_sym_array_repeat0] = "array_repeat0",
|
||||
[aux_sym_object_repeat1] = "object_repeat1",
|
||||
[aux_sym_array_repeat1] = "array_repeat1",
|
||||
[aux_sym_STR_LBRACE] = "STR_{",
|
||||
[aux_sym_STR_COLON] = "STR_:",
|
||||
[aux_sym_STR_COMMA] = "STR_,",
|
||||
|
|
@ -45,8 +45,8 @@ static const char *ts_symbol_names[] = {
|
|||
};
|
||||
|
||||
static const int ts_hidden_symbol_flags[SYMBOL_COUNT] = {
|
||||
[aux_sym_object_repeat0] = 1,
|
||||
[aux_sym_array_repeat0] = 1,
|
||||
[aux_sym_object_repeat1] = 1,
|
||||
[aux_sym_array_repeat1] = 1,
|
||||
[aux_sym_STR_LBRACE] = 1,
|
||||
[aux_sym_STR_COLON] = 1,
|
||||
[aux_sym_STR_COMMA] = 1,
|
||||
|
|
@ -479,12 +479,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_RBRACK] = SHIFT(9),
|
||||
},
|
||||
[5] = {
|
||||
[aux_sym_array_repeat0] = SHIFT(55),
|
||||
[aux_sym_array_repeat1] = SHIFT(55),
|
||||
[aux_sym_STR_COMMA] = SHIFT(13),
|
||||
[aux_sym_STR_RBRACK] = SHIFT(56),
|
||||
},
|
||||
[6] = {
|
||||
[aux_sym_array_repeat0] = REDUCE(sym_value, 1),
|
||||
[aux_sym_array_repeat1] = REDUCE(sym_value, 1),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_value, 1),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(sym_value, 1),
|
||||
},
|
||||
|
|
@ -511,12 +511,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_builtin_sym_end] = REDUCE(sym_array, 2),
|
||||
},
|
||||
[10] = {
|
||||
[aux_sym_array_repeat0] = SHIFT(12),
|
||||
[aux_sym_array_repeat1] = SHIFT(12),
|
||||
[aux_sym_STR_COMMA] = SHIFT(13),
|
||||
[aux_sym_STR_RBRACK] = SHIFT(14),
|
||||
},
|
||||
[11] = {
|
||||
[aux_sym_array_repeat0] = REDUCE(sym_array, 2),
|
||||
[aux_sym_array_repeat1] = REDUCE(sym_array, 2),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_array, 2),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(sym_array, 2),
|
||||
},
|
||||
|
|
@ -537,25 +537,25 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_LBRACK] = SHIFT(8),
|
||||
},
|
||||
[14] = {
|
||||
[aux_sym_array_repeat0] = REDUCE(sym_array, 3),
|
||||
[aux_sym_array_repeat1] = REDUCE(sym_array, 3),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_array, 3),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(sym_array, 3),
|
||||
},
|
||||
[15] = {
|
||||
[aux_sym_array_repeat0] = SHIFT(16),
|
||||
[aux_sym_array_repeat1] = SHIFT(16),
|
||||
[aux_sym_STR_COMMA] = SHIFT(13),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(aux_sym_array_repeat0, 2),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(aux_sym_array_repeat1, 2),
|
||||
},
|
||||
[16] = {
|
||||
[aux_sym_STR_RBRACK] = REDUCE(aux_sym_array_repeat0, 3),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(aux_sym_array_repeat1, 3),
|
||||
},
|
||||
[17] = {
|
||||
[aux_sym_array_repeat0] = REDUCE(sym_array, 4),
|
||||
[aux_sym_array_repeat1] = REDUCE(sym_array, 4),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_array, 4),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(sym_array, 4),
|
||||
},
|
||||
[18] = {
|
||||
[aux_sym_object_repeat0] = SHIFT(52),
|
||||
[aux_sym_object_repeat1] = SHIFT(52),
|
||||
[aux_sym_STR_COMMA] = SHIFT(37),
|
||||
[aux_sym_STR_RBRACE] = SHIFT(53),
|
||||
},
|
||||
|
|
@ -563,7 +563,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_COLON] = SHIFT(21),
|
||||
},
|
||||
[20] = {
|
||||
[aux_sym_array_repeat0] = REDUCE(sym_object, 2),
|
||||
[aux_sym_array_repeat1] = REDUCE(sym_object, 2),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_object, 2),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(sym_object, 2),
|
||||
},
|
||||
|
|
@ -580,12 +580,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_LBRACK] = SHIFT(25),
|
||||
},
|
||||
[22] = {
|
||||
[aux_sym_object_repeat0] = SHIFT(49),
|
||||
[aux_sym_object_repeat1] = SHIFT(49),
|
||||
[aux_sym_STR_COMMA] = SHIFT(37),
|
||||
[aux_sym_STR_RBRACE] = SHIFT(50),
|
||||
},
|
||||
[23] = {
|
||||
[aux_sym_object_repeat0] = REDUCE(sym_value, 1),
|
||||
[aux_sym_object_repeat1] = REDUCE(sym_value, 1),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_value, 1),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(sym_value, 1),
|
||||
},
|
||||
|
|
@ -609,12 +609,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_RBRACK] = SHIFT(27),
|
||||
},
|
||||
[26] = {
|
||||
[aux_sym_array_repeat0] = SHIFT(28),
|
||||
[aux_sym_array_repeat1] = SHIFT(28),
|
||||
[aux_sym_STR_COMMA] = SHIFT(13),
|
||||
[aux_sym_STR_RBRACK] = SHIFT(29),
|
||||
},
|
||||
[27] = {
|
||||
[aux_sym_object_repeat0] = REDUCE(sym_array, 2),
|
||||
[aux_sym_object_repeat1] = REDUCE(sym_array, 2),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_array, 2),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(sym_array, 2),
|
||||
},
|
||||
|
|
@ -622,17 +622,17 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_RBRACK] = SHIFT(30),
|
||||
},
|
||||
[29] = {
|
||||
[aux_sym_object_repeat0] = REDUCE(sym_array, 3),
|
||||
[aux_sym_object_repeat1] = REDUCE(sym_array, 3),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_array, 3),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(sym_array, 3),
|
||||
},
|
||||
[30] = {
|
||||
[aux_sym_object_repeat0] = REDUCE(sym_array, 4),
|
||||
[aux_sym_object_repeat1] = REDUCE(sym_array, 4),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_array, 4),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(sym_array, 4),
|
||||
},
|
||||
[31] = {
|
||||
[aux_sym_object_repeat0] = SHIFT(46),
|
||||
[aux_sym_object_repeat1] = SHIFT(46),
|
||||
[aux_sym_STR_COMMA] = SHIFT(37),
|
||||
[aux_sym_STR_RBRACE] = SHIFT(47),
|
||||
},
|
||||
|
|
@ -640,7 +640,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_COLON] = SHIFT(34),
|
||||
},
|
||||
[33] = {
|
||||
[aux_sym_object_repeat0] = REDUCE(sym_object, 2),
|
||||
[aux_sym_object_repeat1] = REDUCE(sym_object, 2),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_object, 2),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(sym_object, 2),
|
||||
},
|
||||
|
|
@ -657,7 +657,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_LBRACK] = SHIFT(25),
|
||||
},
|
||||
[35] = {
|
||||
[aux_sym_object_repeat0] = SHIFT(36),
|
||||
[aux_sym_object_repeat1] = SHIFT(36),
|
||||
[aux_sym_STR_COMMA] = SHIFT(37),
|
||||
[aux_sym_STR_RBRACE] = SHIFT(38),
|
||||
},
|
||||
|
|
@ -669,14 +669,14 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[sym_string] = SHIFT(40),
|
||||
},
|
||||
[38] = {
|
||||
[aux_sym_object_repeat0] = REDUCE(sym_object, 5),
|
||||
[aux_sym_object_repeat1] = REDUCE(sym_object, 5),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_object, 5),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(sym_object, 5),
|
||||
},
|
||||
[39] = {
|
||||
[aux_sym_object_repeat0] = SHIFT(44),
|
||||
[aux_sym_object_repeat1] = SHIFT(44),
|
||||
[aux_sym_STR_COMMA] = SHIFT(37),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat0, 2),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat1, 2),
|
||||
},
|
||||
[40] = {
|
||||
[aux_sym_STR_COLON] = SHIFT(41),
|
||||
|
|
@ -694,18 +694,18 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_LBRACK] = SHIFT(25),
|
||||
},
|
||||
[42] = {
|
||||
[aux_sym_object_repeat0] = SHIFT(43),
|
||||
[aux_sym_object_repeat1] = SHIFT(43),
|
||||
[aux_sym_STR_COMMA] = SHIFT(37),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat0, 4),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat1, 4),
|
||||
},
|
||||
[43] = {
|
||||
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat0, 5),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat1, 5),
|
||||
},
|
||||
[44] = {
|
||||
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat0, 3),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat1, 3),
|
||||
},
|
||||
[45] = {
|
||||
[aux_sym_object_repeat0] = REDUCE(sym_object, 6),
|
||||
[aux_sym_object_repeat1] = REDUCE(sym_object, 6),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_object, 6),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(sym_object, 6),
|
||||
},
|
||||
|
|
@ -713,12 +713,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_RBRACE] = SHIFT(48),
|
||||
},
|
||||
[47] = {
|
||||
[aux_sym_object_repeat0] = REDUCE(sym_object, 3),
|
||||
[aux_sym_object_repeat1] = REDUCE(sym_object, 3),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_object, 3),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(sym_object, 3),
|
||||
},
|
||||
[48] = {
|
||||
[aux_sym_object_repeat0] = REDUCE(sym_object, 4),
|
||||
[aux_sym_object_repeat1] = REDUCE(sym_object, 4),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_object, 4),
|
||||
[aux_sym_STR_RBRACE] = REDUCE(sym_object, 4),
|
||||
},
|
||||
|
|
@ -726,12 +726,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_RBRACE] = SHIFT(51),
|
||||
},
|
||||
[50] = {
|
||||
[aux_sym_array_repeat0] = REDUCE(sym_object, 5),
|
||||
[aux_sym_array_repeat1] = REDUCE(sym_object, 5),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_object, 5),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(sym_object, 5),
|
||||
},
|
||||
[51] = {
|
||||
[aux_sym_array_repeat0] = REDUCE(sym_object, 6),
|
||||
[aux_sym_array_repeat1] = REDUCE(sym_object, 6),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_object, 6),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(sym_object, 6),
|
||||
},
|
||||
|
|
@ -739,12 +739,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_RBRACE] = SHIFT(54),
|
||||
},
|
||||
[53] = {
|
||||
[aux_sym_array_repeat0] = REDUCE(sym_object, 3),
|
||||
[aux_sym_array_repeat1] = REDUCE(sym_object, 3),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_object, 3),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(sym_object, 3),
|
||||
},
|
||||
[54] = {
|
||||
[aux_sym_array_repeat0] = REDUCE(sym_object, 4),
|
||||
[aux_sym_array_repeat1] = REDUCE(sym_object, 4),
|
||||
[aux_sym_STR_COMMA] = REDUCE(sym_object, 4),
|
||||
[aux_sym_STR_RBRACK] = REDUCE(sym_object, 4),
|
||||
},
|
||||
|
|
@ -758,7 +758,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[ts_builtin_sym_end] = REDUCE(sym_array, 4),
|
||||
},
|
||||
[58] = {
|
||||
[aux_sym_object_repeat0] = SHIFT(66),
|
||||
[aux_sym_object_repeat1] = SHIFT(66),
|
||||
[aux_sym_STR_COMMA] = SHIFT(37),
|
||||
[aux_sym_STR_RBRACE] = SHIFT(67),
|
||||
},
|
||||
|
|
@ -781,7 +781,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
|
|||
[aux_sym_STR_LBRACK] = SHIFT(25),
|
||||
},
|
||||
[62] = {
|
||||
[aux_sym_object_repeat0] = SHIFT(63),
|
||||
[aux_sym_object_repeat1] = SHIFT(63),
|
||||
[aux_sym_STR_COMMA] = SHIFT(37),
|
||||
[aux_sym_STR_RBRACE] = SHIFT(64),
|
||||
},
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@ using rules::Symbol;
|
|||
|
||||
class ExpandRepeats : public rules::IdentityRuleFn {
|
||||
string rule_name;
|
||||
size_t offset;
|
||||
size_t repeat_count;
|
||||
vector<pair<rule_ptr, Symbol>> existing_repeats;
|
||||
|
||||
rule_ptr expand_repeat(const Repeat *rule) {
|
||||
|
|
@ -38,7 +40,7 @@ class ExpandRepeats : public rules::IdentityRuleFn {
|
|||
|
||||
rule_ptr inner_rule = apply(rule->content);
|
||||
size_t index = aux_rules.size();
|
||||
string helper_rule_name = rule_name + string("_repeat") + to_string(index);
|
||||
string helper_rule_name = rule_name + string("_repeat") + to_string(++repeat_count);
|
||||
Symbol repeat_symbol(offset + index, rules::SymbolOptionAuxiliary);
|
||||
existing_repeats.push_back({ rule->copy(), repeat_symbol });
|
||||
aux_rules.push_back(
|
||||
|
|
@ -53,23 +55,29 @@ class ExpandRepeats : public rules::IdentityRuleFn {
|
|||
}
|
||||
|
||||
public:
|
||||
ExpandRepeats(string rule_name, size_t offset)
|
||||
: rule_name(rule_name), offset(offset) {}
|
||||
ExpandRepeats(size_t offset) : offset(offset) {}
|
||||
|
||||
rule_ptr expand(const rule_ptr &rule, const string &name) {
|
||||
rule_name = name;
|
||||
repeat_count = 0;
|
||||
return apply(rule);
|
||||
}
|
||||
|
||||
size_t offset;
|
||||
vector<pair<string, rules::rule_ptr>> aux_rules;
|
||||
};
|
||||
|
||||
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
|
||||
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
|
||||
|
||||
ExpandRepeats expander(aux_rules.size());
|
||||
|
||||
for (auto &pair : grammar.rules) {
|
||||
ExpandRepeats expander(pair.first, aux_rules.size());
|
||||
rules.push_back({ pair.first, expander.apply(pair.second) });
|
||||
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(),
|
||||
expander.aux_rules.end());
|
||||
rules.push_back({ pair.first, expander.expand(pair.second, pair.first) });
|
||||
}
|
||||
|
||||
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(),
|
||||
expander.aux_rules.end());
|
||||
|
||||
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue