Dedup auxiliary repeat rules from different source rules

This commit is contained in:
Max Brunsfeld 2015-05-02 20:42:47 -07:00
parent 6431a5ee75
commit fd97b8a237
5 changed files with 4516 additions and 13058 deletions

View file

@ -70,8 +70,9 @@ describe("expand_repeats", []() {
it("does not create redundant auxiliary rules", [&]() {
SyntaxGrammar grammar({
{ "rule0", choice({
seq({ i_token(1), repeat(i_token(3)) }),
seq({ i_token(2), repeat(i_token(3)) }) }) },
seq({ i_token(1), repeat(i_token(4)) }),
seq({ i_token(2), repeat(i_token(4)) }) }) },
{ "rule1", seq({ i_token(3), repeat(i_token(4)) }) },
}, {}, set<Symbol>());
auto match = expand_repeats(grammar);
@ -80,11 +81,12 @@ describe("expand_repeats", []() {
{ "rule0", choice({
seq({ i_token(1), choice({ i_aux_sym(0), blank() }) }),
seq({ i_token(2), choice({ i_aux_sym(0), blank() }) }) }) },
{ "rule1", seq({ i_token(3), choice({ i_aux_sym(0), blank() }) }) },
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", seq({
i_token(3),
i_token(4),
choice({ i_aux_sym(0), blank() }) }) },
})));
});

File diff suppressed because it is too large. Load diff

File diff suppressed because it is too large. Load diff

View file

@ -12,8 +12,8 @@ enum {
sym_null,
sym_true,
sym_false,
aux_sym_object_repeat0,
aux_sym_array_repeat0,
aux_sym_object_repeat1,
aux_sym_array_repeat1,
aux_sym_STR_LBRACE,
aux_sym_STR_COLON,
aux_sym_STR_COMMA,
@ -34,8 +34,8 @@ static const char *ts_symbol_names[] = {
[sym_null] = "null",
[sym_true] = "true",
[sym_false] = "false",
[aux_sym_object_repeat0] = "object_repeat0",
[aux_sym_array_repeat0] = "array_repeat0",
[aux_sym_object_repeat1] = "object_repeat1",
[aux_sym_array_repeat1] = "array_repeat1",
[aux_sym_STR_LBRACE] = "STR_{",
[aux_sym_STR_COLON] = "STR_:",
[aux_sym_STR_COMMA] = "STR_,",
@ -45,8 +45,8 @@ static const char *ts_symbol_names[] = {
};
static const int ts_hidden_symbol_flags[SYMBOL_COUNT] = {
[aux_sym_object_repeat0] = 1,
[aux_sym_array_repeat0] = 1,
[aux_sym_object_repeat1] = 1,
[aux_sym_array_repeat1] = 1,
[aux_sym_STR_LBRACE] = 1,
[aux_sym_STR_COLON] = 1,
[aux_sym_STR_COMMA] = 1,
@ -479,12 +479,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_RBRACK] = SHIFT(9),
},
[5] = {
[aux_sym_array_repeat0] = SHIFT(55),
[aux_sym_array_repeat1] = SHIFT(55),
[aux_sym_STR_COMMA] = SHIFT(13),
[aux_sym_STR_RBRACK] = SHIFT(56),
},
[6] = {
[aux_sym_array_repeat0] = REDUCE(sym_value, 1),
[aux_sym_array_repeat1] = REDUCE(sym_value, 1),
[aux_sym_STR_COMMA] = REDUCE(sym_value, 1),
[aux_sym_STR_RBRACK] = REDUCE(sym_value, 1),
},
@ -511,12 +511,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_builtin_sym_end] = REDUCE(sym_array, 2),
},
[10] = {
[aux_sym_array_repeat0] = SHIFT(12),
[aux_sym_array_repeat1] = SHIFT(12),
[aux_sym_STR_COMMA] = SHIFT(13),
[aux_sym_STR_RBRACK] = SHIFT(14),
},
[11] = {
[aux_sym_array_repeat0] = REDUCE(sym_array, 2),
[aux_sym_array_repeat1] = REDUCE(sym_array, 2),
[aux_sym_STR_COMMA] = REDUCE(sym_array, 2),
[aux_sym_STR_RBRACK] = REDUCE(sym_array, 2),
},
@ -537,25 +537,25 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_LBRACK] = SHIFT(8),
},
[14] = {
[aux_sym_array_repeat0] = REDUCE(sym_array, 3),
[aux_sym_array_repeat1] = REDUCE(sym_array, 3),
[aux_sym_STR_COMMA] = REDUCE(sym_array, 3),
[aux_sym_STR_RBRACK] = REDUCE(sym_array, 3),
},
[15] = {
[aux_sym_array_repeat0] = SHIFT(16),
[aux_sym_array_repeat1] = SHIFT(16),
[aux_sym_STR_COMMA] = SHIFT(13),
[aux_sym_STR_RBRACK] = REDUCE(aux_sym_array_repeat0, 2),
[aux_sym_STR_RBRACK] = REDUCE(aux_sym_array_repeat1, 2),
},
[16] = {
[aux_sym_STR_RBRACK] = REDUCE(aux_sym_array_repeat0, 3),
[aux_sym_STR_RBRACK] = REDUCE(aux_sym_array_repeat1, 3),
},
[17] = {
[aux_sym_array_repeat0] = REDUCE(sym_array, 4),
[aux_sym_array_repeat1] = REDUCE(sym_array, 4),
[aux_sym_STR_COMMA] = REDUCE(sym_array, 4),
[aux_sym_STR_RBRACK] = REDUCE(sym_array, 4),
},
[18] = {
[aux_sym_object_repeat0] = SHIFT(52),
[aux_sym_object_repeat1] = SHIFT(52),
[aux_sym_STR_COMMA] = SHIFT(37),
[aux_sym_STR_RBRACE] = SHIFT(53),
},
@ -563,7 +563,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_COLON] = SHIFT(21),
},
[20] = {
[aux_sym_array_repeat0] = REDUCE(sym_object, 2),
[aux_sym_array_repeat1] = REDUCE(sym_object, 2),
[aux_sym_STR_COMMA] = REDUCE(sym_object, 2),
[aux_sym_STR_RBRACK] = REDUCE(sym_object, 2),
},
@ -580,12 +580,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_LBRACK] = SHIFT(25),
},
[22] = {
[aux_sym_object_repeat0] = SHIFT(49),
[aux_sym_object_repeat1] = SHIFT(49),
[aux_sym_STR_COMMA] = SHIFT(37),
[aux_sym_STR_RBRACE] = SHIFT(50),
},
[23] = {
[aux_sym_object_repeat0] = REDUCE(sym_value, 1),
[aux_sym_object_repeat1] = REDUCE(sym_value, 1),
[aux_sym_STR_COMMA] = REDUCE(sym_value, 1),
[aux_sym_STR_RBRACE] = REDUCE(sym_value, 1),
},
@ -609,12 +609,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_RBRACK] = SHIFT(27),
},
[26] = {
[aux_sym_array_repeat0] = SHIFT(28),
[aux_sym_array_repeat1] = SHIFT(28),
[aux_sym_STR_COMMA] = SHIFT(13),
[aux_sym_STR_RBRACK] = SHIFT(29),
},
[27] = {
[aux_sym_object_repeat0] = REDUCE(sym_array, 2),
[aux_sym_object_repeat1] = REDUCE(sym_array, 2),
[aux_sym_STR_COMMA] = REDUCE(sym_array, 2),
[aux_sym_STR_RBRACE] = REDUCE(sym_array, 2),
},
@ -622,17 +622,17 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_RBRACK] = SHIFT(30),
},
[29] = {
[aux_sym_object_repeat0] = REDUCE(sym_array, 3),
[aux_sym_object_repeat1] = REDUCE(sym_array, 3),
[aux_sym_STR_COMMA] = REDUCE(sym_array, 3),
[aux_sym_STR_RBRACE] = REDUCE(sym_array, 3),
},
[30] = {
[aux_sym_object_repeat0] = REDUCE(sym_array, 4),
[aux_sym_object_repeat1] = REDUCE(sym_array, 4),
[aux_sym_STR_COMMA] = REDUCE(sym_array, 4),
[aux_sym_STR_RBRACE] = REDUCE(sym_array, 4),
},
[31] = {
[aux_sym_object_repeat0] = SHIFT(46),
[aux_sym_object_repeat1] = SHIFT(46),
[aux_sym_STR_COMMA] = SHIFT(37),
[aux_sym_STR_RBRACE] = SHIFT(47),
},
@ -640,7 +640,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_COLON] = SHIFT(34),
},
[33] = {
[aux_sym_object_repeat0] = REDUCE(sym_object, 2),
[aux_sym_object_repeat1] = REDUCE(sym_object, 2),
[aux_sym_STR_COMMA] = REDUCE(sym_object, 2),
[aux_sym_STR_RBRACE] = REDUCE(sym_object, 2),
},
@ -657,7 +657,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_LBRACK] = SHIFT(25),
},
[35] = {
[aux_sym_object_repeat0] = SHIFT(36),
[aux_sym_object_repeat1] = SHIFT(36),
[aux_sym_STR_COMMA] = SHIFT(37),
[aux_sym_STR_RBRACE] = SHIFT(38),
},
@ -669,14 +669,14 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[sym_string] = SHIFT(40),
},
[38] = {
[aux_sym_object_repeat0] = REDUCE(sym_object, 5),
[aux_sym_object_repeat1] = REDUCE(sym_object, 5),
[aux_sym_STR_COMMA] = REDUCE(sym_object, 5),
[aux_sym_STR_RBRACE] = REDUCE(sym_object, 5),
},
[39] = {
[aux_sym_object_repeat0] = SHIFT(44),
[aux_sym_object_repeat1] = SHIFT(44),
[aux_sym_STR_COMMA] = SHIFT(37),
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat0, 2),
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat1, 2),
},
[40] = {
[aux_sym_STR_COLON] = SHIFT(41),
@ -694,18 +694,18 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_LBRACK] = SHIFT(25),
},
[42] = {
[aux_sym_object_repeat0] = SHIFT(43),
[aux_sym_object_repeat1] = SHIFT(43),
[aux_sym_STR_COMMA] = SHIFT(37),
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat0, 4),
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat1, 4),
},
[43] = {
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat0, 5),
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat1, 5),
},
[44] = {
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat0, 3),
[aux_sym_STR_RBRACE] = REDUCE(aux_sym_object_repeat1, 3),
},
[45] = {
[aux_sym_object_repeat0] = REDUCE(sym_object, 6),
[aux_sym_object_repeat1] = REDUCE(sym_object, 6),
[aux_sym_STR_COMMA] = REDUCE(sym_object, 6),
[aux_sym_STR_RBRACE] = REDUCE(sym_object, 6),
},
@ -713,12 +713,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_RBRACE] = SHIFT(48),
},
[47] = {
[aux_sym_object_repeat0] = REDUCE(sym_object, 3),
[aux_sym_object_repeat1] = REDUCE(sym_object, 3),
[aux_sym_STR_COMMA] = REDUCE(sym_object, 3),
[aux_sym_STR_RBRACE] = REDUCE(sym_object, 3),
},
[48] = {
[aux_sym_object_repeat0] = REDUCE(sym_object, 4),
[aux_sym_object_repeat1] = REDUCE(sym_object, 4),
[aux_sym_STR_COMMA] = REDUCE(sym_object, 4),
[aux_sym_STR_RBRACE] = REDUCE(sym_object, 4),
},
@ -726,12 +726,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_RBRACE] = SHIFT(51),
},
[50] = {
[aux_sym_array_repeat0] = REDUCE(sym_object, 5),
[aux_sym_array_repeat1] = REDUCE(sym_object, 5),
[aux_sym_STR_COMMA] = REDUCE(sym_object, 5),
[aux_sym_STR_RBRACK] = REDUCE(sym_object, 5),
},
[51] = {
[aux_sym_array_repeat0] = REDUCE(sym_object, 6),
[aux_sym_array_repeat1] = REDUCE(sym_object, 6),
[aux_sym_STR_COMMA] = REDUCE(sym_object, 6),
[aux_sym_STR_RBRACK] = REDUCE(sym_object, 6),
},
@ -739,12 +739,12 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_RBRACE] = SHIFT(54),
},
[53] = {
[aux_sym_array_repeat0] = REDUCE(sym_object, 3),
[aux_sym_array_repeat1] = REDUCE(sym_object, 3),
[aux_sym_STR_COMMA] = REDUCE(sym_object, 3),
[aux_sym_STR_RBRACK] = REDUCE(sym_object, 3),
},
[54] = {
[aux_sym_array_repeat0] = REDUCE(sym_object, 4),
[aux_sym_array_repeat1] = REDUCE(sym_object, 4),
[aux_sym_STR_COMMA] = REDUCE(sym_object, 4),
[aux_sym_STR_RBRACK] = REDUCE(sym_object, 4),
},
@ -758,7 +758,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[ts_builtin_sym_end] = REDUCE(sym_array, 4),
},
[58] = {
[aux_sym_object_repeat0] = SHIFT(66),
[aux_sym_object_repeat1] = SHIFT(66),
[aux_sym_STR_COMMA] = SHIFT(37),
[aux_sym_STR_RBRACE] = SHIFT(67),
},
@ -781,7 +781,7 @@ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[aux_sym_STR_LBRACK] = SHIFT(25),
},
[62] = {
[aux_sym_object_repeat0] = SHIFT(63),
[aux_sym_object_repeat1] = SHIFT(63),
[aux_sym_STR_COMMA] = SHIFT(37),
[aux_sym_STR_RBRACE] = SHIFT(64),
},

View file

@ -28,6 +28,8 @@ using rules::Symbol;
class ExpandRepeats : public rules::IdentityRuleFn {
string rule_name;
size_t offset;
size_t repeat_count;
vector<pair<rule_ptr, Symbol>> existing_repeats;
rule_ptr expand_repeat(const Repeat *rule) {
@ -38,7 +40,7 @@ class ExpandRepeats : public rules::IdentityRuleFn {
rule_ptr inner_rule = apply(rule->content);
size_t index = aux_rules.size();
string helper_rule_name = rule_name + string("_repeat") + to_string(index);
string helper_rule_name = rule_name + string("_repeat") + to_string(++repeat_count);
Symbol repeat_symbol(offset + index, rules::SymbolOptionAuxiliary);
existing_repeats.push_back({ rule->copy(), repeat_symbol });
aux_rules.push_back(
@ -53,23 +55,29 @@ class ExpandRepeats : public rules::IdentityRuleFn {
}
public:
ExpandRepeats(string rule_name, size_t offset)
: rule_name(rule_name), offset(offset) {}
ExpandRepeats(size_t offset) : offset(offset) {}
rule_ptr expand(const rule_ptr &rule, const string &name) {
rule_name = name;
repeat_count = 0;
return apply(rule);
}
size_t offset;
vector<pair<string, rules::rule_ptr>> aux_rules;
};
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
ExpandRepeats expander(aux_rules.size());
for (auto &pair : grammar.rules) {
ExpandRepeats expander(pair.first, aux_rules.size());
rules.push_back({ pair.first, expander.apply(pair.second) });
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(),
expander.aux_rules.end());
rules.push_back({ pair.first, expander.expand(pair.second, pair.first) });
}
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(),
expander.aux_rules.end());
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
}