Convert repeat rules into pairs of recursive rules

This commit is contained in:
Max Brunsfeld 2014-01-24 18:25:56 -08:00
parent cbcf28f9d4
commit 67fa81d079
10 changed files with 328 additions and 218 deletions

View file

@ -0,0 +1,36 @@
#include "spec_helper.h"
#include "prepare_grammar/expand_repeats.h"
START_TEST

using prepare_grammar::expand_repeats;
using namespace rules;

// Spec for the repeat-expansion pass of grammar preparation.
describe("expanding repeat rules in a grammar", []() {
    it("replaces repeat rules with pairs of recursive rules", [&]() {
        // A single rule whose middle element repeats the sequence `a b`.
        const Grammar input = Grammar({
            { "rule1", seq({
                sym("x"),
                repeat(seq({ sym("a"), sym("b") })),
                sym("y")
            }) },
        });

        // The repeat is replaced by a reference to a generated helper rule;
        // the helper is the repeated content followed by either itself or blank.
        const Grammar expected = Grammar({
            { "rule1", seq({
                sym("x"),
                sym("repeat_helper1"),
                sym("y")
            }) },
            { "repeat_helper1", seq({
                seq({ sym("a"), sym("b") }),
                choice({
                    sym("repeat_helper1"),
                    blank()
                }),
            }) }
        });

        Grammar result = expand_repeats(input);
        AssertThat(result, Equals(expected));
    });
});

END_TEST

View file

@ -1,14 +1,14 @@
#include "spec_helper.h"
#include "prepare_grammar/perform.h"
#include "prepare_grammar/extract_tokens.h"
START_TEST
using prepare_grammar::perform;
using prepare_grammar::extract_tokens;
using namespace rules;
describe("preparing a grammar", []() {
it("extracts character-based subtrees into a separate grammar", [&]() {
pair<Grammar, Grammar> result = perform(Grammar({
pair<Grammar, Grammar> result = extract_tokens(Grammar({
{ "rule1", seq({
character('a'),
character('b'),
@ -38,7 +38,7 @@ describe("preparing a grammar", []() {
});
it("turns entire rules into tokens when they contain no symbols", [&]() {
auto result = perform(Grammar({
auto result = extract_tokens(Grammar({
{ "rule1", sym("rule2") },
{ "rule2", seq({
character('a'),

View file

@ -3,12 +3,12 @@
enum ts_symbol {
ts_symbol_expression,
ts_symbol_term,
ts_symbol_factor,
ts_symbol_term,
ts_symbol_times,
ts_symbol_plus,
ts_symbol_2,
ts_symbol_1,
ts_symbol_plus,
ts_symbol_number,
ts_symbol___END__,
ts_symbol_variable,
@ -16,12 +16,12 @@ enum ts_symbol {
static const char *ts_symbol_names[] = {
"expression",
"term",
"factor",
"term",
"times",
"plus",
"2",
"1",
"plus",
"number",
"__END__",
"variable",
@ -75,44 +75,28 @@ static void ts_lex(TSParser *parser) {
case 10:
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(13);
if (LOOKAHEAD_CHAR() == '(')
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(12);
if (isdigit(LOOKAHEAD_CHAR()))
if (LOOKAHEAD_CHAR() == '(')
ADVANCE(11);
LEX_ERROR(3, EXPECT({"<digit>", "'('", "<word>"}));
LEX_ERROR(3, EXPECT({"'('", "<digit>", "<word>"}));
case 11:
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(11);
ACCEPT_TOKEN(ts_symbol_number);
case 12:
ACCEPT_TOKEN(ts_symbol_1);
case 12:
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(12);
ACCEPT_TOKEN(ts_symbol_number);
case 13:
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(13);
ACCEPT_TOKEN(ts_symbol_variable);
case 14:
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(13);
if (LOOKAHEAD_CHAR() == '(')
ADVANCE(12);
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(11);
LEX_ERROR(3, EXPECT({"<digit>", "'('", "<word>"}));
case 15:
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(8);
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(3);
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(5);
LEX_ERROR(3, EXPECT({"')'", "'*'", "'+'"}));
case 16:
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(8);
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
LEX_ERROR(2, EXPECT({"''", "'+'"}));
case 17:
case 15:
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(3);
if (LOOKAHEAD_CHAR() == '+')
@ -120,14 +104,6 @@ static void ts_lex(TSParser *parser) {
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
LEX_ERROR(3, EXPECT({"''", "'+'", "'*'"}));
case 18:
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(8);
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(3);
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
LEX_ERROR(3, EXPECT({"''", "'*'", "'+'"}));
default:
LEX_PANIC();
}
@ -164,7 +140,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"__END__"}));
}
case 2:
SET_LEX_STATE(16);
SET_LEX_STATE(14);
switch (LOOKAHEAD_SYM()) {
case ts_symbol___END__:
REDUCE(ts_symbol_expression, 1);
@ -338,7 +314,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(3, EXPECT({"2", "plus", "times"}));
}
case 16:
SET_LEX_STATE(15);
SET_LEX_STATE(7);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_plus:
REDUCE(ts_symbol_term, 1);
@ -350,7 +326,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(3, EXPECT({"times", "2", "plus"}));
}
case 17:
SET_LEX_STATE(14);
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(22);
@ -448,7 +424,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(2, EXPECT({"times", "2"}));
}
case 26:
SET_LEX_STATE(14);
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(31);
@ -540,7 +516,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(2, EXPECT({"times", "__END__"}));
}
case 35:
SET_LEX_STATE(14);
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(40);
@ -604,7 +580,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"__END__"}));
}
case 41:
SET_LEX_STATE(17);
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_times:
REDUCE(ts_symbol_factor, 1);
@ -642,7 +618,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"2"}));
}
case 44:
SET_LEX_STATE(17);
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_times:
REDUCE(ts_symbol_factor, 3);
@ -654,7 +630,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(3, EXPECT({"__END__", "plus", "times"}));
}
case 45:
SET_LEX_STATE(18);
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_plus:
REDUCE(ts_symbol_term, 1);
@ -666,7 +642,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(3, EXPECT({"times", "__END__", "plus"}));
}
case 46:
SET_LEX_STATE(14);
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(51);
@ -680,7 +656,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(4, EXPECT({"variable", "number", "1", "factor"}));
}
case 47:
SET_LEX_STATE(16);
SET_LEX_STATE(14);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_plus:
REDUCE(ts_symbol_factor, 1);
@ -716,7 +692,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"2"}));
}
case 50:
SET_LEX_STATE(16);
SET_LEX_STATE(14);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_plus:
REDUCE(ts_symbol_factor, 3);
@ -726,7 +702,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(2, EXPECT({"__END__", "plus"}));
}
case 51:
SET_LEX_STATE(16);
SET_LEX_STATE(14);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_plus:
REDUCE(ts_symbol_term, 3);

View file

@ -2,8 +2,10 @@
#include <ctype.h>
enum ts_symbol {
ts_symbol_repeat_helper1,
ts_symbol_value,
ts_symbol_object,
ts_symbol_repeat_helper2,
ts_symbol_array,
ts_symbol___END__,
ts_symbol_number,
@ -18,8 +20,10 @@ enum ts_symbol {
};
static const char *ts_symbol_names[] = {
"repeat_helper1",
"value",
"object",
"repeat_helper2",
"array",
"__END__",
"number",
@ -47,7 +51,7 @@ static void ts_lex(TSParser *parser) {
ADVANCE(3);
ACCEPT_TOKEN(ts_symbol_2);
case 3:
LEX_ERROR(0, EXPECT({}));
ACCEPT_TOKEN(ts_symbol_7);
case 4:
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(5);
@ -55,11 +59,11 @@ static void ts_lex(TSParser *parser) {
case 5:
ACCEPT_TOKEN(ts_symbol_3);
case 6:
if (LOOKAHEAD_CHAR() == ',')
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(7);
ACCEPT_TOKEN(ts_symbol_6);
LEX_ERROR(1, EXPECT({"'}'"}));
case 7:
LEX_ERROR(0, EXPECT({}));
ACCEPT_TOKEN(ts_symbol_6);
case 8:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(12);
@ -91,18 +95,12 @@ static void ts_lex(TSParser *parser) {
case 14:
ACCEPT_TOKEN(ts_symbol_string);
case 15:
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(16);
LEX_ERROR(1, EXPECT({"'}'"}));
case 16:
ACCEPT_TOKEN(ts_symbol_7);
case 17:
if (LOOKAHEAD_CHAR() == ':')
ADVANCE(18);
ADVANCE(16);
LEX_ERROR(1, EXPECT({"':'"}));
case 18:
case 16:
ACCEPT_TOKEN(ts_symbol_5);
case 19:
case 17:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(12);
LEX_ERROR(1, EXPECT({"'\"'"}));
@ -119,15 +117,15 @@ static TSParseResult ts_parse(const char *input) {
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
SHIFT(34);
SHIFT(31);
case ts_symbol_string:
SHIFT(34);
SHIFT(31);
case ts_symbol_array:
SHIFT(34);
SHIFT(31);
case ts_symbol_object:
SHIFT(34);
SHIFT(31);
case ts_symbol_4:
SHIFT(28);
SHIFT(25);
case ts_symbol_1:
SHIFT(2);
case ts_symbol_value:
@ -147,17 +145,17 @@ static TSParseResult ts_parse(const char *input) {
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
SHIFT(14);
SHIFT(19);
case ts_symbol_string:
SHIFT(14);
SHIFT(19);
case ts_symbol_array:
SHIFT(14);
SHIFT(19);
case ts_symbol_object:
SHIFT(14);
SHIFT(19);
case ts_symbol_4:
SHIFT(7);
SHIFT(8);
case ts_symbol_value:
SHIFT(25);
SHIFT(22);
case ts_symbol_1:
SHIFT(3);
default:
@ -167,15 +165,15 @@ static TSParseResult ts_parse(const char *input) {
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
SHIFT(14);
SHIFT(19);
case ts_symbol_string:
SHIFT(14);
SHIFT(19);
case ts_symbol_array:
SHIFT(14);
SHIFT(19);
case ts_symbol_object:
SHIFT(14);
SHIFT(19);
case ts_symbol_4:
SHIFT(7);
SHIFT(8);
case ts_symbol_value:
SHIFT(4);
case ts_symbol_1:
@ -186,10 +184,14 @@ static TSParseResult ts_parse(const char *input) {
case 4:
SET_LEX_STATE(2);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
SHIFT(7);
case ts_symbol_2:
SHIFT(5);
case ts_symbol_repeat_helper1:
SHIFT(5);
default:
PARSE_ERROR(1, EXPECT({"2"}));
PARSE_ERROR(3, EXPECT({"repeat_helper1", "2", "7"}));
}
case 5:
SET_LEX_STATE(4);
@ -202,60 +204,62 @@ static TSParseResult ts_parse(const char *input) {
case 6:
SET_LEX_STATE(2);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
REDUCE(ts_symbol_array, 4);
case ts_symbol_2:
REDUCE(ts_symbol_array, 4);
default:
PARSE_ERROR(1, EXPECT({"2"}));
PARSE_ERROR(2, EXPECT({"2", "7"}));
}
case 7:
SET_LEX_STATE(19);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
SHIFT(19);
case ts_symbol_string:
SHIFT(19);
case ts_symbol_array:
SHIFT(19);
case ts_symbol_object:
SHIFT(19);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_value:
SHIFT(20);
case ts_symbol_1:
SHIFT(3);
default:
PARSE_ERROR(1, EXPECT({"string"}));
PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"}));
}
case 8:
SET_LEX_STATE(17);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_5:
case ts_symbol_string:
SHIFT(9);
default:
PARSE_ERROR(1, EXPECT({"5"}));
PARSE_ERROR(1, EXPECT({"string"}));
}
case 9:
SET_LEX_STATE(8);
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
SHIFT(21);
case ts_symbol_string:
SHIFT(21);
case ts_symbol_array:
SHIFT(21);
case ts_symbol_object:
SHIFT(21);
case ts_symbol_4:
SHIFT(15);
case ts_symbol_value:
SHIFT(22);
case ts_symbol_1:
case ts_symbol_5:
SHIFT(10);
default:
PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"}));
PARSE_ERROR(1, EXPECT({"5"}));
}
case 10:
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
SHIFT(14);
SHIFT(19);
case ts_symbol_string:
SHIFT(14);
SHIFT(19);
case ts_symbol_array:
SHIFT(14);
SHIFT(19);
case ts_symbol_object:
SHIFT(14);
SHIFT(19);
case ts_symbol_4:
SHIFT(7);
SHIFT(8);
case ts_symbol_value:
SHIFT(11);
case ts_symbol_1:
@ -266,144 +270,140 @@ static TSParseResult ts_parse(const char *input) {
case 11:
SET_LEX_STATE(2);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
SHIFT(14);
case ts_symbol_2:
SHIFT(12);
case ts_symbol_repeat_helper2:
SHIFT(12);
default:
PARSE_ERROR(1, EXPECT({"2"}));
PARSE_ERROR(3, EXPECT({"repeat_helper2", "2", "7"}));
}
case 12:
SET_LEX_STATE(4);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_3:
SHIFT(13);
default:
PARSE_ERROR(1, EXPECT({"3"}));
}
case 13:
SET_LEX_STATE(6);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_6:
REDUCE(ts_symbol_array, 4);
SHIFT(13);
default:
PARSE_ERROR(1, EXPECT({"6"}));
}
case 14:
case 13:
SET_LEX_STATE(2);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
REDUCE(ts_symbol_object, 6);
case ts_symbol_2:
REDUCE(ts_symbol_value, 1);
REDUCE(ts_symbol_object, 6);
default:
PARSE_ERROR(1, EXPECT({"2"}));
PARSE_ERROR(2, EXPECT({"2", "7"}));
}
case 15:
SET_LEX_STATE(19);
case 14:
SET_LEX_STATE(17);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_string:
SHIFT(16);
SHIFT(15);
default:
PARSE_ERROR(1, EXPECT({"string"}));
}
case 16:
SET_LEX_STATE(17);
case 15:
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_5:
SHIFT(17);
SHIFT(16);
default:
PARSE_ERROR(1, EXPECT({"5"}));
}
case 17:
case 16:
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
SHIFT(21);
SHIFT(19);
case ts_symbol_string:
SHIFT(21);
SHIFT(19);
case ts_symbol_array:
SHIFT(21);
SHIFT(19);
case ts_symbol_object:
SHIFT(21);
SHIFT(19);
case ts_symbol_4:
SHIFT(15);
SHIFT(8);
case ts_symbol_value:
SHIFT(18);
SHIFT(17);
case ts_symbol_1:
SHIFT(10);
SHIFT(3);
default:
PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"}));
}
case 17:
SET_LEX_STATE(2);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
SHIFT(14);
case ts_symbol_2:
SHIFT(18);
case ts_symbol_repeat_helper2:
SHIFT(18);
default:
PARSE_ERROR(3, EXPECT({"repeat_helper2", "2", "7"}));
}
case 18:
SET_LEX_STATE(6);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_6:
SHIFT(19);
REDUCE(ts_symbol_repeat_helper2, 5);
default:
PARSE_ERROR(1, EXPECT({"6"}));
}
case 19:
SET_LEX_STATE(15);
SET_LEX_STATE(2);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
SHIFT(20);
default:
PARSE_ERROR(1, EXPECT({"7"}));
}
case 20:
SET_LEX_STATE(6);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_6:
REDUCE(ts_symbol_object, 6);
default:
PARSE_ERROR(1, EXPECT({"6"}));
}
case 21:
SET_LEX_STATE(6);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_6:
REDUCE(ts_symbol_value, 1);
case ts_symbol_2:
REDUCE(ts_symbol_value, 1);
default:
PARSE_ERROR(1, EXPECT({"6"}));
PARSE_ERROR(2, EXPECT({"2", "7"}));
}
case 22:
SET_LEX_STATE(6);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_6:
SHIFT(23);
default:
PARSE_ERROR(1, EXPECT({"6"}));
}
case 23:
SET_LEX_STATE(15);
case 20:
SET_LEX_STATE(2);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
SHIFT(24);
default:
PARSE_ERROR(1, EXPECT({"7"}));
}
case 24:
SET_LEX_STATE(2);
switch (LOOKAHEAD_SYM()) {
SHIFT(7);
case ts_symbol_2:
REDUCE(ts_symbol_object, 6);
SHIFT(21);
case ts_symbol_repeat_helper1:
SHIFT(21);
default:
PARSE_ERROR(1, EXPECT({"2"}));
PARSE_ERROR(3, EXPECT({"repeat_helper1", "2", "7"}));
}
case 25:
SET_LEX_STATE(2);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_2:
SHIFT(26);
default:
PARSE_ERROR(1, EXPECT({"2"}));
}
case 26:
case 21:
SET_LEX_STATE(4);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_3:
SHIFT(27);
REDUCE(ts_symbol_repeat_helper1, 3);
default:
PARSE_ERROR(1, EXPECT({"3"}));
}
case 27:
case 22:
SET_LEX_STATE(2);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
SHIFT(7);
case ts_symbol_2:
SHIFT(23);
case ts_symbol_repeat_helper1:
SHIFT(23);
default:
PARSE_ERROR(3, EXPECT({"repeat_helper1", "2", "7"}));
}
case 23:
SET_LEX_STATE(4);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_3:
SHIFT(24);
default:
PARSE_ERROR(1, EXPECT({"3"}));
}
case 24:
SET_LEX_STATE(0);
switch (LOOKAHEAD_SYM()) {
case ts_symbol___END__:
@ -411,59 +411,63 @@ static TSParseResult ts_parse(const char *input) {
default:
PARSE_ERROR(1, EXPECT({"__END__"}));
}
case 28:
SET_LEX_STATE(19);
case 25:
SET_LEX_STATE(17);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_string:
SHIFT(29);
SHIFT(26);
default:
PARSE_ERROR(1, EXPECT({"string"}));
}
case 29:
SET_LEX_STATE(17);
case 26:
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_5:
SHIFT(30);
SHIFT(27);
default:
PARSE_ERROR(1, EXPECT({"5"}));
}
case 30:
case 27:
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
SHIFT(21);
SHIFT(19);
case ts_symbol_string:
SHIFT(21);
SHIFT(19);
case ts_symbol_array:
SHIFT(21);
SHIFT(19);
case ts_symbol_object:
SHIFT(21);
SHIFT(19);
case ts_symbol_4:
SHIFT(15);
SHIFT(8);
case ts_symbol_value:
SHIFT(31);
SHIFT(28);
case ts_symbol_1:
SHIFT(10);
SHIFT(3);
default:
PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"}));
}
case 31:
case 28:
SET_LEX_STATE(2);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
SHIFT(14);
case ts_symbol_2:
SHIFT(29);
case ts_symbol_repeat_helper2:
SHIFT(29);
default:
PARSE_ERROR(3, EXPECT({"repeat_helper2", "2", "7"}));
}
case 29:
SET_LEX_STATE(6);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_6:
SHIFT(32);
SHIFT(30);
default:
PARSE_ERROR(1, EXPECT({"6"}));
}
case 32:
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
SHIFT(33);
default:
PARSE_ERROR(1, EXPECT({"7"}));
}
case 33:
case 30:
SET_LEX_STATE(0);
switch (LOOKAHEAD_SYM()) {
case ts_symbol___END__:
@ -471,7 +475,7 @@ static TSParseResult ts_parse(const char *input) {
default:
PARSE_ERROR(1, EXPECT({"__END__"}));
}
case 34:
case 31:
SET_LEX_STATE(0);
switch (LOOKAHEAD_SYM()) {
case ts_symbol___END__:

View file

@ -21,6 +21,9 @@ describe("json", []() {
it("parses objects", [&]() {
// Object with a single key/value member.
TSDocumentSetText(document, "{\"key1\":1}");
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (6) (7)))"));
// Object with two key/value members.
// NOTE(review): the expected tree below is byte-identical to the single-member
// expectation above, so this assertion does not show the second member in the
// output. Confirm whether that is intentional or a placeholder to be updated.
TSDocumentSetText(document, "{\"key1\":1,\"key2\":2}");
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (6) (7)))"));
});
});

View file

@ -40,6 +40,7 @@ namespace tree_sitter {
// Orders lex items by rule name first; items with equal names are ordered by
// the string form of their rule.
bool LexItem::operator<(const LexItem &other) const {
    const bool name_precedes = rule_name < other.rule_name;
    const bool name_follows = rule_name > other.rule_name;

    // The names differ: the name comparison alone decides the result.
    if (name_precedes || name_follows)
        return name_precedes;

    // Same name: fall back to comparing the rules' string representations.
    return rule->to_string() < other.rule->to_string();
}
@ -50,6 +51,7 @@ namespace tree_sitter {
if (rule->to_string() < other.rule->to_string()) return true;
if (rule->to_string() > other.rule->to_string()) return false;
if (consumed_sym_count < other.consumed_sym_count) return true;
if (consumed_sym_count > other.consumed_sym_count) return false;
if (lookahead_sym_name < other.lookahead_sym_name) return true;
return false;
}

View file

@ -0,0 +1,58 @@
#include "expand_repeats.h"
#include <unordered_map>
using std::string;
using std::to_string;
using std::unordered_map;
using namespace tree_sitter::rules;
namespace tree_sitter {
namespace prepare_grammar {
class RepeatExpander : rules::Visitor {
public:
rule_ptr value;
unordered_map<string, const rule_ptr> aux_rules;
rule_ptr apply(const rule_ptr rule) {
rule->accept(*this);
return value;
}
rule_ptr make_repeat_helper(string name, const rule_ptr &rule) {
return seq({
rule,
choice({ sym(name), blank() })
});
}
void visit(const Repeat *rule) {
rule_ptr inner_rule = apply(rule->content);
string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1);
aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) });
value = sym(helper_rule_name);
}
void visit(const Seq *rule) {
value = seq({ apply(rule->left), apply(rule->right) });
}
void visit(const Choice *rule) {
value = choice({ apply(rule->left), apply(rule->right) });
}
void default_visit(const Rule *rule) {
value = rule->copy();
}
};
// Builds a new Grammar from `grammar` in which every rule has been passed
// through a RepeatExpander, and adds the helper rules the expander generated.
// The start rule name is carried over unchanged.
Grammar expand_repeats(const Grammar &grammar) {
    unordered_map<string, const rule_ptr> result;
    RepeatExpander visitor;

    // Bind entries by const reference: copying each one would duplicate the
    // name string and the rule pointer on every iteration.
    for (const auto &pair : grammar.rules)
        result.insert({ pair.first, visitor.apply(pair.second) });

    // insert() leaves an existing key untouched, so a grammar rule that is
    // already named like a generated helper would take precedence here.
    // NOTE(review): confirm input grammars never use "repeat_helperN" names.
    for (const auto &pair : visitor.aux_rules)
        result.insert(pair);

    return Grammar(grammar.start_rule_name, result);
}
}
}

View file

@ -0,0 +1,12 @@
// Include guard. Identifiers containing a double underscore are reserved for
// the implementation in C++, so the guard name avoids them.
#ifndef TREE_SITTER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_
#define TREE_SITTER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_

#include "grammar.h"

namespace tree_sitter {
    namespace prepare_grammar {
        // Returns a grammar in which each repeat rule has been replaced by a
        // symbol referring to a generated "repeat_helperN" rule; the generated
        // helper rules are included in the returned grammar.
        Grammar expand_repeats(const Grammar &);
    }
}

#endif  // TREE_SITTER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_

View file

@ -1,12 +1,14 @@
#include "./perform.h"
#include "extract_tokens.h"
#include "./extract_tokens.h"
#include "./expand_repeats.h"
using std::pair;
namespace tree_sitter {
namespace prepare_grammar {
pair<Grammar, Grammar> perform(const Grammar &input_grammar) {
return prepare_grammar::extract_tokens(input_grammar);
auto rule_grammar = expand_repeats(input_grammar);
return prepare_grammar::extract_tokens(rule_grammar);
}
}
}

View file

@ -14,7 +14,7 @@
12130614182C3A1700FCF928 /* seq.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130612182C3A1700FCF928 /* seq.cpp */; };
12130617182C3D2900FCF928 /* string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130615182C3D2900FCF928 /* string.cpp */; };
1214930E181E200B008E9BDA /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492E9181E200B008E9BDA /* main.cpp */; };
1225CC6418765693000D4723 /* prepare_grammar_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */; };
1225CC6418765693000D4723 /* extract_tokens_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1225CC6318765693000D4723 /* extract_tokens_spec.cpp */; };
1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; };
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; };
12AB465F188BD03E00DE79DF /* follow_sets.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12AB465D188BD03E00DE79DF /* follow_sets.cpp */; };
@ -25,6 +25,8 @@
12E75A971891BD32001B8F10 /* json.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12E75A961891BD32001B8F10 /* json.cpp */; };
12E75A9A1891BF57001B8F10 /* json.c in Sources */ = {isa = PBXBuildFile; fileRef = 12E75A981891BF3B001B8F10 /* json.c */; };
12E75A9C1891C17D001B8F10 /* json_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12E75A9B1891C17D001B8F10 /* json_spec.cpp */; };
12E75AA218930931001B8F10 /* expand_repeats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12E75AA018930931001B8F10 /* expand_repeats.cpp */; };
12E75AA318930982001B8F10 /* expand_repeats_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12E75A9E189308C4001B8F10 /* expand_repeats_spec.cpp */; };
12EDCF8A187B498C005A7A07 /* tree_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCF89187B498C005A7A07 /* tree_spec.cpp */; };
12EDCF8D187C6282005A7A07 /* document.c in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCF8C187C6282005A7A07 /* document.c */; };
12EDCF981881FCD5005A7A07 /* extract_tokens.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCF8F1881FCCA005A7A07 /* extract_tokens.cpp */; };
@ -95,7 +97,7 @@
121492E9181E200B008E9BDA /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = main.cpp; path = spec/main.cpp; sourceTree = SOURCE_ROOT; };
121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/compiler/rules/rules_spec.cpp; sourceTree = SOURCE_ROOT; };
121D8B3018795CC0003CF44B /* parser.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parser.h; sourceTree = "<group>"; };
1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = prepare_grammar_spec.cpp; path = compiler/prepare_grammar_spec.cpp; sourceTree = "<group>"; };
1225CC6318765693000D4723 /* extract_tokens_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = extract_tokens_spec.cpp; path = prepare_grammar/extract_tokens_spec.cpp; sourceTree = "<group>"; };
1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = "<group>"; };
125120A218307FFD00C9B56A /* test_grammars.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = test_grammars.h; path = spec/fixtures/grammars/test_grammars.h; sourceTree = SOURCE_ROOT; };
125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/fixtures/grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; };
@ -115,6 +117,9 @@
12E75A961891BD32001B8F10 /* json.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = json.cpp; sourceTree = "<group>"; };
12E75A981891BF3B001B8F10 /* json.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = json.c; sourceTree = "<group>"; };
12E75A9B1891C17D001B8F10 /* json_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = json_spec.cpp; sourceTree = "<group>"; };
12E75A9E189308C4001B8F10 /* expand_repeats_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = expand_repeats_spec.cpp; sourceTree = "<group>"; };
12E75AA018930931001B8F10 /* expand_repeats.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = expand_repeats.cpp; path = src/compiler/prepare_grammar/expand_repeats.cpp; sourceTree = SOURCE_ROOT; };
12E75AA118930931001B8F10 /* expand_repeats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_repeats.h; path = src/compiler/prepare_grammar/expand_repeats.h; sourceTree = SOURCE_ROOT; };
12EDCF89187B498C005A7A07 /* tree_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tree_spec.cpp; sourceTree = "<group>"; };
12EDCF8B187C6251005A7A07 /* document.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = document.h; sourceTree = "<group>"; };
12EDCF8C187C6282005A7A07 /* document.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = document.c; sourceTree = "<group>"; };
@ -243,8 +248,7 @@
12BC470618830BC5005AC502 /* first_set_spec.cpp */,
12AB4660188CB3A300DE79DF /* item_set_closure_spec.cpp */,
);
name = build_tables;
path = compiler/build_tables;
path = build_tables;
sourceTree = "<group>";
};
125120A118307FCA00C9B56A /* grammars */ = {
@ -318,9 +322,20 @@
path = spec;
sourceTree = "<group>";
};
12E75A9D1892D4F1001B8F10 /* prepare_grammar */ = {
isa = PBXGroup;
children = (
1225CC6318765693000D4723 /* extract_tokens_spec.cpp */,
12E75A9E189308C4001B8F10 /* expand_repeats_spec.cpp */,
);
name = prepare_grammar;
sourceTree = "<group>";
};
12ED72A5186FC6D90089229B /* prepare_grammar */ = {
isa = PBXGroup;
children = (
12E75AA018930931001B8F10 /* expand_repeats.cpp */,
12E75AA118930931001B8F10 /* expand_repeats.h */,
12EDCF8F1881FCCA005A7A07 /* extract_tokens.cpp */,
12EDCF901881FCCA005A7A07 /* extract_tokens.h */,
12EDCF911881FCCA005A7A07 /* perform.cpp */,
@ -384,12 +399,12 @@
12FD40AE185EE6610041A84E /* compiler */ = {
isa = PBXGroup;
children = (
12E75A9D1892D4F1001B8F10 /* prepare_grammar */,
1213061C182C854F00FCF928 /* build_tables */,
12FD4063185E75290041A84E /* compile_fixtures.cpp */,
1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */,
12D1369F18357066005F3369 /* rules */,
);
name = compiler;
path = compiler;
sourceTree = "<group>";
};
12FD40AF185EE81D0041A84E /* fixtures */ = {
@ -496,6 +511,7 @@
12FD40D9185FEEDF0041A84E /* pattern_spec.cpp in Sources */,
12130617182C3D2900FCF928 /* string.cpp in Sources */,
12EDCFC018820880005A7A07 /* item_set_closure.cpp in Sources */,
12E75AA218930931001B8F10 /* expand_repeats.cpp in Sources */,
12EDCFBD188205BF005A7A07 /* perform_spec.cpp in Sources */,
12EDCFC61882153D005A7A07 /* first_set.cpp in Sources */,
12130611182C3A1100FCF928 /* blank.cpp in Sources */,
@ -516,7 +532,8 @@
1214930E181E200B008E9BDA /* main.cpp in Sources */,
12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */,
12D136A4183678A2005F3369 /* repeat.cpp in Sources */,
1225CC6418765693000D4723 /* prepare_grammar_spec.cpp in Sources */,
12E75AA318930982001B8F10 /* expand_repeats_spec.cpp in Sources */,
1225CC6418765693000D4723 /* extract_tokens_spec.cpp in Sources */,
12EDCF9A1881FCD9005A7A07 /* search_for_symbols.cpp in Sources */,
12FD40F3186641C00041A84E /* char_match.cpp in Sources */,
12EDCFB21882039A005A7A07 /* perform.cpp in Sources */,