Handle * quantifier in regex patterns

This commit is contained in:
Max Brunsfeld 2014-03-22 20:08:11 -07:00
parent 57ed6da225
commit 50a90e456b
9 changed files with 260 additions and 136 deletions

View file

@ -39,7 +39,7 @@ namespace test_grammars {
comma_sep(err(sym("value"))),
_sym("right_bracket"), }) },
{ "string", pattern("\"([^\"]|\\\\\")+\"") },
{ "number", pattern("\\d+") },
{ "number", pattern("\\d+(.\\d+)?") },
{ "comma", str(",") },
{ "colon", str(":") },
{ "left_bracket", str("[") },

View file

@ -66,32 +66,41 @@ LEX_FN() {
ADVANCE(10);
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
ADVANCE(11);
ADVANCE(12);
LEX_ERROR();
case 9:
ACCEPT_TOKEN(ts_aux_sym_token1);
case 10:
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(10);
ADVANCE(11);
ACCEPT_TOKEN(ts_sym_number);
case 11:
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(11);
ACCEPT_TOKEN(ts_sym_number);
case 12:
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
ADVANCE(11);
ADVANCE(13);
ACCEPT_TOKEN(ts_sym_variable);
case 12:
case 13:
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
ADVANCE(13);
ACCEPT_TOKEN(ts_sym_variable);
case 14:
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(4);
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(7);
LEX_ERROR();
case 13:
case 15:
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(7);
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(2);
LEX_ERROR();
case 14:
case 16:
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(7);
LEX_ERROR();
@ -108,7 +117,7 @@ LEX_FN() {
ADVANCE(10);
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
ADVANCE(11);
ADVANCE(12);
LEX_ERROR();
default:
LEX_PANIC();
@ -134,7 +143,7 @@ PARSE_TABLE() {
END_STATE();
STATE(2);
SET_LEX_STATE(13);
SET_LEX_STATE(15);
REDUCE(ts_sym_plus, ts_sym_term, 1, COLLAPSE({0}))
SHIFT(ts_sym_times, 3)
REDUCE(ts_builtin_sym_end, ts_sym_term, 1, COLLAPSE({0}))
@ -254,7 +263,7 @@ PARSE_TABLE() {
END_STATE();
STATE(19);
SET_LEX_STATE(12);
SET_LEX_STATE(14);
SHIFT(ts_sym_times, 20)
REDUCE(ts_aux_sym_token2, ts_sym_term, 1, COLLAPSE({0}))
END_STATE();
@ -322,7 +331,7 @@ PARSE_TABLE() {
END_STATE();
STATE(29);
SET_LEX_STATE(12);
SET_LEX_STATE(14);
REDUCE(ts_sym_times, ts_sym_factor, 1, COLLAPSE({0}))
REDUCE(ts_aux_sym_token2, ts_sym_factor, 1, COLLAPSE({0}))
END_STATE();
@ -349,13 +358,13 @@ PARSE_TABLE() {
END_STATE();
STATE(33);
SET_LEX_STATE(12);
SET_LEX_STATE(14);
REDUCE(ts_sym_times, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
REDUCE(ts_aux_sym_token2, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
END_STATE();
STATE(34);
SET_LEX_STATE(13);
SET_LEX_STATE(15);
REDUCE(ts_sym_plus, ts_sym_factor, 1, COLLAPSE({0}))
REDUCE(ts_sym_times, ts_sym_factor, 1, COLLAPSE({0}))
REDUCE(ts_builtin_sym_end, ts_sym_factor, 1, COLLAPSE({0}))
@ -377,7 +386,7 @@ PARSE_TABLE() {
END_STATE();
STATE(37);
SET_LEX_STATE(14);
SET_LEX_STATE(16);
SHIFT(ts_sym_times, 38)
REDUCE(ts_builtin_sym_end, ts_sym_term, 1, COLLAPSE({0}))
END_STATE();
@ -422,7 +431,7 @@ PARSE_TABLE() {
END_STATE();
STATE(44);
SET_LEX_STATE(14);
SET_LEX_STATE(16);
REDUCE(ts_sym_times, ts_sym_factor, 1, COLLAPSE({0}))
REDUCE(ts_builtin_sym_end, ts_sym_factor, 1, COLLAPSE({0}))
END_STATE();
@ -449,7 +458,7 @@ PARSE_TABLE() {
END_STATE();
STATE(48);
SET_LEX_STATE(14);
SET_LEX_STATE(16);
REDUCE(ts_sym_times, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
REDUCE(ts_builtin_sym_end, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
END_STATE();
@ -471,7 +480,7 @@ PARSE_TABLE() {
END_STATE();
STATE(51);
SET_LEX_STATE(13);
SET_LEX_STATE(15);
REDUCE(ts_sym_plus, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
REDUCE(ts_sym_times, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
REDUCE(ts_builtin_sym_end, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))

View file

@ -75,114 +75,143 @@ LEX_FN() {
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(9);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(15);
ADVANCE(19);
if (LOOKAHEAD_CHAR() == '[')
ADVANCE(16);
ADVANCE(24);
if (LOOKAHEAD_CHAR() == 'f')
ADVANCE(17);
ADVANCE(25);
if (LOOKAHEAD_CHAR() == 'n')
ADVANCE(22);
if (LOOKAHEAD_CHAR() == 't')
ADVANCE(26);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(30);
if (LOOKAHEAD_CHAR() == 't')
ADVANCE(34);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(38);
LEX_ERROR();
case 9:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(10);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(12);
ADVANCE(16);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(14);
ADVANCE(18);
LEX_ERROR();
case 10:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(10);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\\')
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(12);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(13);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(14);
ADVANCE(15);
LEX_ERROR();
case 11:
ACCEPT_TOKEN(ts_sym_string);
case 12:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(10);
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(13);
if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"')
ADVANCE(10);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(12);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(13);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(14);
ADVANCE(15);
LEX_ERROR();
case 12:
ACCEPT_TOKEN(ts_sym_string);
case 13:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(10);
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(14);
if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"')
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(12);
ADVANCE(13);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(14);
ACCEPT_TOKEN(ts_sym_string);
ADVANCE(15);
LEX_ERROR();
case 14:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(12);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(13);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(15);
ACCEPT_TOKEN(ts_sym_string);
case 15:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(11);
LEX_ERROR();
case 16:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(17);
if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"')
ADVANCE(10);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(13);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(15);
LEX_ERROR();
case 17:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(12);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(13);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(15);
ACCEPT_TOKEN(ts_sym_string);
case 18:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(10);
LEX_ERROR();
case 15:
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(15);
ACCEPT_TOKEN(ts_sym_number);
case 16:
ACCEPT_TOKEN(ts_sym_left_bracket);
case 17:
if (LOOKAHEAD_CHAR() == 'a')
ADVANCE(18);
LEX_ERROR();
case 18:
if (LOOKAHEAD_CHAR() == 'l')
ADVANCE(19);
LEX_ERROR();
case 19:
if (LOOKAHEAD_CHAR() == 's')
if (LOOKAHEAD_CHAR() == '.')
ADVANCE(20);
LEX_ERROR();
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(23);
ACCEPT_TOKEN(ts_sym_number);
case 20:
if (LOOKAHEAD_CHAR() == 'e')
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(21);
LEX_ERROR();
case 21:
ACCEPT_TOKEN(ts_sym_false);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(22);
ACCEPT_TOKEN(ts_sym_number);
case 22:
if (LOOKAHEAD_CHAR() == 'u')
ADVANCE(23);
LEX_ERROR();
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(22);
ACCEPT_TOKEN(ts_sym_number);
case 23:
if (LOOKAHEAD_CHAR() == 'l')
ADVANCE(24);
LEX_ERROR();
if (LOOKAHEAD_CHAR() == '.')
ADVANCE(20);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(23);
ACCEPT_TOKEN(ts_sym_number);
case 24:
if (LOOKAHEAD_CHAR() == 'l')
ADVANCE(25);
LEX_ERROR();
ACCEPT_TOKEN(ts_sym_left_bracket);
case 25:
ACCEPT_TOKEN(ts_sym_null);
if (LOOKAHEAD_CHAR() == 'a')
ADVANCE(26);
LEX_ERROR();
case 26:
if (LOOKAHEAD_CHAR() == 'r')
if (LOOKAHEAD_CHAR() == 'l')
ADVANCE(27);
LEX_ERROR();
case 27:
if (LOOKAHEAD_CHAR() == 'u')
if (LOOKAHEAD_CHAR() == 's')
ADVANCE(28);
LEX_ERROR();
case 28:
@ -190,78 +219,106 @@ LEX_FN() {
ADVANCE(29);
LEX_ERROR();
case 29:
ACCEPT_TOKEN(ts_sym_true);
ACCEPT_TOKEN(ts_sym_false);
case 30:
ACCEPT_TOKEN(ts_sym_left_brace);
if (LOOKAHEAD_CHAR() == 'u')
ADVANCE(31);
LEX_ERROR();
case 31:
if (LOOKAHEAD_CHAR() == ':')
if (LOOKAHEAD_CHAR() == 'l')
ADVANCE(32);
LEX_ERROR();
case 32:
ACCEPT_TOKEN(ts_sym_colon);
if (LOOKAHEAD_CHAR() == 'l')
ADVANCE(33);
LEX_ERROR();
case 33:
ACCEPT_TOKEN(ts_sym_null);
case 34:
if (LOOKAHEAD_CHAR() == 'r')
ADVANCE(35);
LEX_ERROR();
case 35:
if (LOOKAHEAD_CHAR() == 'u')
ADVANCE(36);
LEX_ERROR();
case 36:
if (LOOKAHEAD_CHAR() == 'e')
ADVANCE(37);
LEX_ERROR();
case 37:
ACCEPT_TOKEN(ts_sym_true);
case 38:
ACCEPT_TOKEN(ts_sym_left_brace);
case 39:
if (LOOKAHEAD_CHAR() == ':')
ADVANCE(40);
LEX_ERROR();
case 40:
ACCEPT_TOKEN(ts_sym_colon);
case 41:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(9);
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(3);
LEX_ERROR();
case 34:
case 42:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(9);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(15);
ADVANCE(19);
if (LOOKAHEAD_CHAR() == '[')
ADVANCE(16);
ADVANCE(24);
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(6);
if (LOOKAHEAD_CHAR() == 'f')
ADVANCE(17);
ADVANCE(25);
if (LOOKAHEAD_CHAR() == 'n')
ADVANCE(22);
if (LOOKAHEAD_CHAR() == 't')
ADVANCE(26);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(30);
if (LOOKAHEAD_CHAR() == 't')
ADVANCE(34);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(38);
LEX_ERROR();
case 35:
case 43:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(9);
LEX_ERROR();
case 36:
case 44:
ACCEPT_TOKEN(ts_sym_comma);
case 37:
case 45:
ACCEPT_TOKEN(ts_sym_colon);
case 38:
case 46:
ACCEPT_TOKEN(ts_sym_left_bracket);
case 39:
case 47:
ACCEPT_TOKEN(ts_sym_right_bracket);
case 40:
case 48:
ACCEPT_TOKEN(ts_sym_left_brace);
case 41:
case 49:
ACCEPT_TOKEN(ts_sym_right_brace);
case ts_lex_state_error:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(9);
if (LOOKAHEAD_CHAR() == ',')
ADVANCE(36);
ADVANCE(44);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(15);
ADVANCE(19);
if (LOOKAHEAD_CHAR() == ':')
ADVANCE(37);
ADVANCE(45);
if (LOOKAHEAD_CHAR() == '[')
ADVANCE(38);
ADVANCE(46);
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(39);
ADVANCE(47);
if (LOOKAHEAD_CHAR() == 'f')
ADVANCE(17);
ADVANCE(25);
if (LOOKAHEAD_CHAR() == 'n')
ADVANCE(22);
ADVANCE(30);
if (LOOKAHEAD_CHAR() == 't')
ADVANCE(26);
ADVANCE(34);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(40);
ADVANCE(48);
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(41);
ADVANCE(49);
LEX_ERROR();
default:
LEX_PANIC();
@ -296,14 +353,14 @@ PARSE_TABLE() {
END_STATE();
STATE(3);
SET_LEX_STATE(33);
SET_LEX_STATE(41);
SHIFT(ts_sym_string, 4)
SHIFT(ts_sym_right_brace, 51)
SHIFT(ts_builtin_sym_error, 52)
END_STATE();
STATE(4);
SET_LEX_STATE(31);
SET_LEX_STATE(39);
SHIFT(ts_sym_colon, 5)
END_STATE();
@ -335,13 +392,13 @@ PARSE_TABLE() {
END_STATE();
STATE(8);
SET_LEX_STATE(35);
SET_LEX_STATE(43);
SHIFT(ts_sym_string, 9)
SHIFT(ts_builtin_sym_error, 47)
END_STATE();
STATE(9);
SET_LEX_STATE(31);
SET_LEX_STATE(39);
SHIFT(ts_sym_colon, 10)
END_STATE();
@ -372,14 +429,14 @@ PARSE_TABLE() {
END_STATE();
STATE(13);
SET_LEX_STATE(33);
SET_LEX_STATE(41);
SHIFT(ts_sym_string, 14)
SHIFT(ts_sym_right_brace, 43)
SHIFT(ts_builtin_sym_error, 44)
END_STATE();
STATE(14);
SET_LEX_STATE(31);
SET_LEX_STATE(39);
SHIFT(ts_sym_colon, 15)
END_STATE();
@ -416,7 +473,7 @@ PARSE_TABLE() {
END_STATE();
STATE(19);
SET_LEX_STATE(34);
SET_LEX_STATE(42);
SHIFT(ts_sym_array, 20)
SHIFT(ts_sym_false, 20)
SHIFT(ts_sym_null, 20)
@ -472,14 +529,14 @@ PARSE_TABLE() {
END_STATE();
STATE(25);
SET_LEX_STATE(33);
SET_LEX_STATE(41);
SHIFT(ts_sym_string, 26)
SHIFT(ts_sym_right_brace, 31)
SHIFT(ts_builtin_sym_error, 32)
END_STATE();
STATE(26);
SET_LEX_STATE(31);
SET_LEX_STATE(39);
SHIFT(ts_sym_colon, 27)
END_STATE();
@ -540,7 +597,7 @@ PARSE_TABLE() {
END_STATE();
STATE(35);
SET_LEX_STATE(34);
SET_LEX_STATE(42);
SHIFT(ts_sym_array, 20)
SHIFT(ts_sym_false, 20)
SHIFT(ts_sym_null, 20)
@ -665,7 +722,7 @@ PARSE_TABLE() {
END_STATE();
STATE(55);
SET_LEX_STATE(34);
SET_LEX_STATE(42);
SHIFT(ts_sym_array, 20)
SHIFT(ts_sym_false, 20)
SHIFT(ts_sym_null, 20)

View file

@ -18,6 +18,11 @@ describe("checking if rules can be blank", [&]() {
AssertThat(rule_can_be_blank(rule), Equals(false));
});
// A repeat may match zero occurrences, so rule_can_be_blank is expected to report true.
it("returns true for repeats", [&]() {
    rule_ptr repeated_x = repeat(str("x"));
    AssertThat(
        rule_can_be_blank(repeated_x),
        Equals(true));
});
});
END_TEST

View file

@ -130,6 +130,10 @@ describe("rule transitions", []() {
})));
});
// char_transitions(blank()) is expected to equal an empty rule_map<CharacterSet>.
it("handles blanks", [&]() {
AssertThat(char_transitions(blank()), Equals(rule_map<CharacterSet>({})));
});
it("handles repeats", [&]() {
rule_ptr rule = repeat(str("ab"));
AssertThat(

View file

@ -78,7 +78,7 @@ describe("parsing pattern rules", []() {
});
it("parses character groups in sequences", []() {
Pattern rule("\"([^\"]|\\\\\")+\"");
Pattern rule("\"([^\"]|\\\\\")*\"");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(seq({
@ -122,17 +122,40 @@ describe("parsing pattern rules", []() {
rule.to_rule_tree(),
EqualsPointer(
seq({
repeat(seq({
character({ 'a' }),
character({ 'b' })
})),
repeat(seq({
character({ 'c' }),
character({ 'd' })
})),
seq({
seq({ character({ 'a' }), character({ 'b' }) }),
repeat(seq({ character({ 'a' }), character({ 'b' }) })),
}),
seq({
seq({ character({ 'c' }), character({ 'd' }) }),
repeat(seq({ character({ 'c' }), character({ 'd' }) })),
}),
})
));
Pattern rule2("(ab)*(cd)*");
AssertThat(
rule2.to_rule_tree(),
EqualsPointer(
seq({
repeat(seq({ character({ 'a' }), character({ 'b' }) })),
repeat(seq({ character({ 'c' }), character({ 'd' }) })),
})
));
});
// The `?` quantifier is expected to parse as a choice between the quantified
// group and blank(); here "a(bc)?" becomes seq(a, choice(seq(b, c), blank)).
it("parses optional rules", []() {
    Pattern optional_pattern("a(bc)?");
    AssertThat(
        optional_pattern.to_rule_tree(),
        EqualsPointer(seq({
            character({ 'a' }),
            choice({
                seq({ character({ 'b' }), character({ 'c' }) }),
                blank()
            })
        })));
});
});
END_TEST

View file

@ -1,3 +1,10 @@
=============================
parses floating point numbers
=============================
3.14
---
(value (number))
===================
parses empty arrays
===================

View file

@ -15,31 +15,37 @@ namespace tree_sitter {
using std::map;
using std::make_shared;
using rules::rule_ptr;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::Rule;
using rules::Seq;
using rules::Symbol;
namespace prepare_grammar {
class ExpandRepeats : public rules::RuleFn<rule_ptr> {
// Builds the body of an auxiliary repeat rule:
//
//     choice( seq(rule, <auxiliary symbol `name`>), blank )
//
// visit(const Repeat *) passes the helper's own name as `name`, so the
// resulting rule refers back to itself and matches zero or more `rule`s.
//
// Only a single return statement is kept: the fully-qualified duplicate that
// preceded it built the same tree and made this one unreachable.
rule_ptr make_repeat_helper(string name, const rule_ptr &rule) {
    return Choice::Build({
        Seq::Build({ rule, make_shared<Symbol>(name, rules::SymbolTypeAuxiliary) }),
        make_shared<Blank>() });
}
void visit(const rules::Repeat *rule) {
void visit(const Repeat *rule) {
rule_ptr inner_rule = apply(rule->content);
string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1);
aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) });
value = make_shared<rules::Symbol>(helper_rule_name, rules::SymbolTypeAuxiliary);
value = make_shared<Symbol>(helper_rule_name, rules::SymbolTypeAuxiliary);
}
void visit(const rules::Seq *rule) {
value = rules::Seq::Build({ apply(rule->left), apply(rule->right) });
void visit(const Seq *rule) {
value = Seq::Build({ apply(rule->left), apply(rule->right) });
}
void visit(const rules::Choice *rule) {
value = rules::Choice::Build({ apply(rule->left), apply(rule->right) });
void visit(const Choice *rule) {
value = Choice::Build({ apply(rule->left), apply(rule->right) });
}
void default_visit(const rules::Rule *rule) {
void default_visit(const Rule *rule) {
value = rule->copy();
}

View file

@ -6,6 +6,7 @@
#include "compiler/rules/seq.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/blank.h"
namespace tree_sitter {
namespace rules {
@ -40,9 +41,21 @@ namespace tree_sitter {
// Parses one atom followed by at most one postfix quantifier:
//
//     x*   ->  Repeat(x)                 zero or more
//     x+   ->  Seq(x, Repeat(x))         one or more
//     x?   ->  Choice(x, Blank)          zero or one
//
// Any other lookahead character is left unconsumed and the bare atom is
// returned. Quantifiers are not stacked: only the first one after the atom is
// consumed here.
//
// The earlier `+`-only branch (which mapped `+` to a bare Repeat and was left
// unclosed ahead of the switch) is removed; `+` is handled solely by the
// switch below.
rule_ptr factor() {
    rule_ptr result = atom();
    if (has_more_input()) {
        switch (peek()) {
            case '*':
                next();
                result = make_shared<Repeat>(result);
                break;
            case '+':
                next();
                // `result` is read for both arguments before being reassigned.
                result = make_shared<Seq>(result, make_shared<Repeat>(result));
                break;
            case '?':
                next();
                result = make_shared<Choice>(result, make_shared<Blank>());
                break;
            default:
                // Not a quantifier: leave the character for the caller.
                break;
        }
    }
    return result;
}