diff --git a/examples/grammars/json.hpp b/examples/grammars/json.hpp index 01767a5e..a73f057f 100644 --- a/examples/grammars/json.hpp +++ b/examples/grammars/json.hpp @@ -39,7 +39,7 @@ namespace test_grammars { comma_sep(err(sym("value"))), _sym("right_bracket"), }) }, { "string", pattern("\"([^\"]|\\\\\")+\"") }, - { "number", pattern("\\d+") }, + { "number", pattern("\\d+(.\\d+)?") }, { "comma", str(",") }, { "colon", str(":") }, { "left_bracket", str("[") }, diff --git a/examples/parsers/arithmetic.c b/examples/parsers/arithmetic.c index 4fbc3a3b..eac6df51 100644 --- a/examples/parsers/arithmetic.c +++ b/examples/parsers/arithmetic.c @@ -66,32 +66,41 @@ LEX_FN() { ADVANCE(10); if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') || ('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z')) - ADVANCE(11); + ADVANCE(12); LEX_ERROR(); case 9: ACCEPT_TOKEN(ts_aux_sym_token1); case 10: if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(10); + ADVANCE(11); ACCEPT_TOKEN(ts_sym_number); case 11: + if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') + ADVANCE(11); + ACCEPT_TOKEN(ts_sym_number); + case 12: if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') || ('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z')) - ADVANCE(11); + ADVANCE(13); ACCEPT_TOKEN(ts_sym_variable); - case 12: + case 13: + if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') || + ('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z')) + ADVANCE(13); + ACCEPT_TOKEN(ts_sym_variable); + case 14: if (LOOKAHEAD_CHAR() == ')') ADVANCE(4); if (LOOKAHEAD_CHAR() == '*') ADVANCE(7); LEX_ERROR(); - case 13: + case 15: if (LOOKAHEAD_CHAR() == '*') ADVANCE(7); if (LOOKAHEAD_CHAR() == '+') ADVANCE(2); LEX_ERROR(); - case 14: + case 16: if (LOOKAHEAD_CHAR() == '*') ADVANCE(7); LEX_ERROR(); @@ -108,7 +117,7 @@ LEX_FN() { ADVANCE(10); if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') || ('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z')) - ADVANCE(11); + ADVANCE(12); LEX_ERROR(); default: LEX_PANIC(); @@ -134,7 +143,7 @@ PARSE_TABLE() { END_STATE(); STATE(2); - SET_LEX_STATE(13); + SET_LEX_STATE(15); REDUCE(ts_sym_plus, ts_sym_term, 1, COLLAPSE({0})) SHIFT(ts_sym_times, 3) REDUCE(ts_builtin_sym_end, ts_sym_term, 1, COLLAPSE({0})) @@ -254,7 +263,7 @@ PARSE_TABLE() { END_STATE(); STATE(19); - SET_LEX_STATE(12); + SET_LEX_STATE(14); SHIFT(ts_sym_times, 20) REDUCE(ts_aux_sym_token2, ts_sym_term, 1, COLLAPSE({0})) END_STATE(); @@ -322,7 +331,7 @@ PARSE_TABLE() { END_STATE(); STATE(29); - SET_LEX_STATE(12); + SET_LEX_STATE(14); REDUCE(ts_sym_times, ts_sym_factor, 1, COLLAPSE({0})) REDUCE(ts_aux_sym_token2, ts_sym_factor, 1, COLLAPSE({0})) END_STATE(); @@ -349,13 +358,13 @@ PARSE_TABLE() { END_STATE(); STATE(33); - SET_LEX_STATE(12); + SET_LEX_STATE(14); REDUCE(ts_sym_times, ts_sym_factor, 3, COLLAPSE({1, 0, 1})) REDUCE(ts_aux_sym_token2, ts_sym_factor, 3, COLLAPSE({1, 0, 1})) END_STATE(); STATE(34); - SET_LEX_STATE(13); + SET_LEX_STATE(15); REDUCE(ts_sym_plus, ts_sym_factor, 1, COLLAPSE({0})) REDUCE(ts_sym_times, ts_sym_factor, 1, COLLAPSE({0})) REDUCE(ts_builtin_sym_end, ts_sym_factor, 1, COLLAPSE({0})) @@ -377,7 +386,7 @@ PARSE_TABLE() { END_STATE(); STATE(37); - SET_LEX_STATE(14); + SET_LEX_STATE(16); SHIFT(ts_sym_times, 38) REDUCE(ts_builtin_sym_end, ts_sym_term, 1, COLLAPSE({0})) END_STATE(); @@ -422,7 +431,7 @@ PARSE_TABLE() { END_STATE(); STATE(44); - SET_LEX_STATE(14); + SET_LEX_STATE(16); REDUCE(ts_sym_times, ts_sym_factor, 1, COLLAPSE({0})) REDUCE(ts_builtin_sym_end, ts_sym_factor, 1, COLLAPSE({0})) END_STATE(); @@ -449,7 +458,7 @@ PARSE_TABLE() { END_STATE(); STATE(48); - SET_LEX_STATE(14); + SET_LEX_STATE(16); REDUCE(ts_sym_times, ts_sym_factor, 3, COLLAPSE({1, 0, 1})) REDUCE(ts_builtin_sym_end, ts_sym_factor, 3, COLLAPSE({1, 0, 1})) END_STATE(); @@ -471,7 +480,7 @@ PARSE_TABLE() { END_STATE(); STATE(51); - SET_LEX_STATE(13); + SET_LEX_STATE(15); REDUCE(ts_sym_plus, ts_sym_factor, 3, COLLAPSE({1, 0, 1})) REDUCE(ts_sym_times, ts_sym_factor, 3, COLLAPSE({1, 0, 1})) REDUCE(ts_builtin_sym_end, ts_sym_factor, 3, COLLAPSE({1, 0, 1})) diff --git a/examples/parsers/json.c b/examples/parsers/json.c index a4e65d02..302e1b53 100644 --- a/examples/parsers/json.c +++ b/examples/parsers/json.c @@ -75,114 +75,143 @@ LEX_FN() { if (LOOKAHEAD_CHAR() == '\"') ADVANCE(9); if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(15); + ADVANCE(19); if (LOOKAHEAD_CHAR() == '[') - ADVANCE(16); + ADVANCE(24); if (LOOKAHEAD_CHAR() == 'f') - ADVANCE(17); + ADVANCE(25); if (LOOKAHEAD_CHAR() == 'n') - ADVANCE(22); - if (LOOKAHEAD_CHAR() == 't') - ADVANCE(26); - if (LOOKAHEAD_CHAR() == '{') ADVANCE(30); + if (LOOKAHEAD_CHAR() == 't') + ADVANCE(34); + if (LOOKAHEAD_CHAR() == '{') + ADVANCE(38); LEX_ERROR(); case 9: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) ADVANCE(10); - if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(11); if (LOOKAHEAD_CHAR() == '\\') - ADVANCE(12); + ADVANCE(16); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(14); + ADVANCE(18); LEX_ERROR(); case 10: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(10); - if (LOOKAHEAD_CHAR() == '\"') ADVANCE(11); - if (LOOKAHEAD_CHAR() == '\\') + if (LOOKAHEAD_CHAR() == '\"') ADVANCE(12); + if (LOOKAHEAD_CHAR() == '\\') + ADVANCE(13); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(14); + ADVANCE(15); LEX_ERROR(); case 11: - ACCEPT_TOKEN(ts_sym_string); - case 12: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(10); + ADVANCE(11); if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(13); - if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"') - ADVANCE(10); - if (LOOKAHEAD_CHAR() == '\\') ADVANCE(12); + if (LOOKAHEAD_CHAR() == '\\') + ADVANCE(13); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(14); + ADVANCE(15); LEX_ERROR(); + case 12: + ACCEPT_TOKEN(ts_sym_string); case 13: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(10); + ADVANCE(11); if (LOOKAHEAD_CHAR() == '\"') + ADVANCE(14); + if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"') ADVANCE(11); if (LOOKAHEAD_CHAR() == '\\') - ADVANCE(12); + ADVANCE(13); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(14); - ACCEPT_TOKEN(ts_sym_string); + ADVANCE(15); + LEX_ERROR(); case 14: + if (!((LOOKAHEAD_CHAR() == '\"') || + (LOOKAHEAD_CHAR() == '\\'))) + ADVANCE(11); + if (LOOKAHEAD_CHAR() == '\"') + ADVANCE(12); + if (LOOKAHEAD_CHAR() == '\\') + ADVANCE(13); + if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') + ADVANCE(15); + ACCEPT_TOKEN(ts_sym_string); + case 15: + if (LOOKAHEAD_CHAR() == '\"') + ADVANCE(11); + LEX_ERROR(); + case 16: + if (!((LOOKAHEAD_CHAR() == '\"') || + (LOOKAHEAD_CHAR() == '\\'))) + ADVANCE(11); + if (LOOKAHEAD_CHAR() == '\"') + ADVANCE(17); + if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"') + ADVANCE(10); + if (LOOKAHEAD_CHAR() == '\\') + ADVANCE(13); + if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') + ADVANCE(15); + LEX_ERROR(); + case 17: + if (!((LOOKAHEAD_CHAR() == '\"') || + (LOOKAHEAD_CHAR() == '\\'))) + ADVANCE(11); + if (LOOKAHEAD_CHAR() == '\"') + ADVANCE(12); + if (LOOKAHEAD_CHAR() == '\\') + ADVANCE(13); + if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') + ADVANCE(15); + ACCEPT_TOKEN(ts_sym_string); + case 18: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(10); LEX_ERROR(); - case 15: - if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(15); - ACCEPT_TOKEN(ts_sym_number); - case 16: - ACCEPT_TOKEN(ts_sym_left_bracket); - case 17: - if (LOOKAHEAD_CHAR() == 'a') - ADVANCE(18); - LEX_ERROR(); - case 18: - if (LOOKAHEAD_CHAR() == 'l') - ADVANCE(19); - LEX_ERROR(); case 19: - if (LOOKAHEAD_CHAR() == 's') + if (LOOKAHEAD_CHAR() == '.') ADVANCE(20); - LEX_ERROR(); + if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') + ADVANCE(23); + ACCEPT_TOKEN(ts_sym_number); case 20: - if (LOOKAHEAD_CHAR() == 'e') + if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') ADVANCE(21); LEX_ERROR(); case 21: - ACCEPT_TOKEN(ts_sym_false); + if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') + ADVANCE(22); + ACCEPT_TOKEN(ts_sym_number); case 22: - if (LOOKAHEAD_CHAR() == 'u') - ADVANCE(23); - LEX_ERROR(); + if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') + ADVANCE(22); + ACCEPT_TOKEN(ts_sym_number); case 23: - if (LOOKAHEAD_CHAR() == 'l') - ADVANCE(24); - LEX_ERROR(); + if (LOOKAHEAD_CHAR() == '.') + ADVANCE(20); + if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') + ADVANCE(23); + ACCEPT_TOKEN(ts_sym_number); case 24: - if (LOOKAHEAD_CHAR() == 'l') - ADVANCE(25); - LEX_ERROR(); + ACCEPT_TOKEN(ts_sym_left_bracket); case 25: - ACCEPT_TOKEN(ts_sym_null); + if (LOOKAHEAD_CHAR() == 'a') + ADVANCE(26); + LEX_ERROR(); case 26: - if (LOOKAHEAD_CHAR() == 'r') + if (LOOKAHEAD_CHAR() == 'l') ADVANCE(27); LEX_ERROR(); case 27: - if (LOOKAHEAD_CHAR() == 'u') + if (LOOKAHEAD_CHAR() == 's') ADVANCE(28); LEX_ERROR(); case 28: @@ -190,78 +219,106 @@ LEX_FN() { ADVANCE(29); LEX_ERROR(); case 29: - ACCEPT_TOKEN(ts_sym_true); + ACCEPT_TOKEN(ts_sym_false); case 30: - ACCEPT_TOKEN(ts_sym_left_brace); + if (LOOKAHEAD_CHAR() == 'u') + ADVANCE(31); + LEX_ERROR(); case 31: - if (LOOKAHEAD_CHAR() == ':') + if (LOOKAHEAD_CHAR() == 'l') ADVANCE(32); LEX_ERROR(); case 32: - ACCEPT_TOKEN(ts_sym_colon); + if (LOOKAHEAD_CHAR() == 'l') + ADVANCE(33); + LEX_ERROR(); case 33: + ACCEPT_TOKEN(ts_sym_null); + case 34: + if (LOOKAHEAD_CHAR() == 'r') + ADVANCE(35); + LEX_ERROR(); + case 35: + if (LOOKAHEAD_CHAR() == 'u') + ADVANCE(36); + LEX_ERROR(); + case 36: + if (LOOKAHEAD_CHAR() == 'e') + ADVANCE(37); + LEX_ERROR(); + case 37: + ACCEPT_TOKEN(ts_sym_true); + case 38: + ACCEPT_TOKEN(ts_sym_left_brace); + case 39: + if (LOOKAHEAD_CHAR() == ':') + ADVANCE(40); + LEX_ERROR(); + case 40: + ACCEPT_TOKEN(ts_sym_colon); + case 41: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(9); if (LOOKAHEAD_CHAR() == '}') ADVANCE(3); LEX_ERROR(); - case 34: + case 42: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(9); if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(15); + ADVANCE(19); if (LOOKAHEAD_CHAR() == '[') - ADVANCE(16); + ADVANCE(24); if (LOOKAHEAD_CHAR() == ']') ADVANCE(6); if (LOOKAHEAD_CHAR() == 'f') - ADVANCE(17); + ADVANCE(25); if (LOOKAHEAD_CHAR() == 'n') - ADVANCE(22); - if (LOOKAHEAD_CHAR() == 't') - ADVANCE(26); - if (LOOKAHEAD_CHAR() == '{') ADVANCE(30); + if (LOOKAHEAD_CHAR() == 't') + ADVANCE(34); + if (LOOKAHEAD_CHAR() == '{') + ADVANCE(38); LEX_ERROR(); - case 35: + case 43: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(9); LEX_ERROR(); - case 36: + case 44: ACCEPT_TOKEN(ts_sym_comma); - case 37: + case 45: ACCEPT_TOKEN(ts_sym_colon); - case 38: + case 46: ACCEPT_TOKEN(ts_sym_left_bracket); - case 39: + case 47: ACCEPT_TOKEN(ts_sym_right_bracket); - case 40: + case 48: ACCEPT_TOKEN(ts_sym_left_brace); - case 41: + case 49: ACCEPT_TOKEN(ts_sym_right_brace); case ts_lex_state_error: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(9); if (LOOKAHEAD_CHAR() == ',') - ADVANCE(36); + ADVANCE(44); if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(15); + ADVANCE(19); if (LOOKAHEAD_CHAR() == ':') - ADVANCE(37); + ADVANCE(45); if (LOOKAHEAD_CHAR() == '[') - ADVANCE(38); + ADVANCE(46); if (LOOKAHEAD_CHAR() == ']') - ADVANCE(39); + ADVANCE(47); if (LOOKAHEAD_CHAR() == 'f') - ADVANCE(17); + ADVANCE(25); if (LOOKAHEAD_CHAR() == 'n') - ADVANCE(22); + ADVANCE(30); if (LOOKAHEAD_CHAR() == 't') - ADVANCE(26); + ADVANCE(34); if (LOOKAHEAD_CHAR() == '{') - ADVANCE(40); + ADVANCE(48); if (LOOKAHEAD_CHAR() == '}') - ADVANCE(41); + ADVANCE(49); LEX_ERROR(); default: LEX_PANIC(); @@ -296,14 +353,14 @@ PARSE_TABLE() { END_STATE(); STATE(3); - SET_LEX_STATE(33); + SET_LEX_STATE(41); SHIFT(ts_sym_string, 4) SHIFT(ts_sym_right_brace, 51) SHIFT(ts_builtin_sym_error, 52) END_STATE(); STATE(4); - SET_LEX_STATE(31); + SET_LEX_STATE(39); SHIFT(ts_sym_colon, 5) END_STATE(); @@ -335,13 +392,13 @@ PARSE_TABLE() { END_STATE(); STATE(8); - SET_LEX_STATE(35); + SET_LEX_STATE(43); SHIFT(ts_sym_string, 9) SHIFT(ts_builtin_sym_error, 47) END_STATE(); STATE(9); - SET_LEX_STATE(31); + SET_LEX_STATE(39); SHIFT(ts_sym_colon, 10) END_STATE(); @@ -372,14 +429,14 @@ PARSE_TABLE() { END_STATE(); STATE(13); - SET_LEX_STATE(33); + SET_LEX_STATE(41); SHIFT(ts_sym_string, 14) SHIFT(ts_sym_right_brace, 43) SHIFT(ts_builtin_sym_error, 44) END_STATE(); STATE(14); - SET_LEX_STATE(31); + SET_LEX_STATE(39); SHIFT(ts_sym_colon, 15) END_STATE(); @@ -416,7 +473,7 @@ PARSE_TABLE() { END_STATE(); STATE(19); - SET_LEX_STATE(34); + SET_LEX_STATE(42); SHIFT(ts_sym_array, 20) SHIFT(ts_sym_false, 20) SHIFT(ts_sym_null, 20) @@ -472,14 +529,14 @@ PARSE_TABLE() { END_STATE(); STATE(25); - SET_LEX_STATE(33); + SET_LEX_STATE(41); SHIFT(ts_sym_string, 26) SHIFT(ts_sym_right_brace, 31) SHIFT(ts_builtin_sym_error, 32) END_STATE(); STATE(26); - SET_LEX_STATE(31); + SET_LEX_STATE(39); SHIFT(ts_sym_colon, 27) END_STATE(); @@ -540,7 +597,7 @@ PARSE_TABLE() { END_STATE(); STATE(35); - SET_LEX_STATE(34); + SET_LEX_STATE(42); SHIFT(ts_sym_array, 20) SHIFT(ts_sym_false, 20) SHIFT(ts_sym_null, 20) @@ -665,7 +722,7 @@ PARSE_TABLE() { END_STATE(); STATE(55); - SET_LEX_STATE(34); + SET_LEX_STATE(42); SHIFT(ts_sym_array, 20) SHIFT(ts_sym_false, 20) SHIFT(ts_sym_null, 20) diff --git a/spec/compiler/build_tables/rule_can_be_blank_spec.cc b/spec/compiler/build_tables/rule_can_be_blank_spec.cc index 461ae193..e6eb239f 100644 --- a/spec/compiler/build_tables/rule_can_be_blank_spec.cc +++ b/spec/compiler/build_tables/rule_can_be_blank_spec.cc @@ -18,6 +18,11 @@ describe("checking if rules can be blank", [&]() { AssertThat(rule_can_be_blank(rule), Equals(false)); }); + + it("returns true for repeats", [&]() { + rule_ptr rule = repeat(str("x")); + AssertThat(rule_can_be_blank(rule), Equals(true)); + }); }); END_TEST \ No newline at end of file diff --git a/spec/compiler/build_tables/rule_transitions_spec.cc b/spec/compiler/build_tables/rule_transitions_spec.cc index 9f706980..4bb437b6 100644 --- a/spec/compiler/build_tables/rule_transitions_spec.cc +++ b/spec/compiler/build_tables/rule_transitions_spec.cc @@ -130,6 +130,10 @@ describe("rule transitions", []() { }))); }); + it("handles blanks", [&]() { + AssertThat(char_transitions(blank()), Equals(rule_map({}))); + }); + it("handles repeats", [&]() { rule_ptr rule = repeat(str("ab")); AssertThat( diff --git a/spec/compiler/rules/pattern_spec.cc b/spec/compiler/rules/pattern_spec.cc index b615309f..9a56a3f1 100644 --- a/spec/compiler/rules/pattern_spec.cc +++ b/spec/compiler/rules/pattern_spec.cc @@ -78,7 +78,7 @@ describe("parsing pattern rules", []() { }); it("parses character groups in sequences", []() { - Pattern rule("\"([^\"]|\\\\\")+\""); + Pattern rule("\"([^\"]|\\\\\")*\""); AssertThat( rule.to_rule_tree(), EqualsPointer(seq({ @@ -122,17 +122,40 @@ describe("parsing pattern rules", []() { rule.to_rule_tree(), EqualsPointer( seq({ - repeat(seq({ - character({ 'a' }), - character({ 'b' }) - })), - repeat(seq({ - character({ 'c' }), - character({ 'd' }) - })), + seq({ + seq({ character({ 'a' }), character({ 'b' }) }), + repeat(seq({ character({ 'a' }), character({ 'b' }) })), + }), + seq({ + seq({ character({ 'c' }), character({ 'd' }) }), + repeat(seq({ character({ 'c' }), character({ 'd' }) })), + }), + }) + )); + + Pattern rule2("(ab)*(cd)*"); + AssertThat( + rule2.to_rule_tree(), + EqualsPointer( + seq({ + repeat(seq({ character({ 'a' }), character({ 'b' }) })), + repeat(seq({ character({ 'c' }), character({ 'd' }) })), }) )); }); + + it("parses optional rules", []() { + Pattern rule("a(bc)?"); + AssertThat( + rule.to_rule_tree(), + EqualsPointer(seq({ + character({ 'a' }), + choice({ + seq({ character({ 'b' }), character({ 'c' }) }), + blank() + }) + }))); + }); }); END_TEST diff --git a/spec/runtime/languages/json/main.txt b/spec/runtime/languages/json/main.txt index 58b5b821..6051b2d0 100644 --- a/spec/runtime/languages/json/main.txt +++ b/spec/runtime/languages/json/main.txt @@ -1,3 +1,10 @@ +============================= +parses floating point numbers +============================= +3.14 +--- +(value (number)) + =================== parses empty arrays =================== diff --git a/src/compiler/prepare_grammar/expand_repeats.cc b/src/compiler/prepare_grammar/expand_repeats.cc index 356c3faa..379bf56d 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cc +++ b/src/compiler/prepare_grammar/expand_repeats.cc @@ -15,31 +15,37 @@ namespace tree_sitter { using std::map; using std::make_shared; using rules::rule_ptr; + using rules::Blank; + using rules::Choice; + using rules::Repeat; + using rules::Rule; + using rules::Seq; + using rules::Symbol; namespace prepare_grammar { class ExpandRepeats : public rules::RuleFn { rule_ptr make_repeat_helper(string name, const rule_ptr &rule) { - return rules::Choice::Build({ - rules::Seq::Build({ rule, make_shared(name, rules::SymbolTypeAuxiliary) }), - make_shared() }); + return Choice::Build({ + Seq::Build({ rule, make_shared(name, rules::SymbolTypeAuxiliary) }), + make_shared() }); } - void visit(const rules::Repeat *rule) { + void visit(const Repeat *rule) { rule_ptr inner_rule = apply(rule->content); string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1); aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) }); - value = make_shared(helper_rule_name, rules::SymbolTypeAuxiliary); + value = make_shared(helper_rule_name, rules::SymbolTypeAuxiliary); } - void visit(const rules::Seq *rule) { - value = rules::Seq::Build({ apply(rule->left), apply(rule->right) }); + void visit(const Seq *rule) { + value = Seq::Build({ apply(rule->left), apply(rule->right) }); } - void visit(const rules::Choice *rule) { - value = rules::Choice::Build({ apply(rule->left), apply(rule->right) }); + void visit(const Choice *rule) { + value = Choice::Build({ apply(rule->left), apply(rule->right) }); } - void default_visit(const rules::Rule *rule) { + void default_visit(const Rule *rule) { value = rule->copy(); } diff --git a/src/compiler/rules/pattern.cc b/src/compiler/rules/pattern.cc index 825b642e..abba641b 100644 --- a/src/compiler/rules/pattern.cc +++ b/src/compiler/rules/pattern.cc @@ -6,6 +6,7 @@ #include "compiler/rules/seq.h" #include "compiler/rules/repeat.h" #include "compiler/rules/character_set.h" +#include "compiler/rules/blank.h" namespace tree_sitter { namespace rules { @@ -40,9 +41,21 @@ namespace tree_sitter { rule_ptr factor() { rule_ptr result = atom(); - if (has_more_input() && (peek() == '+')) { - next(); - result = make_shared(result); + if (has_more_input()) { + switch (peek()) { + case '*': + next(); + result = make_shared(result); + break; + case '+': + next(); + result = make_shared(result, make_shared(result)); + break; + case '?': + next(); + result = make_shared(result, make_shared()); + break; + } } return result; }