diff --git a/spec/compiler/rules/pattern_spec.cpp b/spec/compiler/rules/pattern_spec.cpp index c1641628..98076cda 100644 --- a/spec/compiler/rules/pattern_spec.cpp +++ b/spec/compiler/rules/pattern_spec.cpp @@ -55,6 +55,13 @@ describe("parsing pattern rules", []() { EqualsPointer(character({ 'a', 'b', 'c' }, true))); }); + it("parses character ranges", []() { + Pattern rule("[12a-dA-D3]"); + AssertThat( + rule.to_rule_tree(), + EqualsPointer(character({ '1', '2', CharacterMatch({'a', 'd'}), CharacterMatch({ 'A', 'D' }), '3' }, true))); + }); + it("parses negated characters", []() { Pattern rule("[^a\\d]"); AssertThat( diff --git a/spec/fixtures/grammars/arithmetic.cpp b/spec/fixtures/grammars/arithmetic.cpp index 117e7c7a..a0b33c8f 100644 --- a/spec/fixtures/grammars/arithmetic.cpp +++ b/spec/fixtures/grammars/arithmetic.cpp @@ -29,7 +29,7 @@ namespace test_grammars { { "plus", str("+") }, { "times", str("*") }, { "number", pattern("\\d+") }, - { "variable", pattern("\\w+") }, + { "variable", pattern("[a-zA-Z]+") }, }); } } diff --git a/spec/fixtures/grammars/json.cpp b/spec/fixtures/grammars/json.cpp index e2d6e9d1..549734d7 100644 --- a/spec/fixtures/grammars/json.cpp +++ b/spec/fixtures/grammars/json.cpp @@ -33,10 +33,7 @@ namespace test_grammars { str("["), comma_sep(sym("value")), str("]"), }) }, - { "string", seq({ - str("\""), - repeat(pattern("[^\"]")), - str("\"") }) }, + { "string", pattern("\"[^\"]+\"") }, { "number", pattern("\\d+") } }); } diff --git a/spec/fixtures/parsers/arithmetic.c b/spec/fixtures/parsers/arithmetic.c index ef4723e0..9fbd973a 100644 --- a/spec/fixtures/parsers/arithmetic.c +++ b/spec/fixtures/parsers/arithmetic.c @@ -2,28 +2,28 @@ #include enum ts_symbol { - ts_symbol_factor, - ts_aux_token1, + ts_symbol_plus, ts_aux_token2, ts_symbol_number, - ts_symbol_variable, - ts_symbol_plus, - ts_symbol_times, ts_symbol_term, + ts_symbol_factor, + ts_aux_token1, + ts_symbol_times, ts_symbol_expression, + ts_symbol_variable, ts_symbol___END__, }; static const char *ts_symbol_names[] = { - "factor", - "token1", + "plus", "token2", "number", - "variable", - "plus", - "times", "term", + "factor", + "token1", + "times", "expression", + "variable", "__END__", }; @@ -31,79 +31,81 @@ static void ts_lex(TSParser *parser) { START_LEXER(); switch (LEX_STATE()) { case 0: - if (LOOKAHEAD_CHAR() == '\0') + if ((LOOKAHEAD_CHAR() == '\0')) ADVANCE(1); LEX_ERROR(1, EXPECT({""})); case 1: ACCEPT_TOKEN(ts_symbol___END__); case 2: - if (LOOKAHEAD_CHAR() == '*') + if ((LOOKAHEAD_CHAR() == '*')) ADVANCE(3); - if (LOOKAHEAD_CHAR() == '\0') + if ((LOOKAHEAD_CHAR() == '\0')) ADVANCE(1); LEX_ERROR(2, EXPECT({"'*'", ""})); case 3: ACCEPT_TOKEN(ts_symbol_times); case 4: - if (LOOKAHEAD_CHAR() == ')') + if ((LOOKAHEAD_CHAR() == ')')) ADVANCE(5); LEX_ERROR(1, EXPECT({"')'"})); case 5: ACCEPT_TOKEN(ts_aux_token2); case 6: - if (LOOKAHEAD_CHAR() == ')') + if ((LOOKAHEAD_CHAR() == ')')) ADVANCE(5); - if (LOOKAHEAD_CHAR() == '*') + if ((LOOKAHEAD_CHAR() == '*')) ADVANCE(3); LEX_ERROR(2, EXPECT({"')'", "'*'"})); case 7: - if (LOOKAHEAD_CHAR() == ')') + if ((LOOKAHEAD_CHAR() == ')')) ADVANCE(5); - if (LOOKAHEAD_CHAR() == '*') + if ((LOOKAHEAD_CHAR() == '*')) ADVANCE(3); - if (LOOKAHEAD_CHAR() == '+') + if ((LOOKAHEAD_CHAR() == '+')) ADVANCE(8); LEX_ERROR(3, EXPECT({"')'", "'*'", "'+'"})); case 8: ACCEPT_TOKEN(ts_symbol_plus); case 9: - if (LOOKAHEAD_CHAR() == ')') + if ((LOOKAHEAD_CHAR() == ')')) ADVANCE(5); - if (LOOKAHEAD_CHAR() == '+') + if ((LOOKAHEAD_CHAR() == '+')) ADVANCE(8); LEX_ERROR(2, EXPECT({"')'", "'+'"})); case 10: - if (isalnum(LOOKAHEAD_CHAR())) + if (('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z') || + ('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z')) ADVANCE(13); - if (LOOKAHEAD_CHAR() == '(') + if ((LOOKAHEAD_CHAR() == '(')) ADVANCE(12); - if (isdigit(LOOKAHEAD_CHAR())) + if ((isdigit(LOOKAHEAD_CHAR()))) ADVANCE(11); - LEX_ERROR(3, EXPECT({"", "'('", ""})); + LEX_ERROR(4, EXPECT({"'A'-'Z'", "'a'-'z'", "'('", ""})); case 11: - if (isdigit(LOOKAHEAD_CHAR())) + if ((isdigit(LOOKAHEAD_CHAR()))) ADVANCE(11); ACCEPT_TOKEN(ts_symbol_number); case 12: ACCEPT_TOKEN(ts_aux_token1); case 13: - if (isalnum(LOOKAHEAD_CHAR())) + if (('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z') || + ('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z')) ADVANCE(13); ACCEPT_TOKEN(ts_symbol_variable); case 14: - if (LOOKAHEAD_CHAR() == '+') + if ((LOOKAHEAD_CHAR() == '+')) ADVANCE(8); - if (LOOKAHEAD_CHAR() == '\0') + if ((LOOKAHEAD_CHAR() == '\0')) ADVANCE(1); LEX_ERROR(2, EXPECT({"'+'", ""})); case 15: - if (LOOKAHEAD_CHAR() == '*') - ADVANCE(3); - if (LOOKAHEAD_CHAR() == '+') + if ((LOOKAHEAD_CHAR() == '+')) ADVANCE(8); - if (LOOKAHEAD_CHAR() == '\0') + if ((LOOKAHEAD_CHAR() == '*')) + ADVANCE(3); + if ((LOOKAHEAD_CHAR() == '\0')) ADVANCE(1); - LEX_ERROR(3, EXPECT({"'*'", "'+'", ""})); + LEX_ERROR(3, EXPECT({"'+'", "'*'", ""})); default: LEX_PANIC(); } @@ -118,6 +120,8 @@ static TSParseResult ts_parse(const char *input) { switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(52); + case ts_symbol_expression: + SHIFT(1); case ts_aux_token1: SHIFT(49); case ts_symbol_number: @@ -126,10 +130,8 @@ static TSParseResult ts_parse(const char *input) { SHIFT(47); case ts_symbol_term: SHIFT(2); - case ts_symbol_expression: - SHIFT(1); default: - PARSE_ERROR(6, EXPECT({"expression", "term", "variable", "number", "token1", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "token1", "expression", "factor"})); } case 1: SET_LEX_STATE(0); @@ -209,7 +211,7 @@ static TSParseResult ts_parse(const char *input) { case ts_symbol_term: SHIFT(8); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "expression", "token1", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "token1", "expression", "factor"})); } case 8: SET_LEX_STATE(9); @@ -281,7 +283,7 @@ static TSParseResult ts_parse(const char *input) { case ts_symbol_term: SHIFT(8); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "expression", "token1", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "token1", "expression", "factor"})); } case 14: SET_LEX_STATE(7); @@ -323,7 +325,7 @@ static TSParseResult ts_parse(const char *input) { case ts_symbol_term: SHIFT(8); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "expression", "token1", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "token1", "expression", "factor"})); } case 17: SET_LEX_STATE(4); @@ -407,7 +409,7 @@ static TSParseResult ts_parse(const char *input) { case ts_symbol_term: SHIFT(8); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "expression", "token1", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "token1", "expression", "factor"})); } case 24: SET_LEX_STATE(4); @@ -511,7 +513,7 @@ static TSParseResult ts_parse(const char *input) { case ts_symbol_term: SHIFT(8); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "expression", "token1", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "token1", "expression", "factor"})); } case 34: SET_LEX_STATE(4); @@ -611,7 +613,7 @@ static TSParseResult ts_parse(const char *input) { case ts_symbol_term: SHIFT(8); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "expression", "token1", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "token1", "expression", "factor"})); } case 44: SET_LEX_STATE(4); @@ -677,7 +679,7 @@ static TSParseResult ts_parse(const char *input) { case ts_symbol_term: SHIFT(8); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "expression", "token1", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "token1", "expression", "factor"})); } case 50: SET_LEX_STATE(4); @@ -761,7 +763,7 @@ static TSParseResult ts_parse(const char *input) { case ts_symbol_term: SHIFT(8); default: - PARSE_ERROR(6, EXPECT({"term", "variable", "number", "expression", "token1", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "token1", "expression", "factor"})); } case 57: SET_LEX_STATE(4); diff --git a/spec/fixtures/parsers/json.c b/spec/fixtures/parsers/json.c index ce14f66a..c668b263 100644 --- a/spec/fixtures/parsers/json.c +++ b/spec/fixtures/parsers/json.c @@ -2,102 +2,102 @@ #include enum ts_symbol { + ts_aux_token6, ts_symbol_string, - ts_aux_repeat_helper1, + ts_symbol_array, + ts_symbol_number, + ts_symbol_object, ts_aux_token5, ts_aux_repeat_helper2, - ts_symbol_object, - ts_aux_token6, ts_aux_token7, ts_aux_token4, - ts_aux_token1, - ts_symbol_array, - ts_symbol___END__, - ts_symbol_value, - ts_symbol_number, ts_aux_token3, + ts_aux_token1, + ts_aux_repeat_helper1, + ts_symbol_value, ts_aux_token2, + ts_symbol___END__, }; static const char *ts_symbol_names[] = { + "token6", "string", - "repeat_helper1", + "array", + "number", + "object", "token5", "repeat_helper2", - "object", - "token6", "token7", "token4", - "token1", - "array", - "__END__", - "value", - "number", "token3", + "token1", + "repeat_helper1", + "value", "token2", + "__END__", }; static void ts_lex(TSParser *parser) { START_LEXER(); switch (LEX_STATE()) { case 0: - if (LOOKAHEAD_CHAR() == '\0') + if ((LOOKAHEAD_CHAR() == '\0')) ADVANCE(1); LEX_ERROR(1, EXPECT({""})); case 1: ACCEPT_TOKEN(ts_symbol___END__); case 2: - if (LOOKAHEAD_CHAR() == ',') + if ((LOOKAHEAD_CHAR() == ',')) ADVANCE(3); ACCEPT_TOKEN(ts_aux_token3); case 3: ACCEPT_TOKEN(ts_aux_token2); case 4: - if (LOOKAHEAD_CHAR() == ']') + if ((LOOKAHEAD_CHAR() == ']')) ADVANCE(5); LEX_ERROR(1, EXPECT({"']'"})); case 5: ACCEPT_TOKEN(ts_aux_token4); case 6: - if (LOOKAHEAD_CHAR() == ']') + if ((LOOKAHEAD_CHAR() == ']')) ADVANCE(5); - if (LOOKAHEAD_CHAR() == ',') + if ((LOOKAHEAD_CHAR() == ',')) ADVANCE(3); LEX_ERROR(2, EXPECT({"']'", "','"})); case 7: - if (LOOKAHEAD_CHAR() == '}') + if ((LOOKAHEAD_CHAR() == '}')) ADVANCE(8); LEX_ERROR(1, EXPECT({"'}'"})); case 8: ACCEPT_TOKEN(ts_aux_token7); case 9: - if (LOOKAHEAD_CHAR() == '}') + if ((LOOKAHEAD_CHAR() == '}')) ADVANCE(8); - if (LOOKAHEAD_CHAR() == ',') + if ((LOOKAHEAD_CHAR() == ',')) ADVANCE(3); LEX_ERROR(2, EXPECT({"'}'", "','"})); case 10: - if (LOOKAHEAD_CHAR() == '{') + if ((LOOKAHEAD_CHAR() == '{')) ADVANCE(16); - if (LOOKAHEAD_CHAR() == '[') + if ((LOOKAHEAD_CHAR() == '[')) ADVANCE(15); - if (LOOKAHEAD_CHAR() == '\"') + if ((LOOKAHEAD_CHAR() == '\"')) ADVANCE(12); - if (isdigit(LOOKAHEAD_CHAR())) + if ((isdigit(LOOKAHEAD_CHAR()))) ADVANCE(11); LEX_ERROR(4, EXPECT({"'{'", "'['", "'\"'", ""})); case 11: - if (isdigit(LOOKAHEAD_CHAR())) + if ((isdigit(LOOKAHEAD_CHAR()))) ADVANCE(11); ACCEPT_TOKEN(ts_symbol_number); case 12: - if (!(LOOKAHEAD_CHAR() == '\"')) + if (!((LOOKAHEAD_CHAR() == '\"'))) ADVANCE(13); LEX_ERROR(1, EXPECT({"'\"'"})); case 13: - if (LOOKAHEAD_CHAR() == '\"') + if ((LOOKAHEAD_CHAR() == '\"')) ADVANCE(14); - if (!(LOOKAHEAD_CHAR() == '\"')) + if (!((LOOKAHEAD_CHAR() == '\"'))) ADVANCE(13); LEX_ERROR(1, EXPECT({"'\"'"})); case 14: @@ -107,13 +107,13 @@ static void ts_lex(TSParser *parser) { case 16: ACCEPT_TOKEN(ts_aux_token5); case 17: - if (LOOKAHEAD_CHAR() == ':') + if ((LOOKAHEAD_CHAR() == ':')) ADVANCE(18); LEX_ERROR(1, EXPECT({"':'"})); case 18: ACCEPT_TOKEN(ts_aux_token6); case 19: - if (LOOKAHEAD_CHAR() == '\"') + if ((LOOKAHEAD_CHAR() == '\"')) ADVANCE(12); LEX_ERROR(1, EXPECT({"'\"'"})); default: @@ -128,22 +128,22 @@ static TSParseResult ts_parse(const char *input) { case 0: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(81); case ts_symbol_array: SHIFT(79); - case ts_symbol_object: - SHIFT(78); - case ts_aux_token5: - SHIFT(70); case ts_symbol_string: SHIFT(80); + case ts_aux_token5: + SHIFT(70); case ts_aux_token1: SHIFT(2); + case ts_symbol_object: + SHIFT(78); + case ts_symbol_number: + SHIFT(81); case ts_symbol_value: SHIFT(1); default: - PARSE_ERROR(7, EXPECT({"value", "token1", "string", "token5", "object", "array", "number"})); + PARSE_ERROR(7, EXPECT({"value", "number", "object", "token1", "token5", "string", "array"})); } case 1: SET_LEX_STATE(0); @@ -156,42 +156,42 @@ static TSParseResult ts_parse(const char *input) { case 2: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(36); case ts_symbol_array: SHIFT(34); case ts_symbol_object: SHIFT(33); - case ts_aux_token5: - SHIFT(16); + case ts_symbol_number: + SHIFT(36); case ts_symbol_value: SHIFT(65); case ts_symbol_string: SHIFT(35); + case ts_aux_token5: + SHIFT(16); case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"token1", "string", "value", "token5", "object", "array", "number"})); + PARSE_ERROR(7, EXPECT({"token1", "token5", "string", "number", "value", "object", "array"})); } case 3: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(36); case ts_symbol_array: SHIFT(34); case ts_symbol_object: SHIFT(33); - case ts_aux_token5: - SHIFT(16); + case ts_symbol_number: + SHIFT(36); case ts_symbol_value: SHIFT(4); case ts_symbol_string: SHIFT(35); + case ts_aux_token5: + SHIFT(16); case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"token1", "string", "value", "token5", "object", "array", "number"})); + PARSE_ERROR(7, EXPECT({"token1", "token5", "string", "number", "value", "object", "array"})); } case 4: SET_LEX_STATE(2); @@ -244,42 +244,42 @@ static TSParseResult ts_parse(const char *input) { case 9: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(64); case ts_symbol_array: SHIFT(62); case ts_symbol_object: SHIFT(61); + case ts_symbol_number: + SHIFT(64); case ts_symbol_value: SHIFT(59); - case ts_aux_token5: - SHIFT(51); case ts_symbol_string: SHIFT(63); + case ts_aux_token5: + SHIFT(51); case ts_aux_token1: SHIFT(10); default: - PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"})); + PARSE_ERROR(7, EXPECT({"token1", "token5", "string", "number", "value", "object", "array"})); } case 10: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(36); case ts_symbol_array: SHIFT(34); case ts_symbol_object: SHIFT(33); - case ts_aux_token5: - SHIFT(16); + case ts_symbol_number: + SHIFT(36); case ts_symbol_value: SHIFT(11); case ts_symbol_string: SHIFT(35); + case ts_aux_token5: + SHIFT(16); case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"token1", "string", "value", "token5", "object", "array", "number"})); + PARSE_ERROR(7, EXPECT({"token1", "token5", "string", "number", "value", "object", "array"})); } case 11: SET_LEX_STATE(2); @@ -348,22 +348,22 @@ static TSParseResult ts_parse(const char *input) { case 18: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(36); case ts_symbol_array: SHIFT(34); case ts_symbol_object: SHIFT(33); + case ts_symbol_number: + SHIFT(36); case ts_symbol_value: SHIFT(19); - case ts_aux_token5: - SHIFT(16); case ts_symbol_string: SHIFT(35); + case ts_aux_token5: + SHIFT(16); case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"})); + PARSE_ERROR(7, EXPECT({"token1", "token5", "string", "number", "value", "object", "array"})); } case 19: SET_LEX_STATE(2); @@ -432,42 +432,42 @@ static TSParseResult ts_parse(const char *input) { case 26: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(50); case ts_symbol_array: SHIFT(48); case ts_symbol_object: SHIFT(47); + case ts_symbol_number: + SHIFT(50); case ts_symbol_value: SHIFT(45); - case ts_aux_token5: - SHIFT(37); case ts_symbol_string: SHIFT(49); + case ts_aux_token5: + SHIFT(37); case ts_aux_token1: SHIFT(27); default: - PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"})); + PARSE_ERROR(7, EXPECT({"token1", "token5", "string", "number", "value", "object", "array"})); } case 27: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(36); case ts_symbol_array: SHIFT(34); case ts_symbol_object: SHIFT(33); - case ts_aux_token5: - SHIFT(16); + case ts_symbol_number: + SHIFT(36); case ts_symbol_value: SHIFT(28); case ts_symbol_string: SHIFT(35); + case ts_aux_token5: + SHIFT(16); case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"token1", "string", "value", "token5", "object", "array", "number"})); + PARSE_ERROR(7, EXPECT({"token1", "token5", "string", "number", "value", "object", "array"})); } case 28: SET_LEX_STATE(2); @@ -576,22 +576,22 @@ static TSParseResult ts_parse(const char *input) { case 39: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(36); case ts_symbol_array: SHIFT(34); case ts_symbol_object: SHIFT(33); + case ts_symbol_number: + SHIFT(36); case ts_symbol_value: SHIFT(40); - case ts_aux_token5: - SHIFT(16); case ts_symbol_string: SHIFT(35); + case ts_aux_token5: + SHIFT(16); case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"})); + PARSE_ERROR(7, EXPECT({"token1", "token5", "string", "number", "value", "object", "array"})); } case 40: SET_LEX_STATE(2); @@ -720,22 +720,22 @@ static TSParseResult ts_parse(const char *input) { case 53: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(36); case ts_symbol_array: SHIFT(34); case ts_symbol_object: SHIFT(33); + case ts_symbol_number: + SHIFT(36); case ts_symbol_value: SHIFT(54); - case ts_aux_token5: - SHIFT(16); case ts_symbol_string: SHIFT(35); + case ts_aux_token5: + SHIFT(16); case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"})); + PARSE_ERROR(7, EXPECT({"token1", "token5", "string", "number", "value", "object", "array"})); } case 54: SET_LEX_STATE(2); @@ -908,22 +908,22 @@ static TSParseResult ts_parse(const char *input) { case 72: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(36); case ts_symbol_array: SHIFT(34); case ts_symbol_object: SHIFT(33); + case ts_symbol_number: + SHIFT(36); case ts_symbol_value: SHIFT(73); - case ts_aux_token5: - SHIFT(16); case ts_symbol_string: SHIFT(35); + case ts_aux_token5: + SHIFT(16); case ts_aux_token1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"})); + PARSE_ERROR(7, EXPECT({"token1", "token5", "string", "number", "value", "object", "array"})); } case 73: SET_LEX_STATE(2); diff --git a/spec/runtime/arithmetic_spec.cpp b/spec/runtime/arithmetic_spec.cpp index 77aa00f1..b07b7a94 100644 --- a/spec/runtime/arithmetic_spec.cpp +++ b/spec/runtime/arithmetic_spec.cpp @@ -18,6 +18,12 @@ describe("arithmetic", []() { AssertThat(string(TSDocumentToString(document)), Equals( "(expression (term (factor (variable))))")); }); + + it("parses numbers", [&]() { + TSDocumentSetText(document, "5"); + AssertThat(string(TSDocumentToString(document)), Equals( + "(expression (term (factor (number))))")); + }); it("parses products of variables", [&]() { TSDocumentSetText(document, "x+y"); diff --git a/src/compiler/generate_code/c_code.cpp b/src/compiler/generate_code/c_code.cpp index b1fce081..2d174a79 100644 --- a/src/compiler/generate_code/c_code.cpp +++ b/src/compiler/generate_code/c_code.cpp @@ -102,28 +102,30 @@ namespace tree_sitter { } string condition_for_character_match(const rules::CharacterMatch &match) { - auto value = "LOOKAHEAD_CHAR()"; + string lookahead("LOOKAHEAD_CHAR()"); + auto value = match.value; switch (match.type) { case rules::CharacterMatchTypeClass: - switch (match.value.character_class) { + switch (value.character_class) { case rules::CharClassDigit: - return string("isdigit(") + value + ")"; + return string("isdigit(") + lookahead + ")"; case rules::CharClassWord: - return string("isalnum(") + value + ")"; + return string("isalnum(") + lookahead + ")"; } case rules::CharacterMatchTypeSpecific: - return string(value) + " == '" + character_code(match.value.character) + "'"; - default: - return ""; + return lookahead + " == '" + character_code(value.character) + "'"; + case rules::CharacterMatchTypeRange: + return string("'") + value.range.min_character + string("' <= ") + lookahead + + " && " + lookahead + " <= '" + value.range.max_character + "'"; } } string condition_for_character_rule(const rules::Character &rule) { vector parts; for (auto &match : rule.matches) { - parts.push_back(condition_for_character_match(match)); + parts.push_back("(" + condition_for_character_match(match) + ")"); } - string result = join(parts, " || "); + string result = join(parts, " ||\n "); if (!rule.sign) result = "!(" + result + ")"; return result; } diff --git a/src/compiler/rules/character.cpp b/src/compiler/rules/character.cpp index 400743db..044712f4 100644 --- a/src/compiler/rules/character.cpp +++ b/src/compiler/rules/character.cpp @@ -7,7 +7,7 @@ namespace tree_sitter { namespace rules { CharacterMatch::CharacterMatch(char character) : type(CharacterMatchTypeSpecific) { value.character = character; } CharacterMatch::CharacterMatch(CharacterClass klass) : type(CharacterMatchTypeClass) { value.character_class = klass; } - CharacterMatch::CharacterMatch(std::pair bounds) : type(CharacterMatchTypeRange) { + CharacterMatch::CharacterMatch(const std::pair bounds) : type(CharacterMatchTypeRange) { value.range.min_character = bounds.first; value.range.max_character = bounds.second; } diff --git a/src/compiler/rules/character.h b/src/compiler/rules/character.h index d2c5a54d..a5f5bcc0 100644 --- a/src/compiler/rules/character.h +++ b/src/compiler/rules/character.h @@ -30,7 +30,7 @@ namespace tree_sitter { } value; CharacterMatch(char); - CharacterMatch(std::pair); + CharacterMatch(const std::pair); CharacterMatch(CharacterClass); bool operator==(const CharacterMatch &) const; std::string to_string() const; diff --git a/src/compiler/rules/pattern.cpp b/src/compiler/rules/pattern.cpp index be2d3bba..74587eee 100644 --- a/src/compiler/rules/pattern.cpp +++ b/src/compiler/rules/pattern.cpp @@ -87,9 +87,15 @@ namespace tree_sitter { next(); break; default: - value = peek(); + char first_char = peek(); next(); - return value; + if (peek() == '-') { + next(); + value = CharacterMatch({ first_char, peek() }); + next(); + } else { + value = first_char; + } } return value; } diff --git a/todo.md b/todo.md index 5f731c3c..c7930981 100644 --- a/todo.md +++ b/todo.md @@ -4,8 +4,6 @@ TODO ## correct batch parsing - generate correct lexers in the presence of 'overlapping' transition rules (e.g. transitions on 'a' and on \w). -- add support for character-range rules (e.g. [0-9]) -- add support for negation rules, (only exposed through regex patterns) - add comments to generated C code giving an example string for each token - change the meaning of 'repeat' from 1-or-more to 0-or-more - fix any memory leaks