Expand regex/string rules as part of grammar preparation

This makes it possible to report errors in regex parsing
This commit is contained in:
Max Brunsfeld 2014-05-19 20:54:59 -07:00
parent 5245bc01fe
commit 649f200831
26 changed files with 883 additions and 651 deletions

View file

@ -19,7 +19,7 @@ namespace tree_sitter_examples {
str(":"),
sym("value") })))) },
{ "array", in_brackets(comma_sep(err(sym("value")))) },
{ "string", pattern("\"([^\"]|\\\\\")+\"") },
{ "string", pattern("\"([^\"]|\\\\\")*\"") },
{ "number", pattern("\\d+(\\.\\d+)?") },
{ "null", keyword("null") },
{ "true", keyword("true") },

View file

@ -34,13 +34,13 @@ SYMBOL_NAMES = {
[ts_builtin_sym_end] = "end",
[ts_sym_number] = "number",
[ts_sym_variable] = "variable",
[ts_aux_sym_token0] = "'+'",
[ts_aux_sym_token1] = "'-'",
[ts_aux_sym_token2] = "'*'",
[ts_aux_sym_token3] = "'/'",
[ts_aux_sym_token4] = "'^'",
[ts_aux_sym_token5] = "'('",
[ts_aux_sym_token6] = "')'",
[ts_aux_sym_token0] = "",
[ts_aux_sym_token1] = "",
[ts_aux_sym_token2] = "",
[ts_aux_sym_token3] = "",
[ts_aux_sym_token4] = "",
[ts_aux_sym_token5] = "",
[ts_aux_sym_token6] = "",
};
UBIQUITOUS_SYMBOLS = {

View file

@ -109,34 +109,34 @@ SYMBOL_NAMES = {
[ts_aux_sym__func_signature_repeat2] = "_func_signature_repeat2",
[ts_aux_sym__func_signature_repeat3] = "_func_signature_repeat3",
[ts_aux_sym__func_signature_repeat4] = "_func_signature_repeat4",
[ts_aux_sym_token0] = "'package'",
[ts_aux_sym_token1] = "'import'",
[ts_aux_sym_token2] = "'('",
[ts_aux_sym_token3] = "')'",
[ts_aux_sym_token4] = "'type'",
[ts_aux_sym_token5] = "'var'",
[ts_aux_sym_token6] = "'='",
[ts_aux_sym_token7] = "'func'",
[ts_aux_sym_token8] = "'{'",
[ts_aux_sym_token9] = "'}'",
[ts_aux_sym_token10] = "'*'",
[ts_aux_sym_token11] = "'map'",
[ts_aux_sym_token12] = "'['",
[ts_aux_sym_token13] = "']'",
[ts_aux_sym_token14] = "'struct'",
[ts_aux_sym_token15] = "'interface'",
[ts_aux_sym_token16] = "'/'",
[ts_aux_sym_token17] = "'+'",
[ts_aux_sym_token18] = "'-'",
[ts_aux_sym_token19] = "'||'",
[ts_aux_sym_token20] = "'&&'",
[ts_aux_sym_token21] = "'=='",
[ts_aux_sym_token22] = "'<='",
[ts_aux_sym_token23] = "'<'",
[ts_aux_sym_token24] = "'>='",
[ts_aux_sym_token25] = "'>'",
[ts_aux_sym_token26] = "'!'",
[ts_aux_sym_token27] = "','",
[ts_aux_sym_token0] = "",
[ts_aux_sym_token1] = "",
[ts_aux_sym_token2] = "",
[ts_aux_sym_token3] = "",
[ts_aux_sym_token4] = "",
[ts_aux_sym_token5] = "",
[ts_aux_sym_token6] = "",
[ts_aux_sym_token7] = "",
[ts_aux_sym_token8] = "",
[ts_aux_sym_token9] = "",
[ts_aux_sym_token10] = "",
[ts_aux_sym_token11] = "",
[ts_aux_sym_token12] = "",
[ts_aux_sym_token13] = "",
[ts_aux_sym_token14] = "",
[ts_aux_sym_token15] = "",
[ts_aux_sym_token16] = "",
[ts_aux_sym_token17] = "",
[ts_aux_sym_token18] = "",
[ts_aux_sym_token19] = "",
[ts_aux_sym_token20] = "",
[ts_aux_sym_token21] = "",
[ts_aux_sym_token22] = "",
[ts_aux_sym_token23] = "",
[ts_aux_sym_token24] = "",
[ts_aux_sym_token25] = "",
[ts_aux_sym_token26] = "",
[ts_aux_sym_token27] = "",
};
UBIQUITOUS_SYMBOLS = {

View file

@ -143,52 +143,52 @@ SYMBOL_NAMES = {
[ts_aux_sym_formal_parameters_repeat0] = "formal_parameters_repeat0",
[ts_aux_sym_object_repeat0] = "object_repeat0",
[ts_aux_sym_array_repeat0] = "array_repeat0",
[ts_aux_sym_token0] = "'{'",
[ts_aux_sym_token1] = "'}'",
[ts_aux_sym_token2] = "'for'",
[ts_aux_sym_token3] = "'('",
[ts_aux_sym_token4] = "')'",
[ts_aux_sym_token5] = "'if'",
[ts_aux_sym_token6] = "'else'",
[ts_aux_sym_token7] = "'while'",
[ts_aux_sym_token8] = "'try'",
[ts_aux_sym_token9] = "'catch'",
[ts_aux_sym_token10] = "'switch'",
[ts_aux_sym_token11] = "'case'",
[ts_aux_sym_token12] = "'default'",
[ts_aux_sym_token13] = "':'",
[ts_aux_sym_token14] = "'break'",
[ts_aux_sym_token15] = "'var'",
[ts_aux_sym_token16] = "','",
[ts_aux_sym_token17] = "'return'",
[ts_aux_sym_token18] = "'delete'",
[ts_aux_sym_token19] = "'++'",
[ts_aux_sym_token20] = "'--'",
[ts_aux_sym_token21] = "'+'",
[ts_aux_sym_token22] = "'-'",
[ts_aux_sym_token23] = "'*'",
[ts_aux_sym_token24] = "'/'",
[ts_aux_sym_token25] = "'&'",
[ts_aux_sym_token26] = "'|'",
[ts_aux_sym_token27] = "'^'",
[ts_aux_sym_token28] = "'||'",
[ts_aux_sym_token29] = "'&&'",
[ts_aux_sym_token30] = "'==='",
[ts_aux_sym_token31] = "'=='",
[ts_aux_sym_token32] = "'!=='",
[ts_aux_sym_token33] = "'!='",
[ts_aux_sym_token34] = "'<='",
[ts_aux_sym_token35] = "'<'",
[ts_aux_sym_token36] = "'>='",
[ts_aux_sym_token37] = "'>'",
[ts_aux_sym_token38] = "'!'",
[ts_aux_sym_token39] = "'?'",
[ts_aux_sym_token40] = "'='",
[ts_aux_sym_token41] = "'function'",
[ts_aux_sym_token42] = "'new'",
[ts_aux_sym_token43] = "'.'",
[ts_aux_sym_token44] = "'['",
[ts_aux_sym_token45] = "']'",
[ts_aux_sym_token0] = "",
[ts_aux_sym_token1] = "",
[ts_aux_sym_token2] = "",
[ts_aux_sym_token3] = "",
[ts_aux_sym_token4] = "",
[ts_aux_sym_token5] = "",
[ts_aux_sym_token6] = "",
[ts_aux_sym_token7] = "",
[ts_aux_sym_token8] = "",
[ts_aux_sym_token9] = "",
[ts_aux_sym_token10] = "",
[ts_aux_sym_token11] = "",
[ts_aux_sym_token12] = "",
[ts_aux_sym_token13] = "",
[ts_aux_sym_token14] = "",
[ts_aux_sym_token15] = "",
[ts_aux_sym_token16] = "",
[ts_aux_sym_token17] = "",
[ts_aux_sym_token18] = "",
[ts_aux_sym_token19] = "",
[ts_aux_sym_token20] = "",
[ts_aux_sym_token21] = "",
[ts_aux_sym_token22] = "",
[ts_aux_sym_token23] = "",
[ts_aux_sym_token24] = "",
[ts_aux_sym_token25] = "",
[ts_aux_sym_token26] = "",
[ts_aux_sym_token27] = "",
[ts_aux_sym_token28] = "",
[ts_aux_sym_token29] = "",
[ts_aux_sym_token30] = "",
[ts_aux_sym_token31] = "",
[ts_aux_sym_token32] = "",
[ts_aux_sym_token33] = "",
[ts_aux_sym_token34] = "",
[ts_aux_sym_token35] = "",
[ts_aux_sym_token36] = "",
[ts_aux_sym_token37] = "",
[ts_aux_sym_token38] = "",
[ts_aux_sym_token39] = "",
[ts_aux_sym_token40] = "",
[ts_aux_sym_token41] = "",
[ts_aux_sym_token42] = "",
[ts_aux_sym_token43] = "",
[ts_aux_sym_token44] = "",
[ts_aux_sym_token45] = "",
};
UBIQUITOUS_SYMBOLS = {

View file

@ -35,12 +35,12 @@ SYMBOL_NAMES = {
[ts_sym_false] = "false",
[ts_aux_sym_object_repeat0] = "object_repeat0",
[ts_aux_sym_array_repeat0] = "array_repeat0",
[ts_aux_sym_token0] = "'{'",
[ts_aux_sym_token1] = "':'",
[ts_aux_sym_token2] = "','",
[ts_aux_sym_token3] = "'}'",
[ts_aux_sym_token4] = "'['",
[ts_aux_sym_token5] = "']'",
[ts_aux_sym_token0] = "",
[ts_aux_sym_token1] = "",
[ts_aux_sym_token2] = "",
[ts_aux_sym_token3] = "",
[ts_aux_sym_token4] = "",
[ts_aux_sym_token5] = "",
};
UBIQUITOUS_SYMBOLS = {
@ -69,90 +69,87 @@ LEX_FN() {
if (lookahead == '\"')
ADVANCE(2);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
ADVANCE(6);
if (lookahead == '[')
ADVANCE(10);
ADVANCE(9);
if (lookahead == 'f')
ADVANCE(11);
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(16);
ADVANCE(15);
if (lookahead == 't')
ADVANCE(20);
ADVANCE(19);
if (lookahead == '{')
ADVANCE(24);
ADVANCE(23);
LEX_ERROR();
case 2:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(3);
if (lookahead == '\\')
ADVANCE(5);
ADVANCE(4);
LEX_ERROR();
case 3:
ACCEPT_TOKEN(ts_sym_string);
case 4:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(3);
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(4);
if (lookahead == '\\')
ADVANCE(5);
if (lookahead == '\\')
ADVANCE(4);
LEX_ERROR();
case 4:
ACCEPT_TOKEN(ts_sym_string);
case 5:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(3);
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(6);
ADVANCE(3);
if (lookahead == '\\')
ADVANCE(5);
LEX_ERROR();
case 6:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(3);
if (lookahead == '\"')
ADVANCE(4);
if (lookahead == '\\')
ADVANCE(5);
ACCEPT_TOKEN(ts_sym_string);
case 7:
case 6:
if (lookahead == '.')
ADVANCE(8);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(6);
ACCEPT_TOKEN(ts_sym_number);
case 7:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(8);
LEX_ERROR();
case 8:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(9);
LEX_ERROR();
case 9:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(9);
ADVANCE(8);
ACCEPT_TOKEN(ts_sym_number);
case 10:
case 9:
ACCEPT_TOKEN(ts_aux_sym_token4);
case 11:
case 10:
if (lookahead == 'a')
ADVANCE(11);
LEX_ERROR();
case 11:
if (lookahead == 'l')
ADVANCE(12);
LEX_ERROR();
case 12:
if (lookahead == 'l')
if (lookahead == 's')
ADVANCE(13);
LEX_ERROR();
case 13:
if (lookahead == 's')
if (lookahead == 'e')
ADVANCE(14);
LEX_ERROR();
case 14:
if (lookahead == 'e')
ADVANCE(15);
LEX_ERROR();
case 15:
ACCEPT_TOKEN(ts_sym_false);
case 16:
case 15:
if (lookahead == 'u')
ADVANCE(16);
LEX_ERROR();
case 16:
if (lookahead == 'l')
ADVANCE(17);
LEX_ERROR();
case 17:
@ -160,65 +157,71 @@ LEX_FN() {
ADVANCE(18);
LEX_ERROR();
case 18:
if (lookahead == 'l')
ADVANCE(19);
LEX_ERROR();
case 19:
ACCEPT_TOKEN(ts_sym_null);
case 20:
case 19:
if (lookahead == 'r')
ADVANCE(20);
LEX_ERROR();
case 20:
if (lookahead == 'u')
ADVANCE(21);
LEX_ERROR();
case 21:
if (lookahead == 'u')
if (lookahead == 'e')
ADVANCE(22);
LEX_ERROR();
case 22:
if (lookahead == 'e')
ADVANCE(23);
LEX_ERROR();
case 23:
ACCEPT_TOKEN(ts_sym_true);
case 24:
case 23:
ACCEPT_TOKEN(ts_aux_sym_token0);
case 25:
case 24:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(26);
ADVANCE(25);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(25);
ADVANCE(24);
LEX_ERROR();
case 26:
case 25:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 27:
case 26:
START_TOKEN();
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(27);
ADVANCE(26);
if (lookahead == '\"')
ADVANCE(2);
if (lookahead == '}')
ADVANCE(28);
ADVANCE(27);
LEX_ERROR();
case 28:
case 27:
ACCEPT_TOKEN(ts_aux_sym_token3);
case 29:
case 28:
START_TOKEN();
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(29);
ADVANCE(28);
if (lookahead == ',')
ADVANCE(29);
if (lookahead == '}')
ADVANCE(27);
LEX_ERROR();
case 29:
ACCEPT_TOKEN(ts_aux_sym_token2);
case 30:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(30);
if (lookahead == '}')
ADVANCE(28);
ADVANCE(27);
LEX_ERROR();
case 30:
ACCEPT_TOKEN(ts_aux_sym_token2);
case 31:
START_TOKEN();
if ((lookahead == '\t') ||
@ -226,8 +229,8 @@ LEX_FN() {
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(31);
if (lookahead == '}')
ADVANCE(28);
if (lookahead == '\"')
ADVANCE(2);
LEX_ERROR();
case 32:
START_TOKEN();
@ -236,128 +239,118 @@ LEX_FN() {
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(32);
if (lookahead == '\"')
ADVANCE(2);
if (lookahead == ':')
ADVANCE(33);
LEX_ERROR();
case 33:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(33);
if (lookahead == ':')
ADVANCE(34);
LEX_ERROR();
case 34:
ACCEPT_TOKEN(ts_aux_sym_token1);
case 35:
case 34:
START_TOKEN();
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(35);
ADVANCE(34);
if (lookahead == '\"')
ADVANCE(2);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
ADVANCE(6);
if (lookahead == '[')
ADVANCE(10);
ADVANCE(9);
if (lookahead == ']')
ADVANCE(36);
ADVANCE(35);
if (lookahead == 'f')
ADVANCE(11);
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(16);
ADVANCE(15);
if (lookahead == 't')
ADVANCE(20);
ADVANCE(19);
if (lookahead == '{')
ADVANCE(24);
ADVANCE(23);
LEX_ERROR();
case 36:
case 35:
ACCEPT_TOKEN(ts_aux_sym_token5);
case 37:
case 36:
START_TOKEN();
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(36);
if (lookahead == ',')
ADVANCE(29);
if (lookahead == ']')
ADVANCE(35);
LEX_ERROR();
case 37:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(37);
if (lookahead == ',')
ADVANCE(30);
if (lookahead == ']')
ADVANCE(36);
ADVANCE(35);
LEX_ERROR();
case 38:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(38);
if (lookahead == ']')
ADVANCE(36);
LEX_ERROR();
case 39:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(26);
ADVANCE(25);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(39);
ADVANCE(38);
if (lookahead == '\"')
ADVANCE(2);
if (lookahead == ',')
ADVANCE(30);
ADVANCE(29);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
ADVANCE(6);
if (lookahead == ':')
ADVANCE(34);
ADVANCE(33);
if (lookahead == '[')
ADVANCE(10);
ADVANCE(9);
if (lookahead == ']')
ADVANCE(36);
ADVANCE(35);
if (lookahead == 'f')
ADVANCE(11);
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(16);
ADVANCE(15);
if (lookahead == 't')
ADVANCE(20);
ADVANCE(19);
if (lookahead == '{')
ADVANCE(24);
ADVANCE(23);
if (lookahead == '}')
ADVANCE(28);
ADVANCE(27);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(26);
ADVANCE(25);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(39);
ADVANCE(38);
if (lookahead == '\"')
ADVANCE(2);
if (lookahead == ',')
ADVANCE(30);
ADVANCE(29);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
ADVANCE(6);
if (lookahead == ':')
ADVANCE(34);
ADVANCE(33);
if (lookahead == '[')
ADVANCE(10);
ADVANCE(9);
if (lookahead == ']')
ADVANCE(36);
ADVANCE(35);
if (lookahead == 'f')
ADVANCE(11);
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(16);
ADVANCE(15);
if (lookahead == 't')
ADVANCE(20);
ADVANCE(19);
if (lookahead == '{')
ADVANCE(24);
ADVANCE(23);
if (lookahead == '}')
ADVANCE(28);
ADVANCE(27);
LEX_ERROR();
default:
LEX_PANIC();
@ -366,65 +359,65 @@ LEX_FN() {
LEX_STATES = {
[0] = 1,
[1] = 25,
[2] = 25,
[3] = 27,
[4] = 29,
[5] = 31,
[6] = 25,
[7] = 32,
[8] = 29,
[9] = 31,
[10] = 33,
[1] = 24,
[2] = 24,
[3] = 26,
[4] = 28,
[5] = 30,
[6] = 24,
[7] = 31,
[8] = 28,
[9] = 30,
[10] = 32,
[11] = 1,
[12] = 29,
[13] = 31,
[14] = 29,
[15] = 27,
[16] = 29,
[17] = 31,
[18] = 29,
[19] = 33,
[12] = 28,
[13] = 30,
[14] = 28,
[15] = 26,
[16] = 28,
[17] = 30,
[18] = 28,
[19] = 32,
[20] = 1,
[21] = 29,
[22] = 31,
[23] = 29,
[24] = 35,
[25] = 37,
[26] = 38,
[27] = 29,
[21] = 28,
[22] = 30,
[23] = 28,
[24] = 34,
[25] = 36,
[26] = 37,
[27] = 28,
[28] = 1,
[29] = 37,
[30] = 38,
[31] = 37,
[32] = 27,
[33] = 29,
[34] = 31,
[35] = 37,
[36] = 33,
[29] = 36,
[30] = 37,
[31] = 36,
[32] = 26,
[33] = 28,
[34] = 30,
[35] = 36,
[36] = 32,
[37] = 1,
[38] = 29,
[39] = 31,
[40] = 37,
[41] = 37,
[42] = 35,
[43] = 37,
[44] = 38,
[45] = 37,
[46] = 37,
[47] = 29,
[48] = 29,
[49] = 33,
[38] = 28,
[39] = 30,
[40] = 36,
[41] = 36,
[42] = 34,
[43] = 36,
[44] = 37,
[45] = 36,
[46] = 36,
[47] = 28,
[48] = 28,
[49] = 32,
[50] = 1,
[51] = 29,
[52] = 31,
[53] = 25,
[54] = 25,
[55] = 35,
[56] = 37,
[57] = 38,
[58] = 25,
[59] = 25,
[51] = 28,
[52] = 30,
[53] = 24,
[54] = 24,
[55] = 34,
[56] = 36,
[57] = 37,
[58] = 24,
[59] = 24,
};
#pragma GCC diagnostic push

View file

@ -55,6 +55,7 @@ namespace tree_sitter {
// Describes a problem found in a grammar: an error category (`type`) plus a
// human-readable `message`.
class GrammarError {
public:
GrammarError(GrammarErrorType type, std::string message);
// Compares two errors by value (declared here; defined out of line).
bool operator==(const GrammarError &other) const;
GrammarErrorType type;
std::string message;
};

View file

@ -13,8 +13,8 @@ describe("lexical item set transitions", []() {
describe("when two items in the set have transitions on the same character", [&]() {
it("merges the transitions by computing the union of the two item sets", [&]() {
LexItemSet set1({
LexItem(Symbol(1), pattern("[a-f]")),
LexItem(Symbol(2), pattern("[e-x]")) });
LexItem(Symbol(1), character({ {'a', 'f'} })),
LexItem(Symbol(2), character({ {'e', 'x'} })) });
AssertThat(char_transitions(set1, grammar), Equals(map<CharacterSet, LexItemSet>({
{ CharacterSet({ {'a', 'd'} }), LexItemSet({

View file

@ -97,23 +97,6 @@ describe("rule transitions", []() {
})));
});
it("handles strings", [&]() {
AssertThat(
char_transitions(str("bad")),
Equals(rule_map<CharacterSet>({
{ CharacterSet({ 'b' }), seq({ character({ 'a' }), character({ 'd' }) }) }
})));
});
it("handles patterns", [&]() {
AssertThat(
char_transitions(pattern("a|b")),
Equals(rule_map<CharacterSet>({
{ CharacterSet({ 'a' }), blank() },
{ CharacterSet({ 'b' }), blank() }
})));
});
it("handles choices between overlapping character sets", [&]() {
AssertThat(
char_transitions(choice({
@ -164,7 +147,7 @@ describe("rule transitions", []() {
});
it("handles repeats", [&]() {
rule_ptr rule = repeat(str("ab"));
rule_ptr rule = repeat(seq({ character({ 'a' }), character({ 'b' }) }));
AssertThat(
char_transitions(rule),
Equals(rule_map<CharacterSet>({
@ -176,7 +159,7 @@ describe("rule transitions", []() {
})
}})));
rule = repeat(str("a"));
rule = repeat(character({ 'a' }));
AssertThat(
char_transitions(rule),
Equals(rule_map<CharacterSet>({

View file

@ -5,6 +5,7 @@
namespace tree_sitter {
using std::make_shared;
using std::set;
using std::map;
namespace rules {
rule_ptr character(const set<CharacterRange> &ranges) {
@ -33,5 +34,9 @@ namespace tree_sitter {
// Spec helper: builds a Symbol rule for `index`, flagged as both auxiliary
// and a token.
rule_ptr i_aux_token(size_t index) {
    auto symbol = make_shared<rules::Symbol>(
        index,
        SymbolOption(SymbolOptionAuxiliary|SymbolOptionToken));
    return symbol;
}
// Spec helper: wraps `rule` in a Metadata rule carrying the given key/value
// pairs.
rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
    auto wrapped = make_shared<Metadata>(rule, values);
    return wrapped;
}
}
}

View file

@ -3,9 +3,11 @@
#include "tree_sitter/compiler.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
namespace rules {
rule_ptr metadata(rule_ptr, std::map<MetadataKey, int>);
rule_ptr character(const std::set<CharacterRange> &ranges);
rule_ptr character(const std::set<CharacterRange> &ranges, bool sign);
rule_ptr i_sym(size_t index);

View file

@ -0,0 +1,63 @@
#include "compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/prepare_grammar/expand_tokens.h"
START_TEST
using namespace rules;
using prepare_grammar::expand_tokens;
// Specs for prepare_grammar::expand_tokens. Each case builds a PreparedGrammar,
// runs expand_tokens over it, and checks the returned (grammar, error) pair.
describe("expanding token rules", []() {
// A Pattern rule nested in a sequence is replaced by its parsed rule tree;
// the surrounding symbols are left as they were.
it("replaces regex patterns with their expansion", [&]() {
PreparedGrammar grammar({
{ "rule_A", seq({
i_sym(10),
pattern("x*"),
i_sym(11) }) },
}, {});
auto result = expand_tokens(grammar);
// A null error pointer signals success.
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_A", seq({
i_sym(10),
repeat(character({ 'x' })),
i_sym(11) }) },
}, {})));
});
// A String rule becomes a sequence with one character rule per char.
it("replaces string rules with a sequence of characters", [&]() {
PreparedGrammar grammar({
{ "rule_A", seq({
i_sym(10),
str("xyz"),
i_sym(11) }) },
}, {});
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_A", seq({
i_sym(10),
seq({ character({ 'x' }), character({ 'y' }), character({ 'z' }) }),
i_sym(11) }) },
}, {})));
});
// The grammar contains two invalid patterns; the expected message is the
// one belonging to the first pattern ("(").
it("returns an error when the grammar contains an invalid regex", [&]() {
PreparedGrammar grammar({
{ "rule_A", seq({
pattern("("),
str("xyz"),
pattern("[") }) },
}, {});
auto result = expand_tokens(grammar);
// NOTE(review): the expected GrammarError is allocated with `new` and never freed here.
AssertThat(result.second, EqualsPointer(new GrammarError(GrammarErrorTypeRegex, "unmatched open paren")));
});
});
END_TEST

View file

@ -0,0 +1,217 @@
#include "compiler_spec_helper.h"
#include "compiler/prepare_grammar/parse_regex.h"
START_TEST
using namespace rules;
using prepare_grammar::parse_regex;
// Table-driven specs for prepare_grammar::parse_regex. One `it` case is
// generated per table entry below.
describe("parsing regex patterns", []() {
    // Each entry: { description, regex source, expected rule tree }.
    vector<tuple<string, string, rule_ptr>> valid_inputs = {
        {
            "character sets",
            "[aAeE]",
            character({ 'a', 'A', 'e', 'E' })
        },

        {
            "'.' characters as wildcards",
            ".",
            CharacterSet({'\n'}).complement().copy()
        },

        {
            "character classes",
            "\\w-\\d",
            seq({
                character({ {'a', 'z'}, {'A', 'Z'}, {'0', '9'} }),
                character({ '-' }),
                character({ {'0', '9'} }) })
        },

        {
            "choices",
            "ab|cd|ef",
            choice({
                seq({
                    character({ 'a' }),
                    character({ 'b' }),
                }),
                seq({
                    character({ 'c' }),
                    character({ 'd' })
                }),
                seq({
                    character({ 'e' }),
                    character({ 'f' })
                })
            })
        },

        {
            "simple sequences",
            "abc",
            seq({
                character({ 'a' }),
                character({ 'b' }),
                character({ 'c' }) })
        },

        {
            "character ranges",
            "[12a-dA-D3]",
            character({ {'1', '3'}, {'a', 'd'}, { 'A', 'D' }, })
        },

        {
            "negated characters",
            "[^a\\d]",
            character({ {'a'}, {'0', '9'} }, false)
        },

        {
            "backslashes",
            "\\\\",
            character({ '\\' })
        },

        {
            "character groups in sequences",
            "x([^x]|\\\\x)*x",
            seq({
                character({ 'x' }),
                repeat(choice({
                    character({ 'x' }, false),
                    seq({ character({ '\\' }), character({ 'x' }) })
                })),
                character({ 'x' })
            })
        },

        {
            "choices in sequences",
            "(a|b)cd",
            seq({
                choice({
                    character({ 'a' }),
                    character({ 'b' }),
                }),
                character({ 'c' }),
                character({ 'd' })
            })
        },

        {
            "escaped parentheses",
            "a\\(b",
            seq({
                character({ 'a' }),
                character({ '(' }),
                character({ 'b' })
            })
        },

        {
            "escaped periods",
            "a\\.",
            seq({
                character({ 'a' }),
                character({ '.' })
            })
        },

        {
            "plus repeats",
            "(ab)+(cd)+",
            seq({
                seq({
                    seq({ character({ 'a' }), character({ 'b' }) }),
                    repeat(seq({ character({ 'a' }), character({ 'b' }) })),
                }),
                seq({
                    seq({ character({ 'c' }), character({ 'd' }) }),
                    repeat(seq({ character({ 'c' }), character({ 'd' }) })),
                }),
            })
        },

        {
            "asterisk repeats",
            "(ab)*(cd)*",
            seq({
                repeat(seq({ character({ 'a' }), character({ 'b' }) })),
                repeat(seq({ character({ 'c' }), character({ 'd' }) })),
            })
        },

        {
            "optional rules",
            "a(bc)?",
            seq({
                character({ 'a' }),
                choice({
                    seq({ character({ 'b' }), character({ 'c' }) }),
                    blank()
                })
            })
        }
    };

    // Each entry: { description, regex source, substring expected in the
    // error message }. Descriptions are kept unique so that every generated
    // test case has a distinguishable name.
    vector<tuple<string, string, const char *>> invalid_inputs = {
        {
            "mismatched open parens",
            "(a",
            "unmatched open paren",
        },

        {
            "mismatched nested open parens",
            "((a) (b)",
            "unmatched open paren",
        },

        {
            "mismatched close parens",
            "a)",
            "unmatched close paren",
        },

        {
            "mismatched nested close parens",
            "((a) b))",
            "unmatched close paren",
        },

        {
            "mismatched open brackets for character classes",
            "[a",
            "unmatched open square bracket",
        },

        {
            "mismatched close brackets for character classes",
            "a]",
            "unmatched close square bracket",
        },
    };

    // The lambdas capture these loop locals by reference.
    // NOTE(review): this relies on `it` invoking the lambda before the loop
    // iteration ends; confirm against the test framework.
    for (auto &triple : valid_inputs) {
        string description = get<0>(triple);
        string regex = get<1>(triple);
        rule_ptr rule = get<2>(triple);

        it(("parses " + description).c_str(), [&]() {
            auto result = parse_regex(regex);
            AssertThat(result.first, EqualsPointer(rule));
        });
    }

    for (auto &triple : invalid_inputs) {
        string description = get<0>(triple);
        string regex = get<1>(triple);
        const char *expected_message = get<2>(triple);

        it(("handles invalid regexes with " + description).c_str(), [&]() {
            auto result = parse_regex(regex);
            AssertThat(result.second, !Equals((const GrammarError *)nullptr));
            AssertThat(result.second->message, Contains(expected_message));
        });
    }
});
END_TEST

View file

@ -1,177 +0,0 @@
#include "compiler_spec_helper.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/character_set.h"
using namespace rules;
START_TEST
describe("parsing regex pattern rules", []() {
it("parses simple strings", [&]() {
Pattern rule("abc");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(seq({
character({ 'a' }),
character({ 'b' }),
character({ 'c' })
})));
});
it("parses wildcard '.' characters", [&]() {
Pattern rule(".");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(CharacterSet({'\n'}).complement().copy()));
});
it("parses character classes", []() {
Pattern rule("\\w-\\d");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(seq({
character({ {'a', 'z'}, {'A', 'Z'}, {'0', '9'} }),
character({ '-' }),
character({ {'0', '9'} })
})));
});
it("parses choices", []() {
Pattern rule("ab|cd|ef");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(choice({
seq({
character({ 'a' }),
character({ 'b' }),
}),
seq({
character({ 'c' }),
character({ 'd' })
}),
seq({
character({ 'e' }),
character({ 'f' })
})
})));
});
it("parses character sets", []() {
Pattern rule("[aAeE]");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(character({ 'a', 'A', 'e', 'E' })));
});
it("parses character ranges", []() {
Pattern rule("[12a-dA-D3]");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(character({ {'1', '3'}, {'a', 'd'}, { 'A', 'D' }, })));
});
it("parses negated characters", []() {
Pattern rule("[^a\\d]");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(character({ {'a'}, {'0', '9'} }, false)));
});
it("parses backslashes", []() {
Pattern rule("\\\\");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(character({ '\\' })));
});
it("parses character groups in sequences", []() {
Pattern rule("\"([^\"]|\\\\\")*\"");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(seq({
character({ '"' }),
repeat(choice({
character({ '"' }, false),
seq({ character({ '\\' }), character({ '"' }) })
})),
character({ '"' })
})));
});
it("parses choices in sequences", []() {
Pattern rule("(a|b)cd");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(seq({
choice({
character({ 'a' }),
character({ 'b' }),
}),
character({ 'c' }),
character({ 'd' })
})));
});
it("parses special characters when they are escaped", []() {
Pattern rule("a\\(b");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(seq({
character({ 'a' }),
character({ '(' }),
character({ 'b' })
})));
Pattern rule2("a\\.");
AssertThat(
rule2.to_rule_tree(),
EqualsPointer(seq({
character({ 'a' }),
character({ '.' }),
})));
});
it("parses repeating rules", []() {
Pattern rule("(ab)+(cd)+");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(
seq({
seq({
seq({ character({ 'a' }), character({ 'b' }) }),
repeat(seq({ character({ 'a' }), character({ 'b' }) })),
}),
seq({
seq({ character({ 'c' }), character({ 'd' }) }),
repeat(seq({ character({ 'c' }), character({ 'd' }) })),
}),
})
));
Pattern rule2("(ab)*(cd)*");
AssertThat(
rule2.to_rule_tree(),
EqualsPointer(
seq({
repeat(seq({ character({ 'a' }), character({ 'b' }) })),
repeat(seq({ character({ 'c' }), character({ 'd' }) })),
})
));
});
it("parses optional rules", []() {
Pattern rule("a(bc)?");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(seq({
character({ 'a' }),
choice({
seq({ character({ 'b' }), character({ 'c' }) }),
blank()
})
})));
});
});
END_TEST

View file

@ -94,20 +94,6 @@ namespace tree_sitter {
});
return result;
}
map<T, rule_ptr> apply_to(const rules::String *rule) {
rule_ptr result = make_shared<rules::Blank>();
for (char val : rule->value)
result = rules::Seq::Build({
result,
CharacterSet({ val }).copy()
});
return this->apply(result);
}
map<T, rule_ptr> apply_to(const rules::Pattern *rule) {
return this->apply(rule->to_rule_tree());
}
};
map<CharacterSet, rule_ptr> char_transitions(const rule_ptr &rule) {

View file

@ -113,8 +113,7 @@ namespace tree_sitter {
} else if (symbol.is_token() && symbol.is_auxiliary()) {
return token_description(grammar_for_symbol(symbol).rule(symbol));
} else {
string name = grammar_for_symbol(symbol).rule_name(symbol);
return name;
return grammar_for_symbol(symbol).rule_name(symbol);
}
}

View file

@ -48,6 +48,10 @@ namespace tree_sitter {
GrammarError::GrammarError(GrammarErrorType type, std::string message) :
type(type),
message(message) {}
// Two errors are equal only when both the error type and the message match.
bool GrammarError::operator==(const GrammarError &other) const {
    if (!(type == other.type))
        return false;
    return message == other.message;
}
ostream& operator<<(ostream &stream, const GrammarError *error) {
if (error)

View file

@ -0,0 +1,68 @@
#include "compiler/prepare_grammar/expand_tokens.h"
#include <vector>
#include <string>
#include <utility>
#include "compiler/prepared_grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/string.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/character_set.h"
#include "compiler/prepare_grammar/parse_regex.h"
namespace tree_sitter {
using std::string;
using std::vector;
using std::pair;
using std::make_shared;
using rules::rule_ptr;
using rules::String;
using rules::Pattern;
namespace prepare_grammar {
// Rule visitor that rewrites String and Pattern rules into character-level
// rules. The first regex parse error encountered is stored in `error`; it
// stays null while every pattern seen so far has parsed successfully.
class ExpandTokens : public rules::IdentityRuleFn {
    using rules::IdentityRuleFn::apply_to;

    // Expands a string literal into a sequence holding one single-character
    // set per character of the string.
    rule_ptr apply_to(const String *rule) {
        vector<rule_ptr> elements;
        for (char val : rule->value)
            elements.push_back(rules::CharacterSet({ val }).copy());
        return rules::Seq::Build(elements);
    }

    // Expands a regex pattern into its parsed rule tree. Only the first
    // error is kept; parse_regex allocates its errors with `new`, so any
    // later error that will not be reported is freed here instead of leaked.
    rule_ptr apply_to(const Pattern *rule) {
        auto pair = parse_regex(rule->value);
        if (pair.second) {
            if (!error)
                error = pair.second;
            else
                delete pair.second;
        }
        return pair.first;
    }

 public:
    // First regex error encountered, or nullptr if none.
    const GrammarError *error;
    ExpandTokens() : error(nullptr) {}
};
// Runs ExpandTokens over every rule and auxiliary rule of `grammar`.
// Returns { expanded grammar, nullptr } on success. As soon as the expander
// has recorded an error, returns { empty PreparedGrammar, that error }.
pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &grammar) {
    ExpandTokens expander;
    vector<pair<string, rule_ptr>> expanded_rules;
    vector<pair<string, rule_ptr>> expanded_aux_rules;

    for (auto &entry : grammar.rules) {
        auto expanded = expander.apply(entry.second);
        if (expander.error != nullptr)
            return { PreparedGrammar(), expander.error };
        expanded_rules.push_back({ entry.first, expanded });
    }

    for (auto &entry : grammar.aux_rules) {
        auto expanded = expander.apply(entry.second);
        if (expander.error != nullptr)
            return { PreparedGrammar(), expander.error };
        expanded_aux_rules.push_back({ entry.first, expanded });
    }

    return {
        PreparedGrammar(expanded_rules, expanded_aux_rules, grammar.options),
        nullptr
    };
}
}
}

View file

@ -0,0 +1,16 @@
#ifndef COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_
#define COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class PreparedGrammar;
namespace prepare_grammar {
// Takes a prepared grammar and returns a prepared grammar paired with an
// error pointer.
// NOTE(review): presumably the pointer is null on success - confirm against
// the definition, which is not part of this header.
std::pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &);
}
}
#endif // COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_

View file

@ -0,0 +1,210 @@
#include "compiler/prepare_grammar/parse_regex.h"
#include <string>
#include <utility>
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/blank.h"
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
using std::string;
using std::vector;
using std::pair;
using std::make_shared;
using rules::rule_ptr;
using rules::CharacterSet;
using rules::Seq;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::CharacterRange;
using rules::blank;
namespace prepare_grammar {
class PatternParser {
public:
explicit PatternParser(const string &input) :
input(input),
length(input.length()),
position(0) {}
pair<rule_ptr, const GrammarError *> rule(bool nested) {
vector<rule_ptr> choices = {};
do {
if (!choices.empty()) {
if (peek() == '|')
next();
else
break;
}
auto pair = term(nested);
if (pair.second)
return { blank(), pair.second };
choices.push_back(pair.first);
} while (has_more_input());
auto rule = (choices.size() > 1) ? make_shared<Choice>(choices) : choices.front();
return { rule, nullptr };
}
private:
pair<rule_ptr, const GrammarError *> term(bool nested) {
rule_ptr result = blank();
do {
if (peek() == '|')
break;
if (nested && peek() == ')')
break;
auto pair = factor();
if (pair.second)
return { blank(), pair.second };
result = Seq::Build({ result, pair.first });
} while (has_more_input());
return { result, nullptr };
}
pair<rule_ptr, const GrammarError *> factor() {
auto pair = atom();
if (pair.second)
return { blank(), pair.second };
rule_ptr result = pair.first;
if (has_more_input()) {
switch (peek()) {
case '*':
next();
result = make_shared<Repeat>(result);
break;
case '+':
next();
result = make_shared<Seq>(result, make_shared<Repeat>(result));
break;
case '?':
next();
result = Choice::Build({ result, make_shared<Blank>() });
break;
}
}
return { result, nullptr };
}
pair<rule_ptr, const GrammarError *> atom() {
switch (peek()) {
case '(': {
next();
auto pair = rule(true);
if (pair.second)
return { blank(), pair.second };
if (peek() != ')')
return error("unmatched open paren");
next();
return { pair.first, nullptr };
}
case '[': {
next();
auto pair = char_set();
if (pair.second)
return { blank(), pair.second };
if (peek() != ']')
return error("unmatched open square bracket");
next();
return { pair.first.copy(), nullptr };
}
case ')': {
return error("unmatched close paren");
}
case ']': {
return error("unmatched close square bracket");
}
case '.': {
next();
return { CharacterSet({ '\n' }).complement().copy(), nullptr };
}
default: {
auto pair = single_char();
if (pair.second)
return { blank(), pair.second };
return { pair.first.copy(), nullptr };
}
}
}
// Parses the contents of a bracketed character set; the caller has already
// consumed the '[' and is responsible for the closing ']'.
// A leading '^' negates the set. Elements are read with single_char() until
// a ']' or the end of the input is reached, and are merged with add_set().
pair<CharacterSet, const GrammarError *> char_set() {
  const bool negated = (peek() == '^');
  if (negated)
    next();

  CharacterSet members;
  while (has_more_input() && peek() != ']') {
    auto element = single_char();
    if (element.second)
      return { CharacterSet(), element.second };
    members.add_set(element.first);
  }

  if (negated)
    members = members.complement();
  return { members, nullptr };
}
// Parses one character-level element and returns it as a CharacterSet:
//   - a backslash followed by a character, resolved by escaped_char();
//   - a character followed by '-' and another character, read as a range;
//   - otherwise the single character itself.
// The error slot of the returned pair is always nullptr here.
pair<CharacterSet, const GrammarError *> single_char() {
  CharacterSet chars;
  if (peek() == '\\') {
    next();
    chars = escaped_char(peek());
    next();
  } else {
    char range_start = peek();
    next();
    if (peek() != '-') {
      chars = CharacterSet({ range_start });
    } else {
      // Skip the '-' and use the character after it as the range's end.
      next();
      chars = CharacterSet({ CharacterRange(range_start, peek()) });
      next();
    }
  }
  return { chars, nullptr };
}
// Maps the character that followed a backslash to the set it stands for:
//   'a'  the letters a-z and A-Z,
//   'w'  the letters a-z, A-Z and the digits 0-9 (no underscore),
//   'd'  the digits 0-9,
//   any other character: a set holding just that character.
CharacterSet escaped_char(char value) {
  if (value == 'a')
    return CharacterSet({ {'a', 'z'}, {'A', 'Z'} });
  if (value == 'w')
    return CharacterSet({ {'a', 'z'}, {'A', 'Z'}, {'0', '9'}});
  if (value == 'd')
    return CharacterSet({ {'0', '9'} });
  return CharacterSet({ value });
}
// Advances the read position by one character. No bounds check is made.
void next() {
  ++position;
}
char peek() {
return input[position];
}
// True while the read position is still inside the input string.
bool has_more_input() {
  return length > position;
}
// Builds a failure result: blank() paired with a GrammarError of type
// GrammarErrorTypeRegex carrying `msg`. The error object is allocated with
// `new` and is not released by this class; the raw pointer is handed back
// to the caller.
pair<rule_ptr, const GrammarError *> error(string msg) {
  rule_ptr placeholder = blank();
  return { placeholder, new GrammarError(GrammarErrorTypeRegex, msg) };
}
const string input;
const size_t length;
size_t position;
};
// Entry point: parses `input` as a top-level (non-nested) pattern using a
// fresh PatternParser and returns its rule/error pair unchanged.
pair<rule_ptr, const GrammarError *> parse_regex(const std::string &input) {
  PatternParser parser(input);
  return parser.rule(false);
}
}
}

View file

@ -0,0 +1,16 @@
#ifndef COMPILER_PREPARE_GRAMMAR_PARSE_REGEX_H_
#define COMPILER_PREPARE_GRAMMAR_PARSE_REGEX_H_
#include "tree_sitter/compiler.h"
#include <string>
#include <utility>
namespace tree_sitter {
namespace prepare_grammar {
// Parses the regular-expression pattern held in the given string.
// Returns a pair: the rule built from the pattern, and a GrammarError
// pointer that is nullptr when parsing succeeded. On failure the pointer
// refers to an error object allocated with `new`.
std::pair<rules::rule_ptr, const GrammarError *>
parse_regex(const std::string &);
}
}
#endif // COMPILER_PREPARE_GRAMMAR_PARSE_REGEX_H_

View file

@ -2,8 +2,11 @@
#include "compiler/prepared_grammar.h"
#include "compiler/prepare_grammar/extract_tokens.h"
#include "compiler/prepare_grammar/expand_repeats.h"
#include "compiler/prepare_grammar/expand_tokens.h"
#include "compiler/prepare_grammar/intern_symbols.h"
#include "stream_methods.h"
namespace tree_sitter {
using std::tuple;
using std::make_tuple;
@ -16,12 +19,17 @@ namespace tree_sitter {
const GrammarError *error = result.second;
if (error)
return make_tuple(PreparedGrammar({}, {}), PreparedGrammar({}, {}), error);
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
auto grammars = extract_tokens(grammar);
const PreparedGrammar &rule_grammar = expand_repeats(grammars.first);
const PreparedGrammar &lex_grammar = grammars.second;
auto expand_tokens_result = expand_tokens(grammars.second);
const PreparedGrammar &lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;
if (error)
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
return make_tuple(rule_grammar, lex_grammar, nullptr);
}
}

View file

@ -10,6 +10,8 @@ namespace tree_sitter {
using std::ostream;
using rules::rule_ptr;
using rules::Symbol;
// Default constructor: initializes the Grammar base, aux_rules and options
// each from an empty braced list.
PreparedGrammar::PreparedGrammar() : Grammar({}), aux_rules({}), options({}) {}
PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules) :

View file

@ -14,6 +14,7 @@ namespace tree_sitter {
class PreparedGrammar : public Grammar {
public:
PreparedGrammar();
PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,

View file

@ -11,6 +11,7 @@ namespace tree_sitter {
START_TOKEN,
PRECEDENCE,
IS_TOKEN,
DESCRIPTION,
} MetadataKey;
class Metadata : public Rule {

View file

@ -1,173 +1,12 @@
#include "compiler/rules/pattern.h"
#include <set>
#include <string>
#include <vector>
#include "compiler/rules/visitor.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/blank.h"
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
namespace rules {
using std::string;
using std::hash;
using std::make_shared;
using std::set;
using std::vector;
// Recursive-descent parser that converts a regex pattern string into a rule
// tree. Failures are recorded only in the private `error` string; none of
// the methods visible here hand that string back to the caller.
class PatternParser {
public:
// Stores a copy of the pattern, caches its length and starts at position 0.
explicit PatternParser(const string &input) :
input(input),
length(input.length()),
position(0) {}
// Parses one or more terms separated by '|'. Returns a Choice built from
// them when there is more than one, otherwise the single term.
rule_ptr rule() {
vector<rule_ptr> choices = { term() };
while (has_more_input() && peek() == '|') {
next();
choices.push_back(term());
}
return (choices.size() > 1) ? Choice::Build(choices) : choices.front();
}
private:
// Parses consecutive factors, chained with Seq::Build, until the end of the
// input, a '|' or a ')'. The first factor is parsed unconditionally.
rule_ptr term() {
rule_ptr result = factor();
while (has_more_input() && (peek() != '|') && (peek() != ')'))
result = Seq::Build({ result, factor() });
return result;
}
// Parses an atom followed by an optional '*', '+' or '?' postfix operator.
rule_ptr factor() {
rule_ptr result = atom();
if (has_more_input()) {
switch (peek()) {
case '*':
next();
result = make_shared<Repeat>(result);
break;
case '+':
// One occurrence followed by a Repeat of the same rule.
next();
result = make_shared<Seq>(result, make_shared<Repeat>(result));
break;
case '?':
// Either the rule or nothing.
next();
result = Choice::Build({ result, make_shared<Blank>() });
break;
}
}
return result;
}
// Parses a parenthesised group, a bracketed character set, '.', or a single
// character. On a mismatch it sets `error` and returns whatever `result`
// holds at that point.
rule_ptr atom() {
rule_ptr result;
switch (peek()) {
case '(':
next();
result = rule();
if (has_error()) return result;
if (peek() != ')') {
error = "mismatched parens";
return result;
}
next();
break;
case '[':
next();
result = char_set().copy();
if (has_error()) return result;
if (peek() != ']') {
error = "mismatched square brackets";
return result;
}
next();
break;
case ')':
// `result` is left default-constructed here and the ')' is not consumed.
error = "mismatched parens";
break;
case '.':
// Any character except '\n'.
result = CharacterSet({ '\n' }).complement().copy();
next();
break;
default:
result = single_char().copy();
}
return result;
}
// Parses the inside of a bracketed set (the '[' is already consumed).
// A leading '^' makes the returned set the complement of its elements.
CharacterSet char_set() {
bool is_affirmative = true;
if (peek() == '^') {
next();
is_affirmative = false;
}
CharacterSet result;
while (has_more_input() && (peek() != ']'))
result.add_set(single_char());
return is_affirmative ? result : result.complement();
}
// Parses a backslash escape, a range written as two characters separated by
// '-', or a single literal character.
CharacterSet single_char() {
CharacterSet value;
switch (peek()) {
case '\\':
next();
value = escaped_char(peek());
if (has_error()) return value;
next();
break;
default:
char first_char = peek();
next();
if (peek() == '-') {
// Skip the '-' and take the following character as the end of the range.
next();
value = CharacterSet({ CharacterRange(first_char, peek()) });
next();
} else {
value = CharacterSet({ first_char });
}
}
return value;
}
// Maps the character after a backslash to a set: 'a' is a-z and A-Z, 'w' is
// a-z, A-Z and 0-9, 'd' is 0-9; any other character stands for itself.
CharacterSet escaped_char(char value) {
switch (value) {
case 'a':
return CharacterSet({ {'a', 'z'}, {'A', 'Z'} });
case 'w':
return CharacterSet({ {'a', 'z'}, {'A', 'Z'}, {'0', '9'}});
case 'd':
return CharacterSet({ {'0', '9'} });
default:
return CharacterSet({ value });
}
}
// Advances the read position by one; no bounds check is made.
void next() {
position++;
}
// Returns the character at the current position without a bounds check.
char peek() {
return input[position];
}
// True while the position is before the end of the input.
bool has_more_input() {
return position < length;
}
// True once any parsing step has stored a message in `error`.
bool has_error() {
return error != "";
}
// Message of the most recent failure; empty when none has occurred.
string error;
// The pattern being parsed, its cached length, and the current read index.
const string input;
const size_t length;
size_t position;
};
// Constructs a Pattern rule that stores the given pattern text in `value`.
Pattern::Pattern(const string &string)
    : value(string) {
}
@ -191,9 +30,5 @@ namespace tree_sitter {
// Visitor hook: passes this Pattern to the visitor's visit() method.
void Pattern::accept(Visitor *visitor) const {
  Visitor &target = *visitor;
  target.visit(this);
}
// Parses this pattern's text with a PatternParser and returns the resulting
// rule tree.
rule_ptr Pattern::to_rule_tree() const {
  PatternParser parser(value);
  return parser.rule();
}
}
}

View file

@ -17,7 +17,6 @@ namespace tree_sitter {
void accept(Visitor *visitor) const;
const std::string value;
rule_ptr to_rule_tree() const;
};
}
}