Allow Character rules to handle arbitrary character sets
This commit is contained in:
parent
bc1d115ee2
commit
7f62e752be
16 changed files with 322 additions and 309 deletions
|
|
@ -16,8 +16,8 @@ static unordered_set<Symbol> keys(const unordered_map<Symbol, parse_actions> &ma
|
|||
return result;
|
||||
}
|
||||
|
||||
static unordered_set<CharMatch> keys(const unordered_map<CharMatch, lex_actions> &map) {
|
||||
unordered_set<CharMatch> result;
|
||||
static unordered_set<Character> keys(const unordered_map<Character, lex_actions> &map) {
|
||||
unordered_set<Character> result;
|
||||
for (auto pair : map) {
|
||||
result.insert(pair.first);
|
||||
}
|
||||
|
|
@ -79,16 +79,16 @@ describe("building parse and lex tables", []() {
|
|||
Symbol("left-paren"),
|
||||
})));
|
||||
|
||||
AssertThat(keys(lex_state(0).actions), Equals(unordered_set<CharMatch>({
|
||||
CharMatchSpecific('('),
|
||||
CharMatchClass(CharClassDigit),
|
||||
CharMatchClass(CharClassWord),
|
||||
AssertThat(keys(lex_state(0).actions), Equals(unordered_set<Character>({
|
||||
Character('('),
|
||||
Character(CharClassDigit),
|
||||
Character(CharClassWord),
|
||||
})));
|
||||
|
||||
AssertThat(lex_state(0).expected_inputs(), Equals(unordered_set<CharMatch>({
|
||||
CharMatchSpecific('('),
|
||||
CharMatchClass(CharClassDigit),
|
||||
CharMatchClass(CharClassWord),
|
||||
AssertThat(lex_state(0).expected_inputs(), Equals(unordered_set<Character>({
|
||||
Character('('),
|
||||
Character(CharClassDigit),
|
||||
Character(CharClassWord),
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
|
|||
110
spec/fixtures/parsers/arithmetic.c
vendored
110
spec/fixtures/parsers/arithmetic.c
vendored
|
|
@ -3,28 +3,28 @@
|
|||
|
||||
enum ts_symbol {
|
||||
ts_symbol_factor,
|
||||
ts_aux_token1,
|
||||
ts_aux_token2,
|
||||
ts_symbol_number,
|
||||
ts_symbol_times,
|
||||
ts_symbol___END__,
|
||||
ts_aux_token1,
|
||||
ts_symbol_variable,
|
||||
ts_symbol_term,
|
||||
ts_symbol_plus,
|
||||
ts_symbol_expression,
|
||||
ts_symbol_variable,
|
||||
ts_symbol_number,
|
||||
ts_symbol___END__,
|
||||
};
|
||||
|
||||
static const char *ts_symbol_names[] = {
|
||||
"factor",
|
||||
"token1",
|
||||
"token2",
|
||||
"number",
|
||||
"times",
|
||||
"__END__",
|
||||
"token1",
|
||||
"variable",
|
||||
"term",
|
||||
"plus",
|
||||
"expression",
|
||||
"variable",
|
||||
"number",
|
||||
"__END__",
|
||||
};
|
||||
|
||||
static void ts_lex(TSParser *parser) {
|
||||
|
|
@ -33,7 +33,7 @@ static void ts_lex(TSParser *parser) {
|
|||
case 0:
|
||||
if (LOOKAHEAD_CHAR() == '\0')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR(1, EXPECT({"''"}));
|
||||
LEX_ERROR(1, EXPECT({"<EOF>"}));
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_symbol___END__);
|
||||
case 2:
|
||||
|
|
@ -41,7 +41,7 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(3);
|
||||
if (LOOKAHEAD_CHAR() == '\0')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR(2, EXPECT({"''", "'*'"}));
|
||||
LEX_ERROR(2, EXPECT({"'*'", "<EOF>"}));
|
||||
case 3:
|
||||
ACCEPT_TOKEN(ts_symbol_times);
|
||||
case 4:
|
||||
|
|
@ -55,7 +55,7 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(5);
|
||||
if (LOOKAHEAD_CHAR() == '*')
|
||||
ADVANCE(3);
|
||||
LEX_ERROR(2, EXPECT({"'*'", "')'"}));
|
||||
LEX_ERROR(2, EXPECT({"')'", "'*'"}));
|
||||
case 7:
|
||||
if (LOOKAHEAD_CHAR() == ')')
|
||||
ADVANCE(5);
|
||||
|
|
@ -63,7 +63,7 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(3);
|
||||
if (LOOKAHEAD_CHAR() == '+')
|
||||
ADVANCE(8);
|
||||
LEX_ERROR(3, EXPECT({"'+'", "'*'", "')'"}));
|
||||
LEX_ERROR(3, EXPECT({"')'", "'*'", "'+'"}));
|
||||
case 8:
|
||||
ACCEPT_TOKEN(ts_symbol_plus);
|
||||
case 9:
|
||||
|
|
@ -71,15 +71,15 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(5);
|
||||
if (LOOKAHEAD_CHAR() == '+')
|
||||
ADVANCE(8);
|
||||
LEX_ERROR(2, EXPECT({"'+'", "')'"}));
|
||||
LEX_ERROR(2, EXPECT({"')'", "'+'"}));
|
||||
case 10:
|
||||
if (isalnum(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(13);
|
||||
if (LOOKAHEAD_CHAR() == '(')
|
||||
ADVANCE(12);
|
||||
if (isalnum(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(13);
|
||||
if (isdigit(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(11);
|
||||
LEX_ERROR(3, EXPECT({"<digit>", "'('", "<word>"}));
|
||||
LEX_ERROR(3, EXPECT({"<word>", "'('", "<digit>"}));
|
||||
case 11:
|
||||
if (isdigit(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(11);
|
||||
|
|
@ -95,7 +95,7 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(8);
|
||||
if (LOOKAHEAD_CHAR() == '\0')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR(2, EXPECT({"''", "'+'"}));
|
||||
LEX_ERROR(2, EXPECT({"'+'", "<EOF>"}));
|
||||
case 15:
|
||||
if (LOOKAHEAD_CHAR() == '*')
|
||||
ADVANCE(3);
|
||||
|
|
@ -103,7 +103,7 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(8);
|
||||
if (LOOKAHEAD_CHAR() == '\0')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR(3, EXPECT({"''", "'+'", "'*'"}));
|
||||
LEX_ERROR(3, EXPECT({"'*'", "'+'", "<EOF>"}));
|
||||
default:
|
||||
LEX_PANIC();
|
||||
}
|
||||
|
|
@ -118,18 +118,18 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(52);
|
||||
case ts_symbol_number:
|
||||
SHIFT(48);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(47);
|
||||
case ts_symbol_term:
|
||||
SHIFT(2);
|
||||
case ts_aux_token1:
|
||||
SHIFT(49);
|
||||
case ts_symbol_number:
|
||||
SHIFT(48);
|
||||
case ts_symbol_term:
|
||||
SHIFT(2);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(1);
|
||||
default:
|
||||
PARSE_ERROR(6, EXPECT({"expression", "token1", "term", "variable", "number", "factor"}));
|
||||
PARSE_ERROR(6, EXPECT({"expression", "variable", "token1", "term", "number", "factor"}));
|
||||
}
|
||||
case 1:
|
||||
SET_LEX_STATE(0);
|
||||
|
|
@ -198,14 +198,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(19);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(37);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_aux_token1:
|
||||
SHIFT(16);
|
||||
case ts_symbol_number:
|
||||
SHIFT(15);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(37);
|
||||
case ts_symbol_term:
|
||||
SHIFT(8);
|
||||
default:
|
||||
|
|
@ -270,14 +270,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(19);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(27);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_aux_token1:
|
||||
SHIFT(16);
|
||||
case ts_symbol_number:
|
||||
SHIFT(15);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(27);
|
||||
case ts_symbol_term:
|
||||
SHIFT(8);
|
||||
default:
|
||||
|
|
@ -312,14 +312,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(19);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(17);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_aux_token1:
|
||||
SHIFT(16);
|
||||
case ts_symbol_number:
|
||||
SHIFT(15);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(17);
|
||||
case ts_symbol_term:
|
||||
SHIFT(8);
|
||||
default:
|
||||
|
|
@ -396,14 +396,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(19);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(24);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_aux_token1:
|
||||
SHIFT(16);
|
||||
case ts_symbol_number:
|
||||
SHIFT(15);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(24);
|
||||
case ts_symbol_term:
|
||||
SHIFT(8);
|
||||
default:
|
||||
|
|
@ -500,14 +500,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(19);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(34);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_aux_token1:
|
||||
SHIFT(16);
|
||||
case ts_symbol_number:
|
||||
SHIFT(15);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(34);
|
||||
case ts_symbol_term:
|
||||
SHIFT(8);
|
||||
default:
|
||||
|
|
@ -600,14 +600,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(19);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(44);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_aux_token1:
|
||||
SHIFT(16);
|
||||
case ts_symbol_number:
|
||||
SHIFT(15);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(44);
|
||||
case ts_symbol_term:
|
||||
SHIFT(8);
|
||||
default:
|
||||
|
|
@ -666,14 +666,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(19);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(50);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_aux_token1:
|
||||
SHIFT(16);
|
||||
case ts_symbol_number:
|
||||
SHIFT(15);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(50);
|
||||
case ts_symbol_term:
|
||||
SHIFT(8);
|
||||
default:
|
||||
|
|
@ -750,14 +750,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(19);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(57);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_aux_token1:
|
||||
SHIFT(16);
|
||||
case ts_symbol_number:
|
||||
SHIFT(15);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(14);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(57);
|
||||
case ts_symbol_term:
|
||||
SHIFT(8);
|
||||
default:
|
||||
|
|
|
|||
176
spec/fixtures/parsers/json.c
vendored
176
spec/fixtures/parsers/json.c
vendored
|
|
@ -2,38 +2,38 @@
|
|||
#include <ctype.h>
|
||||
|
||||
enum ts_symbol {
|
||||
ts_symbol_number,
|
||||
ts_symbol_string,
|
||||
ts_aux_repeat_helper1,
|
||||
ts_aux_token7,
|
||||
ts_symbol_array,
|
||||
ts_aux_token4,
|
||||
ts_aux_token5,
|
||||
ts_aux_token3,
|
||||
ts_aux_token2,
|
||||
ts_aux_token1,
|
||||
ts_aux_repeat_helper2,
|
||||
ts_aux_token6,
|
||||
ts_aux_repeat_helper2,
|
||||
ts_aux_token5,
|
||||
ts_symbol_string,
|
||||
ts_symbol_value,
|
||||
ts_symbol_object,
|
||||
ts_aux_token4,
|
||||
ts_aux_token7,
|
||||
ts_symbol_number,
|
||||
ts_aux_token2,
|
||||
ts_aux_token3,
|
||||
ts_aux_token1,
|
||||
ts_aux_repeat_helper1,
|
||||
ts_symbol___END__,
|
||||
};
|
||||
|
||||
static const char *ts_symbol_names[] = {
|
||||
"number",
|
||||
"string",
|
||||
"repeat_helper1",
|
||||
"token7",
|
||||
"array",
|
||||
"token4",
|
||||
"token5",
|
||||
"token3",
|
||||
"token2",
|
||||
"token1",
|
||||
"repeat_helper2",
|
||||
"token6",
|
||||
"repeat_helper2",
|
||||
"token5",
|
||||
"string",
|
||||
"value",
|
||||
"object",
|
||||
"token4",
|
||||
"token7",
|
||||
"number",
|
||||
"token2",
|
||||
"token3",
|
||||
"token1",
|
||||
"repeat_helper1",
|
||||
"__END__",
|
||||
};
|
||||
|
||||
|
|
@ -43,7 +43,7 @@ static void ts_lex(TSParser *parser) {
|
|||
case 0:
|
||||
if (LOOKAHEAD_CHAR() == '\0')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR(1, EXPECT({"''"}));
|
||||
LEX_ERROR(1, EXPECT({"<EOF>"}));
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_symbol___END__);
|
||||
case 2:
|
||||
|
|
@ -63,7 +63,7 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(5);
|
||||
if (LOOKAHEAD_CHAR() == ',')
|
||||
ADVANCE(3);
|
||||
LEX_ERROR(2, EXPECT({"','", "']'"}));
|
||||
LEX_ERROR(2, EXPECT({"']'", "','"}));
|
||||
case 7:
|
||||
if (LOOKAHEAD_CHAR() == '}')
|
||||
ADVANCE(8);
|
||||
|
|
@ -75,17 +75,17 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(8);
|
||||
if (LOOKAHEAD_CHAR() == ',')
|
||||
ADVANCE(3);
|
||||
LEX_ERROR(2, EXPECT({"','", "'}'"}));
|
||||
LEX_ERROR(2, EXPECT({"'}'", "','"}));
|
||||
case 10:
|
||||
if (LOOKAHEAD_CHAR() == '{')
|
||||
ADVANCE(16);
|
||||
if (LOOKAHEAD_CHAR() == '[')
|
||||
ADVANCE(15);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(12);
|
||||
if (LOOKAHEAD_CHAR() == '{')
|
||||
ADVANCE(16);
|
||||
if (isdigit(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(11);
|
||||
LEX_ERROR(4, EXPECT({"<digit>", "'\"'", "'['", "'{'"}));
|
||||
LEX_ERROR(4, EXPECT({"'['", "'\"'", "'{'", "<digit>"}));
|
||||
case 11:
|
||||
if (isdigit(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(11);
|
||||
|
|
@ -99,7 +99,7 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(14);
|
||||
if (isalnum(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(13);
|
||||
LEX_ERROR(2, EXPECT({"<word>", "'\"'"}));
|
||||
LEX_ERROR(2, EXPECT({"'\"'", "<word>"}));
|
||||
case 14:
|
||||
ACCEPT_TOKEN(ts_symbol_string);
|
||||
case 15:
|
||||
|
|
@ -130,20 +130,20 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(81);
|
||||
case ts_aux_token5:
|
||||
SHIFT(70);
|
||||
case ts_symbol_array:
|
||||
SHIFT(79);
|
||||
case ts_symbol_object:
|
||||
SHIFT(78);
|
||||
case ts_aux_token1:
|
||||
SHIFT(2);
|
||||
case ts_aux_token5:
|
||||
SHIFT(70);
|
||||
case ts_symbol_string:
|
||||
SHIFT(80);
|
||||
case ts_aux_token1:
|
||||
SHIFT(2);
|
||||
case ts_symbol_value:
|
||||
SHIFT(1);
|
||||
default:
|
||||
PARSE_ERROR(7, EXPECT({"value", "string", "token1", "object", "array", "token5", "number"}));
|
||||
PARSE_ERROR(7, EXPECT({"value", "token1", "string", "token5", "object", "array", "number"}));
|
||||
}
|
||||
case 1:
|
||||
SET_LEX_STATE(0);
|
||||
|
|
@ -158,40 +158,40 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(36);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_array:
|
||||
SHIFT(34);
|
||||
case ts_symbol_object:
|
||||
SHIFT(33);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_value:
|
||||
SHIFT(65);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
PARSE_ERROR(7, EXPECT({"token1", "value", "string", "object", "array", "token5", "number"}));
|
||||
PARSE_ERROR(7, EXPECT({"token1", "string", "value", "token5", "object", "array", "number"}));
|
||||
}
|
||||
case 3:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(36);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_array:
|
||||
SHIFT(34);
|
||||
case ts_symbol_object:
|
||||
SHIFT(33);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_value:
|
||||
SHIFT(4);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
PARSE_ERROR(7, EXPECT({"token1", "value", "string", "object", "array", "token5", "number"}));
|
||||
PARSE_ERROR(7, EXPECT({"token1", "string", "value", "token5", "object", "array", "number"}));
|
||||
}
|
||||
case 4:
|
||||
SET_LEX_STATE(2);
|
||||
|
|
@ -246,40 +246,40 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(64);
|
||||
case ts_aux_token5:
|
||||
SHIFT(51);
|
||||
case ts_symbol_array:
|
||||
SHIFT(62);
|
||||
case ts_symbol_object:
|
||||
SHIFT(61);
|
||||
case ts_symbol_string:
|
||||
SHIFT(63);
|
||||
case ts_symbol_value:
|
||||
SHIFT(59);
|
||||
case ts_aux_token5:
|
||||
SHIFT(51);
|
||||
case ts_symbol_string:
|
||||
SHIFT(63);
|
||||
case ts_aux_token1:
|
||||
SHIFT(10);
|
||||
default:
|
||||
PARSE_ERROR(7, EXPECT({"token1", "value", "string", "object", "array", "token5", "number"}));
|
||||
PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"}));
|
||||
}
|
||||
case 10:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(36);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_array:
|
||||
SHIFT(34);
|
||||
case ts_symbol_object:
|
||||
SHIFT(33);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_value:
|
||||
SHIFT(11);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
PARSE_ERROR(7, EXPECT({"token1", "value", "string", "object", "array", "token5", "number"}));
|
||||
PARSE_ERROR(7, EXPECT({"token1", "string", "value", "token5", "object", "array", "number"}));
|
||||
}
|
||||
case 11:
|
||||
SET_LEX_STATE(2);
|
||||
|
|
@ -350,20 +350,20 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(36);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_array:
|
||||
SHIFT(34);
|
||||
case ts_symbol_object:
|
||||
SHIFT(33);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_symbol_value:
|
||||
SHIFT(19);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
PARSE_ERROR(7, EXPECT({"token1", "value", "string", "object", "array", "token5", "number"}));
|
||||
PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"}));
|
||||
}
|
||||
case 19:
|
||||
SET_LEX_STATE(2);
|
||||
|
|
@ -434,40 +434,40 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(50);
|
||||
case ts_aux_token5:
|
||||
SHIFT(37);
|
||||
case ts_symbol_array:
|
||||
SHIFT(48);
|
||||
case ts_symbol_object:
|
||||
SHIFT(47);
|
||||
case ts_symbol_string:
|
||||
SHIFT(49);
|
||||
case ts_symbol_value:
|
||||
SHIFT(45);
|
||||
case ts_aux_token5:
|
||||
SHIFT(37);
|
||||
case ts_symbol_string:
|
||||
SHIFT(49);
|
||||
case ts_aux_token1:
|
||||
SHIFT(27);
|
||||
default:
|
||||
PARSE_ERROR(7, EXPECT({"token1", "value", "string", "object", "array", "token5", "number"}));
|
||||
PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"}));
|
||||
}
|
||||
case 27:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(36);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_array:
|
||||
SHIFT(34);
|
||||
case ts_symbol_object:
|
||||
SHIFT(33);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_value:
|
||||
SHIFT(28);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
PARSE_ERROR(7, EXPECT({"token1", "value", "string", "object", "array", "token5", "number"}));
|
||||
PARSE_ERROR(7, EXPECT({"token1", "string", "value", "token5", "object", "array", "number"}));
|
||||
}
|
||||
case 28:
|
||||
SET_LEX_STATE(2);
|
||||
|
|
@ -578,20 +578,20 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(36);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_array:
|
||||
SHIFT(34);
|
||||
case ts_symbol_object:
|
||||
SHIFT(33);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_symbol_value:
|
||||
SHIFT(40);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
PARSE_ERROR(7, EXPECT({"token1", "value", "string", "object", "array", "token5", "number"}));
|
||||
PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"}));
|
||||
}
|
||||
case 40:
|
||||
SET_LEX_STATE(2);
|
||||
|
|
@ -644,14 +644,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 45:
|
||||
SET_LEX_STATE(9);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_aux_token2:
|
||||
SHIFT(24);
|
||||
case ts_aux_token7:
|
||||
REDUCE(ts_aux_repeat_helper1, 4, COLLAPSE({1, 0, 1, 0}));
|
||||
case ts_aux_token2:
|
||||
SHIFT(24);
|
||||
case ts_aux_repeat_helper1:
|
||||
SHIFT(46);
|
||||
default:
|
||||
PARSE_ERROR(3, EXPECT({"repeat_helper1", "token7", "token2"}));
|
||||
PARSE_ERROR(3, EXPECT({"repeat_helper1", "token2", "token7"}));
|
||||
}
|
||||
case 46:
|
||||
SET_LEX_STATE(7);
|
||||
|
|
@ -722,20 +722,20 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(36);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_array:
|
||||
SHIFT(34);
|
||||
case ts_symbol_object:
|
||||
SHIFT(33);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_symbol_value:
|
||||
SHIFT(54);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
PARSE_ERROR(7, EXPECT({"token1", "value", "string", "object", "array", "token5", "number"}));
|
||||
PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"}));
|
||||
}
|
||||
case 54:
|
||||
SET_LEX_STATE(2);
|
||||
|
|
@ -788,14 +788,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 59:
|
||||
SET_LEX_STATE(6);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_aux_token4:
|
||||
REDUCE(ts_aux_repeat_helper2, 2, COLLAPSE({1, 0}));
|
||||
case ts_aux_token2:
|
||||
SHIFT(9);
|
||||
case ts_aux_token4:
|
||||
REDUCE(ts_aux_repeat_helper2, 2, COLLAPSE({1, 0}));
|
||||
case ts_aux_repeat_helper2:
|
||||
SHIFT(60);
|
||||
default:
|
||||
PARSE_ERROR(3, EXPECT({"repeat_helper2", "token2", "token4"}));
|
||||
PARSE_ERROR(3, EXPECT({"repeat_helper2", "token4", "token2"}));
|
||||
}
|
||||
case 60:
|
||||
SET_LEX_STATE(4);
|
||||
|
|
@ -910,20 +910,20 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(36);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_array:
|
||||
SHIFT(34);
|
||||
case ts_symbol_object:
|
||||
SHIFT(33);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_symbol_value:
|
||||
SHIFT(73);
|
||||
case ts_aux_token5:
|
||||
SHIFT(16);
|
||||
case ts_symbol_string:
|
||||
SHIFT(35);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
PARSE_ERROR(7, EXPECT({"token1", "value", "string", "object", "array", "token5", "number"}));
|
||||
PARSE_ERROR(7, EXPECT({"token1", "string", "token5", "value", "object", "array", "number"}));
|
||||
}
|
||||
case 73:
|
||||
SET_LEX_STATE(2);
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ namespace tree_sitter {
|
|||
rules::Character rule = *transition.first;
|
||||
LexItemSet item_set = *transition.second;
|
||||
size_t new_state_index = add_lex_state(item_set);
|
||||
lex_table.add_action(state_index, rule.value, LexAction::Advance(new_state_index));
|
||||
lex_table.add_action(state_index, rule, LexAction::Advance(new_state_index));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,62 +0,0 @@
|
|||
#include "char_match.h"
|
||||
|
||||
using std::string;
|
||||
|
||||
namespace tree_sitter {
|
||||
CharMatch CharMatchSpecific(char value) {
|
||||
CharMatch result = { .type = CharMatchTypeSpecific };
|
||||
result.value.character = value;
|
||||
return result;
|
||||
}
|
||||
|
||||
CharMatch CharMatchClass(CharClass value) {
|
||||
CharMatch result = { .type = CharMatchTypeClass };
|
||||
result.value.character = value;
|
||||
return result;
|
||||
}
|
||||
|
||||
CharMatch CharMatchRange(char min, char max) {
|
||||
CharMatch result = { .type = CharMatchTypeRange };
|
||||
result.value.range.min_character = min;
|
||||
result.value.range.max_character = max;
|
||||
return result;
|
||||
}
|
||||
|
||||
string CharMatchToString(CharMatch match) {
|
||||
switch (match.type) {
|
||||
case CharMatchTypeClass:
|
||||
switch (match.value.character_class) {
|
||||
case CharClassDigit:
|
||||
return "<digit>";
|
||||
case CharClassWord:
|
||||
return "<word>";
|
||||
}
|
||||
case CharMatchTypeSpecific:
|
||||
return string("'") + string(&match.value.character) + "'";
|
||||
case CharMatchTypeRange:
|
||||
return (
|
||||
string("'") +
|
||||
string(&match.value.range.min_character) + "'-'" +
|
||||
string(&match.value.range.max_character) + "'");
|
||||
}
|
||||
}
|
||||
|
||||
bool operator==(const CharMatch &left, const CharMatch &right) {
|
||||
if (left.type != right.type)
|
||||
return false;
|
||||
switch (left.type) {
|
||||
case CharMatchTypeClass:
|
||||
return (left.value.character_class == right.value.character_class);
|
||||
case CharMatchTypeSpecific:
|
||||
return (left.value.character == right.value.character);
|
||||
case CharMatchTypeRange:
|
||||
return (
|
||||
left.value.range.min_character == right.value.range.min_character &&
|
||||
left.value.range.max_character == right.value.range.max_character);
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& stream, const CharMatch &match) {
|
||||
return stream << CharMatchToString(match);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
#ifndef __TreeSitter__char_match__
|
||||
#define __TreeSitter__char_match__
|
||||
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
typedef enum {
|
||||
CharMatchTypeSpecific,
|
||||
CharMatchTypeClass,
|
||||
CharMatchTypeRange,
|
||||
} CharMatchType;
|
||||
|
||||
typedef enum {
|
||||
CharClassWord,
|
||||
CharClassDigit
|
||||
} CharClass;
|
||||
|
||||
struct CharMatch {
|
||||
CharMatchType type;
|
||||
union {
|
||||
CharClass character_class;
|
||||
char character;
|
||||
struct {
|
||||
char min_character;
|
||||
char max_character;
|
||||
} range;
|
||||
} value;
|
||||
};
|
||||
|
||||
CharMatch CharMatchSpecific(char);
|
||||
CharMatch CharMatchClass(CharClass);
|
||||
CharMatch CharMatchRange(char, char);
|
||||
std::string CharMatchToString(CharMatch);
|
||||
|
||||
bool operator==(const CharMatch &, const CharMatch &);
|
||||
std::ostream& operator<<(std::ostream& stream, const CharMatch &rule);
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::CharMatch> {
|
||||
size_t operator()(const tree_sitter::CharMatch &match) const {
|
||||
return (
|
||||
hash<int>()(match.type) ^
|
||||
hash<char>()(match.value.range.min_character) ^
|
||||
hash<char>()(match.value.range.max_character));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -101,23 +101,33 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
string condition_for_char_match(const CharMatch &char_match) {
|
||||
string condition_for_character_match(const rules::CharacterMatch &match) {
|
||||
auto value = "LOOKAHEAD_CHAR()";
|
||||
switch (char_match.type) {
|
||||
case CharMatchTypeClass:
|
||||
switch (char_match.value.character_class) {
|
||||
case CharClassDigit:
|
||||
switch (match.type) {
|
||||
case rules::CharacterMatchTypeClass:
|
||||
switch (match.value.character_class) {
|
||||
case rules::CharClassDigit:
|
||||
return string("isdigit(") + value + ")";
|
||||
case CharClassWord:
|
||||
case rules::CharClassWord:
|
||||
return string("isalnum(") + value + ")";
|
||||
}
|
||||
case CharMatchTypeSpecific:
|
||||
return string(value) + " == '" + character_code(char_match.value.character) + "'";
|
||||
case rules::CharacterMatchTypeSpecific:
|
||||
return string(value) + " == '" + character_code(match.value.character) + "'";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
string condition_for_character_rule(const rules::Character &rule) {
|
||||
vector<string> parts;
|
||||
for (auto &match : rule.matches) {
|
||||
parts.push_back(condition_for_character_match(match));
|
||||
}
|
||||
string result = join(parts, " || ");
|
||||
if (!rule.sign) result = "!(" + result + ")";
|
||||
return result;
|
||||
}
|
||||
|
||||
string collapse_flags(vector<bool> flags) {
|
||||
string result;
|
||||
bool started = false;
|
||||
|
|
@ -164,19 +174,24 @@ namespace tree_sitter {
|
|||
return input;
|
||||
}
|
||||
|
||||
string lex_error_call(const unordered_set<CharMatch> &expected_inputs) {
|
||||
string result = "LEX_ERROR(" + to_string(expected_inputs.size()) + ", EXPECT({";
|
||||
string lex_error_call(const unordered_set<rules::Character> &expected_inputs) {
|
||||
unordered_set<rules::CharacterMatch> expected_matches;
|
||||
for (auto &rule : expected_inputs)
|
||||
for (auto &match : rule.matches)
|
||||
expected_matches.insert(match);
|
||||
|
||||
string result = "LEX_ERROR(" + to_string(expected_matches.size()) + ", EXPECT({";
|
||||
bool started = false;
|
||||
for (auto match : expected_inputs) {
|
||||
for (auto match : expected_matches) {
|
||||
if (started) result += ", ";
|
||||
started = true;
|
||||
result += "\"" + escape_string(CharMatchToString(match)) + "\"";
|
||||
result += "\"" + escape_string(match.to_string()) + "\"";
|
||||
}
|
||||
result += "}));";
|
||||
return result;
|
||||
}
|
||||
|
||||
string code_for_lex_actions(const unordered_set<LexAction> &actions, const unordered_set<CharMatch> &expected_inputs) {
|
||||
string code_for_lex_actions(const unordered_set<LexAction> &actions, const unordered_set<rules::Character> &expected_inputs) {
|
||||
auto action = actions.begin();
|
||||
if (action == actions.end()) {
|
||||
return lex_error_call(expected_inputs);
|
||||
|
|
@ -206,7 +221,7 @@ namespace tree_sitter {
|
|||
string result = "";
|
||||
auto expected_inputs = parse_state.expected_inputs();
|
||||
for (auto pair : parse_state.actions)
|
||||
result += _if(condition_for_char_match(pair.first), code_for_lex_actions(pair.second, expected_inputs));
|
||||
result += _if(condition_for_character_rule(pair.first), code_for_lex_actions(pair.second, expected_inputs));
|
||||
result += code_for_lex_actions(parse_state.default_actions, expected_inputs);
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,8 +45,8 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
// State
|
||||
unordered_set<CharMatch> LexState::expected_inputs() const {
|
||||
unordered_set<CharMatch> result;
|
||||
unordered_set<rules::Character> LexState::expected_inputs() const {
|
||||
unordered_set<rules::Character> result;
|
||||
for (auto pair : actions)
|
||||
result.insert(pair.first);
|
||||
return result;
|
||||
|
|
@ -58,7 +58,7 @@ namespace tree_sitter {
|
|||
return states.size() - 1;
|
||||
}
|
||||
|
||||
void LexTable::add_action(size_t state_index, CharMatch match, LexAction action) {
|
||||
void LexTable::add_action(size_t state_index, rules::Character match, LexAction action) {
|
||||
states[state_index].actions[match].insert(action);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include "char_match.h"
|
||||
#include "symbol.h"
|
||||
#include "character.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
typedef enum {
|
||||
|
|
@ -45,15 +45,15 @@ namespace std {
|
|||
namespace tree_sitter {
|
||||
class LexState {
|
||||
public:
|
||||
std::unordered_map<CharMatch, std::unordered_set<LexAction>> actions;
|
||||
std::unordered_map<rules::Character, std::unordered_set<LexAction>> actions;
|
||||
std::unordered_set<LexAction> default_actions;
|
||||
std::unordered_set<CharMatch> expected_inputs() const;
|
||||
std::unordered_set<rules::Character> expected_inputs() const;
|
||||
};
|
||||
|
||||
class LexTable {
|
||||
public:
|
||||
size_t add_state();
|
||||
void add_action(size_t state_index, CharMatch match, LexAction action);
|
||||
void add_action(size_t state_index, rules::Character rule, LexAction action);
|
||||
void add_default_action(size_t state_index, LexAction action);
|
||||
|
||||
std::vector<LexState> states;
|
||||
|
|
|
|||
|
|
@ -5,17 +5,64 @@ using std::hash;
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Character::Character(char value) : value(CharMatchSpecific(value)) {};
|
||||
Character::Character(CharClass value) : value(CharMatchClass(value)) {};
|
||||
Character::Character(char min, char max) : value(CharMatchRange(min, max)) {};
|
||||
// Builds a match for one specific character.
CharacterMatch::CharacterMatch(char character)
    : type(CharacterMatchTypeSpecific)
{
    value.character = character;
}

// Builds a match for a whole character class.
CharacterMatch::CharacterMatch(CharacterClass klass)
    : type(CharacterMatchTypeClass)
{
    value.character_class = klass;
}

// Builds a range match; `bounds.first` is stored as the minimum character
// and `bounds.second` as the maximum. The bounds are stored as given.
CharacterMatch::CharacterMatch(std::pair<char, char> bounds)
    : type(CharacterMatchTypeRange)
{
    value.range.min_character = bounds.first;
    value.range.max_character = bounds.second;
}
|
||||
|
||||
// Rule holding exactly one specific-character match; `sign` starts as true.
Character::Character(char character)
    : matches(1, CharacterMatch(character)),
      sign(true)
{}

// Rule holding exactly one character-class match; `sign` starts as true.
Character::Character(CharacterClass char_class)
    : matches(1, CharacterMatch(char_class)),
      sign(true)
{}

// Rule holding a copy of the caller's match list and the caller's `sign`.
Character::Character(const std::vector<CharacterMatch> &matches, bool sign)
    : matches(matches),
      sign(sign)
{}
|
||||
|
||||
// Two matches are equal when they are of the same kind and carry the same
// payload for that kind. Only the union member selected by `type` is read.
bool CharacterMatch::operator==(const CharacterMatch &right) const {
    if (type != right.type)
        return false;

    switch (type) {
        case CharacterMatchTypeClass:
            return value.character_class == right.value.character_class;
        case CharacterMatchTypeSpecific:
            return value.character == right.value.character;
        case CharacterMatchTypeRange:
            return value.range.min_character == right.value.range.min_character &&
                   value.range.max_character == right.value.range.max_character;
    }

    // Not reached for the listed enumerators. Without this return, a `type`
    // holding any other value would flow off the end of a non-void function,
    // which is undefined behaviour.
    return false;
}
|
||||
|
||||
// Renders the match for debugging output:
//   class    -> "<digit>" or "<word>"
//   specific -> "<EOF>" for '\0', otherwise the character in single quotes
//   range    -> "'min'-'max'"
// Values outside the listed enumerators render as "<unknown>".
string CharacterMatch::to_string() const {
    switch (type) {
        case CharacterMatchTypeClass:
            switch (value.character_class) {
                case CharClassDigit:
                    return "<digit>";
                case CharClassWord:
                    return "<word>";
            }
            // An unlisted class used to fall through into the
            // specific-character case and read the wrong union member.
            return "<unknown>";

        case CharacterMatchTypeSpecific:
            return (value.character == '\0') ?
                "<EOF>" :
                string("'") + value.character + "'";

        case CharacterMatchTypeRange:
            return (string("'") +
                    value.range.min_character + "'-'" +
                    value.range.max_character + "'");
    }

    // Guards against flowing off the end of a non-void function.
    return "<unknown>";
}
|
||||
|
||||
|
||||
// A Character rule equals another rule only if that rule is also a Character
// with the same `sign` and the same matches in the same order.
bool Character::operator==(const Rule &rule) const {
    const Character *other = dynamic_cast<const Character *>(&rule);
    if (!other) return false;

    // Rules that differ only in `sign` must not compare equal.
    if (other->sign != sign) return false;

    const size_t size = matches.size();
    if (other->matches.size() != size) return false;

    // size_t index avoids a signed/unsigned comparison against `size`.
    for (size_t i = 0; i < size; i++)
        if (!(matches[i] == other->matches[i])) return false;
    return true;
}
|
||||
|
||||
|
||||
size_t Character::hash_code() const {
|
||||
return typeid(this).hash_code() ^ hash<string>()(CharMatchToString(value));
|
||||
return typeid(this).hash_code() ^ hash<string>()(to_string());
|
||||
}
|
||||
|
||||
rule_ptr Character::copy() const {
|
||||
|
|
@ -23,7 +70,10 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
// Renders the rule as "#<char m1 m2 ...>", where each `m` is the string form
// of one entry of `matches`, in order. `sign` is not part of the rendering.
string Character::to_string() const {
    string result("#<char");
    for (auto &match : matches)
        result += " " + match.to_string();
    return result + ">";
}
|
||||
|
||||
void Character::accept(Visitor &visitor) const {
|
||||
|
|
|
|||
|
|
@ -2,15 +2,46 @@
|
|||
#define __tree_sitter__char__
|
||||
|
||||
#include "rule.h"
|
||||
#include "char_match.h"
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
// Named character classes that a CharacterMatch can refer to.
typedef enum {
    CharClassWord = 0,
    CharClassDigit = 1
} CharacterClass;
|
||||
|
||||
// Discriminator for the union inside CharacterMatch.
typedef enum {
    CharacterMatchTypeSpecific = 0,  // a single character
    CharacterMatchTypeClass = 1,     // a CharacterClass
    CharacterMatchTypeRange = 2      // a min/max character pair
} CharacterMatchType;
|
||||
|
||||
struct CharacterMatch {
|
||||
CharacterMatchType type;
|
||||
union {
|
||||
CharacterClass character_class;
|
||||
char character;
|
||||
struct {
|
||||
char min_character;
|
||||
char max_character;
|
||||
} range;
|
||||
} value;
|
||||
|
||||
CharacterMatch(char);
|
||||
CharacterMatch(std::pair<char, char>);
|
||||
CharacterMatch(CharacterClass);
|
||||
bool operator==(const CharacterMatch &) const;
|
||||
std::string to_string() const;
|
||||
};
|
||||
|
||||
class Character : public Rule {
|
||||
public:
|
||||
Character(char character);
|
||||
Character(CharClass character_class);
|
||||
Character(CharacterClass character_class);
|
||||
Character(char min_character, char max_character);
|
||||
Character(const std::vector<CharacterMatch> &matches, bool sign);
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
|
|
@ -18,9 +49,35 @@ namespace tree_sitter {
|
|||
std::string to_string() const;
|
||||
void accept(Visitor &visitor) const;
|
||||
|
||||
const CharMatch value;
|
||||
std::vector<CharacterMatch> matches;
|
||||
bool sign;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::rules::CharacterMatch> {
|
||||
size_t operator()(const tree_sitter::rules::CharacterMatch &match) const {
|
||||
auto type = match.type;
|
||||
auto result = hash<short int>()(type);
|
||||
switch (type) {
|
||||
case tree_sitter::rules::CharacterMatchTypeClass:
|
||||
result ^= hash<short int>()(match.value.character_class);
|
||||
case tree_sitter::rules::CharacterMatchTypeRange:
|
||||
result ^= hash<char>()(match.value.range.min_character);
|
||||
result ^= hash<char>()(match.value.range.max_character);
|
||||
case tree_sitter::rules::CharacterMatchTypeSpecific:
|
||||
result ^= hash<char>()(match.value.character);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::rules::Character> : hash<tree_sitter::rules::Rule> {};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -14,9 +14,13 @@ namespace tree_sitter {
|
|||
return make_shared<Character>(value);
|
||||
}
|
||||
|
||||
// Creates a Character rule that matches the given character class.
rule_ptr character(CharacterClass value) {
    return make_shared<Character>(value);
}
|
||||
|
||||
// Creates a Character rule from an explicit list of matches.
// `is_affirmative` is forwarded unchanged as the rule's `sign`.
rule_ptr character(const std::vector<CharacterMatch> &matches, bool is_affirmative) {
    rule_ptr result = make_shared<Character>(matches, is_affirmative);
    return result;
}
|
||||
|
||||
rule_ptr choice(const initializer_list<rule_ptr> &rules) {
|
||||
rule_ptr result;
|
||||
|
|
|
|||
|
|
@ -16,8 +16,10 @@ namespace tree_sitter {
|
|||
namespace rules {
|
||||
rule_ptr blank();
|
||||
rule_ptr character(char value);
|
||||
rule_ptr character(char min, char max);
|
||||
rule_ptr character(CharClass value);
|
||||
rule_ptr character(CharacterClass value);
|
||||
rule_ptr character(const std::vector<CharacterMatch> &matches);
|
||||
rule_ptr character(const std::vector<CharacterMatch> &matches, bool);
|
||||
|
||||
rule_ptr choice(const std::initializer_list<rule_ptr> &rules);
|
||||
rule_ptr pattern(const std::string &value);
|
||||
rule_ptr repeat(const rule_ptr content);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "rules.h"
|
||||
#include <unordered_map>
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
|
|
|||
6
todo.md
6
todo.md
|
|
@ -14,4 +14,8 @@ TODO
|
|||
## node.js wrapper
|
||||
- add simple selector engine for trees
|
||||
|
||||
## incremental parsing
|
||||
## incremental parsing
|
||||
|
||||
## chores
|
||||
- figure out why Symbol and Character can't have const member variables
|
||||
(unordered_map seems to require mutability of key objects)
|
||||
|
|
|
|||
|
|
@ -53,7 +53,6 @@
|
|||
12FD40DF1860064C0041A84E /* tree.c in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DE1860064C0041A84E /* tree.c */; };
|
||||
12FD40E718639B910041A84E /* visitor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E618639B910041A84E /* visitor.cpp */; };
|
||||
12FD40E918641FB70041A84E /* rules.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E818641FB70041A84E /* rules.cpp */; };
|
||||
12FD40F3186641C00041A84E /* char_match.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40F1186641C00041A84E /* char_match.cpp */; };
|
||||
12FD40F7186A16020041A84E /* lex_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40F5186A16020041A84E /* lex_table.cpp */; };
|
||||
27A343CA69E17E0F9EBEDF1C /* pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* pattern.cpp */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
|
@ -162,8 +161,6 @@
|
|||
12FD40E41862B3530041A84E /* visitor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = visitor.h; sourceTree = "<group>"; };
|
||||
12FD40E618639B910041A84E /* visitor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = visitor.cpp; sourceTree = "<group>"; };
|
||||
12FD40E818641FB70041A84E /* rules.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rules.cpp; sourceTree = "<group>"; };
|
||||
12FD40F1186641C00041A84E /* char_match.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = char_match.cpp; sourceTree = "<group>"; };
|
||||
12FD40F2186641C00041A84E /* char_match.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = char_match.h; sourceTree = "<group>"; };
|
||||
12FD40F5186A16020041A84E /* lex_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = lex_table.cpp; sourceTree = "<group>"; };
|
||||
27A340F3EEB184C040521323 /* pattern.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pattern.cpp; sourceTree = "<group>"; };
|
||||
27A3438C4FA59A3882E8493B /* pattern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pattern.h; sourceTree = "<group>"; };
|
||||
|
|
@ -358,8 +355,6 @@
|
|||
isa = PBXGroup;
|
||||
children = (
|
||||
12130618182C84B700FCF928 /* build_tables */,
|
||||
12FD40F1186641C00041A84E /* char_match.cpp */,
|
||||
12FD40F2186641C00041A84E /* char_match.h */,
|
||||
12EDCFAC18820181005A7A07 /* compile.cpp */,
|
||||
12EDCFAD18820181005A7A07 /* compile.h */,
|
||||
12FD4067185E8AF40041A84E /* generate_code */,
|
||||
|
|
@ -523,7 +518,6 @@
|
|||
12D136A4183678A2005F3369 /* repeat.cpp in Sources */,
|
||||
1225CC6418765693000D4723 /* prepare_grammar_spec.cpp in Sources */,
|
||||
12EDCF9A1881FCD9005A7A07 /* search_for_symbols.cpp in Sources */,
|
||||
12FD40F3186641C00041A84E /* char_match.cpp in Sources */,
|
||||
12EDCFB21882039A005A7A07 /* perform.cpp in Sources */,
|
||||
12FD40E718639B910041A84E /* visitor.cpp in Sources */,
|
||||
12EDCF991881FCD9005A7A07 /* perform.cpp in Sources */,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue