Revert "Remove the separator characters construct"
This reverts commit 5cd07648fd.
The separators construct is useful as an optimization. It turns out that
constructing a node for every chunk of whitespace in a document causes a
significant performance regression.
Conflicts:
src/compiler/build_tables/build_lex_table.cc
src/compiler/grammar.cc
src/runtime/parser.c
This commit is contained in:
parent
e941f8c175
commit
545e575508
43 changed files with 9065 additions and 11203 deletions
|
|
@ -6,7 +6,7 @@ namespace tree_sitter_examples {
|
|||
using tree_sitter::Grammar;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
extern const Grammar arithmetic = Grammar({
|
||||
extern const Grammar arithmetic({
|
||||
{ "expression", choice({
|
||||
sym("sum"),
|
||||
sym("difference"),
|
||||
|
|
@ -26,7 +26,6 @@ extern const Grammar arithmetic = Grammar({
|
|||
|
||||
{ "number", pattern("\\d+") },
|
||||
{ "variable", pattern("\\a[\\w_]*") },
|
||||
{ "_whitespace", pattern("[\\s\n]+") },
|
||||
}).ubiquitous_tokens({ "_whitespace" });
|
||||
});
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
|
|
@ -158,7 +158,6 @@ extern const Grammar golang = Grammar({
|
|||
blank() }) }) },
|
||||
|
||||
{ "_line_break", str("\n") },
|
||||
{ "_whitespace", pattern("\\s+") },
|
||||
|
||||
{ "string", delimited("\"") },
|
||||
{ "package_name", sym("_identifier") },
|
||||
|
|
@ -168,6 +167,7 @@ extern const Grammar golang = Grammar({
|
|||
{ "number", pattern("\\d+(\\.\\d+)?") },
|
||||
{ "comment", keypattern("//[^\n]*") },
|
||||
})
|
||||
.ubiquitous_tokens({ "comment", "_whitespace", "_line_break" });
|
||||
.ubiquitous_tokens({ "comment", "_line_break" })
|
||||
.separators({ ' ', '\t', '\r' });
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
|
|
@ -208,13 +208,13 @@ extern const Grammar javascript = Grammar({
|
|||
delimited("\""),
|
||||
delimited("'") })) },
|
||||
{ "_line_break", str("\n") },
|
||||
{ "_whitespace", pattern("\\s+") },
|
||||
{ "identifier", pattern("[\\a_$][\\w_$]*") },
|
||||
{ "number", pattern("\\d+(\\.\\d+)?") },
|
||||
{ "null", keyword("null") },
|
||||
{ "true", keyword("true") },
|
||||
{ "false", keyword("false") },
|
||||
})
|
||||
.ubiquitous_tokens({ "comment", "_whitespace", "_line_break" });
|
||||
.ubiquitous_tokens({ "comment", "_line_break" })
|
||||
.separators({ ' ', '\t', '\r' });
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ namespace tree_sitter_examples {
|
|||
using tree_sitter::Grammar;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
extern const Grammar json = Grammar({
|
||||
extern const Grammar json({
|
||||
{ "value", choice({
|
||||
sym("object"),
|
||||
sym("array"),
|
||||
|
|
@ -25,7 +25,6 @@ extern const Grammar json = Grammar({
|
|||
{ "null", keyword("null") },
|
||||
{ "true", keyword("true") },
|
||||
{ "false", keyword("false") },
|
||||
{ "_whitespace", pattern("[\\s\n]+") },
|
||||
}).ubiquitous_tokens({ "_whitespace" });
|
||||
});
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include "tree_sitter/parser.h"
|
||||
|
||||
#define STATE_COUNT 32
|
||||
#define SYMBOL_COUNT 20
|
||||
#define SYMBOL_COUNT 19
|
||||
|
||||
enum {
|
||||
ts_sym_expression = ts_builtin_sym_start,
|
||||
|
|
@ -13,7 +13,6 @@ enum {
|
|||
ts_sym_group,
|
||||
ts_sym_number,
|
||||
ts_sym_variable,
|
||||
ts_sym__whitespace,
|
||||
ts_aux_sym_1,
|
||||
ts_aux_sym_2,
|
||||
ts_aux_sym_3,
|
||||
|
|
@ -36,7 +35,6 @@ SYMBOL_NAMES = {
|
|||
[ts_builtin_sym_end] = "end",
|
||||
[ts_sym_number] = "number",
|
||||
[ts_sym_variable] = "variable",
|
||||
[ts_sym__whitespace] = "_whitespace",
|
||||
[ts_aux_sym_1] = "'+'",
|
||||
[ts_aux_sym_2] = "'-'",
|
||||
[ts_aux_sym_3] = "'*'",
|
||||
|
|
@ -47,7 +45,6 @@ SYMBOL_NAMES = {
|
|||
};
|
||||
|
||||
HIDDEN_SYMBOLS = {
|
||||
[ts_sym__whitespace] = 1,
|
||||
[ts_aux_sym_1] = 1,
|
||||
[ts_aux_sym_2] = 1,
|
||||
[ts_aux_sym_3] = 1,
|
||||
|
|
@ -61,127 +58,154 @@ LEX_FN() {
|
|||
START_LEXER();
|
||||
switch (lex_state) {
|
||||
case 1:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
ADVANCE(1);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(3);
|
||||
ADVANCE(2);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(4);
|
||||
ADVANCE(3);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(5);
|
||||
ADVANCE(4);
|
||||
LEX_ERROR();
|
||||
case 2:
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
ACCEPT_TOKEN(ts_sym__whitespace);
|
||||
case 3:
|
||||
ACCEPT_TOKEN(ts_aux_sym_6);
|
||||
case 4:
|
||||
case 3:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(4);
|
||||
ADVANCE(3);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 5:
|
||||
case 4:
|
||||
if (('0' <= lookahead && lookahead <= '9') ||
|
||||
('A' <= lookahead && lookahead <= 'Z') ||
|
||||
(lookahead == '_') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(5);
|
||||
ACCEPT_TOKEN(ts_sym_variable);
|
||||
case 6:
|
||||
if (lookahead == 0)
|
||||
ADVANCE(7);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(11);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(12);
|
||||
LEX_ERROR();
|
||||
case 7:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 8:
|
||||
ACCEPT_TOKEN(ts_aux_sym_3);
|
||||
case 9:
|
||||
ACCEPT_TOKEN(ts_aux_sym_1);
|
||||
case 10:
|
||||
ACCEPT_TOKEN(ts_aux_sym_2);
|
||||
case 11:
|
||||
ACCEPT_TOKEN(ts_aux_sym_4);
|
||||
case 12:
|
||||
ACCEPT_TOKEN(ts_aux_sym_5);
|
||||
case 13:
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(14);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(11);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(12);
|
||||
LEX_ERROR();
|
||||
case 14:
|
||||
ACCEPT_TOKEN(ts_aux_sym_7);
|
||||
case 15:
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(14);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
if (lookahead == 0)
|
||||
ADVANCE(7);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(3);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(14);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(11);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(4);
|
||||
ACCEPT_TOKEN(ts_sym_variable);
|
||||
case 5:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(6);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(5);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 6:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 7:
|
||||
ACCEPT_TOKEN(ts_aux_sym_3);
|
||||
case 8:
|
||||
ACCEPT_TOKEN(ts_aux_sym_1);
|
||||
case 9:
|
||||
ACCEPT_TOKEN(ts_aux_sym_2);
|
||||
case 10:
|
||||
ACCEPT_TOKEN(ts_aux_sym_4);
|
||||
case 11:
|
||||
ACCEPT_TOKEN(ts_aux_sym_5);
|
||||
case 12:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(12);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(13);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 13:
|
||||
ACCEPT_TOKEN(ts_aux_sym_7);
|
||||
case 14:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(14);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(13);
|
||||
LEX_ERROR();
|
||||
case 15:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(6);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(15);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(2);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(13);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(10);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(3);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(5);
|
||||
ADVANCE(4);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(12);
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(6);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(15);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(2);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(13);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(10);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(3);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(4);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_ERROR();
|
||||
|
|
@ -190,37 +214,37 @@ LEX_FN() {
|
|||
|
||||
LEX_STATES = {
|
||||
[0] = 1,
|
||||
[1] = 6,
|
||||
[2] = 6,
|
||||
[1] = 5,
|
||||
[2] = 5,
|
||||
[3] = 1,
|
||||
[4] = 13,
|
||||
[5] = 13,
|
||||
[6] = 15,
|
||||
[4] = 12,
|
||||
[5] = 12,
|
||||
[6] = 14,
|
||||
[7] = 1,
|
||||
[8] = 13,
|
||||
[9] = 15,
|
||||
[10] = 13,
|
||||
[8] = 12,
|
||||
[9] = 14,
|
||||
[10] = 12,
|
||||
[11] = 1,
|
||||
[12] = 1,
|
||||
[13] = 1,
|
||||
[14] = 1,
|
||||
[15] = 1,
|
||||
[16] = 13,
|
||||
[17] = 13,
|
||||
[18] = 13,
|
||||
[19] = 13,
|
||||
[20] = 13,
|
||||
[21] = 6,
|
||||
[16] = 12,
|
||||
[17] = 12,
|
||||
[18] = 12,
|
||||
[19] = 12,
|
||||
[20] = 12,
|
||||
[21] = 5,
|
||||
[22] = 1,
|
||||
[23] = 1,
|
||||
[24] = 1,
|
||||
[25] = 1,
|
||||
[26] = 1,
|
||||
[27] = 6,
|
||||
[28] = 6,
|
||||
[29] = 6,
|
||||
[30] = 6,
|
||||
[31] = 6,
|
||||
[27] = 5,
|
||||
[28] = 5,
|
||||
[29] = 5,
|
||||
[30] = 5,
|
||||
[31] = 5,
|
||||
};
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
|
|
@ -237,12 +261,10 @@ PARSE_TABLE = {
|
|||
[ts_sym_group] = SHIFT(2),
|
||||
[ts_sym_number] = SHIFT(2),
|
||||
[ts_sym_variable] = SHIFT(2),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(3),
|
||||
},
|
||||
[1] = {
|
||||
[ts_builtin_sym_end] = ACCEPT_INPUT(),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(22),
|
||||
[ts_aux_sym_2] = SHIFT(23),
|
||||
[ts_aux_sym_3] = SHIFT(24),
|
||||
|
|
@ -251,7 +273,6 @@ PARSE_TABLE = {
|
|||
},
|
||||
[2] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_expression, 1),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = REDUCE(ts_sym_expression, 1),
|
||||
[ts_aux_sym_2] = REDUCE(ts_sym_expression, 1),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_expression, 1),
|
||||
|
|
@ -269,11 +290,9 @@ PARSE_TABLE = {
|
|||
[ts_builtin_sym_error] = SHIFT(6),
|
||||
[ts_sym_number] = SHIFT(5),
|
||||
[ts_sym_variable] = SHIFT(5),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(7),
|
||||
},
|
||||
[4] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(11),
|
||||
[ts_aux_sym_2] = SHIFT(12),
|
||||
[ts_aux_sym_3] = SHIFT(13),
|
||||
|
|
@ -282,7 +301,6 @@ PARSE_TABLE = {
|
|||
[ts_aux_sym_7] = SHIFT(21),
|
||||
},
|
||||
[5] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = REDUCE(ts_sym_expression, 1),
|
||||
[ts_aux_sym_2] = REDUCE(ts_sym_expression, 1),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_expression, 1),
|
||||
|
|
@ -291,7 +309,6 @@ PARSE_TABLE = {
|
|||
[ts_aux_sym_7] = REDUCE(ts_sym_expression, 1),
|
||||
},
|
||||
[6] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_7] = SHIFT(21),
|
||||
},
|
||||
[7] = {
|
||||
|
|
@ -305,11 +322,9 @@ PARSE_TABLE = {
|
|||
[ts_builtin_sym_error] = SHIFT(9),
|
||||
[ts_sym_number] = SHIFT(5),
|
||||
[ts_sym_variable] = SHIFT(5),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(7),
|
||||
},
|
||||
[8] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(11),
|
||||
[ts_aux_sym_2] = SHIFT(12),
|
||||
[ts_aux_sym_3] = SHIFT(13),
|
||||
|
|
@ -318,11 +333,9 @@ PARSE_TABLE = {
|
|||
[ts_aux_sym_7] = SHIFT(10),
|
||||
},
|
||||
[9] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_7] = SHIFT(10),
|
||||
},
|
||||
[10] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = REDUCE(ts_sym_group, 3),
|
||||
[ts_aux_sym_2] = REDUCE(ts_sym_group, 3),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_group, 3),
|
||||
|
|
@ -340,7 +353,6 @@ PARSE_TABLE = {
|
|||
[ts_sym_group] = SHIFT(5),
|
||||
[ts_sym_number] = SHIFT(5),
|
||||
[ts_sym_variable] = SHIFT(5),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(7),
|
||||
},
|
||||
[12] = {
|
||||
|
|
@ -353,7 +365,6 @@ PARSE_TABLE = {
|
|||
[ts_sym_group] = SHIFT(5),
|
||||
[ts_sym_number] = SHIFT(5),
|
||||
[ts_sym_variable] = SHIFT(5),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(7),
|
||||
},
|
||||
[13] = {
|
||||
|
|
@ -366,7 +377,6 @@ PARSE_TABLE = {
|
|||
[ts_sym_group] = SHIFT(5),
|
||||
[ts_sym_number] = SHIFT(5),
|
||||
[ts_sym_variable] = SHIFT(5),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(7),
|
||||
},
|
||||
[14] = {
|
||||
|
|
@ -379,7 +389,6 @@ PARSE_TABLE = {
|
|||
[ts_sym_group] = SHIFT(5),
|
||||
[ts_sym_number] = SHIFT(5),
|
||||
[ts_sym_variable] = SHIFT(5),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(7),
|
||||
},
|
||||
[15] = {
|
||||
|
|
@ -392,11 +401,9 @@ PARSE_TABLE = {
|
|||
[ts_sym_group] = SHIFT(5),
|
||||
[ts_sym_number] = SHIFT(5),
|
||||
[ts_sym_variable] = SHIFT(5),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(7),
|
||||
},
|
||||
[16] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = REDUCE(ts_sym_exponent, 3),
|
||||
[ts_aux_sym_2] = REDUCE(ts_sym_exponent, 3),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_exponent, 3),
|
||||
|
|
@ -405,7 +412,6 @@ PARSE_TABLE = {
|
|||
[ts_aux_sym_7] = REDUCE(ts_sym_exponent, 3),
|
||||
},
|
||||
[17] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = REDUCE(ts_sym_quotient, 3),
|
||||
[ts_aux_sym_2] = REDUCE(ts_sym_quotient, 3),
|
||||
[ts_aux_sym_3] = SHIFT(13),
|
||||
|
|
@ -414,7 +420,6 @@ PARSE_TABLE = {
|
|||
[ts_aux_sym_7] = REDUCE(ts_sym_quotient, 3),
|
||||
},
|
||||
[18] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = REDUCE(ts_sym_product, 3),
|
||||
[ts_aux_sym_2] = REDUCE(ts_sym_product, 3),
|
||||
[ts_aux_sym_3] = SHIFT(13),
|
||||
|
|
@ -423,7 +428,6 @@ PARSE_TABLE = {
|
|||
[ts_aux_sym_7] = REDUCE(ts_sym_product, 3),
|
||||
},
|
||||
[19] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(11),
|
||||
[ts_aux_sym_2] = SHIFT(12),
|
||||
[ts_aux_sym_3] = SHIFT(13),
|
||||
|
|
@ -432,7 +436,6 @@ PARSE_TABLE = {
|
|||
[ts_aux_sym_7] = REDUCE(ts_sym_difference, 3),
|
||||
},
|
||||
[20] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(11),
|
||||
[ts_aux_sym_2] = SHIFT(12),
|
||||
[ts_aux_sym_3] = SHIFT(13),
|
||||
|
|
@ -442,7 +445,6 @@ PARSE_TABLE = {
|
|||
},
|
||||
[21] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_group, 3),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = REDUCE(ts_sym_group, 3),
|
||||
[ts_aux_sym_2] = REDUCE(ts_sym_group, 3),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_group, 3),
|
||||
|
|
@ -459,7 +461,6 @@ PARSE_TABLE = {
|
|||
[ts_sym_group] = SHIFT(2),
|
||||
[ts_sym_number] = SHIFT(2),
|
||||
[ts_sym_variable] = SHIFT(2),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(3),
|
||||
},
|
||||
[23] = {
|
||||
|
|
@ -472,7 +473,6 @@ PARSE_TABLE = {
|
|||
[ts_sym_group] = SHIFT(2),
|
||||
[ts_sym_number] = SHIFT(2),
|
||||
[ts_sym_variable] = SHIFT(2),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(3),
|
||||
},
|
||||
[24] = {
|
||||
|
|
@ -485,7 +485,6 @@ PARSE_TABLE = {
|
|||
[ts_sym_group] = SHIFT(2),
|
||||
[ts_sym_number] = SHIFT(2),
|
||||
[ts_sym_variable] = SHIFT(2),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(3),
|
||||
},
|
||||
[25] = {
|
||||
|
|
@ -498,7 +497,6 @@ PARSE_TABLE = {
|
|||
[ts_sym_group] = SHIFT(2),
|
||||
[ts_sym_number] = SHIFT(2),
|
||||
[ts_sym_variable] = SHIFT(2),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(3),
|
||||
},
|
||||
[26] = {
|
||||
|
|
@ -511,12 +509,10 @@ PARSE_TABLE = {
|
|||
[ts_sym_group] = SHIFT(2),
|
||||
[ts_sym_number] = SHIFT(2),
|
||||
[ts_sym_variable] = SHIFT(2),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(3),
|
||||
},
|
||||
[27] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_exponent, 3),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = REDUCE(ts_sym_exponent, 3),
|
||||
[ts_aux_sym_2] = REDUCE(ts_sym_exponent, 3),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_exponent, 3),
|
||||
|
|
@ -525,7 +521,6 @@ PARSE_TABLE = {
|
|||
},
|
||||
[28] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_quotient, 3),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = REDUCE(ts_sym_quotient, 3),
|
||||
[ts_aux_sym_2] = REDUCE(ts_sym_quotient, 3),
|
||||
[ts_aux_sym_3] = SHIFT(24),
|
||||
|
|
@ -534,7 +529,6 @@ PARSE_TABLE = {
|
|||
},
|
||||
[29] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_product, 3),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = REDUCE(ts_sym_product, 3),
|
||||
[ts_aux_sym_2] = REDUCE(ts_sym_product, 3),
|
||||
[ts_aux_sym_3] = SHIFT(24),
|
||||
|
|
@ -543,7 +537,6 @@ PARSE_TABLE = {
|
|||
},
|
||||
[30] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_difference, 3),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(22),
|
||||
[ts_aux_sym_2] = SHIFT(23),
|
||||
[ts_aux_sym_3] = SHIFT(24),
|
||||
|
|
@ -552,7 +545,6 @@ PARSE_TABLE = {
|
|||
},
|
||||
[31] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_sum, 3),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(22),
|
||||
[ts_aux_sym_2] = SHIFT(23),
|
||||
[ts_aux_sym_3] = SHIFT(24),
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,7 +1,7 @@
|
|||
#include "tree_sitter/parser.h"
|
||||
|
||||
#define STATE_COUNT 60
|
||||
#define SYMBOL_COUNT 20
|
||||
#define SYMBOL_COUNT 19
|
||||
|
||||
enum {
|
||||
ts_sym_value = ts_builtin_sym_start,
|
||||
|
|
@ -12,7 +12,6 @@ enum {
|
|||
ts_sym_null,
|
||||
ts_sym_true,
|
||||
ts_sym_false,
|
||||
ts_sym__whitespace,
|
||||
ts_aux_sym_object_repeat0,
|
||||
ts_aux_sym_array_repeat0,
|
||||
ts_aux_sym_1,
|
||||
|
|
@ -35,7 +34,6 @@ SYMBOL_NAMES = {
|
|||
[ts_sym_null] = "null",
|
||||
[ts_sym_true] = "true",
|
||||
[ts_sym_false] = "false",
|
||||
[ts_sym__whitespace] = "_whitespace",
|
||||
[ts_aux_sym_object_repeat0] = "object_repeat0",
|
||||
[ts_aux_sym_array_repeat0] = "array_repeat0",
|
||||
[ts_aux_sym_1] = "'{'",
|
||||
|
|
@ -47,7 +45,6 @@ SYMBOL_NAMES = {
|
|||
};
|
||||
|
||||
HIDDEN_SYMBOLS = {
|
||||
[ts_sym__whitespace] = 1,
|
||||
[ts_aux_sym_object_repeat0] = 1,
|
||||
[ts_aux_sym_array_repeat0] = 1,
|
||||
[ts_aux_sym_1] = 1,
|
||||
|
|
@ -62,98 +59,96 @@ LEX_FN() {
|
|||
START_LEXER();
|
||||
switch (lex_state) {
|
||||
case 1:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
ADVANCE(1);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(3);
|
||||
ADVANCE(2);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(7);
|
||||
ADVANCE(6);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(10);
|
||||
ADVANCE(9);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(11);
|
||||
ADVANCE(10);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(16);
|
||||
ADVANCE(15);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(20);
|
||||
ADVANCE(19);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(24);
|
||||
ADVANCE(23);
|
||||
LEX_ERROR();
|
||||
case 2:
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
ACCEPT_TOKEN(ts_sym__whitespace);
|
||||
case 3:
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(4);
|
||||
ADVANCE(3);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(5);
|
||||
ADVANCE(4);
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(3);
|
||||
ADVANCE(2);
|
||||
LEX_ERROR();
|
||||
case 4:
|
||||
case 3:
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
case 4:
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(4);
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(2);
|
||||
LEX_ERROR();
|
||||
case 5:
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(5);
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(3);
|
||||
LEX_ERROR();
|
||||
case 6:
|
||||
if (lookahead == '\"')
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(5);
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(3);
|
||||
ADVANCE(2);
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
case 7:
|
||||
case 6:
|
||||
if (lookahead == '.')
|
||||
ADVANCE(8);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(7);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(6);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 7:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(8);
|
||||
LEX_ERROR();
|
||||
case 8:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(9);
|
||||
LEX_ERROR();
|
||||
case 9:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(9);
|
||||
ADVANCE(8);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 10:
|
||||
case 9:
|
||||
ACCEPT_TOKEN(ts_aux_sym_5);
|
||||
case 11:
|
||||
case 10:
|
||||
if (lookahead == 'a')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 11:
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(12);
|
||||
LEX_ERROR();
|
||||
case 12:
|
||||
if (lookahead == 'l')
|
||||
if (lookahead == 's')
|
||||
ADVANCE(13);
|
||||
LEX_ERROR();
|
||||
case 13:
|
||||
if (lookahead == 's')
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(14);
|
||||
LEX_ERROR();
|
||||
case 14:
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(15);
|
||||
LEX_ERROR();
|
||||
case 15:
|
||||
ACCEPT_TOKEN(ts_sym_false);
|
||||
case 16:
|
||||
case 15:
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(16);
|
||||
LEX_ERROR();
|
||||
case 16:
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(17);
|
||||
LEX_ERROR();
|
||||
case 17:
|
||||
|
|
@ -161,168 +156,206 @@ LEX_FN() {
|
|||
ADVANCE(18);
|
||||
LEX_ERROR();
|
||||
case 18:
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(19);
|
||||
LEX_ERROR();
|
||||
case 19:
|
||||
ACCEPT_TOKEN(ts_sym_null);
|
||||
case 20:
|
||||
case 19:
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(20);
|
||||
LEX_ERROR();
|
||||
case 20:
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(21);
|
||||
LEX_ERROR();
|
||||
case 21:
|
||||
if (lookahead == 'u')
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(22);
|
||||
LEX_ERROR();
|
||||
case 22:
|
||||
if (lookahead == 'e')
|
||||
ACCEPT_TOKEN(ts_sym_true);
|
||||
case 23:
|
||||
ACCEPT_TOKEN(ts_aux_sym_1);
|
||||
case 24:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(25);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(24);
|
||||
LEX_ERROR();
|
||||
case 25:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 26:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(26);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case 27:
|
||||
ACCEPT_TOKEN(ts_aux_sym_4);
|
||||
case 28:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(28);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(2);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(9);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(29);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(10);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(15);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(19);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(23);
|
||||
LEX_ERROR();
|
||||
case 23:
|
||||
ACCEPT_TOKEN(ts_sym_true);
|
||||
case 24:
|
||||
ACCEPT_TOKEN(ts_aux_sym_1);
|
||||
case 25:
|
||||
if (lookahead == 0)
|
||||
ADVANCE(26);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
LEX_ERROR();
|
||||
case 26:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 27:
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(28);
|
||||
LEX_ERROR();
|
||||
case 28:
|
||||
ACCEPT_TOKEN(ts_aux_sym_4);
|
||||
case 29:
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(3);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(10);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(30);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(11);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(16);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(20);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(24);
|
||||
LEX_ERROR();
|
||||
case 30:
|
||||
ACCEPT_TOKEN(ts_aux_sym_6);
|
||||
case 31:
|
||||
case 30:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
ADVANCE(30);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(31);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(29);
|
||||
LEX_ERROR();
|
||||
case 31:
|
||||
ACCEPT_TOKEN(ts_aux_sym_3);
|
||||
case 32:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(32);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(30);
|
||||
ADVANCE(29);
|
||||
LEX_ERROR();
|
||||
case 32:
|
||||
ACCEPT_TOKEN(ts_aux_sym_3);
|
||||
case 33:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(30);
|
||||
ADVANCE(33);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(31);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case 34:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(32);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(28);
|
||||
ADVANCE(34);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(35);
|
||||
LEX_ERROR();
|
||||
case 35:
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(36);
|
||||
LEX_ERROR();
|
||||
case 36:
|
||||
ACCEPT_TOKEN(ts_aux_sym_2);
|
||||
case 37:
|
||||
case 36:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
ADVANCE(36);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(28);
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case 37:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(37);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(2);
|
||||
LEX_ERROR();
|
||||
case 38:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(25);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
ADVANCE(38);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(3);
|
||||
ADVANCE(2);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(31);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(6);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(35);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(9);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(29);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(10);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(15);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(19);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(23);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(26);
|
||||
ADVANCE(25);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
ADVANCE(38);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(3);
|
||||
ADVANCE(2);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(32);
|
||||
ADVANCE(31);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(7);
|
||||
ADVANCE(6);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(36);
|
||||
ADVANCE(35);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(10);
|
||||
ADVANCE(9);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(30);
|
||||
ADVANCE(29);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(11);
|
||||
ADVANCE(10);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(16);
|
||||
ADVANCE(15);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(20);
|
||||
ADVANCE(19);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(24);
|
||||
ADVANCE(23);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(28);
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_ERROR();
|
||||
|
|
@ -331,65 +364,65 @@ LEX_FN() {
|
|||
|
||||
LEX_STATES = {
|
||||
[0] = 1,
|
||||
[1] = 25,
|
||||
[2] = 25,
|
||||
[3] = 27,
|
||||
[4] = 29,
|
||||
[5] = 31,
|
||||
[6] = 31,
|
||||
[7] = 27,
|
||||
[8] = 29,
|
||||
[9] = 25,
|
||||
[10] = 31,
|
||||
[11] = 31,
|
||||
[12] = 33,
|
||||
[1] = 24,
|
||||
[2] = 24,
|
||||
[3] = 26,
|
||||
[4] = 28,
|
||||
[5] = 30,
|
||||
[6] = 30,
|
||||
[7] = 26,
|
||||
[8] = 28,
|
||||
[9] = 24,
|
||||
[10] = 30,
|
||||
[11] = 30,
|
||||
[12] = 32,
|
||||
[13] = 1,
|
||||
[14] = 31,
|
||||
[15] = 33,
|
||||
[16] = 31,
|
||||
[17] = 34,
|
||||
[18] = 35,
|
||||
[19] = 31,
|
||||
[14] = 30,
|
||||
[15] = 32,
|
||||
[16] = 30,
|
||||
[17] = 33,
|
||||
[18] = 34,
|
||||
[19] = 30,
|
||||
[20] = 1,
|
||||
[21] = 34,
|
||||
[22] = 34,
|
||||
[23] = 27,
|
||||
[24] = 29,
|
||||
[25] = 31,
|
||||
[26] = 34,
|
||||
[27] = 33,
|
||||
[28] = 34,
|
||||
[29] = 34,
|
||||
[30] = 35,
|
||||
[31] = 34,
|
||||
[21] = 33,
|
||||
[22] = 33,
|
||||
[23] = 26,
|
||||
[24] = 28,
|
||||
[25] = 30,
|
||||
[26] = 33,
|
||||
[27] = 32,
|
||||
[28] = 33,
|
||||
[29] = 33,
|
||||
[30] = 34,
|
||||
[31] = 33,
|
||||
[32] = 1,
|
||||
[33] = 34,
|
||||
[34] = 37,
|
||||
[35] = 38,
|
||||
[36] = 34,
|
||||
[37] = 35,
|
||||
[33] = 33,
|
||||
[34] = 36,
|
||||
[35] = 37,
|
||||
[36] = 33,
|
||||
[37] = 34,
|
||||
[38] = 1,
|
||||
[39] = 34,
|
||||
[40] = 37,
|
||||
[41] = 37,
|
||||
[42] = 34,
|
||||
[43] = 37,
|
||||
[44] = 34,
|
||||
[45] = 37,
|
||||
[46] = 31,
|
||||
[47] = 37,
|
||||
[48] = 31,
|
||||
[49] = 33,
|
||||
[50] = 25,
|
||||
[51] = 34,
|
||||
[52] = 35,
|
||||
[53] = 25,
|
||||
[39] = 33,
|
||||
[40] = 36,
|
||||
[41] = 36,
|
||||
[42] = 33,
|
||||
[43] = 36,
|
||||
[44] = 33,
|
||||
[45] = 36,
|
||||
[46] = 30,
|
||||
[47] = 36,
|
||||
[48] = 30,
|
||||
[49] = 32,
|
||||
[50] = 24,
|
||||
[51] = 33,
|
||||
[52] = 34,
|
||||
[53] = 24,
|
||||
[54] = 1,
|
||||
[55] = 34,
|
||||
[56] = 37,
|
||||
[57] = 25,
|
||||
[58] = 37,
|
||||
[59] = 25,
|
||||
[55] = 33,
|
||||
[56] = 36,
|
||||
[57] = 24,
|
||||
[58] = 36,
|
||||
[59] = 24,
|
||||
};
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
|
|
@ -405,22 +438,18 @@ PARSE_TABLE = {
|
|||
[ts_sym_null] = SHIFT(2),
|
||||
[ts_sym_true] = SHIFT(2),
|
||||
[ts_sym_false] = SHIFT(2),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(3),
|
||||
[ts_aux_sym_5] = SHIFT(4),
|
||||
},
|
||||
[1] = {
|
||||
[ts_builtin_sym_end] = ACCEPT_INPUT(),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
},
|
||||
[2] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_value, 1),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
},
|
||||
[3] = {
|
||||
[ts_builtin_sym_error] = SHIFT(51),
|
||||
[ts_sym_string] = SHIFT(52),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_4] = SHIFT(53),
|
||||
},
|
||||
[4] = {
|
||||
|
|
@ -433,26 +462,22 @@ PARSE_TABLE = {
|
|||
[ts_sym_null] = SHIFT(6),
|
||||
[ts_sym_true] = SHIFT(6),
|
||||
[ts_sym_false] = SHIFT(6),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(7),
|
||||
[ts_aux_sym_5] = SHIFT(8),
|
||||
[ts_aux_sym_6] = SHIFT(9),
|
||||
},
|
||||
[5] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_array_repeat0] = SHIFT(49),
|
||||
[ts_aux_sym_3] = SHIFT(13),
|
||||
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 0),
|
||||
},
|
||||
[6] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_value, 1),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_value, 1),
|
||||
},
|
||||
[7] = {
|
||||
[ts_builtin_sym_error] = SHIFT(17),
|
||||
[ts_sym_string] = SHIFT(18),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_4] = SHIFT(19),
|
||||
},
|
||||
[8] = {
|
||||
|
|
@ -465,28 +490,23 @@ PARSE_TABLE = {
|
|||
[ts_sym_null] = SHIFT(6),
|
||||
[ts_sym_true] = SHIFT(6),
|
||||
[ts_sym_false] = SHIFT(6),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(7),
|
||||
[ts_aux_sym_5] = SHIFT(8),
|
||||
[ts_aux_sym_6] = SHIFT(11),
|
||||
},
|
||||
[9] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_array, 2),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
},
|
||||
[10] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_array_repeat0] = SHIFT(12),
|
||||
[ts_aux_sym_3] = SHIFT(13),
|
||||
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 0),
|
||||
},
|
||||
[11] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_array, 2),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_array, 2),
|
||||
},
|
||||
[12] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(16),
|
||||
},
|
||||
[13] = {
|
||||
|
|
@ -499,37 +519,30 @@ PARSE_TABLE = {
|
|||
[ts_sym_null] = SHIFT(6),
|
||||
[ts_sym_true] = SHIFT(6),
|
||||
[ts_sym_false] = SHIFT(6),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(7),
|
||||
[ts_aux_sym_5] = SHIFT(8),
|
||||
},
|
||||
[14] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_array_repeat0] = SHIFT(15),
|
||||
[ts_aux_sym_3] = SHIFT(13),
|
||||
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 0),
|
||||
},
|
||||
[15] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 3),
|
||||
},
|
||||
[16] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_array, 4),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_array, 4),
|
||||
},
|
||||
[17] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_object_repeat0] = SHIFT(47),
|
||||
[ts_aux_sym_3] = SHIFT(35),
|
||||
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
|
||||
},
|
||||
[18] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_2] = SHIFT(20),
|
||||
},
|
||||
[19] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 2),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_object, 2),
|
||||
},
|
||||
|
|
@ -542,25 +555,21 @@ PARSE_TABLE = {
|
|||
[ts_sym_null] = SHIFT(22),
|
||||
[ts_sym_true] = SHIFT(22),
|
||||
[ts_sym_false] = SHIFT(22),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(23),
|
||||
[ts_aux_sym_5] = SHIFT(24),
|
||||
},
|
||||
[21] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_object_repeat0] = SHIFT(45),
|
||||
[ts_aux_sym_3] = SHIFT(35),
|
||||
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
|
||||
},
|
||||
[22] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_value, 1),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_value, 1),
|
||||
},
|
||||
[23] = {
|
||||
[ts_builtin_sym_error] = SHIFT(29),
|
||||
[ts_sym_string] = SHIFT(30),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_4] = SHIFT(31),
|
||||
},
|
||||
[24] = {
|
||||
|
|
@ -573,43 +582,35 @@ PARSE_TABLE = {
|
|||
[ts_sym_null] = SHIFT(6),
|
||||
[ts_sym_true] = SHIFT(6),
|
||||
[ts_sym_false] = SHIFT(6),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(7),
|
||||
[ts_aux_sym_5] = SHIFT(8),
|
||||
[ts_aux_sym_6] = SHIFT(26),
|
||||
},
|
||||
[25] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_array_repeat0] = SHIFT(27),
|
||||
[ts_aux_sym_3] = SHIFT(13),
|
||||
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 0),
|
||||
},
|
||||
[26] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_array, 2),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_array, 2),
|
||||
},
|
||||
[27] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(28),
|
||||
},
|
||||
[28] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_array, 4),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_array, 4),
|
||||
},
|
||||
[29] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_object_repeat0] = SHIFT(43),
|
||||
[ts_aux_sym_3] = SHIFT(35),
|
||||
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
|
||||
},
|
||||
[30] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_2] = SHIFT(32),
|
||||
},
|
||||
[31] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 2),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_object, 2),
|
||||
},
|
||||
|
|
@ -622,33 +623,27 @@ PARSE_TABLE = {
|
|||
[ts_sym_null] = SHIFT(22),
|
||||
[ts_sym_true] = SHIFT(22),
|
||||
[ts_sym_false] = SHIFT(22),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(23),
|
||||
[ts_aux_sym_5] = SHIFT(24),
|
||||
},
|
||||
[33] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_object_repeat0] = SHIFT(34),
|
||||
[ts_aux_sym_3] = SHIFT(35),
|
||||
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
|
||||
},
|
||||
[34] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_4] = SHIFT(42),
|
||||
},
|
||||
[35] = {
|
||||
[ts_builtin_sym_error] = SHIFT(36),
|
||||
[ts_sym_string] = SHIFT(37),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
},
|
||||
[36] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_object_repeat0] = SHIFT(41),
|
||||
[ts_aux_sym_3] = SHIFT(35),
|
||||
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
|
||||
},
|
||||
[37] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_2] = SHIFT(38),
|
||||
},
|
||||
[38] = {
|
||||
|
|
@ -660,77 +655,61 @@ PARSE_TABLE = {
|
|||
[ts_sym_null] = SHIFT(22),
|
||||
[ts_sym_true] = SHIFT(22),
|
||||
[ts_sym_false] = SHIFT(22),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(23),
|
||||
[ts_aux_sym_5] = SHIFT(24),
|
||||
},
|
||||
[39] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_object_repeat0] = SHIFT(40),
|
||||
[ts_aux_sym_3] = SHIFT(35),
|
||||
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
|
||||
},
|
||||
[40] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 5),
|
||||
},
|
||||
[41] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 3),
|
||||
},
|
||||
[42] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 6),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_object, 6),
|
||||
},
|
||||
[43] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_4] = SHIFT(44),
|
||||
},
|
||||
[44] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 4),
|
||||
[ts_aux_sym_4] = REDUCE(ts_sym_object, 4),
|
||||
},
|
||||
[45] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_4] = SHIFT(46),
|
||||
},
|
||||
[46] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 6),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_object, 6),
|
||||
},
|
||||
[47] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_4] = SHIFT(48),
|
||||
},
|
||||
[48] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_3] = REDUCE(ts_sym_object, 4),
|
||||
[ts_aux_sym_6] = REDUCE(ts_sym_object, 4),
|
||||
},
|
||||
[49] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_6] = SHIFT(50),
|
||||
},
|
||||
[50] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_array, 4),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
},
|
||||
[51] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_object_repeat0] = SHIFT(58),
|
||||
[ts_aux_sym_3] = SHIFT(35),
|
||||
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
|
||||
},
|
||||
[52] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_2] = SHIFT(54),
|
||||
},
|
||||
[53] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_object, 2),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
},
|
||||
[54] = {
|
||||
[ts_sym_value] = SHIFT(55),
|
||||
|
|
@ -741,31 +720,25 @@ PARSE_TABLE = {
|
|||
[ts_sym_null] = SHIFT(22),
|
||||
[ts_sym_true] = SHIFT(22),
|
||||
[ts_sym_false] = SHIFT(22),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_1] = SHIFT(23),
|
||||
[ts_aux_sym_5] = SHIFT(24),
|
||||
},
|
||||
[55] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_object_repeat0] = SHIFT(56),
|
||||
[ts_aux_sym_3] = SHIFT(35),
|
||||
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
|
||||
},
|
||||
[56] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_4] = SHIFT(57),
|
||||
},
|
||||
[57] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_object, 6),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
},
|
||||
[58] = {
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
[ts_aux_sym_4] = SHIFT(59),
|
||||
},
|
||||
[59] = {
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_object, 4),
|
||||
[ts_sym__whitespace] = SHIFT_EXTRA(),
|
||||
},
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ std::ostream &operator<<(std::ostream &stream, const rules::rule_ptr &rule);
|
|||
class Grammar {
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > rules_;
|
||||
std::set<std::string> ubiquitous_tokens_;
|
||||
std::set<char> separators_;
|
||||
|
||||
public:
|
||||
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr> > &rules);
|
||||
|
|
@ -43,6 +44,8 @@ class Grammar {
|
|||
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules() const;
|
||||
const std::set<std::string> &ubiquitous_tokens() const;
|
||||
Grammar &ubiquitous_tokens(const std::set<std::string> &ubiquitous_tokens);
|
||||
const std::set<char> &separators() const;
|
||||
Grammar &separators(const std::set<char> &separators);
|
||||
};
|
||||
|
||||
struct Conflict {
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ typedef struct TSLexer {
|
|||
size_t chunk_size;
|
||||
size_t position_in_chunk;
|
||||
size_t token_end_position;
|
||||
size_t token_start_position;
|
||||
|
||||
TSTree *(*accept_fn)(struct TSLexer *, TSSymbol, int);
|
||||
int (*advance_fn)(struct TSLexer *);
|
||||
|
|
@ -33,6 +34,10 @@ static inline char ts_lexer_lookahead_char(const TSLexer *lexer) {
|
|||
return lexer->chunk[lexer->position_in_chunk];
|
||||
}
|
||||
|
||||
static inline void ts_lexer_start_token(TSLexer *lexer) {
|
||||
lexer->token_start_position = ts_lexer_position(lexer);
|
||||
}
|
||||
|
||||
static inline int ts_lexer_advance(TSLexer *lexer) {
|
||||
return lexer->advance_fn(lexer);
|
||||
}
|
||||
|
|
@ -101,6 +106,8 @@ struct TSLanguage {
|
|||
lookahead = ts_lexer_lookahead_char(lexer); \
|
||||
DEBUG_LEX("CHAR '%c'", lookahead);
|
||||
|
||||
#define START_TOKEN() ts_lexer_start_token(lexer);
|
||||
|
||||
#define ADVANCE(state_index) \
|
||||
{ \
|
||||
DEBUG_LEX("ADVANCE %d", state_index); \
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ describe("resolving parse conflicts", []() {
|
|||
{ "token1", pattern("[a-c]") },
|
||||
{ "token2", pattern("[b-d]") },
|
||||
{ "token3", keyword("stuff") },
|
||||
}, {});
|
||||
}, {}, set<char>());
|
||||
|
||||
describe("lexical conflicts", [&]() {
|
||||
Symbol sym1(0, SymbolOptionToken);
|
||||
|
|
|
|||
44
spec/compiler/build_tables/lex_item_spec.cc
Normal file
44
spec/compiler/build_tables/lex_item_spec.cc
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("lex items", []() {
|
||||
describe("determining if an item is the start of a token", [&]() {
|
||||
Symbol sym(1);
|
||||
rule_ptr token_start = make_shared<Metadata>(str("a"), map<MetadataKey, int>({
|
||||
{ START_TOKEN, 1 }
|
||||
}));
|
||||
|
||||
it("returns true for rules designated as token starts", [&]() {
|
||||
LexItem item(sym, token_start);
|
||||
AssertThat(item.is_token_start(), IsTrue());
|
||||
});
|
||||
|
||||
it("returns false for rules not designated as token starts", [&]() {
|
||||
AssertThat(LexItem(sym, make_shared<Metadata>(str("a"), map<MetadataKey, int>({
|
||||
{ START_TOKEN, 0 }
|
||||
}))).is_token_start(), IsFalse());
|
||||
AssertThat(LexItem(sym, str("a")).is_token_start(), IsFalse());
|
||||
});
|
||||
|
||||
describe("when given a sequence containing a token start", [&]() {
|
||||
it("returns true when the rule before the token start may be blank", [&]() {
|
||||
LexItem item(sym, seq({ repeat(str("a")), token_start }));
|
||||
AssertThat(item.is_token_start(), IsTrue());
|
||||
});
|
||||
|
||||
it("returns false when the rule before the token start cannot be blank", [&]() {
|
||||
LexItem item(sym, seq({ str("a"), token_start }));
|
||||
AssertThat(item.is_token_start(), IsFalse());
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -18,6 +18,7 @@ describe("extracting tokens from a grammar", []() {
|
|||
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
|
||||
},
|
||||
set<Symbol>(),
|
||||
set<char>()
|
||||
});
|
||||
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
|
|
@ -36,6 +37,7 @@ describe("extracting tokens from a grammar", []() {
|
|||
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
|
||||
},
|
||||
set<Symbol>(),
|
||||
set<char>()
|
||||
});
|
||||
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
|
|
@ -56,6 +58,7 @@ describe("extracting tokens from a grammar", []() {
|
|||
i_sym(0) }) }
|
||||
},
|
||||
set<Symbol>(),
|
||||
set<char>()
|
||||
});
|
||||
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
|
|
@ -74,6 +77,7 @@ describe("extracting tokens from a grammar", []() {
|
|||
{ "rule_A", choice({ i_sym(0), blank() }) },
|
||||
},
|
||||
set<Symbol>(),
|
||||
set<char>()
|
||||
});
|
||||
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
|
|
@ -90,6 +94,7 @@ describe("extracting tokens from a grammar", []() {
|
|||
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
|
||||
},
|
||||
set<Symbol>(),
|
||||
set<char>()
|
||||
});
|
||||
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
|
|
@ -102,6 +107,18 @@ describe("extracting tokens from a grammar", []() {
|
|||
})))
|
||||
});
|
||||
|
||||
it("preserves the separator characters in the lexical grammar", [&]() {
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", str("ab") },
|
||||
},
|
||||
set<Symbol>(),
|
||||
{ 'x', 'y', 'z' }
|
||||
});
|
||||
|
||||
AssertThat(result.second.separators, Equals(set<char>({ 'x', 'y', 'z' })));
|
||||
});
|
||||
|
||||
describe("when an entire rule can be extracted", [&]() {
|
||||
it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
|
|
@ -111,6 +128,7 @@ describe("extracting tokens from a grammar", []() {
|
|||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
},
|
||||
set<Symbol>(),
|
||||
set<char>()
|
||||
});
|
||||
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
|
|
@ -132,6 +150,7 @@ describe("extracting tokens from a grammar", []() {
|
|||
{ "rule_C", i_sym(1) },
|
||||
},
|
||||
set<Symbol>(),
|
||||
set<char>()
|
||||
});
|
||||
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
|
|
@ -153,6 +172,7 @@ describe("extracting tokens from a grammar", []() {
|
|||
{ "rule_C", i_sym(1) },
|
||||
},
|
||||
{ Symbol(0) },
|
||||
set<char>()
|
||||
});
|
||||
|
||||
AssertThat(result.first.ubiquitous_tokens, Equals(set<Symbol>({
|
||||
|
|
|
|||
|
|
@ -54,6 +54,16 @@ describe("interning symbols in a grammar", []() {
|
|||
Symbol(2)
|
||||
})));
|
||||
});
|
||||
|
||||
it("preserves the grammar's separator character set", [&]() {
|
||||
auto grammar = Grammar({
|
||||
{ "z", str("stuff") }
|
||||
}).separators({ 'x', 'y' });
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.first.separators, Equals(set<char>({ 'x', 'y' })))
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
|
|||
|
|
@ -1,9 +1,6 @@
|
|||
#include "runtime/runtime_spec_helper.h"
|
||||
#include "runtime/helpers/spy_reader.h"
|
||||
|
||||
#include "runtime/tree.h"
|
||||
#include "runtime/node.h"
|
||||
|
||||
extern "C" const TSLanguage * ts_language_json();
|
||||
extern "C" const TSLanguage * ts_language_javascript();
|
||||
|
||||
|
|
@ -90,7 +87,7 @@ describe("Document", [&]() {
|
|||
|
||||
it("updates the parse tree", [&]() {
|
||||
AssertThat(string(ts_node_string(ts_document_root_node(doc))), Equals(
|
||||
"(DOCUMENT (object (string) (array (number) (number)) (string) (value (number))))"));
|
||||
"(DOCUMENT (object (string) (array (number) (number)) (string) (number)))"));
|
||||
});
|
||||
|
||||
it("re-reads only the changed portion of the input", [&]() {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ recovers from errors at the top level
|
|||
=====================================================
|
||||
x * * y
|
||||
---
|
||||
(expression (variable)) (ERROR '*')
|
||||
(variable) (ERROR '*')
|
||||
|
||||
=====================================================
|
||||
recovers from errors inside parenthesized expressions
|
||||
|
|
@ -11,5 +11,5 @@ recovers from errors inside parenthesized expressions
|
|||
x + (y * + z) * 5
|
||||
---
|
||||
(sum
|
||||
(expression (variable))
|
||||
(product (group (ERROR '+')) (expression (number))))
|
||||
(variable)
|
||||
(product (group (ERROR '+')) (number)))
|
||||
|
|
|
|||
|
|
@ -3,28 +3,28 @@ parses numbers
|
|||
===================
|
||||
5
|
||||
---
|
||||
(expression (number))
|
||||
(number)
|
||||
|
||||
===================
|
||||
parses variables
|
||||
===================
|
||||
x
|
||||
---
|
||||
(expression (variable))
|
||||
(variable)
|
||||
|
||||
===================
|
||||
parses products
|
||||
===================
|
||||
x * x
|
||||
---
|
||||
(product (expression (variable)) (expression (variable)))
|
||||
(product (variable) (variable))
|
||||
|
||||
===================
|
||||
parses sums
|
||||
===================
|
||||
x + x
|
||||
---
|
||||
(sum (expression (variable)) (expression (variable)))
|
||||
(sum (variable) (variable))
|
||||
|
||||
===============================================
|
||||
binds multiplication more tightly than addition
|
||||
|
|
@ -32,8 +32,8 @@ binds multiplication more tightly than addition
|
|||
a * b + c * d
|
||||
---
|
||||
(sum
|
||||
(product (expression (variable)) (expression (variable)))
|
||||
(product (expression (variable)) (expression (variable))))
|
||||
(product (variable) (variable))
|
||||
(product (variable) (variable)))
|
||||
|
||||
============================
|
||||
parses exponents
|
||||
|
|
@ -41,11 +41,9 @@ parses exponents
|
|||
x + y * z^(a + b)
|
||||
---
|
||||
(sum
|
||||
(expression (variable))
|
||||
(variable)
|
||||
(product
|
||||
(expression (variable))
|
||||
(variable)
|
||||
(exponent
|
||||
(variable)
|
||||
(group (sum
|
||||
(expression (variable))
|
||||
(variable))))))
|
||||
(group (sum (variable) (variable))))))
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ print(isDone() ? stuff : otherStuff);
|
|||
(program (expression_statement
|
||||
(function_call
|
||||
(identifier)
|
||||
(ternary (function_call (identifier)) (expression (identifier)) (identifier)))))
|
||||
(ternary (function_call (identifier)) (identifier) (identifier)))))
|
||||
|
||||
==========================================
|
||||
parses mathematical operators
|
||||
|
|
@ -78,10 +78,10 @@ parses mathematical operators
|
|||
---
|
||||
(program (expression_statement
|
||||
(math_op
|
||||
(math_op (expression (identifier)))
|
||||
(math_op (identifier))
|
||||
(math_op
|
||||
(math_op (expression (identifier)) (expression (identifier)))
|
||||
(math_op (expression (identifier)) (math_op (identifier)))))))
|
||||
(math_op (identifier) (identifier))
|
||||
(math_op (identifier) (math_op (identifier)))))))
|
||||
|
||||
==========================================
|
||||
parses boolean operators
|
||||
|
|
@ -91,9 +91,9 @@ parses boolean operators
|
|||
---
|
||||
(program (expression_statement
|
||||
(bool_op
|
||||
(bool_op (expression (identifier)))
|
||||
(bool_op (identifier))
|
||||
(bool_op
|
||||
(expression (bool_op (expression (identifier)) (identifier)))))))
|
||||
(expression (bool_op (identifier) (identifier)))))))
|
||||
|
||||
===========================================
|
||||
parses the type operators
|
||||
|
|
@ -103,8 +103,8 @@ print((x instanceof Array) || (typeof x == "string"))
|
|||
---
|
||||
(program (expression_statement (function_call (identifier)
|
||||
(bool_op
|
||||
(expression (instanceof_expression (expression (identifier)) (identifier)))
|
||||
(expression (typeof_expression (bool_op (expression (identifier)) (string))))))))
|
||||
(expression (instanceof_expression (identifier) (identifier)))
|
||||
(expression (typeof_expression (bool_op (identifier) (string))))))))
|
||||
|
||||
============================================
|
||||
parses the 'in' operator
|
||||
|
|
@ -114,7 +114,7 @@ print(x in y)
|
|||
---
|
||||
(program (expression_statement (function_call
|
||||
(identifier)
|
||||
(in_expression (expression (identifier)) (identifier)))))
|
||||
(in_expression (identifier) (identifier)))))
|
||||
|
||||
============================================
|
||||
parses assignment operators
|
||||
|
|
|
|||
|
|
@ -37,4 +37,4 @@ recovers from errors inside nested objects
|
|||
(object
|
||||
(string) (object (string) (number) (ERROR '2'))
|
||||
(ERROR '[')
|
||||
(string) (value (number)))
|
||||
(string) (number))
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ parses floating point numbers
|
|||
=============================
|
||||
3.14
|
||||
---
|
||||
(value (number))
|
||||
(number)
|
||||
|
||||
===================
|
||||
parses empty arrays
|
||||
|
|
@ -28,7 +28,7 @@ parses arrays
|
|||
(null)
|
||||
(true)
|
||||
(false)
|
||||
(object (string) (value (string))))
|
||||
(object (string) (string)))
|
||||
|
||||
====================
|
||||
parses empty objects
|
||||
|
|
@ -47,5 +47,5 @@ parses long objects
|
|||
---
|
||||
(object
|
||||
(string) (string)
|
||||
(string) (value (number)))
|
||||
(string) (number))
|
||||
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ describe("stacks", [&]() {
|
|||
TSTree *node1;
|
||||
|
||||
before_each([&]() {
|
||||
node1 = ts_tree_make_leaf(sym1, 5, 0);
|
||||
node1 = ts_tree_make_leaf(sym1, 5, 1, 0);
|
||||
ts_stack_push(&stack, 5, node1);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -15,8 +15,8 @@ describe("Tree", []() {
|
|||
TSTree *tree1, *tree2, *parent1;
|
||||
|
||||
before_each([&]() {
|
||||
tree1 = ts_tree_make_leaf(cat, 5, 0);
|
||||
tree2 = ts_tree_make_leaf(cat, 3, 0);
|
||||
tree1 = ts_tree_make_leaf(cat, 5, 2, 0);
|
||||
tree2 = ts_tree_make_leaf(cat, 3, 1, 0);
|
||||
parent1 = ts_tree_make_node(dog, 2, tree_array({ tree1, tree2, }), 0);
|
||||
});
|
||||
|
||||
|
|
@ -28,7 +28,11 @@ describe("Tree", []() {
|
|||
|
||||
describe("building a parent node", [&]() {
|
||||
it("computes its size based on its child nodes", [&]() {
|
||||
AssertThat(parent1->size, Equals<size_t>(8));
|
||||
AssertThat(parent1->size, Equals<size_t>(9));
|
||||
});
|
||||
|
||||
it("computes its padding based on its first child", [&]() {
|
||||
AssertThat(parent1->padding, Equals<size_t>(2));
|
||||
});
|
||||
|
||||
it("computes the offset of each child node", [&]() {
|
||||
|
|
@ -39,7 +43,8 @@ describe("Tree", []() {
|
|||
AssertThat(children[0].tree, Equals(tree1));
|
||||
AssertThat(children[0].offset, Equals<size_t>(0));
|
||||
AssertThat(children[1].tree, Equals(tree2));
|
||||
AssertThat(children[1].offset, Equals<size_t>(tree1->size));
|
||||
AssertThat(children[1].offset, Equals<size_t>(
|
||||
tree1->size + tree2->padding));
|
||||
});
|
||||
|
||||
describe("when one of the child nodes is hidden", [&]() {
|
||||
|
|
@ -47,7 +52,7 @@ describe("Tree", []() {
|
|||
|
||||
before_each([&]() {
|
||||
parent1->options = TSTreeOptionsHidden;
|
||||
tree3 = ts_tree_make_leaf(cat, 8, 0);
|
||||
tree3 = ts_tree_make_leaf(cat, 8, 5, 0);
|
||||
grandparent = ts_tree_make_node(pig, 2, tree_array({
|
||||
parent1,
|
||||
tree3,
|
||||
|
|
@ -67,18 +72,20 @@ describe("Tree", []() {
|
|||
AssertThat(children[0].tree, Equals(tree1));
|
||||
AssertThat(children[0].offset, Equals<size_t>(0));
|
||||
AssertThat(children[1].tree, Equals(tree2));
|
||||
AssertThat(children[1].offset, Equals<size_t>(tree1->size));
|
||||
AssertThat(children[1].offset, Equals<size_t>(
|
||||
tree1->size + tree2->padding));
|
||||
AssertThat(children[2].tree, Equals(tree3));
|
||||
AssertThat(children[2].offset, Equals<size_t>(tree1->size + tree2->size));
|
||||
AssertThat(children[2].offset, Equals<size_t>(
|
||||
tree1->size + tree2->padding + tree2->size + tree3->padding));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("equality", [&]() {
|
||||
it("returns true for identical trees", [&]() {
|
||||
TSTree *tree1_copy = ts_tree_make_leaf(cat, 5, 0);
|
||||
TSTree *tree1_copy = ts_tree_make_leaf(cat, 5, 2, 0);
|
||||
AssertThat(ts_tree_equals(tree1, tree1_copy), Equals(1));
|
||||
TSTree *tree2_copy = ts_tree_make_leaf(cat, 3, 0);
|
||||
TSTree *tree2_copy = ts_tree_make_leaf(cat, 3, 1, 0);
|
||||
AssertThat(ts_tree_equals(tree2, tree2_copy), Equals(1));
|
||||
|
||||
TSTree *parent2 = ts_tree_make_node(dog, 2, tree_array({
|
||||
|
|
@ -92,13 +99,13 @@ describe("Tree", []() {
|
|||
});
|
||||
|
||||
it("returns false for trees with different symbols", [&]() {
|
||||
TSTree *different_tree = ts_tree_make_leaf(pig, 0, 0);
|
||||
TSTree *different_tree = ts_tree_make_leaf(pig, 0, 0, 0);
|
||||
AssertThat(ts_tree_equals(tree1, different_tree), Equals(0));
|
||||
ts_tree_release(different_tree);
|
||||
});
|
||||
|
||||
it("returns false for trees with different children", [&]() {
|
||||
TSTree *different_tree = ts_tree_make_leaf(pig, 0, 0);
|
||||
TSTree *different_tree = ts_tree_make_leaf(pig, 0, 0, 0);
|
||||
TSTree *different_parent = ts_tree_make_node(dog, 2, tree_array({
|
||||
different_tree, different_tree,
|
||||
}), 0);
|
||||
|
|
|
|||
|
|
@ -38,9 +38,11 @@ class LexTableBuilder {
|
|||
if (symbol == rules::ERROR())
|
||||
continue;
|
||||
else if (symbol == rules::END_OF_INPUT())
|
||||
result.insert(LexItem(symbol, CharacterSet().include(0).copy()));
|
||||
result.insert(LexItem(
|
||||
symbol, after_separators(CharacterSet().include(0).copy())));
|
||||
else if (symbol.is_token())
|
||||
result.insert(LexItem(symbol, lex_grammar.rule(symbol)));
|
||||
result.insert(
|
||||
LexItem(symbol, after_separators(lex_grammar.rule(symbol))));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
@ -53,6 +55,7 @@ class LexTableBuilder {
|
|||
|
||||
add_accept_token_actions(item_set, state_id);
|
||||
add_advance_actions(item_set, state_id);
|
||||
add_token_start(item_set, state_id);
|
||||
|
||||
return state_id;
|
||||
} else {
|
||||
|
|
@ -92,6 +95,28 @@ class LexTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const auto &item : item_set)
|
||||
if (item.is_token_start())
|
||||
lex_table.state(state_id).is_token_start = true;
|
||||
}
|
||||
|
||||
CharacterSet separator_set() const {
|
||||
CharacterSet result;
|
||||
for (char c : lex_grammar.separators)
|
||||
result.include(c);
|
||||
return result;
|
||||
}
|
||||
|
||||
rules::rule_ptr after_separators(rules::rule_ptr rule) {
|
||||
return rules::Seq::Build(
|
||||
{ make_shared<rules::Metadata>(
|
||||
make_shared<rules::Repeat>(separator_set().copy()),
|
||||
map<rules::MetadataKey, int>(
|
||||
{ { rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 }, })),
|
||||
rule, });
|
||||
}
|
||||
|
||||
set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
|
||||
set<int> result;
|
||||
for (const auto &item : item_set)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,25 @@ bool LexItem::operator==(const LexItem &other) const {
|
|||
return (other.lhs == lhs) && other.rule->operator==(*rule);
|
||||
}
|
||||
|
||||
bool LexItem::is_token_start() const {
|
||||
class IsTokenStart : public rules::RuleFn<bool> {
|
||||
bool apply_to(const rules::Seq *rule) {
|
||||
if (apply(rule->left))
|
||||
return true;
|
||||
else if (rule_can_be_blank(rule->left))
|
||||
return apply(rule->right);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool apply_to(const rules::Metadata *rule) {
|
||||
return rule->value_for(rules::START_TOKEN);
|
||||
}
|
||||
};
|
||||
|
||||
return IsTokenStart().apply(rule);
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const LexItem &item) {
|
||||
return stream << string("(item ") << item.lhs << string(" ") << *item.rule
|
||||
<< string(")");
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ class LexItem : public Item {
|
|||
public:
|
||||
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
|
||||
bool operator==(const LexItem &other) const;
|
||||
bool is_token_start() const;
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const LexItem &item);
|
||||
|
|
|
|||
|
|
@ -314,6 +314,8 @@ class CCodeGenerator {
|
|||
|
||||
void code_for_lex_state(const LexState &lex_state) {
|
||||
auto expected_inputs = lex_state.expected_inputs();
|
||||
if (lex_state.is_token_start)
|
||||
line("START_TOKEN();");
|
||||
for (auto pair : lex_state.actions)
|
||||
if (!pair.first.is_empty())
|
||||
_if([&]() { condition_for_character_set(pair.first); },
|
||||
|
|
|
|||
|
|
@ -12,7 +12,9 @@ using rules::rule_ptr;
|
|||
|
||||
Grammar::Grammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules)
|
||||
: rules_(rules), ubiquitous_tokens_({}) {}
|
||||
: rules_(rules),
|
||||
ubiquitous_tokens_({}),
|
||||
separators_({ ' ', '\r', '\t', '\n' }) {}
|
||||
|
||||
bool Grammar::operator==(const Grammar &other) const {
|
||||
if (other.rules_.size() != rules_.size())
|
||||
|
|
@ -70,6 +72,13 @@ Grammar &Grammar::ubiquitous_tokens(const set<string> &ubiquitous_tokens) {
|
|||
return *this;
|
||||
}
|
||||
|
||||
const set<char> &Grammar::separators() const { return separators_; }
|
||||
|
||||
Grammar &Grammar::separators(const set<char> &separators) {
|
||||
separators_ = separators;
|
||||
return *this;
|
||||
}
|
||||
|
||||
const vector<pair<string, rule_ptr> > &Grammar::rules() const { return rules_; }
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ class LexState {
|
|||
std::map<rules::CharacterSet, LexAction> actions;
|
||||
LexAction default_action;
|
||||
std::set<rules::CharacterSet> expected_inputs() const;
|
||||
bool is_token_start;
|
||||
};
|
||||
|
||||
typedef int64_t LexStateId;
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ pair<LexicalGrammar, const GrammarError *> expand_tokens(
|
|||
aux_rules.push_back({ pair.first, rule });
|
||||
}
|
||||
|
||||
return { LexicalGrammar(rules, aux_rules), nullptr, };
|
||||
return { LexicalGrammar(rules, aux_rules, grammar.separators), nullptr, };
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ pair<SyntaxGrammar, LexicalGrammar> extract_tokens(
|
|||
ubiquitous_tokens.insert(inliner.replace_symbol(symbol));
|
||||
|
||||
return { SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
|
||||
LexicalGrammar(tokens, aux_tokens), };
|
||||
LexicalGrammar(tokens, aux_tokens, input_grammar.separators), };
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(
|
|||
InternedGrammar result;
|
||||
result.rules = rules;
|
||||
result.ubiquitous_tokens = ubiquitous_tokens;
|
||||
result.separators = grammar.separators();
|
||||
|
||||
return { result, nullptr };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ class InternedGrammar {
|
|||
public:
|
||||
std::vector<std::pair<std::string, rules::rule_ptr> > rules;
|
||||
std::set<rules::Symbol> ubiquitous_tokens;
|
||||
std::set<char> separators;
|
||||
};
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -178,8 +178,6 @@ class PatternParser {
|
|||
'9');
|
||||
case 'd':
|
||||
return CharacterSet().include('0', '9');
|
||||
case 's':
|
||||
return CharacterSet().include(' ').include('\t').include('\r');
|
||||
case 't':
|
||||
return CharacterSet().include('\t');
|
||||
case 'n':
|
||||
|
|
|
|||
|
|
@ -26,16 +26,21 @@ PreparedGrammar::PreparedGrammar() {}
|
|||
SyntaxGrammar::SyntaxGrammar() {}
|
||||
LexicalGrammar::LexicalGrammar() {}
|
||||
|
||||
PreparedGrammar::PreparedGrammar(
|
||||
const vector<pair<string, rules::rule_ptr> > &rules,
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules)
|
||||
: rules(rules), aux_rules(aux_rules) {}
|
||||
|
||||
SyntaxGrammar::SyntaxGrammar(
|
||||
const vector<pair<string, rules::rule_ptr> > &rules,
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules)
|
||||
: PreparedGrammar(rules, aux_rules) {}
|
||||
|
||||
LexicalGrammar::LexicalGrammar(
|
||||
const vector<pair<string, rules::rule_ptr> > &rules,
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules)
|
||||
: PreparedGrammar(rules, aux_rules) {}
|
||||
|
||||
PreparedGrammar::PreparedGrammar(
|
||||
const vector<pair<string, rules::rule_ptr> > &rules,
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules)
|
||||
: rules(rules), aux_rules(aux_rules) {}
|
||||
|
||||
SyntaxGrammar::SyntaxGrammar(
|
||||
const vector<pair<string, rules::rule_ptr> > &rules,
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules,
|
||||
|
|
@ -44,7 +49,8 @@ SyntaxGrammar::SyntaxGrammar(
|
|||
|
||||
LexicalGrammar::LexicalGrammar(
|
||||
const vector<pair<string, rules::rule_ptr> > &rules,
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules)
|
||||
: PreparedGrammar(rules, aux_rules) {}
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules,
|
||||
const set<char> &separators)
|
||||
: PreparedGrammar(rules, aux_rules), separators(separators) {}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -44,6 +44,12 @@ class LexicalGrammar : public PreparedGrammar {
|
|||
LexicalGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules);
|
||||
LexicalGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules,
|
||||
const std::set<char> &separators);
|
||||
|
||||
std::set<char> separators;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ namespace tree_sitter {
|
|||
namespace rules {
|
||||
|
||||
typedef enum {
|
||||
START_TOKEN,
|
||||
PRECEDENCE,
|
||||
IS_TOKEN,
|
||||
DESCRIPTION,
|
||||
|
|
|
|||
|
|
@ -23,11 +23,12 @@ static int advance(TSLexer *lexer) {
|
|||
|
||||
static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
|
||||
size_t current_position = ts_lexer_position(lexer);
|
||||
size_t size = current_position - lexer->token_end_position;
|
||||
size_t size = current_position - lexer->token_start_position;
|
||||
size_t padding = lexer->token_start_position - lexer->token_end_position;
|
||||
lexer->token_end_position = current_position;
|
||||
return (symbol == ts_builtin_sym_error)
|
||||
? ts_tree_make_error(size, ts_lexer_lookahead_char(lexer))
|
||||
: ts_tree_make_leaf(symbol, size, is_hidden);
|
||||
? ts_tree_make_error(size, padding, ts_lexer_lookahead_char(lexer))
|
||||
: ts_tree_make_leaf(symbol, size, padding, is_hidden);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -41,6 +42,7 @@ TSLexer ts_lexer_make() {
|
|||
.chunk_start = 0,
|
||||
.chunk_size = 0,
|
||||
.position_in_chunk = 0,
|
||||
.token_start_position = 0,
|
||||
.token_end_position = 0,
|
||||
.advance_fn = advance,
|
||||
.accept_fn = accept, };
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index,
|
|||
}
|
||||
|
||||
TSNode *ts_node_make_root(const TSTree *tree, const char **names) {
|
||||
return ts_node_make(tree, NULL, 0, 0, names);
|
||||
return ts_node_make(tree, NULL, 0, tree->padding, names);
|
||||
}
|
||||
|
||||
void ts_node_retain(TSNode *node) { node->ref_count++; }
|
||||
|
|
|
|||
|
|
@ -36,24 +36,16 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) {
|
|||
break;
|
||||
|
||||
stack->size--;
|
||||
position -= node->size;
|
||||
|
||||
DEBUG_PARSE("BREAKDOWN %s %u", parser->language->symbol_names[node->symbol],
|
||||
ts_stack_top_state(stack));
|
||||
position -= ts_tree_total_size(node);
|
||||
|
||||
for (size_t i = 0; i < child_count && position < edit->position; i++) {
|
||||
TSTree *child = children[i];
|
||||
TSStateId state = ts_stack_top_state(stack);
|
||||
TSParseAction action = action_for(parser->language, state, child->symbol);
|
||||
TSStateId next_state = (action.type == TSParseActionTypeShift)
|
||||
? action.data.to_state
|
||||
: state;
|
||||
TSStateId next_state =
|
||||
action_for(parser->language, state, child->symbol).data.to_state;
|
||||
ts_stack_push(stack, next_state, child);
|
||||
ts_tree_retain(child);
|
||||
position += child->size;
|
||||
|
||||
DEBUG_PARSE("PUT_BACK %s %u",
|
||||
parser->language->symbol_names[child->symbol], next_state);
|
||||
position += ts_tree_total_size(child);
|
||||
}
|
||||
|
||||
ts_tree_release(node);
|
||||
|
|
@ -84,7 +76,9 @@ static void reduce(TSParser *parser, TSSymbol symbol, size_t child_count) {
|
|||
* The child node count is known ahead of time, but some children
|
||||
* may be ubiquitous tokens, which don't count.
|
||||
*/
|
||||
for (size_t i = 0; i < child_count && child_count < stack->size; i++) {
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
if (child_count == stack->size)
|
||||
break;
|
||||
TSTree *child = stack->entries[stack->size - 1 - i].node;
|
||||
if (ts_tree_is_extra(child))
|
||||
child_count++;
|
||||
|
|
@ -127,30 +121,15 @@ static void lex(TSParser *parser, TSStateId lex_state) {
|
|||
static int handle_error(TSParser *parser) {
|
||||
TSTree *error = parser->lookahead;
|
||||
ts_tree_retain(error);
|
||||
size_t last_token_end = parser->lexer.token_end_position;
|
||||
|
||||
for (;;) {
|
||||
|
||||
/*
|
||||
* If there is no state in the stack for which we can recover with the
|
||||
* current lookahead token, advance to the next token. If no characters
|
||||
* were consumed, advance the lexer to the next character.
|
||||
*/
|
||||
size_t prev_position = ts_lexer_position(&parser->lexer);
|
||||
lex(parser, ts_lex_state_error);
|
||||
if (ts_lexer_position(&parser->lexer) == prev_position) {
|
||||
parser->lexer.token_end_position++;
|
||||
if (!ts_lexer_advance(&parser->lexer)) {
|
||||
DEBUG_PARSE("FAIL TO RECOVER");
|
||||
ts_stack_push(&parser->stack, 0, error);
|
||||
ts_tree_release(error);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Unwind the parse stack until a state is found in which an error is
|
||||
* expected and the current lookahead token is expected afterwards.
|
||||
*/
|
||||
size_t error_start = last_token_end;
|
||||
TS_STACK_FROM_TOP(parser->stack, entry, i) {
|
||||
TSParseAction action_on_error =
|
||||
action_for(parser->language, entry->state, ts_builtin_sym_error);
|
||||
|
|
@ -160,25 +139,41 @@ static int handle_error(TSParser *parser) {
|
|||
TSParseAction action_after_error = action_for(
|
||||
parser->language, state_after_error, parser->lookahead->symbol);
|
||||
|
||||
if (action_after_error.type == TSParseActionTypeShift ||
|
||||
action_after_error.type == TSParseActionTypeReduce) {
|
||||
if (action_after_error.type != TSParseActionTypeError) {
|
||||
DEBUG_PARSE("RECOVER %u", state_after_error);
|
||||
error->size += ts_lexer_position(&parser->lexer) - 1 - error_start;
|
||||
ts_stack_shrink(&parser->stack, i + 1);
|
||||
error->size = ts_lexer_position(&parser->lexer) -
|
||||
parser->lookahead->size -
|
||||
ts_stack_right_position(&parser->stack);
|
||||
ts_stack_push(&parser->stack, state_after_error, error);
|
||||
ts_tree_release(error);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
TSTree *removed_tree = entry->node;
|
||||
error_start -= ts_tree_total_size(removed_tree);
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is no state in the stack for which we can recover with the
|
||||
* current lookahead token, advance to the next token. If no characters
|
||||
* were consumed, advance the lexer to the next character.
|
||||
*/
|
||||
size_t prev_position = ts_lexer_position(&parser->lexer);
|
||||
lex(parser, ts_lex_state_error);
|
||||
parser->lookahead->padding = 0;
|
||||
if (ts_lexer_position(&parser->lexer) == prev_position)
|
||||
if (!ts_lexer_advance(&parser->lexer)) {
|
||||
DEBUG_PARSE("FAIL TO RECOVER");
|
||||
ts_stack_push(&parser->stack, 0, error);
|
||||
ts_tree_release(error);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static TSTree *get_root(TSParser *parser) {
|
||||
if (parser->stack.size == 0)
|
||||
ts_stack_push(&parser->stack, 0, ts_tree_make_error(0, 0));
|
||||
ts_stack_push(&parser->stack, 0, ts_tree_make_error(0, 0, 0));
|
||||
|
||||
reduce(parser, ts_builtin_sym_document, parser->stack.size);
|
||||
parser->lookahead->options = 0;
|
||||
|
|
|
|||
|
|
@ -50,26 +50,7 @@ size_t ts_stack_right_position(const TSStack *stack) {
|
|||
size_t result = 0;
|
||||
for (size_t i = 0; i < stack->size; i++) {
|
||||
TSTree *node = stack->entries[i].node;
|
||||
result += node->size;
|
||||
result += ts_tree_total_size(node);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
TSTree **ts_stack_pop_extras(TSStack *stack, size_t *count) {
|
||||
size_t first = stack->size;
|
||||
while (first > 0 && ts_tree_is_extra(stack->entries[first - 1].node))
|
||||
first--;
|
||||
|
||||
*count = (stack->size - first);
|
||||
if (*count == 0)
|
||||
return NULL;
|
||||
|
||||
TSTree **result = malloc(*count * sizeof(TSTree *));
|
||||
for (size_t i = 0; i < *count; i++) {
|
||||
result[i] = stack->entries[first + i].node;
|
||||
ts_tree_retain(result[i]);
|
||||
}
|
||||
|
||||
ts_stack_shrink(stack, first - 1);
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@ void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node);
|
|||
TSStateId ts_stack_top_state(const TSStack *stack);
|
||||
TSTree *ts_stack_top_node(const TSStack *stack);
|
||||
size_t ts_stack_right_position(const TSStack *stack);
|
||||
TSTree **ts_stack_pop_extras(TSStack *, size_t *);
|
||||
|
||||
#define TS_STACK_FROM_TOP(stack, entry, index) \
|
||||
size_t index = stack.size - 1; \
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "tree_sitter/parser.h"
|
||||
#include "runtime/tree.h"
|
||||
|
||||
TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, bool is_hidden) {
|
||||
TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, size_t padding, bool is_hidden) {
|
||||
TSTree *result = malloc(sizeof(TSTree));
|
||||
*result = (TSTree) { .ref_count = 1,
|
||||
.symbol = sym,
|
||||
|
|
@ -12,12 +12,13 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, bool is_hidden) {
|
|||
.child_count = 0,
|
||||
.children = NULL,
|
||||
.lookahead_char = 0,
|
||||
.padding = padding,
|
||||
.options = is_hidden ? TSTreeOptionsHidden : 0, };
|
||||
return result;
|
||||
}
|
||||
|
||||
TSTree *ts_tree_make_error(size_t size, char lookahead_char) {
|
||||
TSTree *result = ts_tree_make_leaf(ts_builtin_sym_error, size, false);
|
||||
TSTree *ts_tree_make_error(size_t size, size_t padding, char lookahead_char) {
|
||||
TSTree *result = ts_tree_make_leaf(ts_builtin_sym_error, size, padding, false);
|
||||
result->lookahead_char = lookahead_char;
|
||||
return result;
|
||||
}
|
||||
|
|
@ -26,14 +27,20 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
|
|||
TSTree **children, bool is_hidden) {
|
||||
|
||||
/*
|
||||
* Determine the new node's size and visible child count based on
|
||||
* Determine the new node's size, padding and visible child count based on
|
||||
* the given child nodes.
|
||||
*/
|
||||
size_t size = 0, visible_child_count = 0;
|
||||
size_t size = 0, padding = 0, visible_child_count = 0;
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
TSTree *child = children[i];
|
||||
ts_tree_retain(child);
|
||||
size += child->size;
|
||||
|
||||
if (i == 0) {
|
||||
padding = child->padding;
|
||||
size = child->size;
|
||||
} else {
|
||||
size += child->padding + child->size;
|
||||
}
|
||||
|
||||
if (ts_tree_is_visible(child))
|
||||
visible_child_count++;
|
||||
|
|
@ -63,6 +70,7 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
|
|||
.child_count = child_count,
|
||||
.visible_child_count = visible_child_count,
|
||||
.size = size,
|
||||
.padding = padding,
|
||||
.options = options };
|
||||
|
||||
/*
|
||||
|
|
@ -73,6 +81,9 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
|
|||
for (size_t i = 0, vis_i = 0, offset = 0; i < child_count; i++) {
|
||||
TSTree *child = children[i];
|
||||
|
||||
if (i > 0)
|
||||
offset += child->padding;
|
||||
|
||||
if (ts_tree_is_visible(child)) {
|
||||
visible_children[vis_i].tree = child;
|
||||
visible_children[vis_i].offset = offset;
|
||||
|
|
@ -107,6 +118,10 @@ void ts_tree_release(TSTree *tree) {
|
|||
}
|
||||
}
|
||||
|
||||
size_t ts_tree_total_size(const TSTree *tree) {
|
||||
return tree->padding + tree->size;
|
||||
}
|
||||
|
||||
int ts_tree_equals(const TSTree *node1, const TSTree *node2) {
|
||||
if (node1->symbol != node2->symbol)
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ struct TSTree {
|
|||
TSSymbol symbol;
|
||||
TSTreeOptions options;
|
||||
size_t ref_count;
|
||||
size_t padding;
|
||||
size_t size;
|
||||
char lookahead_char;
|
||||
size_t child_count;
|
||||
|
|
@ -46,9 +47,9 @@ static inline int ts_tree_is_wrapper(const TSTree *tree) {
|
|||
return (tree->options & TSTreeOptionsWrapper);
|
||||
}
|
||||
|
||||
TSTree *ts_tree_make_leaf(TSSymbol, size_t, bool);
|
||||
TSTree *ts_tree_make_leaf(TSSymbol, size_t, size_t, bool);
|
||||
TSTree *ts_tree_make_node(TSSymbol, size_t, TSTree **, bool);
|
||||
TSTree *ts_tree_make_error(size_t, char);
|
||||
TSTree *ts_tree_make_error(size_t size, size_t padding, char lookahead_char);
|
||||
void ts_tree_retain(TSTree *tree);
|
||||
void ts_tree_release(TSTree *tree);
|
||||
int ts_tree_equals(const TSTree *tree1, const TSTree *tree2);
|
||||
|
|
@ -56,6 +57,7 @@ char *ts_tree_string(const TSTree *tree, const char **names);
|
|||
char *ts_tree_error_string(const TSTree *tree, const char **names);
|
||||
TSTree **ts_tree_children(const TSTree *tree, size_t *count);
|
||||
TSTreeChild *ts_tree_visible_children(const TSTree *tree, size_t *count);
|
||||
size_t ts_tree_total_size(const TSTree *tree);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue