Revert "Remove the separator characters construct"

This reverts commit 5cd07648fd.

The separators construct is useful as an optimization. It turns out that
constructing a node for every chunk of whitespace in a document causes a
significant performance regression.

Conflicts:
	src/compiler/build_tables/build_lex_table.cc
	src/compiler/grammar.cc
	src/runtime/parser.c
This commit is contained in:
Max Brunsfeld 2014-09-02 07:41:29 -07:00
parent e941f8c175
commit 545e575508
43 changed files with 9065 additions and 11203 deletions

View file

@ -6,7 +6,7 @@ namespace tree_sitter_examples {
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
extern const Grammar arithmetic = Grammar({
extern const Grammar arithmetic({
{ "expression", choice({
sym("sum"),
sym("difference"),
@ -26,7 +26,6 @@ extern const Grammar arithmetic = Grammar({
{ "number", pattern("\\d+") },
{ "variable", pattern("\\a[\\w_]*") },
{ "_whitespace", pattern("[\\s\n]+") },
}).ubiquitous_tokens({ "_whitespace" });
});
} // namespace tree_sitter_examples

View file

@ -158,7 +158,6 @@ extern const Grammar golang = Grammar({
blank() }) }) },
{ "_line_break", str("\n") },
{ "_whitespace", pattern("\\s+") },
{ "string", delimited("\"") },
{ "package_name", sym("_identifier") },
@ -168,6 +167,7 @@ extern const Grammar golang = Grammar({
{ "number", pattern("\\d+(\\.\\d+)?") },
{ "comment", keypattern("//[^\n]*") },
})
.ubiquitous_tokens({ "comment", "_whitespace", "_line_break" });
.ubiquitous_tokens({ "comment", "_line_break" })
.separators({ ' ', '\t', '\r' });
} // namespace tree_sitter_examples

View file

@ -208,13 +208,13 @@ extern const Grammar javascript = Grammar({
delimited("\""),
delimited("'") })) },
{ "_line_break", str("\n") },
{ "_whitespace", pattern("\\s+") },
{ "identifier", pattern("[\\a_$][\\w_$]*") },
{ "number", pattern("\\d+(\\.\\d+)?") },
{ "null", keyword("null") },
{ "true", keyword("true") },
{ "false", keyword("false") },
})
.ubiquitous_tokens({ "comment", "_whitespace", "_line_break" });
.ubiquitous_tokens({ "comment", "_line_break" })
.separators({ ' ', '\t', '\r' });
} // namespace tree_sitter_examples

View file

@ -6,7 +6,7 @@ namespace tree_sitter_examples {
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
extern const Grammar json = Grammar({
extern const Grammar json({
{ "value", choice({
sym("object"),
sym("array"),
@ -25,7 +25,6 @@ extern const Grammar json = Grammar({
{ "null", keyword("null") },
{ "true", keyword("true") },
{ "false", keyword("false") },
{ "_whitespace", pattern("[\\s\n]+") },
}).ubiquitous_tokens({ "_whitespace" });
});
} // namespace tree_sitter_examples

View file

@ -1,7 +1,7 @@
#include "tree_sitter/parser.h"
#define STATE_COUNT 32
#define SYMBOL_COUNT 20
#define SYMBOL_COUNT 19
enum {
ts_sym_expression = ts_builtin_sym_start,
@ -13,7 +13,6 @@ enum {
ts_sym_group,
ts_sym_number,
ts_sym_variable,
ts_sym__whitespace,
ts_aux_sym_1,
ts_aux_sym_2,
ts_aux_sym_3,
@ -36,7 +35,6 @@ SYMBOL_NAMES = {
[ts_builtin_sym_end] = "end",
[ts_sym_number] = "number",
[ts_sym_variable] = "variable",
[ts_sym__whitespace] = "_whitespace",
[ts_aux_sym_1] = "'+'",
[ts_aux_sym_2] = "'-'",
[ts_aux_sym_3] = "'*'",
@ -47,7 +45,6 @@ SYMBOL_NAMES = {
};
HIDDEN_SYMBOLS = {
[ts_sym__whitespace] = 1,
[ts_aux_sym_1] = 1,
[ts_aux_sym_2] = 1,
[ts_aux_sym_3] = 1,
@ -61,127 +58,154 @@ LEX_FN() {
START_LEXER();
switch (lex_state) {
case 1:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
ADVANCE(1);
if (lookahead == '(')
ADVANCE(3);
ADVANCE(2);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(4);
ADVANCE(3);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(5);
ADVANCE(4);
LEX_ERROR();
case 2:
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
ACCEPT_TOKEN(ts_sym__whitespace);
case 3:
ACCEPT_TOKEN(ts_aux_sym_6);
case 4:
case 3:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(4);
ADVANCE(3);
ACCEPT_TOKEN(ts_sym_number);
case 5:
case 4:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(5);
ACCEPT_TOKEN(ts_sym_variable);
case 6:
if (lookahead == 0)
ADVANCE(7);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
if (lookahead == '*')
ADVANCE(8);
if (lookahead == '+')
ADVANCE(9);
if (lookahead == '-')
ADVANCE(10);
if (lookahead == '/')
ADVANCE(11);
if (lookahead == '^')
ADVANCE(12);
LEX_ERROR();
case 7:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 8:
ACCEPT_TOKEN(ts_aux_sym_3);
case 9:
ACCEPT_TOKEN(ts_aux_sym_1);
case 10:
ACCEPT_TOKEN(ts_aux_sym_2);
case 11:
ACCEPT_TOKEN(ts_aux_sym_4);
case 12:
ACCEPT_TOKEN(ts_aux_sym_5);
case 13:
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
if (lookahead == ')')
ADVANCE(14);
if (lookahead == '*')
ADVANCE(8);
if (lookahead == '+')
ADVANCE(9);
if (lookahead == '-')
ADVANCE(10);
if (lookahead == '/')
ADVANCE(11);
if (lookahead == '^')
ADVANCE(12);
LEX_ERROR();
case 14:
ACCEPT_TOKEN(ts_aux_sym_7);
case 15:
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
if (lookahead == ')')
ADVANCE(14);
LEX_ERROR();
case ts_lex_state_error:
if (lookahead == 0)
ADVANCE(7);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
if (lookahead == '(')
ADVANCE(3);
if (lookahead == ')')
ADVANCE(14);
if (lookahead == '*')
ADVANCE(8);
if (lookahead == '+')
ADVANCE(9);
if (lookahead == '-')
ADVANCE(10);
if (lookahead == '/')
ADVANCE(11);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(4);
ACCEPT_TOKEN(ts_sym_variable);
case 5:
START_TOKEN();
if (lookahead == 0)
ADVANCE(6);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(5);
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '+')
ADVANCE(8);
if (lookahead == '-')
ADVANCE(9);
if (lookahead == '/')
ADVANCE(10);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
case 6:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 7:
ACCEPT_TOKEN(ts_aux_sym_3);
case 8:
ACCEPT_TOKEN(ts_aux_sym_1);
case 9:
ACCEPT_TOKEN(ts_aux_sym_2);
case 10:
ACCEPT_TOKEN(ts_aux_sym_4);
case 11:
ACCEPT_TOKEN(ts_aux_sym_5);
case 12:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(12);
if (lookahead == ')')
ADVANCE(13);
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '+')
ADVANCE(8);
if (lookahead == '-')
ADVANCE(9);
if (lookahead == '/')
ADVANCE(10);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
case 13:
ACCEPT_TOKEN(ts_aux_sym_7);
case 14:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(14);
if (lookahead == ')')
ADVANCE(13);
LEX_ERROR();
case 15:
START_TOKEN();
if (lookahead == 0)
ADVANCE(6);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(15);
if (lookahead == '(')
ADVANCE(2);
if (lookahead == ')')
ADVANCE(13);
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '+')
ADVANCE(8);
if (lookahead == '-')
ADVANCE(9);
if (lookahead == '/')
ADVANCE(10);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(3);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(5);
ADVANCE(4);
if (lookahead == '^')
ADVANCE(12);
ADVANCE(11);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)
ADVANCE(6);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(15);
if (lookahead == '(')
ADVANCE(2);
if (lookahead == ')')
ADVANCE(13);
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '+')
ADVANCE(8);
if (lookahead == '-')
ADVANCE(9);
if (lookahead == '/')
ADVANCE(10);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(3);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(4);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
default:
LEX_ERROR();
@ -190,37 +214,37 @@ LEX_FN() {
LEX_STATES = {
[0] = 1,
[1] = 6,
[2] = 6,
[1] = 5,
[2] = 5,
[3] = 1,
[4] = 13,
[5] = 13,
[6] = 15,
[4] = 12,
[5] = 12,
[6] = 14,
[7] = 1,
[8] = 13,
[9] = 15,
[10] = 13,
[8] = 12,
[9] = 14,
[10] = 12,
[11] = 1,
[12] = 1,
[13] = 1,
[14] = 1,
[15] = 1,
[16] = 13,
[17] = 13,
[18] = 13,
[19] = 13,
[20] = 13,
[21] = 6,
[16] = 12,
[17] = 12,
[18] = 12,
[19] = 12,
[20] = 12,
[21] = 5,
[22] = 1,
[23] = 1,
[24] = 1,
[25] = 1,
[26] = 1,
[27] = 6,
[28] = 6,
[29] = 6,
[30] = 6,
[31] = 6,
[27] = 5,
[28] = 5,
[29] = 5,
[30] = 5,
[31] = 5,
};
#pragma GCC diagnostic push
@ -237,12 +261,10 @@ PARSE_TABLE = {
[ts_sym_group] = SHIFT(2),
[ts_sym_number] = SHIFT(2),
[ts_sym_variable] = SHIFT(2),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(3),
},
[1] = {
[ts_builtin_sym_end] = ACCEPT_INPUT(),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(22),
[ts_aux_sym_2] = SHIFT(23),
[ts_aux_sym_3] = SHIFT(24),
@ -251,7 +273,6 @@ PARSE_TABLE = {
},
[2] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_expression, 1),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = REDUCE(ts_sym_expression, 1),
[ts_aux_sym_2] = REDUCE(ts_sym_expression, 1),
[ts_aux_sym_3] = REDUCE(ts_sym_expression, 1),
@ -269,11 +290,9 @@ PARSE_TABLE = {
[ts_builtin_sym_error] = SHIFT(6),
[ts_sym_number] = SHIFT(5),
[ts_sym_variable] = SHIFT(5),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(7),
},
[4] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(11),
[ts_aux_sym_2] = SHIFT(12),
[ts_aux_sym_3] = SHIFT(13),
@ -282,7 +301,6 @@ PARSE_TABLE = {
[ts_aux_sym_7] = SHIFT(21),
},
[5] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = REDUCE(ts_sym_expression, 1),
[ts_aux_sym_2] = REDUCE(ts_sym_expression, 1),
[ts_aux_sym_3] = REDUCE(ts_sym_expression, 1),
@ -291,7 +309,6 @@ PARSE_TABLE = {
[ts_aux_sym_7] = REDUCE(ts_sym_expression, 1),
},
[6] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_7] = SHIFT(21),
},
[7] = {
@ -305,11 +322,9 @@ PARSE_TABLE = {
[ts_builtin_sym_error] = SHIFT(9),
[ts_sym_number] = SHIFT(5),
[ts_sym_variable] = SHIFT(5),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(7),
},
[8] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(11),
[ts_aux_sym_2] = SHIFT(12),
[ts_aux_sym_3] = SHIFT(13),
@ -318,11 +333,9 @@ PARSE_TABLE = {
[ts_aux_sym_7] = SHIFT(10),
},
[9] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_7] = SHIFT(10),
},
[10] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = REDUCE(ts_sym_group, 3),
[ts_aux_sym_2] = REDUCE(ts_sym_group, 3),
[ts_aux_sym_3] = REDUCE(ts_sym_group, 3),
@ -340,7 +353,6 @@ PARSE_TABLE = {
[ts_sym_group] = SHIFT(5),
[ts_sym_number] = SHIFT(5),
[ts_sym_variable] = SHIFT(5),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(7),
},
[12] = {
@ -353,7 +365,6 @@ PARSE_TABLE = {
[ts_sym_group] = SHIFT(5),
[ts_sym_number] = SHIFT(5),
[ts_sym_variable] = SHIFT(5),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(7),
},
[13] = {
@ -366,7 +377,6 @@ PARSE_TABLE = {
[ts_sym_group] = SHIFT(5),
[ts_sym_number] = SHIFT(5),
[ts_sym_variable] = SHIFT(5),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(7),
},
[14] = {
@ -379,7 +389,6 @@ PARSE_TABLE = {
[ts_sym_group] = SHIFT(5),
[ts_sym_number] = SHIFT(5),
[ts_sym_variable] = SHIFT(5),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(7),
},
[15] = {
@ -392,11 +401,9 @@ PARSE_TABLE = {
[ts_sym_group] = SHIFT(5),
[ts_sym_number] = SHIFT(5),
[ts_sym_variable] = SHIFT(5),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(7),
},
[16] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = REDUCE(ts_sym_exponent, 3),
[ts_aux_sym_2] = REDUCE(ts_sym_exponent, 3),
[ts_aux_sym_3] = REDUCE(ts_sym_exponent, 3),
@ -405,7 +412,6 @@ PARSE_TABLE = {
[ts_aux_sym_7] = REDUCE(ts_sym_exponent, 3),
},
[17] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = REDUCE(ts_sym_quotient, 3),
[ts_aux_sym_2] = REDUCE(ts_sym_quotient, 3),
[ts_aux_sym_3] = SHIFT(13),
@ -414,7 +420,6 @@ PARSE_TABLE = {
[ts_aux_sym_7] = REDUCE(ts_sym_quotient, 3),
},
[18] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = REDUCE(ts_sym_product, 3),
[ts_aux_sym_2] = REDUCE(ts_sym_product, 3),
[ts_aux_sym_3] = SHIFT(13),
@ -423,7 +428,6 @@ PARSE_TABLE = {
[ts_aux_sym_7] = REDUCE(ts_sym_product, 3),
},
[19] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(11),
[ts_aux_sym_2] = SHIFT(12),
[ts_aux_sym_3] = SHIFT(13),
@ -432,7 +436,6 @@ PARSE_TABLE = {
[ts_aux_sym_7] = REDUCE(ts_sym_difference, 3),
},
[20] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(11),
[ts_aux_sym_2] = SHIFT(12),
[ts_aux_sym_3] = SHIFT(13),
@ -442,7 +445,6 @@ PARSE_TABLE = {
},
[21] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_group, 3),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = REDUCE(ts_sym_group, 3),
[ts_aux_sym_2] = REDUCE(ts_sym_group, 3),
[ts_aux_sym_3] = REDUCE(ts_sym_group, 3),
@ -459,7 +461,6 @@ PARSE_TABLE = {
[ts_sym_group] = SHIFT(2),
[ts_sym_number] = SHIFT(2),
[ts_sym_variable] = SHIFT(2),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(3),
},
[23] = {
@ -472,7 +473,6 @@ PARSE_TABLE = {
[ts_sym_group] = SHIFT(2),
[ts_sym_number] = SHIFT(2),
[ts_sym_variable] = SHIFT(2),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(3),
},
[24] = {
@ -485,7 +485,6 @@ PARSE_TABLE = {
[ts_sym_group] = SHIFT(2),
[ts_sym_number] = SHIFT(2),
[ts_sym_variable] = SHIFT(2),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(3),
},
[25] = {
@ -498,7 +497,6 @@ PARSE_TABLE = {
[ts_sym_group] = SHIFT(2),
[ts_sym_number] = SHIFT(2),
[ts_sym_variable] = SHIFT(2),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(3),
},
[26] = {
@ -511,12 +509,10 @@ PARSE_TABLE = {
[ts_sym_group] = SHIFT(2),
[ts_sym_number] = SHIFT(2),
[ts_sym_variable] = SHIFT(2),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(3),
},
[27] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_exponent, 3),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = REDUCE(ts_sym_exponent, 3),
[ts_aux_sym_2] = REDUCE(ts_sym_exponent, 3),
[ts_aux_sym_3] = REDUCE(ts_sym_exponent, 3),
@ -525,7 +521,6 @@ PARSE_TABLE = {
},
[28] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_quotient, 3),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = REDUCE(ts_sym_quotient, 3),
[ts_aux_sym_2] = REDUCE(ts_sym_quotient, 3),
[ts_aux_sym_3] = SHIFT(24),
@ -534,7 +529,6 @@ PARSE_TABLE = {
},
[29] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_product, 3),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = REDUCE(ts_sym_product, 3),
[ts_aux_sym_2] = REDUCE(ts_sym_product, 3),
[ts_aux_sym_3] = SHIFT(24),
@ -543,7 +537,6 @@ PARSE_TABLE = {
},
[30] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_difference, 3),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(22),
[ts_aux_sym_2] = SHIFT(23),
[ts_aux_sym_3] = SHIFT(24),
@ -552,7 +545,6 @@ PARSE_TABLE = {
},
[31] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_sum, 3),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(22),
[ts_aux_sym_2] = SHIFT(23),
[ts_aux_sym_3] = SHIFT(24),

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
#include "tree_sitter/parser.h"
#define STATE_COUNT 60
#define SYMBOL_COUNT 20
#define SYMBOL_COUNT 19
enum {
ts_sym_value = ts_builtin_sym_start,
@ -12,7 +12,6 @@ enum {
ts_sym_null,
ts_sym_true,
ts_sym_false,
ts_sym__whitespace,
ts_aux_sym_object_repeat0,
ts_aux_sym_array_repeat0,
ts_aux_sym_1,
@ -35,7 +34,6 @@ SYMBOL_NAMES = {
[ts_sym_null] = "null",
[ts_sym_true] = "true",
[ts_sym_false] = "false",
[ts_sym__whitespace] = "_whitespace",
[ts_aux_sym_object_repeat0] = "object_repeat0",
[ts_aux_sym_array_repeat0] = "array_repeat0",
[ts_aux_sym_1] = "'{'",
@ -47,7 +45,6 @@ SYMBOL_NAMES = {
};
HIDDEN_SYMBOLS = {
[ts_sym__whitespace] = 1,
[ts_aux_sym_object_repeat0] = 1,
[ts_aux_sym_array_repeat0] = 1,
[ts_aux_sym_1] = 1,
@ -62,98 +59,96 @@ LEX_FN() {
START_LEXER();
switch (lex_state) {
case 1:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
ADVANCE(1);
if (lookahead == '\"')
ADVANCE(3);
ADVANCE(2);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
ADVANCE(6);
if (lookahead == '[')
ADVANCE(10);
ADVANCE(9);
if (lookahead == 'f')
ADVANCE(11);
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(16);
ADVANCE(15);
if (lookahead == 't')
ADVANCE(20);
ADVANCE(19);
if (lookahead == '{')
ADVANCE(24);
ADVANCE(23);
LEX_ERROR();
case 2:
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
ACCEPT_TOKEN(ts_sym__whitespace);
case 3:
if (lookahead == '\"')
ADVANCE(4);
ADVANCE(3);
if (lookahead == '\\')
ADVANCE(5);
ADVANCE(4);
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(3);
ADVANCE(2);
LEX_ERROR();
case 4:
case 3:
ACCEPT_TOKEN(ts_sym_string);
case 4:
if (lookahead == '\"')
ADVANCE(5);
if (lookahead == '\\')
ADVANCE(4);
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(2);
LEX_ERROR();
case 5:
if (lookahead == '\"')
ADVANCE(6);
if (lookahead == '\\')
ADVANCE(5);
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(3);
LEX_ERROR();
case 6:
if (lookahead == '\"')
if (lookahead == '\\')
ADVANCE(4);
if (lookahead == '\\')
ADVANCE(5);
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(3);
ADVANCE(2);
ACCEPT_TOKEN(ts_sym_string);
case 7:
case 6:
if (lookahead == '.')
ADVANCE(8);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(6);
ACCEPT_TOKEN(ts_sym_number);
case 7:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(8);
LEX_ERROR();
case 8:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(9);
LEX_ERROR();
case 9:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(9);
ADVANCE(8);
ACCEPT_TOKEN(ts_sym_number);
case 10:
case 9:
ACCEPT_TOKEN(ts_aux_sym_5);
case 11:
case 10:
if (lookahead == 'a')
ADVANCE(11);
LEX_ERROR();
case 11:
if (lookahead == 'l')
ADVANCE(12);
LEX_ERROR();
case 12:
if (lookahead == 'l')
if (lookahead == 's')
ADVANCE(13);
LEX_ERROR();
case 13:
if (lookahead == 's')
if (lookahead == 'e')
ADVANCE(14);
LEX_ERROR();
case 14:
if (lookahead == 'e')
ADVANCE(15);
LEX_ERROR();
case 15:
ACCEPT_TOKEN(ts_sym_false);
case 16:
case 15:
if (lookahead == 'u')
ADVANCE(16);
LEX_ERROR();
case 16:
if (lookahead == 'l')
ADVANCE(17);
LEX_ERROR();
case 17:
@ -161,168 +156,206 @@ LEX_FN() {
ADVANCE(18);
LEX_ERROR();
case 18:
if (lookahead == 'l')
ADVANCE(19);
LEX_ERROR();
case 19:
ACCEPT_TOKEN(ts_sym_null);
case 20:
case 19:
if (lookahead == 'r')
ADVANCE(20);
LEX_ERROR();
case 20:
if (lookahead == 'u')
ADVANCE(21);
LEX_ERROR();
case 21:
if (lookahead == 'u')
if (lookahead == 'e')
ADVANCE(22);
LEX_ERROR();
case 22:
if (lookahead == 'e')
ACCEPT_TOKEN(ts_sym_true);
case 23:
ACCEPT_TOKEN(ts_aux_sym_1);
case 24:
START_TOKEN();
if (lookahead == 0)
ADVANCE(25);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(24);
LEX_ERROR();
case 25:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 26:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(26);
if (lookahead == '\"')
ADVANCE(2);
if (lookahead == '}')
ADVANCE(27);
LEX_ERROR();
case 27:
ACCEPT_TOKEN(ts_aux_sym_4);
case 28:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(28);
if (lookahead == '\"')
ADVANCE(2);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(6);
if (lookahead == '[')
ADVANCE(9);
if (lookahead == ']')
ADVANCE(29);
if (lookahead == 'f')
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(15);
if (lookahead == 't')
ADVANCE(19);
if (lookahead == '{')
ADVANCE(23);
LEX_ERROR();
case 23:
ACCEPT_TOKEN(ts_sym_true);
case 24:
ACCEPT_TOKEN(ts_aux_sym_1);
case 25:
if (lookahead == 0)
ADVANCE(26);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
LEX_ERROR();
case 26:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 27:
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(3);
if (lookahead == '}')
ADVANCE(28);
LEX_ERROR();
case 28:
ACCEPT_TOKEN(ts_aux_sym_4);
case 29:
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(3);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
if (lookahead == '[')
ADVANCE(10);
if (lookahead == ']')
ADVANCE(30);
if (lookahead == 'f')
ADVANCE(11);
if (lookahead == 'n')
ADVANCE(16);
if (lookahead == 't')
ADVANCE(20);
if (lookahead == '{')
ADVANCE(24);
LEX_ERROR();
case 30:
ACCEPT_TOKEN(ts_aux_sym_6);
case 31:
case 30:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
ADVANCE(30);
if (lookahead == ',')
ADVANCE(31);
if (lookahead == ']')
ADVANCE(29);
LEX_ERROR();
case 31:
ACCEPT_TOKEN(ts_aux_sym_3);
case 32:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(32);
if (lookahead == ']')
ADVANCE(30);
ADVANCE(29);
LEX_ERROR();
case 32:
ACCEPT_TOKEN(ts_aux_sym_3);
case 33:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
if (lookahead == ']')
ADVANCE(30);
ADVANCE(33);
if (lookahead == ',')
ADVANCE(31);
if (lookahead == '}')
ADVANCE(27);
LEX_ERROR();
case 34:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
if (lookahead == ',')
ADVANCE(32);
if (lookahead == '}')
ADVANCE(28);
ADVANCE(34);
if (lookahead == ':')
ADVANCE(35);
LEX_ERROR();
case 35:
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
if (lookahead == ':')
ADVANCE(36);
LEX_ERROR();
case 36:
ACCEPT_TOKEN(ts_aux_sym_2);
case 37:
case 36:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
ADVANCE(36);
if (lookahead == '}')
ADVANCE(28);
ADVANCE(27);
LEX_ERROR();
case 37:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(37);
if (lookahead == '\"')
ADVANCE(2);
LEX_ERROR();
case 38:
START_TOKEN();
if (lookahead == 0)
ADVANCE(25);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
ADVANCE(38);
if (lookahead == '\"')
ADVANCE(3);
ADVANCE(2);
if (lookahead == ',')
ADVANCE(31);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(6);
if (lookahead == ':')
ADVANCE(35);
if (lookahead == '[')
ADVANCE(9);
if (lookahead == ']')
ADVANCE(29);
if (lookahead == 'f')
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(15);
if (lookahead == 't')
ADVANCE(19);
if (lookahead == '{')
ADVANCE(23);
if (lookahead == '}')
ADVANCE(27);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)
ADVANCE(26);
ADVANCE(25);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
ADVANCE(38);
if (lookahead == '\"')
ADVANCE(3);
ADVANCE(2);
if (lookahead == ',')
ADVANCE(32);
ADVANCE(31);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
ADVANCE(6);
if (lookahead == ':')
ADVANCE(36);
ADVANCE(35);
if (lookahead == '[')
ADVANCE(10);
ADVANCE(9);
if (lookahead == ']')
ADVANCE(30);
ADVANCE(29);
if (lookahead == 'f')
ADVANCE(11);
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(16);
ADVANCE(15);
if (lookahead == 't')
ADVANCE(20);
ADVANCE(19);
if (lookahead == '{')
ADVANCE(24);
ADVANCE(23);
if (lookahead == '}')
ADVANCE(28);
ADVANCE(27);
LEX_ERROR();
default:
LEX_ERROR();
@ -331,65 +364,65 @@ LEX_FN() {
LEX_STATES = {
[0] = 1,
[1] = 25,
[2] = 25,
[3] = 27,
[4] = 29,
[5] = 31,
[6] = 31,
[7] = 27,
[8] = 29,
[9] = 25,
[10] = 31,
[11] = 31,
[12] = 33,
[1] = 24,
[2] = 24,
[3] = 26,
[4] = 28,
[5] = 30,
[6] = 30,
[7] = 26,
[8] = 28,
[9] = 24,
[10] = 30,
[11] = 30,
[12] = 32,
[13] = 1,
[14] = 31,
[15] = 33,
[16] = 31,
[17] = 34,
[18] = 35,
[19] = 31,
[14] = 30,
[15] = 32,
[16] = 30,
[17] = 33,
[18] = 34,
[19] = 30,
[20] = 1,
[21] = 34,
[22] = 34,
[23] = 27,
[24] = 29,
[25] = 31,
[26] = 34,
[27] = 33,
[28] = 34,
[29] = 34,
[30] = 35,
[31] = 34,
[21] = 33,
[22] = 33,
[23] = 26,
[24] = 28,
[25] = 30,
[26] = 33,
[27] = 32,
[28] = 33,
[29] = 33,
[30] = 34,
[31] = 33,
[32] = 1,
[33] = 34,
[34] = 37,
[35] = 38,
[36] = 34,
[37] = 35,
[33] = 33,
[34] = 36,
[35] = 37,
[36] = 33,
[37] = 34,
[38] = 1,
[39] = 34,
[40] = 37,
[41] = 37,
[42] = 34,
[43] = 37,
[44] = 34,
[45] = 37,
[46] = 31,
[47] = 37,
[48] = 31,
[49] = 33,
[50] = 25,
[51] = 34,
[52] = 35,
[53] = 25,
[39] = 33,
[40] = 36,
[41] = 36,
[42] = 33,
[43] = 36,
[44] = 33,
[45] = 36,
[46] = 30,
[47] = 36,
[48] = 30,
[49] = 32,
[50] = 24,
[51] = 33,
[52] = 34,
[53] = 24,
[54] = 1,
[55] = 34,
[56] = 37,
[57] = 25,
[58] = 37,
[59] = 25,
[55] = 33,
[56] = 36,
[57] = 24,
[58] = 36,
[59] = 24,
};
#pragma GCC diagnostic push
@ -405,22 +438,18 @@ PARSE_TABLE = {
[ts_sym_null] = SHIFT(2),
[ts_sym_true] = SHIFT(2),
[ts_sym_false] = SHIFT(2),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(3),
[ts_aux_sym_5] = SHIFT(4),
},
[1] = {
[ts_builtin_sym_end] = ACCEPT_INPUT(),
[ts_sym__whitespace] = SHIFT_EXTRA(),
},
[2] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_value, 1),
[ts_sym__whitespace] = SHIFT_EXTRA(),
},
[3] = {
[ts_builtin_sym_error] = SHIFT(51),
[ts_sym_string] = SHIFT(52),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_4] = SHIFT(53),
},
[4] = {
@ -433,26 +462,22 @@ PARSE_TABLE = {
[ts_sym_null] = SHIFT(6),
[ts_sym_true] = SHIFT(6),
[ts_sym_false] = SHIFT(6),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(7),
[ts_aux_sym_5] = SHIFT(8),
[ts_aux_sym_6] = SHIFT(9),
},
[5] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_array_repeat0] = SHIFT(49),
[ts_aux_sym_3] = SHIFT(13),
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 0),
},
[6] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_value, 1),
[ts_aux_sym_6] = REDUCE(ts_sym_value, 1),
},
[7] = {
[ts_builtin_sym_error] = SHIFT(17),
[ts_sym_string] = SHIFT(18),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_4] = SHIFT(19),
},
[8] = {
@ -465,28 +490,23 @@ PARSE_TABLE = {
[ts_sym_null] = SHIFT(6),
[ts_sym_true] = SHIFT(6),
[ts_sym_false] = SHIFT(6),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(7),
[ts_aux_sym_5] = SHIFT(8),
[ts_aux_sym_6] = SHIFT(11),
},
[9] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_array, 2),
[ts_sym__whitespace] = SHIFT_EXTRA(),
},
[10] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_array_repeat0] = SHIFT(12),
[ts_aux_sym_3] = SHIFT(13),
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 0),
},
[11] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_array, 2),
[ts_aux_sym_6] = REDUCE(ts_sym_array, 2),
},
[12] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(16),
},
[13] = {
@ -499,37 +519,30 @@ PARSE_TABLE = {
[ts_sym_null] = SHIFT(6),
[ts_sym_true] = SHIFT(6),
[ts_sym_false] = SHIFT(6),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(7),
[ts_aux_sym_5] = SHIFT(8),
},
[14] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_array_repeat0] = SHIFT(15),
[ts_aux_sym_3] = SHIFT(13),
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 0),
},
[15] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 3),
},
[16] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_array, 4),
[ts_aux_sym_6] = REDUCE(ts_sym_array, 4),
},
[17] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_object_repeat0] = SHIFT(47),
[ts_aux_sym_3] = SHIFT(35),
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
},
[18] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_2] = SHIFT(20),
},
[19] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 2),
[ts_aux_sym_6] = REDUCE(ts_sym_object, 2),
},
@ -542,25 +555,21 @@ PARSE_TABLE = {
[ts_sym_null] = SHIFT(22),
[ts_sym_true] = SHIFT(22),
[ts_sym_false] = SHIFT(22),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(23),
[ts_aux_sym_5] = SHIFT(24),
},
[21] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_object_repeat0] = SHIFT(45),
[ts_aux_sym_3] = SHIFT(35),
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
},
[22] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_value, 1),
[ts_aux_sym_4] = REDUCE(ts_sym_value, 1),
},
[23] = {
[ts_builtin_sym_error] = SHIFT(29),
[ts_sym_string] = SHIFT(30),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_4] = SHIFT(31),
},
[24] = {
@ -573,43 +582,35 @@ PARSE_TABLE = {
[ts_sym_null] = SHIFT(6),
[ts_sym_true] = SHIFT(6),
[ts_sym_false] = SHIFT(6),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(7),
[ts_aux_sym_5] = SHIFT(8),
[ts_aux_sym_6] = SHIFT(26),
},
[25] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_array_repeat0] = SHIFT(27),
[ts_aux_sym_3] = SHIFT(13),
[ts_aux_sym_6] = REDUCE(ts_aux_sym_array_repeat0, 0),
},
[26] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_array, 2),
[ts_aux_sym_4] = REDUCE(ts_sym_array, 2),
},
[27] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(28),
},
[28] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_array, 4),
[ts_aux_sym_4] = REDUCE(ts_sym_array, 4),
},
[29] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_object_repeat0] = SHIFT(43),
[ts_aux_sym_3] = SHIFT(35),
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
},
[30] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_2] = SHIFT(32),
},
[31] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 2),
[ts_aux_sym_4] = REDUCE(ts_sym_object, 2),
},
@ -622,33 +623,27 @@ PARSE_TABLE = {
[ts_sym_null] = SHIFT(22),
[ts_sym_true] = SHIFT(22),
[ts_sym_false] = SHIFT(22),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(23),
[ts_aux_sym_5] = SHIFT(24),
},
[33] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_object_repeat0] = SHIFT(34),
[ts_aux_sym_3] = SHIFT(35),
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
},
[34] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_4] = SHIFT(42),
},
[35] = {
[ts_builtin_sym_error] = SHIFT(36),
[ts_sym_string] = SHIFT(37),
[ts_sym__whitespace] = SHIFT_EXTRA(),
},
[36] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_object_repeat0] = SHIFT(41),
[ts_aux_sym_3] = SHIFT(35),
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
},
[37] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_2] = SHIFT(38),
},
[38] = {
@ -660,77 +655,61 @@ PARSE_TABLE = {
[ts_sym_null] = SHIFT(22),
[ts_sym_true] = SHIFT(22),
[ts_sym_false] = SHIFT(22),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(23),
[ts_aux_sym_5] = SHIFT(24),
},
[39] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_object_repeat0] = SHIFT(40),
[ts_aux_sym_3] = SHIFT(35),
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
},
[40] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 5),
},
[41] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 3),
},
[42] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 6),
[ts_aux_sym_4] = REDUCE(ts_sym_object, 6),
},
[43] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_4] = SHIFT(44),
},
[44] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 4),
[ts_aux_sym_4] = REDUCE(ts_sym_object, 4),
},
[45] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_4] = SHIFT(46),
},
[46] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 6),
[ts_aux_sym_6] = REDUCE(ts_sym_object, 6),
},
[47] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_4] = SHIFT(48),
},
[48] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_3] = REDUCE(ts_sym_object, 4),
[ts_aux_sym_6] = REDUCE(ts_sym_object, 4),
},
[49] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_6] = SHIFT(50),
},
[50] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_array, 4),
[ts_sym__whitespace] = SHIFT_EXTRA(),
},
[51] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_object_repeat0] = SHIFT(58),
[ts_aux_sym_3] = SHIFT(35),
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
},
[52] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_2] = SHIFT(54),
},
[53] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_object, 2),
[ts_sym__whitespace] = SHIFT_EXTRA(),
},
[54] = {
[ts_sym_value] = SHIFT(55),
@ -741,31 +720,25 @@ PARSE_TABLE = {
[ts_sym_null] = SHIFT(22),
[ts_sym_true] = SHIFT(22),
[ts_sym_false] = SHIFT(22),
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_1] = SHIFT(23),
[ts_aux_sym_5] = SHIFT(24),
},
[55] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_object_repeat0] = SHIFT(56),
[ts_aux_sym_3] = SHIFT(35),
[ts_aux_sym_4] = REDUCE(ts_aux_sym_object_repeat0, 0),
},
[56] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_4] = SHIFT(57),
},
[57] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_object, 6),
[ts_sym__whitespace] = SHIFT_EXTRA(),
},
[58] = {
[ts_sym__whitespace] = SHIFT_EXTRA(),
[ts_aux_sym_4] = SHIFT(59),
},
[59] = {
[ts_builtin_sym_end] = REDUCE(ts_sym_object, 4),
[ts_sym__whitespace] = SHIFT_EXTRA(),
},
};

View file

@ -34,6 +34,7 @@ std::ostream &operator<<(std::ostream &stream, const rules::rule_ptr &rule);
class Grammar {
const std::vector<std::pair<std::string, rules::rule_ptr> > rules_;
std::set<std::string> ubiquitous_tokens_;
std::set<char> separators_;
public:
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr> > &rules);
@ -43,6 +44,8 @@ class Grammar {
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules() const;
const std::set<std::string> &ubiquitous_tokens() const;
Grammar &ubiquitous_tokens(const std::set<std::string> &ubiquitous_tokens);
const std::set<char> &separators() const;
Grammar &separators(const std::set<char> &separators);
};
struct Conflict {

View file

@ -20,6 +20,7 @@ typedef struct TSLexer {
size_t chunk_size;
size_t position_in_chunk;
size_t token_end_position;
size_t token_start_position;
TSTree *(*accept_fn)(struct TSLexer *, TSSymbol, int);
int (*advance_fn)(struct TSLexer *);
@ -33,6 +34,10 @@ static inline char ts_lexer_lookahead_char(const TSLexer *lexer) {
return lexer->chunk[lexer->position_in_chunk];
}
/*
 * Stores the lexer's current position in `token_start_position`.
 * `accept` later uses that value to split the consumed text into
 * padding and token size.
 */
static inline void ts_lexer_start_token(TSLexer *lexer) {
  size_t current_position = ts_lexer_position(lexer);
  lexer->token_start_position = current_position;
}
static inline int ts_lexer_advance(TSLexer *lexer) {
return lexer->advance_fn(lexer);
}
@ -101,6 +106,8 @@ struct TSLanguage {
lookahead = ts_lexer_lookahead_char(lexer); \
DEBUG_LEX("CHAR '%c'", lookahead);
/*
 * Records the lexer's current position as the start of the token being
 * scanned. Expects a `TSLexer *` named `lexer` to be in scope at the point
 * of use. NOTE(review): the expansion already ends in `;`, so a call site
 * written as `START_TOKEN();` produces an extra empty statement.
 */
#define START_TOKEN() ts_lexer_start_token(lexer);
#define ADVANCE(state_index) \
{ \
DEBUG_LEX("ADVANCE %d", state_index); \

View file

@ -21,7 +21,7 @@ describe("resolving parse conflicts", []() {
{ "token1", pattern("[a-c]") },
{ "token2", pattern("[b-d]") },
{ "token3", keyword("stuff") },
}, {});
}, {}, set<char>());
describe("lexical conflicts", [&]() {
Symbol sym1(0, SymbolOptionToken);

View file

@ -0,0 +1,44 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/rules/metadata.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using namespace build_tables;
START_TEST

// Specs for LexItem::is_token_start(): which rule shapes count as the
// beginning of a token.
describe("lex items", []() {
  describe("determining if an item is the start of a token", [&]() {
    Symbol sym(1);

    // A rule whose metadata explicitly flags it as a token start.
    map<MetadataKey, int> start_flags({
      { START_TOKEN, 1 }
    });
    rule_ptr token_start = make_shared<Metadata>(str("a"), start_flags);

    it("returns true for rules designated as token starts", [&]() {
      AssertThat(LexItem(sym, token_start).is_token_start(), IsTrue());
    });

    it("returns false for rules not designated as token starts", [&]() {
      // Metadata is present, but START_TOKEN is zero.
      map<MetadataKey, int> cleared_flags({
        { START_TOKEN, 0 }
      });
      LexItem unflagged(sym, make_shared<Metadata>(str("a"), cleared_flags));
      AssertThat(unflagged.is_token_start(), IsFalse());

      // No metadata at all.
      LexItem plain(sym, str("a"));
      AssertThat(plain.is_token_start(), IsFalse());
    });

    describe("when given a sequence containing a token start", [&]() {
      it("returns true when the rule before the token start may be blank", [&]() {
        rule_ptr optional_prefix = repeat(str("a"));
        LexItem item(sym, seq({ optional_prefix, token_start }));
        AssertThat(item.is_token_start(), IsTrue());
      });

      it("returns false when the rule before the token start cannot be blank", [&]() {
        rule_ptr required_prefix = str("a");
        LexItem item(sym, seq({ required_prefix, token_start }));
        AssertThat(item.is_token_start(), IsFalse());
      });
    });
  });
});

END_TEST

View file

@ -18,6 +18,7 @@ describe("extracting tokens from a grammar", []() {
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
},
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -36,6 +37,7 @@ describe("extracting tokens from a grammar", []() {
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
},
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -56,6 +58,7 @@ describe("extracting tokens from a grammar", []() {
i_sym(0) }) }
},
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -74,6 +77,7 @@ describe("extracting tokens from a grammar", []() {
{ "rule_A", choice({ i_sym(0), blank() }) },
},
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -90,6 +94,7 @@ describe("extracting tokens from a grammar", []() {
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
},
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -102,6 +107,18 @@ describe("extracting tokens from a grammar", []() {
})))
});
it("preserves the separator characters in the lexical grammar", [&]() {
  // The separators given on the interned grammar must come out unchanged
  // on the lexical half of the extraction result.
  const set<char> expected_separators({ 'x', 'y', 'z' });

  InternedGrammar input{
    {
      { "rule_A", str("ab") },
    },
    set<Symbol>(),
    { 'x', 'y', 'z' }
  };

  pair<SyntaxGrammar, LexicalGrammar> grammars = extract_tokens(input);

  AssertThat(grammars.second.separators, Equals(expected_separators));
});
describe("when an entire rule can be extracted", [&]() {
it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
auto result = extract_tokens(InternedGrammar{
@ -111,6 +128,7 @@ describe("extracting tokens from a grammar", []() {
{ "rule_C", token(seq({ str("a"), str("b") })) },
},
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -132,6 +150,7 @@ describe("extracting tokens from a grammar", []() {
{ "rule_C", i_sym(1) },
},
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -153,6 +172,7 @@ describe("extracting tokens from a grammar", []() {
{ "rule_C", i_sym(1) },
},
{ Symbol(0) },
set<char>()
});
AssertThat(result.first.ubiquitous_tokens, Equals(set<Symbol>({

View file

@ -54,6 +54,16 @@ describe("interning symbols in a grammar", []() {
Symbol(2)
})));
});
it("preserves the grammar's separator character set", [&]() {
  // Build a grammar with a non-default separator set.
  auto grammar = Grammar({
    { "z", str("stuff") }
  }).separators({ 'x', 'y' });

  auto result = intern_symbols(grammar);

  // The statement is terminated explicitly so the spec does not depend on
  // the AssertThat macro supplying its own trailing semicolon.
  AssertThat(result.first.separators, Equals(set<char>({ 'x', 'y' })));
});
});
END_TEST

View file

@ -1,9 +1,6 @@
#include "runtime/runtime_spec_helper.h"
#include "runtime/helpers/spy_reader.h"
#include "runtime/tree.h"
#include "runtime/node.h"
extern "C" const TSLanguage * ts_language_json();
extern "C" const TSLanguage * ts_language_javascript();
@ -90,7 +87,7 @@ describe("Document", [&]() {
it("updates the parse tree", [&]() {
AssertThat(string(ts_node_string(ts_document_root_node(doc))), Equals(
"(DOCUMENT (object (string) (array (number) (number)) (string) (value (number))))"));
"(DOCUMENT (object (string) (array (number) (number)) (string) (number)))"));
});
it("re-reads only the changed portion of the input", [&]() {

View file

@ -3,7 +3,7 @@ recovers from errors at the top level
=====================================================
x * * y
---
(expression (variable)) (ERROR '*')
(variable) (ERROR '*')
=====================================================
recovers from errors inside parenthesized expressions
@ -11,5 +11,5 @@ recovers from errors inside parenthesized expressions
x + (y * + z) * 5
---
(sum
(expression (variable))
(product (group (ERROR '+')) (expression (number))))
(variable)
(product (group (ERROR '+')) (number)))

View file

@ -3,28 +3,28 @@ parses numbers
===================
5
---
(expression (number))
(number)
===================
parses variables
===================
x
---
(expression (variable))
(variable)
===================
parses products
===================
x * x
---
(product (expression (variable)) (expression (variable)))
(product (variable) (variable))
===================
parses sums
===================
x + x
---
(sum (expression (variable)) (expression (variable)))
(sum (variable) (variable))
===============================================
binds multiplication more tightly than addition
@ -32,8 +32,8 @@ binds multiplication more tightly than addition
a * b + c * d
---
(sum
(product (expression (variable)) (expression (variable)))
(product (expression (variable)) (expression (variable))))
(product (variable) (variable))
(product (variable) (variable)))
============================
parses exponents
@ -41,11 +41,9 @@ parses exponents
x + y * z^(a + b)
---
(sum
(expression (variable))
(variable)
(product
(expression (variable))
(variable)
(exponent
(variable)
(group (sum
(expression (variable))
(variable))))))
(group (sum (variable) (variable))))))

View file

@ -68,7 +68,7 @@ print(isDone() ? stuff : otherStuff);
(program (expression_statement
(function_call
(identifier)
(ternary (function_call (identifier)) (expression (identifier)) (identifier)))))
(ternary (function_call (identifier)) (identifier) (identifier)))))
==========================================
parses mathematical operators
@ -78,10 +78,10 @@ parses mathematical operators
---
(program (expression_statement
(math_op
(math_op (expression (identifier)))
(math_op (identifier))
(math_op
(math_op (expression (identifier)) (expression (identifier)))
(math_op (expression (identifier)) (math_op (identifier)))))))
(math_op (identifier) (identifier))
(math_op (identifier) (math_op (identifier)))))))
==========================================
parses boolean operators
@ -91,9 +91,9 @@ parses boolean operators
---
(program (expression_statement
(bool_op
(bool_op (expression (identifier)))
(bool_op (identifier))
(bool_op
(expression (bool_op (expression (identifier)) (identifier)))))))
(expression (bool_op (identifier) (identifier)))))))
===========================================
parses the type operators
@ -103,8 +103,8 @@ print((x instanceof Array) || (typeof x == "string"))
---
(program (expression_statement (function_call (identifier)
(bool_op
(expression (instanceof_expression (expression (identifier)) (identifier)))
(expression (typeof_expression (bool_op (expression (identifier)) (string))))))))
(expression (instanceof_expression (identifier) (identifier)))
(expression (typeof_expression (bool_op (identifier) (string))))))))
============================================
parses the 'in' operator
@ -114,7 +114,7 @@ print(x in y)
---
(program (expression_statement (function_call
(identifier)
(in_expression (expression (identifier)) (identifier)))))
(in_expression (identifier) (identifier)))))
============================================
parses assignment operators

View file

@ -37,4 +37,4 @@ recovers from errors inside nested objects
(object
(string) (object (string) (number) (ERROR '2'))
(ERROR '[')
(string) (value (number)))
(string) (number))

View file

@ -3,7 +3,7 @@ parses floating point numbers
=============================
3.14
---
(value (number))
(number)
===================
parses empty arrays
@ -28,7 +28,7 @@ parses arrays
(null)
(true)
(false)
(object (string) (value (string))))
(object (string) (string)))
====================
parses empty objects
@ -47,5 +47,5 @@ parses long objects
---
(object
(string) (string)
(string) (value (number)))
(string) (number))

View file

@ -27,7 +27,7 @@ describe("stacks", [&]() {
TSTree *node1;
before_each([&]() {
node1 = ts_tree_make_leaf(sym1, 5, 0);
node1 = ts_tree_make_leaf(sym1, 5, 1, 0);
ts_stack_push(&stack, 5, node1);
});

View file

@ -15,8 +15,8 @@ describe("Tree", []() {
TSTree *tree1, *tree2, *parent1;
before_each([&]() {
tree1 = ts_tree_make_leaf(cat, 5, 0);
tree2 = ts_tree_make_leaf(cat, 3, 0);
tree1 = ts_tree_make_leaf(cat, 5, 2, 0);
tree2 = ts_tree_make_leaf(cat, 3, 1, 0);
parent1 = ts_tree_make_node(dog, 2, tree_array({ tree1, tree2, }), 0);
});
@ -28,7 +28,11 @@ describe("Tree", []() {
describe("building a parent node", [&]() {
it("computes its size based on its child nodes", [&]() {
AssertThat(parent1->size, Equals<size_t>(8));
AssertThat(parent1->size, Equals<size_t>(9));
});
it("computes its padding based on its first child", [&]() {
  // tree1, the first child passed to ts_tree_make_node, is built with padding 2.
  const size_t expected_padding = 2;
  AssertThat(parent1->padding, Equals<size_t>(expected_padding));
});
it("computes the offset of each child node", [&]() {
@ -39,7 +43,8 @@ describe("Tree", []() {
AssertThat(children[0].tree, Equals(tree1));
AssertThat(children[0].offset, Equals<size_t>(0));
AssertThat(children[1].tree, Equals(tree2));
AssertThat(children[1].offset, Equals<size_t>(tree1->size));
AssertThat(children[1].offset, Equals<size_t>(
tree1->size + tree2->padding));
});
describe("when one of the child nodes is hidden", [&]() {
@ -47,7 +52,7 @@ describe("Tree", []() {
before_each([&]() {
parent1->options = TSTreeOptionsHidden;
tree3 = ts_tree_make_leaf(cat, 8, 0);
tree3 = ts_tree_make_leaf(cat, 8, 5, 0);
grandparent = ts_tree_make_node(pig, 2, tree_array({
parent1,
tree3,
@ -67,18 +72,20 @@ describe("Tree", []() {
AssertThat(children[0].tree, Equals(tree1));
AssertThat(children[0].offset, Equals<size_t>(0));
AssertThat(children[1].tree, Equals(tree2));
AssertThat(children[1].offset, Equals<size_t>(tree1->size));
AssertThat(children[1].offset, Equals<size_t>(
tree1->size + tree2->padding));
AssertThat(children[2].tree, Equals(tree3));
AssertThat(children[2].offset, Equals<size_t>(tree1->size + tree2->size));
AssertThat(children[2].offset, Equals<size_t>(
tree1->size + tree2->padding + tree2->size + tree3->padding));
});
});
});
describe("equality", [&]() {
it("returns true for identical trees", [&]() {
TSTree *tree1_copy = ts_tree_make_leaf(cat, 5, 0);
TSTree *tree1_copy = ts_tree_make_leaf(cat, 5, 2, 0);
AssertThat(ts_tree_equals(tree1, tree1_copy), Equals(1));
TSTree *tree2_copy = ts_tree_make_leaf(cat, 3, 0);
TSTree *tree2_copy = ts_tree_make_leaf(cat, 3, 1, 0);
AssertThat(ts_tree_equals(tree2, tree2_copy), Equals(1));
TSTree *parent2 = ts_tree_make_node(dog, 2, tree_array({
@ -92,13 +99,13 @@ describe("Tree", []() {
});
it("returns false for trees with different symbols", [&]() {
TSTree *different_tree = ts_tree_make_leaf(pig, 0, 0);
TSTree *different_tree = ts_tree_make_leaf(pig, 0, 0, 0);
AssertThat(ts_tree_equals(tree1, different_tree), Equals(0));
ts_tree_release(different_tree);
});
it("returns false for trees with different children", [&]() {
TSTree *different_tree = ts_tree_make_leaf(pig, 0, 0);
TSTree *different_tree = ts_tree_make_leaf(pig, 0, 0, 0);
TSTree *different_parent = ts_tree_make_node(dog, 2, tree_array({
different_tree, different_tree,
}), 0);

View file

@ -38,9 +38,11 @@ class LexTableBuilder {
if (symbol == rules::ERROR())
continue;
else if (symbol == rules::END_OF_INPUT())
result.insert(LexItem(symbol, CharacterSet().include(0).copy()));
result.insert(LexItem(
symbol, after_separators(CharacterSet().include(0).copy())));
else if (symbol.is_token())
result.insert(LexItem(symbol, lex_grammar.rule(symbol)));
result.insert(
LexItem(symbol, after_separators(lex_grammar.rule(symbol))));
}
return result;
}
@ -53,6 +55,7 @@ class LexTableBuilder {
add_accept_token_actions(item_set, state_id);
add_advance_actions(item_set, state_id);
add_token_start(item_set, state_id);
return state_id;
} else {
@ -92,6 +95,28 @@ class LexTableBuilder {
}
}
// Marks the lex state `state_id` as a token start when at least one item
// in `item_set` reports is_token_start().
//
// The flag is written unconditionally (true or false), so the result does
// not depend on whatever value the field held before. The scan stops at
// the first matching item.
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
  bool starts_token = false;
  for (const auto &item : item_set) {
    if (item.is_token_start()) {
      starts_token = true;
      break;
    }
  }
  lex_table.state(state_id).is_token_start = starts_token;
}
// Builds a CharacterSet containing every separator character of the
// lexical grammar.
CharacterSet separator_set() const {
  CharacterSet separators;
  const auto &chars = lex_grammar.separators;
  for (auto it = chars.begin(); it != chars.end(); ++it) {
    separators.include(*it);
  }
  return separators;
}
// Wraps `rule` so that it is preceded by a repetition of separator
// characters. The separator prefix carries START_TOKEN = 1 and
// PRECEDENCE = -1 metadata.
rules::rule_ptr after_separators(rules::rule_ptr rule) {
  map<rules::MetadataKey, int> prefix_metadata({
    { rules::START_TOKEN, 1 },
    { rules::PRECEDENCE, -1 },
  });

  rules::rule_ptr separator_run =
      make_shared<rules::Repeat>(separator_set().copy());
  rules::rule_ptr prefix =
      make_shared<rules::Metadata>(separator_run, prefix_metadata);

  return rules::Seq::Build({ prefix, rule });
}
set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
set<int> result;
for (const auto &item : item_set)

View file

@ -19,6 +19,25 @@ bool LexItem::operator==(const LexItem &other) const {
return (other.lhs == lhs) && other.rule->operator==(*rule);
}
bool LexItem::is_token_start() const {
class IsTokenStart : public rules::RuleFn<bool> {
bool apply_to(const rules::Seq *rule) {
if (apply(rule->left))
return true;
else if (rule_can_be_blank(rule->left))
return apply(rule->right);
else
return false;
}
bool apply_to(const rules::Metadata *rule) {
return rule->value_for(rules::START_TOKEN);
}
};
return IsTokenStart().apply(rule);
}
ostream &operator<<(ostream &stream, const LexItem &item) {
return stream << string("(item ") << item.lhs << string(" ") << *item.rule
<< string(")");

View file

@ -12,6 +12,7 @@ class LexItem : public Item {
public:
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
bool operator==(const LexItem &other) const;
bool is_token_start() const;
};
std::ostream &operator<<(std::ostream &stream, const LexItem &item);

View file

@ -314,6 +314,8 @@ class CCodeGenerator {
void code_for_lex_state(const LexState &lex_state) {
auto expected_inputs = lex_state.expected_inputs();
if (lex_state.is_token_start)
line("START_TOKEN();");
for (auto pair : lex_state.actions)
if (!pair.first.is_empty())
_if([&]() { condition_for_character_set(pair.first); },

View file

@ -12,7 +12,9 @@ using rules::rule_ptr;
Grammar::Grammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules)
: rules_(rules), ubiquitous_tokens_({}) {}
: rules_(rules),
ubiquitous_tokens_({}),
separators_({ ' ', '\r', '\t', '\n' }) {}
bool Grammar::operator==(const Grammar &other) const {
if (other.rules_.size() != rules_.size())
@ -70,6 +72,13 @@ Grammar &Grammar::ubiquitous_tokens(const set<string> &ubiquitous_tokens) {
return *this;
}
// Returns the grammar's separator character set.
const set<char> &Grammar::separators() const {
  return this->separators_;
}

// Replaces the separator character set and returns this grammar, so the
// call can be chained with the other builder-style setters.
Grammar &Grammar::separators(const set<char> &separators) {
  this->separators_ = separators;
  return *this;
}
const vector<pair<string, rule_ptr> > &Grammar::rules() const { return rules_; }
} // namespace tree_sitter

View file

@ -57,6 +57,7 @@ class LexState {
std::map<rules::CharacterSet, LexAction> actions;
LexAction default_action;
std::set<rules::CharacterSet> expected_inputs() const;
bool is_token_start;
};
typedef int64_t LexStateId;

View file

@ -63,7 +63,7 @@ pair<LexicalGrammar, const GrammarError *> expand_tokens(
aux_rules.push_back({ pair.first, rule });
}
return { LexicalGrammar(rules, aux_rules), nullptr, };
return { LexicalGrammar(rules, aux_rules, grammar.separators), nullptr, };
}
} // namespace prepare_grammar

View file

@ -129,7 +129,7 @@ pair<SyntaxGrammar, LexicalGrammar> extract_tokens(
ubiquitous_tokens.insert(inliner.replace_symbol(symbol));
return { SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
LexicalGrammar(tokens, aux_tokens), };
LexicalGrammar(tokens, aux_tokens, input_grammar.separators), };
}
} // namespace prepare_grammar

View file

@ -72,6 +72,7 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(
InternedGrammar result;
result.rules = rules;
result.ubiquitous_tokens = ubiquitous_tokens;
result.separators = grammar.separators();
return { result, nullptr };
}

View file

@ -15,6 +15,7 @@ class InternedGrammar {
public:
std::vector<std::pair<std::string, rules::rule_ptr> > rules;
std::set<rules::Symbol> ubiquitous_tokens;
std::set<char> separators;
};
} // namespace prepare_grammar

View file

@ -178,8 +178,6 @@ class PatternParser {
'9');
case 'd':
return CharacterSet().include('0', '9');
case 's':
return CharacterSet().include(' ').include('\t').include('\r');
case 't':
return CharacterSet().include('\t');
case 'n':

View file

@ -26,16 +26,21 @@ PreparedGrammar::PreparedGrammar() {}
SyntaxGrammar::SyntaxGrammar() {}
LexicalGrammar::LexicalGrammar() {}
PreparedGrammar::PreparedGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules)
: rules(rules), aux_rules(aux_rules) {}
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules)
: PreparedGrammar(rules, aux_rules) {}
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules)
: PreparedGrammar(rules, aux_rules) {}
PreparedGrammar::PreparedGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules)
: rules(rules), aux_rules(aux_rules) {}
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules,
@ -44,7 +49,8 @@ SyntaxGrammar::SyntaxGrammar(
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules)
: PreparedGrammar(rules, aux_rules) {}
const vector<pair<string, rules::rule_ptr> > &aux_rules,
const set<char> &separators)
: PreparedGrammar(rules, aux_rules), separators(separators) {}
} // namespace tree_sitter

View file

@ -44,6 +44,12 @@ class LexicalGrammar : public PreparedGrammar {
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules);
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules,
const std::set<char> &separators);
std::set<char> separators;
};
} // namespace tree_sitter

View file

@ -9,6 +9,7 @@ namespace tree_sitter {
namespace rules {
typedef enum {
START_TOKEN,
PRECEDENCE,
IS_TOKEN,
DESCRIPTION,

View file

@ -23,11 +23,12 @@ static int advance(TSLexer *lexer) {
static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
size_t current_position = ts_lexer_position(lexer);
size_t size = current_position - lexer->token_end_position;
size_t size = current_position - lexer->token_start_position;
size_t padding = lexer->token_start_position - lexer->token_end_position;
lexer->token_end_position = current_position;
return (symbol == ts_builtin_sym_error)
? ts_tree_make_error(size, ts_lexer_lookahead_char(lexer))
: ts_tree_make_leaf(symbol, size, is_hidden);
? ts_tree_make_error(size, padding, ts_lexer_lookahead_char(lexer))
: ts_tree_make_leaf(symbol, size, padding, is_hidden);
}
/*
@ -41,6 +42,7 @@ TSLexer ts_lexer_make() {
.chunk_start = 0,
.chunk_size = 0,
.position_in_chunk = 0,
.token_start_position = 0,
.token_end_position = 0,
.advance_fn = advance,
.accept_fn = accept, };

View file

@ -16,7 +16,7 @@ TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index,
}
TSNode *ts_node_make_root(const TSTree *tree, const char **names) {
return ts_node_make(tree, NULL, 0, 0, names);
return ts_node_make(tree, NULL, 0, tree->padding, names);
}
void ts_node_retain(TSNode *node) { node->ref_count++; }

View file

@ -36,24 +36,16 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) {
break;
stack->size--;
position -= node->size;
DEBUG_PARSE("BREAKDOWN %s %u", parser->language->symbol_names[node->symbol],
ts_stack_top_state(stack));
position -= ts_tree_total_size(node);
for (size_t i = 0; i < child_count && position < edit->position; i++) {
TSTree *child = children[i];
TSStateId state = ts_stack_top_state(stack);
TSParseAction action = action_for(parser->language, state, child->symbol);
TSStateId next_state = (action.type == TSParseActionTypeShift)
? action.data.to_state
: state;
TSStateId next_state =
action_for(parser->language, state, child->symbol).data.to_state;
ts_stack_push(stack, next_state, child);
ts_tree_retain(child);
position += child->size;
DEBUG_PARSE("PUT_BACK %s %u",
parser->language->symbol_names[child->symbol], next_state);
position += ts_tree_total_size(child);
}
ts_tree_release(node);
@ -84,7 +76,9 @@ static void reduce(TSParser *parser, TSSymbol symbol, size_t child_count) {
* The child node count is known ahead of time, but some children
* may be ubiquitous tokens, which don't count.
*/
for (size_t i = 0; i < child_count && child_count < stack->size; i++) {
for (size_t i = 0; i < child_count; i++) {
if (child_count == stack->size)
break;
TSTree *child = stack->entries[stack->size - 1 - i].node;
if (ts_tree_is_extra(child))
child_count++;
@ -127,30 +121,15 @@ static void lex(TSParser *parser, TSStateId lex_state) {
static int handle_error(TSParser *parser) {
TSTree *error = parser->lookahead;
ts_tree_retain(error);
size_t last_token_end = parser->lexer.token_end_position;
for (;;) {
/*
* If there is no state in the stack for which we can recover with the
* current lookahead token, advance to the next token. If no characters
* were consumed, advance the lexer to the next character.
*/
size_t prev_position = ts_lexer_position(&parser->lexer);
lex(parser, ts_lex_state_error);
if (ts_lexer_position(&parser->lexer) == prev_position) {
parser->lexer.token_end_position++;
if (!ts_lexer_advance(&parser->lexer)) {
DEBUG_PARSE("FAIL TO RECOVER");
ts_stack_push(&parser->stack, 0, error);
ts_tree_release(error);
return 0;
}
}
/*
* Unwind the parse stack until a state is found in which an error is
* expected and the current lookahead token is expected afterwards.
*/
size_t error_start = last_token_end;
TS_STACK_FROM_TOP(parser->stack, entry, i) {
TSParseAction action_on_error =
action_for(parser->language, entry->state, ts_builtin_sym_error);
@ -160,25 +139,41 @@ static int handle_error(TSParser *parser) {
TSParseAction action_after_error = action_for(
parser->language, state_after_error, parser->lookahead->symbol);
if (action_after_error.type == TSParseActionTypeShift ||
action_after_error.type == TSParseActionTypeReduce) {
if (action_after_error.type != TSParseActionTypeError) {
DEBUG_PARSE("RECOVER %u", state_after_error);
error->size += ts_lexer_position(&parser->lexer) - 1 - error_start;
ts_stack_shrink(&parser->stack, i + 1);
error->size = ts_lexer_position(&parser->lexer) -
parser->lookahead->size -
ts_stack_right_position(&parser->stack);
ts_stack_push(&parser->stack, state_after_error, error);
ts_tree_release(error);
return 1;
}
}
TSTree *removed_tree = entry->node;
error_start -= ts_tree_total_size(removed_tree);
}
/*
* If there is no state in the stack for which we can recover with the
* current lookahead token, advance to the next token. If no characters
* were consumed, advance the lexer to the next character.
*/
size_t prev_position = ts_lexer_position(&parser->lexer);
lex(parser, ts_lex_state_error);
parser->lookahead->padding = 0;
if (ts_lexer_position(&parser->lexer) == prev_position)
if (!ts_lexer_advance(&parser->lexer)) {
DEBUG_PARSE("FAIL TO RECOVER");
ts_stack_push(&parser->stack, 0, error);
ts_tree_release(error);
return 0;
}
}
}
static TSTree *get_root(TSParser *parser) {
if (parser->stack.size == 0)
ts_stack_push(&parser->stack, 0, ts_tree_make_error(0, 0));
ts_stack_push(&parser->stack, 0, ts_tree_make_error(0, 0, 0));
reduce(parser, ts_builtin_sym_document, parser->stack.size);
parser->lookahead->options = 0;

View file

@ -50,26 +50,7 @@ size_t ts_stack_right_position(const TSStack *stack) {
size_t result = 0;
for (size_t i = 0; i < stack->size; i++) {
TSTree *node = stack->entries[i].node;
result += node->size;
result += ts_tree_total_size(node);
}
return result;
}
TSTree **ts_stack_pop_extras(TSStack *stack, size_t *count) {
size_t first = stack->size;
while (first > 0 && ts_tree_is_extra(stack->entries[first - 1].node))
first--;
*count = (stack->size - first);
if (*count == 0)
return NULL;
TSTree **result = malloc(*count * sizeof(TSTree *));
for (size_t i = 0; i < *count; i++) {
result[i] = stack->entries[first + i].node;
ts_tree_retain(result[i]);
}
ts_stack_shrink(stack, first - 1);
return result;
}

View file

@ -25,7 +25,6 @@ void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node);
TSStateId ts_stack_top_state(const TSStack *stack);
TSTree *ts_stack_top_node(const TSStack *stack);
size_t ts_stack_right_position(const TSStack *stack);
TSTree **ts_stack_pop_extras(TSStack *, size_t *);
#define TS_STACK_FROM_TOP(stack, entry, index) \
size_t index = stack.size - 1; \

View file

@ -4,7 +4,7 @@
#include "tree_sitter/parser.h"
#include "runtime/tree.h"
TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, bool is_hidden) {
TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, size_t padding, bool is_hidden) {
TSTree *result = malloc(sizeof(TSTree));
*result = (TSTree) { .ref_count = 1,
.symbol = sym,
@ -12,12 +12,13 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, bool is_hidden) {
.child_count = 0,
.children = NULL,
.lookahead_char = 0,
.padding = padding,
.options = is_hidden ? TSTreeOptionsHidden : 0, };
return result;
}
TSTree *ts_tree_make_error(size_t size, char lookahead_char) {
TSTree *result = ts_tree_make_leaf(ts_builtin_sym_error, size, false);
TSTree *ts_tree_make_error(size_t size, size_t padding, char lookahead_char) {
TSTree *result = ts_tree_make_leaf(ts_builtin_sym_error, size, padding, false);
result->lookahead_char = lookahead_char;
return result;
}
@ -26,14 +27,20 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
TSTree **children, bool is_hidden) {
/*
* Determine the new node's size and visible child count based on
* Determine the new node's size, padding and visible child count based on
* the given child nodes.
*/
size_t size = 0, visible_child_count = 0;
size_t size = 0, padding = 0, visible_child_count = 0;
for (size_t i = 0; i < child_count; i++) {
TSTree *child = children[i];
ts_tree_retain(child);
size += child->size;
if (i == 0) {
padding = child->padding;
size = child->size;
} else {
size += child->padding + child->size;
}
if (ts_tree_is_visible(child))
visible_child_count++;
@ -63,6 +70,7 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
.child_count = child_count,
.visible_child_count = visible_child_count,
.size = size,
.padding = padding,
.options = options };
/*
@ -73,6 +81,9 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
for (size_t i = 0, vis_i = 0, offset = 0; i < child_count; i++) {
TSTree *child = children[i];
if (i > 0)
offset += child->padding;
if (ts_tree_is_visible(child)) {
visible_children[vis_i].tree = child;
visible_children[vis_i].offset = offset;
@ -107,6 +118,10 @@ void ts_tree_release(TSTree *tree) {
}
}
/* Full extent of a tree: its own size plus the padding in front of it. */
size_t ts_tree_total_size(const TSTree *tree) {
  size_t total = tree->size;
  total += tree->padding;
  return total;
}
int ts_tree_equals(const TSTree *node1, const TSTree *node2) {
if (node1->symbol != node2->symbol)
return 0;

View file

@ -18,6 +18,7 @@ struct TSTree {
TSSymbol symbol;
TSTreeOptions options;
size_t ref_count;
size_t padding;
size_t size;
char lookahead_char;
size_t child_count;
@ -46,9 +47,9 @@ static inline int ts_tree_is_wrapper(const TSTree *tree) {
return (tree->options & TSTreeOptionsWrapper);
}
TSTree *ts_tree_make_leaf(TSSymbol, size_t, bool);
TSTree *ts_tree_make_leaf(TSSymbol, size_t, size_t, bool);
TSTree *ts_tree_make_node(TSSymbol, size_t, TSTree **, bool);
TSTree *ts_tree_make_error(size_t, char);
TSTree *ts_tree_make_error(size_t size, size_t padding, char lookahead_char);
void ts_tree_retain(TSTree *tree);
void ts_tree_release(TSTree *tree);
int ts_tree_equals(const TSTree *tree1, const TSTree *tree2);
@ -56,6 +57,7 @@ char *ts_tree_string(const TSTree *tree, const char **names);
char *ts_tree_error_string(const TSTree *tree, const char **names);
TSTree **ts_tree_children(const TSTree *tree, size_t *count);
TSTreeChild *ts_tree_visible_children(const TSTree *tree, size_t *count);
size_t ts_tree_total_size(const TSTree *tree);
#ifdef __cplusplus
}