Control lexer's error-mode via explicit boolean argument

Previously, the lexer would operate in error-mode (skipping any garbage input
until it found a valid token) whenever it was invoked in the 'error' state. Now
that the error state is deduplicated with other lexical states, the lexer might
be invoked in that state even when error-mode is not intended. This commit adds
a third argument to `ts_lex`, `bool error_mode`, that explicitly sets the
error-mode instead of inferring it from the starting state.

This bug (the lexer entering error-mode when it was not intended) was unlikely
to occur in any real grammar, but it caused the node-tree-sitter-compiler test
suite to fail for some grammars with only one rule.
This commit is contained in:
Max Brunsfeld 2015-12-30 09:37:40 -08:00
parent 4ad1a666be
commit 4b04afac5e
10 changed files with 442 additions and 452 deletions

View file

@ -87,7 +87,7 @@ struct TSLanguage {
const unsigned short *parse_table;
const TSParseActionEntry *parse_actions;
const TSStateId *lex_states;
TSTree *(*lex_fn)(TSLexer *, TSStateId);
TSTree *(*lex_fn)(TSLexer *, TSStateId, bool);
};
/*
@ -95,24 +95,23 @@ struct TSLanguage {
*/
#define START_LEXER() \
const bool error_mode = (lex_state == ts_lex_state_error); \
lexer->start_fn(lexer, lex_state); \
lexer->start_fn(lexer, state); \
int32_t lookahead; \
next_state: \
lookahead = lexer->lookahead;
#define START_TOKEN() lexer->start_token_fn(lexer);
#define GO_TO_STATE(state_index) \
#define GO_TO_STATE(state_value) \
{ \
lex_state = state_index; \
state = state_value; \
goto next_state; \
}
#define ADVANCE(state_index) \
#define ADVANCE(state_value) \
{ \
lexer->advance_fn(lexer, state_index); \
GO_TO_STATE(state_index); \
lexer->advance_fn(lexer, state_value); \
GO_TO_STATE(state_value); \
}
#define ACCEPT_FRAGILE_TOKEN(symbol) \
@ -123,27 +122,19 @@ struct TSLanguage {
return lexer->accept_fn(lexer, symbol, ts_symbol_metadata[symbol], \
ts_symbol_names[symbol], false);
#define LEX_ERROR() \
if (error_mode) { \
if (lex_state == ts_lex_state_error) \
ADVANCE(ts_lex_state_error) \
else \
GO_TO_STATE(ts_lex_state_error) \
} else { \
ACCEPT_TOKEN(ts_builtin_sym_error) \
#define LEX_ERROR() \
if (error_mode) { \
if (state == ts_lex_state_error) \
lexer->advance_fn(lexer, state); \
GO_TO_STATE(ts_lex_state_error) \
} else { \
ACCEPT_TOKEN(ts_builtin_sym_error) \
}
/*
* Parse Table Macros
*/
#define ACTIONS(...) \
(TSParseAction[]) { \
__VA_ARGS__, { \
.type = 0 \
} \
}
enum {
FRAGILE = 1,
CAN_HIDE_SPLIT = 2,

View file

@ -31,9 +31,25 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
[anon_sym_DQUOTEhello_DQUOTE] = {.visible = true, .named = false, .structural = true, .extra = false},
};
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
START_LEXER();
switch (lex_state) {
switch (state) {
case 0:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '\n')
ADVANCE(2);
if (lookahead == '\r')
ADVANCE(3);
if (lookahead == '\"')
ADVANCE(4);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(11);
LEX_ERROR();
case 1:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 2:
@ -94,22 +110,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
(lookahead == ' '))
ADVANCE(13);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '\n')
ADVANCE(2);
if (lookahead == '\r')
ADVANCE(3);
if (lookahead == '\"')
ADVANCE(4);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(11);
LEX_ERROR();
default:
LEX_ERROR();
}

View file

@ -70,9 +70,41 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
[sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true},
};
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
START_LEXER();
switch (lex_state) {
switch (state) {
case 0:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '#')
ADVANCE(2);
if (lookahead == '(')
ADVANCE(3);
if (lookahead == ')')
ADVANCE(4);
if (lookahead == '*')
ADVANCE(5);
if (lookahead == '+')
ADVANCE(6);
if (lookahead == '-')
ADVANCE(7);
if (lookahead == '/')
ADVANCE(8);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(9);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z') ||
(945 <= lookahead && lookahead <= 969))
ADVANCE(10);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
case 1:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 2:
@ -192,38 +224,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
if (lookahead == ')')
ADVANCE(4);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '#')
ADVANCE(2);
if (lookahead == '(')
ADVANCE(3);
if (lookahead == ')')
ADVANCE(4);
if (lookahead == '*')
ADVANCE(5);
if (lookahead == '+')
ADVANCE(6);
if (lookahead == '-')
ADVANCE(7);
if (lookahead == '/')
ADVANCE(8);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(9);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z') ||
(945 <= lookahead && lookahead <= 969))
ADVANCE(10);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
default:
LEX_ERROR();
}

View file

@ -256,9 +256,81 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
[sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true},
};
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
START_LEXER();
switch (lex_state) {
switch (state) {
case 0:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '\n')
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(3);
if (lookahead == '#')
ADVANCE(7);
if (lookahead == '&')
ADVANCE(14);
if (lookahead == '(')
ADVANCE(15);
if (lookahead == ')')
ADVANCE(16);
if (lookahead == '*')
ADVANCE(17);
if (lookahead == '+')
ADVANCE(18);
if (lookahead == ',')
ADVANCE(19);
if (lookahead == '.')
ADVANCE(20);
if (lookahead == '/')
ADVANCE(21);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(26);
if (lookahead == ';')
ADVANCE(29);
if (lookahead == '=')
ADVANCE(30);
if (('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == 'b') ||
(lookahead == 'd') ||
('g' <= lookahead && lookahead <= 'k') ||
('m' <= lookahead && lookahead <= 'q') ||
('w' <= lookahead && lookahead <= 'z'))
ADVANCE(31);
if (lookahead == '[')
ADVANCE(32);
if (lookahead == ']')
ADVANCE(33);
if (lookahead == 'a')
ADVANCE(34);
if (lookahead == 'c')
ADVANCE(38);
if (lookahead == 'e')
ADVANCE(43);
if (lookahead == 'f')
ADVANCE(49);
if (lookahead == 'l')
ADVANCE(52);
if (lookahead == 'r')
ADVANCE(56);
if (lookahead == 's')
ADVANCE(70);
if (lookahead == 't')
ADVANCE(89);
if (lookahead == 'u')
ADVANCE(96);
if (lookahead == 'v')
ADVANCE(104);
if (lookahead == '{')
ADVANCE(112);
if (lookahead == '}')
ADVANCE(113);
LEX_ERROR();
case 1:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 2:
@ -2408,78 +2480,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
if (lookahead == '{')
ADVANCE(112);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '\n')
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(3);
if (lookahead == '#')
ADVANCE(7);
if (lookahead == '&')
ADVANCE(14);
if (lookahead == '(')
ADVANCE(15);
if (lookahead == ')')
ADVANCE(16);
if (lookahead == '*')
ADVANCE(17);
if (lookahead == '+')
ADVANCE(18);
if (lookahead == ',')
ADVANCE(19);
if (lookahead == '.')
ADVANCE(20);
if (lookahead == '/')
ADVANCE(21);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(26);
if (lookahead == ';')
ADVANCE(29);
if (lookahead == '=')
ADVANCE(30);
if (('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == 'b') ||
(lookahead == 'd') ||
('g' <= lookahead && lookahead <= 'k') ||
('m' <= lookahead && lookahead <= 'q') ||
('w' <= lookahead && lookahead <= 'z'))
ADVANCE(31);
if (lookahead == '[')
ADVANCE(32);
if (lookahead == ']')
ADVANCE(33);
if (lookahead == 'a')
ADVANCE(34);
if (lookahead == 'c')
ADVANCE(38);
if (lookahead == 'e')
ADVANCE(43);
if (lookahead == 'f')
ADVANCE(49);
if (lookahead == 'l')
ADVANCE(52);
if (lookahead == 'r')
ADVANCE(56);
if (lookahead == 's')
ADVANCE(70);
if (lookahead == 't')
ADVANCE(89);
if (lookahead == 'u')
ADVANCE(96);
if (lookahead == 'v')
ADVANCE(104);
if (lookahead == '{')
ADVANCE(112);
if (lookahead == '}')
ADVANCE(113);
LEX_ERROR();
default:
LEX_ERROR();
}

View file

@ -259,9 +259,85 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
[sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true},
};
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
START_LEXER();
switch (lex_state) {
switch (state) {
case 0:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '!')
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(4);
if (lookahead == '&')
ADVANCE(8);
if (lookahead == '(')
ADVANCE(10);
if (lookahead == ')')
ADVANCE(11);
if (lookahead == '*')
ADVANCE(12);
if (lookahead == ',')
ADVANCE(13);
if (lookahead == '.')
ADVANCE(14);
if (lookahead == '/')
ADVANCE(17);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(19);
if (lookahead == ':')
ADVANCE(22);
if (lookahead == ';')
ADVANCE(24);
if (lookahead == '<')
ADVANCE(25);
if (lookahead == '=')
ADVANCE(27);
if (lookahead == '>')
ADVANCE(29);
if (('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == 'a') ||
(lookahead == 'b') ||
(lookahead == 'g') ||
(lookahead == 'h') ||
('j' <= lookahead && lookahead <= 'l') ||
('o' <= lookahead && lookahead <= 'q') ||
(lookahead == 'u') ||
('w' <= lookahead && lookahead <= 'z'))
ADVANCE(31);
if (lookahead == 'c')
ADVANCE(32);
if (lookahead == 'd')
ADVANCE(41);
if (lookahead == 'e')
ADVANCE(52);
if (lookahead == 'f')
ADVANCE(64);
if (lookahead == 'i')
ADVANCE(70);
if (lookahead == 'm')
ADVANCE(90);
if (lookahead == 'n')
ADVANCE(97);
if (lookahead == 'r')
ADVANCE(106);
if (lookahead == 's')
ADVANCE(114);
if (lookahead == 't')
ADVANCE(120);
if (lookahead == 'v')
ADVANCE(138);
if (lookahead == '{')
ADVANCE(152);
if (lookahead == '}')
ADVANCE(153);
LEX_ERROR();
case 1:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 2:
@ -3095,82 +3171,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
LEX_ERROR();
case 259:
ACCEPT_TOKEN(anon_sym_initializer_list);
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '!')
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(4);
if (lookahead == '&')
ADVANCE(8);
if (lookahead == '(')
ADVANCE(10);
if (lookahead == ')')
ADVANCE(11);
if (lookahead == '*')
ADVANCE(12);
if (lookahead == ',')
ADVANCE(13);
if (lookahead == '.')
ADVANCE(14);
if (lookahead == '/')
ADVANCE(17);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(19);
if (lookahead == ':')
ADVANCE(22);
if (lookahead == ';')
ADVANCE(24);
if (lookahead == '<')
ADVANCE(25);
if (lookahead == '=')
ADVANCE(27);
if (lookahead == '>')
ADVANCE(29);
if (('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == 'a') ||
(lookahead == 'b') ||
(lookahead == 'g') ||
(lookahead == 'h') ||
('j' <= lookahead && lookahead <= 'l') ||
('o' <= lookahead && lookahead <= 'q') ||
(lookahead == 'u') ||
('w' <= lookahead && lookahead <= 'z'))
ADVANCE(31);
if (lookahead == 'c')
ADVANCE(32);
if (lookahead == 'd')
ADVANCE(41);
if (lookahead == 'e')
ADVANCE(52);
if (lookahead == 'f')
ADVANCE(64);
if (lookahead == 'i')
ADVANCE(70);
if (lookahead == 'm')
ADVANCE(90);
if (lookahead == 'n')
ADVANCE(97);
if (lookahead == 'r')
ADVANCE(106);
if (lookahead == 's')
ADVANCE(114);
if (lookahead == 't')
ADVANCE(120);
if (lookahead == 'v')
ADVANCE(138);
if (lookahead == '{')
ADVANCE(152);
if (lookahead == '}')
ADVANCE(153);
LEX_ERROR();
default:
LEX_ERROR();
}

View file

@ -262,9 +262,93 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
[sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true},
};
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
START_LEXER();
switch (lex_state) {
switch (state) {
case 0:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '\n')
ADVANCE(2);
if (lookahead == '!')
ADVANCE(3);
if (lookahead == '\"')
ADVANCE(4);
if (lookahead == '&')
ADVANCE(8);
if (lookahead == '(')
ADVANCE(10);
if (lookahead == ')')
ADVANCE(11);
if (lookahead == '*')
ADVANCE(12);
if (lookahead == '+')
ADVANCE(13);
if (lookahead == ',')
ADVANCE(14);
if (lookahead == '-')
ADVANCE(15);
if (lookahead == '.')
ADVANCE(16);
if (lookahead == '/')
ADVANCE(17);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(19);
if (lookahead == ':')
ADVANCE(22);
if (lookahead == ';')
ADVANCE(24);
if (lookahead == '<')
ADVANCE(25);
if (lookahead == '=')
ADVANCE(27);
if (lookahead == '>')
ADVANCE(29);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'd') ||
(lookahead == 'g') ||
(lookahead == 'h') ||
('j' <= lookahead && lookahead <= 'l') ||
(lookahead == 'n') ||
(lookahead == 'o') ||
(lookahead == 'q') ||
(lookahead == 'u') ||
('w' <= lookahead && lookahead <= 'z'))
ADVANCE(31);
if (lookahead == '[')
ADVANCE(32);
if (lookahead == ']')
ADVANCE(33);
if (lookahead == 'e')
ADVANCE(34);
if (lookahead == 'f')
ADVANCE(38);
if (lookahead == 'i')
ADVANCE(44);
if (lookahead == 'm')
ADVANCE(59);
if (lookahead == 'p')
ADVANCE(62);
if (lookahead == 'r')
ADVANCE(69);
if (lookahead == 's')
ADVANCE(79);
if (lookahead == 't')
ADVANCE(85);
if (lookahead == 'v')
ADVANCE(89);
if (lookahead == '{')
ADVANCE(92);
if (lookahead == '|')
ADVANCE(93);
if (lookahead == '}')
ADVANCE(95);
LEX_ERROR();
case 1:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 2:
@ -2159,90 +2243,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
if (lookahead == '/')
ADVANCE(97);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '\n')
ADVANCE(2);
if (lookahead == '!')
ADVANCE(3);
if (lookahead == '\"')
ADVANCE(4);
if (lookahead == '&')
ADVANCE(8);
if (lookahead == '(')
ADVANCE(10);
if (lookahead == ')')
ADVANCE(11);
if (lookahead == '*')
ADVANCE(12);
if (lookahead == '+')
ADVANCE(13);
if (lookahead == ',')
ADVANCE(14);
if (lookahead == '-')
ADVANCE(15);
if (lookahead == '.')
ADVANCE(16);
if (lookahead == '/')
ADVANCE(17);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(19);
if (lookahead == ':')
ADVANCE(22);
if (lookahead == ';')
ADVANCE(24);
if (lookahead == '<')
ADVANCE(25);
if (lookahead == '=')
ADVANCE(27);
if (lookahead == '>')
ADVANCE(29);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'd') ||
(lookahead == 'g') ||
(lookahead == 'h') ||
('j' <= lookahead && lookahead <= 'l') ||
(lookahead == 'n') ||
(lookahead == 'o') ||
(lookahead == 'q') ||
(lookahead == 'u') ||
('w' <= lookahead && lookahead <= 'z'))
ADVANCE(31);
if (lookahead == '[')
ADVANCE(32);
if (lookahead == ']')
ADVANCE(33);
if (lookahead == 'e')
ADVANCE(34);
if (lookahead == 'f')
ADVANCE(38);
if (lookahead == 'i')
ADVANCE(44);
if (lookahead == 'm')
ADVANCE(59);
if (lookahead == 'p')
ADVANCE(62);
if (lookahead == 'r')
ADVANCE(69);
if (lookahead == 's')
ADVANCE(79);
if (lookahead == 't')
ADVANCE(85);
if (lookahead == 'v')
ADVANCE(89);
if (lookahead == '{')
ADVANCE(92);
if (lookahead == '|')
ADVANCE(93);
if (lookahead == '}')
ADVANCE(95);
LEX_ERROR();
default:
LEX_ERROR();
}

View file

@ -331,9 +331,104 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
[sym__line_break] = {.visible = false, .named = false, .structural = true, .extra = true},
};
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
START_LEXER();
switch (lex_state) {
switch (state) {
case 0:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '\n')
ADVANCE(2);
if (lookahead == '!')
ADVANCE(3);
if (lookahead == '\"')
ADVANCE(6);
if ((lookahead == '$') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
(lookahead == 'a') ||
(lookahead == 'g') ||
(lookahead == 'h') ||
('j' <= lookahead && lookahead <= 'm') ||
('o' <= lookahead && lookahead <= 'q') ||
('x' <= lookahead && lookahead <= 'z'))
ADVANCE(10);
if (lookahead == '&')
ADVANCE(11);
if (lookahead == '\'')
ADVANCE(13);
if (lookahead == '(')
ADVANCE(16);
if (lookahead == ')')
ADVANCE(17);
if (lookahead == '*')
ADVANCE(18);
if (lookahead == '+')
ADVANCE(20);
if (lookahead == ',')
ADVANCE(23);
if (lookahead == '-')
ADVANCE(24);
if (lookahead == '.')
ADVANCE(27);
if (lookahead == '/')
ADVANCE(28);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(50);
if (lookahead == ':')
ADVANCE(53);
if (lookahead == ';')
ADVANCE(54);
if (lookahead == '<')
ADVANCE(55);
if (lookahead == '=')
ADVANCE(56);
if (lookahead == '>')
ADVANCE(59);
if (lookahead == '?')
ADVANCE(60);
if (lookahead == '[')
ADVANCE(61);
if (lookahead == ']')
ADVANCE(62);
if (lookahead == 'b')
ADVANCE(63);
if (lookahead == 'c')
ADVANCE(68);
if (lookahead == 'd')
ADVANCE(75);
if (lookahead == 'e')
ADVANCE(86);
if (lookahead == 'f')
ADVANCE(90);
if (lookahead == 'i')
ADVANCE(110);
if (lookahead == 'n')
ADVANCE(121);
if (lookahead == 'r')
ADVANCE(127);
if (lookahead == 's')
ADVANCE(133);
if (lookahead == 't')
ADVANCE(139);
if (lookahead == 'u')
ADVANCE(153);
if (lookahead == 'v')
ADVANCE(162);
if (lookahead == 'w')
ADVANCE(165);
if (lookahead == '{')
ADVANCE(170);
if (lookahead == '|')
ADVANCE(171);
if (lookahead == '}')
ADVANCE(173);
LEX_ERROR();
case 1:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 2:
@ -6055,101 +6150,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
if (lookahead == '{')
ADVANCE(170);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '\n')
ADVANCE(2);
if (lookahead == '!')
ADVANCE(3);
if (lookahead == '\"')
ADVANCE(6);
if ((lookahead == '$') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
(lookahead == 'a') ||
(lookahead == 'g') ||
(lookahead == 'h') ||
('j' <= lookahead && lookahead <= 'm') ||
('o' <= lookahead && lookahead <= 'q') ||
('x' <= lookahead && lookahead <= 'z'))
ADVANCE(10);
if (lookahead == '&')
ADVANCE(11);
if (lookahead == '\'')
ADVANCE(13);
if (lookahead == '(')
ADVANCE(16);
if (lookahead == ')')
ADVANCE(17);
if (lookahead == '*')
ADVANCE(18);
if (lookahead == '+')
ADVANCE(20);
if (lookahead == ',')
ADVANCE(23);
if (lookahead == '-')
ADVANCE(24);
if (lookahead == '.')
ADVANCE(27);
if (lookahead == '/')
ADVANCE(28);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(50);
if (lookahead == ':')
ADVANCE(53);
if (lookahead == ';')
ADVANCE(54);
if (lookahead == '<')
ADVANCE(55);
if (lookahead == '=')
ADVANCE(56);
if (lookahead == '>')
ADVANCE(59);
if (lookahead == '?')
ADVANCE(60);
if (lookahead == '[')
ADVANCE(61);
if (lookahead == ']')
ADVANCE(62);
if (lookahead == 'b')
ADVANCE(63);
if (lookahead == 'c')
ADVANCE(68);
if (lookahead == 'd')
ADVANCE(75);
if (lookahead == 'e')
ADVANCE(86);
if (lookahead == 'f')
ADVANCE(90);
if (lookahead == 'i')
ADVANCE(110);
if (lookahead == 'n')
ADVANCE(121);
if (lookahead == 'r')
ADVANCE(127);
if (lookahead == 's')
ADVANCE(133);
if (lookahead == 't')
ADVANCE(139);
if (lookahead == 'u')
ADVANCE(153);
if (lookahead == 'v')
ADVANCE(162);
if (lookahead == 'w')
ADVANCE(165);
if (lookahead == '{')
ADVANCE(170);
if (lookahead == '|')
ADVANCE(171);
if (lookahead == '}')
ADVANCE(173);
LEX_ERROR();
default:
LEX_ERROR();
}

View file

@ -64,9 +64,41 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
[sym_false] = {.visible = true, .named = true, .structural = true, .extra = false},
};
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
START_LEXER();
switch (lex_state) {
switch (state) {
case 0:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '\"')
ADVANCE(2);
if (lookahead == ',')
ADVANCE(6);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
if (lookahead == ':')
ADVANCE(10);
if (lookahead == '[')
ADVANCE(11);
if (lookahead == ']')
ADVANCE(12);
if (lookahead == 'f')
ADVANCE(13);
if (lookahead == 'n')
ADVANCE(18);
if (lookahead == 't')
ADVANCE(22);
if (lookahead == '{')
ADVANCE(26);
if (lookahead == '}')
ADVANCE(27);
LEX_ERROR();
case 1:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 2:
@ -305,38 +337,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
if (lookahead == '\"')
ADVANCE(2);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
if (lookahead == '\"')
ADVANCE(2);
if (lookahead == ',')
ADVANCE(6);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
if (lookahead == ':')
ADVANCE(10);
if (lookahead == '[')
ADVANCE(11);
if (lookahead == ']')
ADVANCE(12);
if (lookahead == 'f')
ADVANCE(13);
if (lookahead == 'n')
ADVANCE(18);
if (lookahead == 't')
ADVANCE(22);
if (lookahead == '{')
ADVANCE(26);
if (lookahead == '}')
ADVANCE(27);
LEX_ERROR();
default:
LEX_ERROR();
}

View file

@ -188,14 +188,13 @@ class CCodeGenerator {
}
void add_lex_function() {
line("static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {");
line("static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {");
indent([&]() {
line("START_LEXER();");
_switch("lex_state", [&]() {
for (size_t i = 1; i < lex_table.states.size(); i++)
_case(to_string(i), [&]() { add_lex_state(lex_table.states[i]); });
_case("ts_lex_state_error",
[&]() { add_lex_state(lex_table.states[0]); });
_switch("state", [&]() {
size_t i = 0;
for (const LexState &state : lex_table.states)
_case(to_string(i++), [&]() { add_lex_state(state); });
_default([&]() { line("LEX_ERROR();"); });
});
});

View file

@ -203,7 +203,7 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) {
TSStateId parse_state = ts_stack_top_state(self->stack, head);
TSStateId lex_state = self->language->lex_states[parse_state];
LOG("lex state:%d", lex_state);
return self->language->lex_fn(&self->lexer, lex_state);
return self->language->lex_fn(&self->lexer, lex_state, false);
}
static int ts_parser__split(TSParser *self, int head) {
@ -464,7 +464,7 @@ static bool ts_parser__handle_error(TSParser *self, int head, TSTree *lookahead)
LOG("skip token:%s", SYM_NAME(lookahead->symbol));
ts_parser__shift(self, head, ts_stack_top_state(self->stack, head),
lookahead);
lookahead = self->language->lex_fn(&self->lexer, ts_lex_state_error);
lookahead = self->language->lex_fn(&self->lexer, 0, true);
error_token_count++;
/*