Control lexer's error-mode via explicit boolean argument
Previously, the lexer would operate in error-mode (ignoring any garbage input until it found a valid token) if it was invoked in the 'error' state. Now that the error state is deduplicated with other lexical states, the lexer might be invoked in that state even when error-mode is not intended, in which case it would skip over invalid input instead of returning an error token. This adds a third argument to `ts_lex` that explicitly sets the error-mode. The bug was unlikely to occur in any real grammars, but it caused the node-tree-sitter-compiler test suite to fail for some grammars with only one rule.
This commit is contained in:
parent
4ad1a666be
commit
4b04afac5e
10 changed files with 442 additions and 452 deletions
|
|
@ -87,7 +87,7 @@ struct TSLanguage {
|
|||
const unsigned short *parse_table;
|
||||
const TSParseActionEntry *parse_actions;
|
||||
const TSStateId *lex_states;
|
||||
TSTree *(*lex_fn)(TSLexer *, TSStateId);
|
||||
TSTree *(*lex_fn)(TSLexer *, TSStateId, bool);
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -95,24 +95,23 @@ struct TSLanguage {
|
|||
*/
|
||||
|
||||
#define START_LEXER() \
|
||||
const bool error_mode = (lex_state == ts_lex_state_error); \
|
||||
lexer->start_fn(lexer, lex_state); \
|
||||
lexer->start_fn(lexer, state); \
|
||||
int32_t lookahead; \
|
||||
next_state: \
|
||||
lookahead = lexer->lookahead;
|
||||
|
||||
#define START_TOKEN() lexer->start_token_fn(lexer);
|
||||
|
||||
#define GO_TO_STATE(state_index) \
|
||||
#define GO_TO_STATE(state_value) \
|
||||
{ \
|
||||
lex_state = state_index; \
|
||||
state = state_value; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define ADVANCE(state_index) \
|
||||
#define ADVANCE(state_value) \
|
||||
{ \
|
||||
lexer->advance_fn(lexer, state_index); \
|
||||
GO_TO_STATE(state_index); \
|
||||
lexer->advance_fn(lexer, state_value); \
|
||||
GO_TO_STATE(state_value); \
|
||||
}
|
||||
|
||||
#define ACCEPT_FRAGILE_TOKEN(symbol) \
|
||||
|
|
@ -123,27 +122,19 @@ struct TSLanguage {
|
|||
return lexer->accept_fn(lexer, symbol, ts_symbol_metadata[symbol], \
|
||||
ts_symbol_names[symbol], false);
|
||||
|
||||
#define LEX_ERROR() \
|
||||
if (error_mode) { \
|
||||
if (lex_state == ts_lex_state_error) \
|
||||
ADVANCE(ts_lex_state_error) \
|
||||
else \
|
||||
GO_TO_STATE(ts_lex_state_error) \
|
||||
} else { \
|
||||
ACCEPT_TOKEN(ts_builtin_sym_error) \
|
||||
#define LEX_ERROR() \
|
||||
if (error_mode) { \
|
||||
if (state == ts_lex_state_error) \
|
||||
lexer->advance_fn(lexer, state); \
|
||||
GO_TO_STATE(ts_lex_state_error) \
|
||||
} else { \
|
||||
ACCEPT_TOKEN(ts_builtin_sym_error) \
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse Table Macros
|
||||
*/
|
||||
|
||||
#define ACTIONS(...) \
|
||||
(TSParseAction[]) { \
|
||||
__VA_ARGS__, { \
|
||||
.type = 0 \
|
||||
} \
|
||||
}
|
||||
|
||||
enum {
|
||||
FRAGILE = 1,
|
||||
CAN_HIDE_SPLIT = 2,
|
||||
|
|
|
|||
36
spec/fixtures/parsers/anonymous_tokens.c
vendored
36
spec/fixtures/parsers/anonymous_tokens.c
vendored
|
|
@ -31,9 +31,25 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
|
|||
[anon_sym_DQUOTEhello_DQUOTE] = {.visible = true, .named = false, .structural = true, .extra = false},
|
||||
};
|
||||
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
|
||||
START_LEXER();
|
||||
switch (lex_state) {
|
||||
switch (state) {
|
||||
case 0:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '\n')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\r')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(4);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 2:
|
||||
|
|
@ -94,22 +110,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
|||
(lookahead == ' '))
|
||||
ADVANCE(13);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '\n')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\r')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(4);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_ERROR();
|
||||
}
|
||||
|
|
|
|||
68
spec/fixtures/parsers/arithmetic.c
vendored
68
spec/fixtures/parsers/arithmetic.c
vendored
|
|
@ -70,9 +70,41 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
|
|||
[sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true},
|
||||
};
|
||||
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
|
||||
START_LEXER();
|
||||
switch (lex_state) {
|
||||
switch (state) {
|
||||
case 0:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '#')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(3);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(8);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(9);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z') ||
|
||||
(945 <= lookahead && lookahead <= 969))
|
||||
ADVANCE(10);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 2:
|
||||
|
|
@ -192,38 +224,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
|||
if (lookahead == ')')
|
||||
ADVANCE(4);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '#')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(3);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(8);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(9);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z') ||
|
||||
(945 <= lookahead && lookahead <= 969))
|
||||
ADVANCE(10);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_ERROR();
|
||||
}
|
||||
|
|
|
|||
148
spec/fixtures/parsers/c.c
vendored
148
spec/fixtures/parsers/c.c
vendored
|
|
@ -256,9 +256,81 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
|
|||
[sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true},
|
||||
};
|
||||
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
|
||||
START_LEXER();
|
||||
switch (lex_state) {
|
||||
switch (state) {
|
||||
case 0:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '\n')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '#')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '&')
|
||||
ADVANCE(14);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(15);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(16);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(17);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(18);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(19);
|
||||
if (lookahead == '.')
|
||||
ADVANCE(20);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(21);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(26);
|
||||
if (lookahead == ';')
|
||||
ADVANCE(29);
|
||||
if (lookahead == '=')
|
||||
ADVANCE(30);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
(lookahead == 'b') ||
|
||||
(lookahead == 'd') ||
|
||||
('g' <= lookahead && lookahead <= 'k') ||
|
||||
('m' <= lookahead && lookahead <= 'q') ||
|
||||
('w' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(31);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(32);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(33);
|
||||
if (lookahead == 'a')
|
||||
ADVANCE(34);
|
||||
if (lookahead == 'c')
|
||||
ADVANCE(38);
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(43);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(49);
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(52);
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(56);
|
||||
if (lookahead == 's')
|
||||
ADVANCE(70);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(89);
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(96);
|
||||
if (lookahead == 'v')
|
||||
ADVANCE(104);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(112);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(113);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 2:
|
||||
|
|
@ -2408,78 +2480,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
|||
if (lookahead == '{')
|
||||
ADVANCE(112);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '\n')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '#')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '&')
|
||||
ADVANCE(14);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(15);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(16);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(17);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(18);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(19);
|
||||
if (lookahead == '.')
|
||||
ADVANCE(20);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(21);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(26);
|
||||
if (lookahead == ';')
|
||||
ADVANCE(29);
|
||||
if (lookahead == '=')
|
||||
ADVANCE(30);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
(lookahead == 'b') ||
|
||||
(lookahead == 'd') ||
|
||||
('g' <= lookahead && lookahead <= 'k') ||
|
||||
('m' <= lookahead && lookahead <= 'q') ||
|
||||
('w' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(31);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(32);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(33);
|
||||
if (lookahead == 'a')
|
||||
ADVANCE(34);
|
||||
if (lookahead == 'c')
|
||||
ADVANCE(38);
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(43);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(49);
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(52);
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(56);
|
||||
if (lookahead == 's')
|
||||
ADVANCE(70);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(89);
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(96);
|
||||
if (lookahead == 'v')
|
||||
ADVANCE(104);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(112);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(113);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_ERROR();
|
||||
}
|
||||
|
|
|
|||
156
spec/fixtures/parsers/cpp.c
vendored
156
spec/fixtures/parsers/cpp.c
vendored
|
|
@ -259,9 +259,85 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
|
|||
[sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true},
|
||||
};
|
||||
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
|
||||
START_LEXER();
|
||||
switch (lex_state) {
|
||||
switch (state) {
|
||||
case 0:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '!')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '&')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(10);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(11);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(12);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(13);
|
||||
if (lookahead == '.')
|
||||
ADVANCE(14);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(17);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(19);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(22);
|
||||
if (lookahead == ';')
|
||||
ADVANCE(24);
|
||||
if (lookahead == '<')
|
||||
ADVANCE(25);
|
||||
if (lookahead == '=')
|
||||
ADVANCE(27);
|
||||
if (lookahead == '>')
|
||||
ADVANCE(29);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
(lookahead == 'a') ||
|
||||
(lookahead == 'b') ||
|
||||
(lookahead == 'g') ||
|
||||
(lookahead == 'h') ||
|
||||
('j' <= lookahead && lookahead <= 'l') ||
|
||||
('o' <= lookahead && lookahead <= 'q') ||
|
||||
(lookahead == 'u') ||
|
||||
('w' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(31);
|
||||
if (lookahead == 'c')
|
||||
ADVANCE(32);
|
||||
if (lookahead == 'd')
|
||||
ADVANCE(41);
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(52);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(64);
|
||||
if (lookahead == 'i')
|
||||
ADVANCE(70);
|
||||
if (lookahead == 'm')
|
||||
ADVANCE(90);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(97);
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(106);
|
||||
if (lookahead == 's')
|
||||
ADVANCE(114);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(120);
|
||||
if (lookahead == 'v')
|
||||
ADVANCE(138);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(152);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(153);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 2:
|
||||
|
|
@ -3095,82 +3171,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
|||
LEX_ERROR();
|
||||
case 259:
|
||||
ACCEPT_TOKEN(anon_sym_initializer_list);
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '!')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '&')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(10);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(11);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(12);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(13);
|
||||
if (lookahead == '.')
|
||||
ADVANCE(14);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(17);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(19);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(22);
|
||||
if (lookahead == ';')
|
||||
ADVANCE(24);
|
||||
if (lookahead == '<')
|
||||
ADVANCE(25);
|
||||
if (lookahead == '=')
|
||||
ADVANCE(27);
|
||||
if (lookahead == '>')
|
||||
ADVANCE(29);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
(lookahead == 'a') ||
|
||||
(lookahead == 'b') ||
|
||||
(lookahead == 'g') ||
|
||||
(lookahead == 'h') ||
|
||||
('j' <= lookahead && lookahead <= 'l') ||
|
||||
('o' <= lookahead && lookahead <= 'q') ||
|
||||
(lookahead == 'u') ||
|
||||
('w' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(31);
|
||||
if (lookahead == 'c')
|
||||
ADVANCE(32);
|
||||
if (lookahead == 'd')
|
||||
ADVANCE(41);
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(52);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(64);
|
||||
if (lookahead == 'i')
|
||||
ADVANCE(70);
|
||||
if (lookahead == 'm')
|
||||
ADVANCE(90);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(97);
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(106);
|
||||
if (lookahead == 's')
|
||||
ADVANCE(114);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(120);
|
||||
if (lookahead == 'v')
|
||||
ADVANCE(138);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(152);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(153);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_ERROR();
|
||||
}
|
||||
|
|
|
|||
172
spec/fixtures/parsers/golang.c
vendored
172
spec/fixtures/parsers/golang.c
vendored
|
|
@ -262,9 +262,93 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
|
|||
[sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true},
|
||||
};
|
||||
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
|
||||
START_LEXER();
|
||||
switch (lex_state) {
|
||||
switch (state) {
|
||||
case 0:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '\n')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '!')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '&')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(10);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(11);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(12);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(13);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(14);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(15);
|
||||
if (lookahead == '.')
|
||||
ADVANCE(16);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(17);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(19);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(22);
|
||||
if (lookahead == ';')
|
||||
ADVANCE(24);
|
||||
if (lookahead == '<')
|
||||
ADVANCE(25);
|
||||
if (lookahead == '=')
|
||||
ADVANCE(27);
|
||||
if (lookahead == '>')
|
||||
ADVANCE(29);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'd') ||
|
||||
(lookahead == 'g') ||
|
||||
(lookahead == 'h') ||
|
||||
('j' <= lookahead && lookahead <= 'l') ||
|
||||
(lookahead == 'n') ||
|
||||
(lookahead == 'o') ||
|
||||
(lookahead == 'q') ||
|
||||
(lookahead == 'u') ||
|
||||
('w' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(31);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(32);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(33);
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(34);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(38);
|
||||
if (lookahead == 'i')
|
||||
ADVANCE(44);
|
||||
if (lookahead == 'm')
|
||||
ADVANCE(59);
|
||||
if (lookahead == 'p')
|
||||
ADVANCE(62);
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(69);
|
||||
if (lookahead == 's')
|
||||
ADVANCE(79);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(85);
|
||||
if (lookahead == 'v')
|
||||
ADVANCE(89);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(92);
|
||||
if (lookahead == '|')
|
||||
ADVANCE(93);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(95);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 2:
|
||||
|
|
@ -2159,90 +2243,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
|||
if (lookahead == '/')
|
||||
ADVANCE(97);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '\n')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '!')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '&')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(10);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(11);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(12);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(13);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(14);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(15);
|
||||
if (lookahead == '.')
|
||||
ADVANCE(16);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(17);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(19);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(22);
|
||||
if (lookahead == ';')
|
||||
ADVANCE(24);
|
||||
if (lookahead == '<')
|
||||
ADVANCE(25);
|
||||
if (lookahead == '=')
|
||||
ADVANCE(27);
|
||||
if (lookahead == '>')
|
||||
ADVANCE(29);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'd') ||
|
||||
(lookahead == 'g') ||
|
||||
(lookahead == 'h') ||
|
||||
('j' <= lookahead && lookahead <= 'l') ||
|
||||
(lookahead == 'n') ||
|
||||
(lookahead == 'o') ||
|
||||
(lookahead == 'q') ||
|
||||
(lookahead == 'u') ||
|
||||
('w' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(31);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(32);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(33);
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(34);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(38);
|
||||
if (lookahead == 'i')
|
||||
ADVANCE(44);
|
||||
if (lookahead == 'm')
|
||||
ADVANCE(59);
|
||||
if (lookahead == 'p')
|
||||
ADVANCE(62);
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(69);
|
||||
if (lookahead == 's')
|
||||
ADVANCE(79);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(85);
|
||||
if (lookahead == 'v')
|
||||
ADVANCE(89);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(92);
|
||||
if (lookahead == '|')
|
||||
ADVANCE(93);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(95);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_ERROR();
|
||||
}
|
||||
|
|
|
|||
194
spec/fixtures/parsers/javascript.c
vendored
194
spec/fixtures/parsers/javascript.c
vendored
|
|
@ -331,9 +331,104 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
|
|||
[sym__line_break] = {.visible = false, .named = false, .structural = true, .extra = true},
|
||||
};
|
||||
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
|
||||
START_LEXER();
|
||||
switch (lex_state) {
|
||||
switch (state) {
|
||||
case 0:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '\n')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '!')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(6);
|
||||
if ((lookahead == '$') ||
|
||||
('A' <= lookahead && lookahead <= 'Z') ||
|
||||
(lookahead == '_') ||
|
||||
(lookahead == 'a') ||
|
||||
(lookahead == 'g') ||
|
||||
(lookahead == 'h') ||
|
||||
('j' <= lookahead && lookahead <= 'm') ||
|
||||
('o' <= lookahead && lookahead <= 'q') ||
|
||||
('x' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(10);
|
||||
if (lookahead == '&')
|
||||
ADVANCE(11);
|
||||
if (lookahead == '\'')
|
||||
ADVANCE(13);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(16);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(17);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(18);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(20);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(23);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(24);
|
||||
if (lookahead == '.')
|
||||
ADVANCE(27);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(28);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(50);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(53);
|
||||
if (lookahead == ';')
|
||||
ADVANCE(54);
|
||||
if (lookahead == '<')
|
||||
ADVANCE(55);
|
||||
if (lookahead == '=')
|
||||
ADVANCE(56);
|
||||
if (lookahead == '>')
|
||||
ADVANCE(59);
|
||||
if (lookahead == '?')
|
||||
ADVANCE(60);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(61);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(62);
|
||||
if (lookahead == 'b')
|
||||
ADVANCE(63);
|
||||
if (lookahead == 'c')
|
||||
ADVANCE(68);
|
||||
if (lookahead == 'd')
|
||||
ADVANCE(75);
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(86);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(90);
|
||||
if (lookahead == 'i')
|
||||
ADVANCE(110);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(121);
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(127);
|
||||
if (lookahead == 's')
|
||||
ADVANCE(133);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(139);
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(153);
|
||||
if (lookahead == 'v')
|
||||
ADVANCE(162);
|
||||
if (lookahead == 'w')
|
||||
ADVANCE(165);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(170);
|
||||
if (lookahead == '|')
|
||||
ADVANCE(171);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(173);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 2:
|
||||
|
|
@ -6055,101 +6150,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
|||
if (lookahead == '{')
|
||||
ADVANCE(170);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '\n')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '!')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(6);
|
||||
if ((lookahead == '$') ||
|
||||
('A' <= lookahead && lookahead <= 'Z') ||
|
||||
(lookahead == '_') ||
|
||||
(lookahead == 'a') ||
|
||||
(lookahead == 'g') ||
|
||||
(lookahead == 'h') ||
|
||||
('j' <= lookahead && lookahead <= 'm') ||
|
||||
('o' <= lookahead && lookahead <= 'q') ||
|
||||
('x' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(10);
|
||||
if (lookahead == '&')
|
||||
ADVANCE(11);
|
||||
if (lookahead == '\'')
|
||||
ADVANCE(13);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(16);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(17);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(18);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(20);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(23);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(24);
|
||||
if (lookahead == '.')
|
||||
ADVANCE(27);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(28);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(50);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(53);
|
||||
if (lookahead == ';')
|
||||
ADVANCE(54);
|
||||
if (lookahead == '<')
|
||||
ADVANCE(55);
|
||||
if (lookahead == '=')
|
||||
ADVANCE(56);
|
||||
if (lookahead == '>')
|
||||
ADVANCE(59);
|
||||
if (lookahead == '?')
|
||||
ADVANCE(60);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(61);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(62);
|
||||
if (lookahead == 'b')
|
||||
ADVANCE(63);
|
||||
if (lookahead == 'c')
|
||||
ADVANCE(68);
|
||||
if (lookahead == 'd')
|
||||
ADVANCE(75);
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(86);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(90);
|
||||
if (lookahead == 'i')
|
||||
ADVANCE(110);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(121);
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(127);
|
||||
if (lookahead == 's')
|
||||
ADVANCE(133);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(139);
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(153);
|
||||
if (lookahead == 'v')
|
||||
ADVANCE(162);
|
||||
if (lookahead == 'w')
|
||||
ADVANCE(165);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(170);
|
||||
if (lookahead == '|')
|
||||
ADVANCE(171);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(173);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_ERROR();
|
||||
}
|
||||
|
|
|
|||
68
spec/fixtures/parsers/json.c
vendored
68
spec/fixtures/parsers/json.c
vendored
|
|
@ -64,9 +64,41 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
|
|||
[sym_false] = {.visible = true, .named = true, .structural = true, .extra = false},
|
||||
};
|
||||
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
||||
static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
|
||||
START_LEXER();
|
||||
switch (lex_state) {
|
||||
switch (state) {
|
||||
case 0:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(2);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(6);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(7);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(11);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(12);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(13);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(18);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(22);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(26);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 2:
|
||||
|
|
@ -305,38 +337,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
|||
if (lookahead == '\"')
|
||||
ADVANCE(2);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(2);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(6);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(7);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(11);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(12);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(13);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(18);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(22);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(26);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_ERROR();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -188,14 +188,13 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
void add_lex_function() {
|
||||
line("static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {");
|
||||
line("static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {");
|
||||
indent([&]() {
|
||||
line("START_LEXER();");
|
||||
_switch("lex_state", [&]() {
|
||||
for (size_t i = 1; i < lex_table.states.size(); i++)
|
||||
_case(to_string(i), [&]() { add_lex_state(lex_table.states[i]); });
|
||||
_case("ts_lex_state_error",
|
||||
[&]() { add_lex_state(lex_table.states[0]); });
|
||||
_switch("state", [&]() {
|
||||
size_t i = 0;
|
||||
for (const LexState &state : lex_table.states)
|
||||
_case(to_string(i++), [&]() { add_lex_state(state); });
|
||||
_default([&]() { line("LEX_ERROR();"); });
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -203,7 +203,7 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) {
|
|||
TSStateId parse_state = ts_stack_top_state(self->stack, head);
|
||||
TSStateId lex_state = self->language->lex_states[parse_state];
|
||||
LOG("lex state:%d", lex_state);
|
||||
return self->language->lex_fn(&self->lexer, lex_state);
|
||||
return self->language->lex_fn(&self->lexer, lex_state, false);
|
||||
}
|
||||
|
||||
static int ts_parser__split(TSParser *self, int head) {
|
||||
|
|
@ -464,7 +464,7 @@ static bool ts_parser__handle_error(TSParser *self, int head, TSTree *lookahead)
|
|||
LOG("skip token:%s", SYM_NAME(lookahead->symbol));
|
||||
ts_parser__shift(self, head, ts_stack_top_state(self->stack, head),
|
||||
lookahead);
|
||||
lookahead = self->language->lex_fn(&self->lexer, ts_lex_state_error);
|
||||
lookahead = self->language->lex_fn(&self->lexer, 0, true);
|
||||
error_token_count++;
|
||||
|
||||
/*
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue