diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 5ce5cd2e..e93d633e 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -87,7 +87,7 @@ struct TSLanguage { const unsigned short *parse_table; const TSParseActionEntry *parse_actions; const TSStateId *lex_states; - TSTree *(*lex_fn)(TSLexer *, TSStateId); + TSTree *(*lex_fn)(TSLexer *, TSStateId, bool); }; /* @@ -95,24 +95,23 @@ struct TSLanguage { */ #define START_LEXER() \ - const bool error_mode = (lex_state == ts_lex_state_error); \ - lexer->start_fn(lexer, lex_state); \ + lexer->start_fn(lexer, state); \ int32_t lookahead; \ next_state: \ lookahead = lexer->lookahead; #define START_TOKEN() lexer->start_token_fn(lexer); -#define GO_TO_STATE(state_index) \ +#define GO_TO_STATE(state_value) \ { \ - lex_state = state_index; \ + state = state_value; \ goto next_state; \ } -#define ADVANCE(state_index) \ +#define ADVANCE(state_value) \ { \ - lexer->advance_fn(lexer, state_index); \ - GO_TO_STATE(state_index); \ + lexer->advance_fn(lexer, state_value); \ + GO_TO_STATE(state_value); \ } #define ACCEPT_FRAGILE_TOKEN(symbol) \ @@ -123,27 +122,19 @@ struct TSLanguage { return lexer->accept_fn(lexer, symbol, ts_symbol_metadata[symbol], \ ts_symbol_names[symbol], false); -#define LEX_ERROR() \ - if (error_mode) { \ - if (lex_state == ts_lex_state_error) \ - ADVANCE(ts_lex_state_error) \ - else \ - GO_TO_STATE(ts_lex_state_error) \ - } else { \ - ACCEPT_TOKEN(ts_builtin_sym_error) \ +#define LEX_ERROR() \ + if (error_mode) { \ + if (state == ts_lex_state_error) \ + lexer->advance_fn(lexer, state); \ + GO_TO_STATE(ts_lex_state_error) \ + } else { \ + ACCEPT_TOKEN(ts_builtin_sym_error) \ } /* * Parse Table Macros */ -#define ACTIONS(...) \ - (TSParseAction[]) { \ - __VA_ARGS__, { \ - .type = 0 \ - } \ - } - enum { FRAGILE = 1, CAN_HIDE_SPLIT = 2, diff --git a/spec/fixtures/parsers/anonymous_tokens.c b/spec/fixtures/parsers/anonymous_tokens.c index 3cc44655..bc6ede0f 100644 --- a/spec/fixtures/parsers/anonymous_tokens.c +++ b/spec/fixtures/parsers/anonymous_tokens.c @@ -31,9 +31,25 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = { [anon_sym_DQUOTEhello_DQUOTE] = {.visible = true, .named = false, .structural = true, .extra = false}, }; -static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { +static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) { START_LEXER(); - switch (lex_state) { + switch (state) { + case 0: + START_TOKEN(); + if (lookahead == 0) + ADVANCE(1); + if ((lookahead == '\t') || + (lookahead == ' ')) + ADVANCE(0); + if (lookahead == '\n') + ADVANCE(2); + if (lookahead == '\r') + ADVANCE(3); + if (lookahead == '\"') + ADVANCE(4); + if ('0' <= lookahead && lookahead <= '9') + ADVANCE(11); + LEX_ERROR(); case 1: ACCEPT_TOKEN(ts_builtin_sym_end); case 2: @@ -94,22 +110,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { (lookahead == ' ')) ADVANCE(13); LEX_ERROR(); - case ts_lex_state_error: - START_TOKEN(); - if (lookahead == 0) - ADVANCE(1); - if ((lookahead == '\t') || - (lookahead == ' ')) - ADVANCE(0); - if (lookahead == '\n') - ADVANCE(2); - if (lookahead == '\r') - ADVANCE(3); - if (lookahead == '\"') - ADVANCE(4); - if ('0' <= lookahead && lookahead <= '9') - ADVANCE(11); - LEX_ERROR(); default: LEX_ERROR(); } diff --git a/spec/fixtures/parsers/arithmetic.c b/spec/fixtures/parsers/arithmetic.c index 02f7a1b4..a7544629 100644 --- a/spec/fixtures/parsers/arithmetic.c +++ b/spec/fixtures/parsers/arithmetic.c @@ -70,9 +70,41 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = { [sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true}, }; -static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { +static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) { START_LEXER(); - switch (lex_state) { + switch (state) { + case 0: + START_TOKEN(); + if (lookahead == 0) + ADVANCE(1); + if ((lookahead == '\t') || + (lookahead == '\n') || + (lookahead == '\r') || + (lookahead == ' ')) + ADVANCE(0); + if (lookahead == '#') + ADVANCE(2); + if (lookahead == '(') + ADVANCE(3); + if (lookahead == ')') + ADVANCE(4); + if (lookahead == '*') + ADVANCE(5); + if (lookahead == '+') + ADVANCE(6); + if (lookahead == '-') + ADVANCE(7); + if (lookahead == '/') + ADVANCE(8); + if ('0' <= lookahead && lookahead <= '9') + ADVANCE(9); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z') || + (945 <= lookahead && lookahead <= 969)) + ADVANCE(10); + if (lookahead == '^') + ADVANCE(11); + LEX_ERROR(); case 1: ACCEPT_TOKEN(ts_builtin_sym_end); case 2: @@ -192,38 +224,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == ')') ADVANCE(4); LEX_ERROR(); - case ts_lex_state_error: - START_TOKEN(); - if (lookahead == 0) - ADVANCE(1); - if ((lookahead == '\t') || - (lookahead == '\n') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(0); - if (lookahead == '#') - ADVANCE(2); - if (lookahead == '(') - ADVANCE(3); - if (lookahead == ')') - ADVANCE(4); - if (lookahead == '*') - ADVANCE(5); - if (lookahead == '+') - ADVANCE(6); - if (lookahead == '-') - ADVANCE(7); - if (lookahead == '/') - ADVANCE(8); - if ('0' <= lookahead && lookahead <= '9') - ADVANCE(9); - if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'z') || - (945 <= lookahead && lookahead <= 969)) - ADVANCE(10); - if (lookahead == '^') - ADVANCE(11); - LEX_ERROR(); default: LEX_ERROR(); } diff --git a/spec/fixtures/parsers/c.c b/spec/fixtures/parsers/c.c index eefa4ed1..0a927585 100644 --- a/spec/fixtures/parsers/c.c +++ b/spec/fixtures/parsers/c.c @@ -256,9 +256,81 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = { [sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true}, }; -static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { +static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) { START_LEXER(); - switch (lex_state) { + switch (state) { + case 0: + START_TOKEN(); + if (lookahead == 0) + ADVANCE(1); + if ((lookahead == '\t') || + (lookahead == '\r') || + (lookahead == ' ')) + ADVANCE(0); + if (lookahead == '\n') + ADVANCE(2); + if (lookahead == '\"') + ADVANCE(3); + if (lookahead == '#') + ADVANCE(7); + if (lookahead == '&') + ADVANCE(14); + if (lookahead == '(') + ADVANCE(15); + if (lookahead == ')') + ADVANCE(16); + if (lookahead == '*') + ADVANCE(17); + if (lookahead == '+') + ADVANCE(18); + if (lookahead == ',') + ADVANCE(19); + if (lookahead == '.') + ADVANCE(20); + if (lookahead == '/') + ADVANCE(21); + if ('0' <= lookahead && lookahead <= '9') + ADVANCE(26); + if (lookahead == ';') + ADVANCE(29); + if (lookahead == '=') + ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + (lookahead == 'b') || + (lookahead == 'd') || + ('g' <= lookahead && lookahead <= 'k') || + ('m' <= lookahead && lookahead <= 'q') || + ('w' <= lookahead && lookahead <= 'z')) + ADVANCE(31); + if (lookahead == '[') + ADVANCE(32); + if (lookahead == ']') + ADVANCE(33); + if (lookahead == 'a') + ADVANCE(34); + if (lookahead == 'c') + ADVANCE(38); + if (lookahead == 'e') + ADVANCE(43); + if (lookahead == 'f') + ADVANCE(49); + if (lookahead == 'l') + ADVANCE(52); + if (lookahead == 'r') + ADVANCE(56); + if (lookahead == 's') + ADVANCE(70); + if (lookahead == 't') + ADVANCE(89); + if (lookahead == 'u') + ADVANCE(96); + if (lookahead == 'v') + ADVANCE(104); + if (lookahead == '{') + ADVANCE(112); + if (lookahead == '}') + ADVANCE(113); + LEX_ERROR(); case 1: ACCEPT_TOKEN(ts_builtin_sym_end); case 2: @@ -2408,78 +2480,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '{') ADVANCE(112); LEX_ERROR(); - case ts_lex_state_error: - START_TOKEN(); - if (lookahead == 0) - ADVANCE(1); - if ((lookahead == '\t') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(0); - if (lookahead == '\n') - ADVANCE(2); - if (lookahead == '\"') - ADVANCE(3); - if (lookahead == '#') - ADVANCE(7); - if (lookahead == '&') - ADVANCE(14); - if (lookahead == '(') - ADVANCE(15); - if (lookahead == ')') - ADVANCE(16); - if (lookahead == '*') - ADVANCE(17); - if (lookahead == '+') - ADVANCE(18); - if (lookahead == ',') - ADVANCE(19); - if (lookahead == '.') - ADVANCE(20); - if (lookahead == '/') - ADVANCE(21); - if ('0' <= lookahead && lookahead <= '9') - ADVANCE(26); - if (lookahead == ';') - ADVANCE(29); - if (lookahead == '=') - ADVANCE(30); - if (('A' <= lookahead && lookahead <= 'Z') || - (lookahead == 'b') || - (lookahead == 'd') || - ('g' <= lookahead && lookahead <= 'k') || - ('m' <= lookahead && lookahead <= 'q') || - ('w' <= lookahead && lookahead <= 'z')) - ADVANCE(31); - if (lookahead == '[') - ADVANCE(32); - if (lookahead == ']') - ADVANCE(33); - if (lookahead == 'a') - ADVANCE(34); - if (lookahead == 'c') - ADVANCE(38); - if (lookahead == 'e') - ADVANCE(43); - if (lookahead == 'f') - ADVANCE(49); - if (lookahead == 'l') - ADVANCE(52); - if (lookahead == 'r') - ADVANCE(56); - if (lookahead == 's') - ADVANCE(70); - if (lookahead == 't') - ADVANCE(89); - if (lookahead == 'u') - ADVANCE(96); - if (lookahead == 'v') - ADVANCE(104); - if (lookahead == '{') - ADVANCE(112); - if (lookahead == '}') - ADVANCE(113); - LEX_ERROR(); default: LEX_ERROR(); } diff --git a/spec/fixtures/parsers/cpp.c b/spec/fixtures/parsers/cpp.c index 8383b2b4..57a76a3f 100644 --- a/spec/fixtures/parsers/cpp.c +++ b/spec/fixtures/parsers/cpp.c @@ -259,9 +259,85 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = { [sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true}, }; -static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { +static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) { START_LEXER(); - switch (lex_state) { + switch (state) { + case 0: + START_TOKEN(); + if (lookahead == 0) + ADVANCE(1); + if ((lookahead == '\t') || + (lookahead == '\n') || + (lookahead == '\r') || + (lookahead == ' ')) + ADVANCE(0); + if (lookahead == '!') + ADVANCE(2); + if (lookahead == '\"') + ADVANCE(4); + if (lookahead == '&') + ADVANCE(8); + if (lookahead == '(') + ADVANCE(10); + if (lookahead == ')') + ADVANCE(11); + if (lookahead == '*') + ADVANCE(12); + if (lookahead == ',') + ADVANCE(13); + if (lookahead == '.') + ADVANCE(14); + if (lookahead == '/') + ADVANCE(17); + if ('0' <= lookahead && lookahead <= '9') + ADVANCE(19); + if (lookahead == ':') + ADVANCE(22); + if (lookahead == ';') + ADVANCE(24); + if (lookahead == '<') + ADVANCE(25); + if (lookahead == '=') + ADVANCE(27); + if (lookahead == '>') + ADVANCE(29); + if (('A' <= lookahead && lookahead <= 'Z') || + (lookahead == 'a') || + (lookahead == 'b') || + (lookahead == 'g') || + (lookahead == 'h') || + ('j' <= lookahead && lookahead <= 'l') || + ('o' <= lookahead && lookahead <= 'q') || + (lookahead == 'u') || + ('w' <= lookahead && lookahead <= 'z')) + ADVANCE(31); + if (lookahead == 'c') + ADVANCE(32); + if (lookahead == 'd') + ADVANCE(41); + if (lookahead == 'e') + ADVANCE(52); + if (lookahead == 'f') + ADVANCE(64); + if (lookahead == 'i') + ADVANCE(70); + if (lookahead == 'm') + ADVANCE(90); + if (lookahead == 'n') + ADVANCE(97); + if (lookahead == 'r') + ADVANCE(106); + if (lookahead == 's') + ADVANCE(114); + if (lookahead == 't') + ADVANCE(120); + if (lookahead == 'v') + ADVANCE(138); + if (lookahead == '{') + ADVANCE(152); + if (lookahead == '}') + ADVANCE(153); + LEX_ERROR(); case 1: ACCEPT_TOKEN(ts_builtin_sym_end); case 2: @@ -3095,82 +3171,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { LEX_ERROR(); case 259: ACCEPT_TOKEN(anon_sym_initializer_list); - case ts_lex_state_error: - START_TOKEN(); - if (lookahead == 0) - ADVANCE(1); - if ((lookahead == '\t') || - (lookahead == '\n') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(0); - if (lookahead == '!') - ADVANCE(2); - if (lookahead == '\"') - ADVANCE(4); - if (lookahead == '&') - ADVANCE(8); - if (lookahead == '(') - ADVANCE(10); - if (lookahead == ')') - ADVANCE(11); - if (lookahead == '*') - ADVANCE(12); - if (lookahead == ',') - ADVANCE(13); - if (lookahead == '.') - ADVANCE(14); - if (lookahead == '/') - ADVANCE(17); - if ('0' <= lookahead && lookahead <= '9') - ADVANCE(19); - if (lookahead == ':') - ADVANCE(22); - if (lookahead == ';') - ADVANCE(24); - if (lookahead == '<') - ADVANCE(25); - if (lookahead == '=') - ADVANCE(27); - if (lookahead == '>') - ADVANCE(29); - if (('A' <= lookahead && lookahead <= 'Z') || - (lookahead == 'a') || - (lookahead == 'b') || - (lookahead == 'g') || - (lookahead == 'h') || - ('j' <= lookahead && lookahead <= 'l') || - ('o' <= lookahead && lookahead <= 'q') || - (lookahead == 'u') || - ('w' <= lookahead && lookahead <= 'z')) - ADVANCE(31); - if (lookahead == 'c') - ADVANCE(32); - if (lookahead == 'd') - ADVANCE(41); - if (lookahead == 'e') - ADVANCE(52); - if (lookahead == 'f') - ADVANCE(64); - if (lookahead == 'i') - ADVANCE(70); - if (lookahead == 'm') - ADVANCE(90); - if (lookahead == 'n') - ADVANCE(97); - if (lookahead == 'r') - ADVANCE(106); - if (lookahead == 's') - ADVANCE(114); - if (lookahead == 't') - ADVANCE(120); - if (lookahead == 'v') - ADVANCE(138); - if (lookahead == '{') - ADVANCE(152); - if (lookahead == '}') - ADVANCE(153); - LEX_ERROR(); default: LEX_ERROR(); } diff --git a/spec/fixtures/parsers/golang.c b/spec/fixtures/parsers/golang.c index 9f123abc..95ab52df 100644 --- a/spec/fixtures/parsers/golang.c +++ b/spec/fixtures/parsers/golang.c @@ -262,9 +262,93 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = { [sym_comment] = {.visible = true, .named = true, .structural = false, .extra = true}, }; -static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { +static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) { START_LEXER(); - switch (lex_state) { + switch (state) { + case 0: + START_TOKEN(); + if (lookahead == 0) + ADVANCE(1); + if ((lookahead == '\t') || + (lookahead == '\r') || + (lookahead == ' ')) + ADVANCE(0); + if (lookahead == '\n') + ADVANCE(2); + if (lookahead == '!') + ADVANCE(3); + if (lookahead == '\"') + ADVANCE(4); + if (lookahead == '&') + ADVANCE(8); + if (lookahead == '(') + ADVANCE(10); + if (lookahead == ')') + ADVANCE(11); + if (lookahead == '*') + ADVANCE(12); + if (lookahead == '+') + ADVANCE(13); + if (lookahead == ',') + ADVANCE(14); + if (lookahead == '-') + ADVANCE(15); + if (lookahead == '.') + ADVANCE(16); + if (lookahead == '/') + ADVANCE(17); + if ('0' <= lookahead && lookahead <= '9') + ADVANCE(19); + if (lookahead == ':') + ADVANCE(22); + if (lookahead == ';') + ADVANCE(24); + if (lookahead == '<') + ADVANCE(25); + if (lookahead == '=') + ADVANCE(27); + if (lookahead == '>') + ADVANCE(29); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'd') || + (lookahead == 'g') || + (lookahead == 'h') || + ('j' <= lookahead && lookahead <= 'l') || + (lookahead == 'n') || + (lookahead == 'o') || + (lookahead == 'q') || + (lookahead == 'u') || + ('w' <= lookahead && lookahead <= 'z')) + ADVANCE(31); + if (lookahead == '[') + ADVANCE(32); + if (lookahead == ']') + ADVANCE(33); + if (lookahead == 'e') + ADVANCE(34); + if (lookahead == 'f') + ADVANCE(38); + if (lookahead == 'i') + ADVANCE(44); + if (lookahead == 'm') + ADVANCE(59); + if (lookahead == 'p') + ADVANCE(62); + if (lookahead == 'r') + ADVANCE(69); + if (lookahead == 's') + ADVANCE(79); + if (lookahead == 't') + ADVANCE(85); + if (lookahead == 'v') + ADVANCE(89); + if (lookahead == '{') + ADVANCE(92); + if (lookahead == '|') + ADVANCE(93); + if (lookahead == '}') + ADVANCE(95); + LEX_ERROR(); case 1: ACCEPT_TOKEN(ts_builtin_sym_end); case 2: @@ -2159,90 +2243,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '/') ADVANCE(97); LEX_ERROR(); - case ts_lex_state_error: - START_TOKEN(); - if (lookahead == 0) - ADVANCE(1); - if ((lookahead == '\t') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(0); - if (lookahead == '\n') - ADVANCE(2); - if (lookahead == '!') - ADVANCE(3); - if (lookahead == '\"') - ADVANCE(4); - if (lookahead == '&') - ADVANCE(8); - if (lookahead == '(') - ADVANCE(10); - if (lookahead == ')') - ADVANCE(11); - if (lookahead == '*') - ADVANCE(12); - if (lookahead == '+') - ADVANCE(13); - if (lookahead == ',') - ADVANCE(14); - if (lookahead == '-') - ADVANCE(15); - if (lookahead == '.') - ADVANCE(16); - if (lookahead == '/') - ADVANCE(17); - if ('0' <= lookahead && lookahead <= '9') - ADVANCE(19); - if (lookahead == ':') - ADVANCE(22); - if (lookahead == ';') - ADVANCE(24); - if (lookahead == '<') - ADVANCE(25); - if (lookahead == '=') - ADVANCE(27); - if (lookahead == '>') - ADVANCE(29); - if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'd') || - (lookahead == 'g') || - (lookahead == 'h') || - ('j' <= lookahead && lookahead <= 'l') || - (lookahead == 'n') || - (lookahead == 'o') || - (lookahead == 'q') || - (lookahead == 'u') || - ('w' <= lookahead && lookahead <= 'z')) - ADVANCE(31); - if (lookahead == '[') - ADVANCE(32); - if (lookahead == ']') - ADVANCE(33); - if (lookahead == 'e') - ADVANCE(34); - if (lookahead == 'f') - ADVANCE(38); - if (lookahead == 'i') - ADVANCE(44); - if (lookahead == 'm') - ADVANCE(59); - if (lookahead == 'p') - ADVANCE(62); - if (lookahead == 'r') - ADVANCE(69); - if (lookahead == 's') - ADVANCE(79); - if (lookahead == 't') - ADVANCE(85); - if (lookahead == 'v') - ADVANCE(89); - if (lookahead == '{') - ADVANCE(92); - if (lookahead == '|') - ADVANCE(93); - if (lookahead == '}') - ADVANCE(95); - LEX_ERROR(); default: LEX_ERROR(); } diff --git a/spec/fixtures/parsers/javascript.c b/spec/fixtures/parsers/javascript.c index afdf715c..e8e7f166 100644 --- a/spec/fixtures/parsers/javascript.c +++ b/spec/fixtures/parsers/javascript.c @@ -331,9 +331,104 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = { [sym__line_break] = {.visible = false, .named = false, .structural = true, .extra = true}, }; -static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { +static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) { START_LEXER(); - switch (lex_state) { + switch (state) { + case 0: + START_TOKEN(); + if (lookahead == 0) + ADVANCE(1); + if ((lookahead == '\t') || + (lookahead == '\r') || + (lookahead == ' ')) + ADVANCE(0); + if (lookahead == '\n') + ADVANCE(2); + if (lookahead == '!') + ADVANCE(3); + if (lookahead == '\"') + ADVANCE(6); + if ((lookahead == '$') || + ('A' <= lookahead && lookahead <= 'Z') || + (lookahead == '_') || + (lookahead == 'a') || + (lookahead == 'g') || + (lookahead == 'h') || + ('j' <= lookahead && lookahead <= 'm') || + ('o' <= lookahead && lookahead <= 'q') || + ('x' <= lookahead && lookahead <= 'z')) + ADVANCE(10); + if (lookahead == '&') + ADVANCE(11); + if (lookahead == '\'') + ADVANCE(13); + if (lookahead == '(') + ADVANCE(16); + if (lookahead == ')') + ADVANCE(17); + if (lookahead == '*') + ADVANCE(18); + if (lookahead == '+') + ADVANCE(20); + if (lookahead == ',') + ADVANCE(23); + if (lookahead == '-') + ADVANCE(24); + if (lookahead == '.') + ADVANCE(27); + if (lookahead == '/') + ADVANCE(28); + if ('0' <= lookahead && lookahead <= '9') + ADVANCE(50); + if (lookahead == ':') + ADVANCE(53); + if (lookahead == ';') + ADVANCE(54); + if (lookahead == '<') + ADVANCE(55); + if (lookahead == '=') + ADVANCE(56); + if (lookahead == '>') + ADVANCE(59); + if (lookahead == '?') + ADVANCE(60); + if (lookahead == '[') + ADVANCE(61); + if (lookahead == ']') + ADVANCE(62); + if (lookahead == 'b') + ADVANCE(63); + if (lookahead == 'c') + ADVANCE(68); + if (lookahead == 'd') + ADVANCE(75); + if (lookahead == 'e') + ADVANCE(86); + if (lookahead == 'f') + ADVANCE(90); + if (lookahead == 'i') + ADVANCE(110); + if (lookahead == 'n') + ADVANCE(121); + if (lookahead == 'r') + ADVANCE(127); + if (lookahead == 's') + ADVANCE(133); + if (lookahead == 't') + ADVANCE(139); + if (lookahead == 'u') + ADVANCE(153); + if (lookahead == 'v') + ADVANCE(162); + if (lookahead == 'w') + ADVANCE(165); + if (lookahead == '{') + ADVANCE(170); + if (lookahead == '|') + ADVANCE(171); + if (lookahead == '}') + ADVANCE(173); + LEX_ERROR(); case 1: ACCEPT_TOKEN(ts_builtin_sym_end); case 2: @@ -6055,101 +6150,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '{') ADVANCE(170); LEX_ERROR(); - case ts_lex_state_error: - START_TOKEN(); - if (lookahead == 0) - ADVANCE(1); - if ((lookahead == '\t') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(0); - if (lookahead == '\n') - ADVANCE(2); - if (lookahead == '!') - ADVANCE(3); - if (lookahead == '\"') - ADVANCE(6); - if ((lookahead == '$') || - ('A' <= lookahead && lookahead <= 'Z') || - (lookahead == '_') || - (lookahead == 'a') || - (lookahead == 'g') || - (lookahead == 'h') || - ('j' <= lookahead && lookahead <= 'm') || - ('o' <= lookahead && lookahead <= 'q') || - ('x' <= lookahead && lookahead <= 'z')) - ADVANCE(10); - if (lookahead == '&') - ADVANCE(11); - if (lookahead == '\'') - ADVANCE(13); - if (lookahead == '(') - ADVANCE(16); - if (lookahead == ')') - ADVANCE(17); - if (lookahead == '*') - ADVANCE(18); - if (lookahead == '+') - ADVANCE(20); - if (lookahead == ',') - ADVANCE(23); - if (lookahead == '-') - ADVANCE(24); - if (lookahead == '.') - ADVANCE(27); - if (lookahead == '/') - ADVANCE(28); - if ('0' <= lookahead && lookahead <= '9') - ADVANCE(50); - if (lookahead == ':') - ADVANCE(53); - if (lookahead == ';') - ADVANCE(54); - if (lookahead == '<') - ADVANCE(55); - if (lookahead == '=') - ADVANCE(56); - if (lookahead == '>') - ADVANCE(59); - if (lookahead == '?') - ADVANCE(60); - if (lookahead == '[') - ADVANCE(61); - if (lookahead == ']') - ADVANCE(62); - if (lookahead == 'b') - ADVANCE(63); - if (lookahead == 'c') - ADVANCE(68); - if (lookahead == 'd') - ADVANCE(75); - if (lookahead == 'e') - ADVANCE(86); - if (lookahead == 'f') - ADVANCE(90); - if (lookahead == 'i') - ADVANCE(110); - if (lookahead == 'n') - ADVANCE(121); - if (lookahead == 'r') - ADVANCE(127); - if (lookahead == 's') - ADVANCE(133); - if (lookahead == 't') - ADVANCE(139); - if (lookahead == 'u') - ADVANCE(153); - if (lookahead == 'v') - ADVANCE(162); - if (lookahead == 'w') - ADVANCE(165); - if (lookahead == '{') - ADVANCE(170); - if (lookahead == '|') - ADVANCE(171); - if (lookahead == '}') - ADVANCE(173); - LEX_ERROR(); default: LEX_ERROR(); } diff --git a/spec/fixtures/parsers/json.c b/spec/fixtures/parsers/json.c index 27f1acca..2270fcc6 100644 --- a/spec/fixtures/parsers/json.c +++ b/spec/fixtures/parsers/json.c @@ -64,9 +64,41 @@ static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = { [sym_false] = {.visible = true, .named = true, .structural = true, .extra = false}, }; -static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { +static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) { START_LEXER(); - switch (lex_state) { + switch (state) { + case 0: + START_TOKEN(); + if (lookahead == 0) + ADVANCE(1); + if ((lookahead == '\t') || + (lookahead == '\n') || + (lookahead == '\r') || + (lookahead == ' ')) + ADVANCE(0); + if (lookahead == '\"') + ADVANCE(2); + if (lookahead == ',') + ADVANCE(6); + if ('0' <= lookahead && lookahead <= '9') + ADVANCE(7); + if (lookahead == ':') + ADVANCE(10); + if (lookahead == '[') + ADVANCE(11); + if (lookahead == ']') + ADVANCE(12); + if (lookahead == 'f') + ADVANCE(13); + if (lookahead == 'n') + ADVANCE(18); + if (lookahead == 't') + ADVANCE(22); + if (lookahead == '{') + ADVANCE(26); + if (lookahead == '}') + ADVANCE(27); + LEX_ERROR(); case 1: ACCEPT_TOKEN(ts_builtin_sym_end); case 2: @@ -305,38 +337,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '\"') ADVANCE(2); LEX_ERROR(); - case ts_lex_state_error: - START_TOKEN(); - if (lookahead == 0) - ADVANCE(1); - if ((lookahead == '\t') || - (lookahead == '\n') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(0); - if (lookahead == '\"') - ADVANCE(2); - if (lookahead == ',') - ADVANCE(6); - if ('0' <= lookahead && lookahead <= '9') - ADVANCE(7); - if (lookahead == ':') - ADVANCE(10); - if (lookahead == '[') - ADVANCE(11); - if (lookahead == ']') - ADVANCE(12); - if (lookahead == 'f') - ADVANCE(13); - if (lookahead == 'n') - ADVANCE(18); - if (lookahead == 't') - ADVANCE(22); - if (lookahead == '{') - ADVANCE(26); - if (lookahead == '}') - ADVANCE(27); - LEX_ERROR(); default: LEX_ERROR(); } diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 664ee40c..7e96cdcf 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -188,14 +188,13 @@ class CCodeGenerator { } void add_lex_function() { - line("static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {"); + line("static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {"); indent([&]() { line("START_LEXER();"); - _switch("lex_state", [&]() { - for (size_t i = 1; i < lex_table.states.size(); i++) - _case(to_string(i), [&]() { add_lex_state(lex_table.states[i]); }); - _case("ts_lex_state_error", - [&]() { add_lex_state(lex_table.states[0]); }); + _switch("state", [&]() { + size_t i = 0; + for (const LexState &state : lex_table.states) + _case(to_string(i++), [&]() { add_lex_state(state); }); _default([&]() { line("LEX_ERROR();"); }); }); }); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index d2971a1a..92595225 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -203,7 +203,7 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { TSStateId parse_state = ts_stack_top_state(self->stack, head); TSStateId lex_state = self->language->lex_states[parse_state]; LOG("lex state:%d", lex_state); - return self->language->lex_fn(&self->lexer, lex_state); + return self->language->lex_fn(&self->lexer, lex_state, false); } static int ts_parser__split(TSParser *self, int head) { @@ -464,7 +464,7 @@ static bool ts_parser__handle_error(TSParser *self, int head, TSTree *lookahead) LOG("skip token:%s", SYM_NAME(lookahead->symbol)); ts_parser__shift(self, head, ts_stack_top_state(self->stack, head), lookahead); - lookahead = self->language->lex_fn(&self->lexer, ts_lex_state_error); + lookahead = self->language->lex_fn(&self->lexer, 0, true); error_token_count++; /*