Fix handling of tokens consisting of separator characters
The parser is no longer hard-coded to skip whitespace. Tokens such as newlines, whose characters overlap with the separator characters, can now be correctly recognized.
This commit is contained in:
parent
f39cb1890d
commit
1cc7e32e2d
32 changed files with 5401 additions and 4847 deletions
|
|
@ -51,10 +51,11 @@ static ts_tree * ts_lex(ts_lexer *lexer, state_id lex_state)
|
|||
|
||||
/*
 * Prologue for a lexer function body. As shown here it declares `lookahead`,
 * calls ts_lexer_skip_whitespace(lexer), returns a ts_builtin_sym_end leaf
 * when the lookahead character is zero, and then defines the `next_state`
 * label at which `lookahead` is (re)loaded from the lexer.
 * Expects a variable named `lexer` to be in scope at the expansion site.
 */
#define START_LEXER() \
|
||||
char lookahead; \
|
||||
ts_lexer_skip_whitespace(lexer); \
|
||||
if (!ts_lexer_lookahead_char(lexer)) return ts_tree_make_leaf(ts_builtin_sym_end, 0, 0); \
|
||||
next_state: \
|
||||
lookahead = ts_lexer_lookahead_char(lexer);
|
||||
|
||||
/* Marks the beginning of a token by calling ts_lexer_start_token on the in-scope `lexer`. */
#define START_TOKEN() \
|
||||
ts_lexer_start_token(lexer);
|
||||
|
||||
/*
 * Consumes one character via ts_lexer_advance, sets `lex_state` to
 * `state_index`, and jumps to the `next_state` label (defined by START_LEXER).
 */
#define ADVANCE(state_index) \
|
||||
{ ts_lexer_advance(lexer); lex_state = state_index; goto next_state; }
|
||||
|
|
@ -73,7 +74,7 @@ static const ts_parse_action ts_parse_actions[ts_state_count][ts_symbol_count]
|
|||
|
||||
#define EXPORT_PARSER(constructor_name) \
|
||||
ts_parser constructor_name() { \
|
||||
return (ts_parser){ \
|
||||
return (ts_parser) { \
|
||||
.parse_fn = ts_parse, \
|
||||
.symbol_names = ts_symbol_names, \
|
||||
.data = ts_lr_parser_make(ts_symbol_count, (const ts_parse_action *)ts_parse_actions, ts_lex_states, hidden_symbol_flags), \
|
||||
|
|
@ -161,6 +162,10 @@ static void ts_lexer_advance(ts_lexer *lexer) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Records the lexer's current position (as reported by ts_lexer_position)
 * in lexer->token_start_position. ts_lexer_build_node later subtracts this
 * value from the current position to obtain the token's size.
 */
static void ts_lexer_start_token(ts_lexer *lexer) {
|
||||
lexer->token_start_position = ts_lexer_position(lexer);
|
||||
}
|
||||
|
||||
static ts_tree * ts_lexer_build_node(ts_lexer *lexer, ts_symbol symbol) {
|
||||
size_t current_position = ts_lexer_position(lexer);
|
||||
size_t size = current_position - lexer->token_start_position;
|
||||
|
|
@ -169,12 +174,6 @@ static ts_tree * ts_lexer_build_node(ts_lexer *lexer, ts_symbol symbol) {
|
|||
return ts_tree_make_leaf(symbol, size, offset);
|
||||
}
|
||||
|
||||
/*
 * Advances the lexer while isspace() is true for the lookahead character,
 * then records the resulting position as the token start.
 * NOTE(review): if ts_lexer_lookahead_char returns a plain (possibly signed)
 * char, the value should be cast to unsigned char before being passed to
 * isspace() -- confirm against its declaration.
 */
static void ts_lexer_skip_whitespace(ts_lexer *lexer) {
|
||||
while (isspace(ts_lexer_lookahead_char(lexer)))
|
||||
ts_lexer_advance(lexer);
|
||||
lexer->token_start_position = ts_lexer_position(lexer);
|
||||
}
|
||||
|
||||
static const state_id ts_lex_state_error = -1;
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue