Avoid creating external tokens that start after they end
This commit is contained in:
parent
90629bd45a
commit
fbcefe25f7
5 changed files with 140 additions and 0 deletions
|
|
@ -418,6 +418,10 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta
|
|||
symbol = self->language->external_scanner.symbol_map[symbol];
|
||||
}
|
||||
|
||||
if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) {
|
||||
self->lexer.token_start_position = self->lexer.token_end_position;
|
||||
}
|
||||
|
||||
Length padding = length_sub(self->lexer.token_start_position, start_position);
|
||||
Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
|
||||
result = ts_tree_make_leaf(symbol, padding, size, self->language);
|
||||
|
|
|
|||
32
test/fixtures/test_grammars/inverted_external_token/corpus.txt
vendored
Normal file
32
test/fixtures/test_grammars/inverted_external_token/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
========================
|
||||
Expressions on one line
|
||||
=========================
|
||||
|
||||
a
|
||||
b
|
||||
.c
|
||||
d
|
||||
.e
|
||||
.f
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(statement (identifier) (line_break))
|
||||
(statement (member_expression (identifier) (identifier)) (line_break))
|
||||
(statement (member_expression (member_expression (identifier) (identifier)) (identifier)) (line_break)))
|
||||
|
||||
=====================================
|
||||
Line breaks followed by whitespace
|
||||
=====================================
|
||||
|
||||
a
|
||||
b
|
||||
c
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(statement (identifier) (line_break))
|
||||
(statement (identifier) (line_break))
|
||||
(statement (identifier) (line_break)))
|
||||
55
test/fixtures/test_grammars/inverted_external_token/grammar.json
vendored
Normal file
55
test/fixtures/test_grammars/inverted_external_token/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
{
|
||||
"name": "inverted_external_token",
|
||||
|
||||
"externals": [
|
||||
{"type": "SYMBOL", "name": "line_break"}
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"program": {
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "statement"
|
||||
}
|
||||
},
|
||||
|
||||
"statement": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "SYMBOL", "name": "line_break"}
|
||||
]
|
||||
},
|
||||
|
||||
"_expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "member_expression"}
|
||||
]
|
||||
},
|
||||
|
||||
"member_expression": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "STRING", "value": "."},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\a+"
|
||||
}
|
||||
}
|
||||
}
|
||||
1
test/fixtures/test_grammars/inverted_external_token/readme.md
vendored
Normal file
1
test/fixtures/test_grammars/inverted_external_token/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
This language has an external scanner that calls `lexer->advance(lexer, true)` (in order to skip whitespace) *after* having called `lexer->mark_end(lexer)`. This tests an edge case in the parser's handling of token start and end positions.
|
||||
48
test/fixtures/test_grammars/inverted_external_token/scanner.c
vendored
Normal file
48
test/fixtures/test_grammars/inverted_external_token/scanner.c
vendored
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
#include <tree_sitter/parser.h>
|
||||
|
||||
// Symbols of the external tokens this scanner can report through
// `lexer->result_symbol`. LINE_BREAK is the only one.
enum {
|
||||
LINE_BREAK
|
||||
};
|
||||
|
||||
// Creates the scanner payload. No payload is allocated, so the result is
// always NULL.
void *tree_sitter_inverted_external_token_external_scanner_create() {
  void *payload = NULL;
  return payload;
}
|
||||
|
||||
// Releases the scanner payload. Intentionally does nothing with `payload`.
void tree_sitter_inverted_external_token_external_scanner_destroy(void *payload) {
  (void)payload;
}
|
||||
|
||||
// Resets the scanner. Intentionally does nothing with `payload`.
void tree_sitter_inverted_external_token_external_scanner_reset(void *payload) {
  (void)payload;
}
|
||||
|
||||
// Serializes the scanner state into `buffer` and returns the number of bytes
// written.
//
// Nothing is written to `buffer`, so the reported length must be 0. Returning
// a non-zero value (the previous `true`, i.e. 1) would tell the caller that
// one byte of `buffer` holds valid state even though it was never initialized.
unsigned tree_sitter_inverted_external_token_external_scanner_serialize(
  void *payload,
  char *buffer
) {
  (void)payload;
  (void)buffer;
  return 0;
}
|
||||
|
||||
// Restores scanner state from `buffer`. Intentionally does nothing: none of
// the arguments are read.
void tree_sitter_inverted_external_token_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {
  (void)payload;
  (void)buffer;
  (void)length;
}
|
||||
|
||||
// Scans for a LINE_BREAK token.
//
// Spaces before the newline are skipped. Once the newline is consumed, the
// token end is marked, and only then are the following spaces and newlines
// skipped. Returns true with `result_symbol` set to LINE_BREAK unless the
// next character after that whitespace is '.'; returns false in every other
// case. `payload` and `whitelist` are not used.
bool tree_sitter_inverted_external_token_external_scanner_scan(
  void *payload, TSLexer *lexer, const bool *whitelist) {
  // Leading spaces are skipped rather than included in the token.
  for (;;) {
    if (lexer->lookahead != ' ') {
      break;
    }
    lexer->advance(lexer, true);
  }

  // Without a newline at this point there is no line break to report.
  if (lexer->lookahead != '\n') {
    return false;
  }

  // Consume the newline, then mark the end of the line break token.
  lexer->advance(lexer, false);
  lexer->mark_end(lexer);

  // Skip whitespace *after* having marked the end.
  for (;;) {
    bool at_whitespace = lexer->lookahead == ' ' || lexer->lookahead == '\n';
    if (!at_whitespace) {
      break;
    }
    lexer->advance(lexer, true);
  }

  // A '.' after the skipped whitespace means no line break token is produced.
  if (lexer->lookahead == '.') {
    return false;
  }

  lexer->result_symbol = LINE_BREAK;
  return true;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue