Merge pull request #978 from tree-sitter/fix-get-column-at-eof

Fix the behavior of Lexer.get_column

Commit d366356299
10 changed files with 463 additions and 81 deletions
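For context: `get_column` is the `TSLexer` callback that external scanners use to implement column-sensitive constructs such as indentation, and it previously misbehaved when called at the end of the file (hence the branch name fix-get-column-at-eof). A minimal sketch of the usual call pattern; the scanner function, token type, and surrounding grammar here are hypothetical, not part of this change:

```c
#include <tree_sitter/parser.h>

enum TokenType { INDENT };

// Hypothetical scan function: measure the indentation of the current line.
// After this fix, get_column reports the column of the EOF position
// (lookahead == 0) just like any other position.
bool hypothetical_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
  while (lexer->lookahead == ' ') {
    lexer->advance(lexer, true);
  }
  uint32_t column = lexer->get_column(lexer);  // safe even at end of file
  if (valid_symbols[INDENT] && column > 0) {
    lexer->result_symbol = INDENT;
    return true;
  }
  return false;
}
```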
@@ -74,3 +74,9 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) ->
         .load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path)
         .unwrap()
 }
+
+pub fn get_test_grammar(name: &str) -> (String, Option<PathBuf>) {
+    let dir = fixtures_dir().join("test_grammars").join(name);
+    let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap();
+    (grammar, Some(dir))
+}
@@ -1,5 +1,5 @@
 use super::helpers::edits::ReadRecorder;
-use super::helpers::fixtures::{get_language, get_test_language};
+use super::helpers::fixtures::{get_language, get_test_grammar, get_test_language};
 use crate::generate::generate_parser_for_grammar;
 use crate::parse::{perform_edit, Edit};
 use std::sync::atomic::{AtomicUsize, Ordering};
@@ -406,6 +406,83 @@ fn test_parsing_empty_file_with_reused_tree() {
     parser.parse("\n ", tree.as_ref());
 }
 
+#[test]
+fn test_parsing_after_editing_tree_that_depends_on_column_values() {
+    let (grammar, path) = get_test_grammar("uses_current_column");
+    let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar).unwrap();
+
+    let mut parser = Parser::new();
+    parser
+        .set_language(get_test_language(
+            &grammar_name,
+            &parser_code,
+            path.as_ref().map(AsRef::as_ref),
+        ))
+        .unwrap();
+
+    let mut code = b"
+a = b
+c = do d
+ e + f
+ g
+h + i
+"
+    .to_vec();
+    let mut tree = parser.parse(&code, None).unwrap();
+    assert_eq!(
+        tree.root_node().to_sexp(),
+        concat!(
+            "(block ",
+            "(binary_expression (identifier) (identifier)) ",
+            "(binary_expression (identifier) (do_expression (block (identifier) (binary_expression (identifier) (identifier)) (identifier)))) ",
+            "(binary_expression (identifier) (identifier)))",
+        )
+    );
+
+    perform_edit(
+        &mut tree,
+        &mut code,
+        &Edit {
+            position: 8,
+            deleted_length: 0,
+            inserted_text: b"1234".to_vec(),
+        },
+    );
+
+    assert_eq!(
+        code,
+        b"
+a = b
+c1234 = do d
+ e + f
+ g
+h + i
+"
+    );
+
+    let mut recorder = ReadRecorder::new(&code);
+    let tree = parser
+        .parse_with(&mut |i, _| recorder.read(i), Some(&tree))
+        .unwrap();
+
+    assert_eq!(
+        tree.root_node().to_sexp(),
+        concat!(
+            "(block ",
+            "(binary_expression (identifier) (identifier)) ",
+            "(binary_expression (identifier) (do_expression (block (identifier)))) ",
+            "(binary_expression (identifier) (identifier)) ",
+            "(identifier) ",
+            "(binary_expression (identifier) (identifier)))",
+        )
+    );
+
+    assert_eq!(
+        recorder.strings_read(),
+        vec!["\nc1234 = do d\n e + f\n g\n"]
+    );
+}
+
 // Thread safety
 
 #[test]
119 lib/src/lexer.c
@@ -102,6 +102,56 @@ static void ts_lexer__get_lookahead(Lexer *self) {
   }
 }
 
+static void ts_lexer_goto(Lexer *self, Length position) {
+  self->current_position = position;
+  bool found_included_range = false;
+
+  // Move to the first valid position at or after the given position.
+  for (unsigned i = 0; i < self->included_range_count; i++) {
+    TSRange *included_range = &self->included_ranges[i];
+    if (included_range->end_byte > position.bytes) {
+      if (included_range->start_byte > position.bytes) {
+        self->current_position = (Length) {
+          .bytes = included_range->start_byte,
+          .extent = included_range->start_point,
+        };
+      }
+
+      self->current_included_range_index = i;
+      found_included_range = true;
+      break;
+    }
+  }
+
+  if (found_included_range) {
+    // If the current position is outside of the current chunk of text,
+    // then clear out the current chunk of text.
+    if (self->chunk && (
+      position.bytes < self->chunk_start ||
+      position.bytes >= self->chunk_start + self->chunk_size
+    )) {
+      ts_lexer__clear_chunk(self);
+    }
+
+    self->lookahead_size = 0;
+    self->data.lookahead = '\0';
+  }
+
+  // If the given position is beyond any of included ranges, move to the EOF
+  // state - past the end of the included ranges.
+  else {
+    self->current_included_range_index = self->included_range_count;
+    TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
+    self->current_position = (Length) {
+      .bytes = last_included_range->end_byte,
+      .extent = last_included_range->end_point,
+    };
+    ts_lexer__clear_chunk(self);
+    self->lookahead_size = 1;
+    self->data.lookahead = '\0';
+  }
+}
+
 // Advance to the next character in the source code, retrieving a new
 // chunk of source code if needed.
 static void ts_lexer__advance(TSLexer *_self, bool skip) {
@@ -183,22 +233,8 @@ static void ts_lexer__mark_end(TSLexer *_self) {
 
 static uint32_t ts_lexer__get_column(TSLexer *_self) {
   Lexer *self = (Lexer *)_self;
-  uint32_t goal_byte = self->current_position.bytes;
-
-  self->current_position.bytes -= self->current_position.extent.column;
-  self->current_position.extent.column = 0;
-
-  if (self->current_position.bytes < self->chunk_start) {
-    ts_lexer__get_chunk(self);
-  }
-
-  uint32_t result = 0;
-  while (self->current_position.bytes < goal_byte) {
-    ts_lexer__advance(&self->data, false);
-    result++;
-  }
-
-  return result;
+  self->did_get_column = true;
+  return self->current_position.extent.column;
 }
 
 // Is the lexer at a boundary between two disjoint included ranges of
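Because the lexer already tracks its position as a `Length` (a byte offset plus a row/column extent that resets the column after every newline), `get_column` can return the tracked column directly instead of rewinding to the start of the line and re-counting, and it now also records, via `did_get_column`, that the token being scanned is column-dependent. A rough sketch of that bookkeeping; the types mirror the library's `Length`/`TSPoint`, but `length_advance` is a hypothetical helper, not the library's code:

```c
#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t row; uint32_t column; } TSPoint;
typedef struct { uint32_t bytes; TSPoint extent; } Length;

// Hypothetical helper mirroring how the lexer's position is updated as it
// consumes one character: the column counter resets after a newline, so
// extent.column always holds the current column.
static Length length_advance(Length pos, int32_t ch, uint32_t num_bytes) {
  pos.bytes += num_bytes;
  if (ch == '\n') {
    pos.extent.row += 1;
    pos.extent.column = 0;
  } else {
    pos.extent.column += num_bytes;
  }
  return pos;
}

int main(void) {
  Length pos = {0, {0, 0}};
  const char *text = "ab\ncd";
  for (int i = 0; text[i]; i++) pos = length_advance(pos, text[i], 1);
  printf("row=%u column=%u\n", pos.extent.row, pos.extent.column);  // row=1 column=2
  return 0;
}
```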
@@ -247,56 +283,6 @@ void ts_lexer_delete(Lexer *self) {
   ts_free(self->included_ranges);
 }
 
-static void ts_lexer_goto(Lexer *self, Length position) {
-  self->current_position = position;
-  bool found_included_range = false;
-
-  // Move to the first valid position at or after the given position.
-  for (unsigned i = 0; i < self->included_range_count; i++) {
-    TSRange *included_range = &self->included_ranges[i];
-    if (included_range->end_byte > position.bytes) {
-      if (included_range->start_byte > position.bytes) {
-        self->current_position = (Length) {
-          .bytes = included_range->start_byte,
-          .extent = included_range->start_point,
-        };
-      }
-
-      self->current_included_range_index = i;
-      found_included_range = true;
-      break;
-    }
-  }
-
-  if (found_included_range) {
-    // If the current position is outside of the current chunk of text,
-    // then clear out the current chunk of text.
-    if (self->chunk && (
-      position.bytes < self->chunk_start ||
-      position.bytes >= self->chunk_start + self->chunk_size
-    )) {
-      ts_lexer__clear_chunk(self);
-    }
-
-    self->lookahead_size = 0;
-    self->data.lookahead = '\0';
-  }
-
-  // If the given position is beyond any of included ranges, move to the EOF
-  // state - past the end of the included ranges.
-  else {
-    self->current_included_range_index = self->included_range_count;
-    TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
-    self->current_position = (Length) {
-      .bytes = last_included_range->end_byte,
-      .extent = last_included_range->end_point,
-    };
-    ts_lexer__clear_chunk(self);
-    self->lookahead_size = 1;
-    self->data.lookahead = '\0';
-  }
-}
-
 void ts_lexer_set_input(Lexer *self, TSInput input) {
   self->input = input;
   ts_lexer__clear_chunk(self);
@@ -315,6 +301,7 @@ void ts_lexer_start(Lexer *self) {
   self->token_start_position = self->current_position;
   self->token_end_position = LENGTH_UNDEFINED;
   self->data.result_symbol = 0;
+  self->did_get_column = false;
   if (!ts_lexer__eof(&self->data)) {
     if (!self->chunk_size) ts_lexer__get_chunk(self);
     if (!self->lookahead_size) ts_lexer__get_lookahead(self);
@@ -17,16 +17,17 @@ typedef struct {
   Length token_end_position;
 
   TSRange *included_ranges;
-  size_t included_range_count;
-  size_t current_included_range_index;
+  uint32_t included_range_count;
+  uint32_t current_included_range_index;
 
   const char *chunk;
-  TSInput input;
-  TSLogger logger;
   uint32_t chunk_start;
   uint32_t chunk_size;
   uint32_t lookahead_size;
+  bool did_get_column;
 
+  TSInput input;
+  TSLogger logger;
   char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
 } Lexer;
 
@@ -403,6 +403,7 @@ static Subtree ts_parser__lex(
   bool found_external_token = false;
   bool error_mode = parse_state == ERROR_STATE;
   bool skipped_error = false;
+  bool called_get_column = false;
   int32_t first_error_character = 0;
   Length error_start_position = length_zero();
   Length error_end_position = length_zero();

@@ -445,6 +446,7 @@ static Subtree ts_parser__lex(
         (!error_mode && ts_stack_has_advanced_since_error(self->stack, version))
       )) {
         found_external_token = true;
+        called_get_column = self->lexer.did_get_column;
         break;
       }
 
@@ -546,6 +548,7 @@ static Subtree ts_parser__lex(
     lookahead_bytes,
     parse_state,
     found_external_token,
+    called_get_column,
     is_keyword,
     self->language
   );
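These parser changes thread a per-token flag through to tree construction: `ts_lexer_start` clears `did_get_column`, the external scanner may set it by calling `get_column`, and when the external token is accepted the parser copies it into `called_get_column` and ultimately into the new leaf's `depends_on_column` bit (see the subtree changes below). A condensed, self-contained model of that hand-off; the `Mini*` types are illustrative stand-ins, not the library's types:

```c
#include <stdbool.h>
#include <stdio.h>

typedef struct { bool did_get_column; } MiniLexer;
typedef struct { bool depends_on_column; } MiniSubtree;

// Scanning: an external scanner that asks for the column sets the flag.
static void scan_token(MiniLexer *lexer, bool scanner_uses_column) {
  lexer->did_get_column = false;           // reset, as in ts_lexer_start
  if (scanner_uses_column) {
    lexer->did_get_column = true;          // set, as in ts_lexer__get_column
  }
}

// Token construction: the parser copies the flag into the new leaf,
// mirroring the called_get_column -> depends_on_column hand-off.
static MiniSubtree make_leaf(const MiniLexer *lexer) {
  MiniSubtree leaf = { .depends_on_column = lexer->did_get_column };
  return leaf;
}

int main(void) {
  MiniLexer lexer;
  scan_token(&lexer, true);
  MiniSubtree leaf = make_leaf(&lexer);
  printf("depends_on_column = %d\n", leaf.depends_on_column);  // prints 1
  return 0;
}
```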
@@ -166,7 +166,8 @@ static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t l
 
 Subtree ts_subtree_new_leaf(
   SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
-  uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens,
+  uint32_t lookahead_bytes, TSStateId parse_state,
+  bool has_external_tokens, bool depends_on_column,
   bool is_keyword, const TSLanguage *language
 ) {
   TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);

@@ -213,6 +214,7 @@ Subtree ts_subtree_new_leaf(
     .fragile_right = false,
     .has_changes = false,
     .has_external_tokens = has_external_tokens,
+    .depends_on_column = depends_on_column,
     .is_missing = false,
     .is_keyword = is_keyword,
     {{.first_leaf = {.symbol = 0, .parse_state = 0}}}

@@ -245,7 +247,7 @@ Subtree ts_subtree_new_error(
 ) {
   Subtree result = ts_subtree_new_leaf(
     pool, ts_builtin_sym_error, padding, size, bytes_scanned,
-    parse_state, false, false, language
+    parse_state, false, false, false, language
   );
   SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
   data->fragile_left = true;

@@ -378,6 +380,7 @@ void ts_subtree_summarize_children(
   self.ptr->repeat_depth = 0;
   self.ptr->node_count = 1;
   self.ptr->has_external_tokens = false;
+  self.ptr->depends_on_column = false;
   self.ptr->dynamic_precedence = 0;
 
   uint32_t structural_index = 0;

@@ -388,6 +391,13 @@ void ts_subtree_summarize_children(
   for (uint32_t i = 0; i < self.ptr->child_count; i++) {
     Subtree child = children[i];
 
+    if (
+      self.ptr->size.extent.row == 0 &&
+      ts_subtree_depends_on_column(child)
+    ) {
+      self.ptr->depends_on_column = true;
+    }
+
     if (i == 0) {
       self.ptr->padding = ts_subtree_padding(child);
       self.ptr->size = ts_subtree_size(child);
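The loop above marks a parent as column-dependent only when a column-dependent child starts on the parent's first row: at that point in the loop `self.ptr->size.extent.row` still counts the rows accumulated from earlier children, so a value of zero means (roughly) that no newline separates the child from the parent's start, and an edit earlier on that line could shift the child's column. A toy model of the same bottom-up rule, with illustrative names rather than the library's:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Toy model of the summarize step: a node is column-dependent if any child
// that starts on the node's first row is column-dependent.
typedef struct {
  uint32_t rows;           // number of newlines spanned by the node
  bool depends_on_column;
} ToyNode;

static ToyNode summarize(const ToyNode *children, int n) {
  ToyNode parent = { .rows = 0, .depends_on_column = false };
  for (int i = 0; i < n; i++) {
    // Only children on the first row can be shifted by an edit made
    // earlier on that same line.
    if (parent.rows == 0 && children[i].depends_on_column) {
      parent.depends_on_column = true;
    }
    parent.rows += children[i].rows;
  }
  return parent;
}

int main(void) {
  ToyNode children[] = {
    { .rows = 0, .depends_on_column = false },  // same row
    { .rows = 1, .depends_on_column = false },  // introduces a newline
    { .rows = 0, .depends_on_column = true },   // column-sensitive, later row
  };
  ToyNode parent = summarize(children, 3);
  printf("depends_on_column = %d\n", parent.depends_on_column);  // prints 0
  return 0;
}
```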
@@ -545,7 +555,7 @@ Subtree ts_subtree_new_missing_leaf(
 ) {
   Subtree result = ts_subtree_new_leaf(
     pool, symbol, padding, length_zero(), 0,
-    0, false, false, language
+    0, false, false, false, language
   );
   if (result.data.is_inline) {
     result.data.is_missing = true;

@@ -670,6 +680,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
     Edit edit = entry.edit;
     bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
     bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
+    bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree);
 
     Length size = ts_subtree_size(*entry.tree);
     Length padding = ts_subtree_padding(*entry.tree);

@@ -733,6 +744,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
       data->fragile_right = false;
       data->has_changes = false;
       data->has_external_tokens = false;
+      data->depends_on_column = false;
       data->is_missing = result.data.is_missing;
       data->is_keyword = result.data.is_keyword;
       result.ptr = data;
@@ -755,9 +767,18 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
       // If this child ends before the edit, it is not affected.
       if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;
 
-      // If this child starts after the edit, then we're done processing children.
-      if (child_left.bytes > edit.old_end.bytes ||
-          (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
+      // Keep editing child nodes until a node is reached that starts after the edit.
+      // Also, if this node's validity depends on its column position, then continue
+      // invalidating child nodes until reaching a line break.
+      if ((
+        (child_left.bytes > edit.old_end.bytes) ||
+        (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)
+      ) && (
+        !invalidate_first_row ||
+        child_left.extent.row > entry.tree->ptr->padding.extent.row
+      )) {
+        break;
+      }
 
       // Transform edit into the child's coordinate space.
       Edit child_edit = {
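Concretely, this is what makes the new test above re-lex the `do` block: inserting `1234` after `c` does not overlap the block's bytes, but the block is column-dependent and still sits on the edited row, so its children keep being invalidated until a line break is reached. A simplified model of the stopping rule (it omits the `child_size`/`i > 0` tie-breakers from the real condition):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Toy version of the loop's stopping rule: stop invalidating children once a
// child starts after the edit, unless the node is column-dependent and the
// child is still on the node's first row. Names are illustrative.
static bool stop_editing_children(
  uint32_t child_start_byte, uint32_t child_start_row,
  uint32_t edit_old_end_byte, uint32_t first_row,
  bool depends_on_column
) {
  bool starts_after_edit = child_start_byte > edit_old_end_byte;
  bool past_first_row = child_start_row > first_row;
  return starts_after_edit && (!depends_on_column || past_first_row);
}

int main(void) {
  // Insert "1234" at byte 8 of "c = do d\n e + f": a column-dependent child
  // on the same row starts after the edit but is still invalidated...
  printf("%d\n", stop_editing_children(9, 0, 8, 0, true));   // 0: keep going
  // ...while a child on the next row is insulated by the newline.
  printf("%d\n", stop_editing_children(10, 1, 8, 0, true));  // 1: stop
  return 0;
}
```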
@@ -775,8 +796,10 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
       // Interpret all inserted text as applying to the *first* child that touches the edit.
       // Subsequent children never have any text inserted into them; they are only
       // shrunk to compensate for the edit.
-      if (child_right.bytes > edit.start.bytes ||
-          (child_right.bytes == edit.start.bytes && is_pure_insertion)) {
+      if (
+        child_right.bytes > edit.start.bytes ||
+        (child_right.bytes == edit.start.bytes && is_pure_insertion)
+      ) {
         edit.new_end = edit.start;
       }
 
@@ -981,12 +1004,14 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
     "state: %d\n"
     "error-cost: %u\n"
     "has-changes: %u\n"
+    "depends-on-column: %u\n"
     "repeat-depth: %u\n"
     "lookahead-bytes: %u",
     start_offset, end_offset,
     ts_subtree_parse_state(*self),
     ts_subtree_error_cost(*self),
     ts_subtree_has_changes(*self),
+    ts_subtree_depends_on_column(*self),
     ts_subtree_repeat_depth(*self),
     ts_subtree_lookahead_bytes(*self)
   );
@@ -78,6 +78,7 @@ typedef struct {
   bool fragile_right : 1;
   bool has_changes : 1;
   bool has_external_tokens : 1;
+  bool depends_on_column : 1;
   bool is_missing : 1;
   bool is_keyword : 1;
 
@@ -138,7 +139,7 @@ void ts_subtree_pool_delete(SubtreePool *);
 
 Subtree ts_subtree_new_leaf(
   SubtreePool *, TSSymbol, Length, Length, uint32_t,
-  TSStateId, bool, bool, const TSLanguage *
+  TSStateId, bool, bool, bool, const TSLanguage *
 );
 Subtree ts_subtree_new_error(
   SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *

@@ -284,6 +285,10 @@ static inline bool ts_subtree_has_external_tokens(Subtree self) {
   return self.data.is_inline ? false : self.ptr->has_external_tokens;
 }
 
+static inline bool ts_subtree_depends_on_column(Subtree self) {
+  return self.data.is_inline ? false : self.ptr->depends_on_column;
+}
+
 static inline bool ts_subtree_is_fragile(Subtree self) {
   return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right);
 }
76 test/fixtures/test_grammars/uses_current_column/corpus.txt vendored Normal file
@@ -0,0 +1,76 @@
+===============
+Simple blocks
+===============
+
+do a
+   e
+f
+
+---
+
+(block
+  (do_expression (block
+    (identifier)
+    (identifier)))
+  (identifier))
+
+=====================
+Nested blocks
+=====================
+
+a = do b
+       c + do e
+              f
+              g
+       h
+i
+
+---
+
+(block
+  (binary_expression
+    (identifier)
+    (do_expression (block
+      (identifier)
+      (binary_expression
+        (identifier)
+        (do_expression (block
+          (identifier)
+          (identifier)
+          (identifier))))
+      (identifier))))
+  (identifier))
+
+===============================
+Blocks with leading newlines
+===============================
+
+do
+
+
+  a = b
+  do
+    c
+    d
+  e
+  f
+
+---
+
+(block
+  (do_expression (block
+    (binary_expression (identifier) (identifier))
+    (do_expression (block
+      (identifier)
+      (identifier)))
+    (identifier)
+    (identifier))))
+
+=====================
+Unterminated blocks
+=====================
+
+do
+---
+
+(ERROR)
69 test/fixtures/test_grammars/uses_current_column/grammar.json vendored Normal file
@@ -0,0 +1,69 @@
+{
+  "name": "uses_current_column",
+
+  "externals": [
+    {"type": "SYMBOL", "name": "_indent"},
+    {"type": "SYMBOL", "name": "_dedent"},
+    {"type": "SYMBOL", "name": "_newline"}
+  ],
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"}
+  ],
+
+  "rules": {
+    "block": {
+      "type": "REPEAT1",
+      "content": {"type": "SYMBOL", "name": "_statement"}
+    },
+
+    "_statement": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "_expression"},
+        {"type": "SYMBOL", "name": "_newline"}
+      ]
+    },
+
+    "_expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "do_expression"},
+        {"type": "SYMBOL", "name": "binary_expression"},
+        {"type": "SYMBOL", "name": "identifier"}
+      ]
+    },
+
+    "do_expression": {
+      "type": "SEQ",
+      "members": [
+        {"type": "STRING", "value": "do"},
+        {"type": "SYMBOL", "name": "_indent"},
+        {"type": "SYMBOL", "name": "block"},
+        {"type": "SYMBOL", "name": "_dedent"}
+      ]
+    },
+
+    "binary_expression": {
+      "type": "PREC_LEFT",
+      "value": 1,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "_expression"},
+          {
+            "type": "CHOICE",
+            "members": [
+              {"type": "STRING", "value": "="},
+              {"type": "STRING", "value": "+"},
+              {"type": "STRING", "value": "-"}
+            ]
+          },
+          {"type": "SYMBOL", "name": "_expression"}
+        ]
+      }
+    },
+
+    "identifier": {"type": "PATTERN", "value": "\\w+"}
+  }
+}
133 test/fixtures/test_grammars/uses_current_column/scanner.c vendored Normal file
@@ -0,0 +1,133 @@
+#include <stdlib.h>
+#include <wctype.h>
+#include <tree_sitter/parser.h>
+
+enum TokenType {
+  INDENT,
+  DEDENT,
+  NEWLINE,
+};
+
+typedef struct {
+  uint8_t queued_dedent_count;
+  uint8_t indent_count;
+  int8_t indents[32];
+} Scanner;
+
+void *tree_sitter_uses_current_column_external_scanner_create() {
+  Scanner *self = malloc(sizeof(Scanner));
+  self->queued_dedent_count = 0;
+  self->indent_count = 1;
+  self->indents[0] = 0;
+  return (void *)self;
+}
+
+void tree_sitter_uses_current_column_external_scanner_destroy(void *payload) {
+  free(payload);
+}
+
+unsigned tree_sitter_uses_current_column_external_scanner_serialize(
+  void *payload,
+  char *buffer
+) {
+  Scanner *self = (Scanner *)payload;
+  buffer[0] = self->queued_dedent_count;
+  for (unsigned i = 0; i < self->indent_count; i++) {
+    buffer[i + 1] = self->indents[i];
+  }
+  return self->indent_count + 1;
+}
+
+void tree_sitter_uses_current_column_external_scanner_deserialize(
+  void *payload,
+  const char *buffer,
+  unsigned length
+) {
+  Scanner *self = (Scanner *)payload;
+  if (length > 0) {
+    self->queued_dedent_count = buffer[0];
+    self->indent_count = length - 1;
+    for (unsigned i = 0; i < self->indent_count; i++) {
+      self->indents[i] = buffer[i + 1];
+    }
+  } else {
+    self->queued_dedent_count = 0;
+    self->indent_count = 1;
+    self->indents[0] = 0;
+  }
+}
+
+bool tree_sitter_uses_current_column_external_scanner_scan(
+  void *payload,
+  TSLexer *lexer,
+  const bool *valid_symbols
+) {
+  Scanner *self = (Scanner *)payload;
+  lexer->mark_end(lexer);
+
+  // If dedents were found in a previous run, and are valid now,
+  // then return a dedent.
+  if (self->queued_dedent_count > 0 && valid_symbols[DEDENT]) {
+    lexer->result_symbol = DEDENT;
+    self->queued_dedent_count--;
+    return true;
+  }
+
+  // If an indent is valid, then add an entry to the indent stack
+  // for the current column, and return an indent.
+  if (valid_symbols[INDENT]) {
+    while (iswspace(lexer->lookahead)) {
+      lexer->advance(lexer, false);
+    }
+    uint32_t column = lexer->get_column(lexer);
+    if (column > self->indents[self->indent_count - 1]) {
+      self->indents[self->indent_count++] = column;
+      lexer->result_symbol = INDENT;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // If at the end of a statement, then get the current indent
+  // level and pop some number of entries off of the indent stack.
+  if (valid_symbols[NEWLINE] || valid_symbols[DEDENT]) {
+    while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
+      lexer->advance(lexer, false);
+    }
+
+    if (lexer->lookahead == '\n') {
+      lexer->advance(lexer, false);
+
+      uint32_t next_column = 0;
+      for (;;) {
+        if (lexer->lookahead == ' ') {
+          next_column++;
+          lexer->advance(lexer, false);
+        } else if (lexer->lookahead == '\n') {
+          next_column = 0;
+          lexer->advance(lexer, false);
+        } else {
+          break;
+        }
+      }
+
+      unsigned dedent_count = 0;
+      while (next_column < self->indents[self->indent_count - 1]) {
+        dedent_count++;
+        self->indent_count--;
+      }
+
+      if (dedent_count > 0 && valid_symbols[DEDENT]) {
+        lexer->result_symbol = DEDENT;
+        return true;
+      } else if (valid_symbols[NEWLINE]) {
+        self->queued_dedent_count += dedent_count;
+        lexer->result_symbol = NEWLINE;
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
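The serialize/deserialize pair above persists the scanner's state between parses with a simple layout: `buffer[0]` holds `queued_dedent_count` and the remaining bytes hold the indent stack. A standalone round-trip check of that layout, modeled on the fixture's functions (it assumes the stack fits the buffer):

```c
#include <assert.h>
#include <stdint.h>

// Stand-alone model of the fixture's state layout: buffer[0] stores
// queued_dedent_count, buffer[1..] stores the indent stack.
typedef struct {
  uint8_t queued_dedent_count;
  uint8_t indent_count;
  int8_t indents[32];
} Scanner;

static unsigned serialize(const Scanner *self, char *buffer) {
  buffer[0] = self->queued_dedent_count;
  for (unsigned i = 0; i < self->indent_count; i++) buffer[i + 1] = self->indents[i];
  return self->indent_count + 1;
}

static void deserialize(Scanner *self, const char *buffer, unsigned length) {
  if (length > 0) {
    self->queued_dedent_count = buffer[0];
    self->indent_count = length - 1;
    for (unsigned i = 0; i < self->indent_count; i++) self->indents[i] = buffer[i + 1];
  } else {
    self->queued_dedent_count = 0;
    self->indent_count = 1;
    self->indents[0] = 0;
  }
}

int main(void) {
  Scanner a = { .queued_dedent_count = 1, .indent_count = 3, .indents = {0, 3, 7} };
  char buffer[64];
  unsigned n = serialize(&a, buffer);

  Scanner b;
  deserialize(&b, buffer, n);
  assert(b.queued_dedent_count == 1 && b.indent_count == 3 && b.indents[2] == 7);
  return 0;
}
```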