When editing, properly invalidate trees that depend on get_column
parent e29d3714f7
commit a40045a419
8 changed files with 136 additions and 35 deletions
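In short: a token produced by an external scanner that calls the lexer's get_column function is only valid at the column where it was scanned, so an edit earlier on the same line must invalidate it even when the edited bytes do not overlap it. The hunks below record that fact with a did_get_column flag on the lexer and a depends_on_column flag on subtrees, and use it when editing trees in ts_subtree_edit. A minimal external-scanner fragment of the kind that creates this dependence (the grammar and token names here are illustrative, not taken from this commit):

#include <stdbool.h>
#include <stdint.h>
#include <tree_sitter/parser.h>

// Hypothetical token type for an indentation-sensitive grammar.
enum TokenType { INDENT };

bool tree_sitter_example_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  if (valid_symbols[INDENT]) {
    // Calling get_column is what makes the resulting token column-dependent;
    // after this commit the lexer records the call in did_get_column and the
    // parser stores it on the new leaf as depends_on_column.
    uint32_t column = lexer->get_column(lexer);
    if (column > 0) {
      lexer->result_symbol = INDENT;
      return true;
    }
  }
  return false;
}

Any grammar whose scanner measures indentation this way is affected by the invalidation change below.
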
@@ -74,3 +74,9 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) ->
         .load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path)
         .unwrap()
 }
+
+pub fn get_test_grammar(name: &str) -> (String, Option<PathBuf>) {
+    let dir = fixtures_dir().join("test_grammars").join(name);
+    let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap();
+    (grammar, Some(dir))
+}

@@ -1,5 +1,5 @@
 use super::helpers::edits::ReadRecorder;
-use super::helpers::fixtures::{get_language, get_test_language};
+use super::helpers::fixtures::{get_language, get_test_grammar, get_test_language};
 use crate::generate::generate_parser_for_grammar;
 use crate::parse::{perform_edit, Edit};
 use std::sync::atomic::{AtomicUsize, Ordering};

@@ -406,6 +406,83 @@ fn test_parsing_empty_file_with_reused_tree() {
     parser.parse("\n ", tree.as_ref());
 }
 
+#[test]
+fn test_parsing_after_editing_tree_that_depends_on_column_values() {
+    let (grammar, path) = get_test_grammar("uses_current_column");
+    let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar).unwrap();
+
+    let mut parser = Parser::new();
+    parser
+        .set_language(get_test_language(
+            &grammar_name,
+            &parser_code,
+            path.as_ref().map(AsRef::as_ref),
+        ))
+        .unwrap();
+
+    let mut code = b"
+a = b
+c = do d
+ e + f
+ g
+h + i
+"
+    .to_vec();
+    let mut tree = parser.parse(&code, None).unwrap();
+    assert_eq!(
+        tree.root_node().to_sexp(),
+        concat!(
+            "(block ",
+            "(binary_expression (identifier) (identifier)) ",
+            "(binary_expression (identifier) (do_expression (block (identifier) (binary_expression (identifier) (identifier)) (identifier)))) ",
+            "(binary_expression (identifier) (identifier)))",
+        )
+    );
+
+    perform_edit(
+        &mut tree,
+        &mut code,
+        &Edit {
+            position: 8,
+            deleted_length: 0,
+            inserted_text: b"1234".to_vec(),
+        },
+    );
+
+    assert_eq!(
+        code,
+        b"
+a = b
+c1234 = do d
+ e + f
+ g
+h + i
+"
+    );
+
+    let mut recorder = ReadRecorder::new(&code);
+    let tree = parser
+        .parse_with(&mut |i, _| recorder.read(i), Some(&tree))
+        .unwrap();
+
+    assert_eq!(
+        tree.root_node().to_sexp(),
+        concat!(
+            "(block ",
+            "(binary_expression (identifier) (identifier)) ",
+            "(binary_expression (identifier) (do_expression (block (identifier)))) ",
+            "(binary_expression (identifier) (identifier)) ",
+            "(identifier) ",
+            "(binary_expression (identifier) (identifier)))",
+        )
+    );
+
+    assert_eq!(
+        recorder.strings_read(),
+        vec!["\nc1234 = do d\n e + f\n g\n"]
+    );
+}
+
 // Thread safety
 
 #[test]

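The test drives the edit through the CLI's perform_edit helper, which takes a byte position and derives the point coordinates itself. For readers working against the C API directly, the same insertion could be described to the library roughly as follows (a sketch using the public TSInputEdit/ts_tree_edit API, not code from this commit; it assumes a TSTree already parsed from the same source):

#include <tree_sitter/api.h>

// Sketch: the edit performed by the test above, expressed as a TSInputEdit.
// Byte offset 8 is immediately after the leading "\na = b\nc", so inserting
// "1234" there turns "c = do d" into "c1234 = do d".
void apply_test_edit(TSTree *tree) {
  TSInputEdit edit = {
    .start_byte = 8,
    .old_end_byte = 8,            // nothing deleted
    .new_end_byte = 12,           // four bytes inserted
    .start_point = {.row = 2, .column = 1},
    .old_end_point = {.row = 2, .column = 1},
    .new_end_point = {.row = 2, .column = 5},
  };
  ts_tree_edit(tree, &edit);
  // A subsequent parse that reuses this tree re-reads the whole affected line
  // region, which is what the ReadRecorder assertion above checks.
}
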
@@ -233,25 +233,8 @@ static void ts_lexer__mark_end(TSLexer *_self) {
 
 static uint32_t ts_lexer__get_column(TSLexer *_self) {
   Lexer *self = (Lexer *)_self;
-  uint32_t goal_byte = self->current_position.bytes;
-
-  ts_lexer_goto(self, (Length) {
-    .bytes = self->current_position.bytes - self->current_position.extent.column,
-    .extent = {
-      .row = self->current_position.extent.row,
-      .column = 0,
-    }
-  });
-  if (!self->chunk_size) ts_lexer__get_chunk(self);
-  if (!self->lookahead_size) ts_lexer__get_lookahead(self);
-
-  uint32_t result = 0;
-  while (self->current_position.bytes < goal_byte) {
-    ts_lexer__advance(&self->data, false);
-    result++;
-  }
-
-  return result;
+  self->did_get_column = true;
+  return self->current_position.extent.column;
 }
 
 // Is the lexer at a boundary between two disjoint included ranges of

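The rewritten ts_lexer__get_column no longer rewinds to the start of the row and re-advances to count bytes; it returns the column that the lexer's Length/extent bookkeeping already maintains and simply records that the question was asked. A standalone model of that bookkeeping (illustrative types, not the library's) shows why the value is available without re-scanning:

#include <stdint.h>
#include <stdio.h>

// Standalone illustration: a lexer that tracks a row/column extent while
// advancing never needs to re-scan the current line to answer get_column.
typedef struct { uint32_t row, column; } Point;

static void advance(Point *p, char c) {
  if (c == '\n') { p->row += 1; p->column = 0; }
  else { p->column += 1; }
}

int main(void) {
  const char *text = "a = b\nc = do d";
  Point pos = {0, 0};
  for (const char *c = text; *c; c++) advance(&pos, *c);
  printf("row %u, column %u\n", pos.row, pos.column); // row 1, column 8
  return 0;
}
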
@@ -318,6 +301,7 @@ void ts_lexer_start(Lexer *self) {
   self->token_start_position = self->current_position;
   self->token_end_position = LENGTH_UNDEFINED;
   self->data.result_symbol = 0;
+  self->did_get_column = false;
   if (!ts_lexer__eof(&self->data)) {
     if (!self->chunk_size) ts_lexer__get_chunk(self);
     if (!self->lookahead_size) ts_lexer__get_lookahead(self);

@@ -17,16 +17,17 @@ typedef struct {
   Length token_end_position;
 
   TSRange *included_ranges;
-  size_t included_range_count;
-  size_t current_included_range_index;
-
   const char *chunk;
+  TSInput input;
+  TSLogger logger;
+
+  uint32_t included_range_count;
+  uint32_t current_included_range_index;
   uint32_t chunk_start;
   uint32_t chunk_size;
   uint32_t lookahead_size;
+  bool did_get_column;
 
-  TSInput input;
-  TSLogger logger;
   char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
 } Lexer;
 

@@ -403,6 +403,7 @@ static Subtree ts_parser__lex(
   bool found_external_token = false;
   bool error_mode = parse_state == ERROR_STATE;
   bool skipped_error = false;
+  bool called_get_column = false;
   int32_t first_error_character = 0;
   Length error_start_position = length_zero();
   Length error_end_position = length_zero();

@@ -445,6 +446,7 @@ static Subtree ts_parser__lex(
       (!error_mode && ts_stack_has_advanced_since_error(self->stack, version))
     )) {
       found_external_token = true;
+      called_get_column = self->lexer.did_get_column;
       break;
     }
 

@@ -546,6 +548,7 @@ static Subtree ts_parser__lex(
     lookahead_bytes,
     parse_state,
     found_external_token,
+    called_get_column,
     is_keyword,
     self->language
   );

@@ -166,7 +166,8 @@ static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t l
 
 Subtree ts_subtree_new_leaf(
   SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
-  uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens,
+  uint32_t lookahead_bytes, TSStateId parse_state,
+  bool has_external_tokens, bool depends_on_column,
   bool is_keyword, const TSLanguage *language
 ) {
   TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);

@@ -213,6 +214,7 @@ Subtree ts_subtree_new_leaf(
       .fragile_right = false,
       .has_changes = false,
       .has_external_tokens = has_external_tokens,
+      .depends_on_column = depends_on_column,
       .is_missing = false,
       .is_keyword = is_keyword,
       {{.first_leaf = {.symbol = 0, .parse_state = 0}}}

@@ -245,7 +247,7 @@ Subtree ts_subtree_new_error(
 ) {
   Subtree result = ts_subtree_new_leaf(
     pool, ts_builtin_sym_error, padding, size, bytes_scanned,
-    parse_state, false, false, language
+    parse_state, false, false, false, language
   );
   SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
   data->fragile_left = true;

@@ -378,6 +380,7 @@ void ts_subtree_summarize_children(
   self.ptr->repeat_depth = 0;
   self.ptr->node_count = 1;
   self.ptr->has_external_tokens = false;
+  self.ptr->depends_on_column = false;
   self.ptr->dynamic_precedence = 0;
 
   uint32_t structural_index = 0;

@@ -388,6 +391,13 @@ void ts_subtree_summarize_children(
   for (uint32_t i = 0; i < self.ptr->child_count; i++) {
     Subtree child = children[i];
 
+    if (
+      self.ptr->size.extent.row == 0 &&
+      ts_subtree_depends_on_column(child)
+    ) {
+      self.ptr->depends_on_column = true;
+    }
+
     if (i == 0) {
       self.ptr->padding = ts_subtree_padding(child);
       self.ptr->size = ts_subtree_size(child);

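The propagation rule added above is deliberately narrow: a parent is marked depends_on_column only when a column-dependent child begins on the parent's first row, because only then can text inserted earlier on that row shift the child's column. A small standalone model of the intent of that accumulation (illustrative types, not the library's):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// While the children appended so far span zero row breaks, a column-dependent
// child still sits on the parent's first row and taints the parent.
typedef struct {
  uint32_t rows;            // row breaks covered by the children added so far
  bool depends_on_column;
} Summary;

static void add_child(Summary *parent, uint32_t child_rows, bool child_depends_on_column) {
  if (parent->rows == 0 && child_depends_on_column) {
    parent->depends_on_column = true;
  }
  parent->rows += child_rows;
}

int main(void) {
  Summary a = {0, false};
  add_child(&a, 0, true);            // column-dependent child on the first row
  Summary b = {0, false};
  add_child(&b, 1, false);           // a child that ends on a later row
  add_child(&b, 0, true);            // column-dependent, but no longer on row 0
  printf("a: %d, b: %d\n", a.depends_on_column, b.depends_on_column); // a: 1, b: 0
  return 0;
}
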
@@ -545,7 +555,7 @@ Subtree ts_subtree_new_missing_leaf(
 ) {
   Subtree result = ts_subtree_new_leaf(
     pool, symbol, padding, length_zero(), 0,
-    0, false, false, language
+    0, false, false, false, language
   );
   if (result.data.is_inline) {
     result.data.is_missing = true;

@@ -670,6 +680,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
     Edit edit = entry.edit;
     bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
     bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
+    bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree);
 
     Length size = ts_subtree_size(*entry.tree);
     Length padding = ts_subtree_padding(*entry.tree);

@@ -733,6 +744,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
       data->fragile_right = false;
       data->has_changes = false;
       data->has_external_tokens = false;
+      data->depends_on_column = false;
       data->is_missing = result.data.is_missing;
       data->is_keyword = result.data.is_keyword;
       result.ptr = data;

@@ -755,9 +767,18 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
       // If this child ends before the edit, it is not affected.
       if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;
 
-      // If this child starts after the edit, then we're done processing children.
-      if (child_left.bytes > edit.old_end.bytes ||
-          (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
+      // Keep editing child nodes until a node is reached that starts after the edit.
+      // Also, if this node's validity depends on its column position, then continue
+      // invalidating child nodes until reaching a line break.
+      if ((
+        (child_left.bytes > edit.old_end.bytes) ||
+        (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)
+      ) && (
+        !invalidate_first_row ||
+        child_left.extent.row > entry.tree->ptr->padding.extent.row
+      )) {
+        break;
+      }
 
       // Transform edit into the child's coordinate space.
       Edit child_edit = {

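The new break condition above is the heart of the fix: a child that starts after the edited byte range would normally be left untouched, but when the node it belongs to depends on column values, children keep being treated as edited until the first one that starts on a later row. A standalone model of just that decision (illustrative types; the real code compares against the parent's padding row):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Illustrative model of the break condition added to ts_subtree_edit.
typedef struct { uint32_t byte, row; } Position;

static bool stop_editing_children(
  Position child_start,
  uint32_t edit_old_end_byte,
  bool parent_depends_on_column,
  uint32_t parent_first_row
) {
  bool starts_after_edit = child_start.byte > edit_old_end_byte;
  bool past_first_row = child_start.row > parent_first_row;
  // Stop only when the child is past the edit AND (the parent does not depend
  // on columns, or a line break has been reached).
  return starts_after_edit && (!parent_depends_on_column || past_first_row);
}

int main(void) {
  uint32_t edit_end = 20, first_row = 2;
  Position same_row_child = {.byte = 30, .row = 2};
  Position next_row_child = {.byte = 40, .row = 3};
  printf("%d\n", stop_editing_children(same_row_child, edit_end, true, first_row)); // 0: keep invalidating
  printf("%d\n", stop_editing_children(next_row_child, edit_end, true, first_row)); // 1: stop at the line break
  return 0;
}
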
@@ -775,8 +796,10 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
       // Interpret all inserted text as applying to the *first* child that touches the edit.
       // Subsequent children never have any text inserted into them; they are only
       // shrunk to compensate for the edit.
-      if (child_right.bytes > edit.start.bytes ||
-          (child_right.bytes == edit.start.bytes && is_pure_insertion)) {
+      if (
+        child_right.bytes > edit.start.bytes ||
+        (child_right.bytes == edit.start.bytes && is_pure_insertion)
+      ) {
         edit.new_end = edit.start;
       }
 

@@ -981,12 +1004,14 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
     "state: %d\n"
     "error-cost: %u\n"
     "has-changes: %u\n"
+    "depends-on-column: %u\n"
     "repeat-depth: %u\n"
     "lookahead-bytes: %u",
     start_offset, end_offset,
     ts_subtree_parse_state(*self),
     ts_subtree_error_cost(*self),
     ts_subtree_has_changes(*self),
+    ts_subtree_depends_on_column(*self),
     ts_subtree_repeat_depth(*self),
     ts_subtree_lookahead_bytes(*self)
   );

@@ -78,6 +78,7 @@ typedef struct {
   bool fragile_right : 1;
   bool has_changes : 1;
   bool has_external_tokens : 1;
+  bool depends_on_column: 1;
   bool is_missing : 1;
   bool is_keyword : 1;
 

@@ -138,7 +139,7 @@ void ts_subtree_pool_delete(SubtreePool *);
 
 Subtree ts_subtree_new_leaf(
   SubtreePool *, TSSymbol, Length, Length, uint32_t,
-  TSStateId, bool, bool, const TSLanguage *
+  TSStateId, bool, bool, bool, const TSLanguage *
 );
 Subtree ts_subtree_new_error(
   SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *

@@ -284,6 +285,10 @@ static inline bool ts_subtree_has_external_tokens(Subtree self) {
   return self.data.is_inline ? false : self.ptr->has_external_tokens;
 }
 
+static inline bool ts_subtree_depends_on_column(Subtree self) {
+  return self.data.is_inline ? false : self.ptr->depends_on_column;
+}
+
 static inline bool ts_subtree_is_fragile(Subtree self) {
   return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right);
 }

@@ -92,7 +92,7 @@ bool tree_sitter_uses_current_column_external_scanner_scan(
   // If at the end of a statement, then get the current indent
   // level and pop some number of entries off of the indent stack.
   if (valid_symbols[NEWLINE] || valid_symbols[DEDENT]) {
-    while (lexer->lookahead == ' ') {
+    while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
       lexer->advance(lexer, false);
     }
 

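The test-grammar scanner fix itself is a one-liner: indentation is now measured over any whitespace other than a newline, instead of literal spaces only. A quick standalone check of the updated predicate:

#include <stdio.h>
#include <wctype.h>

// Tabs now count toward the indent level, but a newline still ends the
// measurement.
int main(void) {
  const int samples[] = {' ', '\t', '\n', 'x'};
  for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++) {
    int c = samples[i];
    int counts_as_indent = iswspace((wint_t)c) && c != '\n';
    printf("codepoint %d -> %d\n", c, counts_as_indent);
  }
  return 0; // prints 1, 1, 0, 0
}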