When editing, properly invalidate trees that depend on get_column

This commit is contained in:
Max Brunsfeld 2021-03-11 14:46:13 -08:00
parent e29d3714f7
commit a40045a419
8 changed files with 136 additions and 35 deletions

View file

@ -74,3 +74,9 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) ->
.load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path)
.unwrap()
}
/// Reads the `grammar.json` file of the test grammar called `name`.
///
/// The grammar is looked up in the `test_grammars/<name>` directory beneath
/// the fixtures directory. Returns the file's contents together with that
/// directory, which is always wrapped in `Some`.
///
/// # Panics
///
/// Panics if `grammar.json` cannot be read as a UTF-8 string.
pub fn get_test_grammar(name: &str) -> (String, Option<PathBuf>) {
    let grammar_dir = fixtures_dir().join("test_grammars").join(name);
    let grammar_json_path = grammar_dir.join("grammar.json");
    let grammar_json = fs::read_to_string(grammar_json_path).unwrap();
    (grammar_json, Some(grammar_dir))
}

View file

@ -1,5 +1,5 @@
use super::helpers::edits::ReadRecorder;
use super::helpers::fixtures::{get_language, get_test_language};
use super::helpers::fixtures::{get_language, get_test_grammar, get_test_language};
use crate::generate::generate_parser_for_grammar;
use crate::parse::{perform_edit, Edit};
use std::sync::atomic::{AtomicUsize, Ordering};
@ -406,6 +406,83 @@ fn test_parsing_empty_file_with_reused_tree() {
parser.parse("\n ", tree.as_ref());
}
// Regression test for incremental parsing with the "uses_current_column"
// test grammar: after an insertion, re-parsing with the edited old tree must
// yield the tree asserted below rather than reusing stale nodes.
#[test]
fn test_parsing_after_editing_tree_that_depends_on_column_values() {
// Generate a parser from the test grammar and load it into a fresh `Parser`.
let (grammar, path) = get_test_grammar("uses_current_column");
let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar).unwrap();
let mut parser = Parser::new();
parser
.set_language(get_test_language(
&grammar_name,
&parser_code,
path.as_ref().map(AsRef::as_ref),
))
.unwrap();
// Initial source text, kept as a mutable byte vector so it can be edited in place.
let mut code = b"
a = b
c = do d
e + f
g
h + i
"
.to_vec();
// First parse: no previous tree is supplied.
let mut tree = parser.parse(&code, None).unwrap();
// Before the edit, the `do` expression's block holds three children
// (an identifier, a binary expression and another identifier).
assert_eq!(
tree.root_node().to_sexp(),
concat!(
"(block ",
"(binary_expression (identifier) (identifier)) ",
"(binary_expression (identifier) (do_expression (block (identifier) (binary_expression (identifier) (identifier)) (identifier)))) ",
"(binary_expression (identifier) (identifier)))",
)
);
// Insert "1234" at byte offset 8 without deleting anything; the assertion
// that follows shows this lands directly after `c`.
perform_edit(
&mut tree,
&mut code,
&Edit {
position: 8,
deleted_length: 0,
inserted_text: b"1234".to_vec(),
},
);
assert_eq!(
code,
b"
a = b
c1234 = do d
e + f
g
h + i
"
);
// Re-parse through a `ReadRecorder`, handing the edited tree to the parser
// as the old tree.
let mut recorder = ReadRecorder::new(&code);
let tree = parser
.parse_with(&mut |i, _| recorder.read(i), Some(&tree))
.unwrap();
// After the edit, the `do` block is expected to contain a single identifier;
// the following binary expression and identifier become children of the
// outer block instead.
assert_eq!(
tree.root_node().to_sexp(),
concat!(
"(block ",
"(binary_expression (identifier) (identifier)) ",
"(binary_expression (identifier) (do_expression (block (identifier)))) ",
"(binary_expression (identifier) (identifier)) ",
"(identifier) ",
"(binary_expression (identifier) (identifier)))",
)
);
// The recorder must report exactly one string; it starts at the line break
// before the edited line and does not include the final `h + i` line.
assert_eq!(
recorder.strings_read(),
vec!["\nc1234 = do d\n e + f\n g\n"]
);
}
// Thread safety
#[test]

View file

@ -233,25 +233,8 @@ static void ts_lexer__mark_end(TSLexer *_self) {
// Lexer callback that reports the column of the lexer's current position.
//
// NOTE(review): the +/- markers are missing from this view. According to the
// hunk header (25 old lines, 8 new lines), the statements from
// `uint32_t goal_byte` through `return result;` are the REMOVED
// implementation, and the last two statements are the ADDED replacement.
static uint32_t ts_lexer__get_column(TSLexer *_self) {
Lexer *self = (Lexer *)_self;
// Removed implementation: remember the current byte offset, call
// ts_lexer_goto with the position of column 0 on the current row, then
// advance until the remembered offset is reached, counting one per advance.
uint32_t goal_byte = self->current_position.bytes;
ts_lexer_goto(self, (Length) {
.bytes = self->current_position.bytes - self->current_position.extent.column,
.extent = {
.row = self->current_position.extent.row,
.column = 0,
}
});
// Fetch a chunk and a lookahead character if none are currently loaded.
if (!self->chunk_size) ts_lexer__get_chunk(self);
if (!self->lookahead_size) ts_lexer__get_lookahead(self);
uint32_t result = 0;
while (self->current_position.bytes < goal_byte) {
ts_lexer__advance(&self->data, false);
result++;
}
return result;
// Added implementation: set the did_get_column flag and return the column
// already stored in current_position.
self->did_get_column = true;
return self->current_position.extent.column;
}
// Is the lexer at a boundary between two disjoint included ranges of
@ -318,6 +301,7 @@ void ts_lexer_start(Lexer *self) {
self->token_start_position = self->current_position;
self->token_end_position = LENGTH_UNDEFINED;
self->data.result_symbol = 0;
self->did_get_column = false;
if (!ts_lexer__eof(&self->data)) {
if (!self->chunk_size) ts_lexer__get_chunk(self);
if (!self->lookahead_size) ts_lexer__get_lookahead(self);

View file

@ -17,16 +17,17 @@ typedef struct {
Length token_end_position;
TSRange *included_ranges;
size_t included_range_count;
size_t current_included_range_index;
const char *chunk;
TSInput input;
TSLogger logger;
uint32_t included_range_count;
uint32_t current_included_range_index;
uint32_t chunk_start;
uint32_t chunk_size;
uint32_t lookahead_size;
bool did_get_column;
TSInput input;
TSLogger logger;
char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
} Lexer;

View file

@ -403,6 +403,7 @@ static Subtree ts_parser__lex(
bool found_external_token = false;
bool error_mode = parse_state == ERROR_STATE;
bool skipped_error = false;
bool called_get_column = false;
int32_t first_error_character = 0;
Length error_start_position = length_zero();
Length error_end_position = length_zero();
@ -445,6 +446,7 @@ static Subtree ts_parser__lex(
(!error_mode && ts_stack_has_advanced_since_error(self->stack, version))
)) {
found_external_token = true;
called_get_column = self->lexer.did_get_column;
break;
}
@ -546,6 +548,7 @@ static Subtree ts_parser__lex(
lookahead_bytes,
parse_state,
found_external_token,
called_get_column,
is_keyword,
self->language
);

View file

@ -166,7 +166,8 @@ static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t l
Subtree ts_subtree_new_leaf(
SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens,
uint32_t lookahead_bytes, TSStateId parse_state,
bool has_external_tokens, bool depends_on_column,
bool is_keyword, const TSLanguage *language
) {
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
@ -213,6 +214,7 @@ Subtree ts_subtree_new_leaf(
.fragile_right = false,
.has_changes = false,
.has_external_tokens = has_external_tokens,
.depends_on_column = depends_on_column,
.is_missing = false,
.is_keyword = is_keyword,
{{.first_leaf = {.symbol = 0, .parse_state = 0}}}
@ -245,7 +247,7 @@ Subtree ts_subtree_new_error(
) {
Subtree result = ts_subtree_new_leaf(
pool, ts_builtin_sym_error, padding, size, bytes_scanned,
parse_state, false, false, language
parse_state, false, false, false, language
);
SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
data->fragile_left = true;
@ -378,6 +380,7 @@ void ts_subtree_summarize_children(
self.ptr->repeat_depth = 0;
self.ptr->node_count = 1;
self.ptr->has_external_tokens = false;
self.ptr->depends_on_column = false;
self.ptr->dynamic_precedence = 0;
uint32_t structural_index = 0;
@ -388,6 +391,13 @@ void ts_subtree_summarize_children(
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = children[i];
if (
self.ptr->size.extent.row == 0 &&
ts_subtree_depends_on_column(child)
) {
self.ptr->depends_on_column = true;
}
if (i == 0) {
self.ptr->padding = ts_subtree_padding(child);
self.ptr->size = ts_subtree_size(child);
@ -545,7 +555,7 @@ Subtree ts_subtree_new_missing_leaf(
) {
Subtree result = ts_subtree_new_leaf(
pool, symbol, padding, length_zero(), 0,
0, false, false, language
0, false, false, false, language
);
if (result.data.is_inline) {
result.data.is_missing = true;
@ -670,6 +680,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
Edit edit = entry.edit;
bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree);
Length size = ts_subtree_size(*entry.tree);
Length padding = ts_subtree_padding(*entry.tree);
@ -733,6 +744,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
data->fragile_right = false;
data->has_changes = false;
data->has_external_tokens = false;
data->depends_on_column = false;
data->is_missing = result.data.is_missing;
data->is_keyword = result.data.is_keyword;
result.ptr = data;
@ -755,9 +767,18 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
// If this child ends before the edit, it is not affected.
if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;
// If this child starts after the edit, then we're done processing children.
if (child_left.bytes > edit.old_end.bytes ||
(child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
// Keep editing child nodes until a node is reached that starts after the edit.
// Also, if this node's validity depends on its column position, then continue
// invalidating child nodes until reaching a line break.
if ((
(child_left.bytes > edit.old_end.bytes) ||
(child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)
) && (
!invalidate_first_row ||
child_left.extent.row > entry.tree->ptr->padding.extent.row
)) {
break;
}
// Transform edit into the child's coordinate space.
Edit child_edit = {
@ -775,8 +796,10 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
// Interpret all inserted text as applying to the *first* child that touches the edit.
// Subsequent children never have any text inserted into them; they are only
// shrunk to compensate for the edit.
if (child_right.bytes > edit.start.bytes ||
(child_right.bytes == edit.start.bytes && is_pure_insertion)) {
if (
child_right.bytes > edit.start.bytes ||
(child_right.bytes == edit.start.bytes && is_pure_insertion)
) {
edit.new_end = edit.start;
}
@ -981,12 +1004,14 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
"state: %d\n"
"error-cost: %u\n"
"has-changes: %u\n"
"depends-on-column: %u\n"
"repeat-depth: %u\n"
"lookahead-bytes: %u",
start_offset, end_offset,
ts_subtree_parse_state(*self),
ts_subtree_error_cost(*self),
ts_subtree_has_changes(*self),
ts_subtree_depends_on_column(*self),
ts_subtree_repeat_depth(*self),
ts_subtree_lookahead_bytes(*self)
);

View file

@ -78,6 +78,7 @@ typedef struct {
bool fragile_right : 1;
bool has_changes : 1;
bool has_external_tokens : 1;
bool depends_on_column: 1;
bool is_missing : 1;
bool is_keyword : 1;
@ -138,7 +139,7 @@ void ts_subtree_pool_delete(SubtreePool *);
Subtree ts_subtree_new_leaf(
SubtreePool *, TSSymbol, Length, Length, uint32_t,
TSStateId, bool, bool, const TSLanguage *
TSStateId, bool, bool, bool, const TSLanguage *
);
Subtree ts_subtree_new_error(
SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
@ -284,6 +285,10 @@ static inline bool ts_subtree_has_external_tokens(Subtree self) {
return self.data.is_inline ? false : self.ptr->has_external_tokens;
}
// Returns the `depends_on_column` flag stored on a heap-allocated subtree.
// Inline subtrees do not carry the flag and always report false.
static inline bool ts_subtree_depends_on_column(Subtree self) {
  if (self.data.is_inline) {
    return false;
  }
  return self.ptr->depends_on_column;
}
// Returns true when a heap-allocated subtree has either its `fragile_left`
// or its `fragile_right` flag set. Inline subtrees always report false.
static inline bool ts_subtree_is_fragile(Subtree self) {
  if (self.data.is_inline) {
    return false;
  }
  return self.ptr->fragile_left || self.ptr->fragile_right;
}

View file

@ -92,7 +92,7 @@ bool tree_sitter_uses_current_column_external_scanner_scan(
// If at the end of a statement, then get the current indent
// level and pop some number of entries off of the indent stack.
if (valid_symbols[NEWLINE] || valid_symbols[DEDENT]) {
while (lexer->lookahead == ' ') {
while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
lexer->advance(lexer, false);
}