From e29d3714f7ee821bb717ad4222bf5280ec7a67a9 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Thu, 11 Mar 2021 11:25:10 -0800
Subject: [PATCH 1/2] Fix behavior of Lexer.get_column when at EOF

---
 lib/src/lexer.c                               | 115 +++++++--------
 .../uses_current_column/corpus.txt            |  76 ++++++++++
 .../uses_current_column/grammar.json          |  69 +++++++++
 .../uses_current_column/scanner.c             | 133 ++++++++++++++++++
 4 files changed, 337 insertions(+), 56 deletions(-)
 create mode 100644 test/fixtures/test_grammars/uses_current_column/corpus.txt
 create mode 100644 test/fixtures/test_grammars/uses_current_column/grammar.json
 create mode 100644 test/fixtures/test_grammars/uses_current_column/scanner.c

diff --git a/lib/src/lexer.c b/lib/src/lexer.c
index 08e90a8c..f349d76f 100644
--- a/lib/src/lexer.c
+++ b/lib/src/lexer.c
@@ -102,6 +102,56 @@ static void ts_lexer__get_lookahead(Lexer *self) {
   }
 }
 
+static void ts_lexer_goto(Lexer *self, Length position) {
+  self->current_position = position;
+  bool found_included_range = false;
+
+  // Move to the first valid position at or after the given position.
+  for (unsigned i = 0; i < self->included_range_count; i++) {
+    TSRange *included_range = &self->included_ranges[i];
+    if (included_range->end_byte > position.bytes) {
+      if (included_range->start_byte > position.bytes) {
+        self->current_position = (Length) {
+          .bytes = included_range->start_byte,
+          .extent = included_range->start_point,
+        };
+      }
+
+      self->current_included_range_index = i;
+      found_included_range = true;
+      break;
+    }
+  }
+
+  if (found_included_range) {
+    // If the current position is outside of the current chunk of text,
+    // then clear out the current chunk of text.
+    if (self->chunk && (
+      position.bytes < self->chunk_start ||
+      position.bytes >= self->chunk_start + self->chunk_size
+    )) {
+      ts_lexer__clear_chunk(self);
+    }
+
+    self->lookahead_size = 0;
+    self->data.lookahead = '\0';
+  }
+
+  // If the given position is beyond any of included ranges, move to the EOF
+  // state - past the end of the included ranges.
+  else {
+    self->current_included_range_index = self->included_range_count;
+    TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
+    self->current_position = (Length) {
+      .bytes = last_included_range->end_byte,
+      .extent = last_included_range->end_point,
+    };
+    ts_lexer__clear_chunk(self);
+    self->lookahead_size = 1;
+    self->data.lookahead = '\0';
+  }
+}
+
 // Advance to the next character in the source code, retrieving a new
 // chunk of source code if needed.
 static void ts_lexer__advance(TSLexer *_self, bool skip) {
@@ -185,12 +235,15 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) {
   Lexer *self = (Lexer *)_self;
   uint32_t goal_byte = self->current_position.bytes;
 
-  self->current_position.bytes -= self->current_position.extent.column;
-  self->current_position.extent.column = 0;
-
-  if (self->current_position.bytes < self->chunk_start) {
-    ts_lexer__get_chunk(self);
-  }
+  ts_lexer_goto(self, (Length) {
+    .bytes = self->current_position.bytes - self->current_position.extent.column,
+    .extent = {
+      .row = self->current_position.extent.row,
+      .column = 0,
+    }
+  });
+  if (!self->chunk_size) ts_lexer__get_chunk(self);
+  if (!self->lookahead_size) ts_lexer__get_lookahead(self);
 
   uint32_t result = 0;
   while (self->current_position.bytes < goal_byte) {
@@ -247,56 +300,6 @@ void ts_lexer_delete(Lexer *self) {
   ts_free(self->included_ranges);
 }
 
-static void ts_lexer_goto(Lexer *self, Length position) {
-  self->current_position = position;
-  bool found_included_range = false;
-
-  // Move to the first valid position at or after the given position.
-  for (unsigned i = 0; i < self->included_range_count; i++) {
-    TSRange *included_range = &self->included_ranges[i];
-    if (included_range->end_byte > position.bytes) {
-      if (included_range->start_byte > position.bytes) {
-        self->current_position = (Length) {
-          .bytes = included_range->start_byte,
-          .extent = included_range->start_point,
-        };
-      }
-
-      self->current_included_range_index = i;
-      found_included_range = true;
-      break;
-    }
-  }
-
-  if (found_included_range) {
-    // If the current position is outside of the current chunk of text,
-    // then clear out the current chunk of text.
-    if (self->chunk && (
-      position.bytes < self->chunk_start ||
-      position.bytes >= self->chunk_start + self->chunk_size
-    )) {
-      ts_lexer__clear_chunk(self);
-    }
-
-    self->lookahead_size = 0;
-    self->data.lookahead = '\0';
-  }
-
-  // If the given position is beyond any of included ranges, move to the EOF
-  // state - past the end of the included ranges.
-  else {
-    self->current_included_range_index = self->included_range_count;
-    TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
-    self->current_position = (Length) {
-      .bytes = last_included_range->end_byte,
-      .extent = last_included_range->end_point,
-    };
-    ts_lexer__clear_chunk(self);
-    self->lookahead_size = 1;
-    self->data.lookahead = '\0';
-  }
-}
-
 void ts_lexer_set_input(Lexer *self, TSInput input) {
   self->input = input;
   ts_lexer__clear_chunk(self);
diff --git a/test/fixtures/test_grammars/uses_current_column/corpus.txt b/test/fixtures/test_grammars/uses_current_column/corpus.txt
new file mode 100644
index 00000000..9638e25e
--- /dev/null
+++ b/test/fixtures/test_grammars/uses_current_column/corpus.txt
@@ -0,0 +1,76 @@
+===============
+Simple blocks
+===============
+
+do a
+   e
+f
+
+---
+
+(block
+  (do_expression (block
+    (identifier)
+    (identifier)))
+  (identifier))
+
+=====================
+Nested blocks
+=====================
+
+a = do b
+       c + do e
+              f
+              g
+       h
+i
+
+---
+
+(block
+  (binary_expression
+    (identifier)
+    (do_expression (block
+      (identifier)
+      (binary_expression
+        (identifier)
+        (do_expression (block
+          (identifier)
+          (identifier)
+          (identifier))))
+      (identifier))))
+  (identifier))
+
+===============================
+Blocks with leading newlines
+===============================
+
+do
+
+
+   a = b
+   do
+      c
+      d
+   e
+ f
+
+---
+
+(block
+  (do_expression (block
+    (binary_expression (identifier) (identifier))
+    (do_expression (block
+      (identifier)
+      (identifier)))
+    (identifier)
+    (identifier))))
+
+=====================
+Unterminated blocks
+=====================
+
+do
+---
+
+(ERROR)
diff --git a/test/fixtures/test_grammars/uses_current_column/grammar.json b/test/fixtures/test_grammars/uses_current_column/grammar.json
new file mode 100644
index 00000000..90c740b6
--- /dev/null
+++ b/test/fixtures/test_grammars/uses_current_column/grammar.json
@@ -0,0 +1,69 @@
+{
+  "name": "uses_current_column",
+
+  "externals": [
+    {"type": "SYMBOL", "name": "_indent"},
+    {"type": "SYMBOL", "name": "_dedent"},
+    {"type": "SYMBOL", "name": "_newline"}
+  ],
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"}
+  ],
+
+  "rules": {
+    "block": {
+      "type": "REPEAT1",
+      "content": {"type": "SYMBOL", "name": "_statement"}
+    },
+
+    "_statement": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "_expression"},
+        {"type": "SYMBOL", "name": "_newline"}
+      ]
+    },
+
+    "_expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "do_expression"},
+        {"type": "SYMBOL", "name": "binary_expression"},
+        {"type": "SYMBOL", "name": "identifier"}
+      ]
+    },
+
+    "do_expression": {
+      "type": "SEQ",
+      "members": [
+        {"type": "STRING", "value": "do"},
+        {"type": "SYMBOL", "name": "_indent"},
+        {"type": "SYMBOL", "name": "block"},
+        {"type": "SYMBOL", "name": "_dedent"}
+      ]
+    },
+
+    "binary_expression": {
+      "type": "PREC_LEFT",
+      "value": 1,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "_expression"},
+          {
+            "type": "CHOICE",
+            "members": [
+              {"type": "STRING", "value": "="},
+              {"type": "STRING", "value": "+"},
+              {"type": "STRING", "value": "-"}
+            ]
+          },
+          {"type": "SYMBOL", "name": "_expression"}
+        ]
+      }
+    },
+
+    "identifier": {"type": "PATTERN", "value": "\\w+"}
+  }
+}
diff --git a/test/fixtures/test_grammars/uses_current_column/scanner.c b/test/fixtures/test_grammars/uses_current_column/scanner.c
new file mode 100644
index 00000000..efd27f9f
--- /dev/null
+++ b/test/fixtures/test_grammars/uses_current_column/scanner.c
@@ -0,0 +1,133 @@
+#include <stdlib.h>
+#include <wctype.h>
+#include <tree_sitter/parser.h>
+
+enum TokenType {
+  INDENT,
+  DEDENT,
+  NEWLINE,
+};
+
+typedef struct {
+  uint8_t queued_dedent_count;
+  uint8_t indent_count;
+  int8_t indents[32];
+} Scanner;
+
+void *tree_sitter_uses_current_column_external_scanner_create() {
+  Scanner *self = malloc(sizeof(Scanner));
+  self->queued_dedent_count = 0;
+  self->indent_count = 1;
+  self->indents[0] = 0;
+  return (void *)self;
+}
+
+void tree_sitter_uses_current_column_external_scanner_destroy(void *payload) {
+  free(payload);
+}
+
+unsigned tree_sitter_uses_current_column_external_scanner_serialize(
+  void *payload,
+  char *buffer
+) {
+  Scanner *self = (Scanner *)payload;
+  buffer[0] = self->queued_dedent_count;
+  for (unsigned i = 0; i < self->indent_count; i++) {
+    buffer[i + 1] = self->indents[i];
+  }
+  return self->indent_count + 1;
+}
+
+void tree_sitter_uses_current_column_external_scanner_deserialize(
+  void *payload,
+  const char *buffer,
+  unsigned length
+) {
+  Scanner *self = (Scanner *)payload;
+  if (length > 0) {
+    self->queued_dedent_count = buffer[0];
+    self->indent_count = length - 1;
+    for (unsigned i = 0; i < self->indent_count; i++) {
+      self->indents[i] = buffer[i + 1];
+    }
+  } else {
+    self->queued_dedent_count = 0;
+    self->indent_count = 1;
+    self->indents[0] = 0;
+  }
+}
+
+bool tree_sitter_uses_current_column_external_scanner_scan(
+  void *payload,
+  TSLexer *lexer,
+  const bool *valid_symbols
+) {
+  Scanner *self = (Scanner *)payload;
+  lexer->mark_end(lexer);
+
+  // If dedents were found in a previous run, and are valid now,
+  // then return a dedent.
+  if (self->queued_dedent_count > 0 && valid_symbols[DEDENT]) {
+    lexer->result_symbol = DEDENT;
+    self->queued_dedent_count--;
+    return true;
+  }
+
+  // If an indent is valid, then push an entry onto the indent stack
+  // (the current column minus 2), and return an indent.
+  if (valid_symbols[INDENT]) {
+    while (iswspace(lexer->lookahead)) {
+      lexer->advance(lexer, false);
+    }
+    uint32_t column = lexer->get_column(lexer);
+    if (column > self->indents[self->indent_count - 1]) {
+      self->indents[self->indent_count++] = column - 2;
+      lexer->result_symbol = INDENT;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // If at the end of a statement, then get the current indent
+  // level and pop some number of entries off of the indent stack.
+  if (valid_symbols[NEWLINE] || valid_symbols[DEDENT]) {
+    while (lexer->lookahead == ' ') {
+      lexer->advance(lexer, false);
+    }
+
+    if (lexer->lookahead == '\n') {
+      lexer->advance(lexer, false);
+
+      uint32_t next_column = 0;
+      for (;;) {
+        if (lexer->lookahead == ' ') {
+          next_column++;
+          lexer->advance(lexer, false);
+        } else if (lexer->lookahead == '\n') {
+          next_column = 0;
+          lexer->advance(lexer, false);
+        } else {
+          break;
+        }
+      }
+
+      unsigned dedent_count = 0;
+      while (next_column < self->indents[self->indent_count - 1]) {
+        dedent_count++;
+        self->indent_count--;
+      }
+
+      if (dedent_count > 0 && valid_symbols[DEDENT]) {
+        lexer->result_symbol = DEDENT;
+        return true;
+      } else if (valid_symbols[NEWLINE]) {
+        self->queued_dedent_count += dedent_count;
+        lexer->result_symbol = NEWLINE;
+        return true;
+      }
+    }
+  }
+
+  return false;
+}

From a40045a419e5b0a7818c4dbc0a2ff49c8dbca822 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Thu, 11 Mar 2021 14:46:13 -0800
Subject: [PATCH 2/2] When editing, properly invalidate trees that depend on
 get_column

---
 cli/src/tests/helpers/fixtures.rs             |  6 ++
 cli/src/tests/parser_test.rs                  | 79 ++++++++++++++++++-
 lib/src/lexer.c                               | 22 +-----
 lib/src/lexer.h                               | 11 +--
 lib/src/parser.c                              |  3 +
 lib/src/subtree.c                             | 41 ++++++++--
 lib/src/subtree.h                             |  7 +-
 .../uses_current_column/scanner.c             |  2 +-
 8 files changed, 136 insertions(+), 35 deletions(-)

diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs
index fc459777..d098bd28 100644
--- a/cli/src/tests/helpers/fixtures.rs
+++ b/cli/src/tests/helpers/fixtures.rs
@@ -74,3 +74,9 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) ->
         .load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path)
         .unwrap()
 }
+
+pub fn get_test_grammar(name: &str) -> (String, Option<PathBuf>) {
+    let dir = fixtures_dir().join("test_grammars").join(name);
+    let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap();
+    (grammar, Some(dir))
+}
diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs
index b02f04b2..d623126f 100644
--- a/cli/src/tests/parser_test.rs
+++ b/cli/src/tests/parser_test.rs
@@ -1,5 +1,5 @@
 use super::helpers::edits::ReadRecorder;
-use super::helpers::fixtures::{get_language, get_test_language};
+use super::helpers::fixtures::{get_language, get_test_grammar, get_test_language};
 use crate::generate::generate_parser_for_grammar;
 use crate::parse::{perform_edit, Edit};
 use std::sync::atomic::{AtomicUsize, Ordering};
@@ -406,6 +406,83 @@ fn test_parsing_empty_file_with_reused_tree() {
     parser.parse("\n  ", tree.as_ref());
 }
 
+#[test]
+fn test_parsing_after_editing_tree_that_depends_on_column_values() {
+    let (grammar, path) = get_test_grammar("uses_current_column");
+    let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar).unwrap();
+
+    let mut parser = Parser::new();
+    parser
+        .set_language(get_test_language(
+            &grammar_name,
+            &parser_code,
+            path.as_ref().map(AsRef::as_ref),
+        ))
+        .unwrap();
+
+    let mut code = b"
+a = b
+c = do d
+       e + f
+       g
+h + i
+    "
+    .to_vec();
+    let mut tree = parser.parse(&code, None).unwrap();
+    assert_eq!(
+        tree.root_node().to_sexp(),
+        concat!(
+            "(block ",
+            "(binary_expression (identifier) (identifier)) ",
+            "(binary_expression (identifier) (do_expression (block (identifier) (binary_expression (identifier) (identifier)) (identifier)))) ",
+            "(binary_expression (identifier) (identifier)))",
+        )
+    );
+
+    perform_edit(
+        &mut tree,
+        &mut code,
+        &Edit {
+            position: 8,
+            deleted_length: 0,
+            inserted_text: b"1234".to_vec(),
+        },
+    );
+
+    assert_eq!(
+        code,
+        b"
+a = b
+c1234 = do d
+       e + f
+       g
+h + i
+    "
+    );
+
+    let mut recorder = ReadRecorder::new(&code);
+    let tree = parser
+        .parse_with(&mut |i, _| recorder.read(i), Some(&tree))
+        .unwrap();
+
+    assert_eq!(
+        tree.root_node().to_sexp(),
+        concat!(
+            "(block ",
+            "(binary_expression (identifier) (identifier)) ",
+            "(binary_expression (identifier) (do_expression (block (identifier)))) ",
+            "(binary_expression (identifier) (identifier)) ",
+            "(identifier) ",
+            "(binary_expression (identifier) (identifier)))",
+        )
+    );
+
+    assert_eq!(
+        recorder.strings_read(),
+        vec!["\nc1234 = do d\n       e + f\n       g\n"]
+    );
+}
+
 // Thread safety
 
 #[test]
diff --git a/lib/src/lexer.c b/lib/src/lexer.c
index f349d76f..5d1965ad 100644
--- a/lib/src/lexer.c
+++ b/lib/src/lexer.c
@@ -233,25 +233,8 @@ static void ts_lexer__mark_end(TSLexer *_self) {
 
 static uint32_t ts_lexer__get_column(TSLexer *_self) {
   Lexer *self = (Lexer *)_self;
-  uint32_t goal_byte = self->current_position.bytes;
-
-  ts_lexer_goto(self, (Length) {
-    .bytes = self->current_position.bytes - self->current_position.extent.column,
-    .extent = {
-      .row = self->current_position.extent.row,
-      .column = 0,
-    }
-  });
-  if (!self->chunk_size) ts_lexer__get_chunk(self);
-  if (!self->lookahead_size) ts_lexer__get_lookahead(self);
-
-  uint32_t result = 0;
-  while (self->current_position.bytes < goal_byte) {
-    ts_lexer__advance(&self->data, false);
-    result++;
-  }
-
-  return result;
+  self->did_get_column = true;
+  return self->current_position.extent.column;
 }
 
 // Is the lexer at a boundary between two disjoint included ranges of
@@ -318,6 +301,7 @@ void ts_lexer_start(Lexer *self) {
   self->token_start_position = self->current_position;
   self->token_end_position = LENGTH_UNDEFINED;
   self->data.result_symbol = 0;
+  self->did_get_column = false;
   if (!ts_lexer__eof(&self->data)) {
     if (!self->chunk_size) ts_lexer__get_chunk(self);
     if (!self->lookahead_size) ts_lexer__get_lookahead(self);
diff --git a/lib/src/lexer.h b/lib/src/lexer.h
index 5e392945..c1a5bfdb 100644
--- a/lib/src/lexer.h
+++ b/lib/src/lexer.h
@@ -17,16 +17,17 @@ typedef struct {
   Length token_end_position;
 
   TSRange *included_ranges;
-  size_t included_range_count;
-  size_t current_included_range_index;
-
   const char *chunk;
+  TSInput input;
+  TSLogger logger;
+
+  uint32_t included_range_count;
+  uint32_t current_included_range_index;
   uint32_t chunk_start;
   uint32_t chunk_size;
   uint32_t lookahead_size;
+  bool did_get_column;
 
-  TSInput input;
-  TSLogger logger;
   char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
 } Lexer;
 
diff --git a/lib/src/parser.c b/lib/src/parser.c
index 35069f63..0f0b4ac4 100644
--- a/lib/src/parser.c
+++ b/lib/src/parser.c
@@ -403,6 +403,7 @@ static Subtree ts_parser__lex(
   bool found_external_token = false;
   bool error_mode = parse_state == ERROR_STATE;
   bool skipped_error = false;
+  bool called_get_column = false;
   int32_t first_error_character = 0;
   Length error_start_position = length_zero();
   Length error_end_position = length_zero();
@@ -445,6 +446,7 @@ static Subtree ts_parser__lex(
         (!error_mode && ts_stack_has_advanced_since_error(self->stack, version))
       )) {
         found_external_token = true;
+        called_get_column = self->lexer.did_get_column;
         break;
       }
 
@@ -546,6 +548,7 @@ static Subtree ts_parser__lex(
       lookahead_bytes,
       parse_state,
       found_external_token,
+      called_get_column,
       is_keyword,
       self->language
     );
diff --git a/lib/src/subtree.c b/lib/src/subtree.c
index e90dc9d7..e5f253ea 100644
--- a/lib/src/subtree.c
+++ b/lib/src/subtree.c
@@ -166,7 +166,8 @@ static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t l
 
 Subtree ts_subtree_new_leaf(
   SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
-  uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens,
+  uint32_t lookahead_bytes, TSStateId parse_state,
+  bool has_external_tokens, bool depends_on_column,
   bool is_keyword, const TSLanguage *language
 ) {
   TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
@@ -213,6 +214,7 @@ Subtree ts_subtree_new_leaf(
       .fragile_right = false,
       .has_changes = false,
       .has_external_tokens = has_external_tokens,
+      .depends_on_column = depends_on_column,
       .is_missing = false,
       .is_keyword = is_keyword,
       {{.first_leaf = {.symbol = 0, .parse_state = 0}}}
@@ -245,7 +247,7 @@ Subtree ts_subtree_new_error(
 ) {
   Subtree result = ts_subtree_new_leaf(
     pool, ts_builtin_sym_error, padding, size, bytes_scanned,
-    parse_state, false, false, language
+    parse_state, false, false, false, language
   );
   SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
   data->fragile_left = true;
@@ -378,6 +380,7 @@ void ts_subtree_summarize_children(
   self.ptr->repeat_depth = 0;
   self.ptr->node_count = 1;
   self.ptr->has_external_tokens = false;
+  self.ptr->depends_on_column = false;
   self.ptr->dynamic_precedence = 0;
 
   uint32_t structural_index = 0;
@@ -388,6 +391,13 @@ void ts_subtree_summarize_children(
   for (uint32_t i = 0; i < self.ptr->child_count; i++) {
     Subtree child = children[i];
 
+    if (
+      self.ptr->size.extent.row == 0 &&
+      ts_subtree_depends_on_column(child)
+    ) {
+      self.ptr->depends_on_column = true;
+    }
+
     if (i == 0) {
       self.ptr->padding = ts_subtree_padding(child);
       self.ptr->size = ts_subtree_size(child);
@@ -545,7 +555,7 @@ Subtree ts_subtree_new_missing_leaf(
 ) {
   Subtree result = ts_subtree_new_leaf(
     pool, symbol, padding, length_zero(), 0,
-    0, false, false, language
+    0, false, false, false, language
   );
   if (result.data.is_inline) {
     result.data.is_missing = true;
@@ -670,6 +680,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
     Edit edit = entry.edit;
     bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
     bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
+    bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree);
 
     Length size = ts_subtree_size(*entry.tree);
     Length padding = ts_subtree_padding(*entry.tree);
@@ -733,6 +744,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
         data->fragile_right = false;
         data->has_changes = false;
         data->has_external_tokens = false;
+        data->depends_on_column = false;
         data->is_missing = result.data.is_missing;
         data->is_keyword = result.data.is_keyword;
         result.ptr = data;
@@ -755,9 +767,18 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
       // If this child ends before the edit, it is not affected.
       if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;
 
-      // If this child starts after the edit, then we're done processing children.
-      if (child_left.bytes > edit.old_end.bytes ||
-          (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
+      // Keep editing child nodes until a node is reached that starts after the edit.
+      // Also, if this node's validity depends on its column position, then continue
+      // invalidating child nodes until reaching a line break.
+      if ((
+        (child_left.bytes > edit.old_end.bytes) ||
+        (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)
+      ) && (
+        !invalidate_first_row ||
+        child_left.extent.row > entry.tree->ptr->padding.extent.row
+      )) {
+        break;
+      }
 
       // Transform edit into the child's coordinate space.
       Edit child_edit = {
@@ -775,8 +796,10 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
       // Interpret all inserted text as applying to the *first* child that touches the edit.
       // Subsequent children are only never have any text inserted into them; they are only
       // shrunk to compensate for the edit.
-      if (child_right.bytes > edit.start.bytes ||
-          (child_right.bytes == edit.start.bytes && is_pure_insertion)) {
+      if (
+        child_right.bytes > edit.start.bytes ||
+        (child_right.bytes == edit.start.bytes && is_pure_insertion)
+      ) {
         edit.new_end = edit.start;
       }
 
@@ -981,12 +1004,14 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
     "state: %d\n"
     "error-cost: %u\n"
     "has-changes: %u\n"
+    "depends-on-column: %u\n"
     "repeat-depth: %u\n"
     "lookahead-bytes: %u",
     start_offset, end_offset,
     ts_subtree_parse_state(*self),
     ts_subtree_error_cost(*self),
     ts_subtree_has_changes(*self),
+    ts_subtree_depends_on_column(*self),
     ts_subtree_repeat_depth(*self),
     ts_subtree_lookahead_bytes(*self)
   );
diff --git a/lib/src/subtree.h b/lib/src/subtree.h
index b020deb6..d227db10 100644
--- a/lib/src/subtree.h
+++ b/lib/src/subtree.h
@@ -78,6 +78,7 @@ typedef struct {
   bool fragile_right : 1;
   bool has_changes : 1;
   bool has_external_tokens : 1;
+  bool depends_on_column: 1;
   bool is_missing : 1;
   bool is_keyword : 1;
 
@@ -138,7 +139,7 @@ void ts_subtree_pool_delete(SubtreePool *);
 
 Subtree ts_subtree_new_leaf(
   SubtreePool *, TSSymbol, Length, Length, uint32_t,
-  TSStateId, bool, bool, const TSLanguage *
+  TSStateId, bool, bool, bool, const TSLanguage *
 );
 Subtree ts_subtree_new_error(
   SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
@@ -284,6 +285,10 @@ static inline bool ts_subtree_has_external_tokens(Subtree self) {
   return self.data.is_inline ? false : self.ptr->has_external_tokens;
 }
 
+static inline bool ts_subtree_depends_on_column(Subtree self) {
+  return self.data.is_inline ? false : self.ptr->depends_on_column;
+}
+
 static inline bool ts_subtree_is_fragile(Subtree self) {
   return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right);
 }
diff --git a/test/fixtures/test_grammars/uses_current_column/scanner.c b/test/fixtures/test_grammars/uses_current_column/scanner.c
index efd27f9f..62b16392 100644
--- a/test/fixtures/test_grammars/uses_current_column/scanner.c
+++ b/test/fixtures/test_grammars/uses_current_column/scanner.c
@@ -92,7 +92,7 @@ bool tree_sitter_uses_current_column_external_scanner_scan(
   // If at the end of a statement, then get the current indent
   // level and pop some number of entries off of the indent stack.
   if (valid_symbols[NEWLINE] || valid_symbols[DEDENT]) {
-    while (lexer->lookahead == ' ') {
+    while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
       lexer->advance(lexer, false);
     }