Fix behavior of Lexer.get_column when at EOF

2021-03-11 11:25:10 -08:00 · 2021-03-11 11:25:10 -08:00 · e29d3714f7
commit e29d3714f7
parent 57036b4f8a
4 changed files with 337 additions and 56 deletions
--- a/lib/src/lexer.c
+++ b/lib/src/lexer.c
@ -102,6 +102,56 @@ static void ts_lexer__get_lookahead(Lexer *self) {
  }
 }

+static void ts_lexer_goto(Lexer *self, Length position) {
+  self->current_position = position;
+  bool found_included_range = false;
+
+  // Move to the first valid position at or after the given position.
+  for (unsigned i = 0; i < self->included_range_count; i++) {
+    TSRange *included_range = &self->included_ranges[i];
+    if (included_range->end_byte > position.bytes) {
+      if (included_range->start_byte > position.bytes) {
+        self->current_position = (Length) {
+          .bytes = included_range->start_byte,
+          .extent = included_range->start_point,
+        };
+      }
+
+      self->current_included_range_index = i;
+      found_included_range = true;
+      break;
+    }
+  }
+
+  if (found_included_range) {
+    // If the current position is outside of the current chunk of text,
+    // then clear out the current chunk of text.
+    if (self->chunk && (
+      position.bytes < self->chunk_start ||
+      position.bytes >= self->chunk_start + self->chunk_size
+    )) {
+      ts_lexer__clear_chunk(self);
+    }
+
+    self->lookahead_size = 0;
+    self->data.lookahead = '\0';
+  }
+
+  // If the given position is beyond any of included ranges, move to the EOF
+  // state - past the end of the included ranges.
+  else {
+    self->current_included_range_index = self->included_range_count;
+    TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
+    self->current_position = (Length) {
+      .bytes = last_included_range->end_byte,
+      .extent = last_included_range->end_point,
+    };
+    ts_lexer__clear_chunk(self);
+    self->lookahead_size = 1;
+    self->data.lookahead = '\0';
+  }
+}
+
 // Advance to the next character in the source code, retrieving a new
 // chunk of source code if needed.
 static void ts_lexer__advance(TSLexer *_self, bool skip) {
@ -185,12 +235,15 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) {
  Lexer *self = (Lexer *)_self;
  uint32_t goal_byte = self->current_position.bytes;

-  self->current_position.bytes -= self->current_position.extent.column;
-  self->current_position.extent.column = 0;
-
-  if (self->current_position.bytes < self->chunk_start) {
-    ts_lexer__get_chunk(self);
-  }
+  ts_lexer_goto(self, (Length) {
+    .bytes = self->current_position.bytes - self->current_position.extent.column,
+    .extent = {
+      .row = self->current_position.extent.row,
+      .column = 0,
+    }
+  });
+  if (!self->chunk_size) ts_lexer__get_chunk(self);
+  if (!self->lookahead_size) ts_lexer__get_lookahead(self);

  uint32_t result = 0;
  while (self->current_position.bytes < goal_byte) {
@ -247,56 +300,6 @@ void ts_lexer_delete(Lexer *self) {
  ts_free(self->included_ranges);
 }

-static void ts_lexer_goto(Lexer *self, Length position) {
-  self->current_position = position;
-  bool found_included_range = false;
-
-  // Move to the first valid position at or after the given position.
-  for (unsigned i = 0; i < self->included_range_count; i++) {
-    TSRange *included_range = &self->included_ranges[i];
-    if (included_range->end_byte > position.bytes) {
-      if (included_range->start_byte > position.bytes) {
-        self->current_position = (Length) {
-          .bytes = included_range->start_byte,
-          .extent = included_range->start_point,
-        };
-      }
-
-      self->current_included_range_index = i;
-      found_included_range = true;
-      break;
-    }
-  }
-
-  if (found_included_range) {
-    // If the current position is outside of the current chunk of text,
-    // then clear out the current chunk of text.
-    if (self->chunk && (
-      position.bytes < self->chunk_start ||
-      position.bytes >= self->chunk_start + self->chunk_size
-    )) {
-      ts_lexer__clear_chunk(self);
-    }
-
-    self->lookahead_size = 0;
-    self->data.lookahead = '\0';
-  }
-
-  // If the given position is beyond any of included ranges, move to the EOF
-  // state - past the end of the included ranges.
-  else {
-    self->current_included_range_index = self->included_range_count;
-    TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
-    self->current_position = (Length) {
-      .bytes = last_included_range->end_byte,
-      .extent = last_included_range->end_point,
-    };
-    ts_lexer__clear_chunk(self);
-    self->lookahead_size = 1;
-    self->data.lookahead = '\0';
-  }
-}
-
 void ts_lexer_set_input(Lexer *self, TSInput input) {
  self->input = input;
  ts_lexer__clear_chunk(self);