From c66fddd3aa7d7e8d1dd0286c0f77d467081d2636 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 15 Jun 2017 16:35:34 -0700 Subject: [PATCH] Add TSInput option to measure columns in bytes not characters --- include/tree_sitter/runtime.h | 1 + src/runtime/document.c | 9 +++++++-- src/runtime/lexer.c | 2 ++ src/runtime/string_input.c | 3 ++- test/helpers/spy_input.cc | 1 + test/runtime/document_test.cc | 15 +++++++++++++-- 6 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 95da0787..638bc5bd 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -31,6 +31,7 @@ typedef struct { const char *(*read)(void *payload, uint32_t *bytes_read); int (*seek)(void *payload, uint32_t character_index, uint32_t byte_index); TSInputEncoding encoding; + bool measure_columns_in_bytes; } TSInput; typedef enum { diff --git a/src/runtime/document.c b/src/runtime/document.c index 6bcc5fbc..64677cb4 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -26,8 +26,13 @@ void ts_document_free(TSDocument *self) { parser_destroy(&self->parser); if (self->tree) ts_tree_release(self->tree); - ts_document_set_input(self, - (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8 }); + ts_document_set_input(self, (TSInput){ + NULL, + NULL, + NULL, + TSInputEncodingUTF8, + false + }); ts_free(self); } diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 7e0ef51f..21ce2b96 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -60,6 +60,8 @@ static void ts_lexer__advance(void *payload, bool skip) { if (self->data.lookahead == '\n') { self->current_position.extent.row++; self->current_position.extent.column = 0; + } else if (self->input.measure_columns_in_bytes) { + self->current_position.extent.column += self->lookahead_size; } else { self->current_position.extent.column++; } diff --git a/src/runtime/string_input.c b/src/runtime/string_input.c index 3b951a10..6cbf5b2c 100644 --- a/src/runtime/string_input.c +++ b/src/runtime/string_input.c @@ -43,8 +43,9 @@ TSInput ts_string_input_make_with_length(const char *string, uint32_t length) { .read = ts_string_input_read, .seek = ts_string_input_seek, .encoding = TSInputEncodingUTF8, + .measure_columns_in_bytes = false, }; error: - return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8 }; + return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8, false }; } diff --git a/test/helpers/spy_input.cc b/test/helpers/spy_input.cc index bdcb2709..9edaf554 100644 --- a/test/helpers/spy_input.cc +++ b/test/helpers/spy_input.cc @@ -86,6 +86,7 @@ TSInput SpyInput::input() { result.encoding = encoding; result.seek = seek; result.read = read; + result.measure_columns_in_bytes = true; return result; } diff --git a/test/runtime/document_test.cc b/test/runtime/document_test.cc index df71ea02..6c321d75 100644 --- a/test/runtime/document_test.cc +++ b/test/runtime/document_test.cc @@ -76,11 +76,22 @@ describe("Document", [&]() { const char16_t content[] = u"[true, false]"; spy_input->content = string((const char *)content, sizeof(content)); spy_input->encoding = TSInputEncodingUTF16; - // spy_input->measure_columns_in_bytes + TSInput input = spy_input->input(); - ts_document_set_input(document, spy_input->input()); + input.measure_columns_in_bytes = false; + ts_document_set_input(document, input); ts_document_invalidate(document); ts_document_parse(document); + + TSNode root = ts_document_root_node(document); + AssertThat(ts_node_end_point(root), Equals({0, 13})); + + input.measure_columns_in_bytes = true; + ts_document_set_input(document, input); + ts_document_invalidate(document); + ts_document_parse(document); + root = ts_document_root_node(document); + AssertThat(ts_node_end_point(root), Equals({0, 26})); }); it("allows the input to be retrieved later", [&]() {