diff --git a/lib/src/lexer.c b/lib/src/lexer.c index 57dc55d5..f2c10fbd 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -210,12 +210,6 @@ static void ts_lexer__advance(TSLexer *_self, bool skip) { ts_lexer__do_advance(self, skip); } -// Advance without logging. -static void ts_lexer__advance_no_log(Lexer *self, bool skip) { - if (!self->chunk) return; - ts_lexer__do_advance(self, skip); -} - // Mark that a token match has completed. This can be called multiple // times if a longer match is found later. static void ts_lexer__mark_end(TSLexer *_self) { @@ -257,8 +251,8 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) { uint32_t result = 0; ts_lexer__get_lookahead(self); - while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self)) { - ts_lexer__advance_no_log(self, false); + while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self) && self->chunk) { + ts_lexer__do_advance(self, false); result++; } diff --git a/test/fixtures/test_grammars/external_unicode_column_alignment/README.md b/test/fixtures/test_grammars/external_unicode_column_alignment/README.md new file mode 100644 index 00000000..8fe141d2 --- /dev/null +++ b/test/fixtures/test_grammars/external_unicode_column_alignment/README.md @@ -0,0 +1 @@ +This tests that `get_column` correctly counts codepoints since start of line. 
\ No newline at end of file diff --git a/test/fixtures/test_grammars/external_unicode_column_alignment/corpus.txt b/test/fixtures/test_grammars/external_unicode_column_alignment/corpus.txt new file mode 100644 index 00000000..de7a5f24 --- /dev/null +++ b/test/fixtures/test_grammars/external_unicode_column_alignment/corpus.txt @@ -0,0 +1,93 @@ +======================== +Single list, no boxes +======================== + +- +- +- + +---------------------- + +(expression + (list + (list_item) + (list_item) + (list_item) + ) +) + +======================== +Two lists, no boxes +======================== + + - + - + - + - + - + +---------------------- + +(expression + (list + (list_item) + (list_item) + (list_item) + ) + (list + (list_item) + (list_item) + ) +) + +======================== +List with boxes +======================== + + - +□- + - + +---------------------- + +(expression + (list + (list_item) + (list_item) + (list_item) + ) +) + +======================== +Multiple lists with boxes +======================== + + - +□ □- + □ - +□□□□□□- +□ □ □ - + - +□□□ - +□□□- +□ □- + +---------------------- + +(expression + (list + (list_item) + (list_item) + (list_item) + ) + (list + (list_item) + (list_item) + (list_item) + (list_item) + ) + (list + (list_item) + (list_item) + ) +) diff --git a/test/fixtures/test_grammars/external_unicode_column_alignment/grammar.js b/test/fixtures/test_grammars/external_unicode_column_alignment/grammar.js new file mode 100644 index 00000000..3016b31d --- /dev/null +++ b/test/fixtures/test_grammars/external_unicode_column_alignment/grammar.js @@ -0,0 +1,17 @@ +module.exports = grammar({ + name: "external_unicode_column_alignment", + + externals: $ => [ + $._start_list, + $.list_item, + $._end_list + ], + + extras: $ => [/\s/, '□'], + + rules: { + expression: $ => repeat($.list), + + list: $ => seq($._start_list, repeat1($.list_item), $._end_list) + } +}) diff --git a/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c 
b/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c
new file mode 100644
index 00000000..13d9f9db
--- /dev/null
+++ b/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c
@@ -0,0 +1,107 @@
+#include <tree_sitter/parser.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <wctype.h>
+
+enum {
+  LIST_START,
+  LIST_ITEM,
+  LIST_END
+};
+
+// Code point of the box character '□' (U+25A1).
+enum { BOX_CODEPOINT = 0x25A1 };
+
+// Scanner state: column of the '-' that opened the current list, or -1 when
+// no list is open.
+typedef struct {
+  int32_t column;
+} Scanner;
+
+// Allocates scanner state with no list open.
+void *tree_sitter_external_unicode_column_alignment_external_scanner_create(void) {
+  Scanner *scanner = malloc(sizeof *scanner);
+  if (scanner == NULL) {
+    // Fail loudly rather than dereference a null pointer below.
+    abort();
+  }
+  scanner->column = -1;
+  return scanner;
+}
+
+// Releases the state allocated by the create function.
+void tree_sitter_external_unicode_column_alignment_external_scanner_destroy(void *payload) {
+  free(payload);
+}
+
+// Copies the open-list column into `buffer` and returns the number of bytes
+// written.
+unsigned tree_sitter_external_unicode_column_alignment_external_scanner_serialize(
+  void *payload,
+  char *buffer
+) {
+  const Scanner *scanner = payload;
+  const unsigned copied = sizeof scanner->column;
+  memcpy(buffer, &scanner->column, copied);
+  return copied;
+}
+
+// Restores the open-list column from `buffer`. The state falls back to "no
+// list open" when `buffer` holds fewer bytes than a serialized column.
+void tree_sitter_external_unicode_column_alignment_external_scanner_deserialize(
+  void *payload,
+  const char *buffer,
+  unsigned length
+) {
+  Scanner *scanner = payload;
+  scanner->column = -1;
+  // Only read when a whole column is present; a shorter buffer would be
+  // over-read by the memcpy.
+  if (length >= sizeof scanner->column) {
+    memcpy(&scanner->column, buffer, sizeof scanner->column);
+  }
+}
+
+// Emits LIST_START, LIST_ITEM or LIST_END depending on how the column of the
+// next '-' compares with the column that opened the current list.
+bool tree_sitter_external_unicode_column_alignment_external_scanner_scan(
+  void *payload,
+  TSLexer *lexer,
+  const bool *whitelist
+) {
+  Scanner *scanner = payload;
+  (void)whitelist;  // Not consulted by this scanner.
+
+  // Skip whitespace and box characters ahead of the token.
+  while (iswspace(lexer->lookahead) || BOX_CODEPOINT == lexer->lookahead) {
+    lexer->advance(lexer, true);
+  }
+
+  if ('-' == lexer->lookahead) {
+    const int32_t column = lexer->get_column(lexer);
+    if (-1 == scanner->column) {
+      // No list is open: start one at this column without consuming the '-'.
+      lexer->result_symbol = LIST_START;
+      scanner->column = column;
+    } else if (column == scanner->column) {
+      // Same column as the list opener: consume the '-' as an item.
+      lexer->result_symbol = LIST_ITEM;
+      lexer->advance(lexer, false);
+    } else {
+      // Different column: close the current list without consuming the '-'.
+      lexer->result_symbol = LIST_END;
+      scanner->column = -1;
+    }
+    return true;
+  }
+
+  // Close a list that is still open at end of input.
+  if (lexer->eof(lexer) && -1 != scanner->column) {
+    lexer->result_symbol = LIST_END;
+    scanner->column = -1;
+    return true;
+  }
+
+  return false;
+}