From 80c34d62ab1de5d0d7faf45919bca9341f9b1521 Mon Sep 17 00:00:00 2001 From: Andrew Helwer Date: Fri, 7 Jan 2022 10:36:25 -0500 Subject: [PATCH] Fixed rust build, updated docs --- docs/section-3-creating-parsers.md | 2 +- lib/src/lexer.c | 44 +++++++++++++++--------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index f5f7c933..05824e22 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -674,7 +674,7 @@ This function is responsible for recognizing external tokens. It should return ` * **`TSSymbol result_symbol`** - The symbol that was recognized. Your scan function should *assign* to this field one of the values from the `TokenType` enum, described above. * **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace. * **`void (*mark_end)(TSLexer *)`** - A function for marking the end of the recognized token. This allows matching tokens that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls to `advance` will *not* increase the size of the returned token. You can call `mark_end` multiple times to increase the size of the token. -* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of bytes (not characters) since the start of the current line. +* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this function by reading from the start of the line. * **`bool (*is_at_included_range_start)(TSLexer *)`** - A function for checking if the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), your scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`. The third argument to the `scan` function is an array of booleans that indicates which of your external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic. diff --git a/lib/src/lexer.c b/lib/src/lexer.c index 0f94b309..57dc55d5 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -152,28 +152,7 @@ static void ts_lexer_goto(Lexer *self, Length position) { } } -// Advance without logging. -static void ts_lexer__advance_no_log(Lexer *self, bool skip) { - if (!self->chunk) return; - ts_lexer__do_advance(self, skip); -} - -// Advance to the next character in the source code, retrieving a new -// chunk of source code if needed. -static void ts_lexer__advance(TSLexer *_self, bool skip) { - Lexer *self = (Lexer *)_self; - if (!self->chunk) return; - - if (skip) { - LOG("skip", self->data.lookahead); - } else { - LOG("consume", self->data.lookahead); - } - - ts_lexer__do_advance(self, skip); -} - -// Intended to be called only from functions below that control logging. +// Intended to be called only from functions that control logging. static void ts_lexer__do_advance(Lexer *self, bool skip) { if (self->lookahead_size) { self->current_position.bytes += self->lookahead_size; @@ -216,6 +195,27 @@ static void ts_lexer__do_advance(Lexer *self, bool skip) { } } +// Advance to the next character in the source code, retrieving a new +// chunk of source code if needed. +static void ts_lexer__advance(TSLexer *_self, bool skip) { + Lexer *self = (Lexer *)_self; + if (!self->chunk) return; + + if (skip) { + LOG("skip", self->data.lookahead); + } else { + LOG("consume", self->data.lookahead); + } + + ts_lexer__do_advance(self, skip); +} + +// Advance without logging. +static void ts_lexer__advance_no_log(Lexer *self, bool skip) { + if (!self->chunk) return; + ts_lexer__do_advance(self, skip); +} + // Mark that a token match has completed. This can be called multiple // times if a longer match is found later. static void ts_lexer__mark_end(TSLexer *_self) {