feat: add the ability to specify a custom decode function

This commit is contained in:
Amaan Qureshi 2024-10-30 23:49:42 -04:00
parent e27160b118
commit 500f4326d5
10 changed files with 347 additions and 16 deletions

View file

@ -1,9 +1,11 @@
#include <stdio.h>
#include "./lexer.h"
#include "./subtree.h"
#include "./length.h"
#include "./lexer.h"
#include "./unicode.h"
#include "tree_sitter/api.h"
#include <stdarg.h>
#include <stdio.h>
#define LOG(message, character) \
if (self->logger.log) { \
@ -112,9 +114,10 @@ static void ts_lexer__get_lookahead(Lexer *self) {
}
const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
UnicodeDecodeFunction decode =
self->input.encoding == TSInputEncodingUTF8 ? ts_decode_utf8 :
self->input.encoding == TSInputEncodingUTF16LE ? ts_decode_utf16_le : ts_decode_utf16_be;
DecodeFunction decode =
self->input.encoding == TSInputEncodingUTF8 ? ts_decode_utf8 :
self->input.encoding == TSInputEncodingUTF16LE ? ts_decode_utf16_le :
self->input.encoding == TSInputEncodingUTF16BE ? ts_decode_utf16_be : self->input.decode;
self->lookahead_size = decode(chunk, size, &self->data.lookahead);

View file

@ -2163,6 +2163,7 @@ TSTree *ts_parser_parse_string_encoding(
&input,
ts_string_input_read,
encoding,
NULL,
});
}

View file

@ -38,14 +38,6 @@ extern "C" {
static const int32_t TS_DECODE_ERROR = U_SENTINEL;
// These functions read one unicode code point from the given string,
// returning the number of bytes consumed.
// Pointer-to-function type shared by the code point decoders.
//
// Parameters:
//   string     - the input to decode from.
//   length     - how much input is available at `string`
//                (NOTE(review): presumably a byte count — confirm).
//   code_point - out-parameter that receives the decoded code point.
//
// Returns the number of bytes consumed from `string`.
//
// NOTE(review): presumably a decoder stores TS_DECODE_ERROR in `*code_point`
// when the input is malformed — confirm against the decoder implementations.
typedef uint32_t (*UnicodeDecodeFunction)(
const uint8_t *string,
uint32_t length,
int32_t *code_point
);
static inline uint32_t ts_decode_utf8(
const uint8_t *string,
uint32_t length,