Merge pull request #74 from tree-sitter/check-utf8proc_iterate-return
Check utf8proc_iterate return
This commit is contained in:
commit
f594ed2519
5 changed files with 32 additions and 9 deletions
|
|
@@ -36,11 +36,17 @@ static void ts_lexer__get_lookahead(Lexer *self) {
|
|||
const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
|
||||
uint32_t size = self->chunk_size - position_in_chunk + 1;
|
||||
|
||||
if (self->input.encoding == TSInputEncodingUTF8)
|
||||
self->lookahead_size =
|
||||
utf8proc_iterate(chunk, size, &self->data.lookahead);
|
||||
else
|
||||
if (self->input.encoding == TSInputEncodingUTF8) {
|
||||
int64_t lookahead_size = utf8proc_iterate(chunk, size, &self->data.lookahead);
|
||||
if (lookahead_size < 0) {
|
||||
self->lookahead_size = 1;
|
||||
} else {
|
||||
self->lookahead_size = lookahead_size;
|
||||
}
|
||||
}
|
||||
else {
|
||||
self->lookahead_size = utf16_iterate(chunk, size, &self->data.lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
static void ts_lexer__advance(void *payload, bool skip) {
|
||||
|
|
|
|||
|
|
@@ -99,7 +99,7 @@ TreeArray ts_tree_array_remove_trailing_extras(TreeArray *self) {
|
|||
return result;
|
||||
}
|
||||
|
||||
Tree *ts_tree_make_error(Length size, Length padding, char lookahead_char) {
|
||||
Tree *ts_tree_make_error(Length size, Length padding, int32_t lookahead_char) {
|
||||
Tree *result = ts_tree_make_leaf(ts_builtin_sym_error, padding, size,
|
||||
(TSSymbolMetadata){
|
||||
.visible = true, .named = true,
|
||||
|
|
|
|||
|
|
@@ -76,7 +76,7 @@ Tree *ts_tree_make_leaf(TSSymbol, Length, Length, TSSymbolMetadata);
|
|||
Tree *ts_tree_make_node(TSSymbol, uint32_t, Tree **, TSSymbolMetadata);
|
||||
Tree *ts_tree_make_copy(Tree *child);
|
||||
Tree *ts_tree_make_error_node(TreeArray *);
|
||||
Tree *ts_tree_make_error(Length, Length, char);
|
||||
Tree *ts_tree_make_error(Length, Length, int32_t);
|
||||
void ts_tree_retain(Tree *tree);
|
||||
void ts_tree_release(Tree *tree);
|
||||
bool ts_tree_eq(const Tree *tree1, const Tree *tree2);
|
||||
|
|
|
|||
|
|
@@ -4,10 +4,16 @@
|
|||
#include "utf8proc.h"
|
||||
|
||||
static inline int string_iterate(TSInputEncoding encoding, const uint8_t *string, size_t length, int32_t *code_point) {
|
||||
if (encoding == TSInputEncodingUTF8)
|
||||
return utf8proc_iterate(string, length, code_point);
|
||||
else
|
||||
if (encoding == TSInputEncodingUTF8) {
|
||||
int32_t character_size = utf8proc_iterate(string, length, code_point);
|
||||
if (character_size < 0) {
|
||||
return 1;
|
||||
} else {
|
||||
return character_size;
|
||||
}
|
||||
} else {
|
||||
return utf16_iterate(string, length, code_point);
|
||||
}
|
||||
}
|
||||
|
||||
size_t string_char_count(TSInputEncoding encoding, const std::string &input) {
|
||||
|
|
|
|||
|
|
@@ -473,6 +473,17 @@ describe("Parser", [&]() {
|
|||
AssertThat(ts_node_end_char(root), Equals(strlen("'OOO - DD';")));
|
||||
AssertThat(ts_node_end_byte(root), Equals(strlen("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';")));
|
||||
});
|
||||
|
||||
it("handles non-UTF8 characters", [&]() {
|
||||
const char *string = "cons\xeb\x00e=ls\x83l6hi');\x0a";
|
||||
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
ts_document_set_input_string(document, string);
|
||||
ts_document_parse(document);
|
||||
|
||||
TSNode root = ts_document_root_node(document);
|
||||
AssertThat(ts_node_end_byte(root), Equals(strlen(string)));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue