tree-sitter/spec/runtime/helpers/encoding_helpers.cc
Max Brunsfeld f2e7058ad9 Support UTF16 directly
This makes the API easier to use from JavaScript
2015-12-28 13:53:22 -08:00

58 lines
1.8 KiB
C++

#include "runtime/helpers/encoding_helpers.h"
#include "runtime/utf16.h"
#include <assert.h>
#include "utf8proc.h"
// Decodes a single code point from the front of `string` (at most `length`
// bytes) into `*code_point`, delegating to the decoder that matches
// `encoding`. The decoder's return value is passed through unchanged.
static inline int string_iterate(TSInputEncoding encoding, const uint8_t *string, size_t length, int32_t *code_point) {
  // Every encoding other than UTF-8 is handled by the UTF-16 decoder.
  if (encoding != TSInputEncodingUTF8) {
    return utf16_iterate(string, length, code_point);
  }
  return utf8proc_iterate(string, length, code_point);
}
// Returns the number of characters (code points) obtained by decoding
// `input` from start to end with the given `encoding`.
//
// Malformed input is tolerated: utf8proc_iterate reports invalid UTF-8 with a
// negative return value (the UTF-16 decoder is not visible here and may
// report failures similarly). Adding such a value to the unsigned byte offset
// would wrap it around or stall the loop, so a non-positive result is treated
// as one undecodable byte that still counts as a single character.
size_t string_char_count(TSInputEncoding encoding, const std::string &input) {
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(input.data());
  const size_t size = input.size();
  size_t character = 0;
  size_t byte = 0;
  while (byte < size) {
    int32_t code_point;
    const int consumed = string_iterate(encoding, bytes + byte, size - byte, &code_point);
    // Always make forward progress, even when decoding fails.
    byte += consumed > 0 ? static_cast<size_t>(consumed) : 1;
    character++;
  }
  return character;
}
// Walks `goal_character` characters forward through `input`, starting at
// `byte_offset`, decoding with the given `encoding`.
//
// Returns the number of bytes spanned by those characters, measured from
// `byte_offset`, or -1 when the input ends before `goal_character` characters
// have been decoded or when `byte_offset` lies beyond the end of `input`.
//
// A non-positive result from the decoder (utf8proc_iterate returns a negative
// error code for invalid UTF-8) is treated as one undecodable byte counting as
// a single character, so the unsigned offset can never wrap or stop advancing.
long string_byte_for_character(TSInputEncoding encoding, const std::string &input, size_t byte_offset, size_t goal_character) {
  // Without this check the unsigned subtraction below would wrap around.
  if (byte_offset > input.size())
    return -1;

  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(input.data()) + byte_offset;
  const size_t size = input.size() - byte_offset;
  size_t character = 0;
  size_t byte = 0;
  while (character < goal_character) {
    if (byte >= size)
      return -1;
    int32_t code_point;
    const int consumed = string_iterate(encoding, bytes + byte, size - byte, &code_point);
    // Always make forward progress, even when decoding fails.
    byte += consumed > 0 ? static_cast<size_t>(consumed) : 1;
    character++;
  }
  return static_cast<long>(byte);
}
// Convenience wrapper: character count of `input` decoded as UTF-8.
size_t utf8_char_count(const std::string &input) {
  const size_t count = string_char_count(TSInputEncodingUTF8, input);
  return count;
}
// Convenience wrapper: character count of `input` decoded as UTF-16.
size_t utf16_char_count(const std::string &input) {
  const size_t count = string_char_count(TSInputEncodingUTF16, input);
  return count;
}
// Convenience wrapper: string_byte_for_character with `input` decoded as
// UTF-8. Arguments and result are forwarded unchanged.
long utf8_byte_for_character(const std::string &input, size_t byte_offset, size_t goal_character) {
  const long result = string_byte_for_character(
      TSInputEncodingUTF8, input, byte_offset, goal_character);
  return result;
}
// Convenience wrapper: string_byte_for_character with `input` decoded as
// UTF-16. Arguments and result are forwarded unchanged.
long utf16_byte_for_character(const std::string &input, size_t byte_offset, size_t goal_character) {
  const long result = string_byte_for_character(
      TSInputEncodingUTF16, input, byte_offset, goal_character);
  return result;
}