diff --git a/README.md b/README.md index d4f4b9b8..472827a8 100644 --- a/README.md +++ b/README.md @@ -179,21 +179,21 @@ tokens, like `(` and `+`. This is useful when analyzing the meaning of a documen TSLanguage *ts_language_arithmetic(); int main() { - TSDocument *document = ts_document_make(); + TSDocument *document = ts_document_new(); ts_document_set_language(document, ts_language_arithmetic()); ts_document_set_input_string(document, "a + b * 5"); ts_document_parse(document); TSNode root_node = ts_document_root_node(document); - assert(!strcmp(ts_node_name(root_node, document), "expression")); + assert(!strcmp(ts_node_type(root_node, document), "expression")); assert(ts_node_named_child_count(root_node) == 1); TSNode sum_node = ts_node_named_child(root_node, 0); - assert(!strcmp(ts_node_name(sum_node, document), "sum")); + assert(!strcmp(ts_node_type(sum_node, document), "sum")); assert(ts_node_named_child_count(sum_node) == 2); TSNode product_node = ts_node_child(ts_node_named_child(sum_node, 1), 0); - assert(!strcmp(ts_node_name(product_node, document), "product")); + assert(!strcmp(ts_node_type(product_node, document), "product")); assert(ts_node_named_child_count(product_node) == 2); printf("Syntax tree: %s\n", ts_node_string(root_node, document)); diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 3bed984e..8c3bb4dc 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -5,20 +5,16 @@ extern "C" { #endif -#include #include -#include "tree_sitter/runtime.h" - -#define TS_STATE_ERROR 0 -#define TS_DEBUG_BUFFER_SIZE 512 +#include +#include +typedef unsigned short TSSymbol; typedef unsigned short TSStateId; -typedef struct { - size_t bytes; - size_t chars; - TSPoint extent; -} TSLength; +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define ts_builtin_sym_start 1 typedef struct { bool visible : 1; @@ -27,23 +23,10 @@ typedef struct { bool structural : 1; } TSSymbolMetadata; -typedef struct TSLexer { - void (*advance)(struct TSLexer *, TSStateId, bool); - - TSLength current_position; - TSLength token_start_position; - - const char *chunk; - size_t chunk_start; - size_t chunk_size; - - size_t lookahead_size; +typedef struct { + void (*advance)(void *, TSStateId, bool); int32_t lookahead; TSSymbol result_symbol; - - TSInput input; - TSLogger logger; - char debug_buffer[TS_DEBUG_BUFFER_SIZE]; } TSLexer; typedef enum { @@ -75,7 +58,7 @@ typedef union { }; } TSParseActionEntry; -struct TSLanguage { +typedef struct TSLanguage { size_t symbol_count; const char **symbol_names; const TSSymbolMetadata *symbol_metadata; @@ -83,7 +66,7 @@ struct TSLanguage { const TSParseActionEntry *parse_actions; const TSStateId *lex_states; bool (*lex_fn)(TSLexer *, TSStateId); -}; +} TSLanguage; /* * Lexer Macros @@ -94,18 +77,18 @@ struct TSLanguage { next_state: \ lookahead = lexer->lookahead; -#define ADVANCE(state_value) \ - { \ +#define ADVANCE(state_value) \ + { \ lexer->advance(lexer, state_value, false); \ - state = state_value; \ - goto next_state; \ + state = state_value; \ + goto next_state; \ } -#define SKIP(state_value) \ - { \ +#define SKIP(state_value) \ + { \ lexer->advance(lexer, state_value, true); \ - state = state_value; \ - goto next_state; \ + state = state_value; \ + goto next_state; \ } #define ACCEPT_TOKEN(symbol_value) \ diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 71c3a8f4..5ee5ba6f 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -114,10 +114,6 @@ size_t ts_document_parse_count(const TSDocument *); size_t ts_language_symbol_count(const TSLanguage *); const char *ts_language_symbol_name(const TSLanguage *, TSSymbol); -#define ts_builtin_sym_error ((TSSymbol)-1) -#define ts_builtin_sym_end 0 -#define ts_builtin_sym_start 1 - #ifdef __cplusplus } #endif diff --git a/spec/helpers/point_helpers.cc b/spec/helpers/point_helpers.cc index 395087ee..e61faf3e 100644 --- a/spec/helpers/point_helpers.cc +++ b/spec/helpers/point_helpers.cc @@ -1,6 +1,8 @@ #include "./point_helpers.h" #include #include +#include "runtime/length.h" +#include "tree_sitter/runtime.h" using namespace std; diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index 417c8d31..78a53ee2 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -161,7 +161,7 @@ describe("Document", [&]() { }); }); - describe("set_logger(TSDebugger)", [&]() { + describe("set_logger(TSLogger)", [&]() { SpyLogger *logger; before_each([&]() { diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index 174b4110..00acdd85 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -125,7 +125,6 @@ describe("Parser", [&]() { "(array (number) (ERROR (UNEXPECTED 'a')) (true))"); TSNode error = ts_node_named_child(root, 1); - AssertThat(ts_node_symbol(error), Equals(ts_builtin_sym_error)); AssertThat(ts_node_type(error, doc), Equals("ERROR")); AssertThat(ts_node_child_count(error), Equals(2)); diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index 16261ada..7738f2fe 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -248,7 +248,7 @@ describe("Stack", [&]() { it("stops popping entries early if it reaches an error tree", [&]() { // . <──0── A <──1── B <──2── C <──3── ERROR <──4── D* - ts_stack_push(stack, 0, trees[3], false, TS_STATE_ERROR); + ts_stack_push(stack, 0, trees[3], false, ERROR_STATE); ts_stack_push(stack, 0, trees[4], false, stateD); // . <──0── A <──1── B <──2── C <──3── ERROR <──4── D* @@ -258,7 +258,7 @@ describe("Stack", [&]() { AssertThat(pop.status, Equals(StackPopResult::StackPopStoppedAtError)); AssertThat(ts_stack_version_count(stack), Equals(2)); - AssertThat(ts_stack_top_state(stack, 1), Equals(TS_STATE_ERROR)); + AssertThat(ts_stack_top_state(stack, 1), Equals(ERROR_STATE)); AssertThat(pop.slices.size, Equals(1)); StackSlice slice = pop.slices.contents[0]; diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index 3c209b37..79395d09 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -183,14 +183,13 @@ describe("Tree", []() { describe("edits within a tree's padding", [&]() { it("resizes the padding of the tree and its leftmost descendants", [&]() { - TSInputEdit edit = { - .start_byte = 1, - .bytes_removed = 0, - .bytes_added = 1, - .start_point = {0, 1}, - .extent_removed = {0, 0}, - .extent_added = {0, 1}, - }; + TSInputEdit edit; + edit.start_byte = 1; + edit.bytes_removed = 0; + edit.bytes_added = 1; + edit.start_point = {0, 1}; + edit.extent_removed = {0, 0}; + edit.extent_added = {0, 1}; ts_tree_edit(tree, &edit); assert_consistent(tree); @@ -210,14 +209,13 @@ describe("Tree", []() { describe("edits that start in a tree's padding but extend into its content", [&]() { it("shrinks the content to compensate for the expanded padding", [&]() { - TSInputEdit edit = { - .start_byte = 1, - .bytes_removed = 3, - .bytes_added = 4, - .start_point = {0, 1}, - .extent_removed = {0, 3}, - .extent_added = {0, 4}, - }; + TSInputEdit edit; + edit.start_byte = 1; + edit.bytes_removed = 3; + edit.bytes_added = 4; + edit.start_point = {0, 1}; + edit.extent_removed = {0, 3}; + edit.extent_added = {0, 4}; ts_tree_edit(tree, &edit); assert_consistent(tree); @@ -233,14 +231,13 @@ describe("Tree", []() { describe("insertions at the edge of a tree's padding", [&]() { it("expands the tree's padding", [&]() { - TSInputEdit edit = { - .start_byte = 2, - .bytes_removed = 0, - .bytes_added = 2, - .start_point = {0, 2}, - .extent_removed = {0, 0}, - .extent_added = {0, 2}, - }; + TSInputEdit edit; + edit.start_byte = 2; + edit.bytes_removed = 0; + edit.bytes_added = 2; + edit.start_point = {0, 2}; + edit.extent_removed = {0, 0}; + edit.extent_added = {0, 2}; ts_tree_edit(tree, &edit); assert_consistent(tree); @@ -260,14 +257,13 @@ describe("Tree", []() { describe("replacements starting at the edge of a tree's padding", [&]() { it("resizes the content and not the padding", [&]() { - TSInputEdit edit = { - .start_byte = 2, - .bytes_removed = 2, - .bytes_added = 5, - .start_point = {0, 2}, - .extent_removed = {0, 2}, - .extent_added = {0, 5}, - }; + TSInputEdit edit; + edit.start_byte = 2; + edit.bytes_removed = 2; + edit.bytes_added = 5; + edit.start_point = {0, 2}; + edit.extent_removed = {0, 2}; + edit.extent_added = {0, 5}; ts_tree_edit(tree, &edit); assert_consistent(tree); @@ -285,14 +281,13 @@ describe("Tree", []() { describe("deletions that span more than one child node", [&]() { it("shrinks subsequent child nodes", [&]() { - TSInputEdit edit = { - .start_byte = 1, - .bytes_removed = 10, - .bytes_added = 3, - .start_point = {0, 1}, - .extent_removed = {0, 10}, - .extent_added = {0, 3}, - }; + TSInputEdit edit; + edit.start_byte = 1; + edit.bytes_removed = 10; + edit.bytes_added = 3; + edit.start_point = {0, 1}; + edit.extent_removed = {0, 10}; + edit.extent_added = {0, 3}; ts_tree_edit(tree, &edit); assert_consistent(tree); diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 16e7101d..93820424 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -195,7 +195,8 @@ class ParseTableBuilder { ParseAction action = ParseAction::ShiftExtra(); ParseState &state = parse_table.states[state_id]; for (const Symbol &extra_symbol : grammar.extra_tokens) - if (!state.entries.count(extra_symbol) || state.has_shift_action() || allow_any_conflict) + if (!state.entries.count(extra_symbol) || state.has_shift_action() || + allow_any_conflict) parse_table.add_action(state_id, extra_symbol, action); } diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 714d3045..65244fdf 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -183,8 +183,7 @@ class CCodeGenerator { } void add_lex_function() { - line( - "static bool ts_lex(TSLexer *lexer, TSStateId state) {"); + line("static bool ts_lex(TSLexer *lexer, TSStateId state) {"); indent([&]() { line("START_LEXER();"); _switch("state", [&]() { diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 808c4ce2..d345f0e4 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -126,7 +126,8 @@ ParseState::ParseState() : lex_state_id(-1) {} bool ParseState::has_shift_action() const { for (const auto &pair : entries) - if (pair.second.actions.size() > 0 && pair.second.actions.back().type == ParseActionTypeShift) + if (pair.second.actions.size() > 0 && + pair.second.actions.back().type == ParseActionTypeShift) return true; return false; } diff --git a/src/runtime/alloc.h b/src/runtime/alloc.h index 81b5f18f..55ab15d1 100644 --- a/src/runtime/alloc.h +++ b/src/runtime/alloc.h @@ -5,6 +5,9 @@ extern "C" { #endif +#include +#include + #if defined(TREE_SITTER_WRAP_MALLOC) void *ts_record_malloc(size_t); diff --git a/src/runtime/document.c b/src/runtime/document.c index ec664c45..2ccb8d6f 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -1,4 +1,3 @@ -#include "tree_sitter/parser.h" #include "runtime/alloc.h" #include "runtime/node.h" #include "runtime/tree.h" diff --git a/src/runtime/document.h b/src/runtime/document.h index 3f2887bd..d7725e85 100644 --- a/src/runtime/document.h +++ b/src/runtime/document.h @@ -1,7 +1,6 @@ #ifndef RUNTIME_DOCUMENT_H_ #define RUNTIME_DOCUMENT_H_ -#include "tree_sitter/parser.h" #include "runtime/parser.h" #include "runtime/tree.h" #include diff --git a/src/runtime/error_costs.h b/src/runtime/error_costs.h index e7c8ed71..a2187a25 100644 --- a/src/runtime/error_costs.h +++ b/src/runtime/error_costs.h @@ -1,6 +1,7 @@ #ifndef RUNTIME_ERROR_COSTS_H_ #define RUNTIME_ERROR_COSTS_H_ +#define ERROR_STATE 0 #define ERROR_COST_PER_SKIPPED_TREE 10 #define ERROR_COST_PER_SKIPPED_LINE 3 #define ERROR_COST_PER_SKIPPED_CHAR 0 @@ -12,15 +13,15 @@ typedef struct { } ErrorStatus; static inline unsigned error_status_min_cost(ErrorStatus status) { - return status.cost + - ERROR_COST_PER_SKIPPED_TREE * status.count * status.count; + return status.cost + ERROR_COST_PER_SKIPPED_TREE * status.count * status.count; } static inline unsigned error_status_max_cost(ErrorStatus status) { return status.cost + - ERROR_COST_PER_SKIPPED_TREE * status.count * status.count + - (6 * ERROR_COST_PER_SKIPPED_TREE * status.count + 12 * ERROR_COST_PER_SKIPPED_TREE) / - (1 + status.push_count / 2); + ERROR_COST_PER_SKIPPED_TREE * status.count * status.count + + (6 * ERROR_COST_PER_SKIPPED_TREE * status.count + + 12 * ERROR_COST_PER_SKIPPED_TREE) / + (1 + status.push_count / 2); } static inline int error_status_compare(ErrorStatus a, ErrorStatus b) { diff --git a/src/runtime/language.c b/src/runtime/language.c index 8ab122ae..0bc4ae7e 100644 --- a/src/runtime/language.c +++ b/src/runtime/language.c @@ -1,6 +1,6 @@ -#include "tree_sitter/parser.h" #include "runtime/language.h" #include "runtime/tree.h" +#include "runtime/error_costs.h" static const TSParseAction ERROR_SHIFT_EXTRA = { .type = TSParseActionTypeShift, .extra = true, @@ -10,7 +10,7 @@ void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result) { size_t action_index; if (symbol == ts_builtin_sym_error) { - if (state == TS_STATE_ERROR) { + if (state == ERROR_STATE) { result->action_count = 1; result->is_reusable = false; result->depends_on_lookahead = false; diff --git a/src/runtime/length.h b/src/runtime/length.h index 5a6ae20a..16590ca0 100644 --- a/src/runtime/length.h +++ b/src/runtime/length.h @@ -1,8 +1,15 @@ #ifndef RUNTIME_LENGTH_H_ #define RUNTIME_LENGTH_H_ -#include "tree_sitter/parser.h" +#include #include +#include "tree_sitter/runtime.h" + +typedef struct { + size_t bytes; + size_t chars; + TSPoint extent; +} TSLength; static inline TSPoint ts_point_add(TSPoint a, TSPoint b) { if (b.row > 0) diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index f46a7d1a..f8a023f2 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -1,31 +1,30 @@ #include #include "runtime/lexer.h" -#include "tree_sitter/parser.h" #include "runtime/tree.h" #include "runtime/length.h" #include "runtime/utf16.h" #include "utf8proc.h" -#define LOG(...) \ - if (self->logger.log) { \ - snprintf(self->debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \ - self->logger.log(self->logger.payload, TSLogTypeLex, \ - self->debug_buffer); \ +#define LOG(...) \ + if (self->logger.log) { \ + snprintf(self->debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \ + self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer); \ } -#define LOG_LOOKAHEAD() \ - LOG((0 < self->lookahead && self->lookahead < 256) ? "lookahead char:'%c'" \ - : "lookahead char:%d", \ - self->lookahead); +#define LOG_LOOKAHEAD() \ + LOG((0 < self->data.lookahead && self->data.lookahead < 256) \ + ? "lookahead char:'%c'" \ + : "lookahead char:%d", \ + self->data.lookahead); static const char empty_chunk[2] = { 0, 0 }; -static void ts_lexer__get_chunk(TSLexer *self) { +static void ts_lexer__get_chunk(Lexer *self) { TSInput input = self->input; if (!self->chunk || self->current_position.bytes != self->chunk_start + self->chunk_size) input.seek(input.payload, self->current_position.chars, - self->current_position.bytes); + self->current_position.bytes); self->chunk_start = self->current_position.bytes; self->chunk = input.read(input.payload, &self->chunk_size); @@ -33,28 +32,29 @@ static void ts_lexer__get_chunk(TSLexer *self) { self->chunk = empty_chunk; } -static void ts_lexer__get_lookahead(TSLexer *self) { +static void ts_lexer__get_lookahead(Lexer *self) { size_t position_in_chunk = self->current_position.bytes - self->chunk_start; const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; size_t size = self->chunk_size - position_in_chunk + 1; if (self->input.encoding == TSInputEncodingUTF8) - self->lookahead_size = utf8proc_iterate(chunk, size, &self->lookahead); + self->lookahead_size = + utf8proc_iterate(chunk, size, &self->data.lookahead); else - self->lookahead_size = utf16_iterate(chunk, size, &self->lookahead); + self->lookahead_size = utf16_iterate(chunk, size, &self->data.lookahead); LOG_LOOKAHEAD(); } -static void ts_lexer__advance(TSLexer *self, TSStateId state, bool skip) { - +static void ts_lexer__advance(void *payload, TSStateId state, bool skip) { + Lexer *self = (Lexer *)payload; if (self->chunk == empty_chunk) return; if (self->lookahead_size) { self->current_position.bytes += self->lookahead_size; self->current_position.chars++; - if (self->lookahead == '\n') { + if (self->data.lookahead == '\n') { self->current_position.extent.row++; self->current_position.extent.column = 0; } else { @@ -80,9 +80,12 @@ static void ts_lexer__advance(TSLexer *self, TSStateId state, bool skip) { * parsers can call it without needing to be linked against this library. */ -void ts_lexer_init(TSLexer *self) { - *self = (TSLexer){ - .advance = ts_lexer__advance, +void ts_lexer_init(Lexer *self) { + *self = (Lexer){ + .data = + { + .advance = ts_lexer__advance, .lookahead = 0, .result_symbol = 0, + }, .chunk = NULL, .chunk_start = 0, .logger = {}, @@ -90,36 +93,37 @@ void ts_lexer_init(TSLexer *self) { ts_lexer_reset(self, ts_length_zero()); } -static inline void ts_lexer__reset(TSLexer *self, TSLength position) { +static inline void ts_lexer__reset(Lexer *self, TSLength position) { self->token_start_position = position; self->current_position = position; - if (self->chunk && (position.bytes < self->chunk_start || position.bytes >= self->chunk_start + self->chunk_size)) { + if (self->chunk && (position.bytes < self->chunk_start || + position.bytes >= self->chunk_start + self->chunk_size)) { self->chunk = 0; self->chunk_start = 0; self->chunk_size = 0; } self->lookahead_size = 0; - self->lookahead = 0; + self->data.lookahead = 0; } -void ts_lexer_set_input(TSLexer *self, TSInput input) { +void ts_lexer_set_input(Lexer *self, TSInput input) { self->input = input; ts_lexer__reset(self, ts_length_zero()); } -void ts_lexer_reset(TSLexer *self, TSLength position) { +void ts_lexer_reset(Lexer *self, TSLength position) { if (!ts_length_eq(position, self->current_position)) ts_lexer__reset(self, position); return; } -void ts_lexer_start(TSLexer *self, TSStateId lex_state) { +void ts_lexer_start(Lexer *self, TSStateId lex_state) { LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars); self->token_start_position = self->current_position; - self->result_symbol = 0; + self->data.result_symbol = 0; if (!self->chunk) ts_lexer__get_chunk(self); diff --git a/src/runtime/lexer.h b/src/runtime/lexer.h index afecb19c..889383a5 100644 --- a/src/runtime/lexer.h +++ b/src/runtime/lexer.h @@ -6,11 +6,31 @@ extern "C" { #endif #include "tree_sitter/parser.h" +#include "tree_sitter/runtime.h" +#include "runtime/length.h" -void ts_lexer_init(TSLexer *); -void ts_lexer_set_input(TSLexer *, TSInput); -void ts_lexer_reset(TSLexer *, TSLength); -void ts_lexer_start(TSLexer *, TSStateId); +#define TS_DEBUG_BUFFER_SIZE 512 + +typedef struct { + TSLexer data; + TSLength current_position; + TSLength token_start_position; + + const char *chunk; + size_t chunk_start; + size_t chunk_size; + + size_t lookahead_size; + + TSInput input; + TSLogger logger; + char debug_buffer[TS_DEBUG_BUFFER_SIZE]; +} Lexer; + +void ts_lexer_init(Lexer *); +void ts_lexer_set_input(Lexer *, TSInput); +void ts_lexer_reset(Lexer *, TSLength); +void ts_lexer_start(Lexer *, TSStateId); #ifdef __cplusplus } diff --git a/src/runtime/node.c b/src/runtime/node.c index 5aec705d..ea767ffc 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -196,10 +196,8 @@ static inline TSNode ts_node__descendant_for_byte_range(TSNode self, size_t min, return last_visible_node; } -static inline TSNode ts_node__descendant_for_point_range(TSNode self, - TSPoint min, - TSPoint max, - bool include_anonymous) { +static inline TSNode ts_node__descendant_for_point_range( + TSNode self, TSPoint min, TSPoint max, bool include_anonymous) { TSNode node = self; TSNode last_visible_node = self; @@ -353,7 +351,8 @@ TSNode ts_node_descendant_for_char_range(TSNode self, size_t min, size_t max) { return ts_node__descendant_for_char_range(self, min, max, true); } -TSNode ts_node_named_descendant_for_char_range(TSNode self, size_t min, size_t max) { +TSNode ts_node_named_descendant_for_char_range(TSNode self, size_t min, + size_t max) { return ts_node__descendant_for_char_range(self, min, max, false); } @@ -361,7 +360,8 @@ TSNode ts_node_descendant_for_byte_range(TSNode self, size_t min, size_t max) { return ts_node__descendant_for_byte_range(self, min, max, true); } -TSNode ts_node_named_descendant_for_byte_range(TSNode self, size_t min, size_t max) { +TSNode ts_node_named_descendant_for_byte_range(TSNode self, size_t min, + size_t max) { return ts_node__descendant_for_byte_range(self, min, max, false); } @@ -369,6 +369,7 @@ TSNode ts_node_descendant_for_point_range(TSNode self, TSPoint min, TSPoint max) return ts_node__descendant_for_point_range(self, min, max, true); } -TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint min, TSPoint max) { +TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint min, + TSPoint max) { return ts_node__descendant_for_point_range(self, min, max, false); } diff --git a/src/runtime/node.h b/src/runtime/node.h index d6007c94..3b56aea5 100644 --- a/src/runtime/node.h +++ b/src/runtime/node.h @@ -1,7 +1,6 @@ #ifndef RUNTIME_NODE_H_ #define RUNTIME_NODE_H_ -#include "tree_sitter/parser.h" #include "runtime/tree.h" TSNode ts_node_make(const TSTree *, size_t character, size_t byte, size_t row); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index cfe70439..8040f5af 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -4,7 +4,6 @@ #include #include #include "tree_sitter/runtime.h" -#include "tree_sitter/parser.h" #include "runtime/tree.h" #include "runtime/lexer.h" #include "runtime/length.h" @@ -14,16 +13,16 @@ #include "runtime/reduce_action.h" #include "runtime/error_costs.h" -#define LOG(...) \ - if (self->lexer.logger.log) { \ - snprintf(self->lexer.debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \ - self->lexer.logger.log(self->lexer.logger.payload, \ - TSLogTypeParse, self->lexer.debug_buffer); \ - } \ - if (self->print_debugging_graphs) { \ - fprintf(stderr, "graph {\nlabel=\""); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, "\"\n}\n\n"); \ +#define LOG(...) \ + if (self->lexer.logger.log) { \ + snprintf(self->lexer.debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \ + self->lexer.logger.log(self->lexer.logger.payload, TSLogTypeParse, \ + self->lexer.debug_buffer); \ + } \ + if (self->print_debugging_graphs) { \ + fprintf(stderr, "graph {\nlabel=\""); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\"\n}\n\n"); \ } #define LOG_STACK() \ @@ -111,7 +110,7 @@ static BreakdownResult parser__breakdown_top_of_stack(Parser *self, pending = child->child_count > 0; if (child->symbol == ts_builtin_sym_error) { - state = TS_STATE_ERROR; + state = ERROR_STATE; } else if (!child->extra) { const TSParseAction *action = ts_language_last_action(self->language, state, child->symbol); @@ -131,7 +130,8 @@ static BreakdownResult parser__breakdown_top_of_stack(Parser *self, LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol)); LOG_STACK(); - ts_stack_decrease_push_count(self->stack, slice.version, parent->child_count + 1); + ts_stack_decrease_push_count(self->stack, slice.version, + parent->child_count + 1); ts_tree_release(parent); array_delete(&slice.trees); } @@ -170,8 +170,7 @@ static bool parser__breakdown_lookahead(Parser *self, TSTree **lookahead, ReusableNode *reusable_node) { bool result = false; while (reusable_node->tree->child_count > 0 && - (self->is_split || - reusable_node->tree->parse_state != state || + (self->is_split || reusable_node->tree->parse_state != state || reusable_node->tree->fragile_left || reusable_node->tree->fragile_right)) { LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol)); @@ -255,10 +254,10 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) { ts_lexer_start(&self->lexer, start_state); - while (!self->language->lex_fn(&self->lexer, current_state)) { - if (current_state != TS_STATE_ERROR) { + while (!self->language->lex_fn(&self->lexer.data, current_state)) { + if (current_state != ERROR_STATE) { LOG("retry_in_error_mode"); - current_state = TS_STATE_ERROR; + current_state = ERROR_STATE; ts_lexer_reset(&self->lexer, start_position); ts_lexer_start(&self->lexer, current_state); continue; @@ -266,15 +265,15 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) { if (!skipped_error) { error_start_position = self->lexer.token_start_position; - first_error_character = self->lexer.lookahead; + first_error_character = self->lexer.data.lookahead; } if (self->lexer.current_position.bytes == error_end_position.bytes) { - if (self->lexer.lookahead == 0) { - self->lexer.result_symbol = ts_builtin_sym_error; + if (self->lexer.data.lookahead == 0) { + self->lexer.data.result_symbol = ts_builtin_sym_error; break; } - self->lexer.advance(&self->lexer, TS_STATE_ERROR, false); + self->lexer.data.advance(&self->lexer, ERROR_STATE, false); } skipped_error = true; @@ -289,11 +288,14 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) { ts_lexer_reset(&self->lexer, error_end_position); result = ts_tree_make_error(size, padding, first_error_character); } else { - TSSymbol symbol = self->lexer.result_symbol; - TSLength padding = ts_length_sub(self->lexer.token_start_position, start_position); - TSLength size = ts_length_sub(self->lexer.current_position, self->lexer.token_start_position); - result = ts_tree_make_leaf(symbol, padding, size, - ts_language_symbol_metadata(self->language, symbol)); + TSSymbol symbol = self->lexer.data.result_symbol; + TSLength padding = + ts_length_sub(self->lexer.token_start_position, start_position); + TSLength size = ts_length_sub(self->lexer.current_position, + self->lexer.token_start_position); + result = + ts_tree_make_leaf(symbol, padding, size, + ts_language_symbol_metadata(self->language, symbol)); } if (!result) @@ -470,8 +472,8 @@ static bool parser__switch_children(Parser *self, TSTree *tree, } static Reduction parser__reduce(Parser *self, StackVersion version, - TSSymbol symbol, unsigned count, - bool fragile, bool allow_skipping) { + TSSymbol symbol, unsigned count, bool fragile, + bool allow_skipping) { size_t initial_version_count = ts_stack_version_count(self->stack); StackPopResult pop = ts_stack_pop_count(self->stack, version, count); switch (pop.status) { @@ -541,14 +543,15 @@ static Reduction parser__reduce(Parser *self, StackVersion version, CHECK(other_version != STACK_VERSION_NONE); CHECK(ts_stack_push(self->stack, other_version, parent, false, - TS_STATE_ERROR)); + ERROR_STATE)); for (size_t j = parent->child_count; j < slice.trees.size; j++) { TSTree *tree = slice.trees.contents[j]; CHECK(ts_stack_push(self->stack, other_version, tree, false, - TS_STATE_ERROR)); + ERROR_STATE)); } - ErrorStatus error_status = ts_stack_error_status(self->stack, other_version); + ErrorStatus error_status = + ts_stack_error_status(self->stack, other_version); if (parser__better_version_exists(self, version, error_status)) ts_stack_remove_version(self->stack, other_version); } @@ -894,8 +897,8 @@ static PotentialReductionStatus parser__do_potential_reductions( bool did_reduce = false; for (size_t i = 0; i < self->reduce_actions.size; i++) { ReduceAction action = self->reduce_actions.contents[i]; - Reduction reduction = parser__reduce(self, version, action.symbol, - action.count, true, false); + Reduction reduction = + parser__reduce(self, version, action.symbol, action.count, true, false); switch (reduction.status) { case ReduceFailed: goto error; @@ -932,7 +935,7 @@ typedef struct { static StackIterateAction parser__repair_consumed_error_callback( void *payload, TSStateId state, TreeArray *trees, size_t tree_count, bool is_done, bool is_pending) { - if (tree_count > 0 && state != TS_STATE_ERROR) { + if (tree_count > 0 && state != ERROR_STATE) { SkipPrecedingTokensSession *session = payload; Parser *self = session->parser; TSSymbol lookahead_symbol = session->lookahead_symbol; @@ -1003,10 +1006,10 @@ static bool parser__handle_error(Parser *self, StackVersion version, } } - CHECK(ts_stack_push(self->stack, version, NULL, false, TS_STATE_ERROR)); + CHECK(ts_stack_push(self->stack, version, NULL, false, ERROR_STATE)); while (ts_stack_version_count(self->stack) > previous_version_count) { CHECK(ts_stack_push(self->stack, previous_version_count, NULL, false, - TS_STATE_ERROR)); + ERROR_STATE)); assert(ts_stack_merge(self->stack, version, previous_version_count)); } @@ -1032,7 +1035,7 @@ static bool parser__recover(Parser *self, StackVersion version, TSStateId state, CHECK(new_version != STACK_VERSION_NONE); CHECK(parser__shift( - self, new_version, TS_STATE_ERROR, lookahead, + self, new_version, ERROR_STATE, lookahead, ts_language_symbol_metadata(self->language, lookahead->symbol).extra)); ErrorStatus error_status = ts_stack_error_status(self->stack, new_version); if (parser__better_version_exists(self, version, error_status)) { @@ -1127,9 +1130,9 @@ static bool parser__advance(Parser *self, StackVersion version, LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol), action.child_count); - Reduction reduction = parser__reduce( - self, version, action.symbol, action.child_count, - (i < table_entry.action_count - 1), true); + Reduction reduction = + parser__reduce(self, version, action.symbol, action.child_count, + (i < table_entry.action_count - 1), true); switch (reduction.status) { case ReduceFailed: @@ -1203,8 +1206,8 @@ static bool parser__advance(Parser *self, StackVersion version, break; } - if (state == TS_STATE_ERROR) { - return parser__push(self, version, lookahead, TS_STATE_ERROR); + if (state == ERROR_STATE) { + return parser__push(self, version, lookahead, ERROR_STATE); } CHECK(parser__handle_error(self, version, lookahead->symbol)); diff --git a/src/runtime/parser.h b/src/runtime/parser.h index a5bd60f0..c27e652e 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -7,6 +7,7 @@ extern "C" { #include "runtime/stack.h" #include "runtime/array.h" +#include "runtime/lexer.h" #include "runtime/reduce_action.h" typedef struct { @@ -15,7 +16,7 @@ typedef struct { } ReusableNode; typedef struct { - TSLexer lexer; + Lexer lexer; Stack *stack; const TSLanguage *language; ReduceActionSet reduce_actions; diff --git a/src/runtime/stack.c b/src/runtime/stack.c index be440d93..c1863a47 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -1,4 +1,3 @@ -#include "tree_sitter/parser.h" #include "runtime/alloc.h" #include "runtime/tree.h" #include "runtime/array.h" @@ -107,10 +106,7 @@ static StackNode *stack_node_new(StackNode *next, TSTree *tree, bool is_pending, node->link_count = 1; node->links[0] = (StackLink){ - .node = next, - .tree = tree, - .is_pending = is_pending, - .push_count = 0, + .node = next, .tree = tree, .is_pending = is_pending, .push_count = 0, }; node->error_count = next->error_count; @@ -120,7 +116,7 @@ static StackNode *stack_node_new(StackNode *next, TSTree *tree, bool is_pending, ts_tree_retain(tree); node->error_cost += tree->error_cost; - if (state == TS_STATE_ERROR) { + if (state == ERROR_STATE) { if (!tree->extra) { node->error_cost += ERROR_COST_PER_SKIPPED_TREE + ERROR_COST_PER_SKIPPED_CHAR * @@ -171,9 +167,7 @@ static void stack_node_add_link(StackNode *self, StackLink link) { static StackVersion ts_stack__add_version(Stack *self, StackNode *node, unsigned push_count) { StackHead head = { - .node = node, - .is_halted = false, - .push_count = push_count, + .node = node, .is_halted = false, .push_count = push_count, }; if (!array_push(&self->heads, head)) return STACK_VERSION_NONE; @@ -222,8 +216,8 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version, bool is_done = node == self->base_node; StackIterateAction action = - callback(payload, node->state, &iterator->trees, iterator->tree_count, is_done, - iterator->is_pending); + callback(payload, node->state, &iterator->trees, iterator->tree_count, + is_done, iterator->is_pending); bool should_pop = action & StackIteratePop; bool should_stop = action & StackIterateStop || node->link_count == 0; @@ -234,7 +228,8 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version, if (!ts_tree_array_copy(trees, &trees)) goto error; array_reverse(&trees); - if (!ts_stack__add_slice(self, node, &trees, push_count + iterator->push_count)) + if (!ts_stack__add_slice(self, node, &trees, + push_count + iterator->push_count)) goto error; } @@ -370,7 +365,7 @@ unsigned ts_stack_push_count(const Stack *self, StackVersion version) { } void ts_stack_decrease_push_count(const Stack *self, StackVersion version, - unsigned decrement) { + unsigned decrement) { array_get(&self->heads, version)->push_count -= decrement; } @@ -401,10 +396,10 @@ bool ts_stack_push(Stack *self, StackVersion version, TSTree *tree, return false; stack_node_release(node, &self->node_pool); head->node = new_node; - if (state == TS_STATE_ERROR) { + if (state == ERROR_STATE) { new_node->links[0].push_count = head->push_count; head->push_count = 0; - }else + } else head->push_count++; return true; } @@ -424,7 +419,7 @@ INLINE StackIterateAction pop_count_callback(void *payload, TSStateId state, return StackIteratePop | StackIterateStop; } - if (state == TS_STATE_ERROR) { + if (state == ERROR_STATE) { if (pop_session->found_valid_path || pop_session->found_error) { return StackIterateStop; } else { @@ -569,7 +564,8 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { continue; fprintf(f, "node_head_%lu [shape=none, label=\"\"]\n", i); fprintf( - f, "node_head_%lu -> node_%p [label=%lu, fontcolor=blue, weight=10000, " + f, + "node_head_%lu -> node_%p [label=%lu, fontcolor=blue, weight=10000, " "labeltooltip=\"push_count: %u\"]\n", i, head->node, i, head->push_count); if (!array_push(&self->iterators, ((Iterator){.node = head->node }))) @@ -596,7 +592,7 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { all_iterators_done = false; fprintf(f, "node_%p [", node); - if (node->state == TS_STATE_ERROR) + if (node->state == ERROR_STATE) fprintf(f, "label=\"?\""); else if (node->link_count == 1 && node->links[0].tree && node->links[0].tree->extra) diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 14c8172f..881f0ea7 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -5,7 +5,6 @@ extern "C" { #endif -#include "tree_sitter/parser.h" #include "runtime/array.h" #include "runtime/tree.h" #include "runtime/error_costs.h" diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 1aae1393..f35b372f 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -3,7 +3,6 @@ #include #include #include -#include "tree_sitter/parser.h" #include "runtime/alloc.h" #include "runtime/tree.h" #include "runtime/length.h" @@ -430,7 +429,8 @@ static size_t ts_tree__write_to_string(const TSTree *self, if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.chars > 0) { cursor += snprintf(*writer, limit, "(UNEXPECTED "); - cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char); + cursor += + ts_tree__write_char_to_string(*writer, limit, self->lookahead_char); } else { cursor += snprintf(*writer, limit, "(%s", ts_language_symbol_name(language, self->symbol)); diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 11ef85aa..a274c186 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -7,6 +7,7 @@ extern "C" { #include #include "tree_sitter/parser.h" +#include "tree_sitter/runtime.h" #include "runtime/length.h" #include "runtime/array.h" #include diff --git a/src/runtime/tree_path.h b/src/runtime/tree_path.h index 52100092..30b0b990 100644 --- a/src/runtime/tree_path.h +++ b/src/runtime/tree_path.h @@ -6,6 +6,7 @@ extern "C" { #endif #include "runtime/tree.h" +#include "runtime/error_costs.h" typedef Array(TSRange) RangeArray; @@ -107,8 +108,8 @@ static bool tree_must_eq(TSTree *old_tree, TSTree *new_tree) { old_tree->size.bytes == new_tree->size.bytes && old_tree->parse_state != TS_TREE_STATE_NONE && new_tree->parse_state != TS_TREE_STATE_NONE && - (old_tree->parse_state == TS_STATE_ERROR) == - (new_tree->parse_state == TS_STATE_ERROR) + (old_tree->parse_state == ERROR_STATE) == + (new_tree->parse_state == ERROR_STATE) ); }