From 908b102786f4d5a6c40e63233b59ca5be3e705ba Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 23 Oct 2020 11:58:27 -0700 Subject: [PATCH] Add more doc comments in the C lib --- lib/src/parser.c | 16 +++++++++++++--- lib/src/subtree.c | 11 +++-------- lib/src/subtree.h | 45 +++++++++++++++++++++++++++++++++------------ 3 files changed, 49 insertions(+), 23 deletions(-) diff --git a/lib/src/parser.c b/lib/src/parser.c index 3984d002..0c711b0c 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -673,6 +673,10 @@ static Subtree ts_parser__reuse_node( return NULL_SUBTREE; } +// Determine if a given tree should be replaced by an alternative tree. +// +// The decision is based on the trees' error costs (if any), their dynamic precedence, +// and finally, as a default, by a recursive comparison of the trees' symbols. static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) { if (!left.ptr) return true; if (!right.ptr) return false; @@ -718,18 +722,26 @@ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) } } +// Determine if a given tree's children should be replaced by an alternative +// array of children. static bool ts_parser__select_children( TSParser *self, Subtree left, const SubtreeArray *children ) { array_assign(&self->scratch_trees, children); + + // Create a temporary subtree using the scratch trees array. This node does + // not perform any allocation except for possibly growing the array to make + // room for its own heap data. The scratch tree is never explicitly released, + // so the same 'scratch trees' array can be reused again later. 
MutableSubtree scratch_tree = ts_subtree_new_node( ts_subtree_symbol(left), &self->scratch_trees, 0, self->language ); + return ts_parser__select_tree( self, left, @@ -841,9 +853,6 @@ static StackVersion ts_parser__reduce( } } - parent.ptr->dynamic_precedence += dynamic_precedence; - parent.ptr->production_id = production_id; - TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); if (end_of_non_terminal_extra && next_state == state) { @@ -856,6 +865,7 @@ static StackVersion ts_parser__reduce( } else { parent.ptr->parse_state = state; } + parent.ptr->dynamic_precedence += dynamic_precedence; // Push the parent node onto the stack, along with any extra tokens that // were previously on top of the stack. diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 4d3986ec..fc1db617 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -254,15 +254,10 @@ Subtree ts_subtree_new_error( return result; } -// Get the size needed to store a heap-allocated subtree with -// the given number o children. // Clone a subtree. -// -// This will reuse the given allocated buffer if it is present. If the -// buffer is NULL, a new allocation will be created. 
-MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse) { +MutableSubtree ts_subtree_clone(Subtree self) { size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); - Subtree *children = ts_realloc(buffer_to_reuse, alloc_size); + Subtree *children = ts_malloc(alloc_size); memcpy(children, ts_subtree_children(self), alloc_size); SubtreeHeapData *result = (SubtreeHeapData *)&children[self.ptr->child_count]; if (self.ptr->child_count > 0) { @@ -286,7 +281,7 @@ MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse) { MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { if (self.data.is_inline) return (MutableSubtree) {self.data}; if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); - MutableSubtree result = ts_subtree_clone(self, NULL); + MutableSubtree result = ts_subtree_clone(self); ts_subtree_release(pool, self); return result; } diff --git a/lib/src/subtree.h b/lib/src/subtree.h index 7df8b09a..b020deb6 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -14,12 +14,19 @@ extern "C" { #include "tree_sitter/api.h" #include "tree_sitter/parser.h" -static const TSStateId TS_TREE_STATE_NONE = USHRT_MAX; +#define TS_TREE_STATE_NONE USHRT_MAX #define NULL_SUBTREE ((Subtree) {.ptr = NULL}) -typedef union Subtree Subtree; -typedef union MutableSubtree MutableSubtree; - +// The serialized state of an external scanner. +// +// Every time an external token subtree is created after a call to an +// external scanner, the scanner's `serialize` function is called to +// retrieve a serialized copy of its state. The bytes are then copied +// onto the subtree itself so that the scanner's state can later be +// restored using its `deserialize` function. +// +// Small byte arrays are stored inline, and long ones are allocated +// separately on the heap. 
typedef struct { union { char *long_data; @@ -28,6 +35,10 @@ typedef struct { uint32_t length; } ExternalScannerState; +// A compact representation of a subtree. +// +// This representation is used for small leaf nodes that are not +// errors, and were not created by an external scanner. typedef struct { bool is_inline : 1; bool visible : 1; @@ -45,6 +56,11 @@ typedef struct { uint16_t parse_state; } SubtreeInlineData; +// A heap-allocated representation of a subtree. +// +// This representation is used for parent nodes, external tokens, +// errors, and other leaf nodes whose data is too large to fit into +// the inline representation. typedef struct { volatile uint32_t ref_count; Length padding; @@ -88,15 +104,17 @@ typedef struct { }; } SubtreeHeapData; -union Subtree { +// The fundamental building block of a syntax tree. +typedef union { SubtreeInlineData data; const SubtreeHeapData *ptr; -}; +} Subtree; -union MutableSubtree { +// Like Subtree, but mutable. +typedef union { SubtreeInlineData data; SubtreeHeapData *ptr; -}; +} MutableSubtree; typedef Array(Subtree) SubtreeArray; typedef Array(MutableSubtree) MutableSubtreeArray; @@ -142,7 +160,6 @@ char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all); void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); Subtree ts_subtree_last_external_token(Subtree); bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); -MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse); #define SUBTREE_GET(self, name) (self.data.is_inline ? self.data.name : self.ptr->name) @@ -158,10 +175,17 @@ static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE #undef SUBTREE_GET +// Get the size needed to store a heap-allocated subtree with the given +// number of children. 
static inline size_t ts_subtree_alloc_size(uint32_t child_count) { return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); } +// Get a subtree's children, which are allocated immediately before the +// tree's own heap data. +#define ts_subtree_children(self) \ + ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) + static inline void ts_subtree_set_extra(MutableSubtree *self) { if (self->data.is_inline) { self->data.extra = true; @@ -208,9 +232,6 @@ static inline uint32_t ts_subtree_total_bytes(Subtree self) { return ts_subtree_total_size(self).bytes; } -#define ts_subtree_children(self) \ - ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) - static inline uint32_t ts_subtree_child_count(Subtree self) { return self.data.is_inline ? 0 : self.ptr->child_count; }