From 47918070f600f0bdc81c7dc30a220a50ab4b44c9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 13 Nov 2018 15:35:14 -0800 Subject: [PATCH] Add a single-source file way of building the runtime library --- .gitignore | 4 +++- docs/section-2-using-parsers.md | 26 ++++++++++++++++++++++++- script/build-runtime | 19 ++++++++++++++++++ script/ci | 1 + src/runtime/lexer.c | 2 ++ src/runtime/node.c | 26 ++++++++++++------------- src/runtime/parser.c | 2 ++ src/runtime/runtime.c | 19 ++++++++++++++++++ src/runtime/stack.c | 34 +++++++++++++++++---------------- src/runtime/tree_cursor.c | 18 ++++++++--------- 10 files changed, 111 insertions(+), 40 deletions(-) create mode 100755 script/build-runtime create mode 100644 src/runtime/runtime.c diff --git a/.gitignore b/.gitignore index f084ae20..7cee7e33 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,12 @@ # Compiled binaries out +*.a +*.o fuzz-results log.html -# Generated build config files +# Generated build config files gyp-mac-tool Makefile *.Makefile diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index 894fabce..4ecade03 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -9,6 +9,30 @@ All of Tree-sitter's parsing functionality is exposed through C APIs. Applicatio This document will describes the general concepts of how to use Tree-sitter, which should be relevant regardless of what language you're using. It also goes into some C-specific details that are useful if you're using the C API directly or are building a new binding to a different language. +## Building the Runtime Library + +Building the runtime library requires one git submodule: [`utf8proc`](https://github.com/JuliaStrings/utf8proc). 
Make sure that `utf8proc` is downloaded by running this command from the Tree-sitter directory: + +```sh +git submodule update --init +``` + +To build the runtime library on a POSIX system, run this script, which will create a static library called `libruntime.a` in the Tree-sitter folder: + +```sh +script/build-runtime +``` + +Alternatively, you can use the runtime library in a larger project by adding one source file to the project. This source file needs three directories to be in the include path when compiled: + +**source file:** +* `tree-sitter/src/runtime/runtime.c` + +**include directories:** +* `tree-sitter/src` +* `tree-sitter/include` +* `tree-sitter/externals/utf8proc` + ## The Objects There are four main types of objects involved when using Tree-sitter: languages, parsers, syntax trees, and syntax nodes. In C, these are called `TSParser`, `TSLanguage`, `TSTree`, and `TSNode`. @@ -86,7 +110,7 @@ clang \ -I tree-sitter/include \ test-json-parser.c \ tree-sitter-json/src/parser.c \ - tree-sitter/out/Release/libruntime.a \ + tree-sitter/libruntime.a \ -o test-json-parser ./test-json-parser diff --git a/script/build-runtime b/script/build-runtime new file mode 100755 index 00000000..881b48e9 --- /dev/null +++ b/script/build-runtime @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +# If `CC` isn't set, pick a default compiler +if command -v clang > /dev/null 2>&1; then + : ${CC:=clang} +else + : ${CC:=gcc} +fi + +${CC} \ + -c \ + -I src \ + -I include \ + -I externals/utf8proc \ + src/runtime/runtime.c \ + -o runtime.o + +ar rcs libruntime.a runtime.o +rm runtime.o diff --git a/script/ci b/script/ci index 7e9c1f39..6ad8a2b7 100755 --- a/script/ci +++ b/script/ci @@ -4,5 +4,6 @@ set -e script/fetch-fixtures script/check-mallocs +script/build-runtime script/test -b script/benchmark -b diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 10cc7f14..42c7e668 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -292,3 +292,5 @@ TSRange *ts_lexer_included_ranges(const Lexer
*self, uint32_t *count) { *count = self->included_range_count; return self->included_ranges; } + +#undef LOG diff --git a/src/runtime/node.c b/src/runtime/node.c index 5a528f03..c1763261 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -10,7 +10,7 @@ typedef struct { uint32_t child_index; uint32_t structural_child_index; const TSSymbol *alias_sequence; -} ChildIterator; +} NodeChildIterator; // TSNode - constructors @@ -44,18 +44,18 @@ static inline Subtree ts_node__subtree(TSNode self) { return *(const Subtree *)self.id; } -// ChildIterator +// NodeChildIterator -static inline ChildIterator ts_node_iterate_children(const TSNode *node) { +static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { Subtree subtree = ts_node__subtree(*node); if (ts_subtree_child_count(subtree) == 0) { - return (ChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; + return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; } const TSSymbol *alias_sequence = ts_language_alias_sequence( node->tree->language, subtree.ptr->alias_sequence_id ); - return (ChildIterator) { + return (NodeChildIterator) { .tree = node->tree, .parent = subtree, .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, @@ -65,7 +65,7 @@ static inline ChildIterator ts_node_iterate_children(const TSNode *node) { }; } -static inline bool ts_node_child_iterator_next(ChildIterator *self, TSNode *result) { +static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode *result) { if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; const Subtree *child = &self->parent.ptr->children[self->child_index]; TSSymbol alias_symbol = 0; @@ -126,7 +126,7 @@ static inline TSNode ts_node__child(TSNode self, uint32_t child_index, bool incl TSNode child; uint32_t index = 0; - ChildIterator iterator = ts_node_iterate_children(&result); + NodeChildIterator iterator = 
ts_node_iterate_children(&result); while (ts_node_child_iterator_next(&iterator, &child)) { if (ts_node__is_relevant(child, include_anonymous)) { if (index == child_index) { @@ -177,7 +177,7 @@ static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) bool found_child_containing_target = false; TSNode child; - ChildIterator iterator = ts_node_iterate_children(&node); + NodeChildIterator iterator = ts_node_iterate_children(&node); while (ts_node_child_iterator_next(&iterator, &child)) { if (child.id == self.id) break; if (iterator.position.bytes > target_end_byte) { @@ -234,7 +234,7 @@ static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) TSNode child_containing_target = ts_node__null(); TSNode child; - ChildIterator iterator = ts_node_iterate_children(&node); + NodeChildIterator iterator = ts_node_iterate_children(&node); while (ts_node_child_iterator_next(&iterator, &child)) { if (iterator.position.bytes < target_end_byte) continue; if (ts_node_start_byte(child) <= ts_node_start_byte(self)) { @@ -285,7 +285,7 @@ static inline TSNode ts_node__first_child_for_byte(TSNode self, uint32_t goal, did_descend = false; TSNode child; - ChildIterator iterator = ts_node_iterate_children(&node); + NodeChildIterator iterator = ts_node_iterate_children(&node); while (ts_node_child_iterator_next(&iterator, &child)) { if (ts_node_end_byte(child) > goal) { if (ts_node__is_relevant(child, include_anonymous)) { @@ -313,7 +313,7 @@ static inline TSNode ts_node__descendant_for_byte_range(TSNode self, uint32_t mi did_descend = false; TSNode child; - ChildIterator iterator = ts_node_iterate_children(&node); + NodeChildIterator iterator = ts_node_iterate_children(&node); while (ts_node_child_iterator_next(&iterator, &child)) { if (iterator.position.bytes > max) { if (ts_node_start_byte(child) > min) break; @@ -342,7 +342,7 @@ static inline TSNode ts_node__descendant_for_point_range(TSNode self, TSPoint mi did_descend = false; TSNode child; - 
ChildIterator iterator = ts_node_iterate_children(&node); + NodeChildIterator iterator = ts_node_iterate_children(&node); while (ts_node_child_iterator_next(&iterator, &child)) { if (point_gt(iterator.position.extent, max)) { if (point_gt(ts_node_start_point(child), min)) break; @@ -424,7 +424,7 @@ TSNode ts_node_parent(TSNode self) { did_descend = false; TSNode child; - ChildIterator iterator = ts_node_iterate_children(&node); + NodeChildIterator iterator = ts_node_iterate_children(&node); while (ts_node_child_iterator_next(&iterator, &child)) { if ( ts_node_start_byte(child) > ts_node_start_byte(self) || diff --git a/src/runtime/parser.c b/src/runtime/parser.c index d87f25d4..3bcbfa42 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1663,3 +1663,5 @@ TSTree *ts_parser_parse_string(TSParser *self, const TSTree *old_tree, TSInputEncodingUTF8, }); } + +#undef LOG diff --git a/src/runtime/runtime.c b/src/runtime/runtime.c new file mode 100644 index 00000000..51455a8b --- /dev/null +++ b/src/runtime/runtime.c @@ -0,0 +1,19 @@ +// The Tree-sitter runtime library can be built by compiling this +// one source file. 
+// +// The following directories must be added to the include path: +// - src +// - include +// - externals/utf8proc + +#include "runtime/get_changed_ranges.c" +#include "runtime/language.c" +#include "runtime/lexer.c" +#include "runtime/node.c" +#include "runtime/parser.c" +#include "runtime/stack.c" +#include "runtime/subtree.c" +#include "runtime/tree_cursor.c" +#include "runtime/tree.c" +#include "runtime/utf16.c" +#include "utf8proc.c" diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 8268b2a8..cc434e38 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -41,7 +41,7 @@ typedef struct { SubtreeArray subtrees; uint32_t subtree_count; bool is_pending; -} Iterator; +} StackIterator; typedef struct { void *payload; @@ -68,7 +68,7 @@ typedef struct { struct Stack { Array(StackHead) heads; StackSliceArray slices; - Array(Iterator) iterators; + Array(StackIterator) iterators; StackNodeArray node_pool; StackNode *base_node; SubtreePool *subtree_pool; @@ -81,7 +81,7 @@ enum { StackActionPop = 2, }; -typedef StackAction (*StackCallback)(void *, const Iterator *); +typedef StackAction (*StackCallback)(void *, const StackIterator *); static void stack_node_retain(StackNode *self) { if (!self) @@ -278,7 +278,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, array_clear(&self->iterators); StackHead *head = array_get(&self->heads, version); - Iterator iterator = { + StackIterator iterator = { .node = head->node, .subtrees = array_new(), .subtree_count = 0, @@ -295,7 +295,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, while (self->iterators.size > 0) { for (uint32_t i = 0, size = self->iterators.size; i < size; i++) { - Iterator *iterator = &self->iterators.contents[i]; + StackIterator *iterator = &self->iterators.contents[i]; StackNode *node = iterator->node; StackAction action = callback(payload, iterator); @@ -324,7 +324,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, } for 
(uint32_t j = 1; j <= node->link_count; j++) { - Iterator *next_iterator; + StackIterator *next_iterator; StackLink link; if (j == node->link_count) { link = node->links[0]; @@ -332,7 +332,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, } else { if (self->iterators.size >= MAX_ITERATOR_COUNT) continue; link = node->links[j]; - Iterator current_iterator = self->iterators.contents[i]; + StackIterator current_iterator = self->iterators.contents[i]; array_push(&self->iterators, current_iterator); next_iterator = array_back(&self->iterators); ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees); @@ -450,7 +450,7 @@ void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, head->node = new_node; } -inline StackAction iterate_callback(void *payload, const Iterator *iterator) { +inline StackAction iterate_callback(void *payload, const StackIterator *iterator) { StackIterateSession *session = payload; session->callback( session->payload, @@ -466,7 +466,7 @@ void ts_stack_iterate(Stack *self, StackVersion version, stack__iter(self, version, iterate_callback, &session, -1); } -inline StackAction pop_count_callback(void *payload, const Iterator *iterator) { +inline StackAction pop_count_callback(void *payload, const StackIterator *iterator) { unsigned *goal_subtree_count = payload; if (iterator->subtree_count == *goal_subtree_count) { return StackActionPop | StackActionStop; @@ -479,7 +479,7 @@ StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t c return stack__iter(self, version, pop_count_callback, &count, count); } -inline StackAction pop_pending_callback(void *payload, const Iterator *iterator) { +inline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) { if (iterator->subtree_count >= 1) { if (iterator->is_pending) { return StackActionPop | StackActionStop; @@ -500,7 +500,7 @@ StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) { return 
pop; } -inline StackAction pop_error_callback(void *payload, const Iterator *iterator) { +inline StackAction pop_error_callback(void *payload, const StackIterator *iterator) { if (iterator->subtrees.size > 0) { bool *found_error = payload; if (!*found_error && ts_subtree_is_error(iterator->subtrees.contents[0])) { @@ -531,7 +531,7 @@ SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) { return (SubtreeArray){.size = 0}; } -inline StackAction pop_all_callback(void *payload, const Iterator *iterator) { +inline StackAction pop_all_callback(void *payload, const StackIterator *iterator) { return iterator->node->link_count == 0 ? StackActionPop : StackActionNone; } @@ -544,7 +544,7 @@ typedef struct { unsigned max_depth; } SummarizeStackSession; -inline StackAction summarize_stack_callback(void *payload, const Iterator *iterator) { +inline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { SummarizeStackSession *session = payload; TSStateId state = iterator->node->state; unsigned depth = iterator->subtree_count; @@ -748,7 +748,7 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) } fprintf(f, "\"]\n"); - array_push(&self->iterators, ((Iterator){.node = head->node })); + array_push(&self->iterators, ((StackIterator){.node = head->node })); } bool all_iterators_done = false; @@ -756,7 +756,7 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) all_iterators_done = true; for (uint32_t i = 0; i < self->iterators.size; i++) { - Iterator iterator = self->iterators.contents[i]; + StackIterator iterator = self->iterators.contents[i]; StackNode *node = iterator.node; for (uint32_t j = 0; j < visited_nodes.size; j++) { @@ -821,7 +821,7 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) fprintf(f, "];\n"); - Iterator *next_iterator; + StackIterator *next_iterator; if (j == 0) { next_iterator = &self->iterators.contents[i]; } else { @@ -841,3 
+841,5 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) ts_toggle_allocation_recording(was_recording_allocations); return true; } + +#undef inline diff --git a/src/runtime/tree_cursor.c b/src/runtime/tree_cursor.c index ac7a015d..9fce48be 100644 --- a/src/runtime/tree_cursor.c +++ b/src/runtime/tree_cursor.c @@ -11,20 +11,20 @@ typedef struct { uint32_t child_index; uint32_t structural_child_index; const TSSymbol *alias_sequence; -} ChildIterator; +} CursorChildIterator; -// ChildIterator +// CursorChildIterator -static inline ChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { +static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { TreeCursorEntry *last_entry = array_back(&self->stack); if (ts_subtree_child_count(*last_entry->subtree) == 0) { - return (ChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL}; + return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL}; } const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, last_entry->subtree->ptr->alias_sequence_id ); - return (ChildIterator) { + return (CursorChildIterator) { .tree = self->tree, .parent = *last_entry->subtree, .position = last_entry->position, @@ -34,7 +34,7 @@ static inline ChildIterator ts_tree_cursor_iterate_children(const TreeCursor *se }; } -static inline bool ts_tree_cursor_child_iterator_next(ChildIterator *self, +static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self, TreeCursorEntry *result, bool *visible) { if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; @@ -105,7 +105,7 @@ bool ts_tree_cursor_goto_first_child(TSTreeCursor *_self) { bool visible; TreeCursorEntry entry; - ChildIterator iterator = ts_tree_cursor_iterate_children(self); + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); while (ts_tree_cursor_child_iterator_next(&iterator, &entry, 
&visible)) { if (visible) { array_push(&self->stack, entry); @@ -134,7 +134,7 @@ int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *_self, uint32_t g bool visible; TreeCursorEntry entry; - ChildIterator iterator = ts_tree_cursor_iterate_children(self); + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { uint32_t end_byte = entry.position.bytes + ts_subtree_size(*entry.subtree).bytes; bool at_goal = end_byte > goal_byte; @@ -174,7 +174,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) { while (self->stack.size > 1) { TreeCursorEntry entry = array_pop(&self->stack); - ChildIterator iterator = ts_tree_cursor_iterate_children(self); + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); iterator.child_index = entry.child_index; iterator.structural_child_index = entry.structural_child_index; iterator.position = entry.position;