Merge remote-tracking branch 'origin/master' into query-testy
This commit is contained in:
commit
938eae8536
19 changed files with 744 additions and 512 deletions
|
|
@ -38,7 +38,6 @@ script:
|
|||
- (eval "$WASM_ENV" && script/generate-fixtures-wasm)
|
||||
|
||||
# Run the tests
|
||||
- export TREE_SITTER_STATIC_ANALYSIS=1
|
||||
- script/test
|
||||
- script/test-wasm
|
||||
- script/benchmark
|
||||
|
|
|
|||
599
Cargo.lock
generated
599
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -21,6 +21,7 @@ harness = false
|
|||
[dependencies]
|
||||
ansi_term = "0.11"
|
||||
cc = "1.0"
|
||||
atty = "0.2"
|
||||
clap = "2.32"
|
||||
difference = "2.0"
|
||||
dirs = "2.0.2"
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use serde::ser::SerializeMap;
|
|||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::time::Instant;
|
||||
use std::{fs, io, path, str, usize};
|
||||
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer};
|
||||
|
|
@ -278,14 +279,14 @@ pub fn ansi(
|
|||
source: &[u8],
|
||||
config: &HighlightConfiguration,
|
||||
print_time: bool,
|
||||
cancellation_flag: Option<&AtomicUsize>,
|
||||
) -> Result<()> {
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
let time = Instant::now();
|
||||
let cancellation_flag = util::cancel_on_stdin();
|
||||
let mut highlighter = Highlighter::new();
|
||||
|
||||
let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| {
|
||||
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
|
||||
loader.highlight_config_for_injection_string(string)
|
||||
})?;
|
||||
|
||||
|
|
@ -320,6 +321,7 @@ pub fn html(
|
|||
theme: &Theme,
|
||||
source: &[u8],
|
||||
config: &HighlightConfiguration,
|
||||
quiet: bool,
|
||||
print_time: bool,
|
||||
) -> Result<()> {
|
||||
use std::io::Write;
|
||||
|
|
@ -343,17 +345,19 @@ pub fn html(
|
|||
}
|
||||
})?;
|
||||
|
||||
write!(&mut stdout, "<table>\n")?;
|
||||
for (i, line) in renderer.lines().enumerate() {
|
||||
write!(
|
||||
&mut stdout,
|
||||
"<tr><td class=line-number>{}</td><td class=line>{}</td></tr>\n",
|
||||
i + 1,
|
||||
line
|
||||
)?;
|
||||
}
|
||||
if !quiet {
|
||||
write!(&mut stdout, "<table>\n")?;
|
||||
for (i, line) in renderer.lines().enumerate() {
|
||||
write!(
|
||||
&mut stdout,
|
||||
"<tr><td class=line-number>{}</td><td class=line>{}</td></tr>\n",
|
||||
i + 1,
|
||||
line
|
||||
)?;
|
||||
}
|
||||
|
||||
write!(&mut stdout, "</table>\n")?;
|
||||
write!(&mut stdout, "</table>\n")?;
|
||||
}
|
||||
|
||||
if print_time {
|
||||
eprintln!("Time: {}ms", time.elapsed().as_millis());
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ use std::{env, fs, u64};
|
|||
use tree_sitter::Language;
|
||||
use tree_sitter_cli::{
|
||||
config, error, generate, highlight, loader, logger, parse, query, tags, test, test_highlight,
|
||||
wasm, web_ui,
|
||||
util, wasm, web_ui,
|
||||
};
|
||||
|
||||
const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
||||
|
|
@ -66,7 +66,6 @@ fn run() -> error::Result<()> {
|
|||
.arg(Arg::with_name("quiet").long("quiet").short("q"))
|
||||
.arg(Arg::with_name("stat").long("stat").short("s"))
|
||||
.arg(Arg::with_name("time").long("time").short("t"))
|
||||
.arg(Arg::with_name("allow-cancellation").long("cancel"))
|
||||
.arg(Arg::with_name("timeout").long("timeout").takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("edits")
|
||||
|
|
@ -136,7 +135,7 @@ fn run() -> error::Result<()> {
|
|||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("html").long("html").short("h"))
|
||||
.arg(Arg::with_name("time").long("time").short("t"))
|
||||
.arg(Arg::with_name("q").short("q")),
|
||||
.arg(Arg::with_name("quiet").long("quiet").short("q")),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("build-wasm")
|
||||
|
|
@ -226,7 +225,8 @@ fn run() -> error::Result<()> {
|
|||
let edits = matches
|
||||
.values_of("edits")
|
||||
.map_or(Vec::new(), |e| e.collect());
|
||||
let allow_cancellation = matches.is_present("allow-cancellation");
|
||||
let cancellation_flag = util::cancel_on_stdin();
|
||||
|
||||
let timeout = matches
|
||||
.value_of("timeout")
|
||||
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
|
||||
|
|
@ -255,7 +255,7 @@ fn run() -> error::Result<()> {
|
|||
timeout,
|
||||
debug,
|
||||
debug_graph,
|
||||
allow_cancellation,
|
||||
Some(&cancellation_flag),
|
||||
)?;
|
||||
|
||||
if should_track_stats {
|
||||
|
|
@ -314,12 +314,16 @@ fn run() -> error::Result<()> {
|
|||
loader.find_all_languages(&config.parser_directories)?;
|
||||
|
||||
let time = matches.is_present("time");
|
||||
let quiet = matches.is_present("quiet");
|
||||
let html_mode = quiet || matches.is_present("html");
|
||||
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
|
||||
let html_mode = matches.is_present("html");
|
||||
if html_mode {
|
||||
|
||||
if html_mode && !quiet {
|
||||
println!("{}", highlight::HTML_HEADER);
|
||||
}
|
||||
|
||||
let cancellation_flag = util::cancel_on_stdin();
|
||||
|
||||
let mut lang = None;
|
||||
if let Some(scope) = matches.value_of("scope") {
|
||||
lang = loader.language_configuration_for_scope(scope)?;
|
||||
|
|
@ -344,16 +348,30 @@ fn run() -> error::Result<()> {
|
|||
if let Some(highlight_config) = language_config.highlight_config(language)? {
|
||||
let source = fs::read(path)?;
|
||||
if html_mode {
|
||||
highlight::html(&loader, &config.theme, &source, highlight_config, time)?;
|
||||
highlight::html(
|
||||
&loader,
|
||||
&config.theme,
|
||||
&source,
|
||||
highlight_config,
|
||||
quiet,
|
||||
time,
|
||||
)?;
|
||||
} else {
|
||||
highlight::ansi(&loader, &config.theme, &source, highlight_config, time)?;
|
||||
highlight::ansi(
|
||||
&loader,
|
||||
&config.theme,
|
||||
&source,
|
||||
highlight_config,
|
||||
time,
|
||||
Some(&cancellation_flag),
|
||||
)?;
|
||||
}
|
||||
} else {
|
||||
eprintln!("No syntax highlighting config found for path {:?}", path);
|
||||
}
|
||||
}
|
||||
|
||||
if html_mode {
|
||||
if html_mode && !quiet {
|
||||
println!("{}", highlight::HTML_FOOTER);
|
||||
}
|
||||
} else if let Some(matches) = matches.subcommand_matches("build-wasm") {
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@ use super::error::{Error, Result};
|
|||
use super::util;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::time::Instant;
|
||||
use std::{fmt, fs, thread, usize};
|
||||
use std::{fmt, fs, usize};
|
||||
use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Tree};
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
@ -40,7 +40,7 @@ pub fn parse_file_at_path(
|
|||
timeout: u64,
|
||||
debug: bool,
|
||||
debug_graph: bool,
|
||||
allow_cancellation: bool,
|
||||
cancellation_flag: Option<&AtomicUsize>,
|
||||
) -> Result<bool> {
|
||||
let mut _log_session = None;
|
||||
let mut parser = Parser::new();
|
||||
|
|
@ -51,16 +51,7 @@ pub fn parse_file_at_path(
|
|||
|
||||
// If a cancellation flag was provided by the caller, cancel the parse
|
||||
// when that flag is set (the CLI sets it when the user types a newline).
|
||||
if allow_cancellation {
|
||||
let flag = Box::new(AtomicUsize::new(0));
|
||||
unsafe { parser.set_cancellation_flag(Some(&flag)) };
|
||||
thread::spawn(move || {
|
||||
let mut line = String::new();
|
||||
io::stdin().read_line(&mut line).unwrap();
|
||||
eprintln!("Cancelling");
|
||||
flag.store(1, Ordering::Relaxed);
|
||||
});
|
||||
}
|
||||
unsafe { parser.set_cancellation_flag(cancellation_flag) };
|
||||
|
||||
// Set a timeout based on the `--timeout` flag.
|
||||
parser.set_timeout_micros(timeout);
|
||||
|
|
|
|||
|
|
@ -15,14 +15,16 @@ const HTML_HEADER: &[u8] = b"<!DOCTYPE html>\n<style>svg { width: 100%; }</style
|
|||
|
||||
pub fn cancel_on_stdin() -> Arc<AtomicUsize> {
|
||||
let result = Arc::new(AtomicUsize::new(0));
|
||||
thread::spawn({
|
||||
let flag = result.clone();
|
||||
move || {
|
||||
let mut line = String::new();
|
||||
io::stdin().read_line(&mut line).unwrap();
|
||||
flag.store(1, Ordering::Relaxed);
|
||||
}
|
||||
});
|
||||
if atty::is(atty::Stream::Stdin) {
|
||||
thread::spawn({
|
||||
let flag = result.clone();
|
||||
move || {
|
||||
let mut line = String::new();
|
||||
io::stdin().read_line(&mut line).unwrap();
|
||||
flag.store(1, Ordering::Relaxed);
|
||||
}
|
||||
});
|
||||
}
|
||||
result
|
||||
}
|
||||
#[cfg(windows)]
|
||||
|
|
|
|||
|
|
@ -585,6 +585,38 @@ For example, this pattern would match any node inside a call:
|
|||
(call (_) @call.inner)
|
||||
```
|
||||
|
||||
|
||||
#### Anchors
|
||||
|
||||
The anchor operator, `.`, is used to constrain the ways in which child patterns are matched. It has different behaviors depending on where it's placed inside a query.
|
||||
|
||||
When `.` is placed before the _first_ child within a parent pattern, the child will only match when it is the first named node in the parent. For example, the below pattern matches a given `array` node at most once, assigning the `@the-element` capture to the first `identifier` node in the parent `array`:
|
||||
|
||||
```
|
||||
(array . (identifier) @the-element)
|
||||
```
|
||||
|
||||
Without this anchor, the pattern would match once for every identifier in the array, with `@the-element` bound to each matched identifier.
|
||||
|
||||
Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the last named child of their parent. The below pattern matches only nodes that are the last named child within a `block`.
|
||||
|
||||
```
|
||||
(block (_) @last-expression .)
|
||||
```
|
||||
|
||||
Finally, an anchor _between_ two child patterns will cause the patterns to only match nodes that are immediate siblings. The pattern below, given a long dotted name like `a.b.c.d`, will only match pairs of consecutive identifiers: `a, b`, `b, c`, and `c, d`.
|
||||
|
||||
```
|
||||
(dotted_name
|
||||
(identifier) @prev-id
|
||||
.
|
||||
(identifier) @next-id)
|
||||
```
|
||||
|
||||
Without the anchor, non-consecutive pairs like `a, c` and `b, d` would also be matched.
|
||||
|
||||
The restrictions placed on a pattern by an anchor operator ignore anonymous nodes.
|
||||
|
||||
#### Predicates
|
||||
|
||||
You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings.
|
||||
|
|
|
|||
|
|
@ -74,6 +74,9 @@ extern "C" {
|
|||
#define array_assign(self, other) \
|
||||
array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self))
|
||||
|
||||
#define array_swap(self, other) \
|
||||
array__swap((VoidArray *)(self), (VoidArray *)(other))
|
||||
|
||||
// Search a sorted array for a given `needle` value, using the given `compare`
|
||||
// callback to determine the order.
|
||||
//
|
||||
|
|
@ -139,7 +142,7 @@ static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t
|
|||
if (self->contents) {
|
||||
self->contents = ts_realloc(self->contents, new_capacity * element_size);
|
||||
} else {
|
||||
self->contents = ts_calloc(new_capacity, element_size);
|
||||
self->contents = ts_malloc(new_capacity * element_size);
|
||||
}
|
||||
self->capacity = new_capacity;
|
||||
}
|
||||
|
|
@ -151,6 +154,12 @@ static inline void array__assign(VoidArray *self, const VoidArray *other, size_t
|
|||
memcpy(self->contents, other->contents, self->size * element_size);
|
||||
}
|
||||
|
||||
// Exchange two arrays in place by swapping the `VoidArray` structs themselves
// (by value) through a temporary. Backs the `array_swap` macro, which casts
// both of its arguments to `VoidArray *` before calling this.
static inline void array__swap(VoidArray *self, VoidArray *other) {
|
||||
VoidArray swap = *other;
|
||||
*other = *self;
|
||||
*self = swap;
|
||||
}
|
||||
|
||||
static inline void array__grow(VoidArray *self, size_t count, size_t element_size) {
|
||||
size_t new_size = self->size + count;
|
||||
if (new_size > self->capacity) {
|
||||
|
|
|
|||
|
|
@ -205,7 +205,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) {
|
|||
Length position = entry.position;
|
||||
uint32_t structural_child_index = 0;
|
||||
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
|
||||
const Subtree *child = &entry.subtree->ptr->children[i];
|
||||
const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
|
||||
Length child_left = length_add(position, ts_subtree_padding(*child));
|
||||
Length child_right = length_add(child_left, ts_subtree_size(*child));
|
||||
|
||||
|
|
@ -260,7 +260,7 @@ static void iterator_advance(Iterator *self) {
|
|||
Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
|
||||
uint32_t structural_child_index = entry.structural_child_index;
|
||||
if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
|
||||
const Subtree *next_child = &parent->ptr->children[child_index];
|
||||
const Subtree *next_child = &ts_subtree_children(*parent)[child_index];
|
||||
|
||||
array_push(&self->cursor.stack, ((TreeCursorEntry){
|
||||
.subtree = next_child,
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ static inline bool ts_node_child_iterator_next(
|
|||
TSNode *result
|
||||
) {
|
||||
if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false;
|
||||
const Subtree *child = &self->parent.ptr->children[self->child_index];
|
||||
const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
|
||||
TSSymbol alias_symbol = 0;
|
||||
if (!ts_subtree_extra(*child)) {
|
||||
if (self->alias_sequence) {
|
||||
|
|
@ -178,7 +178,7 @@ static bool ts_subtree_has_trailing_empty_descendant(
|
|||
Subtree other
|
||||
) {
|
||||
for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) {
|
||||
Subtree child = self.ptr->children[i];
|
||||
Subtree child = ts_subtree_children(self)[i];
|
||||
if (ts_subtree_total_bytes(child) > 0) break;
|
||||
if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) {
|
||||
return true;
|
||||
|
|
|
|||
126
lib/src/parser.c
126
lib/src/parser.c
|
|
@ -60,8 +60,9 @@ struct TSParser {
|
|||
const TSLanguage *language;
|
||||
ReduceActionSet reduce_actions;
|
||||
Subtree finished_tree;
|
||||
SubtreeHeapData scratch_tree_data;
|
||||
MutableSubtree scratch_tree;
|
||||
SubtreeArray trailing_extras;
|
||||
SubtreeArray trailing_extras2;
|
||||
SubtreeArray scratch_trees;
|
||||
TokenCache token_cache;
|
||||
ReusableNode reusable_node;
|
||||
void *external_scanner_payload;
|
||||
|
|
@ -155,7 +156,7 @@ static bool ts_parser__breakdown_top_of_stack(
|
|||
Subtree parent = *array_front(&slice.subtrees);
|
||||
|
||||
for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) {
|
||||
Subtree child = parent.ptr->children[j];
|
||||
Subtree child = ts_subtree_children(parent)[j];
|
||||
pending = ts_subtree_child_count(child) > 0;
|
||||
|
||||
if (ts_subtree_is_error(child)) {
|
||||
|
|
@ -672,6 +673,10 @@ static Subtree ts_parser__reuse_node(
|
|||
return NULL_SUBTREE;
|
||||
}
|
||||
|
||||
// Determine if a given tree should be replaced by an alternative tree.
|
||||
//
|
||||
// The decision is based on the trees' error costs (if any), their dynamic precedence,
|
||||
// and finally, as a default, by a recursive comparison of the trees' symbols.
|
||||
static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) {
|
||||
if (!left.ptr) return true;
|
||||
if (!right.ptr) return false;
|
||||
|
|
@ -717,6 +722,33 @@ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right)
|
|||
}
|
||||
}
|
||||
|
||||
// Determine if a given tree's children should be replaced by an alternative
|
||||
// array of children.
|
||||
static bool ts_parser__select_children(
|
||||
TSParser *self,
|
||||
Subtree left,
|
||||
const SubtreeArray *children
|
||||
) {
|
||||
array_assign(&self->scratch_trees, children);
|
||||
|
||||
// Create a temporary subtree using the scratch trees array. This node does
|
||||
// not perform any allocation except for possibly growing the array to make
|
||||
// room for its own heap data. The scratch tree is never explicitly released,
|
||||
// so the same 'scratch trees' array can be reused again later.
|
||||
MutableSubtree scratch_tree = ts_subtree_new_node(
|
||||
ts_subtree_symbol(left),
|
||||
&self->scratch_trees,
|
||||
0,
|
||||
self->language
|
||||
);
|
||||
|
||||
return ts_parser__select_tree(
|
||||
self,
|
||||
left,
|
||||
ts_subtree_from_mut(scratch_tree)
|
||||
);
|
||||
}
|
||||
|
||||
static void ts_parser__shift(
|
||||
TSParser *self,
|
||||
StackVersion version,
|
||||
|
|
@ -742,22 +774,6 @@ static void ts_parser__shift(
|
|||
}
|
||||
}
|
||||
|
||||
static bool ts_parser__replace_children(
|
||||
TSParser *self,
|
||||
MutableSubtree *tree,
|
||||
SubtreeArray *children
|
||||
) {
|
||||
*self->scratch_tree.ptr = *tree->ptr;
|
||||
self->scratch_tree.ptr->child_count = 0;
|
||||
ts_subtree_set_children(self->scratch_tree, children->contents, children->size, self->language);
|
||||
if (ts_parser__select_tree(self, ts_subtree_from_mut(*tree), ts_subtree_from_mut(self->scratch_tree))) {
|
||||
*tree->ptr = *self->scratch_tree.ptr;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static StackVersion ts_parser__reduce(
|
||||
TSParser *self,
|
||||
StackVersion version,
|
||||
|
|
@ -802,11 +818,9 @@ static StackVersion ts_parser__reduce(
|
|||
// node. They will be re-pushed onto the stack after the parent node is
|
||||
// created and pushed.
|
||||
SubtreeArray children = slice.subtrees;
|
||||
while (children.size > 0 && ts_subtree_extra(children.contents[children.size - 1])) {
|
||||
children.size--;
|
||||
}
|
||||
ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras);
|
||||
|
||||
MutableSubtree parent = ts_subtree_new_node(&self->tree_pool,
|
||||
MutableSubtree parent = ts_subtree_new_node(
|
||||
symbol, &children, production_id, self->language
|
||||
);
|
||||
|
||||
|
|
@ -820,21 +834,25 @@ static StackVersion ts_parser__reduce(
|
|||
i++;
|
||||
|
||||
SubtreeArray children = next_slice.subtrees;
|
||||
while (children.size > 0 && ts_subtree_extra(children.contents[children.size - 1])) {
|
||||
children.size--;
|
||||
}
|
||||
ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras2);
|
||||
|
||||
if (ts_parser__replace_children(self, &parent, &children)) {
|
||||
ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
|
||||
slice = next_slice;
|
||||
if (ts_parser__select_children(
|
||||
self,
|
||||
ts_subtree_from_mut(parent),
|
||||
&children
|
||||
)) {
|
||||
ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras);
|
||||
ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent));
|
||||
array_swap(&self->trailing_extras, &self->trailing_extras2);
|
||||
parent = ts_subtree_new_node(
|
||||
symbol, &children, production_id, self->language
|
||||
);
|
||||
} else {
|
||||
array_clear(&self->trailing_extras2);
|
||||
ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
|
||||
}
|
||||
}
|
||||
|
||||
parent.ptr->dynamic_precedence += dynamic_precedence;
|
||||
parent.ptr->production_id = production_id;
|
||||
|
||||
TSStateId state = ts_stack_state(self->stack, slice_version);
|
||||
TSStateId next_state = ts_language_next_state(self->language, state, symbol);
|
||||
if (end_of_non_terminal_extra && next_state == state) {
|
||||
|
|
@ -847,12 +865,13 @@ static StackVersion ts_parser__reduce(
|
|||
} else {
|
||||
parent.ptr->parse_state = state;
|
||||
}
|
||||
parent.ptr->dynamic_precedence += dynamic_precedence;
|
||||
|
||||
// Push the parent node onto the stack, along with any extra tokens that
|
||||
// were previously on top of the stack.
|
||||
ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
|
||||
for (uint32_t j = parent.ptr->child_count; j < slice.subtrees.size; j++) {
|
||||
ts_stack_push(self->stack, slice_version, slice.subtrees.contents[j], false, next_state);
|
||||
for (uint32_t j = 0; j < self->trailing_extras.size; j++) {
|
||||
ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state);
|
||||
}
|
||||
|
||||
for (StackVersion j = 0; j < slice_version; j++) {
|
||||
|
|
@ -884,22 +903,22 @@ static void ts_parser__accept(
|
|||
|
||||
Subtree root = NULL_SUBTREE;
|
||||
for (uint32_t j = trees.size - 1; j + 1 > 0; j--) {
|
||||
Subtree child = trees.contents[j];
|
||||
if (!ts_subtree_extra(child)) {
|
||||
assert(!child.data.is_inline);
|
||||
uint32_t child_count = ts_subtree_child_count(child);
|
||||
Subtree tree = trees.contents[j];
|
||||
if (!ts_subtree_extra(tree)) {
|
||||
assert(!tree.data.is_inline);
|
||||
uint32_t child_count = ts_subtree_child_count(tree);
|
||||
const Subtree *children = ts_subtree_children(tree);
|
||||
for (uint32_t k = 0; k < child_count; k++) {
|
||||
ts_subtree_retain(child.ptr->children[k]);
|
||||
ts_subtree_retain(children[k]);
|
||||
}
|
||||
array_splice(&trees, j, 1, child_count, child.ptr->children);
|
||||
array_splice(&trees, j, 1, child_count, children);
|
||||
root = ts_subtree_from_mut(ts_subtree_new_node(
|
||||
&self->tree_pool,
|
||||
ts_subtree_symbol(child),
|
||||
ts_subtree_symbol(tree),
|
||||
&trees,
|
||||
child.ptr->production_id,
|
||||
tree.ptr->production_id,
|
||||
self->language
|
||||
));
|
||||
ts_subtree_release(&self->tree_pool, child);
|
||||
ts_subtree_release(&self->tree_pool, tree);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -1125,7 +1144,7 @@ static bool ts_parser__recover_to_state(
|
|||
Subtree error_tree = error_trees.contents[0];
|
||||
uint32_t error_child_count = ts_subtree_child_count(error_tree);
|
||||
if (error_child_count > 0) {
|
||||
array_splice(&slice.subtrees, 0, 0, error_child_count, error_tree.ptr->children);
|
||||
array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree));
|
||||
for (unsigned j = 0; j < error_child_count; j++) {
|
||||
ts_subtree_retain(slice.subtrees.contents[j]);
|
||||
}
|
||||
|
|
@ -1133,22 +1152,21 @@ static bool ts_parser__recover_to_state(
|
|||
ts_subtree_array_delete(&self->tree_pool, &error_trees);
|
||||
}
|
||||
|
||||
SubtreeArray trailing_extras = ts_subtree_array_remove_trailing_extras(&slice.subtrees);
|
||||
ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras);
|
||||
|
||||
if (slice.subtrees.size > 0) {
|
||||
Subtree error = ts_subtree_new_error_node(&self->tree_pool, &slice.subtrees, true, self->language);
|
||||
Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language);
|
||||
ts_stack_push(self->stack, slice.version, error, false, goal_state);
|
||||
} else {
|
||||
array_delete(&slice.subtrees);
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < trailing_extras.size; j++) {
|
||||
Subtree tree = trailing_extras.contents[j];
|
||||
for (unsigned j = 0; j < self->trailing_extras.size; j++) {
|
||||
Subtree tree = self->trailing_extras.contents[j];
|
||||
ts_stack_push(self->stack, slice.version, tree, false, goal_state);
|
||||
}
|
||||
|
||||
previous_version = slice.version;
|
||||
array_delete(&trailing_extras);
|
||||
}
|
||||
|
||||
return previous_version != STACK_VERSION_NONE;
|
||||
|
|
@ -1245,7 +1263,7 @@ static void ts_parser__recover(
|
|||
if (ts_subtree_is_eof(lookahead)) {
|
||||
LOG("recover_eof");
|
||||
SubtreeArray children = array_new();
|
||||
Subtree parent = ts_subtree_new_error_node(&self->tree_pool, &children, false, self->language);
|
||||
Subtree parent = ts_subtree_new_error_node(&children, false, self->language);
|
||||
ts_stack_push(self->stack, version, parent, false, 1);
|
||||
ts_parser__accept(self, version, lookahead);
|
||||
return;
|
||||
|
|
@ -1278,7 +1296,6 @@ static void ts_parser__recover(
|
|||
array_reserve(&children, 1);
|
||||
array_push(&children, lookahead);
|
||||
MutableSubtree error_repeat = ts_subtree_new_node(
|
||||
&self->tree_pool,
|
||||
ts_builtin_sym_error_repeat,
|
||||
&children,
|
||||
0,
|
||||
|
|
@ -1307,7 +1324,6 @@ static void ts_parser__recover(
|
|||
ts_stack_renumber_version(self->stack, pop.contents[0].version, version);
|
||||
array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat));
|
||||
error_repeat = ts_subtree_new_node(
|
||||
&self->tree_pool,
|
||||
ts_builtin_sym_error_repeat,
|
||||
&pop.contents[0].subtrees,
|
||||
0,
|
||||
|
|
@ -1666,7 +1682,6 @@ TSParser *ts_parser_new(void) {
|
|||
self->end_clock = clock_null();
|
||||
self->operation_count = 0;
|
||||
self->old_tree = NULL_SUBTREE;
|
||||
self->scratch_tree.ptr = &self->scratch_tree_data;
|
||||
self->included_range_differences = (TSRangeArray) array_new();
|
||||
self->included_range_difference_index = 0;
|
||||
ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
|
||||
|
|
@ -1692,6 +1707,9 @@ void ts_parser_delete(TSParser *self) {
|
|||
ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
|
||||
ts_subtree_pool_delete(&self->tree_pool);
|
||||
reusable_node_delete(&self->reusable_node);
|
||||
array_delete(&self->trailing_extras);
|
||||
array_delete(&self->trailing_extras2);
|
||||
array_delete(&self->scratch_trees);
|
||||
ts_free(self);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ static inline void reusable_node_advance(ReusableNode *self) {
|
|||
} while (ts_subtree_child_count(tree) <= next_index);
|
||||
|
||||
array_push(&self->stack, ((StackEntry) {
|
||||
.tree = tree.ptr->children[next_index],
|
||||
.tree = ts_subtree_children(tree)[next_index],
|
||||
.child_index = next_index,
|
||||
.byte_offset = byte_offset,
|
||||
}));
|
||||
|
|
@ -63,7 +63,7 @@ static inline bool reusable_node_descend(ReusableNode *self) {
|
|||
StackEntry last_entry = *array_back(&self->stack);
|
||||
if (ts_subtree_child_count(last_entry.tree) > 0) {
|
||||
array_push(&self->stack, ((StackEntry) {
|
||||
.tree = last_entry.tree.ptr->children[0],
|
||||
.tree = ts_subtree_children(last_entry.tree)[0],
|
||||
.child_index = 0,
|
||||
.byte_offset = last_entry.byte_offset,
|
||||
}));
|
||||
|
|
|
|||
|
|
@ -288,7 +288,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version,
|
|||
bool include_subtrees = false;
|
||||
if (goal_subtree_count >= 0) {
|
||||
include_subtrees = true;
|
||||
array_reserve(&iterator.subtrees, goal_subtree_count);
|
||||
array_reserve(&iterator.subtrees, ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree));
|
||||
}
|
||||
|
||||
array_push(&self->iterators, iterator);
|
||||
|
|
@ -304,8 +304,9 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version,
|
|||
|
||||
if (should_pop) {
|
||||
SubtreeArray subtrees = iterator->subtrees;
|
||||
if (!should_stop)
|
||||
if (!should_stop) {
|
||||
ts_subtree_array_copy(subtrees, &subtrees);
|
||||
}
|
||||
ts_subtree_array_reverse(&subtrees);
|
||||
ts_stack__add_slice(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -80,26 +80,33 @@ void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
|
|||
}
|
||||
}
|
||||
|
||||
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
|
||||
// Release every subtree held in `self` back through `pool`, then reset the
// array with `array_clear`. Unlike `ts_subtree_array_delete`, this does not
// call `array_delete` on the array.
// NOTE(review): presumably `array_clear` keeps the allocation so the array can
// be reused — confirm against its definition.
void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) {
|
||||
for (uint32_t i = 0; i < self->size; i++) {
|
||||
ts_subtree_release(pool, self->contents[i]);
|
||||
}
|
||||
array_clear(self);
|
||||
}
|
||||
|
||||
// Release every subtree held in `self` (via `ts_subtree_array_clear`) and then
// dispose of the array itself with `array_delete`.
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
|
||||
ts_subtree_array_clear(pool, self);
|
||||
array_delete(self);
|
||||
}
|
||||
|
||||
SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) {
|
||||
SubtreeArray result = array_new();
|
||||
|
||||
uint32_t i = self->size - 1;
|
||||
for (; i + 1 > 0; i--) {
|
||||
Subtree child = self->contents[i];
|
||||
if (!ts_subtree_extra(child)) break;
|
||||
array_push(&result, child);
|
||||
void ts_subtree_array_remove_trailing_extras(
|
||||
SubtreeArray *self,
|
||||
SubtreeArray *destination
|
||||
) {
|
||||
array_clear(destination);
|
||||
while (self->size > 0) {
|
||||
Subtree last = self->contents[self->size - 1];
|
||||
if (ts_subtree_extra(last)) {
|
||||
self->size--;
|
||||
array_push(destination, last);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
self->size = i + 1;
|
||||
ts_subtree_array_reverse(&result);
|
||||
return result;
|
||||
ts_subtree_array_reverse(destination);
|
||||
}
|
||||
|
||||
void ts_subtree_array_reverse(SubtreeArray *self) {
|
||||
|
|
@ -247,28 +254,45 @@ Subtree ts_subtree_new_error(
|
|||
return result;
|
||||
}
|
||||
|
||||
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
|
||||
if (self.data.is_inline) return (MutableSubtree) {self.data};
|
||||
if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self);
|
||||
|
||||
SubtreeHeapData *result = ts_subtree_pool_allocate(pool);
|
||||
memcpy(result, self.ptr, sizeof(SubtreeHeapData));
|
||||
if (result->child_count > 0) {
|
||||
result->children = ts_calloc(self.ptr->child_count, sizeof(Subtree));
|
||||
memcpy(result->children, self.ptr->children, result->child_count * sizeof(Subtree));
|
||||
for (uint32_t i = 0; i < result->child_count; i++) {
|
||||
ts_subtree_retain(result->children[i]);
|
||||
// Clone a subtree.
|
||||
MutableSubtree ts_subtree_clone(Subtree self) {
|
||||
size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count);
|
||||
Subtree *new_children = ts_malloc(alloc_size);
|
||||
Subtree *old_children = ts_subtree_children(self);
|
||||
memcpy(new_children, old_children, alloc_size);
|
||||
SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count];
|
||||
if (self.ptr->child_count > 0) {
|
||||
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
|
||||
ts_subtree_retain(new_children[i]);
|
||||
}
|
||||
} else if (result->has_external_tokens) {
|
||||
result->external_scanner_state = ts_external_scanner_state_copy(&self.ptr->external_scanner_state);
|
||||
} else if (self.ptr->has_external_tokens) {
|
||||
result->external_scanner_state = ts_external_scanner_state_copy(
|
||||
&self.ptr->external_scanner_state
|
||||
);
|
||||
}
|
||||
result->ref_count = 1;
|
||||
ts_subtree_release(pool, self);
|
||||
return (MutableSubtree) {.ptr = result};
|
||||
}
|
||||
|
||||
static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLanguage *language,
|
||||
MutableSubtreeArray *stack) {
|
||||
// Get mutable version of a subtree.
|
||||
//
|
||||
// This takes ownership of the subtree. If the subtree has only one owner,
|
||||
// this will directly convert it into a mutable version. Otherwise, it will
|
||||
// perform a copy.
|
||||
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
|
||||
if (self.data.is_inline) return (MutableSubtree) {self.data};
|
||||
if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self);
|
||||
MutableSubtree result = ts_subtree_clone(self);
|
||||
ts_subtree_release(pool, self);
|
||||
return result;
|
||||
}
|
||||
|
||||
static void ts_subtree__compress(
|
||||
MutableSubtree self,
|
||||
unsigned count,
|
||||
const TSLanguage *language,
|
||||
MutableSubtreeArray *stack
|
||||
) {
|
||||
unsigned initial_stack_size = stack->size;
|
||||
|
||||
MutableSubtree tree = self;
|
||||
|
|
@ -276,7 +300,7 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa
|
|||
for (unsigned i = 0; i < count; i++) {
|
||||
if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break;
|
||||
|
||||
MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
|
||||
MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
|
||||
if (
|
||||
child.data.is_inline ||
|
||||
child.ptr->child_count < 2 ||
|
||||
|
|
@ -284,7 +308,7 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa
|
|||
child.ptr->symbol != symbol
|
||||
) break;
|
||||
|
||||
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[0]);
|
||||
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]);
|
||||
if (
|
||||
grandchild.data.is_inline ||
|
||||
grandchild.ptr->child_count < 2 ||
|
||||
|
|
@ -292,20 +316,20 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa
|
|||
grandchild.ptr->symbol != symbol
|
||||
) break;
|
||||
|
||||
tree.ptr->children[0] = ts_subtree_from_mut(grandchild);
|
||||
child.ptr->children[0] = grandchild.ptr->children[grandchild.ptr->child_count - 1];
|
||||
grandchild.ptr->children[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
|
||||
ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild);
|
||||
ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1];
|
||||
ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
|
||||
array_push(stack, tree);
|
||||
tree = grandchild;
|
||||
}
|
||||
|
||||
while (stack->size > initial_stack_size) {
|
||||
tree = array_pop(stack);
|
||||
MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
|
||||
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[child.ptr->child_count - 1]);
|
||||
ts_subtree_set_children(grandchild, grandchild.ptr->children, grandchild.ptr->child_count, language);
|
||||
ts_subtree_set_children(child, child.ptr->children, child.ptr->child_count, language);
|
||||
ts_subtree_set_children(tree, tree.ptr->children, tree.ptr->child_count, language);
|
||||
MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
|
||||
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]);
|
||||
ts_subtree_summarize_children(grandchild, language);
|
||||
ts_subtree_summarize_children(child, language);
|
||||
ts_subtree_summarize_children(tree, language);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -320,8 +344,8 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu
|
|||
MutableSubtree tree = array_pop(&pool->tree_stack);
|
||||
|
||||
if (tree.ptr->repeat_depth > 0) {
|
||||
Subtree child1 = tree.ptr->children[0];
|
||||
Subtree child2 = tree.ptr->children[tree.ptr->child_count - 1];
|
||||
Subtree child1 = ts_subtree_children(tree)[0];
|
||||
Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1];
|
||||
long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2);
|
||||
if (repeat_delta > 0) {
|
||||
unsigned n = repeat_delta;
|
||||
|
|
@ -333,7 +357,7 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu
|
|||
}
|
||||
|
||||
for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
|
||||
Subtree child = tree.ptr->children[i];
|
||||
Subtree child = ts_subtree_children(tree)[i];
|
||||
if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) {
|
||||
array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
|
||||
}
|
||||
|
|
@ -341,17 +365,13 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu
|
|||
}
|
||||
}
|
||||
|
||||
void ts_subtree_set_children(
|
||||
MutableSubtree self, Subtree *children, uint32_t child_count, const TSLanguage *language
|
||||
// Assign all of the node's properties that depend on its children.
|
||||
void ts_subtree_summarize_children(
|
||||
MutableSubtree self,
|
||||
const TSLanguage *language
|
||||
) {
|
||||
assert(!self.data.is_inline);
|
||||
|
||||
if (self.ptr->child_count > 0 && children != self.ptr->children) {
|
||||
ts_free(self.ptr->children);
|
||||
}
|
||||
|
||||
self.ptr->child_count = child_count;
|
||||
self.ptr->children = children;
|
||||
self.ptr->named_child_count = 0;
|
||||
self.ptr->visible_child_count = 0;
|
||||
self.ptr->error_cost = 0;
|
||||
|
|
@ -364,8 +384,9 @@ void ts_subtree_set_children(
|
|||
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
|
||||
uint32_t lookahead_end_byte = 0;
|
||||
|
||||
const Subtree *children = ts_subtree_children(self);
|
||||
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
|
||||
Subtree child = self.ptr->children[i];
|
||||
Subtree child = children[i];
|
||||
|
||||
if (i == 0) {
|
||||
self.ptr->padding = ts_subtree_padding(child);
|
||||
|
|
@ -384,6 +405,17 @@ void ts_subtree_set_children(
|
|||
self.ptr->error_cost += ts_subtree_error_cost(child);
|
||||
}
|
||||
|
||||
uint32_t grandchild_count = ts_subtree_child_count(child);
|
||||
if (self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat) {
|
||||
if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) {
|
||||
if (ts_subtree_visible(child)) {
|
||||
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
|
||||
} else if (grandchild_count > 0) {
|
||||
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
|
||||
self.ptr->node_count += ts_subtree_node_count(child);
|
||||
|
||||
|
|
@ -395,7 +427,7 @@ void ts_subtree_set_children(
|
|||
} else if (ts_subtree_visible(child)) {
|
||||
self.ptr->visible_child_count++;
|
||||
if (ts_subtree_named(child)) self.ptr->named_child_count++;
|
||||
} else if (ts_subtree_child_count(child) > 0) {
|
||||
} else if (grandchild_count > 0) {
|
||||
self.ptr->visible_child_count += child.ptr->visible_child_count;
|
||||
self.ptr->named_child_count += child.ptr->named_child_count;
|
||||
}
|
||||
|
|
@ -417,22 +449,11 @@ void ts_subtree_set_children(
|
|||
ERROR_COST_PER_RECOVERY +
|
||||
ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
|
||||
ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
|
||||
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
|
||||
Subtree child = self.ptr->children[i];
|
||||
uint32_t grandchild_count = ts_subtree_child_count(child);
|
||||
if (ts_subtree_extra(child)) continue;
|
||||
if (ts_subtree_is_error(child) && grandchild_count == 0) continue;
|
||||
if (ts_subtree_visible(child)) {
|
||||
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
|
||||
} else if (grandchild_count > 0) {
|
||||
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (self.ptr->child_count > 0) {
|
||||
Subtree first_child = self.ptr->children[0];
|
||||
Subtree last_child = self.ptr->children[self.ptr->child_count - 1];
|
||||
Subtree first_child = children[0];
|
||||
Subtree last_child = children[self.ptr->child_count - 1];
|
||||
|
||||
self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
|
||||
self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
|
||||
|
|
@ -455,15 +476,30 @@ void ts_subtree_set_children(
|
|||
}
|
||||
}
|
||||
|
||||
MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
|
||||
SubtreeArray *children, unsigned production_id,
|
||||
const TSLanguage *language) {
|
||||
// Create a new parent node with the given children.
|
||||
//
|
||||
// This takes ownership of the children array.
|
||||
MutableSubtree ts_subtree_new_node(
|
||||
TSSymbol symbol,
|
||||
SubtreeArray *children,
|
||||
unsigned production_id,
|
||||
const TSLanguage *language
|
||||
) {
|
||||
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
|
||||
bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
|
||||
SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
|
||||
|
||||
// Allocate the node's data at the end of the array of children.
|
||||
size_t new_byte_size = ts_subtree_alloc_size(children->size);
|
||||
if (children->capacity * sizeof(Subtree) < new_byte_size) {
|
||||
children->contents = ts_realloc(children->contents, new_byte_size);
|
||||
children->capacity = new_byte_size / sizeof(Subtree);
|
||||
}
|
||||
SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size];
|
||||
|
||||
*data = (SubtreeHeapData) {
|
||||
.ref_count = 1,
|
||||
.symbol = symbol,
|
||||
.child_count = children->size,
|
||||
.visible = metadata.visible,
|
||||
.named = metadata.named,
|
||||
.has_changes = false,
|
||||
|
|
@ -477,32 +513,45 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
|
|||
}}
|
||||
};
|
||||
MutableSubtree result = {.ptr = data};
|
||||
ts_subtree_set_children(result, children->contents, children->size, language);
|
||||
ts_subtree_summarize_children(result, language);
|
||||
return result;
|
||||
}
|
||||
|
||||
Subtree ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children,
|
||||
bool extra, const TSLanguage *language) {
|
||||
// Create a new error node containing the given children.
|
||||
//
|
||||
// This node is treated as 'extra'. Its children are prevented from having
|
||||
// any effect on the parse state.
|
||||
Subtree ts_subtree_new_error_node(
|
||||
SubtreeArray *children,
|
||||
bool extra,
|
||||
const TSLanguage *language
|
||||
) {
|
||||
MutableSubtree result = ts_subtree_new_node(
|
||||
pool, ts_builtin_sym_error, children, 0, language
|
||||
ts_builtin_sym_error, children, 0, language
|
||||
);
|
||||
result.ptr->extra = extra;
|
||||
return ts_subtree_from_mut(result);
|
||||
}
|
||||
|
||||
Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding,
|
||||
const TSLanguage *language) {
|
||||
// Create a new 'missing leaf' node.
|
||||
//
|
||||
// This node is treated as 'extra'. Its children are prevented from having
|
||||
// having any effect on the parse state.
|
||||
Subtree ts_subtree_new_missing_leaf(
|
||||
SubtreePool *pool,
|
||||
TSSymbol symbol,
|
||||
Length padding,
|
||||
const TSLanguage *language
|
||||
) {
|
||||
Subtree result = ts_subtree_new_leaf(
|
||||
pool, symbol, padding, length_zero(), 0,
|
||||
0, false, false, language
|
||||
);
|
||||
|
||||
if (result.data.is_inline) {
|
||||
result.data.is_missing = true;
|
||||
} else {
|
||||
((SubtreeHeapData *)result.ptr)->is_missing = true;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -525,19 +574,22 @@ void ts_subtree_release(SubtreePool *pool, Subtree self) {
|
|||
while (pool->tree_stack.size > 0) {
|
||||
MutableSubtree tree = array_pop(&pool->tree_stack);
|
||||
if (tree.ptr->child_count > 0) {
|
||||
Subtree *children = ts_subtree_children(tree);
|
||||
for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
|
||||
Subtree child = tree.ptr->children[i];
|
||||
Subtree child = children[i];
|
||||
if (child.data.is_inline) continue;
|
||||
assert(child.ptr->ref_count > 0);
|
||||
if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) {
|
||||
array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
|
||||
}
|
||||
}
|
||||
ts_free(tree.ptr->children);
|
||||
} else if (tree.ptr->has_external_tokens) {
|
||||
ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
|
||||
ts_free(children);
|
||||
} else {
|
||||
if (tree.ptr->has_external_tokens) {
|
||||
ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
|
||||
}
|
||||
ts_subtree_pool_free(pool, tree.ptr);
|
||||
}
|
||||
ts_subtree_pool_free(pool, tree.ptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -564,7 +616,7 @@ bool ts_subtree_eq(Subtree self, Subtree other) {
|
|||
if (self.ptr->named_child_count != other.ptr->named_child_count) return false;
|
||||
|
||||
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
|
||||
if (!ts_subtree_eq(self.ptr->children[i], other.ptr->children[i])) {
|
||||
if (!ts_subtree_eq(ts_subtree_children(self)[i], ts_subtree_children(other)[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -578,8 +630,8 @@ int ts_subtree_compare(Subtree left, Subtree right) {
|
|||
if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) return -1;
|
||||
if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) return 1;
|
||||
for (uint32_t i = 0, n = ts_subtree_child_count(left); i < n; i++) {
|
||||
Subtree left_child = left.ptr->children[i];
|
||||
Subtree right_child = right.ptr->children[i];
|
||||
Subtree left_child = ts_subtree_children(left)[i];
|
||||
Subtree right_child = ts_subtree_children(right)[i];
|
||||
switch (ts_subtree_compare(left_child, right_child)) {
|
||||
case -1: return -1;
|
||||
case 1: return 1;
|
||||
|
|
@ -695,7 +747,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
|
|||
|
||||
Length child_left, child_right = length_zero();
|
||||
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) {
|
||||
Subtree *child = &result.ptr->children[i];
|
||||
Subtree *child = &ts_subtree_children(*entry.tree)[i];
|
||||
Length child_size = ts_subtree_total_size(*child);
|
||||
child_left = child_right;
|
||||
child_right = length_add(child_left, child_size);
|
||||
|
|
@ -750,7 +802,7 @@ Subtree ts_subtree_last_external_token(Subtree tree) {
|
|||
if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE;
|
||||
while (tree.ptr->child_count > 0) {
|
||||
for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) {
|
||||
Subtree child = tree.ptr->children[i];
|
||||
Subtree child = ts_subtree_children(tree)[i];
|
||||
if (ts_subtree_has_external_tokens(child)) {
|
||||
tree = child;
|
||||
break;
|
||||
|
|
@ -853,7 +905,7 @@ static size_t ts_subtree__write_to_string(
|
|||
|
||||
uint32_t structural_child_index = 0;
|
||||
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
|
||||
Subtree child = self.ptr->children[i];
|
||||
Subtree child = ts_subtree_children(self)[i];
|
||||
if (ts_subtree_extra(child)) {
|
||||
cursor += ts_subtree__write_to_string(
|
||||
child, *writer, limit,
|
||||
|
|
@ -950,7 +1002,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
|
|||
language->max_alias_sequence_length *
|
||||
ts_subtree_production_id(*self);
|
||||
for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
|
||||
const Subtree *child = &self->ptr->children[i];
|
||||
const Subtree *child = &ts_subtree_children(*self)[i];
|
||||
TSSymbol alias_symbol = 0;
|
||||
if (!ts_subtree_extra(*child) && child_info_offset) {
|
||||
alias_symbol = language->alias_sequences[child_info_offset];
|
||||
|
|
|
|||
|
|
@ -14,12 +14,19 @@ extern "C" {
|
|||
#include "tree_sitter/api.h"
|
||||
#include "tree_sitter/parser.h"
|
||||
|
||||
static const TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
|
||||
#define TS_TREE_STATE_NONE USHRT_MAX
|
||||
#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
|
||||
|
||||
typedef union Subtree Subtree;
|
||||
typedef union MutableSubtree MutableSubtree;
|
||||
|
||||
// The serialized state of an external scanner.
|
||||
//
|
||||
// Every time an external token subtree is created after a call to an
|
||||
// external scanner, the scanner's `serialize` function is called to
|
||||
// retrieve a serialized copy of its state. The bytes are then copied
|
||||
// onto the subtree itself so that the scanner's state can later be
|
||||
// restored using its `deserialize` function.
|
||||
//
|
||||
// Small byte arrays are stored inline, and long ones are allocated
|
||||
// separately on the heap.
|
||||
typedef struct {
|
||||
union {
|
||||
char *long_data;
|
||||
|
|
@ -28,6 +35,10 @@ typedef struct {
|
|||
uint32_t length;
|
||||
} ExternalScannerState;
|
||||
|
||||
// A compact representation of a subtree.
|
||||
//
|
||||
// This representation is used for small leaf nodes that are not
|
||||
// errors, and were not created by an external scanner.
|
||||
typedef struct {
|
||||
bool is_inline : 1;
|
||||
bool visible : 1;
|
||||
|
|
@ -45,6 +56,11 @@ typedef struct {
|
|||
uint16_t parse_state;
|
||||
} SubtreeInlineData;
|
||||
|
||||
// A heap-allocated representation of a subtree.
|
||||
//
|
||||
// This representation is used for parent nodes, external tokens,
|
||||
// errors, and other leaf nodes whose data is too large to fit into
|
||||
// the inline representation.
|
||||
typedef struct {
|
||||
volatile uint32_t ref_count;
|
||||
Length padding;
|
||||
|
|
@ -68,7 +84,6 @@ typedef struct {
|
|||
union {
|
||||
// Non-terminal subtrees (`child_count > 0`)
|
||||
struct {
|
||||
Subtree *children;
|
||||
uint32_t visible_child_count;
|
||||
uint32_t named_child_count;
|
||||
uint32_t node_count;
|
||||
|
|
@ -89,15 +104,17 @@ typedef struct {
|
|||
};
|
||||
} SubtreeHeapData;
|
||||
|
||||
union Subtree {
|
||||
// The fundamental building block of a syntax tree.
|
||||
typedef union {
|
||||
SubtreeInlineData data;
|
||||
const SubtreeHeapData *ptr;
|
||||
};
|
||||
} Subtree;
|
||||
|
||||
union MutableSubtree {
|
||||
// Like Subtree, but mutable.
|
||||
typedef union {
|
||||
SubtreeInlineData data;
|
||||
SubtreeHeapData *ptr;
|
||||
};
|
||||
} MutableSubtree;
|
||||
|
||||
typedef Array(Subtree) SubtreeArray;
|
||||
typedef Array(MutableSubtree) MutableSubtreeArray;
|
||||
|
|
@ -111,8 +128,9 @@ void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsign
|
|||
const char *ts_external_scanner_state_data(const ExternalScannerState *);
|
||||
|
||||
void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
|
||||
void ts_subtree_array_clear(SubtreePool *, SubtreeArray *);
|
||||
void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
|
||||
SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *);
|
||||
void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *);
|
||||
void ts_subtree_array_reverse(SubtreeArray *);
|
||||
|
||||
SubtreePool ts_subtree_pool_new(uint32_t capacity);
|
||||
|
|
@ -125,8 +143,8 @@ Subtree ts_subtree_new_leaf(
|
|||
Subtree ts_subtree_new_error(
|
||||
SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
|
||||
);
|
||||
MutableSubtree ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
|
||||
Subtree ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, bool, const TSLanguage *);
|
||||
MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
|
||||
Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *);
|
||||
Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *);
|
||||
MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
|
||||
void ts_subtree_retain(Subtree);
|
||||
|
|
@ -134,7 +152,8 @@ void ts_subtree_release(SubtreePool *, Subtree);
|
|||
bool ts_subtree_eq(Subtree, Subtree);
|
||||
int ts_subtree_compare(Subtree, Subtree);
|
||||
void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
|
||||
void ts_subtree_set_children(MutableSubtree, Subtree *, uint32_t, const TSLanguage *);
|
||||
void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *);
|
||||
void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *);
|
||||
void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
|
||||
Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
|
||||
char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all);
|
||||
|
|
@ -156,6 +175,17 @@ static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE
|
|||
|
||||
#undef SUBTREE_GET
|
||||
|
||||
// Get the size needed to store a heap-allocated subtree with the given
|
||||
// number of children.
|
||||
static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
|
||||
return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData);
|
||||
}
|
||||
|
||||
// Get a subtree's children, which are allocated immediately before the
|
||||
// tree's own heap data.
|
||||
#define ts_subtree_children(self) \
|
||||
((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)
|
||||
|
||||
static inline void ts_subtree_set_extra(MutableSubtree *self) {
|
||||
if (self->data.is_inline) {
|
||||
self->data.extra = true;
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self,
|
|||
TreeCursorEntry *result,
|
||||
bool *visible) {
|
||||
if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false;
|
||||
const Subtree *child = &self->parent.ptr->children[self->child_index];
|
||||
const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
|
||||
*result = (TreeCursorEntry) {
|
||||
.subtree = child,
|
||||
.position = self->position,
|
||||
|
|
@ -56,7 +56,7 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self,
|
|||
self->child_index++;
|
||||
|
||||
if (self->child_index < self->parent.ptr->child_count) {
|
||||
Subtree next_child = self->parent.ptr->children[self->child_index];
|
||||
Subtree next_child = ts_subtree_children(self->parent)[self->child_index];
|
||||
self->position = length_add(self->position, ts_subtree_padding(next_child));
|
||||
}
|
||||
|
||||
|
|
@ -306,7 +306,7 @@ void ts_tree_cursor_current_status(
|
|||
unsigned structural_child_index = entry->structural_child_index;
|
||||
if (!ts_subtree_extra(*entry->subtree)) structural_child_index++;
|
||||
for (unsigned j = entry->child_index + 1; j < sibling_count; j++) {
|
||||
Subtree sibling = parent_entry->subtree->ptr->children[j];
|
||||
Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j];
|
||||
TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata(
|
||||
self->tree->language,
|
||||
subtree_symbol(sibling, structural_child_index)
|
||||
|
|
|
|||
34
script/heap-profile
Executable file
34
script/heap-profile
Executable file
|
|
@ -0,0 +1,34 @@
|
|||
#!/usr/bin/env bash
#
# Usage:
#   script/heap-profile
#
# Parse an example source file and record memory usage
#
# Dependencies:
#   * `pprof` executable: https://github.com/google/pprof
#   * `gperftools` package: https://github.com/gperftools/gperftools

set -e

# All path expansions below are quoted so the script also works when the
# checkout lives in a directory whose name contains spaces.
GRAMMARS_DIR="$PWD/test/fixtures/grammars"

# Build the library
make

# Build the heap-profiling harness
clang++ \
  -I lib/include \
  -I "$GRAMMARS_DIR" \
  -D "GRAMMARS_DIR=\"${GRAMMARS_DIR}/\"" \
  -l tcmalloc \
  ./libtree-sitter.a \
  test/profile/heap.cc \
  -o target/heap-profile

# Run the harness with heap profiling enabled.
# "$@" forwards each argument intact instead of re-splitting it on whitespace.
export HEAPPROFILE="$PWD/profile"
target/heap-profile "$@"

# Extract statistics using pprof.
pprof -top -cum profile.0001.heap
|
||||
42
test/profile/heap.cc
Normal file
42
test/profile/heap.cc
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iterator>
#include <string>
#include <tree_sitter/api.h>

extern "C" {
#include "javascript/src/parser.c"
#include "javascript/src/scanner.c"
}

#define LANGUAGE tree_sitter_javascript
#define SOURCE_PATH "javascript/examples/jquery.js"

// Heap-profiling harness.
//
// Parses one fixed example file (SOURCE_PATH, resolved relative to the
// GRAMMARS_DIR macro supplied on the compiler command line) with the
// JavaScript grammar. Exits with status 1 if the language is rejected, the
// source file cannot be opened, or parsing yields no tree.
int main() {
  TSParser *parser = ts_parser_new();
  if (!ts_parser_set_language(parser, LANGUAGE())) {
    fprintf(stderr, "Invalid language\n");
    exit(1);
  }

  const char *source_path = GRAMMARS_DIR SOURCE_PATH;

  printf("Parsing %s\n", source_path);

  std::ifstream source_file(source_path);
  if (!source_file.good()) {
    fprintf(stderr, "Invalid source path %s\n", source_path);
    exit(1);
  }

  // Read the whole file into memory.
  std::string source_code(
    (std::istreambuf_iterator<char>(source_file)),
    std::istreambuf_iterator<char>()
  );

  TSTree *tree = ts_parser_parse_string(
    parser,
    NULL,
    source_code.c_str(),
    source_code.size()
  );
  if (!tree) {
    fprintf(stderr, "Failed to parse %s\n", source_path);
    exit(1);
  }

  // NOTE(review): the tree and parser are deliberately not deleted, matching
  // the original harness; presumably this keeps the parse allocations live
  // for the heap profile -- confirm before adding cleanup.
  return 0;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue