Merge branch 'master' into pike-doc-extras-default

This commit is contained in:
Max Brunsfeld 2018-06-13 09:42:22 -07:00
commit 0895ca237d
89 changed files with 5484 additions and 4095 deletions

223
README.md
View file

@ -3,222 +3,11 @@
[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter)
[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master)
Tree-sitter is a C library for incremental parsing, intended to be used via
[bindings](https://github.com/tree-sitter/node-tree-sitter) to higher-level
languages. It can be used to build a concrete syntax tree for a program and
efficiently update the syntax tree as the program is edited. This makes it suitable
for use in text-editing programs.
Tree-sitter is an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be:
Tree-sitter uses an incremental [LR parsing](https://en.wikipedia.org/wiki/LR_parser)
algorithm, as described in the paper *[Incremental Analysis of Real Programming Languages](https://www.semanticscholar.org/paper/Incremental-Analysis-of-real-Programming-Languages-Wagner-Graham/163592ac3777ee396f32318fcd83b1c563f2e496)*
by Tim Wagner & Susan Graham. It handles ambiguity at compile-time via [precedence annotations](https://en.wikipedia.org/wiki/Operator-precedence_parser),
and at run-time via the [GLR algorithm](https://en.wikipedia.org/wiki/GLR_parser).
This allows it to generate a fast parser for any language that can be described with a context-free grammar.
* **General** enough to parse any programming language
* **Fast** enough to parse on every keystroke in a text editor
* **Robust** enough to provide useful results even in the presence of syntax errors
* **Dependency-free** (and written in pure C) so that it can be embedded in any application
### Installation
```sh
script/configure # Generate a Makefile
make # Build static libraries for the compiler and runtime
```
### Overview
Tree-sitter consists of two libraries. The first library, `libcompiler`, can be
used to generate a parser for a language by supplying a [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) describing the
language. Once the parser has been generated, `libcompiler` is no longer needed.
The second library, `libruntime`, is used in combination with the parsers
generated by `libcompiler`, to generate syntax trees based on text documents, and keep the
syntax trees up-to-date as changes are made to the documents.
### Writing a grammar
Tree-sitter's grammars are specified as JSON strings. This format allows them
to be easily created and manipulated in high-level languages like [JavaScript](https://github.com/tree-sitter/node-tree-sitter-compiler).
The structure of a grammar is formally specified by [this JSON schema](./src/compiler/grammar-schema.json).
You can generate a parser for a grammar using the `ts_compile_grammar` function
provided by `libcompiler`.
Here's a simple example of using `ts_compile_grammar` to create a parser for basic
arithmetic expressions. It uses C++11 multi-line strings for readability.
```cpp
// arithmetic_grammar.cc
#include <stdio.h>
#include "tree_sitter/compiler.h"
int main() {
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "arithmetic",
// Things that can appear anywhere in the language, like comments
// and whitespace, are expressed as 'extras'.
"extras": [
{"type": "PATTERN", "value": "\\s"},
{"type": "SYMBOL", "name": "comment"}
],
"rules": {
// The first rule listed in the grammar becomes the 'start rule'.
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "product"},
{"type": "SYMBOL", "name": "number"},
{"type": "SYMBOL", "name": "variable"},
{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "("},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": ")"}
]
}
]
},
// Tokens like '+' and '*' are described directly within the
// grammar's rules, as opposed to in a separate lexer description.
"sum": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Ambiguities can be resolved at compile time by assigning precedence
// values to rule subtrees.
"product": {
"type": "PREC_LEFT",
"value": 2,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Tokens can be specified using ECMAScript regexps.
"number": {"type": "PATTERN", "value": "\\d+"},
"comment": {"type": "PATTERN", "value": "#.*"},
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"},
}
}
)JSON");
if (result.error_type != TSCompileErrorTypeNone) {
fprintf(stderr, "Compilation failed: %s\n", result.error_message);
return 1;
}
puts(result.code);
return 0;
}
```
To create the parser, compile this file like this:
```sh
clang++ -std=c++11 \
-I tree-sitter/include \
arithmetic_grammar.cc \
"$(find tree-sitter/out/Release -name libcompiler.a)" \
-o arithmetic_grammar
```
Then run the executable to print out the C code for the parser:
```sh
./arithmetic_grammar > arithmetic_parser.c
```
### Using the parser
#### Providing the text to parse
Text input is provided to a tree-sitter parser via a `TSInput` struct, which
contains function pointers for seeking to positions in the text, and for reading
chunks of text. The text can be encoded in either UTF8 or UTF16. This interface
allows you to efficiently parse text that is stored in your own data structure.
#### Querying the syntax tree
The `libruntime` API provides a DOM-style interface for inspecting
syntax trees. Functions like `ts_node_child(node, index)` and `ts_node_next_sibling(node)`
expose every node in the concrete syntax tree. This is useful for operations
like syntax-highlighting, which operate on a token-by-token basis. You can also
traverse the tree in a more abstract way by using functions like
`ts_node_named_child(node, index)` and `ts_node_next_named_sibling(node)`. These
functions don't expose nodes that were specified in the grammar as anonymous
tokens, like `(` and `+`. This is useful when analyzing the meaning of a document.
```c
// test_parser.c
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include "tree_sitter/runtime.h"
// Declare the language function that was generated from your grammar.
TSLanguage *tree_sitter_arithmetic();
int main() {
TSDocument *document = ts_document_new();
ts_document_set_language(document, tree_sitter_arithmetic());
ts_document_set_input_string(document, "a + b * 5");
ts_document_parse(document);
TSNode root_node = ts_document_root_node(document);
assert(!strcmp(ts_node_type(root_node, document), "expression"));
assert(ts_node_named_child_count(root_node) == 1);
TSNode sum_node = ts_node_named_child(root_node, 0);
assert(!strcmp(ts_node_type(sum_node, document), "sum"));
assert(ts_node_named_child_count(sum_node) == 2);
TSNode product_node = ts_node_child(ts_node_named_child(sum_node, 1), 0);
assert(!strcmp(ts_node_type(product_node, document), "product"));
assert(ts_node_named_child_count(product_node) == 2);
printf("Syntax tree: %s\n", ts_node_string(root_node, document));
ts_document_free(document);
return 0;
}
```
To demo this parser's capabilities, compile this program like this:
```sh
clang \
-I tree-sitter/include \
test_parser.c arithmetic_parser.c \
"$(find tree-sitter/out/Release -name libruntime.a)" \
-o test_parser
./test_parser
```
### References
- [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf)
- [Context Aware Scanning for Parsing Extensible Languages](http://www.umsec.umn.edu/publications/Context-Aware-Scanning-Parsing-Extensible)
- [Efficient and Flexible Incremental Parsing](http://ftp.cs.berkeley.edu/sggs/toplas-parsing.ps)
- [Incremental Analysis of Real Programming Languages](https://pdfs.semanticscholar.org/ca69/018c29cc415820ed207d7e1d391e2da1656f.pdf)
- [Error Detection and Recovery in LR Parsers](http://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13)
- [Error Recovery for LR Parsers](http://www.dtic.mil/dtic/tr/fulltext/u2/a043470.pdf)
[Documentation](http://tree-sitter.github.io/tree-sitter/)

View file

@ -1 +1,2 @@
markdown: kramdown
theme: jekyll-theme-cayman

133
docs/_layouts/default.html Normal file
View file

@ -0,0 +1,133 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
<link rel="stylesheet" href="{{ '/assets/css/style.css?v=' | append: site.github.build_revision | relative_url }}" media="screen" type="text/css">
<!-- Separate the site name from the page title; the separator is omitted for pages without a title. -->
<title>Tree-sitter{% if page.title %} | {{ page.title }}{% endif %}</title>
</head>
<body class="sidebar-hidden">
<script>
if (localStorage.getItem('sidebar-active') === 'true') {
document.body.classList.remove('sidebar-hidden');
}
</script>
<div id="container">
<div id="sidebar">
<nav id="table-of-contents">
<!-- Logo at the top of the sidebar; links to the project repository. -->
<a class="logo table-of-contents-section" href="https://github.com/tree-sitter/tree-sitter">
<!-- The alt text gives the image link an accessible name. -->
<img src="{{ '/assets/images/tree-sitter-small.png' | relative_url }}" alt="Tree-sitter logo" width="200" height="200" />
</a>
{% for other_page in site.html_pages %}
{% if page.title == other_page.title %}
<li class="table-of-contents-section active">
<a class="nav-link" href="{{ other_page.url | relative_url }}">
{{ other_page.title }}
</a>
<div id="current-page-table-of-contents">
{% capture whitespace %}
{% comment %}
Builds a markdown list of links to the headings of the current page.
The rendered page content is split on the opening of each heading tag, the
heading level, id and inner content are extracted from every piece, and one
markdown list item per heading is appended to my_toc. Everything happens
inside a capture so the intermediate whitespace is not emitted.
{% endcomment %}
{% assign min_header = 2 %}
{% assign max_header = 6 %}
{% assign nodes = content | split: "<h" %}
{% assign first_header = true %}
{% for node in nodes %}
{% if node == "" %}
{% continue %}
{% endif %}
{% comment %} The first character after the split point is the heading level. {% endcomment %}
{% assign header_level = node | replace: '"', '' | slice: 0, 1 | times: 1 %}
{% if header_level < min_header or header_level > max_header %}
{% continue %}
{% endif %}
{% comment %} The first accepted heading defines the top level of the list. {% endcomment %}
{% if first_header %}
{% assign first_header = false %}
{% assign min_header = header_level %}
{% endif %}
{% comment %}
NOTE(review): "add" does not appear to be a standard Liquid filter (the
standard one is "plus"); confirm whether it is intentionally a no-op here
before changing it, since the indentation below depends on the result.
{% endcomment %}
{% assign indent_level = header_level | minus: min_header | add: 1 %}
{% assign header_content = node | split: '</h' %}
{% assign header_content = header_content[0] %}
{% assign html_id = header_content | split: 'id="' %}
{% assign html_id = html_id[1] | split: '"' %}
{% assign html_id = html_id[0] %}
{% capture header_attrs_to_strip %}{{ header_content | split: '>' | first }}>{% endcapture %}
{% assign header = header_content | replace: header_attrs_to_strip, '' %}
{% assign space = '' %}
{% for i in (1..indent_level) %}
{% assign space = space | prepend: ' ' %}
{% endfor %}
{% capture my_toc %}{{ my_toc }}
{{ space }}- [{{ header }}](#{{ html_id }}){: .nav-link}{% endcapture %}
{% endfor %}
{% endcapture %}
{{ my_toc | strip | markdownify | strip }}
</div>
</li>
{% else %}
<li class="table-of-contents-section">
<a class="nav-link" href="{{ other_page.url | relative_url }}">
{{ other_page.title }}
</a>
</li>
{% endif %}
{% endfor %}
</nav>
</div>
<a id="sidebar-toggle-link" href="#"></a>
<main id="main-content">
{{ content }}
</main>
</div>
</body>
</html>
<script
src="https://code.jquery.com/jquery-3.3.1.min.js"
crossorigin="anonymous">
</script>
<script
src="https://maxcdn.bootstrapcdn.com/bootstrap/4.1.0/js/bootstrap.bundle.min.js">
</script>
<script>
// Clicking the toggle link shows or hides the sidebar and flips the
// 'sidebar-active' flag kept in localStorage.
$('#sidebar-toggle-link').on('click', function(event) {
  event.preventDefault();
  $(document.body).toggleClass('sidebar-hidden');

  var wasActive = localStorage.getItem('sidebar-active') === 'true';
  localStorage.setItem('sidebar-active', wasActive ? 'false' : 'true');
});
// Only enable scrollspy when the page is more than 500px taller than the
// viewport.
var pageIsLong = document.body.scrollHeight > window.innerHeight + 500;
if (pageIsLong) {
  var scrollspyOptions = {
    target: '#current-page-table-of-contents',
    offset: 40
  };
  $(document.body).scrollspy(scrollspyOptions);
}
// When the document is scrolled horizontally, hide the sidebar and record
// that state in localStorage.
$(document).on('scroll', function() {
  if ($(document).scrollLeft() <= 0) {
    return;
  }
  localStorage.setItem('sidebar-active', 'false');
  $(document.body).addClass('sidebar-hidden');
});
// Turn every heading that has an id into a link to itself, so a reader can
// copy a permalink to that section.
$('h1, h2, h3, h4, h5, h6').filter('[id]').each(function() {
  var heading = $(this);
  // Build the anchor with attr()/text() instead of concatenating an HTML
  // string, so heading text containing "<", "&" or quotes is inserted as
  // text rather than being re-parsed as markup.
  var link = $('<a>')
    .attr('href', '#' + heading.attr('id'))
    .text(heading.text());
  heading.empty().append(link);
});
</script>

View file

@ -1,74 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
<link rel="stylesheet" type="text/css" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" media="screen">
<link rel="stylesheet" type="text/css" href="{{ site.baseurl }}/css/style.css" media="screen">
<title>{{ page.title }}</title>
</head>
<body data-spy="scroll" data-target="#table-of-contents" data-offset="40">
<!-- Fixed sidebar -->
<div style="position: fixed; width: 100%;">
<div class="container" style="height: 0;">
<div class="row" style="height: 0;">
<div class="col-md-3">
<nav class="nav navbar navbar-light" id="table-of-contents"></nav>
</div>
</div>
</div>
</div>
<!-- Main content -->
<div class="container">
<div class="row">
<div class="col-md-3" style="pointer-events: none;">
</div>
<div class="col-md-9 content">
<div id="main-content">
{{ content }}
</div>
</div>
</div>
</div>
</body>
</html>
<!-- Generate a table of contents based on header elements -->
<script type="text/javascript">
var mainContent = document.getElementById('main-content');
var tableOfContents = document.getElementById('table-of-contents');
var headers = mainContent.querySelectorAll('h2, h3');
var lastSubnav;
for (let i = 0, length = headers.length; i < length; i++) {
var header = headers[i];
if (!header.id) continue;
var li = document.createElement('li');
li.className = 'navbar-item';
var link = document.createElement('a');
link.href = '#' + header.id;
link.innerText = header.innerText;
link.className = 'nav-link'
li.appendChild(link);
if (header.tagName === 'H2') {
lastSubnav = document.createElement('ul');
lastSubnav.className = 'nav navbar';
li.appendChild(lastSubnav);
tableOfContents.appendChild(li);
} else {
lastSubnav.appendChild(li);
}
}
</script>
<script
src="https://code.jquery.com/jquery-3.3.1.min.js"
crossorigin="anonymous"></script>
<script
src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.bundle.min.js"></script>

118
docs/assets/css/style.scss Normal file
View file

@ -0,0 +1,118 @@
---
---
@import 'jekyll-theme-cayman';
// Shared layout constants.
// Padding applied to the sidebar and the main content.
$padding: 20px;
// Width of the fixed sidebar; also used to offset the toggle link and the main content.
$sidebar-width: 300px;
// Transition applied to the sidebar, toggle link and main content in the narrow-viewport media query.
$sidebar-transition: left 0.25s;
// Maximum width of the centered #container.
$container-width: 1024px;
body {
overflow: scroll;
}
#container {
position: relative;
max-width: $container-width;
margin: 0 auto;
}
#main-content, #sidebar {
padding: $padding 0;
}
#sidebar {
position: fixed;
background: white;
top: 0;
bottom: 0;
width: $sidebar-width;
overflow-y: auto;
border-right: 1px solid #ccc;
z-index: 1;
}
#sidebar-toggle-link {
font-size: 24px;
position: fixed;
background-color: white;
opacity: 0.75;
box-shadow: 1px 1px 5px #aaa;
left: $sidebar-width;
padding: 5px 10px;
display: none;
z-index: 100;
text-decoration: none !important;
color: #aaa;
}
#main-content {
position: relative;
padding: $padding;
padding-left: $sidebar-width + $padding;
}
.nav-link.active {
text-decoration: underline;
}
.table-of-contents-section {
border-bottom: 1px solid #ccc;
}
.logo {
display: block;
}
.table-of-contents-section.active {
background-color: #edffcb;
}
.table-of-contents-section {
padding: 10px 20px;
}
#table-of-contents {
ul {
padding: 0;
margin: 0;
}
li {
display: block;
padding: 5px 20px;
}
}
// Viewports up to 900px wide: the sidebar, toggle link and main content are
// positioned with `left`, and all three move when body has the
// `sidebar-hidden` class.
@media (max-width: 900px) {
#sidebar {
left: 0;
transition: $sidebar-transition;
}
// The toggle link is `display: none` outside this media query.
#sidebar-toggle-link {
display: block;
transition: $sidebar-transition;
}
// Shift the content right by the sidebar width instead of padding it.
#main-content {
left: $sidebar-width;
padding-left: $padding;
transition: $sidebar-transition;
}
// Hidden state: the sidebar moves off-screen to the left, and the content
// and toggle link move to the left edge.
body.sidebar-hidden {
#sidebar {
left: -$sidebar-width;
}
#main-content {
left: 0;
}
#sidebar-toggle-link {
left: 0;
}
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

View file

@ -1,13 +0,0 @@
#main-content, #table-of-contents {
margin-top: 20px;
}
#table-of-contents {
padding: 10px;
border-radius: 10px;
border: 1px solid #ddd;
}
.nav-link.active {
text-decoration: underline;
}

View file

@ -1,10 +1,62 @@
Tree-sitter is a library for parsing source code. It aims to be:
---
title: Introduction
---
# Introduction
Tree-sitter is an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be:
* **General** enough to parse any programming language
* **Dependency-free** and written in pure C so that it can be embedded in any application
* **Fast** and incremental so that it can be used in a text editor
* **Robust** enough to provide useful results even in the presence of syntax errors
* **Fast** enough to parse on every keystroke in a text editor
* **Robust** enough to provide useful results even in the presence of syntax errors
* **Dependency-free** (and written in pure C) so that it can be embedded in any application
## Table of contents
### Language Bindings
1. [Creating parsers](creating-parsers.md)
There are currently bindings that allow Tree-sitter to be used from the following languages:
* [JavaScript](https://github.com/tree-sitter/node-tree-sitter)
* [Rust](https://github.com/tree-sitter/rust-tree-sitter)
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
* [Ruby](https://github.com/tree-sitter/ruby-tree-sitter)
### Available Parsers
Parsers for these languages are fairly complete:
* [Bash](https://github.com/tree-sitter/tree-sitter-bash)
* [C](https://github.com/tree-sitter/tree-sitter-c)
* [C++](https://github.com/tree-sitter/tree-sitter-cpp)
* [Go](https://github.com/tree-sitter/tree-sitter-go)
* [HTML](https://github.com/tree-sitter/tree-sitter-html)
* [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript)
* [PHP](https://github.com/tree-sitter/tree-sitter-php)
* [Python](https://github.com/tree-sitter/tree-sitter-python)
* [Ruby](https://github.com/tree-sitter/tree-sitter-ruby)
* [Rust](https://github.com/tree-sitter/tree-sitter-rust)
* [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript)
Parsers for these languages are in development:
* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell)
* [Java](https://github.com/tree-sitter/tree-sitter-java)
* [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml)
* [C-sharp](https://github.com/tree-sitter/tree-sitter-c-sharp)
* [Julia](https://github.com/tree-sitter/tree-sitter-julia)
* [Scala](https://github.com/tree-sitter/tree-sitter-scala)
### Talks on Tree-sitter
* [FOSDEM 2018](https://www.youtube.com/watch?v=0CGzC_iss-8)
* [GitHub Universe 2017](https://www.youtube.com/watch?v=a1rC79DHpmY)
### Underlying Research
The design of Tree-sitter was greatly influenced by the following research papers:
- [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf)
- [Context Aware Scanning for Parsing Extensible Languages](http://www.umsec.umn.edu/publications/Context-Aware-Scanning-Parsing-Extensible)
- [Efficient and Flexible Incremental Parsing](http://ftp.cs.berkeley.edu/sggs/toplas-parsing.ps)
- [Incremental Analysis of Real Programming Languages](https://pdfs.semanticscholar.org/ca69/018c29cc415820ed207d7e1d391e2da1656f.pdf)
- [Error Detection and Recovery in LR Parsers](http://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13)
- [Error Recovery for LR Parsers](http://www.dtic.mil/dtic/tr/fulltext/u2/a043470.pdf)

View file

@ -0,0 +1,24 @@
---
title: Architecture
permalink: architecture
---
# Architecture
Tree-sitter consists of two separate libraries, both of which expose C APIs.
The first library, `libcompiler`, is
used to generate a parser for a language by supplying a [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) describing the
language. `libcompiler` is a build tool; it is no longer needed once a parser has been generated. Its public interface is specified in the header file [`compiler.h`](https://github.com/tree-sitter/tree-sitter/blob/master/include/tree_sitter/compiler.h).
The second library, `libruntime`, is used in combination with the parsers
generated by `libcompiler`, to produce syntax trees from source code and keep the
syntax trees up-to-date as the source code changes. `libruntime` is designed to be embedded in applications. Its interface is specified in the header file [`runtime.h`](https://github.com/tree-sitter/tree-sitter/blob/master/include/tree_sitter/runtime.h).
## The Compiler
WIP
## The Runtime
WIP

View file

@ -1,5 +1,6 @@
---
layout: table-of-contents
title: Creating Parsers
permalink: creating-parsers
---
# Creating parsers
@ -57,59 +58,63 @@ It's usually a good idea to find a formal specification for the language you're
Although languages have very different constructs, their constructs can often be categorized into similar groups like *Declarations*, *Definitions*, *Statements*, *Expressions*, *Types*, and *Patterns*. In writing your grammar, a good first step is to create just enough structure to include all of these basic *groups* of symbols. For an imaginary C-like language, this might look something like this:
```js
rules: $ => {
source_file: $ => repeat($._definition),
{
// ...
_definition: $ => choice(
$.function_definition
// TODO: other kinds of definitions
),
rules: $ => {
source_file: $ => repeat($._definition),
function_definition: $ => seq(
'func',
$.identifier,
$.parameter_list,
$._type,
$.block
),
_definition: $ => choice(
$.function_definition
// TODO: other kinds of definitions
),
parameter_list: $ => seq(
'(',
// TODO: parameters
')'
),
function_definition: $ => seq(
'func',
$.identifier,
$.parameter_list,
$._type,
$.block
),
_type: $ => choice(
'bool'
// TODO: other kinds of types
),
parameter_list: $ => seq(
'(',
// TODO: parameters
')'
),
block: $ => seq(
'{',
repeat($._statement),
'}'
),
_type: $ => choice(
'bool'
// TODO: other kinds of types
),
_statement: $ => choice(
$.return_statement
// TODO: other kinds of statements
),
block: $ => seq(
'{',
repeat($._statement),
'}'
),
return_statement: $ => seq(
'return',
$._expression,
';'
),
_statement: $ => choice(
$.return_statement
// TODO: other kinds of statements
),
_expression: $ => choice(
$.identifier,
$.number
// TODO: other kinds of expressions
),
return_statement: $ => seq(
'return',
$._expression,
';'
),
identifier: $ => /[a-z]+/,
_expression: $ => choice(
$.identifier,
$.number
// TODO: other kinds of expressions
),
number: $ => /\d+/
identifier: $ => /[a-z]+/,
number: $ => /\d+/
}
}
```
@ -118,27 +123,31 @@ Some of the details of this grammar will be explained in more depth later on, bu
With this structure in place, you can now freely decide what part of the grammar to flesh out next. For example, you might decide to start with *types*. One-by-one, you could define the rules for writing basic types and composing them into more complex types:
```js
_type: $ => choice(
$.primitive_type,
$.array_type,
$.pointer_type
),
{
// ...
primitive_type: $ => choice(
'bool',
'int'
),
_type: $ => choice(
$.primitive_type,
$.array_type,
$.pointer_type
),
array_type: $ => seq(
'[',
']',
$._type
),
primitive_type: $ => choice(
'bool',
'int'
),
pointer_type: $ => seq(
'*',
$._type
),
array_type: $ => seq(
'[',
']',
$._type
),
pointer_type: $ => seq(
'*',
$._type
)
}
```
After developing the *type* sublanguage a bit further, you might decide to switch to working on *statements* or *expressions* instead. It's often useful to check your progress by trying to parse some real code using `tree-sitter parse`.
@ -250,24 +259,28 @@ The language spec encodes the 20 precedence levels of JavaScript expressions usi
To produce a readable syntax tree, we'd like to model JavaScript expressions using a much flatter structure like this:
```js
_expression: $ => choice(
$.identifier,
$.unary_expression,
$.binary_expression,
{
// ...
),
unary_expression: $ => choice(
seq('-', $._expression),
seq('!', $._expression),
// ...
),
_expression: $ => choice(
$.identifier,
$.unary_expression,
$.binary_expression,
// ...
),
binary_expression: $ => choice(
seq($._expression, '*', $._expression),
seq($._expression, '+', $._expression),
// ...
),
unary_expression: $ => choice(
seq('-', $._expression),
seq('!', $._expression),
// ...
),
binary_expression: $ => choice(
seq($._expression, '*', $._expression),
seq($._expression, '+', $._expression),
// ...
),
}
```
Of course, this flat structure is highly ambiguous. If we try to generate a parser, Tree-sitter gives us an error message:
@ -293,11 +306,15 @@ Possible resolutions:
For an expression like `-a * b`, it's not clear whether the `-` operator applies to the `a * b` or just to the `a`. This is where the `prec` function described above comes into play. By wrapping a rule with `prec`, we can indicate that certain sequences of symbols should *bind to each other more tightly* than others. For example, the `'-', $._expression` sequence in `unary_expression` should bind more tightly than the `$._expression, '+', $._expression` sequence in `binary_expression`:
```js
unary_expression: $ => prec(2, choice(
seq('-', $._expression),
seq('!', $._expression),
{
// ...
))
unary_expression: $ => prec(2, choice(
seq('-', $._expression),
seq('!', $._expression),
// ...
))
}
```
### Using associativity
@ -323,11 +340,15 @@ Possible resolutions:
For an expression like `a * b * c`, it's not clear whether we mean `a * (b * c)` or `(a * b) * c`. This is where `prec.left` and `prec.right` come into use. We want to select the second interpretation, so we use `prec.left`.
```js
binary_expression: $ => choice(
prec.left(2, seq($._expression, '*', $._expression)),
prec.left(1, seq($._expression, '+', $._expression)),
{
// ...
),
binary_expression: $ => choice(
prec.left(2, seq($._expression, '*', $._expression)),
prec.left(1, seq($._expression, '+', $._expression)),
// ...
),
}
```
### Hiding rules
@ -336,6 +357,8 @@ You may have noticed in the above examples that some of the grammar rule name li
## Dealing with LR conflicts
TODO
[cst]: https://en.wikipedia.org/wiki/Parse_tree
[non-terminal]: https://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols
[language-spec]: https://en.wikipedia.org/wiki/Programming_language_specification

View file

@ -0,0 +1,81 @@
---
title: Using Parsers
permalink: using-parsers
---
# Using Parsers
A Tree-sitter parser consists of a single C source file which exports one function with the naming scheme `tree_sitter_${LANGUAGE_NAME}`. This function returns a pointer to a `TSLanguage` struct, which can be used in conjunction with a `TSParser` to produce syntax trees.
## The Raw C API
Here's an example of a simple C program that uses the Tree-sitter [JSON parser](https://github.com/tree-sitter/tree-sitter-json).
```c
// Filename - test-json-parser.c
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include "tree_sitter/runtime.h"
TSLanguage *tree_sitter_json();
int main() {
// Create a parser with the JSON language.
TSParser *parser = ts_parser_new();
ts_parser_set_language(parser, tree_sitter_json());
// Parse some source code.
const char *source_code = "[1, null]";
TSTree *tree = ts_parser_parse_string(parser, NULL, source_code, strlen(source_code));
// Find some syntax tree nodes.
TSNode root_node = ts_tree_root_node(tree);
TSNode array_node = ts_node_named_child(root_node, 0);
TSNode number_node = ts_node_named_child(array_node, 0);
// Check that the nodes have the expected types.
assert(!strcmp(ts_node_type(root_node), "value"));
assert(!strcmp(ts_node_type(array_node), "array"));
assert(!strcmp(ts_node_type(number_node), "number"));
// Check that the nodes have the expected child counts.
assert(ts_node_child_count(root_node) == 1);
assert(ts_node_child_count(array_node) == 4);
assert(ts_node_named_child_count(array_node) == 2);
assert(ts_node_child_count(number_node) == 0);
// Print the syntax tree as an S-expression.
char *string = ts_node_string(root_node);
printf("Syntax tree: %s\n", string);
// Free all of the heap allocations.
free(string);
ts_tree_delete(tree);
ts_parser_delete(parser);
return 0;
}
```
This program uses the Tree-sitter C API, which is declared in the header file `tree_sitter/runtime.h`, so we need to add the `tree_sitter/include` directory to the include path. We also need to link `libruntime.a` into the binary.
```sh
clang \
-I tree-sitter/include \
test-json-parser.c \
tree-sitter-json/src/parser.c \
tree-sitter/out/Release/libruntime.a \
-o test-json-parser
./test-json-parser
```
### Providing the text to parse
Text input is provided to a tree-sitter parser via a `TSInput` struct, which contains function pointers for seeking to positions in the text, and for reading chunks of text. The text can be encoded in either UTF8 or UTF16. This interface allows you to efficiently parse text that is stored in your own data structure.
### Querying the syntax tree
Tree-sitter provides a DOM-style interface for inspecting syntax trees. Functions like `ts_node_child(node, index)` and `ts_node_next_sibling(node)` expose every node in the concrete syntax tree. This is useful for operations like syntax-highlighting, which operate on a token-by-token basis. You can also traverse the tree in a more abstract way by using functions like
`ts_node_named_child(node, index)` and `ts_node_next_named_sibling(node)`. These functions don't expose nodes that were specified in the grammar as anonymous tokens, like `:` and `{`. This is useful when analyzing the meaning of a document.

2
externals/utf8proc vendored

@ -1 +1 @@
Subproject commit 40e605959eb5cb90b2587fa88e3b661558fbc55a
Subproject commit d81308faba0cfb3fccf8c3b12446863c7b76ae32

View file

@ -5,6 +5,8 @@
extern "C" {
#endif
#include <stdio.h>
typedef enum {
TSCompileErrorTypeNone,
TSCompileErrorTypeInvalidGrammar,
@ -25,7 +27,7 @@ typedef struct {
TSCompileErrorType error_type;
} TSCompileResult;
TSCompileResult ts_compile_grammar(const char *input);
TSCompileResult ts_compile_grammar(const char *input, FILE *log_file);
#ifdef __cplusplus
}

View file

@ -9,13 +9,17 @@ extern "C" {
#include <stdint.h>
#include <stdlib.h>
typedef uint16_t TSSymbol;
typedef uint16_t TSStateId;
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
#ifndef TREE_SITTER_RUNTIME_H_
typedef uint16_t TSSymbol;
typedef struct TSLanguage TSLanguage;
#endif
typedef uint16_t TSStateId;
typedef struct {
bool visible : 1;
bool named : 1;
@ -66,7 +70,7 @@ typedef union {
};
} TSParseActionEntry;
typedef struct TSLanguage {
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
@ -91,7 +95,7 @@ typedef struct TSLanguage {
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
} TSLanguage;
};
/*
* Lexer Macros
@ -129,6 +133,7 @@ typedef struct TSLanguage {
*/
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(state_value) \

View file

@ -5,15 +5,17 @@
extern "C" {
#endif
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#define TREE_SITTER_LANGUAGE_VERSION 8
typedef unsigned short TSSymbol;
typedef uint16_t TSSymbol;
typedef struct TSLanguage TSLanguage;
typedef struct TSDocument TSDocument;
typedef struct TSParser TSParser;
typedef struct TSTree TSTree;
typedef enum {
TSInputEncodingUTF8,
@ -31,6 +33,11 @@ typedef struct {
uint32_t column;
} TSPoint;
typedef struct {
TSPoint start;
TSPoint end;
} TSRange;
typedef struct {
void *payload;
const char *(*read)(void *payload, uint32_t *bytes_read);
@ -50,31 +57,57 @@ typedef struct {
typedef struct {
uint32_t start_byte;
uint32_t bytes_removed;
uint32_t bytes_added;
uint32_t old_end_byte;
uint32_t new_end_byte;
TSPoint start_point;
TSPoint extent_removed;
TSPoint extent_added;
TSPoint old_end_point;
TSPoint new_end_point;
} TSInputEdit;
typedef struct {
TSPoint start;
TSPoint end;
} TSRange;
uint32_t context[4];
const void *id;
const void *tree;
} TSNode;
typedef struct {
const void *data;
uint32_t offset[2];
} TSNode;
uint32_t context[2];
const void *id;
const void *tree;
} TSTreeCursor;
TSParser *ts_parser_new();
void ts_parser_delete(TSParser *);
const TSLanguage *ts_parser_language(const TSParser *);
bool ts_parser_set_language(TSParser *, const TSLanguage *);
TSLogger ts_parser_logger(const TSParser *);
void ts_parser_set_logger(TSParser *, TSLogger);
void ts_parser_print_dot_graphs(TSParser *, FILE *);
void ts_parser_halt_on_error(TSParser *, bool);
TSTree *ts_parser_parse(TSParser *, const TSTree *, TSInput);
TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_t);
bool ts_parser_enabled(const TSParser *);
void ts_parser_set_enabled(TSParser *, bool);
size_t ts_parser_operation_limit(const TSParser *);
void ts_parser_set_operation_limit(TSParser *, size_t);
TSTree *ts_parser_resume(TSParser *);
TSTree *ts_tree_copy(const TSTree *);
void ts_tree_delete(TSTree *);
TSNode ts_tree_root_node(const TSTree *);
void ts_tree_edit(TSTree *, const TSInputEdit *);
TSRange *ts_tree_get_changed_ranges(const TSTree *, const TSTree *, uint32_t *);
void ts_tree_print_dot_graph(const TSTree *, FILE *);
uint32_t ts_node_start_byte(TSNode);
TSPoint ts_node_start_point(TSNode);
uint32_t ts_node_end_byte(TSNode);
TSPoint ts_node_end_point(TSNode);
TSSymbol ts_node_symbol(TSNode);
const char *ts_node_type(TSNode, const TSDocument *);
char *ts_node_string(TSNode, const TSDocument *);
const char *ts_node_type(TSNode);
char *ts_node_string(TSNode);
bool ts_node_eq(TSNode, TSNode);
bool ts_node_is_null(TSNode);
bool ts_node_is_named(TSNode);
bool ts_node_is_missing(TSNode);
bool ts_node_has_changes(TSNode);
@ -84,7 +117,6 @@ TSNode ts_node_child(TSNode, uint32_t);
TSNode ts_node_named_child(TSNode, uint32_t);
uint32_t ts_node_child_count(TSNode);
uint32_t ts_node_named_child_count(TSNode);
uint32_t ts_node_child_index(TSNode);
TSNode ts_node_next_sibling(TSNode);
TSNode ts_node_next_named_sibling(TSNode);
TSNode ts_node_prev_sibling(TSNode);
@ -96,32 +128,13 @@ TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint);
TSNode ts_node_named_descendant_for_point_range(TSNode, TSPoint, TSPoint);
TSDocument *ts_document_new();
void ts_document_free(TSDocument *);
const TSLanguage *ts_document_language(TSDocument *);
void ts_document_set_language(TSDocument *, const TSLanguage *);
TSInput ts_document_input(TSDocument *);
void ts_document_set_input(TSDocument *, TSInput);
void ts_document_set_input_string(TSDocument *, const char *);
void ts_document_set_input_string_with_length(TSDocument *, const char *, uint32_t);
TSLogger ts_document_logger(const TSDocument *);
void ts_document_set_logger(TSDocument *, TSLogger);
void ts_document_print_debugging_graphs(TSDocument *, bool);
void ts_document_edit(TSDocument *, TSInputEdit);
void ts_document_parse(TSDocument *);
void ts_document_parse_and_get_changed_ranges(TSDocument *, TSRange **, uint32_t *);
typedef struct {
TSRange **changed_ranges;
uint32_t *changed_range_count;
bool halt_on_error;
} TSParseOptions;
void ts_document_parse_with_options(TSDocument *, TSParseOptions);
void ts_document_invalidate(TSDocument *);
TSNode ts_document_root_node(const TSDocument *);
uint32_t ts_document_parse_count(const TSDocument *);
TSTreeCursor ts_tree_cursor_new(const TSTree *);
void ts_tree_cursor_delete(TSTreeCursor *);
bool ts_tree_cursor_goto_first_child(TSTreeCursor *);
int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t);
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *);
bool ts_tree_cursor_goto_parent(TSTreeCursor *);
TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
uint32_t ts_language_symbol_count(const TSLanguage *);
const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);

View file

@ -22,6 +22,7 @@
'src/compiler/compile.cc',
'src/compiler/generate_code/c_code.cc',
'src/compiler/lex_table.cc',
'src/compiler/log.cc',
'src/compiler/parse_grammar.cc',
'src/compiler/parse_table.cc',
'src/compiler/precedence_range.cc',
@ -87,7 +88,6 @@
'externals/utf8proc',
],
'sources': [
'src/runtime/document.c',
'src/runtime/get_changed_ranges.c',
'src/runtime/language.c',
'src/runtime/lexer.c',
@ -95,7 +95,9 @@
'src/runtime/stack.c',
'src/runtime/parser.c',
'src/runtime/string_input.c',
'src/runtime/subtree.c',
'src/runtime/tree.c',
'src/runtime/tree_cursor.c',
'src/runtime/utf16.c',
'externals/utf8proc/utf8proc.c',
],
@ -150,7 +152,7 @@
'-Wno-unused-parameter'
],
'defines': ['UTF8PROC_EXPORTS'],
'defines': ['UTF8PROC_STATIC'],
'xcode_settings': {
'ALWAYS_SEARCH_USER_PATHS': 'NO',

4
script/serve-docs Executable file
View file

@ -0,0 +1,4 @@
#!/bin/bash
# Serve the documentation site locally with Jekyll.
# The docs directory is resolved relative to the current working directory.

# Stop if the docs directory cannot be entered, rather than launching Jekyll
# from whatever directory the script happened to be started in.
cd docs || exit 1
bundle exec jekyll serve

View file

@ -132,7 +132,7 @@ case ${mode} in
if [[ -n $line_count ]]; then
head -n $line_count $dot_file | dot -Tsvg >> $html_file
else
cat $dot_file | dot -Tsvg >> $html_file
cat $dot_file | grep -v 'Assertion' | dot -Tsvg >> $html_file
fi
rm $dot_file
echo "Wrote $html_file - $line_count"

View file

@ -1,2 +1,9 @@
@echo off
msbuild /p:Configuration=Test tests.vcxproj
.\test\tests.exe --reporter=singleline --no-color
set only_arg=
IF not "%~1"=="" (
set only_arg=--only=%1
)
.\test\tests.exe --reporter=singleline --no-color %only_arg%

View file

@ -80,7 +80,7 @@ class TransitionBuilder {
public:
void apply(const Rule &rule) {
rule.match(
[this](const rules::Blank &) {},
[](const rules::Blank &) {},
[this](const rules::CharacterSet &character_set) {
PrecedenceRange precedence;

View file

@ -9,9 +9,24 @@
#include <vector>
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/parse_table.h"
#include "compiler/lexical_grammar.h"
#include "compiler/log.h"
#include "compiler/parse_table.h"
#include "compiler/rule.h"
#include "utf8proc.h"
namespace std {

using tree_sitter::rules::Symbol;

// Hashes a pair of symbol indices by hashing each index on its own and
// XOR-ing the two results together.
size_t hash<pair<Symbol::Index, Symbol::Index>>::operator()(
  const pair<Symbol::Index, Symbol::Index> &p
) const {
  const hash<Symbol::Index> index_hash{};
  const size_t first_hash = index_hash(p.first);
  const size_t second_hash = index_hash(p.second);
  return first_hash ^ second_hash;
}

}  // namespace std
namespace tree_sitter {
namespace build_tables {
@ -34,8 +49,24 @@ using rules::Symbol;
using rules::Metadata;
using rules::Seq;
static const std::unordered_set<ParseStateId> EMPTY;
// Reports whether two tokens are coincident: a token is always coincident
// with itself, and otherwise the pair must have at least one recorded state.
bool CoincidentTokenIndex::contains(Symbol a, Symbol b) const {
  if (a == b) return true;
  return !states_with(a, b).empty();
}
// Returns the set of parse state ids recorded for the given pair of tokens,
// or a shared empty set when the pair has no entry. The argument order does
// not matter: the lookup key always puts the smaller index first.
const std::unordered_set<ParseStateId> &CoincidentTokenIndex::states_with(Symbol a, Symbol b) const {
  const bool in_order = a.index <= b.index;
  const auto match = entries.find({
    in_order ? a.index : b.index,
    in_order ? b.index : a.index
  });
  if (match != entries.end()) return match->second;
  return EMPTY;
}
template <bool include_all>
class StartOrEndCharacterAggregator {
class CharacterAggregator {
public:
void apply(const Rule &rule) {
rule.match(
@ -60,8 +91,8 @@ class StartOrEndCharacterAggregator {
CharacterSet result;
};
using StartingCharacterAggregator = StartOrEndCharacterAggregator<false>;
using AllCharacterAggregator = StartOrEndCharacterAggregator<true>;
using StartingCharacterAggregator = CharacterAggregator<false>;
using AllCharacterAggregator = CharacterAggregator<true>;
class LexTableBuilderImpl : public LexTableBuilder {
LexTable main_lex_table;
@ -73,21 +104,25 @@ class LexTableBuilderImpl : public LexTableBuilder {
CharacterSet separator_start_characters;
vector<CharacterSet> starting_characters_by_token;
vector<CharacterSet> following_characters_by_token;
const vector<LookaheadSet> &coincident_tokens_by_token;
const CoincidentTokenIndex &coincident_token_index;
ParseTable *parse_table;
vector<ConflictStatus> conflict_matrix;
bool conflict_detection_mode;
LookaheadSet keyword_symbols;
Symbol keyword_capture_token;
char encoding_buffer[8];
public:
LexTableBuilderImpl(const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
const unordered_map<Symbol, LookaheadSet> &following_tokens_by_token,
const vector<LookaheadSet> &coincident_tokens)
const CoincidentTokenIndex &coincident_token_index,
ParseTable *parse_table)
: grammar(lexical_grammar),
starting_characters_by_token(lexical_grammar.variables.size()),
following_characters_by_token(lexical_grammar.variables.size()),
coincident_tokens_by_token(coincident_tokens),
coincident_token_index(coincident_token_index),
parse_table(parse_table),
conflict_matrix(lexical_grammar.variables.size() * lexical_grammar.variables.size(), DoesNotMatch),
conflict_detection_mode(false),
keyword_capture_token(rules::NONE()) {
@ -103,54 +138,57 @@ class LexTableBuilderImpl : public LexTableBuilder {
separator_start_characters = separator_character_aggregator.result;
// Compute the set of characters that each token can start with and the set of non-separator
// characters that can follow each token.
// characters that can follow each token. Also identify all of the tokens that can be
// considered 'keywords'.
LOG_START("characterizing tokens");
LookaheadSet potential_keyword_symbols;
for (unsigned i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol token = Symbol::terminal(i);
StartingCharacterAggregator starting_character_aggregator;
starting_character_aggregator.apply(grammar.variables[i].rule);
starting_characters_by_token[i] = starting_character_aggregator.result;
StartingCharacterAggregator following_character_aggregator;
const auto &following_tokens = following_tokens_by_token.find(Symbol::terminal(i));
const auto &following_tokens = following_tokens_by_token.find(token);
if (following_tokens != following_tokens_by_token.end()) {
following_tokens->second.for_each([&](Symbol following_token) {
following_character_aggregator.apply(grammar.variables[following_token.index].rule);
return true;
});
}
following_characters_by_token[i] = following_character_aggregator.result;
if (grammar.variables[i].is_string) {
AllCharacterAggregator aggregator;
aggregator.apply(grammar.variables[i].rule);
bool all_alpha = true, all_lower = true;
for (auto character : aggregator.result.included_chars) {
if (!iswalpha(character) && character != '_') all_alpha = false;
if (!iswlower(character)) all_lower = false;
AllCharacterAggregator all_character_aggregator;
all_character_aggregator.apply(grammar.variables[i].rule);
if (
!starting_character_aggregator.result.includes_all &&
!all_character_aggregator.result.includes_all
) {
bool starts_alpha = true, all_alnum = true;
for (auto character : starting_character_aggregator.result.included_chars) {
if (!iswalpha(character) && character != '_') {
starts_alpha = false;
}
}
if (all_lower) {
keyword_symbols.insert(Symbol::terminal(i));
for (auto character : all_character_aggregator.result.included_chars) {
if (!iswalnum(character) && character != '_') {
all_alnum = false;
}
}
// TODO - Refactor this. In general, a keyword token cannot be followed immediately
// by another alphanumeric character. But this requirement is currently not expressed
// anywhere in the grammar. So without this hack, we would be overly conservative about
// merging parse states because we would often consider `identifier` tokens to *conflict*
// with keyword tokens.
if (all_alpha) {
following_character_aggregator.result
.exclude('a', 'z')
.exclude('A', 'Z')
.exclude('0', '9')
.exclude('_')
.exclude('$');
if (starts_alpha && all_alnum) {
LOG("potential keyword: %s", token_name(token).c_str());
potential_keyword_symbols.insert(token);
}
}
following_characters_by_token[i] = following_character_aggregator.result;
}
LOG_END();
// For each pair of tokens, generate a lex table for just those two tokens and record what
// conflicts arise.
LOG_START("detecting conflicts between tokens");
conflict_detection_mode = true;
for (Symbol::Index i = 0, n = grammar.variables.size(); i < n; i++) {
for (Symbol::Index j = 0; j < i; j++) {
@ -165,52 +203,103 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
}
}
LOG_END();
// Find a 'keyword capture token' that matches all of the identified keywords.
LOG_START("finding keyword capture token");
for (Symbol::Index i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol symbol = Symbol::terminal(i);
bool matches_all_keywords = true;
keyword_symbols.for_each([&](Symbol keyword_symbol) {
if (!(get_conflict_status(symbol, keyword_symbol) & MatchesSameString)) {
matches_all_keywords = false;
Symbol candidate = Symbol::terminal(i);
LookaheadSet homonyms;
potential_keyword_symbols.for_each([&](Symbol other_token) {
if (get_conflict_status(other_token, candidate) & MatchesShorterStringWithinSeparators) {
homonyms.clear();
return false;
}
if (get_conflict_status(candidate, other_token) == MatchesSameString) {
homonyms.insert(other_token);
}
return true;
});
if (!matches_all_keywords) continue;
if (homonyms.empty()) continue;
// Don't use a token to capture keywords if it overlaps with separator characters.
AllCharacterAggregator capture_aggregator;
capture_aggregator.apply(grammar.variables[i].rule);
if (capture_aggregator.result.includes_all) continue;
if (capture_aggregator.result.intersects(separator_start_characters)) continue;
LOG_START(
"keyword capture token candidate: %s, homonym count: %lu",
token_name(candidate).c_str(),
homonyms.size()
);
homonyms.for_each([&](Symbol homonym1) {
homonyms.for_each([&](Symbol homonym2) {
if (get_conflict_status(homonym1, homonym2) & MatchesSameString) {
LOG(
"conflict between homonyms %s %s",
token_name(homonym1).c_str(),
token_name(homonym2).c_str()
);
homonyms.remove(homonym1);
}
return false;
});
return true;
});
// Don't use a token to capture keywords if it conflicts with other tokens
// that occur in the same state as a keyword.
bool shadows_other_tokens = false;
for (Symbol::Index j = 0; j < n; j++) {
Symbol other_symbol = Symbol::terminal(j);
if ((get_conflict_status(other_symbol, symbol) & (MatchesShorterStringWithinSeparators|MatchesLongerStringWithValidNextChar)) &&
!keyword_symbols.contains(other_symbol) &&
keyword_symbols.intersects(coincident_tokens_by_token[j])) {
shadows_other_tokens = true;
break;
Symbol other_token = Symbol::terminal(j);
if (other_token == candidate || homonyms.contains(other_token)) continue;
bool candidate_shadows_other = get_conflict_status(other_token, candidate);
bool other_shadows_candidate = get_conflict_status(candidate, other_token);
if (candidate_shadows_other || other_shadows_candidate) {
homonyms.for_each([&](Symbol homonym) {
bool other_shadows_homonym = get_conflict_status(homonym, other_token);
bool candidate_was_already_present = true;
for (ParseStateId state_id : coincident_token_index.states_with(homonym, other_token)) {
if (!parse_table->states[state_id].has_terminal_entry(candidate)) {
candidate_was_already_present = false;
break;
}
}
if (candidate_was_already_present) return true;
if (candidate_shadows_other) {
homonyms.remove(homonym);
LOG(
"remove %s because candidate would shadow %s",
token_name(homonym).c_str(),
token_name(other_token).c_str()
);
} else if (other_shadows_candidate && !other_shadows_homonym) {
homonyms.remove(homonym);
LOG(
"remove %s because %s would shadow candidate",
token_name(homonym).c_str(),
token_name(other_token).c_str()
);
}
return true;
});
}
}
if (shadows_other_tokens) continue;
// If multiple keyword capture tokens are found, don't bother extracting
// the keywords into their own function.
if (keyword_capture_token == rules::NONE()) {
keyword_capture_token = symbol;
} else {
keyword_capture_token = rules::NONE();
break;
if (homonyms.size() > keyword_symbols.size()) {
LOG_START("found capture token. homonyms:");
homonyms.for_each([&](Symbol homonym) {
LOG("%s", token_name(homonym).c_str());
return true;
});
LOG_END();
keyword_symbols = homonyms;
keyword_capture_token = candidate;
}
LOG_END();
}
LOG_END();
}
BuildResult build(ParseTable *parse_table) {
BuildResult build() {
clear();
conflict_detection_mode = false;
vector<pair<LookaheadSet, vector<ParseState *>>> starting_token_sets;
@ -246,8 +335,8 @@ class LexTableBuilderImpl : public LexTableBuilder {
add_lex_state(keyword_lex_table, item_set_for_terminals(keyword_symbols, false));
mark_fragile_tokens(parse_table);
remove_duplicate_lex_states(main_lex_table, parse_table);
mark_fragile_tokens();
remove_duplicate_lex_states(main_lex_table);
return {main_lex_table, keyword_lex_table, keyword_capture_token};
}
@ -261,9 +350,12 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
private:
void record_conflict(Symbol shadowed_token, Symbol other_token, ConflictStatus status) {
bool record_conflict(Symbol shadowed_token, Symbol other_token, ConflictStatus status) {
if (!conflict_detection_mode) return false;
unsigned index = shadowed_token.index * grammar.variables.size() + other_token.index;
bool was_set = conflict_matrix[index] & status;
conflict_matrix[index] = static_cast<ConflictStatus>(conflict_matrix[index] | status);
return !was_set;
}
LexStateId add_lex_state(LexTable &lex_table, const LexItemSet &item_set) {
@ -305,13 +397,27 @@ class LexTableBuilderImpl : public LexTableBuilder {
if (prefer_advancing && !next_item_set_can_yield_this_token) {
auto advance_symbol = transition.destination.entries.begin()->lhs;
if (characters.intersects(following_characters_by_token[accept_action.symbol.index]) ||
characters.intersects(separator_start_characters)) {
record_conflict(accept_action.symbol, advance_symbol, MatchesLongerStringWithValidNextChar);
} else {
record_conflict(accept_action.symbol, advance_symbol, MatchesLongerString);
auto &following_chars = following_characters_by_token[accept_action.symbol.index];
CharacterSet conflicting_following_chars = characters.intersection(following_chars);
if (conflicting_following_chars.is_empty()) {
conflicting_following_chars = characters.intersection(separator_start_characters);
}
if (conflicting_following_chars.is_empty()) {
record_conflict(accept_action.symbol, advance_symbol, MatchesLongerString);
} else {
if (record_conflict(
accept_action.symbol,
advance_symbol,
MatchesLongerStringWithValidNextChar
)) {
LOG(
"%s shadows %s followed by '%s'",
token_name(advance_symbol).c_str(),
token_name(accept_action.symbol).c_str(),
log_char(*conflicting_following_chars.included_chars.begin())
);
}
}
return;
}
}
@ -333,9 +439,21 @@ class LexTableBuilderImpl : public LexTableBuilder {
AcceptTokenAction &existing_action = lex_table.states[state_id].accept_action;
if (existing_action.is_present()) {
if (should_replace_accept_action(existing_action, action)) {
record_conflict(existing_action.symbol, action.symbol, MatchesSameString);
if (record_conflict(existing_action.symbol, action.symbol, MatchesSameString)) {
LOG(
"%s shadows %s - same length",
token_name(action.symbol).c_str(),
token_name(existing_action.symbol).c_str()
);
}
} else {
record_conflict(action.symbol, existing_action.symbol, MatchesSameString);
if (record_conflict(action.symbol, existing_action.symbol, MatchesSameString)) {
LOG(
"%s shadows %s - same length",
token_name(existing_action.symbol).c_str(),
token_name(action.symbol).c_str()
);
}
continue;
}
}
@ -344,7 +462,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
}
void mark_fragile_tokens(ParseTable *parse_table) {
void mark_fragile_tokens() {
for (ParseState &state : parse_table->states) {
for (auto &entry : state.terminal_entries) {
Symbol token = entry.first;
@ -370,7 +488,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
const LookaheadSet &existing_set = in_left ? right : *left;
existing_set.for_each([&](Symbol existing_symbol) {
if ((get_conflict_status(existing_symbol, different_symbol) & CannotDistinguish) ||
!coincident_tokens_by_token[different_symbol.index].contains(existing_symbol)) {
!coincident_token_index.contains(different_symbol, existing_symbol)) {
is_compatible = false;
return false;
}
@ -386,7 +504,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
return is_compatible;
}
void remove_duplicate_lex_states(LexTable &lex_table, ParseTable *parse_table) {
void remove_duplicate_lex_states(LexTable &lex_table) {
for (LexState &state : lex_table.states) {
state.accept_action.is_string = false;
state.accept_action.precedence = 0;
@ -510,25 +628,41 @@ class LexTableBuilderImpl : public LexTableBuilder {
main_lex_state_ids.clear();
}
const string &token_name(rules::Symbol &symbol) {
return grammar.variables[symbol.index].name;
string token_name(const rules::Symbol &symbol) {
const LexicalVariable &variable = grammar.variables[symbol.index];
if (variable.type == VariableTypeNamed) {
return variable.name;
} else {
return "'" + variable.name + "'";
}
}
const char *log_char(int32_t character) {
uint32_t count = utf8proc_encode_char(
character,
reinterpret_cast<utf8proc_uint8_t *>(encoding_buffer)
);
encoding_buffer[count] = 0;
return encoding_buffer;
}
};
unique_ptr<LexTableBuilder> LexTableBuilder::create(const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
const unordered_map<Symbol, LookaheadSet> &following_tokens,
const vector<LookaheadSet> &coincident_tokens) {
const CoincidentTokenIndex &coincident_tokens,
ParseTable *parse_table) {
return unique_ptr<LexTableBuilder>(new LexTableBuilderImpl(
syntax_grammar,
lexical_grammar,
following_tokens,
coincident_tokens
coincident_tokens,
parse_table
));
}
LexTableBuilder::BuildResult LexTableBuilder::build(ParseTable *parse_table) {
return static_cast<LexTableBuilderImpl *>(this)->build(parse_table);
LexTableBuilder::BuildResult LexTableBuilder::build() {
return static_cast<LexTableBuilderImpl *>(this)->build();
}
ConflictStatus LexTableBuilder::get_conflict_status(Symbol a, Symbol b) const {

View file

@ -4,9 +4,22 @@
#include <memory>
#include <vector>
#include <unordered_map>
#include <set>
#include <unordered_set>
#include <utility>
#include "compiler/parse_table.h"
#include "compiler/lex_table.h"
namespace std {
using tree_sitter::rules::Symbol;
// Hash support for pairs of symbol indices, so that such pairs can be used
// as keys of unordered containers (see CoincidentTokenIndex::entries below).
template <>
struct hash<pair<Symbol::Index, Symbol::Index>> {
// Only declared here; the definition is provided out of line.
size_t operator()(const pair<Symbol::Index, Symbol::Index> &) const;
};
} // namespace std
namespace tree_sitter {
struct ParseTable;
@ -30,12 +43,23 @@ enum ConflictStatus {
),
};
// Records, for pairs of tokens, the set of parse states associated with
// that pair.
struct CoincidentTokenIndex {
// Maps a pair of token indices to a set of parse state ids. Lookups made
// through `states_with` always use the smaller index as the first element
// of the pair.
std::unordered_map<
std::pair<rules::Symbol::Index, rules::Symbol::Index>,
std::unordered_set<ParseStateId>
> entries;
// Returns true when both symbols are equal, or when at least one parse
// state is recorded for the pair.
bool contains(rules::Symbol, rules::Symbol) const;
// Returns the parse states recorded for the pair, in either argument order;
// an empty set is returned when the pair has no entry.
const std::unordered_set<ParseStateId> &states_with(rules::Symbol, rules::Symbol) const;
};
class LexTableBuilder {
public:
static std::unique_ptr<LexTableBuilder> create(const SyntaxGrammar &,
const LexicalGrammar &,
const std::unordered_map<rules::Symbol, LookaheadSet> &,
const std::vector<LookaheadSet> &);
const CoincidentTokenIndex &,
ParseTable *);
struct BuildResult {
LexTable main_table;
@ -43,7 +67,7 @@ class LexTableBuilder {
rules::Symbol keyword_capture_token;
};
BuildResult build(ParseTable *);
BuildResult build();
ConflictStatus get_conflict_status(rules::Symbol, rules::Symbol) const;

View file

@ -117,5 +117,31 @@ bool LookaheadSet::insert(const Symbol &symbol) {
return false;
}
// Removes `symbol` from the set. Returns true if the symbol was present
// (and has now been removed), false if the set did not contain it.
bool LookaheadSet::remove(const Symbol &symbol) {
  // The end-of-input marker is tracked by a dedicated flag, not in the bit vectors.
  if (symbol == rules::END_OF_INPUT()) {
    if (!eof) return false;
    eof = false;
    return true;
  }

  auto &bits = symbol.is_external() ? external_bits : terminal_bits;

  // An index beyond the end of the vector means the symbol was never inserted.
  if (static_cast<size_t>(symbol.index) >= bits.size()) return false;
  if (!bits[symbol.index]) return false;

  bits[symbol.index] = false;
  return true;
}
// Empties the set: drops every terminal and external symbol and resets the
// end-of-input flag.
void LookaheadSet::clear() {
  external_bits.clear();
  terminal_bits.clear();
  eof = false;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -22,6 +22,8 @@ class LookaheadSet {
bool contains(const rules::Symbol &) const;
bool insert_all(const LookaheadSet &);
bool insert(const rules::Symbol &);
bool remove(const rules::Symbol &);
void clear();
bool intersects(const LookaheadSet &) const;
template <typename Callback>

View file

@ -6,6 +6,7 @@
#include <string>
#include <unordered_map>
#include <utility>
#include "compiler/log.h"
#include "compiler/parse_table.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/parse_item_set_builder.h"
@ -51,27 +52,14 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
ParseItemSetBuilder item_set_builder;
unique_ptr<LexTableBuilder> lex_table_builder;
unordered_map<Symbol, LookaheadSet> following_tokens_by_token;
vector<LookaheadSet> coincident_tokens_by_token;
CoincidentTokenIndex coincident_token_index;
set<std::pair<Symbol, Symbol>> logged_conflict_tokens;
public:
ParseTableBuilderImpl(const SyntaxGrammar &syntax_grammar, const LexicalGrammar &lexical_grammar)
: grammar(syntax_grammar),
lexical_grammar(lexical_grammar),
item_set_builder(syntax_grammar, lexical_grammar),
coincident_tokens_by_token(lexical_grammar.variables.size()) {
for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
coincident_tokens_by_token[i].insert(rules::END_OF_INPUT());
if (lexical_grammar.variables[i].is_string) {
for (unsigned j = 0; j < i; j++) {
if (lexical_grammar.variables[j].is_string) {
coincident_tokens_by_token[i].insert(Symbol::terminal(j));
coincident_tokens_by_token[j].insert(Symbol::terminal(i));
}
}
}
}
}
item_set_builder(syntax_grammar, lexical_grammar) {}
BuildResult build() {
// Ensure that the empty rename sequence has index 0.
@ -104,7 +92,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
grammar,
lexical_grammar,
following_tokens_by_token,
coincident_tokens_by_token
coincident_token_index,
&parse_table
);
build_error_parse_state(error_state_id);
@ -113,7 +102,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
eliminate_unit_reductions();
populate_used_terminals();
auto lex_table_result = lex_table_builder->build(&parse_table);
auto lex_table_result = lex_table_builder->build();
return {
parse_table,
lex_table_result.main_table,
@ -150,47 +139,56 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
MatchesLongerStringWithValidNextChar
);
// Add all the tokens that have no conflict with other tokens.
LookaheadSet non_conflicting_tokens;
parse_table.states[state_id].terminal_entries.clear();
// First, identify the conflict-free tokens.
LookaheadSet conflict_free_tokens;
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
Symbol token = Symbol::terminal(i);
bool conflicts_with_other_tokens = false;
for (unsigned j = 0; j < lexical_grammar.variables.size(); j++) {
Symbol other_token = Symbol::terminal(j);
if (j != i &&
!coincident_tokens_by_token[token.index].contains(other_token) &&
if (!coincident_token_index.contains(token, other_token) &&
(lex_table_builder->get_conflict_status(other_token, token) & CannotMerge)) {
conflicts_with_other_tokens = true;
break;
}
}
if (!conflicts_with_other_tokens) non_conflicting_tokens.insert(token);
if (!conflicts_with_other_tokens) conflict_free_tokens.insert(token);
}
// Include in the error recover state all of the tokens that are either
// conflict-free themselves, or have no conflicts with any conflict-free
// tokens.
LOG_START("finding non-conflicting tokens for error recovery");
LookaheadSet tokens;
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
Symbol token = Symbol::terminal(i);
bool conflicts_with_other_tokens = false;
if (!non_conflicting_tokens.contains(token)) {
non_conflicting_tokens.for_each([&](Symbol other_token) {
if (!coincident_tokens_by_token[token.index].contains(other_token) &&
if (conflict_free_tokens.contains(token)) {
LOG("include %s", symbol_name(token).c_str());
parse_table.add_terminal_action(state_id, token, ParseAction::Recover());
} else {
bool conflicts_with_other_tokens = false;
conflict_free_tokens.for_each([&](Symbol other_token) {
if (!coincident_token_index.contains(token, other_token) &&
(lex_table_builder->get_conflict_status(other_token, token) & CannotMerge)) {
LOG(
"exclude %s: conflicts with %s",
symbol_name(token).c_str(),
symbol_name(other_token).c_str()
);
conflicts_with_other_tokens = true;
return false;
}
return true;
});
}
if (!conflicts_with_other_tokens) {
parse_table.add_terminal_action(state_id, token, ParseAction::Recover());
}
}
for (const Symbol &symbol : grammar.extra_tokens) {
if (!parse_table.states[state_id].terminal_entries.count(symbol)) {
parse_table.add_terminal_action(state_id, symbol, ParseAction::ShiftExtra());
if (!conflicts_with_other_tokens) {
LOG("include %s", symbol_name(token).c_str());
parse_table.add_terminal_action(state_id, token, ParseAction::Recover());
}
}
}
LOG_END();
for (size_t i = 0; i < grammar.external_tokens.size(); i++) {
if (grammar.external_tokens[i].corresponding_internal_token == rules::NONE()) {
@ -320,8 +318,10 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
if (iter->first.is_built_in() || iter->first.is_external()) continue;
for (auto other_iter = terminals.begin(); other_iter != iter; ++other_iter) {
if (other_iter->first.is_built_in() || other_iter->first.is_external()) continue;
coincident_tokens_by_token[iter->first.index].insert(other_iter->first);
coincident_tokens_by_token[other_iter->first.index].insert(iter->first);
coincident_token_index.entries[{
other_iter->first.index,
iter->first.index
}].insert(state_id);
}
}
@ -356,6 +356,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
}
void remove_duplicate_parse_states() {
LOG_START("removing duplicate parse states");
unordered_map<size_t, set<ParseStateId>> state_indices_by_signature;
for (auto &pair : state_ids_by_item_set) {
@ -517,6 +518,12 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
if (!new_token.is_built_in()) {
for (const auto &entry : state.terminal_entries) {
if (lex_table_builder->get_conflict_status(entry.first, new_token) & CannotDistinguish) {
LOG_IF(
logged_conflict_tokens.insert({entry.first, new_token}).second,
"cannot merge parse states due to token conflict: %s and %s",
symbol_name(entry.first).c_str(),
symbol_name(new_token).c_str()
);
return false;
}
}

View file

@ -3,6 +3,7 @@
#include "compiler/build_tables/parse_table_builder.h"
#include "compiler/generate_code/c_code.h"
#include "compiler/syntax_grammar.h"
#include "compiler/log.h"
#include "compiler/lexical_grammar.h"
#include "compiler/parse_grammar.h"
#include "json.h"
@ -16,7 +17,9 @@ using std::vector;
using std::get;
using std::make_tuple;
extern "C" TSCompileResult ts_compile_grammar(const char *input) {
extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) {
set_log_file(log_file);
ParseGrammarResult parse_result = parse_grammar(string(input));
if (!parse_result.error_message.empty()) {
return { nullptr, strdup(parse_result.error_message.c_str()),
@ -48,8 +51,8 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) {
move(lexical_grammar)
);
return {
strdup(code.c_str()), nullptr, TSCompileErrorTypeNone };
set_log_file(nullptr);
return { strdup(code.c_str()), nullptr, TSCompileErrorTypeNone };
}
} // namespace tree_sitter

31
src/compiler/log.cc Normal file
View file

@ -0,0 +1,31 @@
#include "compiler/log.h"
namespace tree_sitter {

// Number of space characters written per level of log nesting.
static const unsigned SPACES_PER_INDENT = 4;

// Logging state is kept per thread: the current nesting depth and the stream
// that log output is written to (null when logging is disabled).
thread_local unsigned _indent_level = 0;
thread_local FILE *_log_file = nullptr;

// Sets the stream that receives log output for the calling thread and resets
// the nesting depth. Passing null disables logging.
void set_log_file(FILE *file) {
  _log_file = file;
  _indent_level = 0;
}

// Returns the calling thread's current log stream, or null if there is none.
FILE *get_log_file() {
  return _log_file;
}

// Increases the nesting depth of subsequent log lines by one level.
void _indent_logs() {
  _indent_level++;
}

// Decreases the nesting depth by one level. The counter is unsigned, so an
// unbalanced call is ignored instead of wrapping around to a huge depth.
void _outdent_logs() {
  if (_indent_level > 0) _indent_level--;
}

// Writes the indentation for the current nesting depth to the log stream.
// The padding is generated by the field width of the format string, so any
// depth is handled without reading past the end of a fixed-size buffer.
void _print_indent() {
  if (!_log_file || _indent_level == 0) return;
  fprintf(_log_file, "%*s", static_cast<int>(_indent_level * SPACES_PER_INDENT), "");
}

}  // namespace tree_sitter

40
src/compiler/log.h Normal file
View file

@ -0,0 +1,40 @@
#ifndef COMPILER_LOG_H_
#define COMPILER_LOG_H_
#include <stdio.h>
namespace tree_sitter {
// Sets the stream that log output is written to and resets the indentation
// level to zero. The log stream and indentation level are thread-local.
void set_log_file(FILE *);
// Returns the stream most recently passed to set_log_file on this thread
// (null by default).
FILE *get_log_file();
// Increments the indentation level used for subsequent log lines.
void _indent_logs();
// Decrements the indentation level used for subsequent log lines.
void _outdent_logs();
// Writes the current indentation (four characters per indentation level)
// to the log stream.
void _print_indent();
// Logs a message, then indents every following log line by one level until
// the matching LOG_END.
#define LOG_START(...) \
  do { \
    LOG(__VA_ARGS__); \
    _indent_logs(); \
  } while (0)

// Ends the group opened by the matching LOG_START by removing one level of
// indentation. Any arguments are ignored.
#define LOG_END(...) \
  do { \
    _outdent_logs(); \
  } while (0)

// Logs a printf-style message on its own line, if a log file is set.
#define LOG(...) \
  LOG_IF(true, __VA_ARGS__)

// Logs a printf-style message on its own line if a log file is set and
// `condition` holds. The condition is only evaluated when a log file is set.
// It is parenthesized so that expressions containing lower-precedence
// operators (such as `a || b`) cannot bypass the null check on the log file,
// and the local stream variable has a name that will not capture a caller's
// own `f` appearing in the message arguments.
#define LOG_IF(condition, ...) \
  do { \
    FILE *_ts_log_file = get_log_file(); \
    if (_ts_log_file && (condition)) { \
      _print_indent(); \
      fprintf(_ts_log_file, __VA_ARGS__); \
      fputs("\n", _ts_log_file); \
    } \
  } while (0)
} // namespace tree_sitter
#endif // COMPILER_LOG_H_

View file

@ -123,6 +123,10 @@ bool ParseState::has_shift_action() const {
return (!nonterminal_entries.empty());
}
// Reports whether this state has a terminal table entry keyed by `symbol`.
bool ParseState::has_terminal_entry(rules::Symbol symbol) const {
  return terminal_entries.count(symbol) > 0;
}
void ParseState::each_referenced_state(function<void(ParseStateId *)> fn) {
for (auto &entry : terminal_entries)
for (ParseAction &action : entry.second.actions)

View file

@ -65,6 +65,7 @@ struct ParseState {
bool merge(const ParseState &);
void each_referenced_state(std::function<void(ParseStateId *)>);
bool has_shift_action() const;
bool has_terminal_entry(rules::Symbol) const;
std::map<rules::Symbol, ParseTableEntry> terminal_entries;
std::map<rules::Symbol::Index, ParseStateId> nonterminal_entries;

View file

@ -30,7 +30,7 @@ class SymbolReplacer {
Rule apply(const Rule &rule) {
return rule.match(
[this](const rules::Blank &blank) -> Rule {
[](const rules::Blank &blank) -> Rule {
return blank;
},
@ -110,7 +110,7 @@ class TokenExtractor {
public:
Rule apply(const rules::Rule &rule) {
return rule.match(
[this](const rules::Blank &blank) -> Rule { return blank; },
[](const rules::Blank &blank) -> Rule { return blank; },
[this](const rules::Metadata &rule) -> Rule {
if (rule.params.is_token) {

View file

@ -136,10 +136,7 @@ class PatternParser {
}
default: {
auto pair = single_char();
if (pair.second.type)
return { Blank{}, pair.second };
return {pair.first, CompileError::none()};
return {single_char(), CompileError::none()};
}
}
}
@ -154,38 +151,46 @@ class PatternParser {
}
while (has_more_input() && (peek() != ']')) {
auto pair = single_char();
if (pair.second.type)
return { CharacterSet(), pair.second };
auto characters = single_char();
if (peek() == '-') {
next();
if (!characters.includes_all && characters.included_chars.size() == 1 && peek() != ']') {
auto next_characters = single_char();
if (!next_characters.includes_all && next_characters.included_chars.size() == 1) {
characters.include(
*characters.included_chars.begin(),
*next_characters.included_chars.begin()
);
} else {
characters.include('-');
characters.add_set(next_characters);
}
} else {
characters.include('-');
}
}
if (is_affirmative)
result.add_set(pair.first);
result.add_set(characters);
else
result.remove_set(pair.first);
result.remove_set(characters);
}
return { result, CompileError::none() };
}
pair<CharacterSet, CompileError> single_char() {
CharacterSet single_char() {
CharacterSet value;
switch (peek()) {
case '\\':
next();
value = escaped_char(peek());
next();
break;
default:
uint32_t first_char = peek();
next();
if (peek() == '-') {
next();
value = CharacterSet().include(first_char, peek());
next();
} else {
value = CharacterSet().include(first_char);
}
if (peek() == '\\') {
next();
value = escaped_char(peek());
next();
} else {
value = CharacterSet().include(peek());
next();
}
return { value, CompileError::none() };
return value;
}
CharacterSet escaped_char(uint32_t value) {
@ -220,6 +225,8 @@ class PatternParser {
.exclude('\t')
.exclude('\n')
.exclude('\r');
case '0':
return CharacterSet().include('\0');
case 't':
return CharacterSet().include('\t');
case 'n':

View file

@ -159,6 +159,11 @@ bool CharacterSet::intersects(const CharacterSet &other) const {
return !copy.remove_set(other).is_empty();
}
// Returns the value that remove_set(other) yields when applied to a copy of
// this set; neither operand is modified. intersects() tests that same return
// value for emptiness, so it is presumably the set of characters the two
// sets have in common.
CharacterSet CharacterSet::intersection(const CharacterSet &other) const {
  CharacterSet remainder(*this);
  CharacterSet shared = remainder.remove_set(other);
  return shared;
}
vector<CharacterRange> CharacterSet::included_ranges() const {
return consolidate_ranges(included_chars);
}

View file

@ -35,6 +35,7 @@ struct CharacterSet {
void add_set(const CharacterSet &other);
CharacterSet remove_set(const CharacterSet &other);
CharacterSet intersection(const CharacterSet &other) const;
bool intersects(const CharacterSet &other) const;
bool is_empty() const;
@ -49,4 +50,4 @@ struct CharacterSet {
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_CHARACTER_SET_H_
#endif // COMPILER_RULES_CHARACTER_SET_H_

View file

@ -12,18 +12,18 @@ extern "C" {
#include <stdbool.h>
#include "runtime/alloc.h"
#define Array(T) \
struct { \
T *contents; \
#define Array(T) \
struct { \
uint32_t size; \
uint32_t capacity; \
T *contents; \
}
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
#define array_new() \
{ NULL, 0, 0 }
{ 0, 0, NULL }
#define array_get(self, index) \
(assert((uint32_t)index < (self)->size), &(self)->contents[index])
@ -34,30 +34,33 @@ extern "C" {
#define array_clear(self) ((self)->size = 0)
#define array_grow(self, new_capacity) \
array__grow((VoidArray *)(self), array__elem_size(self), new_capacity)
#define array_reserve(self, new_capacity) \
array__reserve((VoidArray *)(self), array__elem_size(self), new_capacity)
#define array_erase(self, index) \
array__erase((VoidArray *)(self), array__elem_size(self), index)
#define array_delete(self) array__delete((VoidArray *)self)
#define array_push(self, element) \
(array_grow((self), (self)->size + 1), \
#define array_push(self, element) \
(array__grow((VoidArray *)(self), array__elem_size(self)), \
(self)->contents[(self)->size++] = (element))
#define array_push_all(self, other) \
array_splice((self), (self)->size, 0, (other)->size, (other)->contents)
array_splice((self), (self)->size, 0, (other))
#define array_splice(self, index, old_count, new_count, new_elements) \
#define array_splice(self, index, old_count, new_array) \
array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \
new_count, (new_elements))
(new_array)->size, (new_array)->contents)
#define array_insert(self, index, element) \
array_splice(self, index, 0, 1, &(element))
array__splice((VoidArray *)(self), array__elem_size(self), index, 0, 1, &element)
#define array_pop(self) ((self)->contents[--(self)->size])
#define array_assign(self, other) \
array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self))
// Private
typedef Array(void) VoidArray;
@ -80,21 +83,31 @@ static inline void array__erase(VoidArray *self, size_t element_size,
self->size--;
}
static inline void array__grow(VoidArray *self, size_t element_size,
uint32_t new_capacity) {
static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t new_capacity) {
if (new_capacity > self->capacity) {
if (new_capacity < 2 * self->capacity)
new_capacity = 2 * self->capacity;
if (new_capacity < 8)
new_capacity = 8;
if (self->contents)
if (self->contents) {
self->contents = ts_realloc(self->contents, new_capacity * element_size);
else
} else {
self->contents = ts_calloc(new_capacity, element_size);
}
self->capacity = new_capacity;
}
}
// Makes `self` an element-wise copy of `other`: ensures `self` has room for
// `other->size` elements, then copies them over. Existing capacity in `self`
// is kept. `element_size` is the size in bytes of one element.
static inline void array__assign(VoidArray *self, const VoidArray *other, size_t element_size) {
  array__reserve(self, element_size, other->size);
  self->size = other->size;
  // An empty array may have NULL contents, and passing NULL to memcpy is
  // undefined behavior even for a zero-byte copy, so skip the call entirely.
  if (self->size > 0) {
    memcpy(self->contents, other->contents, self->size * element_size);
  }
}
// Makes room for one more element: when the array is full, its capacity is
// doubled, with a minimum capacity of 8. Does nothing if there is spare room.
static inline void array__grow(VoidArray *self, size_t element_size) {
  if (self->size != self->capacity) return;
  size_t doubled = self->capacity * 2;
  array__reserve(self, element_size, doubled < 8 ? 8 : doubled);
}
static inline void array__splice(VoidArray *self, size_t element_size,
uint32_t index, uint32_t old_count,
uint32_t new_count, void *elements) {
@ -103,7 +116,7 @@ static inline void array__splice(VoidArray *self, size_t element_size,
uint32_t new_end = index + new_count;
assert(old_end <= self->size);
array__grow(self, element_size, new_size);
array__reserve(self, element_size, new_size);
char *contents = (char *)self->contents;
if (self->size > old_end)

30
src/runtime/atomic.h Normal file
View file

@ -0,0 +1,30 @@
#ifndef RUNTIME_ATOMIC_H_
#define RUNTIME_ATOMIC_H_
#include <stdint.h>
#ifdef _WIN32

#include <windows.h>

// Atomically increments *p and returns the new value.
// The Interlocked functions take `LONG volatile *`; the pointer is cast
// explicitly because `uint32_t *` and `LONG *` are distinct pointer types
// (same width on Windows) and the implicit conversion is ill-formed.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  return (uint32_t)InterlockedIncrement((volatile LONG *)p);
}

// Atomically decrements *p and returns the new value.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  return (uint32_t)InterlockedDecrement((volatile LONG *)p);
}

#else

// Atomically increments *p and returns the new value.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  return __sync_add_and_fetch(p, 1u);
}

// Atomically decrements *p and returns the new value.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  return __sync_sub_and_fetch(p, 1u);
}

#endif
#endif // RUNTIME_ATOMIC_H_

View file

@ -1,179 +0,0 @@
#include "runtime/alloc.h"
#include "runtime/node.h"
#include "runtime/tree.h"
#include "runtime/parser.h"
#include "runtime/string_input.h"
#include "runtime/document.h"
#include "runtime/get_changed_ranges.h"
// Formats a message into the lexer's debug buffer and passes it to the
// installed logger callback as a TSLogTypeLex message. Expects a `self`
// variable of type `TSDocument *` in scope, and does not check that a logger
// callback is installed; callers must do that first.
// Wrapped in do/while(0) so the two statements act as a single statement,
// and without a trailing line continuation so the macro ends here.
#define LOG(...)                                                                                \
  do {                                                                                          \
    snprintf(self->parser.lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
    self->parser.lexer.logger.log(self->parser.lexer.logger.payload, TSLogTypeLex, self->parser.lexer.debug_buffer); \
  } while (0)
// Allocates a new document with ts_calloc, then initializes its parser and
// its two tree-path scratch arrays. Release it with ts_document_free.
TSDocument *ts_document_new() {
  TSDocument *document = ts_calloc(1, sizeof(*document));
  parser_init(&document->parser);
  array_init(&document->tree_path1);
  array_init(&document->tree_path2);
  return document;
}
// Releases everything the document holds: its current tree, both tree-path
// scratch arrays, the parser, and finally the document itself.
void ts_document_free(TSDocument *self) {
  if (self->tree) {
    ts_tree_release(&self->parser.tree_pool, self->tree);
  }
  if (self->tree_path1.contents) {
    array_delete(&self->tree_path1);
  }
  if (self->tree_path2.contents) {
    array_delete(&self->tree_path2);
  }
  parser_destroy(&self->parser);

  // Installing an empty input makes ts_document_set_input free the current
  // payload when the document owns it.
  TSInput empty_input = {
    NULL,
    NULL,
    NULL,
    TSInputEncodingUTF8,
  };
  ts_document_set_input(self, empty_input);
  ts_free(self);
}
// Returns the language currently assigned to the document's parser.
const TSLanguage *ts_document_language(TSDocument *self) {
  const TSLanguage *language = self->parser.language;
  return language;
}
// Assigns a language to the document. Languages whose version differs from
// TREE_SITTER_LANGUAGE_VERSION are ignored. On success the document is
// invalidated and any existing tree is released.
void ts_document_set_language(TSDocument *self, const TSLanguage *language) {
  if (language->version == TREE_SITTER_LANGUAGE_VERSION) {
    ts_document_invalidate(self);
    parser_set_language(&self->parser, language);
    if (self->tree) {
      ts_tree_release(&self->parser.tree_pool, self->tree);
      self->tree = NULL;
    }
  }
}
// Returns a copy of the logger installed on the document's lexer.
TSLogger ts_document_logger(const TSDocument *self) {
  TSLogger logger = self->parser.lexer.logger;
  return logger;
}
// Installs `logger` on the document's lexer, replacing any previous logger.
void ts_document_set_logger(TSDocument *self, TSLogger logger) {
self->parser.lexer.logger = logger;
}
// Sets the parser's print_debugging_graphs flag. When the flag is set,
// ts_document_edit prints the edited tree as a dot graph to stderr.
void ts_document_print_debugging_graphs(TSDocument *self, bool should_print) {
self->parser.print_debugging_graphs = should_print;
}
// Returns a copy of the document's current input descriptor.
TSInput ts_document_input(TSDocument *self) {
  TSInput current_input = self->input;
  return current_input;
}
// Replaces the document's input. If the document owned the previous input's
// payload, that payload is freed first. The new input is treated as owned by
// the caller.
void ts_document_set_input(TSDocument *self, TSInput input) {
  if (self->owns_input) {
    ts_free(self->input.payload);
  }
  self->owns_input = false;
  self->input = input;
}
// Invalidates the document and installs an input built from `text` by
// ts_string_input_make. When that input has a payload, the document takes
// ownership of it and frees it when the input is next replaced.
void ts_document_set_input_string(TSDocument *self, const char *text) {
  ts_document_invalidate(self);
  TSInput string_input = ts_string_input_make(text);
  ts_document_set_input(self, string_input);
  // ts_document_set_input has just cleared owns_input, so assigning the
  // comparison result only changes it when a payload exists.
  self->owns_input = (string_input.payload != NULL);
}
// Same as ts_document_set_input_string, but builds the input from `text` and
// an explicit `length` via ts_string_input_make_with_length.
void ts_document_set_input_string_with_length(TSDocument *self, const char *text, uint32_t length) {
  ts_document_invalidate(self);
  TSInput string_input = ts_string_input_make_with_length(text, length);
  ts_document_set_input(self, string_input);
  // ts_document_set_input has just cleared owns_input, so assigning the
  // comparison result only changes it when a payload exists.
  self->owns_input = (string_input.payload != NULL);
}
// Applies `edit` to the document's current tree. Does nothing when there is
// no tree or when the edit starts past the end of the tree. The removed byte
// count is clamped so the edit never extends beyond the tree's total bytes.
// When debugging graphs are enabled, the edited tree is printed to stderr.
void ts_document_edit(TSDocument *self, TSInputEdit edit) {
  if (!self->tree) return;

  uint32_t max_bytes = ts_tree_total_bytes(self->tree);
  if (edit.start_byte > max_bytes) return;

  uint32_t removable_bytes = max_bytes - edit.start_byte;
  if (edit.bytes_removed > removable_bytes) {
    edit.bytes_removed = removable_bytes;
  }

  ts_tree_edit(self->tree, &edit);

  if (self->parser.print_debugging_graphs) {
    ts_tree_print_dot_graph(self->tree, self->parser.language, stderr);
  }
}
// Parses the document with default options: no halting on error and no
// changed-range reporting.
void ts_document_parse(TSDocument *self) {
  TSParseOptions default_options = {
    .halt_on_error = false,
    .changed_ranges = NULL,
    .changed_range_count = NULL,
  };
  ts_document_parse_with_options(self, default_options);
}
// Parses the document without halting on error and reports the ranges that
// differ from the previous tree through `ranges` and `range_count`.
void ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges,
                                              uint32_t *range_count) {
  TSParseOptions range_options = {
    .halt_on_error = false,
    .changed_ranges = ranges,
    .changed_range_count = range_count,
  };
  ts_document_parse_with_options(self, range_options);
}
// Parses the document's input and installs the resulting tree.
//
// When both changed-range outputs are supplied they are first reset to
// NULL / 0. Nothing further happens if the document has no input reader or no
// language, or if the document is valid and its tree has no pending changes.
// The previous tree is offered to the parser for reuse only while the document
// is valid. If a previous tree exists, the changed ranges between it and the
// new tree are computed on request (and logged when a logger is installed),
// and the previous tree is released.
void ts_document_parse_with_options(TSDocument *self, TSParseOptions options) {
  bool wants_changed_ranges = options.changed_ranges && options.changed_range_count;
  if (wants_changed_ranges) {
    *options.changed_ranges = NULL;
    *options.changed_range_count = 0;
  }

  if (!self->input.read || !self->parser.language) return;

  Tree *reusable_tree = NULL;
  if (self->valid) reusable_tree = self->tree;
  if (reusable_tree && !reusable_tree->has_changes) return;

  Tree *new_tree = parser_parse(&self->parser, self->input, reusable_tree, options.halt_on_error);
  Tree *old_tree = self->tree;

  if (old_tree) {
    if (wants_changed_ranges) {
      *options.changed_range_count = ts_tree_get_changed_ranges(
        old_tree, new_tree, &self->tree_path1, &self->tree_path2,
        self->parser.language, options.changed_ranges
      );

      if (self->parser.lexer.logger.log) {
        for (unsigned i = 0; i < *options.changed_range_count; i++) {
          TSRange range = (*options.changed_ranges)[i];
          LOG(
            "changed_range start:[%u %u], end:[%u %u]",
            range.start.row, range.start.column,
            range.end.row, range.end.column
          );
        }
      }
    }

    ts_tree_release(&self->parser.tree_pool, old_tree);
  }

  self->tree = new_tree;
  self->parse_count++;
  self->valid = true;
}
// Marks the document as not valid, so the next parse does not offer the
// current tree to the parser for reuse.
void ts_document_invalidate(TSDocument *self) {
self->valid = false;
}
// Returns a node made from the document's current tree with both offset
// arguments set to zero. The tree pointer is passed through as-is, so it may
// be NULL if the document has not been parsed.
TSNode ts_document_root_node(const TSDocument *self) {
return ts_node_make(self->tree, 0, 0);
}
// Returns the document's parse counter, which ts_document_parse_with_options
// increments whenever it installs a new tree. The counter is stored as a
// size_t and narrowed to uint32_t here.
uint32_t ts_document_parse_count(const TSDocument *self) {
  return (uint32_t)self->parse_count;
}

View file

@ -1,20 +0,0 @@
#ifndef RUNTIME_DOCUMENT_H_
#define RUNTIME_DOCUMENT_H_
#include "runtime/parser.h"
#include "runtime/tree.h"
#include "runtime/get_changed_ranges.h"
#include <stdbool.h>
// State for one parsed document: the parser, the input it reads from, and the
// most recent syntax tree.
struct TSDocument {
// Parser used for every parse of this document; also holds the tree pool,
// the language, and the lexer's logger.
Parser parser;
// Current input descriptor; see owns_input for who frees its payload.
TSInput input;
// Most recent syntax tree, or NULL before the first parse.
Tree *tree;
// Scratch buffers passed to ts_tree_get_changed_ranges.
TreePath tree_path1;
TreePath tree_path2;
// Incremented each time a parse installs a new tree.
size_t parse_count;
// False after ts_document_invalidate; while false the existing tree is not
// offered to the parser for reuse.
bool valid;
// True when input.payload was created by the document (string inputs) and
// must be freed when the input is replaced.
bool owns_input;
};
#endif

View file

@ -2,7 +2,8 @@
#define RUNTIME_ERROR_COSTS_H_
#define ERROR_STATE 0
#define ERROR_COST_PER_MISSING_TREE 150
#define ERROR_COST_PER_RECOVERY 500
#define ERROR_COST_PER_MISSING_TREE 110
#define ERROR_COST_PER_SKIPPED_TREE 100
#define ERROR_COST_PER_SKIPPED_LINE 30
#define ERROR_COST_PER_SKIPPED_CHAR 1

View file

@ -1,7 +1,8 @@
#include "runtime/get_changed_ranges.h"
#include "runtime/tree.h"
#include "runtime/subtree.h"
#include "runtime/language.h"
#include "runtime/error_costs.h"
#include "runtime/tree_cursor.h"
#include <assert.h>
// #define DEBUG_GET_CHANGED_RANGES
@ -24,22 +25,22 @@ static void range_array_add(RangeArray *results, TSPoint start, TSPoint end) {
}
typedef struct {
TreePath path;
TreeCursor cursor;
const TSLanguage *language;
unsigned visible_depth;
bool in_padding;
} Iterator;
static Iterator iterator_new(TreePath *path, Tree *tree, const TSLanguage *language) {
array_clear(path);
array_push(path, ((TreePathEntry){
.tree = tree,
static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLanguage *language) {
array_clear(&cursor->stack);
array_push(&cursor->stack, ((TreeCursorEntry){
.subtree = tree,
.position = length_zero(),
.child_index = 0,
.structural_child_index = 0,
}));
return (Iterator) {
.path = *path,
.cursor = *cursor,
.language = language,
.visible_depth = 1,
.in_padding = false,
@ -47,42 +48,42 @@ static Iterator iterator_new(TreePath *path, Tree *tree, const TSLanguage *langu
}
static bool iterator_done(Iterator *self) {
return self->path.size == 0;
return self->cursor.stack.size == 0;
}
Length iterator_start_position(Iterator *self) {
TreePathEntry entry = *array_back(&self->path);
TreeCursorEntry entry = *array_back(&self->cursor.stack);
if (self->in_padding) {
return entry.position;
} else {
return length_add(entry.position, entry.tree->padding);
return length_add(entry.position, entry.subtree->padding);
}
}
Length iterator_end_position(Iterator *self) {
TreePathEntry entry = *array_back(&self->path);
Length result = length_add(entry.position, entry.tree->padding);
TreeCursorEntry entry = *array_back(&self->cursor.stack);
Length result = length_add(entry.position, entry.subtree->padding);
if (self->in_padding) {
return result;
} else {
return length_add(result, entry.tree->size);
return length_add(result, entry.subtree->size);
}
}
static bool iterator_tree_is_visible(const Iterator *self) {
TreePathEntry entry = *array_back(&self->path);
if (entry.tree->visible) return true;
if (self->path.size > 1) {
Tree *parent = self->path.contents[self->path.size - 2].tree;
TreeCursorEntry entry = *array_back(&self->cursor.stack);
if (entry.subtree->visible) return true;
if (self->cursor.stack.size > 1) {
const Subtree *parent = self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
const TSSymbol *alias_sequence = ts_language_alias_sequence(self->language, parent->alias_sequence_id);
return alias_sequence && alias_sequence[entry.structural_child_index] != 0;
}
return false;
}
static void iterator_get_visible_state(const Iterator *self, Tree **tree,
static void iterator_get_visible_state(const Iterator *self, const Subtree **tree,
TSSymbol *alias_symbol, uint32_t *start_byte) {
uint32_t i = self->path.size - 1;
uint32_t i = self->cursor.stack.size - 1;
if (self->in_padding) {
if (i == 0) return;
@ -90,10 +91,10 @@ static void iterator_get_visible_state(const Iterator *self, Tree **tree,
}
for (; i + 1 > 0; i--) {
TreePathEntry entry = self->path.contents[i];
TreeCursorEntry entry = self->cursor.stack.contents[i];
if (i > 0) {
Tree *parent = self->path.contents[i - 1].tree;
const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->language,
parent->alias_sequence_id
@ -103,8 +104,8 @@ static void iterator_get_visible_state(const Iterator *self, Tree **tree,
}
}
if (entry.tree->visible || *alias_symbol) {
*tree = entry.tree;
if (entry.subtree->visible || *alias_symbol) {
*tree = entry.subtree;
*start_byte = entry.position.bytes;
break;
}
@ -114,8 +115,8 @@ static void iterator_get_visible_state(const Iterator *self, Tree **tree,
static void iterator_ascend(Iterator *self) {
if (iterator_done(self)) return;
if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--;
if (array_back(&self->path)->child_index > 0) self->in_padding = false;
self->path.size--;
if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false;
self->cursor.stack.size--;
}
static bool iterator_descend(Iterator *self, uint32_t goal_position) {
@ -124,17 +125,17 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) {
bool did_descend;
do {
did_descend = false;
TreePathEntry entry = *array_back(&self->path);
TreeCursorEntry entry = *array_back(&self->cursor.stack);
Length position = entry.position;
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < entry.tree->child_count; i++) {
Tree *child = entry.tree->children[i];
for (uint32_t i = 0; i < entry.subtree->children.size; i++) {
const Subtree *child = entry.subtree->children.contents[i];
Length child_left = length_add(position, child->padding);
Length child_right = length_add(child_left, child->size);
if (child_right.bytes > goal_position) {
array_push(&self->path, ((TreePathEntry){
.tree = child,
array_push(&self->cursor.stack, ((TreeCursorEntry){
.subtree = child,
.position = position,
.child_index = i,
.structural_child_index = structural_child_index,
@ -174,19 +175,19 @@ static void iterator_advance(Iterator *self) {
for (;;) {
if (iterator_tree_is_visible(self)) self->visible_depth--;
TreePathEntry entry = array_pop(&self->path);
TreeCursorEntry entry = array_pop(&self->cursor.stack);
if (iterator_done(self)) return;
Tree *parent = array_back(&self->path)->tree;
const Subtree *parent = array_back(&self->cursor.stack)->subtree;
uint32_t child_index = entry.child_index + 1;
if (parent->child_count > child_index) {
Length position = length_add(entry.position, ts_tree_total_size(entry.tree));
if (parent->children.size > child_index) {
Length position = length_add(entry.position, ts_subtree_total_size(entry.subtree));
uint32_t structural_child_index = entry.structural_child_index;
if (!entry.tree->extra) structural_child_index++;
Tree *next_child = parent->children[child_index];
if (!entry.subtree->extra) structural_child_index++;
const Subtree *next_child = parent->children.contents[child_index];
array_push(&self->path, ((TreePathEntry){
.tree = next_child,
array_push(&self->cursor.stack, ((TreeCursorEntry){
.subtree = next_child,
.position = position,
.child_index = child_index,
.structural_child_index = structural_child_index,
@ -213,7 +214,7 @@ typedef enum {
} IteratorComparison;
IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *new_iter) {
Tree *old_tree = NULL, *new_tree = NULL;
const Subtree *old_tree = NULL, *new_tree = NULL;
uint32_t old_start = 0, new_start = 0;
TSSymbol old_alias_symbol = 0, new_alias_symbol = 0;
iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
@ -246,10 +247,10 @@ IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *ne
#ifdef DEBUG_GET_CHANGED_RANGES
static inline void iterator_print_state(Iterator *self) {
TreePathEntry entry = *array_back(&self->path);
TreeCursorEntry entry = *array_back(&self->cursor.stack);
TSPoint start = iterator_start_position(self).extent;
TSPoint end = iterator_end_position(self).extent;
const char *name = ts_language_symbol_name(self->language, entry.tree->symbol);
const char *name = ts_language_symbol_name(self->language, entry.subtree->symbol);
printf(
"(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
name, self->in_padding ? "(p)" : " ",
@ -260,13 +261,13 @@ static inline void iterator_print_state(Iterator *self) {
}
#endif
unsigned ts_tree_get_changed_ranges(Tree *old_tree, Tree *new_tree,
TreePath *path1, TreePath *path2,
const TSLanguage *language, TSRange **ranges) {
unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *new_tree,
TreeCursor *cursor1, TreeCursor *cursor2,
const TSLanguage *language, TSRange **ranges) {
RangeArray results = array_new();
Iterator old_iter = iterator_new(path1, old_tree, language);
Iterator new_iter = iterator_new(path2, new_tree, language);
Iterator old_iter = iterator_new(cursor1, old_tree, language);
Iterator new_iter = iterator_new(cursor2, new_tree, language);
Length position = iterator_start_position(&old_iter);
Length next_position = iterator_start_position(&new_iter);
@ -348,8 +349,8 @@ unsigned ts_tree_get_changed_ranges(Tree *old_tree, Tree *new_tree,
position = next_position;
} while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
*path1 = old_iter.path;
*path2 = new_iter.path;
*cursor1 = old_iter.cursor;
*cursor2 = new_iter.cursor;
*ranges = results.contents;
return results.size;
}

View file

@ -1,19 +1,12 @@
#ifndef RUNTIME_GET_CHANGED_RANGES_H_
#define RUNTIME_GET_CHANGED_RANGES_H_
#include "runtime/tree.h"
#include "runtime/tree_cursor.h"
#include "runtime/subtree.h"
typedef struct {
Tree *tree;
Length position;
uint32_t child_index;
uint32_t structural_child_index;
} TreePathEntry;
typedef Array(TreePathEntry) TreePath;
unsigned ts_tree_get_changed_ranges(
Tree *old_tree, Tree *new_tree, TreePath *path1, TreePath *path2,
unsigned ts_subtree_get_changed_ranges(
const Subtree *old_tree, const Subtree *new_tree,
TreeCursor *cursor1, TreeCursor *cursor2,
const TSLanguage *language, TSRange **ranges
);

View file

@ -1,14 +1,13 @@
#include "runtime/language.h"
#include "runtime/tree.h"
#include "runtime/subtree.h"
#include "runtime/error_costs.h"
void ts_language_table_entry(const TSLanguage *self, TSStateId state,
TSSymbol symbol, TableEntry *result) {
if (symbol == ts_builtin_sym_error) {
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
result->action_count = 0;
result->is_reusable = false;
result->actions = NULL;
return;
} else {
assert(symbol < self->token_count);
uint32_t action_index = self->parse_table[state * self->symbol_count + symbol];
@ -27,10 +26,11 @@ uint32_t ts_language_version(const TSLanguage *language) {
return language->version;
}
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language,
TSSymbol symbol) {
if (symbol == ts_builtin_sym_error) {
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, TSSymbol symbol) {
if (symbol == ts_builtin_sym_error) {
return (TSSymbolMetadata){.visible = true, .named = true};
} else if (symbol == ts_builtin_sym_error_repeat) {
return (TSSymbolMetadata){.visible = false, .named = false};
} else {
return language->symbol_metadata[symbol];
}
@ -39,6 +39,8 @@ TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language,
const char *ts_language_symbol_name(const TSLanguage *language, TSSymbol symbol) {
if (symbol == ts_builtin_sym_error) {
return "ERROR";
} else if (symbol == ts_builtin_sym_error_repeat) {
return "_ERROR";
} else {
return language->symbol_names[symbol];
}

View file

@ -5,8 +5,10 @@
extern "C" {
#endif
#include "runtime/subtree.h"
#include "tree_sitter/parser.h"
#include "runtime/tree.h"
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
typedef struct {
const TSParseAction *actions;
@ -51,7 +53,7 @@ static inline bool ts_language_has_reduce_action(const TSLanguage *self,
static inline TSStateId ts_language_next_state(const TSLanguage *self,
TSStateId state,
TSSymbol symbol) {
if (symbol == ts_builtin_sym_error) {
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
return 0;
} else if (symbol < self->token_count) {
uint32_t count;

View file

@ -1,6 +1,6 @@
#include <stdio.h>
#include "runtime/lexer.h"
#include "runtime/tree.h"
#include "runtime/subtree.h"
#include "runtime/length.h"
#include "runtime/utf16.h"
#include "utf8proc.h"

View file

@ -5,10 +5,10 @@
extern "C" {
#endif
#include "tree_sitter/parser.h"
#include "tree_sitter/runtime.h"
#include "runtime/length.h"
#include "runtime/tree.h"
#include "runtime/subtree.h"
#include "tree_sitter/runtime.h"
#include "tree_sitter/parser.h"
typedef struct {
TSLexer data;

View file

@ -1,45 +1,119 @@
#include <stdbool.h>
#include "runtime/node.h"
#include "runtime/subtree.h"
#include "runtime/tree.h"
#include "runtime/document.h"
#include "runtime/language.h"
TSNode ts_node_make(const Tree *tree, uint32_t byte, uint32_t row) {
return (TSNode){.data = tree, .offset = { byte, row } };
// Cursor over the direct children of one subtree, used to build a TSNode for
// each child in order.
typedef struct {
// Subtree whose children are being visited.
const Subtree *parent;
// Tree that the produced nodes belong to.
const TSTree *tree;
// Running position; advanced past each child as it is visited.
Length position;
// Index of the next child to visit.
uint32_t child_index;
// Number of non-extra children visited so far; indexes alias_sequence.
uint32_t structural_child_index;
// Alias symbols for the parent's structural children, or NULL if none.
const TSSymbol *alias_sequence;
} ChildIterator;
// TSNode - constructors
// Builds a TSNode for `subtree` within `tree`. The four context slots hold,
// in order, the start byte, start row, start column and alias symbol.
TSNode ts_node_new(const TSTree *tree, const Subtree *subtree, Length position, TSSymbol alias) {
  TSNode node = {
    .context = {position.bytes, position.extent.row, position.extent.column, alias},
    .id = subtree,
    .tree = tree,
  };
  return node;
}
/*
* Private
*/
static inline TSNode ts_node__null() {
return ts_node_make(NULL, 0, 0);
return ts_node_new(NULL, NULL, length_zero(), 0);
}
static inline const Tree *ts_node__tree(TSNode self) {
return self.data;
// TSNode - accessors
uint32_t ts_node_start_byte(const TSNode self) {
return self.context[0];
}
static inline uint32_t ts_node__offset_byte(TSNode self) {
return self.offset[0];
TSPoint ts_node_start_point(const TSNode self) {
return (TSPoint) {self.context[1], self.context[2]};
}
static inline uint32_t ts_node__offset_row(TSNode self) {
return self.offset[1];
static inline uint32_t ts_node__alias(const TSNode *self) {
return self->context[3];
}
static inline const Subtree *ts_node__subtree(TSNode self) {
return self.id;
}
static inline const TSTree *ts_node__tree(const TSNode *self) {
return self->tree;
}
// ChildIterator
static inline ChildIterator ts_node_iterate_children(const TSNode *node) {
const TSTree *tree = ts_node__tree(node);
const Subtree *subtree = ts_node__subtree(*node);
const TSSymbol *alias_sequence = ts_language_alias_sequence(
tree->language,
subtree->alias_sequence_id
);
return (ChildIterator) {
.tree = tree,
.parent = subtree,
.position = {ts_node_start_byte(*node), ts_node_start_point(*node)},
.child_index = 0,
.structural_child_index = 0,
.alias_sequence = alias_sequence,
};
}
// Advances the iterator by one child. Returns false once every child has been
// visited; otherwise stores the child's node in `result` and returns true.
// Extra children do not consume an alias-sequence slot and get no alias.
static inline bool ts_node_child_iterator_next(ChildIterator *self, TSNode *result) {
  uint32_t index = self->child_index;
  if (index == self->parent->children.size) return false;

  const Subtree *child = self->parent->children.contents[index];

  TSSymbol alias_symbol = 0;
  if (!child->extra) {
    uint32_t structural_index = self->structural_child_index++;
    if (self->alias_sequence) {
      alias_symbol = self->alias_sequence[structural_index];
    }
  }

  // The iterator starts at the parent's own start position, so padding is
  // only added for children after the first.
  Length child_start = self->position;
  if (index > 0) {
    child_start = length_add(child_start, child->padding);
  }

  *result = ts_node_new(self->tree, child, child_start, alias_symbol);
  self->position = length_add(child_start, child->size);
  self->child_index = index + 1;
  return true;
}
// TSNode - private
static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
const Tree *tree = ts_node__tree(self);
const Subtree *tree = ts_node__subtree(self);
if (include_anonymous) {
return tree->context.alias_symbol || tree->visible;
return tree->visible || ts_node__alias(&self);
} else {
return tree->context.alias_is_named || (tree->visible && tree->named);
return (
(tree->visible && tree->named) ||
(
ts_node__alias(&self) &&
ts_language_symbol_metadata(
ts_node__tree(&self)->language,
ts_node__alias(&self)
).named
)
);
}
}
static inline uint32_t ts_node__relevant_child_count(TSNode self,
bool include_anonymous) {
const Tree *tree = ts_node__tree(self);
if (tree->child_count > 0) {
static inline uint32_t ts_node__relevant_child_count(TSNode self, bool include_anonymous) {
const Subtree *tree = ts_node__subtree(self);
if (tree->children.size > 0) {
if (include_anonymous) {
return tree->visible_child_count;
} else {
@ -50,44 +124,27 @@ static inline uint32_t ts_node__relevant_child_count(TSNode self,
}
}
static inline TSNode ts_node__direct_parent(TSNode self, uint32_t *index) {
const Tree *tree = ts_node__tree(self);
*index = tree->context.index;
return ts_node_make(
tree->context.parent,
ts_node__offset_byte(self) - tree->context.offset.bytes,
ts_node__offset_row(self) - tree->context.offset.extent.row
);
}
static inline TSNode ts_node__direct_child(TSNode self, uint32_t i) {
const Tree *child_tree = ts_node__tree(self)->children[i];
return ts_node_make(
child_tree,
ts_node__offset_byte(self) + child_tree->context.offset.bytes,
ts_node__offset_row(self) + child_tree->context.offset.extent.row
);
}
static inline TSNode ts_node__child(TSNode self, uint32_t child_index,
bool include_anonymous) {
static inline TSNode ts_node__child(TSNode self, uint32_t child_index, bool include_anonymous) {
const TSTree *tree = ts_node__tree(&self);
TSNode result = self;
bool did_descend = true;
while (did_descend) {
did_descend = false;
TSNode child;
uint32_t index = 0;
for (uint32_t i = 0; i < ts_node__tree(result)->child_count; i++) {
TSNode child = ts_node__direct_child(result, i);
ChildIterator iterator = ts_node_iterate_children(&result);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (ts_node__is_relevant(child, include_anonymous)) {
if (index == child_index)
if (index == child_index) {
ts_tree_set_cached_parent(tree, &child, &self);
return child;
}
index++;
} else {
uint32_t grandchild_index = child_index - index;
uint32_t grandchild_count =
ts_node__relevant_child_count(child, include_anonymous);
uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous);
if (grandchild_index < grandchild_count) {
did_descend = true;
result = child;
@ -103,53 +160,107 @@ static inline TSNode ts_node__child(TSNode self, uint32_t child_index,
}
static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) {
TSNode result = self;
uint32_t target_end_byte = ts_node_end_byte(self);
do {
uint32_t index;
result = ts_node__direct_parent(result, &index);
if (!result.data)
break;
TSNode node = ts_node_parent(self);
TSNode earlier_node = ts_node__null();
bool earlier_node_is_relevant = false;
for (uint32_t i = index - 1; i + 1 > 0; i--) {
TSNode child = ts_node__direct_child(result, i);
if (ts_node__is_relevant(child, include_anonymous))
return child;
uint32_t grandchild_count =
ts_node__relevant_child_count(child, include_anonymous);
if (grandchild_count > 0)
return ts_node__child(child, grandchild_count - 1, include_anonymous);
while (ts_node__subtree(node)) {
TSNode earlier_child = ts_node__null();
bool earlier_child_is_relevant = false;
bool found_child_containing_target = false;
TSNode child;
ChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (iterator.position.bytes >= target_end_byte) {
found_child_containing_target = ts_node__subtree(child) != ts_node__subtree(self);
break;
}
if (ts_node__is_relevant(child, include_anonymous)) {
earlier_child = child;
earlier_child_is_relevant = true;
} else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
earlier_child = child;
earlier_child_is_relevant = false;
}
}
} while (!ts_node__tree(result)->visible);
if (found_child_containing_target) {
if (!ts_node_is_null(earlier_child)) {
earlier_node = earlier_child;
earlier_node_is_relevant = earlier_child_is_relevant;
}
node = child;
} else if (earlier_child_is_relevant) {
return earlier_child;
} else if (!ts_node_is_null(earlier_child)) {
node = earlier_child;
} else if (earlier_node_is_relevant) {
return earlier_node;
} else {
node = earlier_node;
}
}
return ts_node__null();
}
static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) {
TSNode result = self;
uint32_t target_end_byte = ts_node_end_byte(self);
do {
uint32_t index;
result = ts_node__direct_parent(result, &index);
if (!result.data)
break;
TSNode node = ts_node_parent(self);
TSNode later_node = ts_node__null();
bool later_node_is_relevant = false;
for (uint32_t i = index + 1; i < ts_node__tree(result)->child_count; i++) {
TSNode child = ts_node__direct_child(result, i);
if (ts_node__is_relevant(child, include_anonymous))
return child;
uint32_t grandchild_count =
ts_node__relevant_child_count(child, include_anonymous);
if (grandchild_count > 0)
return ts_node__child(child, 0, include_anonymous);
while (ts_node__subtree(node)) {
TSNode later_child = ts_node__null();
bool later_child_is_relevant = false;
TSNode child_containing_target = ts_node__null();
TSNode child;
ChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (iterator.position.bytes < target_end_byte) continue;
if (ts_node_start_byte(child) <= ts_node_start_byte(self)) {
if (ts_node__subtree(child) != ts_node__subtree(self)) {
child_containing_target = child;
}
} else if (ts_node__is_relevant(child, include_anonymous)) {
later_child = child;
later_child_is_relevant = true;
break;
} else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
later_child = child;
later_child_is_relevant = false;
break;
}
}
} while (!ts_node__tree(result)->visible);
if (!ts_node_is_null(child_containing_target)) {
if (!ts_node_is_null(later_child)) {
later_node = later_child;
later_node_is_relevant = later_child_is_relevant;
}
node = child_containing_target;
} else if (later_child_is_relevant) {
return later_child;
} else if (!ts_node_is_null(later_child)) {
node = later_child;
} else if (later_node_is_relevant) {
return later_node;
} else {
node = later_node;
}
}
return ts_node__null();
}
static inline bool point_gt(TSPoint a, TSPoint b) {
return a.row > b.row || (a.row == b.row && a.column > b.column);
static inline bool point_gt(TSPoint self, TSPoint other) {
return self.row > other.row || (self.row == other.row && self.column > other.column);
}
static inline TSNode ts_node__first_child_for_byte(TSNode self, uint32_t goal,
@ -160,8 +271,9 @@ static inline TSNode ts_node__first_child_for_byte(TSNode self, uint32_t goal,
while (did_descend) {
did_descend = false;
for (uint32_t i = 0; i < ts_node__tree(node)->child_count; i++) {
TSNode child = ts_node__direct_child(node, i);
TSNode child;
ChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (ts_node_end_byte(child) > goal) {
if (ts_node__is_relevant(child, include_anonymous)) {
return child;
@ -182,17 +294,22 @@ static inline TSNode ts_node__descendant_for_byte_range(TSNode self, uint32_t mi
bool include_anonymous) {
TSNode node = self;
TSNode last_visible_node = self;
const TSTree *tree = ts_node__tree(&self);
bool did_descend = true;
while (did_descend) {
did_descend = false;
for (uint32_t i = 0, n = ts_node__tree(node)->child_count; i < n; i++) {
TSNode child = ts_node__direct_child(node, i);
if (ts_node_end_byte(child) > max) {
TSNode child;
ChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (iterator.position.bytes > max) {
if (ts_node_start_byte(child) > min) break;
node = child;
if (ts_node__is_relevant(node, include_anonymous)) last_visible_node = node;
if (ts_node__is_relevant(node, include_anonymous)) {
ts_tree_set_cached_parent(tree, &child, &last_visible_node);
last_visible_node = node;
}
did_descend = true;
break;
}
@ -207,125 +324,116 @@ static inline TSNode ts_node__descendant_for_point_range(TSNode self, TSPoint mi
bool include_anonymous) {
TSNode node = self;
TSNode last_visible_node = self;
TSPoint start_position = ts_node_start_point(self);
TSPoint end_position = ts_node_end_point(self);
const TSTree *tree = ts_node__tree(&self);
bool did_descend = true;
while (did_descend) {
did_descend = false;
for (uint32_t i = 0, n = ts_node__tree(node)->child_count; i < n; i++) {
TSNode child = ts_node__direct_child(node, i);
const Tree *child_tree = ts_node__tree(child);
if (i > 0) start_position = point_add(start_position, child_tree->padding.extent);
end_position = point_add(start_position, child_tree->size.extent);
if (point_gt(end_position, max)) {
if (point_gt(start_position, min)) break;
TSNode child;
ChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (point_gt(iterator.position.extent, max)) {
if (point_gt(ts_node_start_point(child), min)) break;
node = child;
if (ts_node__is_relevant(node, include_anonymous)) last_visible_node = node;
if (ts_node__is_relevant(node, include_anonymous)) {
ts_tree_set_cached_parent(tree, &child, &last_visible_node);
last_visible_node = node;
}
did_descend = true;
break;
}
start_position = end_position;
}
}
return last_visible_node;
}
/*
* Public
*/
uint32_t ts_node_start_byte(TSNode self) {
return ts_node__offset_byte(self) + ts_node__tree(self)->padding.bytes;
}
// TSNode - public
uint32_t ts_node_end_byte(TSNode self) {
return ts_node_start_byte(self) + ts_node__tree(self)->size.bytes;
}
TSPoint ts_node_start_point(TSNode self) {
const Tree *tree = ts_node__tree(self);
return (TSPoint){ ts_node__offset_row(self) + tree->padding.extent.row,
ts_tree_start_column(tree) };
return ts_node_start_byte(self) + ts_node__subtree(self)->size.bytes;
}
TSPoint ts_node_end_point(TSNode self) {
const Tree *tree = ts_node__tree(self);
return (TSPoint){ ts_node__offset_row(self) + tree->padding.extent.row +
tree->size.extent.row,
ts_tree_end_column(tree) };
return point_add(ts_node_start_point(self), ts_node__subtree(self)->size.extent);
}
TSSymbol ts_node_symbol(TSNode self) {
const Tree *tree = ts_node__tree(self);
return tree->context.alias_symbol ? tree->context.alias_symbol : tree->symbol;
const Subtree *tree = ts_node__subtree(self);
return ts_node__alias(&self) ? ts_node__alias(&self) : tree->symbol;
}
const char *ts_node_type(TSNode self, const TSDocument *document) {
return ts_language_symbol_name(document->parser.language, ts_node_symbol(self));
const char *ts_node_type(TSNode self) {
return ts_language_symbol_name(ts_node__tree(&self)->language, ts_node_symbol(self));
}
char *ts_node_string(TSNode self, const TSDocument *document) {
return ts_tree_string(ts_node__tree(self), document->parser.language, false);
char *ts_node_string(TSNode self) {
return ts_subtree_string(ts_node__subtree(self), ts_node__tree(&self)->language, false);
}
bool ts_node_eq(TSNode self, TSNode other) {
return
ts_tree_eq(ts_node__tree(self), ts_node__tree(other)) &&
self.offset[0] == other.offset[0] &&
self.offset[1] == other.offset[1];
return self.tree == other.tree && self.id == other.id;
}
// Reports whether `self` is a null node, i.e. a node that has no subtree
// behind it.
bool ts_node_is_null(TSNode self) {
  return !ts_node__subtree(self);
}
bool ts_node_is_named(TSNode self) {
const Tree *tree = ts_node__tree(self);
return tree->context.alias_symbol ? tree->context.alias_is_named : tree->named;
const Subtree *tree = ts_node__subtree(self);
return ts_node__alias(&self)
? ts_language_symbol_metadata(ts_node__tree(&self)->language, ts_node__alias(&self)).named
: tree->named;
}
bool ts_node_is_missing(TSNode self) {
const Tree *tree = ts_node__tree(self);
const Subtree *tree = ts_node__subtree(self);
return tree->is_missing;
}
bool ts_node_has_changes(TSNode self) {
return ts_node__tree(self)->has_changes;
return ts_node__subtree(self)->has_changes;
}
bool ts_node_has_error(TSNode self) {
return ts_node__tree(self)->error_cost > 0;
return ts_node__subtree(self)->error_cost > 0;
}
TSNode ts_node_parent(TSNode self) {
TSNode result = self;
uint32_t index;
const TSTree *tree = ts_node__tree(&self);
TSNode node = ts_tree_get_cached_parent(tree, &self);
if (node.id) return node;
do {
result = ts_node__direct_parent(result, &index);
if (!result.data)
return ts_node__null();
} while (!ts_node__tree(result)->visible);
node = ts_tree_root_node(tree);
uint32_t end_byte = ts_node_end_byte(self);
if (ts_node__subtree(node) == ts_node__subtree(self)) return ts_node__null();
return result;
}
TSNode last_visible_node = node;
bool did_descend = true;
while (did_descend) {
did_descend = false;
uint32_t ts_node_child_index(TSNode self) {
const Tree *tree = ts_node__tree(self);
uint32_t result = 0;
for (;;) {
const Tree *parent = tree->context.parent;
uint32_t index = tree->context.index;
if (!parent) return UINT32_MAX;
for (uint32_t i = 0; i < index; i++) {
Tree *child = parent->children[i];
result += child->visible ? 1 : child->visible_child_count;
TSNode child;
ChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (
ts_node_start_byte(child) > ts_node_start_byte(self) ||
ts_node__subtree(child) == ts_node__subtree(self)
) break;
if (iterator.position.bytes >= end_byte) {
node = child;
if (ts_node__is_relevant(child, true)) {
ts_tree_set_cached_parent(tree, &node, &last_visible_node);
last_visible_node = node;
}
did_descend = true;
break;
}
}
if (parent->visible) break;
tree = parent;
}
return result;
return last_visible_node;
}
TSNode ts_node_child(TSNode self, uint32_t child_index) {
@ -337,8 +445,8 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
}
uint32_t ts_node_child_count(TSNode self) {
const Tree *tree = ts_node__tree(self);
if (tree->child_count > 0) {
const Subtree *tree = ts_node__subtree(self);
if (tree->children.size > 0) {
return tree->visible_child_count;
} else {
return 0;
@ -346,8 +454,8 @@ uint32_t ts_node_child_count(TSNode self) {
}
uint32_t ts_node_named_child_count(TSNode self) {
const Tree *tree = ts_node__tree(self);
if (tree->child_count > 0) {
const Subtree *tree = ts_node__subtree(self);
if (tree->children.size > 0) {
return tree->named_child_count;
} else {
return 0;

View file

@ -1,8 +0,0 @@
#ifndef RUNTIME_NODE_H_
#define RUNTIME_NODE_H_
#include "runtime/tree.h"
TSNode ts_node_make(const Tree *, uint32_t byte, uint32_t row);
#endif

File diff suppressed because it is too large Load diff

View file

@ -1,46 +0,0 @@
#ifndef RUNTIME_PARSER_H_
#define RUNTIME_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "runtime/stack.h"
#include "runtime/array.h"
#include "runtime/lexer.h"
#include "runtime/reusable_node.h"
#include "runtime/reduce_action.h"
#include "runtime/tree.h"
// Groups a token tree with the last external token and a byte index.
// NOTE(review): presumably remembers the most recently lexed token so it can
// be reused at the same position; confirm against the parser implementation.
typedef struct {
Tree *token;
Tree *last_external_token;
uint32_t byte_index;
} TokenCache;
// Per-parser state. This is the object operated on by the `parser_*`
// functions declared after this struct.
// NOTE(review): the field notes are inferred from names and types only;
// verify against the parser implementation before relying on them.
typedef struct {
Lexer lexer;
Stack *stack;
TreePool tree_pool;
const TSLanguage *language;
ReduceActionSet reduce_actions;
Tree *finished_tree;
Tree scratch_tree;
TokenCache token_cache;
ReusableNode reusable_node;
void *external_scanner_payload;  // opaque pointer; presumably owned by the external scanner
bool in_ambiguity;
bool print_debugging_graphs;
unsigned accept_count;
} Parser;
bool parser_init(Parser *);
void parser_destroy(Parser *);
Tree *parser_parse(Parser *, TSInput, Tree *, bool halt_on_error);
void parser_set_language(Parser *, const TSLanguage *);
#ifdef __cplusplus
}
#endif
#endif // RUNTIME_PARSER_H_

View file

@ -1,46 +1,84 @@
#include "runtime/tree.h"
#include "runtime/subtree.h"
typedef struct {
Tree *tree;
uint32_t byte_index;
Tree *last_external_token;
const Subtree *tree;
uint32_t child_index;
uint32_t byte_offset;
} StackEntry;
typedef struct {
Array(StackEntry) stack;
const Subtree *last_external_token;
} ReusableNode;
static inline ReusableNode reusable_node_new(Tree *tree) {
ReusableNode result = {tree, 0, NULL};
return result;
static inline ReusableNode reusable_node_new() {
return (ReusableNode) {array_new(), NULL};
}
static inline void reusable_node_pop(ReusableNode *self) {
self->byte_index += ts_tree_total_bytes(self->tree);
if (self->tree->has_external_tokens) {
self->last_external_token = ts_tree_last_external_token(self->tree);
static inline void reusable_node_reset(ReusableNode *self, const Subtree *tree) {
array_clear(&self->stack);
array_push(&self->stack, ((StackEntry) {
.tree = tree,
.child_index = 0,
.byte_offset = 0,
}));
self->last_external_token = NULL;
}
// Returns the subtree stored in the top entry of the entry stack, or NULL
// when the stack holds no entries.
static inline const Subtree *reusable_node_tree(ReusableNode *self) {
  if (self->stack.size == 0) return NULL;
  return self->stack.contents[self->stack.size - 1].tree;
}
// Returns the byte offset recorded in the top entry of the entry stack, or
// UINT32_MAX when the stack holds no entries.
static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
  if (self->stack.size == 0) return UINT32_MAX;
  return self->stack.contents[self->stack.size - 1].byte_offset;
}
// Disposes of the reusable node's entry stack by calling `array_delete` on it.
// `last_external_token` is not touched here.
static inline void reusable_node_delete(ReusableNode *self) {
array_delete(&self->stack);
}
static inline void reusable_node_advance(ReusableNode *self) {
StackEntry last_entry = *array_back(&self->stack);
uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree);
if (last_entry.tree->has_external_tokens) {
self->last_external_token = ts_subtree_last_external_token(last_entry.tree);
}
while (self->tree) {
Tree *parent = self->tree->context.parent;
uint32_t next_index = self->tree->context.index + 1;
if (parent && parent->child_count > next_index) {
self->tree = parent->children[next_index];
return;
}
self->tree = parent;
}
const Subtree *tree;
uint32_t next_index;
do {
StackEntry popped_entry = array_pop(&self->stack);
next_index = popped_entry.child_index + 1;
if (self->stack.size == 0) return;
tree = array_back(&self->stack)->tree;
} while (tree->children.size <= next_index);
array_push(&self->stack, ((StackEntry) {
.tree = tree->children.contents[next_index],
.child_index = next_index,
.byte_offset = byte_offset,
}));
}
static inline ReusableNode reusable_node_after_leaf(const ReusableNode *self) {
ReusableNode result = *self;
while (result.tree->child_count > 0)
result.tree = result.tree->children[0];
reusable_node_pop(&result);
return result;
}
static inline bool reusable_node_breakdown(ReusableNode *self) {
if (self->tree->child_count == 0) {
return false;
} else {
self->tree = self->tree->children[0];
// Moves from the current (top-of-stack) subtree to its first child.
//
// Returns false and leaves the stack unchanged when the current subtree has
// no children. Otherwise pushes an entry for child 0, which starts at the
// same byte offset as its parent, and returns true.
static inline bool reusable_node_descend(ReusableNode *self) {
  StackEntry top = *array_back(&self->stack);
  if (top.tree->children.size == 0) return false;

  StackEntry first_child = {
    .tree = top.tree->children.contents[0],
    .child_index = 0,
    .byte_offset = top.byte_offset,
  };
  array_push(&self->stack, first_child);
  return true;
}
// Descends through first children until the current subtree has none, then
// advances past that childless subtree.
static inline void reusable_node_advance_past_leaf(ReusableNode *self) {
  bool descended;
  do {
    descended = reusable_node_descend(self);
  } while (descended);
  reusable_node_advance(self);
}

View file

@ -1,5 +1,6 @@
#include "runtime/alloc.h"
#include "runtime/tree.h"
#include "runtime/language.h"
#include "runtime/subtree.h"
#include "runtime/array.h"
#include "runtime/stack.h"
#include "runtime/length.h"
@ -20,7 +21,7 @@ typedef struct StackNode StackNode;
typedef struct {
StackNode *node;
Tree *tree;
const Subtree *subtree;
bool is_pending;
} StackLink;
@ -31,14 +32,14 @@ struct StackNode {
short unsigned int link_count;
uint32_t ref_count;
unsigned error_cost;
unsigned depth;
unsigned node_count;
int dynamic_precedence;
};
typedef struct {
StackNode *node;
TreeArray trees;
uint32_t tree_count;
SubtreeArray subtrees;
uint32_t subtree_count;
bool is_pending;
} Iterator;
@ -49,12 +50,19 @@ typedef struct {
typedef Array(StackNode *) StackNodeArray;
// Status of a stack head; stored in `StackHead.status`.
typedef enum {
// Status assigned to every head created by `ts_stack__add_version`.
StackStatusActive,
// A head in this status has ERROR_COST_PER_RECOVERY added to its reported
// cost by `ts_stack_error_cost`.
StackStatusPaused,
// NOTE(review): presumably takes over from the former `is_halted` flag on
// StackHead; confirm against the functions that set this status.
StackStatusHalted,
} StackStatus;
typedef struct {
StackNode *node;
Tree *last_external_token;
uint32_t push_count;
bool is_halted;
const Subtree *last_external_token;
StackSummary *summary;
unsigned node_count_at_last_error;
TSSymbol lookahead_when_paused;
StackStatus status;
} StackHead;
struct Stack {
@ -63,7 +71,7 @@ struct Stack {
Array(Iterator) iterators;
StackNodeArray node_pool;
StackNode *base_node;
TreePool *tree_pool;
SubtreePool *subtree_pool;
};
typedef unsigned StackAction;
@ -83,7 +91,7 @@ static void stack_node_retain(StackNode *self) {
assert(self->ref_count != 0);
}
static void stack_node_release(StackNode *self, StackNodeArray *pool, TreePool *tree_pool) {
static void stack_node_release(StackNode *self, StackNodeArray *pool, SubtreePool *subtree_pool) {
recur:
assert(self->ref_count != 0);
self->ref_count--;
@ -92,10 +100,10 @@ recur:
StackNode *first_predecessor = NULL;
if (self->link_count > 0) {
for (unsigned i = self->link_count - 1; i > 0; i--) {
if (self->links[i].tree) ts_tree_release(tree_pool, self->links[i].tree);
stack_node_release(self->links[i].node, pool, tree_pool);
if (self->links[i].subtree) ts_subtree_release(subtree_pool, self->links[i].subtree);
stack_node_release(self->links[i].node, pool, subtree_pool);
}
if (self->links[0].tree) ts_tree_release(tree_pool, self->links[0].tree);
if (self->links[0].subtree) ts_subtree_release(subtree_pool, self->links[0].subtree);
first_predecessor = self->links[0].node;
}
@ -111,44 +119,31 @@ recur:
}
}
static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_pending,
TSStateId state, StackNodeArray *pool) {
static StackNode *stack_node_new(StackNode *previous_node, const Subtree *subtree,
bool is_pending, TSStateId state, StackNodeArray *pool) {
StackNode *node = pool->size > 0 ?
array_pop(pool) :
ts_malloc(sizeof(StackNode));
*node = (StackNode){.ref_count = 1, .link_count = 0, .state = state, .depth = 0};
*node = (StackNode){.ref_count = 1, .link_count = 0, .state = state};
if (previous_node) {
stack_node_retain(previous_node);
node->link_count = 1;
node->links[0] = (StackLink){
.node = previous_node,
.tree = tree,
.subtree = subtree,
.is_pending = is_pending,
};
node->position = previous_node->position;
node->error_cost = previous_node->error_cost;
node->dynamic_precedence = previous_node->dynamic_precedence;
node->node_count = previous_node->node_count;
if (tree) {
node->depth = previous_node->depth;
if (!tree->extra) node->depth++;
node->error_cost += tree->error_cost;
node->position = length_add(node->position, ts_tree_total_size(tree));
node->dynamic_precedence += tree->dynamic_precedence;
if (state == ERROR_STATE && !tree->extra) {
node->error_cost +=
ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) +
ERROR_COST_PER_SKIPPED_CHAR * tree->size.bytes +
ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row;
if (previous_node->links[0].tree) {
node->error_cost +=
ERROR_COST_PER_SKIPPED_CHAR * tree->padding.bytes +
ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row;
}
}
if (subtree) {
node->error_cost += subtree->error_cost;
node->position = length_add(node->position, ts_subtree_total_size(subtree));
node->dynamic_precedence += subtree->dynamic_precedence;
if (!subtree->extra) node->node_count += subtree->node_count;
}
} else {
node->position = length_zero();
@ -158,18 +153,18 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p
return node;
}
static bool stack__tree_is_equivalent(const Tree *left, const Tree *right) {
static bool stack__subtree_is_equivalent(const Subtree *left, const Subtree *right) {
return
left == right ||
(left &&
right &&
left->symbol == right->symbol &&
((left->error_cost > 0 && right->error_cost > 0) ||
(left->child_count == 0 && right->child_count == 0 &&
(left->children.size == 0 && right->children.size == 0 &&
left->padding.bytes == right->padding.bytes &&
left->size.bytes == right->size.bytes &&
left->extra == right->extra &&
ts_tree_external_token_state_eq(left, right))));
ts_subtree_external_scanner_state_eq(left, right))));
}
static void stack_node_add_link(StackNode *self, StackLink link) {
@ -177,7 +172,7 @@ static void stack_node_add_link(StackNode *self, StackLink link) {
for (int i = 0; i < self->link_count; i++) {
StackLink existing_link = self->links[i];
if (stack__tree_is_equivalent(existing_link.tree, link.tree)) {
if (stack__subtree_is_equivalent(existing_link.subtree, link.subtree)) {
if (existing_link.node == link.node) return;
if (existing_link.node->state == link.node->state &&
existing_link.node->position.bytes == link.node->position.bytes) {
@ -189,70 +184,81 @@ static void stack_node_add_link(StackNode *self, StackLink link) {
}
}
if (self->link_count < MAX_LINK_COUNT) {
stack_node_retain(link.node);
if (link.tree) ts_tree_retain(link.tree);
self->links[self->link_count++] = link;
}
if (self->link_count == MAX_LINK_COUNT) return;
stack_node_retain(link.node);
if (link.subtree) ts_subtree_retain(link.subtree);
self->links[self->link_count++] = link;
unsigned node_count = link.node->node_count;
if (link.subtree) node_count += link.subtree->node_count;
if (node_count > self->node_count) self->node_count = node_count;
}
static void stack_head_delete(StackHead *self, StackNodeArray *pool, TreePool *tree_pool) {
static void stack_head_delete(StackHead *self, StackNodeArray *pool, SubtreePool *subtree_pool) {
if (self->node) {
if (self->last_external_token) {
ts_tree_release(tree_pool, self->last_external_token);
ts_subtree_release(subtree_pool, self->last_external_token);
}
if (self->summary) {
array_delete(self->summary);
ts_free(self->summary);
}
stack_node_release(self->node, pool, tree_pool);
stack_node_release(self->node, pool, subtree_pool);
}
}
static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version,
StackNode *node, Tree *last_external_token) {
StackNode *node) {
StackHead head = {
.node = node,
.push_count = self->heads.contents[original_version].push_count,
.last_external_token = last_external_token,
.is_halted = false,
.node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error,
.last_external_token = self->heads.contents[original_version].last_external_token,
.status = StackStatusActive,
.lookahead_when_paused = 0,
};
array_push(&self->heads, head);
stack_node_retain(node);
if (last_external_token) ts_tree_retain(last_external_token);
if (head.last_external_token) ts_subtree_retain(head.last_external_token);
return (StackVersion)(self->heads.size - 1);
}
static void ts_stack__add_slice(Stack *self, StackVersion original_version, StackNode *node,
TreeArray *trees, Tree *last_external_token) {
static void ts_stack__add_slice(Stack *self, StackVersion original_version,
StackNode *node, SubtreeArray *subtrees) {
for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) {
StackVersion version = self->slices.contents[i].version;
if (self->heads.contents[version].node == node) {
StackSlice slice = {*trees, version};
StackSlice slice = {*subtrees, version};
array_insert(&self->slices, i + 1, slice);
return;
}
}
StackVersion version = ts_stack__add_version(self, original_version, node, last_external_token);
StackSlice slice = { *trees, version };
StackVersion version = ts_stack__add_version(self, original_version, node);
StackSlice slice = { *subtrees, version };
array_push(&self->slices, slice);
}
inline StackSliceArray stack__iter(Stack *self, StackVersion version,
StackCallback callback, void *payload,
bool include_trees) {
int goal_subtree_count) {
array_clear(&self->slices);
array_clear(&self->iterators);
StackHead *head = array_get(&self->heads, version);
Tree *last_external_token = head->last_external_token;
Iterator iterator = {
.node = head->node,
.trees = array_new(),
.tree_count = 0,
.subtrees = array_new(),
.subtree_count = 0,
.is_pending = true,
};
bool include_subtrees = false;
if (goal_subtree_count >= 0) {
include_subtrees = true;
array_reserve(&iterator.subtrees, goal_subtree_count);
}
array_push(&self->iterators, iterator);
while (self->iterators.size > 0) {
@ -265,22 +271,21 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version,
bool should_stop = action & StackActionStop || node->link_count == 0;
if (should_pop) {
TreeArray trees = iterator->trees;
SubtreeArray subtrees = iterator->subtrees;
if (!should_stop)
ts_tree_array_copy(trees, &trees);
ts_tree_array_reverse(&trees);
ts_subtree_array_copy(subtrees, &subtrees);
ts_subtree_array_reverse(&subtrees);
ts_stack__add_slice(
self,
version,
node,
&trees,
last_external_token
&subtrees
);
}
if (should_stop) {
if (!should_pop)
ts_tree_array_delete(self->tree_pool, &iterator->trees);
ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees);
array_erase(&self->iterators, i);
i--, size--;
continue;
@ -298,24 +303,24 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version,
Iterator current_iterator = self->iterators.contents[i];
array_push(&self->iterators, current_iterator);
next_iterator = array_back(&self->iterators);
ts_tree_array_copy(next_iterator->trees, &next_iterator->trees);
ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees);
}
next_iterator->node = link.node;
if (link.tree) {
if (include_trees) {
array_push(&next_iterator->trees, link.tree);
ts_tree_retain(link.tree);
if (link.subtree) {
if (include_subtrees) {
array_push(&next_iterator->subtrees, link.subtree);
ts_subtree_retain(link.subtree);
}
if (!link.tree->extra) {
next_iterator->tree_count++;
if (!link.subtree->extra) {
next_iterator->subtree_count++;
if (!link.is_pending) {
next_iterator->is_pending = false;
}
}
} else {
next_iterator->tree_count++;
next_iterator->subtree_count++;
next_iterator->is_pending = false;
}
}
@ -325,19 +330,19 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version,
return self->slices;
}
Stack *ts_stack_new(TreePool *tree_pool) {
Stack *ts_stack_new(SubtreePool *subtree_pool) {
Stack *self = ts_calloc(1, sizeof(Stack));
array_init(&self->heads);
array_init(&self->slices);
array_init(&self->iterators);
array_init(&self->node_pool);
array_grow(&self->heads, 4);
array_grow(&self->slices, 4);
array_grow(&self->iterators, 4);
array_grow(&self->node_pool, MAX_NODE_POOL_SIZE);
array_reserve(&self->heads, 4);
array_reserve(&self->slices, 4);
array_reserve(&self->iterators, 4);
array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE);
self->tree_pool = tree_pool;
self->subtree_pool = subtree_pool;
self->base_node = stack_node_new(NULL, NULL, false, 1, &self->node_pool);
ts_stack_clear(self);
@ -349,9 +354,9 @@ void ts_stack_delete(Stack *self) {
array_delete(&self->slices);
if (self->iterators.contents)
array_delete(&self->iterators);
stack_node_release(self->base_node, &self->node_pool, self->tree_pool);
stack_node_release(self->base_node, &self->node_pool, self->subtree_pool);
for (uint32_t i = 0; i < self->heads.size; i++) {
stack_head_delete(&self->heads.contents[i], &self->node_pool, self->tree_pool);
stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool);
}
array_clear(&self->heads);
if (self->node_pool.contents) {
@ -375,39 +380,41 @@ Length ts_stack_position(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->node->position;
}
unsigned ts_stack_push_count(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->push_count;
}
void ts_stack_decrease_push_count(Stack *self, StackVersion version, unsigned decrement) {
array_get(&self->heads, version)->push_count -= decrement;
}
Tree *ts_stack_last_external_token(const Stack *self, StackVersion version) {
const Subtree *ts_stack_last_external_token(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->last_external_token;
}
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Tree *token) {
void ts_stack_set_last_external_token(Stack *self, StackVersion version, const Subtree *token) {
StackHead *head = array_get(&self->heads, version);
if (token) ts_tree_retain(token);
if (head->last_external_token) ts_tree_release(self->tree_pool, head->last_external_token);
if (token) ts_subtree_retain(token);
if (head->last_external_token) ts_subtree_release(self->subtree_pool, head->last_external_token);
head->last_external_token = token;
}
unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
StackHead *head = array_get(&self->heads, version);
return head->node->error_cost;
unsigned result = head->node->error_cost;
if (
head->status == StackStatusPaused ||
(head->node->state == ERROR_STATE && !head->node->links[0].subtree)) {
result += ERROR_COST_PER_RECOVERY;
}
return result;
}
void ts_stack_push(Stack *self, StackVersion version, Tree *tree, bool pending, TSStateId state) {
unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) {
StackHead *head = array_get(&self->heads, version);
StackNode *new_node = stack_node_new(head->node, tree, pending, state, &self->node_pool);
if (state == ERROR_STATE) {
head->push_count = 0;
} else if (!tree->extra) {
head->push_count++;
if (head->node->node_count < head->node_count_at_last_error) {
head->node_count_at_last_error = head->node->node_count;
}
stack_node_release(head->node, &self->node_pool, self->tree_pool);
return head->node->node_count - head->node_count_at_last_error;
}
// Pushes a new node onto the given stack version.
//
// The new node is built by `stack_node_new` on top of the version's current
// head node, linked through `subtree` (which may be NULL) with the given
// pending flag and parse state, and then becomes the version's head node.
// When `subtree` is NULL, the head's `node_count_at_last_error` is updated to
// the new node's `node_count`.
void ts_stack_push(Stack *self, StackVersion version, const Subtree *subtree,
                   bool pending, TSStateId state) {
  StackHead *head = array_get(&self->heads, version);
  StackNode *pushed = stack_node_new(head->node, subtree, pending, state, &self->node_pool);
  head->node = pushed;
  if (subtree == NULL) head->node_count_at_last_error = pushed->node_count;
}
@ -416,7 +423,7 @@ inline StackAction iterate_callback(void *payload, const Iterator *iterator) {
session->callback(
session->payload,
iterator->node->state,
iterator->tree_count
iterator->subtree_count
);
return StackActionNone;
}
@ -424,12 +431,12 @@ inline StackAction iterate_callback(void *payload, const Iterator *iterator) {
void ts_stack_iterate(Stack *self, StackVersion version,
StackIterateCallback callback, void *payload) {
StackIterateSession session = {payload, callback};
stack__iter(self, version, iterate_callback, &session, true);
stack__iter(self, version, iterate_callback, &session, -1);
}
inline StackAction pop_count_callback(void *payload, const Iterator *iterator) {
unsigned *goal_tree_count = payload;
if (iterator->tree_count == *goal_tree_count) {
unsigned *goal_subtree_count = payload;
if (iterator->subtree_count == *goal_subtree_count) {
return StackActionPop | StackActionStop;
} else {
return StackActionNone;
@ -437,11 +444,11 @@ inline StackAction pop_count_callback(void *payload, const Iterator *iterator) {
}
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
return stack__iter(self, version, pop_count_callback, &count, true);
return stack__iter(self, version, pop_count_callback, &count, count);
}
inline StackAction pop_pending_callback(void *payload, const Iterator *iterator) {
if (iterator->tree_count >= 1) {
if (iterator->subtree_count >= 1) {
if (iterator->is_pending) {
return StackActionPop | StackActionStop;
} else {
@ -453,7 +460,7 @@ inline StackAction pop_pending_callback(void *payload, const Iterator *iterator)
}
StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, true);
StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0);
if (pop.size > 0) {
ts_stack_renumber_version(self, pop.contents[0].version, version);
pop.contents[0].version = version;
@ -462,9 +469,9 @@ StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
}
inline StackAction pop_error_callback(void *payload, const Iterator *iterator) {
if (iterator->trees.size > 0) {
if (iterator->subtrees.size > 0) {
bool *found_error = payload;
if (!*found_error && iterator->trees.contents[0]->symbol == ts_builtin_sym_error) {
if (!*found_error && iterator->subtrees.contents[0]->symbol == ts_builtin_sym_error) {
*found_error = true;
return StackActionPop | StackActionStop;
} else {
@ -475,15 +482,21 @@ inline StackAction pop_error_callback(void *payload, const Iterator *iterator) {
}
}
StackSliceArray ts_stack_pop_error(Stack *self, StackVersion version) {
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) {
StackNode *node = array_get(&self->heads, version)->node;
for (unsigned i = 0; i < node->link_count; i++) {
if (node->links[i].tree && node->links[i].tree->symbol == ts_builtin_sym_error) {
if (node->links[i].subtree && node->links[i].subtree->symbol == ts_builtin_sym_error) {
bool found_error = false;
return stack__iter(self, version, pop_error_callback, &found_error, true);
StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1);
if (pop.size > 0) {
assert(pop.size == 1);
ts_stack_renumber_version(self, pop.contents[0].version, version);
return pop.contents[0].subtrees;
}
break;
}
}
return (StackSliceArray){.size = 0};
return (SubtreeArray){.size = 0};
}
inline StackAction pop_all_callback(void *payload, const Iterator *iterator) {
@ -491,7 +504,7 @@ inline StackAction pop_all_callback(void *payload, const Iterator *iterator) {
}
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) {
return stack__iter(self, version, pop_all_callback, NULL, true);
return stack__iter(self, version, pop_all_callback, NULL, 0);
}
typedef struct {
@ -502,7 +515,7 @@ typedef struct {
inline StackAction summarize_stack_callback(void *payload, const Iterator *iterator) {
SummarizeStackSession *session = payload;
TSStateId state = iterator->node->state;
unsigned depth = iterator->tree_count;
unsigned depth = iterator->subtree_count;
if (depth > session->max_depth) return StackActionStop;
for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) {
StackSummaryEntry entry = session->summary->contents[i];
@ -523,7 +536,7 @@ void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_dep
.max_depth = max_depth
};
array_init(session.summary);
stack__iter(self, version, summarize_stack_callback, &session, false);
stack__iter(self, version, summarize_stack_callback, &session, -1);
self->heads.contents[version].summary = session.summary;
}
@ -531,24 +544,26 @@ StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
return array_get(&self->heads, version)->summary;
}
unsigned ts_stack_depth_since_error(Stack *self, StackVersion version) {
return array_get(&self->heads, version)->node->depth;
}
// Return the dynamic precedence recorded on the node at the head of the
// given stack version.
int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->node->dynamic_precedence;
}
void ts_stack_remove_version(Stack *self, StackVersion version) {
stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->tree_pool);
stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool);
array_erase(&self->heads, version);
}
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
assert(v2 < v1);
assert((uint32_t)v1 < self->heads.size);
stack_head_delete(&self->heads.contents[v2], &self->node_pool, self->tree_pool);
self->heads.contents[v2] = self->heads.contents[v1];
StackHead *source_head = &self->heads.contents[v1];
StackHead *target_head = &self->heads.contents[v2];
if (target_head->summary && !source_head->summary) {
source_head->summary = target_head->summary;
target_head->summary = NULL;
}
stack_head_delete(target_head, &self->node_pool, self->subtree_pool);
*target_head = *source_head;
array_erase(&self->heads, v1);
}
@ -563,62 +578,84 @@ StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
array_push(&self->heads, self->heads.contents[version]);
StackHead *head = array_back(&self->heads);
stack_node_retain(head->node);
if (head->last_external_token) ts_tree_retain(head->last_external_token);
if (head->last_external_token) ts_subtree_retain(head->last_external_token);
head->summary = NULL;
return self->heads.size - 1;
}
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
if (ts_stack_can_merge(self, version1, version2)) {
ts_stack_force_merge(self, version1, version2);
return true;
} else {
return false;
if (!ts_stack_can_merge(self, version1, version2)) return false;
StackHead *head1 = &self->heads.contents[version1];
StackHead *head2 = &self->heads.contents[version2];
for (uint32_t i = 0; i < head2->node->link_count; i++) {
stack_node_add_link(head1->node, head2->node->links[i]);
}
if (head1->node->state == ERROR_STATE) {
head1->node_count_at_last_error = head1->node->node_count;
}
ts_stack_remove_version(self, version2);
return true;
}
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
StackHead *head1 = &self->heads.contents[version1];
StackHead *head2 = &self->heads.contents[version2];
return
!head1->is_halted && !head2->is_halted &&
head1->status == StackStatusActive &&
head2->status == StackStatusActive &&
head1->node->state == head2->node->state &&
head1->node->position.bytes == head2->node->position.bytes &&
head1->node->depth == head2->node->depth &&
ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token);
}
void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion version2) {
StackHead *head1 = &self->heads.contents[version1];
StackHead *head2 = &self->heads.contents[version2];
for (uint32_t i = 0; i < head2->node->link_count; i++) {
stack_node_add_link(head1->node, head2->node->links[i]);
}
ts_stack_remove_version(self, version2);
head1->node->error_cost == head2->node->error_cost &&
ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token);
}
void ts_stack_halt(Stack *self, StackVersion version) {
array_get(&self->heads, version)->is_halted = true;
array_get(&self->heads, version)->status = StackStatusHalted;
}
bool ts_stack_is_halted(Stack *self, StackVersion version) {
return array_get(&self->heads, version)->is_halted;
// Mark the given stack version as paused, remembering the lookahead symbol
// it was paused on and snapshotting the head node's node count as the count
// at the last error.
void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) {
  StackHead *paused_head = array_get(&self->heads, version);
  paused_head->node_count_at_last_error = paused_head->node->node_count;
  paused_head->lookahead_when_paused = lookahead;
  paused_head->status = StackStatusPaused;
}
// Report whether the given stack version currently has the active status.
bool ts_stack_is_active(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusActive;
}
// Report whether the given stack version currently has the halted status.
bool ts_stack_is_halted(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusHalted;
}
// Report whether the given stack version currently has the paused status.
bool ts_stack_is_paused(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusPaused;
}
// Reactivate a paused stack version and hand back the lookahead symbol that
// was stored when it was paused. The stored lookahead is reset to 0.
// The version must currently be paused (checked with an assertion).
TSSymbol ts_stack_resume(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  assert(head->status == StackStatusPaused);
  const TSSymbol paused_lookahead = head->lookahead_when_paused;
  head->lookahead_when_paused = 0;
  head->status = StackStatusActive;
  return paused_lookahead;
}
void ts_stack_clear(Stack *self) {
stack_node_retain(self->base_node);
for (uint32_t i = 0; i < self->heads.size; i++) {
stack_head_delete(&self->heads.contents[i], &self->node_pool, self->tree_pool);
stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool);
}
array_clear(&self->heads);
array_push(&self->heads, ((StackHead){
.node = self->base_node,
.last_external_token = NULL,
.is_halted = false,
.status = StackStatusActive,
.lookahead_when_paused = 0,
}));
}
bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
bool was_recording_allocations = ts_toggle_allocation_recording(false);
if (!f)
f = stderr;
@ -631,19 +668,26 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
array_clear(&self->iterators);
for (uint32_t i = 0; i < self->heads.size; i++) {
if (ts_stack_is_halted(self, i)) continue;
StackHead *head = &self->heads.contents[i];
if (head->status == StackStatusHalted) continue;
fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
fprintf(
f,
"node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, "
"labeltooltip=\"push_count: %u\ndepth: %u", i, head->node, i, head->push_count, head->node->depth
fprintf(f, "node_head_%u -> node_%p [", i, head->node);
if (head->status == StackStatusPaused) {
fprintf(f, "color=red ");
}
fprintf(f,
"label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u",
i,
ts_stack_node_count_since_error(self, i),
ts_stack_error_cost(self, i)
);
if (head->last_external_token) {
TSExternalTokenState *state = &head->last_external_token->external_token_state;
const char *data = ts_external_token_state_data(state);
fprintf(f, "\nexternal_token_state:");
const ExternalScannerState *state = &head->last_external_token->external_scanner_state;
const char *data = ts_external_scanner_state_data(state);
fprintf(f, "\nexternal_scanner_state:");
for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]);
}
@ -672,17 +716,18 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
fprintf(f, "node_%p [", node);
if (node->state == ERROR_STATE)
fprintf(f, "label=\"?\"");
else if (node->link_count == 1 && node->links[0].tree &&
node->links[0].tree->extra)
else if (node->link_count == 1 && node->links[0].subtree &&
node->links[0].subtree->extra)
fprintf(f, "shape=point margin=0 label=\"\"");
else
fprintf(f, "label=\"%d\"", node->state);
fprintf(
f,
" tooltip=\"position: %u,%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
" tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
node->position.extent.row,
node->position.extent.column,
node->node_count,
node->error_cost,
node->dynamic_precedence
);
@ -692,28 +737,24 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
fprintf(f, "node_%p -> node_%p [", node, link.node);
if (link.is_pending)
fprintf(f, "style=dashed ");
if (link.tree && link.tree->extra)
if (link.subtree && link.subtree->extra)
fprintf(f, "fontcolor=gray ");
if (!link.tree) {
if (!link.subtree) {
fprintf(f, "color=red");
} else if (link.tree->symbol == ts_builtin_sym_error) {
fprintf(f, "label=\"ERROR\"");
} else {
fprintf(f, "label=\"");
if (!link.tree->named)
fprintf(f, "'");
const char *name = symbol_names[link.tree->symbol];
if (link.subtree->visible && !link.subtree->named) fprintf(f, "'");
const char *name = ts_language_symbol_name(language, link.subtree->symbol);
for (const char *c = name; *c; c++) {
if (*c == '\"' || *c == '\\')
fprintf(f, "\\");
if (*c == '\"' || *c == '\\') fprintf(f, "\\");
fprintf(f, "%c", *c);
}
if (!link.tree->named)
fprintf(f, "'");
fprintf(f, "\" labeltooltip=\"error_cost: %u\ndynamic_precedence: %u\"",
link.tree->error_cost,
link.tree->dynamic_precedence);
if (link.subtree->visible && !link.subtree->named) fprintf(f, "'");
fprintf(f, "\"");
fprintf(f, "labeltooltip=\"error_cost: %u\ndynamic_precedence: %u\"",
link.subtree->error_cost,
link.subtree->dynamic_precedence);
}
fprintf(f, "];\n");

View file

@ -6,7 +6,7 @@ extern "C" {
#endif
#include "runtime/array.h"
#include "runtime/tree.h"
#include "runtime/subtree.h"
#include "runtime/error_costs.h"
#include <stdio.h>
@ -16,7 +16,7 @@ typedef unsigned StackVersion;
#define STACK_VERSION_NONE ((StackVersion)-1)
typedef struct {
TreeArray trees;
SubtreeArray subtrees;
StackVersion version;
} StackSlice;
typedef Array(StackSlice) StackSliceArray;
@ -29,7 +29,7 @@ typedef struct {
typedef Array(StackSummaryEntry) StackSummary;
// Create a stack.
Stack *ts_stack_new(TreePool *);
Stack *ts_stack_new(SubtreePool *);
// Release the memory reserved for a given stack.
void ts_stack_delete(Stack *);
@ -41,20 +41,11 @@ uint32_t ts_stack_version_count(const Stack *);
// empty, this returns the initial state, 0.
TSStateId ts_stack_state(const Stack *, StackVersion);
// Get the number of trees that have been pushed to a given version of
// the stack.
unsigned ts_stack_push_count(const Stack *, StackVersion);
// In the event that trees were permanently removed from some version
// of the stack, decrease the version's push count to account for the
// removal.
void ts_stack_decrease_push_count(Stack *, StackVersion, unsigned);
// Get the last external token associated with a given version of the stack.
Tree *ts_stack_last_external_token(const Stack *, StackVersion);
const Subtree *ts_stack_last_external_token(const Stack *, StackVersion);
// Set the last external token associated with a given version of the stack.
void ts_stack_set_last_external_token(Stack *, StackVersion, Tree *);
void ts_stack_set_last_external_token(Stack *, StackVersion, const Subtree *);
// Get the position of the given version of the stack within the document.
Length ts_stack_position(const Stack *, StackVersion);
@ -64,7 +55,7 @@ Length ts_stack_position(const Stack *, StackVersion);
// This transfers ownership of the tree to the Stack. Callers that
// need to retain ownership of the tree for their own purposes should
// first retain the tree.
void ts_stack_push(Stack *, StackVersion, Tree *, bool, TSStateId);
void ts_stack_push(Stack *, StackVersion, const Subtree *, bool, TSStateId);
// Pop the given number of entries from the given version of the stack. This
// operation can increase the number of stack versions by revealing multiple
@ -74,7 +65,7 @@ void ts_stack_push(Stack *, StackVersion, Tree *, bool, TSStateId);
StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count);
// Remove an error at the top of the given version of the stack.
StackSliceArray ts_stack_pop_error(Stack *, StackVersion);
SubtreeArray ts_stack_pop_error(Stack *, StackVersion);
// Remove any pending trees from the top of the given version of the stack.
StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);
@ -82,7 +73,9 @@ StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);
// Remove all trees from the given version of the stack.
StackSliceArray ts_stack_pop_all(Stack *, StackVersion);
unsigned ts_stack_depth_since_error(Stack *, StackVersion);
// Get the maximum number of tree nodes reachable from this version of the stack
// since the last error was detected.
unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);
int ts_stack_dynamic_precedence(Stack *, StackVersion);
@ -94,17 +87,27 @@ void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);
// given version of the stack.
StackSummary *ts_stack_get_summary(Stack *, StackVersion);
// Get the total cost of all errors on the given version of the stack.
unsigned ts_stack_error_cost(const Stack *, StackVersion version);
// Merge the given two stack versions if possible, returning true
// if they were successfully merged and false otherwise.
bool ts_stack_merge(Stack *, StackVersion, StackVersion);
// Determine whether the given two stack versions can be merged.
bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
void ts_stack_force_merge(Stack *, StackVersion, StackVersion);
TSSymbol ts_stack_resume(Stack *, StackVersion);
void ts_stack_pause(Stack *, StackVersion, TSSymbol);
void ts_stack_halt(Stack *, StackVersion);
bool ts_stack_is_halted(Stack *, StackVersion);
bool ts_stack_is_active(const Stack *, StackVersion);
bool ts_stack_is_paused(const Stack *, StackVersion);
bool ts_stack_is_halted(const Stack *, StackVersion);
void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);
@ -117,7 +120,7 @@ void ts_stack_remove_version(Stack *, StackVersion);
void ts_stack_clear(Stack *);
bool ts_stack_print_dot_graph(Stack *, const char **, FILE *);
bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *);
typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t);

View file

@ -1,13 +1,7 @@
#include "tree_sitter/runtime.h"
#include "runtime/string_input.h"
#include "runtime/alloc.h"
#include <string.h>
typedef struct {
const char *string;
uint32_t position;
uint32_t length;
} TSStringInput;
static const char *ts_string_input__read(void *payload, uint32_t *bytes_read) {
TSStringInput *input = (TSStringInput *)payload;
if (input->position >= input->length) {
@ -26,17 +20,12 @@ static int ts_string_input__seek(void *payload, uint32_t byte, TSPoint _) {
return (byte < input->length);
}
TSInput ts_string_input_make(const char *string) {
return ts_string_input_make_with_length(string, strlen(string));
}
TSInput ts_string_input_make_with_length(const char *string, uint32_t length) {
TSStringInput *input = ts_malloc(sizeof(TSStringInput));
input->string = string;
input->position = 0;
input->length = length;
return (TSInput){
.payload = input,
void ts_string_input_init(TSStringInput *self, const char *string, uint32_t length) {
self->string = string;
self->position = 0;
self->length = length;
self->input = (TSInput) {
.payload = self,
.read = ts_string_input__read,
.seek = ts_string_input__seek,
.encoding = TSInputEncodingUTF8,

View file

@ -7,8 +7,14 @@ extern "C" {
#include "tree_sitter/runtime.h"
TSInput ts_string_input_make(const char *);
TSInput ts_string_input_make_with_length(const char *, uint32_t);
// State backing a TSInput that reads from an in-memory string. It is filled
// in by ts_string_input_init.
typedef struct {
// The string handed to ts_string_input_init; the pointer is stored, the
// characters are not copied.
const char *string;
// Current read offset into `string`; ts_string_input_init resets it to 0.
uint32_t position;
// Length handed to ts_string_input_init (presumably in bytes - confirm).
uint32_t length;
// The TSInput that ts_string_input_init sets up with this struct as payload.
TSInput input;
} TSStringInput;
void ts_string_input_init(TSStringInput *, const char *, uint32_t);
#ifdef __cplusplus
}

710
src/runtime/subtree.c Normal file
View file

@ -0,0 +1,710 @@
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include "runtime/alloc.h"
#include "runtime/atomic.h"
#include "runtime/subtree.h"
#include "runtime/length.h"
#include "runtime/language.h"
#include "runtime/error_costs.h"
// Describes a text edit as applied by ts_subtree__edit. An edit with
// old_end.bytes == start.bytes is treated as a pure insertion. When an edit
// is passed down to a child subtree, all three positions are rebased by
// subtracting the child's start offset.
typedef struct {
// Position at which the edit begins.
Length start;
// End of the edited range before the edit was made.
Length old_end;
// End of the edited range after the edit was made.
Length new_end;
} Edit;
// Parse-state value that ts_subtree_set_children stores on a subtree when
// one of its children is an error node.
TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
// Upper bound on how many released subtrees ts_subtree_pool_free keeps
// around for reuse before it starts freeing them outright.
static const uint32_t MAX_TREE_POOL_SIZE = 1024;
// An external scanner state holding no data (length 0).
static const ExternalScannerState empty_state = {.length = 0, .short_data = {0}};
// ExternalScannerState
// Initialize `self` with a copy of `length` bytes taken from `data`.
// Payloads that fit in the inline `short_data` buffer are stored there;
// larger payloads are copied into a freshly allocated `long_data` buffer.
void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
  void *destination;
  self->length = length;
  if (length <= sizeof(self->short_data)) {
    destination = self->short_data;
  } else {
    self->long_data = ts_malloc(length);
    destination = self->long_data;
  }
  memcpy(destination, data, length);
}
// Release the heap buffer owned by `self`, if it has one. States whose data
// fits in the inline `short_data` buffer own no heap memory.
void ts_external_scanner_state_delete(ExternalScannerState *self) {
  if (self->length <= sizeof(self->short_data)) return;
  ts_free(self->long_data);
}
// Return a pointer to the bytes stored in `self`: the inline `short_data`
// buffer when the data fits there, otherwise the `long_data` buffer.
const char *ts_external_scanner_state_data(const ExternalScannerState *self) {
  if (self->length <= sizeof(self->short_data)) {
    return self->short_data;
  }
  return self->long_data;
}
// Compare two scanner states. They are equal when they are the same object,
// or when they have the same length and byte-for-byte identical data.
bool ts_external_scanner_state_eq(const ExternalScannerState *a, const ExternalScannerState *b) {
  if (a == b) return true;
  if (a->length != b->length) return false;
  const char *a_bytes = ts_external_scanner_state_data(a);
  const char *b_bytes = ts_external_scanner_state_data(b);
  return memcmp(a_bytes, b_bytes, a->length) == 0;
}
// SubtreeArray
// Copy the array `self` into `dest`, retaining every copied subtree.
// The copy gets the same size and capacity as the source; a source with zero
// capacity yields NULL contents. Always returns true.
bool ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
  const Subtree **copied = NULL;
  if (self.capacity > 0) {
    copied = ts_calloc(self.capacity, sizeof(Subtree *));
    for (uint32_t index = 0; index < self.size; index++) {
      copied[index] = self.contents[index];
      ts_subtree_retain(copied[index]);
    }
  }
  dest->contents = copied;
  dest->capacity = self.capacity;
  dest->size = self.size;
  return true;
}
// Release every subtree held in the array, then free the array's storage.
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
  uint32_t index = 0;
  while (index < self->size) {
    ts_subtree_release(pool, self->contents[index]);
    index++;
  }
  array_delete(self);
}
// Strip the run of `extra` subtrees from the end of `self` and return them
// as a new array, in their original order. `self` is shortened in place; no
// reference counts are changed.
SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) {
  SubtreeArray trailing = array_new();
  uint32_t kept = self->size;
  while (kept > 0) {
    const Subtree *last = self->contents[kept - 1];
    if (!last->extra) break;
    array_push(&trailing, last);
    kept--;
  }
  self->size = kept;
  // Extras were collected back to front; restore source order.
  ts_subtree_array_reverse(&trailing);
  return trailing;
}
// Reverse the order of the array's elements in place.
void ts_subtree_array_reverse(SubtreeArray *self) {
  if (self->size < 2) return;
  uint32_t front = 0;
  uint32_t back = self->size - 1;
  while (front < back) {
    const Subtree *held = self->contents[front];
    self->contents[front] = self->contents[back];
    self->contents[back] = held;
    front++;
    back--;
  }
}
// SubtreePool
// Create a subtree pool whose free list has room reserved for `capacity`
// subtrees. Both of the pool's arrays start out empty.
SubtreePool ts_subtree_pool_new(uint32_t capacity) {
  SubtreePool pool = {array_new(), array_new()};
  array_reserve(&pool.free_trees, capacity);
  return pool;
}
// Free every subtree parked on the pool's free list, then free the storage
// of the free list and of the pool's work stack. Arrays that never
// allocated storage are left alone.
void ts_subtree_pool_delete(SubtreePool *self) {
  if (self->free_trees.contents) {
    unsigned index = 0;
    while (index < self->free_trees.size) {
      ts_free(self->free_trees.contents[index]);
      index++;
    }
    array_delete(&self->free_trees);
  }
  if (self->tree_stack.contents) {
    array_delete(&self->tree_stack);
  }
}
// Hand out storage for one subtree: reuse an entry from the pool's free list
// when one is available, otherwise allocate a new one. The returned memory is
// not initialized here.
Subtree *ts_subtree_pool_allocate(SubtreePool *self) {
  if (self->free_trees.size == 0) {
    return ts_malloc(sizeof(Subtree));
  }
  return array_pop(&self->free_trees);
}
// Give a subtree's storage back. It is parked on the pool's free list when
// the list has allocated storage and holds fewer than MAX_TREE_POOL_SIZE
// entries; otherwise it is freed immediately.
void ts_subtree_pool_free(SubtreePool *self, Subtree *tree) {
  const bool can_recycle =
    self->free_trees.capacity > 0 &&
    self->free_trees.size < MAX_TREE_POOL_SIZE;
  if (!can_recycle) {
    ts_free(tree);
    return;
  }
  array_push(&self->free_trees, tree);
}
// Subtree
// Create a leaf subtree for `symbol` with the given padding and size.
// Visibility and namedness come from the language's metadata for the symbol.
// The new leaf starts with a reference count of 1 and a node count of 1, and
// its first_leaf refers to itself. The built-in end symbol is marked as an
// extra. Fields not listed in the initializer are zero-initialized.
Subtree *ts_subtree_new_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
                             const TSLanguage *language) {
  TSSymbolMetadata symbol_info = ts_language_symbol_metadata(language, symbol);
  Subtree *leaf = ts_subtree_pool_allocate(pool);
  Subtree initial = {
    .ref_count = 1,
    .symbol = symbol,
    .size = size,
    .visible_child_count = 0,
    .named_child_count = 0,
    .alias_sequence_id = 0,
    .padding = padding,
    .visible = symbol_info.visible,
    .named = symbol_info.named,
    .node_count = 1,
    .has_changes = false,
    .first_leaf = {
      .symbol = symbol,
      .lex_mode = {0, 0},
    },
    .has_external_tokens = false,
  };
  *leaf = initial;
  if (symbol == ts_builtin_sym_end) {
    leaf->extra = true;
  }
  return leaf;
}
// Create an error leaf covering `size` after `padding`, recording the
// lookahead character that was seen. The leaf is marked fragile on both
// sides. Note the parameter order here is (size, padding), the reverse of
// ts_subtree_new_leaf.
Subtree *ts_subtree_new_error(SubtreePool *pool, Length size, Length padding,
                              int32_t lookahead_char, const TSLanguage *language) {
  Subtree *error_leaf = ts_subtree_new_leaf(pool, ts_builtin_sym_error, padding, size, language);
  error_leaf->lookahead_char = lookahead_char;
  error_leaf->fragile_right = true;
  error_leaf->fragile_left = true;
  return error_leaf;
}
// Create a copy of `self` with a reference count of 1.
//
// Every field is copied by value, and then the resources the subtree owns are
// duplicated so that the copy and the original can be released separately:
//   - a subtree with children gets its own children array, with each child
//     retained;
//   - a childless subtree with `has_external_tokens` set gets its own copy of
//     the external scanner state. Without this, a state large enough to live
//     in a heap-allocated `long_data` buffer would be shared by both
//     subtrees and freed twice, because ts_subtree_release deletes the state
//     of every such leaf it frees.
Subtree *ts_subtree_new_copy(SubtreePool *pool, const Subtree *self) {
  Subtree *result = ts_subtree_pool_allocate(pool);
  *result = *self;
  if (result->children.size > 0) {
    ts_subtree_array_copy(self->children, &result->children);
  } else if (result->has_external_tokens) {
    // Mirrors the ownership rule in ts_subtree_release: re-initialize the
    // copy's state from the original's bytes instead of sharing its buffer.
    const ExternalScannerState *source_state = &self->external_scanner_state;
    ts_external_scanner_state_init(
      &result->external_scanner_state,
      ts_external_scanner_state_data(source_state),
      source_state->length
    );
  }
  result->ref_count = 1;
  return result;
}
// Obtain a mutable version of `self`. A subtree with a single reference is
// returned as-is; otherwise a copy is made and the caller's reference to the
// original is released.
Subtree *ts_subtree_make_mut(SubtreePool *pool, const Subtree *self) {
  if (self->ref_count != 1) {
    Subtree *private_copy = ts_subtree_new_copy(pool, self);
    ts_subtree_release(pool, self);
    return private_copy;
  }
  return (Subtree *)self;
}
// Perform up to `count` rotations along the chain of first children that
// starts at `self`.
//
// A rotation is applied only while the current node, its first child, and
// that child's first child are all unshared (ref_count == 1), the child and
// grandchild each have exactly two children and carry the current node's
// symbol, and the current node itself has exactly two children. Each rotated
// node is pushed onto `stack`; afterwards the pushed nodes are popped again
// and ts_subtree_set_children is re-run on the nodes involved so that the
// fields derived from their children are recomputed. `stack` is returned to
// the size it had on entry.
static void ts_subtree__compress(Subtree *self, unsigned count, const TSLanguage *language,
MutableSubtreeArray *stack) {
unsigned initial_stack_size = stack->size;
Subtree *tree = self;
for (unsigned i = 0; i < count; i++) {
// Shared nodes must not be mutated, so stop at the first one.
if (tree->ref_count > 1 || tree->children.size != 2) break;
Subtree *child = (Subtree *)tree->children.contents[0];
if (
child->ref_count > 1 ||
child->children.size != 2 ||
child->symbol != tree->symbol
) break;
Subtree *grandchild = (Subtree *)child->children.contents[0];
if (
grandchild->ref_count > 1 ||
grandchild->children.size != 2 ||
grandchild->symbol != tree->symbol
) break;
// Rotate: the grandchild takes the child's place under `tree`, the child
// adopts the grandchild's second child as its first child, and the child
// itself becomes the grandchild's second child.
tree->children.contents[0] = grandchild;
child->children.contents[0] = grandchild->children.contents[1];
grandchild->children.contents[1] = child;
array_push(stack, tree);
// Continue from the grandchild, which is now `tree`'s first child.
tree = grandchild;
}
// Unwind: for each rotated node, recompute the derived fields of the node
// that moved down, then its new parent, then the rotated node itself.
while (stack->size > initial_stack_size) {
tree = array_pop(stack);
assert(tree);
Subtree *child = (Subtree *)tree->children.contents[0];
Subtree *grandchild = (Subtree *)child->children.contents[1];
ts_subtree_set_children(grandchild, &grandchild->children, language);
ts_subtree_set_children(child, &child->children, language);
ts_subtree_set_children(tree, &tree->children, language);
}
}
// Visit the subtrees reachable from `self` through unshared nodes
// (ref_count == 1) and, for each node with a non-zero repeat_depth whose
// first child has a greater repeat_depth than its second child, apply
// ts_subtree__compress with successively halved rotation counts.
//
// pool->tree_stack is used as the work list and is cleared on entry. The same
// stack is passed to ts_subtree__compress, which restores its size before
// returning.
void ts_subtree_balance(const Subtree *self, SubtreePool *pool, const TSLanguage *language) {
array_clear(&pool->tree_stack);
// Shared subtrees are never mutated, so they are not even visited.
if (self->ref_count == 1) {
array_push(&pool->tree_stack, (Subtree *)self);
}
while (pool->tree_stack.size > 0) {
Subtree *tree = array_pop(&pool->tree_stack);
assert(tree);
// NOTE(review): children 0 and 1 are read without a size check here;
// ts_subtree_set_children only assigns a non-zero repeat_depth to nodes
// with exactly two children - confirm no other writer exists.
if (tree->repeat_depth > 0 &&
tree->children.contents[0]->repeat_depth > tree->children.contents[1]->repeat_depth) {
unsigned n = (
tree->children.contents[0]->repeat_depth -
tree->children.contents[1]->repeat_depth
);
// Compress with n/2, n/4, ... rotations. `n` is decremented alongside but
// is not read again after the loop starts.
for (unsigned i = n / 2; i > 0; i /= 2) {
ts_subtree__compress(tree, i, language, &pool->tree_stack);
n -= i;
}
}
// Queue every unshared child for the same treatment.
for (uint32_t i = 0; i < tree->children.size; i++) {
const Subtree *child = tree->children.contents[i];
if (child->ref_count == 1) {
array_push(&pool->tree_stack, (Subtree *)child);
}
}
}
}
// Install `children` as the children of `self` and recompute every field of
// `self` that is derived from them: padding, size, bytes_scanned, child
// counts, error cost, node count, dynamic precedence, repeat depth, first
// leaf, fragility and the external-token flag.
//
// The array struct is copied into `self` by value. If `self` already had a
// non-empty children array with different storage, that storage is freed
// first (the old children themselves are not released here). Passing
// `&self->children` recomputes the derived fields without freeing anything.
void ts_subtree_set_children(Subtree *self, SubtreeArray *children, const TSLanguage *language) {
if (self->children.size > 0 && children->contents != self->children.contents) {
array_delete(&self->children);
}
self->children = *children;
// Reset the accumulated fields before walking the children.
self->named_child_count = 0;
self->visible_child_count = 0;
self->error_cost = 0;
self->repeat_depth = 0;
self->node_count = 1;
self->has_external_tokens = false;
self->dynamic_precedence = 0;
// Index into the alias sequence; it advances only for non-extra children.
uint32_t non_extra_index = 0;
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id);
for (uint32_t i = 0; i < self->children.size; i++) {
const Subtree *child = self->children.contents[i];
if (i == 0) {
// The first child supplies the parent's padding and initial extent.
self->padding = child->padding;
self->size = child->size;
self->bytes_scanned = child->bytes_scanned;
} else {
// bytes_scanned is the furthest point any child scanned to, measured from
// the parent's start; the total so far is this child's starting offset.
uint32_t bytes_scanned = ts_subtree_total_bytes(self) + child->bytes_scanned;
if (bytes_scanned > self->bytes_scanned) self->bytes_scanned = bytes_scanned;
self->size = length_add(self->size, ts_subtree_total_size(child));
}
// The cost of error_repeat children is not added to the parent's cost.
if (child->symbol != ts_builtin_sym_error_repeat) {
self->error_cost += child->error_cost;
}
self->dynamic_precedence += child->dynamic_precedence;
self->node_count += child->node_count;
// Child counts: a non-extra child with an alias entry counts as visible (and
// as named if the alias symbol is named); otherwise a visible child counts
// itself, and an invisible child with children contributes its own counts.
if (alias_sequence && alias_sequence[non_extra_index] != 0 && !child->extra) {
self->visible_child_count++;
if (ts_language_symbol_metadata(language, alias_sequence[non_extra_index]).named) {
self->named_child_count++;
}
} else if (child->visible) {
self->visible_child_count++;
if (child->named) self->named_child_count++;
} else if (child->children.size > 0) {
self->visible_child_count += child->visible_child_count;
self->named_child_count += child->named_child_count;
}
if (child->has_external_tokens) self->has_external_tokens = true;
// A direct error child makes the parent fragile on both sides and clears
// its parse state.
if (child->symbol == ts_builtin_sym_error) {
self->fragile_left = self->fragile_right = true;
self->parse_state = TS_TREE_STATE_NONE;
}
if (!child->extra) non_extra_index++;
}
// Error and error_repeat nodes carry an additional cost of their own, based
// on their size and on the trees they contain.
if (self->symbol == ts_builtin_sym_error || self->symbol == ts_builtin_sym_error_repeat) {
self->error_cost += ERROR_COST_PER_RECOVERY +
ERROR_COST_PER_SKIPPED_CHAR * self->size.bytes +
ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row;
for (uint32_t i = 0; i < self->children.size; i++) {
const Subtree *child = self->children.contents[i];
// Extras and childless error leaves do not count as skipped trees.
if (child->extra) continue;
if (child->symbol == ts_builtin_sym_error && child->children.size == 0) continue;
if (child->visible) {
self->error_cost += ERROR_COST_PER_SKIPPED_TREE;
} else {
self->error_cost += ERROR_COST_PER_SKIPPED_TREE * child->visible_child_count;
}
}
}
if (self->children.size > 0) {
const Subtree *first_child = self->children.contents[0];
const Subtree *last_child = self->children.contents[self->children.size - 1];
self->first_leaf = first_child->first_leaf;
// Fragility is inherited from the outermost children.
if (first_child->fragile_left) self->fragile_left = true;
if (last_child->fragile_right) self->fragile_right = true;
// An invisible, unnamed node with exactly two children of its own symbol
// gets a repeat depth one greater than the deeper of the two children.
if (
self->children.size == 2 &&
!self->visible && !self->named &&
first_child->symbol == self->symbol &&
last_child->symbol == self->symbol
) {
if (first_child->repeat_depth > last_child->repeat_depth) {
self->repeat_depth = first_child->repeat_depth + 1;
} else {
self->repeat_depth = last_child->repeat_depth + 1;
}
}
}
}
// Create an interior subtree for `symbol` that takes over the given children
// array. The node starts as a zero-sized leaf and gets its derived fields
// from ts_subtree_set_children. Nodes for the built-in error and
// error_repeat symbols are marked fragile on both sides.
Subtree *ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, SubtreeArray *children,
                             unsigned alias_sequence_id, const TSLanguage *language) {
  Subtree *node = ts_subtree_new_leaf(pool, symbol, length_zero(), length_zero(), language);
  const bool is_error_symbol =
    symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
  node->alias_sequence_id = alias_sequence_id;
  if (is_error_symbol) {
    node->fragile_left = true;
    node->fragile_right = true;
  }
  ts_subtree_set_children(node, children, language);
  return node;
}
// Create an interior error node (built-in error symbol, alias sequence 0)
// that takes over the given children array.
Subtree *ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children,
                                   const TSLanguage *language) {
  Subtree *error_node = ts_subtree_new_node(pool, ts_builtin_sym_error, children, 0, language);
  return error_node;
}
// Create a zero-sized leaf for `symbol` that is flagged as missing. Its
// error cost is the missing-tree cost plus the per-recovery cost.
Subtree *ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol,
                                     const TSLanguage *language) {
  Subtree *missing = ts_subtree_new_leaf(pool, symbol, length_zero(), length_zero(), language);
  missing->error_cost = ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
  missing->is_missing = true;
  return missing;
}
// Add one reference to `self` using an atomic increment. The assertions
// check that the subtree is alive beforehand and that the counter did not
// wrap around to zero.
void ts_subtree_retain(const Subtree *self) {
  volatile uint32_t *counter = (volatile uint32_t *)&self->ref_count;
  assert(self->ref_count > 0);
  atomic_inc(counter);
  assert(self->ref_count != 0);
}
// Drop one reference to `self`. When the count reaches zero the subtree is
// destroyed, and so is every descendant whose count reaches zero as a result.
// The walk is iterative, using pool->tree_stack (cleared on entry) as the
// work list. A dead subtree with children frees its children array; a dead
// childless subtree with has_external_tokens frees its scanner state. The
// subtree's own storage goes back to the pool.
void ts_subtree_release(SubtreePool *pool, const Subtree *self) {
  array_clear(&pool->tree_stack);
  assert(self->ref_count > 0);
  if (atomic_dec((volatile uint32_t *)&self->ref_count) == 0) {
    array_push(&pool->tree_stack, (Subtree *)self);
  }

  while (pool->tree_stack.size > 0) {
    Subtree *dead = array_pop(&pool->tree_stack);
    const uint32_t child_count = dead->children.size;
    if (child_count == 0) {
      if (dead->has_external_tokens) {
        ts_external_scanner_state_delete(&dead->external_scanner_state);
      }
    } else {
      for (uint32_t index = 0; index < child_count; index++) {
        const Subtree *child = dead->children.contents[index];
        if (atomic_dec((volatile uint32_t *)&child->ref_count) == 0) {
          array_push(&pool->tree_stack, (Subtree *)child);
        }
      }
      array_delete(&dead->children);
    }
    ts_subtree_pool_free(pool, dead);
  }
}
// Structural equality of two subtrees, either of which may be NULL (two NULLs
// are equal, a NULL never equals a non-NULL). Compares symbol, visibility,
// namedness and the byte counts of padding and size. Error nodes are then
// compared by lookahead character alone; all other nodes by their child
// counts and, recursively, their children.
bool ts_subtree_eq(const Subtree *self, const Subtree *other) {
  if (!self || !other) return self == other;

  if (
    self->symbol != other->symbol ||
    self->visible != other->visible ||
    self->named != other->named ||
    self->padding.bytes != other->padding.bytes ||
    self->size.bytes != other->size.bytes
  ) return false;

  if (self->symbol == ts_builtin_sym_error) {
    return self->lookahead_char == other->lookahead_char;
  }

  if (
    self->children.size != other->children.size ||
    self->visible_child_count != other->visible_child_count ||
    self->named_child_count != other->named_child_count
  ) return false;

  for (uint32_t index = 0; index < self->children.size; index++) {
    const Subtree *mine = self->children.contents[index];
    const Subtree *theirs = other->children.contents[index];
    if (!ts_subtree_eq(mine, theirs)) return false;
  }
  return true;
}
// Three-way ordering of two subtrees: by symbol first, then by number of
// children, then by the first pair of children that differ. Returns -1, 0
// or 1.
int ts_subtree_compare(const Subtree *left, const Subtree *right) {
  if (left->symbol != right->symbol) {
    return left->symbol < right->symbol ? -1 : 1;
  }
  if (left->children.size != right->children.size) {
    return left->children.size < right->children.size ? -1 : 1;
  }
  for (uint32_t index = 0; index < left->children.size; index++) {
    int order = ts_subtree_compare(
      left->children.contents[index],
      right->children.contents[index]
    );
    // The recursive result is always -1, 0 or 1; any non-zero value decides.
    if (order != 0) return order;
  }
  return 0;
}
// Mark as changed every subtree whose scanned bytes reach past
// `edit_byte_offset` (an offset relative to the start of `self`).
// A subtree that scanned no further than the offset is returned untouched.
// Otherwise a mutable version of it is obtained, flagged with has_changes,
// and the same is done for each child that starts at or before the offset.
// Returns the possibly replaced subtree.
const Subtree *ts_subtree_invalidate_lookahead(const Subtree *self, uint32_t edit_byte_offset,
                                               SubtreePool *pool) {
  if (self->bytes_scanned <= edit_byte_offset) return self;

  Subtree *result = ts_subtree_make_mut(pool, self);
  result->has_changes = true;

  uint32_t child_start_byte = 0;
  for (
    uint32_t index = 0;
    index < result->children.size && child_start_byte <= edit_byte_offset;
    index++
  ) {
    const Subtree *updated = ts_subtree_invalidate_lookahead(
      result->children.contents[index],
      edit_byte_offset - child_start_byte,
      pool
    );
    result->children.contents[index] = updated;
    child_start_byte += ts_subtree_total_bytes(updated);
  }
  return result;
}
// Applies `edit` (expressed relative to the start of `self`, padding
// included) to a subtree and, recursively, to the children it overlaps.
//
// Returns the subtree produced by ts_subtree_make_mut, with `has_changes`
// set and its padding/size adjusted. Children are replaced in place by their
// edited (or lookahead-invalidated) versions.
const Subtree *ts_subtree__edit(const Subtree *self, Edit edit, SubtreePool *pool) {
Subtree *result = ts_subtree_make_mut(pool, self);
result->has_changes = true;
// An edit that removes nothing: its old end coincides with its start.
bool pure_insertion = edit.old_end.bytes == edit.start.bytes;
if (edit.old_end.bytes <= result->padding.bytes) {
// The edit ends inside the padding: keep the padding that followed the
// edit and place it after the new end.
result->padding = length_add(edit.new_end, length_sub(result->padding, edit.old_end));
} else if (edit.start.bytes < result->padding.bytes) {
// The edit starts in the padding and runs into the content: drop the
// consumed content from `size`; the padding now ends at the edit's new end.
result->size = length_sub(result->size, length_sub(edit.old_end, result->padding));
result->padding = edit.new_end;
} else if (edit.start.bytes == result->padding.bytes && pure_insertion) {
// Insertion exactly where the content begins: counted as padding.
result->padding = edit.new_end;
} else {
// The edit lies within the content: new size is the content up to the
// edit's new end plus whatever content followed the edit's old end.
result->size = length_add(
length_sub(edit.new_end, result->padding),
length_sub(result->size, length_sub(edit.old_end, result->padding))
);
}
// child_left/child_right are the child's bounds relative to this subtree,
// accumulated from the children's sizes as read before each is edited.
// child_left is assigned at the top of every iteration before it is read.
Length child_left, child_right = length_zero();
for (uint32_t i = 0; i < result->children.size; i++) {
const Subtree **child = &result->children.contents[i];
Length child_size = ts_subtree_total_size(*child);
child_left = child_right;
child_right = length_add(child_left, child_size);
// Stop once a child begins after the edited range (or exactly at its end,
// for a non-empty child that is not the first one).
if (child_left.bytes > edit.old_end.bytes ||
(child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
// The child overlaps the edit (or ends exactly where an insertion occurs).
if (child_right.bytes > edit.start.bytes ||
(child_right.bytes == edit.start.bytes && pure_insertion)) {
// Translate the edit into the child's coordinate space...
Edit child_edit = {
.start = length_sub(edit.start, child_left),
.old_end = length_sub(edit.old_end, child_left),
.new_end = length_sub(edit.new_end, child_left),
};
// ...clamping positions that precede the child to zero and an old end
// beyond the child to the child's own extent.
if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
// From here on the edit's new end equals its start, so any later
// overlapping child receives an edit whose new end is clamped to zero.
edit.new_end = edit.start;
*child = ts_subtree__edit(*child, child_edit, pool);
} else if (child_left.bytes <= edit.start.bytes) {
// The child ends before the edit; only its lookahead may be affected.
*child = ts_subtree_invalidate_lookahead(*child, edit.start.bytes - child_left.bytes, pool);
}
}
return result;
}
// Public entry point for editing a subtree: converts the byte/point pairs of
// a TSInputEdit into the internal Edit representation and delegates to
// ts_subtree__edit.
const Subtree *ts_subtree_edit(const Subtree *self, const TSInputEdit *edit, SubtreePool *pool) {
  Edit converted = {
    .start = {edit->start_byte, edit->start_point},
    .old_end = {edit->old_end_byte, edit->old_end_point},
    .new_end = {edit->new_end_byte, edit->new_end_point},
  };
  return ts_subtree__edit(self, converted, pool);
}
// Descends from `tree` to its right-most leaf that carries external tokens.
//
// Returns NULL when `tree` has no external tokens. At every level the
// children are scanned from last to first and the walk continues into the
// first child found with `has_external_tokens` set.
const Subtree *ts_subtree_last_external_token(const Subtree *tree) {
  if (!tree->has_external_tokens) return NULL;

  while (tree->children.size > 0) {
    uint32_t remaining = tree->children.size;
    while (remaining > 0) {
      const Subtree *candidate = tree->children.contents[remaining - 1];
      if (candidate->has_external_tokens) {
        tree = candidate;
        break;
      }
      remaining--;
    }
  }
  return tree;
}
// Formats the character code `c` into `s` (at most `n` bytes, as snprintf)
// and returns snprintf's result:
//   0          -> EOF
//   -1         -> INVALID
//   \n \t \r   -> quoted escape sequence
//   printable ASCII -> the quoted character
//   anything else   -> the decimal code
static size_t ts_subtree__write_char_to_string(char *s, size_t n, int32_t c) {
  switch (c) {
    case 0:
      return snprintf(s, n, "EOF");
    case -1:
      return snprintf(s, n, "INVALID");
    case '\n':
      return snprintf(s, n, "'\\n'");
    case '\t':
      return snprintf(s, n, "'\\t'");
    case '\r':
      return snprintf(s, n, "'\\r'");
    default:
      break;
  }

  bool printable_ascii = 0 < c && c < 128 && isprint(c);
  if (printable_ascii) {
    return snprintf(s, n, "'%c'", c);
  }
  return snprintf(s, n, "%d", c);
}
// Writes an S-expression rendering of `self` into `string` and returns the
// number of characters the rendering occupies (excluding the terminator).
//
// - `limit`: buffer size handed to every snprintf call. When it is 0 nothing
//   is stored and the function only measures the output.
// - `is_root`: the root is always printed and gets no leading space.
// - `include_all`: print every node, not only visible named ones.
// - `alias_symbol` / `alias_is_named`: alias applied to this node by its
//   parent's alias sequence (0 / false when there is none).
static size_t ts_subtree__write_to_string(const Subtree *self, char *string, size_t limit,
const TSLanguage *language, bool is_root,
bool include_all, TSSymbol alias_symbol,
bool alias_is_named) {
if (!self) return snprintf(string, limit, "(NULL)");
// `cursor` always advances by the length each piece would occupy. In
// measuring mode (limit == 0) `writer` refers to the fixed `string` pointer
// instead, so the destination passed to snprintf never moves.
char *cursor = string;
char **writer = (limit > 0) ? &cursor : &string;
bool visible =
include_all ||
is_root ||
self->is_missing ||
(self->visible && self->named) ||
alias_is_named;
if (visible && !is_root) {
cursor += snprintf(*writer, limit, " ");
}
if (visible) {
if (self->symbol == ts_builtin_sym_error && self->children.size == 0 && self->size.bytes > 0) {
// Childless, non-empty error: show the character that was unexpected.
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
cursor += ts_subtree__write_char_to_string(*writer, limit, self->lookahead_char);
} else if (self->is_missing) {
cursor += snprintf(*writer, limit, "(MISSING");
} else {
// A non-zero alias replaces the node's own symbol name.
TSSymbol symbol = alias_symbol ? alias_symbol : self->symbol;
const char *symbol_name = ts_language_symbol_name(language, symbol);
cursor += snprintf(*writer, limit, "(%s", symbol_name);
}
}
// Children are written even when this node itself is not visible.
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id);
// Index into `alias_sequence`; it counts only non-extra children.
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < self->children.size; i++) {
const Subtree *child = self->children.contents[i];
if (child->extra) {
// Extra children never receive an alias.
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
0, false
);
} else {
// NOTE: this local intentionally shadows the `alias_symbol` parameter.
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
alias_symbol,
alias_symbol ? ts_language_symbol_metadata(language, alias_symbol).named : false
);
structural_child_index++;
}
}
if (visible) cursor += snprintf(*writer, limit, ")");
return cursor - string;
}
// Returns a newly ts_malloc'ed, NUL-terminated S-expression for `self`.
// The rendering is produced in two passes: one that only measures the
// required length, and one that writes into a buffer of exactly that size.
char *ts_subtree_string(const Subtree *self, const TSLanguage *language, bool include_all) {
  // Pass 1: a zero limit means nothing is stored; only the length comes back.
  char measure_buffer[1];
  size_t length = ts_subtree__write_to_string(
    self, measure_buffer, 0,
    language, true,
    include_all, 0, false
  );

  // Pass 2: write for real, leaving room for the terminating NUL.
  size_t capacity = length + 1;
  char *output = ts_malloc(capacity * sizeof(char));
  ts_subtree__write_to_string(self, output, capacity, language, true, include_all, 0, false);
  return output;
}
// Recursively writes the Graphviz (DOT) statements for `self` and all of its
// descendants to `f`.
//
// - `byte_offset`: absolute byte position at which `self` (padding included)
//   starts; used for the "range" tooltip.
// - `alias_symbol`: alias assigned by the parent's alias sequence, or 0 to
//   label the node with its own symbol.
//
// Each node is named after its address, so pointers are passed to `%p` as
// `const void *`, which is the argument type that conversion requires.
void ts_subtree__print_dot_graph(const Subtree *self, uint32_t byte_offset,
                                 const TSLanguage *language, TSSymbol alias_symbol, FILE *f) {
  TSSymbol symbol = alias_symbol ? alias_symbol : self->symbol;
  fprintf(f, "tree_%p [label=\"%s\"", (const void *)self, ts_language_symbol_name(language, symbol));

  // Leaves are drawn without a box; extras are greyed out.
  if (self->children.size == 0)
    fprintf(f, ", shape=plaintext");
  if (self->extra)
    fprintf(f, ", fontcolor=gray");

  fprintf(f, ", tooltip=\""
    "address:%p\n"
    "range:%u - %u\n"
    "state:%d\n"
    "error-cost:%u\n"
    "repeat-depth:%u\n"
    "bytes-scanned:%u\"]\n",
    (const void *)self,
    byte_offset, byte_offset + ts_subtree_total_bytes(self),
    self->parse_state,
    self->error_cost,
    self->repeat_depth,
    self->bytes_scanned
  );

  const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id);
  // Index into `alias_sequence`; only non-extra children consume an entry.
  uint32_t structural_child_index = 0;
  for (uint32_t i = 0; i < self->children.size; i++) {
    const Subtree *child = self->children.contents[i];
    if (child->extra) {
      ts_subtree__print_dot_graph(child, byte_offset, language, 0, f);
    } else {
      TSSymbol child_alias = alias_sequence ? alias_sequence[structural_child_index] : 0;
      ts_subtree__print_dot_graph(child, byte_offset, language, child_alias, f);
      structural_child_index++;
    }
    // Edge from this node to the child; the tooltip is the child's index.
    fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (const void *)self, (const void *)child, i);
    byte_offset += ts_subtree_total_bytes(child);
  }
}
// Writes a complete Graphviz `digraph` describing `self` to `f`: a fixed
// header, the recursively generated node/edge statements, and the closing
// brace.
void ts_subtree_print_dot_graph(const Subtree *self, const TSLanguage *language, FILE *f) {
  fputs("digraph tree {\n", f);
  fputs("edge [arrowhead=none]\n", f);
  ts_subtree__print_dot_graph(self, 0, language, 0, f);
  fputs("}\n", f);
}
// Compares the external scanner states of two subtrees. A NULL subtree, or
// one without external tokens, is treated as having `empty_state`.
bool ts_subtree_external_scanner_state_eq(const Subtree *self, const Subtree *other) {
  const ExternalScannerState *lhs =
    (self && self->has_external_tokens) ? &self->external_scanner_state : &empty_state;
  const ExternalScannerState *rhs =
    (other && other->has_external_tokens) ? &other->external_scanner_state : &empty_state;
  return ts_external_scanner_state_eq(lhs, rhs);
}

122
src/runtime/subtree.h Normal file
View file

@ -0,0 +1,122 @@
#ifndef RUNTIME_SUBTREE_H_
#define RUNTIME_SUBTREE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include "runtime/length.h"
#include "runtime/array.h"
#include "tree_sitter/runtime.h"
#include "tree_sitter/parser.h"
extern TSStateId TS_TREE_STATE_NONE;
// State bytes associated with an external scanner, together with their
// length. `long_data` and `short_data` share storage.
// NOTE(review): presumably states that fit in `short_data` are stored inline
// and longer ones in the heap buffer `long_data` — confirm against
// ts_external_scanner_state_init / ts_external_scanner_state_data.
typedef struct {
union {
char *long_data;
char short_data[sizeof(char *) + sizeof(uint32_t)];
};
uint32_t length;
} ExternalScannerState;
typedef struct Subtree Subtree;
typedef Array(const Subtree *) SubtreeArray;
typedef Array(Subtree *) MutableSubtreeArray;
// A node of the syntax tree. Children are held as `const Subtree *`;
// functions that modify a subtree first obtain a mutable one through
// ts_subtree_make_mut.
struct Subtree {
// Extent preceding the node's own content, and extent of the content.
// Their sum is the node's total size (see ts_subtree_total_size).
Length padding;
Length size;
// NOTE(review): presumably the reference count maintained by
// ts_subtree_retain / ts_subtree_release — confirm in subtree.c.
volatile uint32_t ref_count;
// Edits at a relative offset at or beyond this value leave the node
// untouched in ts_subtree_invalidate_lookahead.
uint32_t bytes_scanned;
uint32_t error_cost;
uint32_t node_count;
uint32_t repeat_depth;
int32_t dynamic_precedence;
bool visible : 1;
bool named : 1;
// Extra children do not consume an entry of the parent's alias sequence.
bool extra : 1;
bool fragile_left : 1;
bool fragile_right : 1;
// Set when the node is edited or has its lookahead invalidated.
bool has_changes : 1;
bool has_external_tokens : 1;
// Missing nodes are rendered as "(MISSING" when printed.
bool is_missing : 1;
TSSymbol symbol;
TSStateId parse_state;
struct {
TSSymbol symbol;
TSLexMode lex_mode;
} first_leaf;
// The three anonymous structs below share storage.
union {
// Child list and the data derived from it.
struct {
SubtreeArray children;
uint32_t visible_child_count;
uint32_t named_child_count;
uint16_t alias_sequence_id;
};
// Released for childless subtrees that have external tokens.
// NOTE(review): `_2` (and `_1` below) look like placeholders that keep
// these members clear of the first bytes of `children` — confirm which
// Array field they are meant to skip.
struct {
uint32_t _2;
ExternalScannerState external_scanner_state;
};
// Read for error subtrees (ts_subtree_eq, string rendering).
struct {
uint32_t _1;
int32_t lookahead_char;
};
};
};
// Scratch storage shared by subtree operations.
// NOTE(review): `free_trees` presumably caches released Subtree allocations
// for reuse by ts_subtree_pool_allocate — confirm in subtree.c.
// `tree_stack` is used as a work stack of subtrees still to be processed.
typedef struct {
MutableSubtreeArray free_trees;
MutableSubtreeArray tree_stack;
} SubtreePool;
// External scanner state.
void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
const char *ts_external_scanner_state_data(const ExternalScannerState *);
// Subtree arrays.
bool ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *);
void ts_subtree_array_reverse(SubtreeArray *);
// Subtree pool.
SubtreePool ts_subtree_pool_new(uint32_t capacity);
void ts_subtree_pool_delete(SubtreePool *);
Subtree *ts_subtree_pool_allocate(SubtreePool *);
void ts_subtree_pool_free(SubtreePool *, Subtree *);
// Subtree construction.
Subtree *ts_subtree_new_leaf(SubtreePool *, TSSymbol, Length, Length, const TSLanguage *);
Subtree *ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
Subtree *ts_subtree_new_copy(SubtreePool *, const Subtree *);
Subtree *ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, const TSLanguage *);
Subtree *ts_subtree_new_error(SubtreePool *, Length, Length, int32_t, const TSLanguage *);
Subtree *ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, const TSLanguage *);
Subtree *ts_subtree_make_mut(SubtreePool *, const Subtree *);
// Lifetime management.
void ts_subtree_retain(const Subtree *tree);
void ts_subtree_release(SubtreePool *, const Subtree *tree);
// Comparison: structural equality and a three-way ordering (-1, 0, 1).
bool ts_subtree_eq(const Subtree *tree1, const Subtree *tree2);
int ts_subtree_compare(const Subtree *tree1, const Subtree *tree2);
// Mutation.
void ts_subtree_set_children(Subtree *, SubtreeArray *, const TSLanguage *);
void ts_subtree_balance(const Subtree *, SubtreePool *, const TSLanguage *);
// Returns the edited subtree; callers must use the returned pointer.
const Subtree *ts_subtree_edit(const Subtree *, const TSInputEdit *edit, SubtreePool *);
// Debug output. ts_subtree_string returns a ts_malloc'ed string.
char *ts_subtree_string(const Subtree *, const TSLanguage *, bool include_all);
void ts_subtree_print_dot_graph(const Subtree *, const TSLanguage *, FILE *);
// External tokens. ts_subtree_last_external_token returns NULL when the
// subtree has no external tokens.
const Subtree *ts_subtree_last_external_token(const Subtree *);
bool ts_subtree_external_scanner_state_eq(const Subtree *, const Subtree *);
// Total number of bytes covered by a subtree: its padding plus its content.
static inline uint32_t ts_subtree_total_bytes(const Subtree *self) {
return self->padding.bytes + self->size.bytes;
}
// Total Length covered by a subtree: its padding followed by its content,
// combined with length_add.
static inline Length ts_subtree_total_size(const Subtree *self) {
return length_add(self->padding, self->size);
}
#ifdef __cplusplus
}
#endif
#endif // RUNTIME_SUBTREE_H_

View file

@ -1,740 +1,94 @@
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include "runtime/alloc.h"
#include "tree_sitter/runtime.h"
#include "runtime/array.h"
#include "runtime/get_changed_ranges.h"
#include "runtime/subtree.h"
#include "runtime/tree_cursor.h"
#include "runtime/tree.h"
#include "runtime/length.h"
#include "runtime/language.h"
#include "runtime/error_costs.h"
TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
static const unsigned PARENT_CACHE_CAPACITY = 32;
// ExternalTokenState
// Initialises `self` with a copy of `length` bytes from `content`. Data that
// does not fit in `short_data` is copied into a freshly ts_malloc'ed
// `long_data` buffer; otherwise it is stored inline.
void ts_external_token_state_init(TSExternalTokenState *self, const char *content, unsigned length) {
  self->length = length;

  bool needs_heap = length > sizeof(self->short_data);
  if (needs_heap) {
    self->long_data = ts_malloc(length);
  }
  char *destination = needs_heap ? self->long_data : self->short_data;
  memcpy(destination, content, length);
}
// Releases the heap buffer of `self`, if it has one. States short enough to
// live in `short_data` own no heap memory.
void ts_external_token_state_delete(TSExternalTokenState *self) {
  bool stored_inline = self->length <= sizeof(self->short_data);
  if (stored_inline) return;
  ts_free(self->long_data);
}
// Returns a pointer to the state's bytes: the heap buffer when the length
// exceeds the inline capacity, the inline buffer otherwise.
const char *ts_external_token_state_data(const TSExternalTokenState *self) {
  return self->length > sizeof(self->short_data)
    ? self->long_data
    : self->short_data;
}
// Two states are equal when they are the same object, or when they have the
// same length and identical bytes.
bool ts_external_token_state_eq(const TSExternalTokenState *a, const TSExternalTokenState *b) {
  if (a == b) return true;
  if (a->length != b->length) return false;

  const char *a_bytes = ts_external_token_state_data(a);
  const char *b_bytes = ts_external_token_state_data(b);
  return memcmp(a_bytes, b_bytes, a->length) == 0;
}
// TreeArray
// Copies `self` into `dest`, retaining every tree it contains. The new
// buffer has the same capacity as the source; an array with zero capacity
// yields NULL contents. Always returns true.
bool ts_tree_array_copy(TreeArray self, TreeArray *dest) {
  Tree **copied = NULL;
  if (self.capacity > 0) {
    copied = ts_calloc(self.capacity, sizeof(Tree *));
    for (uint32_t index = 0; index < self.size; index++) {
      copied[index] = self.contents[index];
      ts_tree_retain(copied[index]);
    }
  }

  dest->contents = copied;
  dest->capacity = self.capacity;
  dest->size = self.size;
  return true;
}
// Releases every tree held by `self`, then frees the array's own storage.
void ts_tree_array_delete(TreePool *pool, TreeArray *self) {
  uint32_t index = 0;
  while (index < self->size) {
    ts_tree_release(pool, self->contents[index]);
    index++;
  }
  array_delete(self);
}
uint32_t ts_tree_array_essential_count(const TreeArray *self) {
uint32_t result = 0;
for (uint32_t i = 0; i < self->size; i++) {
Tree *tree = self->contents[i];
if (!tree->extra && tree->symbol != ts_builtin_sym_error)
result++;
}
TSTree *ts_tree_new(const Subtree *root, const TSLanguage *language) {
TSTree *result = ts_malloc(sizeof(TSTree));
result->root = root;
result->language = language;
result->parent_cache = NULL;
result->parent_cache_start = 0;
result->parent_cache_size = 0;
return result;
}
TreeArray ts_tree_array_remove_last_n(TreeArray *self, uint32_t remove_count) {
TreeArray result = array_new();
if (self->size == 0 || remove_count == 0) return result;
uint32_t count = 0;
uint32_t split_index = self->size - 1;
for (; split_index + 1 > 0; split_index--) {
Tree *tree = self->contents[split_index];
if (!tree->extra) {
count++;
if (count == remove_count) break;
}
}
array_grow(&result, self->size - split_index);
for (uint32_t i = split_index; i < self->size; i++) {
array_push(&result, self->contents[i]);
}
self->size = split_index;
return result;
TSTree *ts_tree_copy(const TSTree *self) {
ts_subtree_retain(self->root);
return ts_tree_new(self->root, self->language);
}
TreeArray ts_tree_array_remove_trailing_extras(TreeArray *self) {
TreeArray result = array_new();
uint32_t i = self->size - 1;
for (; i + 1 > 0; i--) {
Tree *child = self->contents[i];
if (!child->extra) break;
array_push(&result, child);
}
self->size = i + 1;
ts_tree_array_reverse(&result);
return result;
// Destroys a TSTree: releases its root subtree through a temporary pool,
// frees the parent cache if one was allocated, then frees the tree itself.
void ts_tree_delete(TSTree *self) {
  SubtreePool scratch_pool = ts_subtree_pool_new(0);
  ts_subtree_release(&scratch_pool, self->root);
  ts_subtree_pool_delete(&scratch_pool);

  if (self->parent_cache != NULL) {
    ts_free(self->parent_cache);
  }
  ts_free(self);
}
void ts_tree_array_reverse(TreeArray *self) {
for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) {
size_t reverse_index = self->size - 1 - i;
Tree *swap = self->contents[i];
self->contents[i] = self->contents[reverse_index];
self->contents[reverse_index] = swap;
}
TSNode ts_tree_root_node(const TSTree *self) {
return ts_node_new(self, self->root, self->root->padding, 0);
}
// TreePool
static const uint32_t MAX_TREE_POOL_SIZE = 1024;
void ts_tree_pool_init(TreePool *self) {
array_init(&self->free_trees);
array_init(&self->tree_stack);
// Applies `edit` to the tree by replacing its root with the edited subtree
// returned from ts_subtree_edit. A temporary pool is used for the operation.
void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
  SubtreePool scratch_pool = ts_subtree_pool_new(0);
  const Subtree *edited_root = ts_subtree_edit(self->root, edit, &scratch_pool);
  self->root = edited_root;
  ts_subtree_pool_delete(&scratch_pool);
}
void ts_tree_pool_delete(TreePool *self) {
if (self->free_trees.contents) {
for (unsigned i = 0; i < self->free_trees.size; i++) {
ts_free(self->free_trees.contents[i]);
}
array_delete(&self->free_trees);
}
if (self->tree_stack.contents) array_delete(&self->tree_stack);
}
Tree *ts_tree_pool_allocate(TreePool *self) {
if (self->free_trees.size > 0) {
return array_pop(&self->free_trees);
} else {
return ts_malloc(sizeof(Tree));
}
}
void ts_tree_pool_free(TreePool *self, Tree *tree) {
if (self->free_trees.size < MAX_TREE_POOL_SIZE) {
array_push(&self->free_trees, tree);
} else {
ts_free(tree);
}
}
// Tree
Tree *ts_tree_make_leaf(TreePool *pool, TSSymbol symbol, Length padding, Length size, const TSLanguage *language) {
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
Tree *result = ts_tree_pool_allocate(pool);
*result = (Tree){
.ref_count = 1,
.symbol = symbol,
.size = size,
.child_count = 0,
.children = NULL,
.visible_child_count = 0,
.named_child_count = 0,
.alias_sequence_id = 0,
.padding = padding,
.visible = metadata.visible,
.named = metadata.named,
.has_changes = false,
.first_leaf = {
.symbol = symbol,
.lex_mode = {0, 0},
},
.has_external_tokens = false,
};
return result;
}
Tree *ts_tree_make_error(TreePool *pool, Length size, Length padding, int32_t lookahead_char,
const TSLanguage *language) {
Tree *result = ts_tree_make_leaf(pool, ts_builtin_sym_error, padding, size, language);
result->fragile_left = true;
result->fragile_right = true;
result->lookahead_char = lookahead_char;
return result;
}
Tree *ts_tree_make_copy(TreePool *pool, Tree *self) {
Tree *result = ts_tree_pool_allocate(pool);
*result = *self;
result->ref_count = 1;
return result;
}
static void ts_tree__compress(Tree *self, unsigned count, const TSLanguage *language) {
Tree *tree = self;
for (unsigned i = 0; i < count; i++) {
if (tree->ref_count > 1 || tree->child_count != 2) break;
Tree *child = tree->children[0];
if (
child->ref_count > 1 ||
child->child_count != 2 ||
child->symbol != tree->symbol
) break;
Tree *grandchild = child->children[0];
if (
grandchild->ref_count > 1 ||
grandchild->child_count != 2 ||
grandchild->symbol != tree->symbol
) break;
tree->children[0] = grandchild;
grandchild->context.parent = tree;
grandchild->context.index = -1;
child->children[0] = grandchild->children[1];
child->children[0]->context.parent = child;
child->children[0]->context.index = -1;
grandchild->children[1] = child;
grandchild->children[1]->context.parent = grandchild;
grandchild->children[1]->context.index = -1;
tree = grandchild;
}
while (tree != self) {
tree = tree->context.parent;
Tree *child = tree->children[0];
Tree *grandchild = child->children[1];
ts_tree_set_children(grandchild, 2, grandchild->children, language);
ts_tree_set_children(child, 2, child->children, language);
ts_tree_set_children(tree, 2, tree->children, language);
}
}
void ts_tree__balance(Tree *self, const TSLanguage *language) {
if (self->children[0]->repeat_depth > self->children[1]->repeat_depth) {
unsigned n = self->children[0]->repeat_depth - self->children[1]->repeat_depth;
for (unsigned i = n / 2; i > 0; i /= 2) {
ts_tree__compress(self, i, language);
n -= i;
}
}
}
void ts_tree_assign_parents(Tree *self, TreePool *pool, const TSLanguage *language) {
self->context.parent = NULL;
array_clear(&pool->tree_stack);
array_push(&pool->tree_stack, self);
while (pool->tree_stack.size > 0) {
Tree *tree = array_pop(&pool->tree_stack);
if (tree->repeat_depth > 0) {
ts_tree__balance(tree, language);
}
Length offset = length_zero();
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, tree->alias_sequence_id);
uint32_t non_extra_index = 0;
bool earlier_child_was_changed = false;
for (uint32_t i = 0; i < tree->child_count; i++) {
Tree *child = tree->children[i];
if (earlier_child_was_changed || child->context.parent != tree || child->context.index != i) {
earlier_child_was_changed = true;
child->context.parent = tree;
child->context.index = i;
child->context.offset = offset;
if (!child->extra && alias_sequence && alias_sequence[non_extra_index] != 0) {
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, alias_sequence[non_extra_index]);
child->context.alias_symbol = alias_sequence[non_extra_index];
child->context.alias_is_named = metadata.named;
} else {
child->context.alias_symbol = 0;
child->context.alias_is_named = false;
}
array_push(&pool->tree_stack, child);
}
offset = length_add(offset, ts_tree_total_size(child));
if (!child->extra) non_extra_index++;
}
}
}
void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children,
const TSLanguage *language) {
if (self->child_count > 0 && children != self->children) ts_free(self->children);
self->children = children;
self->child_count = child_count;
self->named_child_count = 0;
self->visible_child_count = 0;
self->error_cost = 0;
self->repeat_depth = 0;
self->has_external_tokens = false;
self->dynamic_precedence = 0;
uint32_t non_extra_index = 0;
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self->alias_sequence_id);
for (uint32_t i = 0; i < child_count; i++) {
Tree *child = children[i];
if (i == 0) {
self->padding = child->padding;
self->size = child->size;
self->bytes_scanned = child->bytes_scanned;
} else {
uint32_t bytes_scanned = ts_tree_total_bytes(self) + child->bytes_scanned;
if (bytes_scanned > self->bytes_scanned) self->bytes_scanned = bytes_scanned;
self->size = length_add(self->size, ts_tree_total_size(child));
}
self->error_cost += child->error_cost;
self->dynamic_precedence += child->dynamic_precedence;
if (alias_sequence && alias_sequence[non_extra_index] != 0 && !child->extra) {
self->visible_child_count++;
if (ts_language_symbol_metadata(language, alias_sequence[non_extra_index]).named) {
self->named_child_count++;
}
} else if (child->visible) {
self->visible_child_count++;
if (child->named) self->named_child_count++;
} else if (child->child_count > 0) {
self->visible_child_count += child->visible_child_count;
self->named_child_count += child->named_child_count;
}
if (child->has_external_tokens) self->has_external_tokens = true;
if (child->symbol == ts_builtin_sym_error) {
self->fragile_left = self->fragile_right = true;
self->parse_state = TS_TREE_STATE_NONE;
}
if (!child->extra) non_extra_index++;
}
if (self->symbol == ts_builtin_sym_error) {
self->error_cost += ERROR_COST_PER_SKIPPED_CHAR * self->size.bytes +
ERROR_COST_PER_SKIPPED_LINE * self->size.extent.row;
for (uint32_t i = 0; i < child_count; i++)
if (!self->children[i]->extra)
self->error_cost += ERROR_COST_PER_SKIPPED_TREE;
}
if (child_count > 0) {
self->first_leaf = children[0]->first_leaf;
if (children[0]->fragile_left) {
self->fragile_left = true;
}
if (children[child_count - 1]->fragile_right) {
self->fragile_right = true;
}
if (
self->child_count == 2 &&
!self->visible && !self->named &&
self->children[0]->symbol == self->symbol &&
self->children[1]->symbol == self->symbol
) {
if (self->children[0]->repeat_depth > self->children[1]->repeat_depth) {
self->repeat_depth = self->children[0]->repeat_depth + 1;
} else {
self->repeat_depth = self->children[1]->repeat_depth + 1;
}
}
}
}
Tree *ts_tree_make_node(TreePool *pool, TSSymbol symbol, uint32_t child_count, Tree **children,
unsigned alias_sequence_id, const TSLanguage *language) {
Tree *result = ts_tree_make_leaf(pool, symbol, length_zero(), length_zero(), language);
result->alias_sequence_id = alias_sequence_id;
ts_tree_set_children(result, child_count, children, language);
return result;
}
Tree *ts_tree_make_error_node(TreePool *pool, TreeArray *children, const TSLanguage *language) {
for (uint32_t i = 0; i < children->size; i++) {
Tree *child = children->contents[i];
if (child->symbol == ts_builtin_sym_error && child->child_count > 0) {
array_splice(children, i, 1, child->child_count, child->children);
i += child->child_count - 1;
for (uint32_t j = 0; j < child->child_count; j++)
ts_tree_retain(child->children[j]);
ts_tree_release(pool, child);
}
}
Tree *result = ts_tree_make_node(
pool, ts_builtin_sym_error,
children->size, children->contents,
0, language
// Computes the ranges that differ between `self` and `other`.
//
// Stores the number of ranges in `*count` and returns the array produced by
// ts_subtree_get_changed_ranges. Each tree is walked with its own cursor:
// `cursor1` over `self` and `cursor2` over `other`.
TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uint32_t *count) {
  TSRange *result = NULL;
  TreeCursor cursor1, cursor2;
  ts_tree_cursor_init(&cursor1, self);
  // The second cursor must traverse `other`, matching the `other->root`
  // subtree passed below.
  ts_tree_cursor_init(&cursor2, other);
  *count = ts_subtree_get_changed_ranges(
    self->root, other->root, &cursor1, &cursor2,
    self->language, &result
  );
  // The cursors' stacks are only needed during the comparison.
  array_delete(&cursor1.stack);
  array_delete(&cursor2.stack);
  return result;
}
Tree *ts_tree_make_missing_leaf(TreePool *pool, TSSymbol symbol, const TSLanguage *language) {
Tree *result = ts_tree_make_leaf(pool, symbol, length_zero(), length_zero(), language);
result->is_missing = true;
result->error_cost = ERROR_COST_PER_MISSING_TREE;
return result;
// Writes a Graphviz rendering of the tree's root subtree to `file`, using
// the tree's own language.
void ts_tree_print_dot_graph(const TSTree *self, FILE *file) {
ts_subtree_print_dot_graph(self->root, self->language, file);
}
void ts_tree_retain(Tree *self) {
assert(self->ref_count > 0);
self->ref_count++;
assert(self->ref_count != 0);
}
void ts_tree_release(TreePool *pool, Tree *self) {
array_clear(&pool->tree_stack);
array_push(&pool->tree_stack, self);
while (pool->tree_stack.size > 0) {
Tree *tree = array_pop(&pool->tree_stack);
assert(tree->ref_count > 0);
tree->ref_count--;
if (tree->ref_count == 0) {
if (tree->child_count > 0) {
for (uint32_t i = 0; i < tree->child_count; i++) {
array_push(&pool->tree_stack, tree->children[i]);
}
ts_free(tree->children);
} else if (tree->has_external_tokens) {
ts_external_token_state_delete(&tree->external_token_state);
}
ts_tree_pool_free(pool, tree);
// Looks `node` up in the tree's parent cache, scanning entries from
// `parent_cache_start` and wrapping at PARENT_CACHE_CAPACITY. Returns the
// cached parent node on a hit, or a node built from NULL pointers on a miss.
TSNode ts_tree_get_cached_parent(const TSTree *self, const TSNode *node) {
  for (uint32_t offset = 0; offset < self->parent_cache_size; offset++) {
    uint32_t slot = (self->parent_cache_start + offset) % PARENT_CACHE_CAPACITY;
    const ParentCacheEntry *candidate = &self->parent_cache[slot];
    if (candidate->child != node->id) continue;
    return ts_node_new(self, candidate->parent, candidate->position, candidate->alias_symbol);
  }
  return ts_node_new(NULL, NULL, length_zero(), 0);
}
uint32_t ts_tree_start_column(const Tree *self) {
uint32_t column = self->padding.extent.column;
if (self->padding.extent.row > 0)
return column;
for (const Tree *tree = self; tree != NULL; tree = tree->context.parent) {
column += tree->context.offset.extent.column;
if (tree->context.offset.extent.row > 0)
break;
void ts_tree_set_cached_parent(const TSTree *_self, const TSNode *node, const TSNode *parent) {
TSTree *self = (TSTree *)_self;
if (!self->parent_cache) {
self->parent_cache = ts_calloc(PARENT_CACHE_CAPACITY, sizeof(ParentCacheEntry));
}
return column;
}
uint32_t ts_tree_end_column(const Tree *self) {
uint32_t result = self->size.extent.column;
if (self->size.extent.row == 0)
result += ts_tree_start_column(self);
return result;
}
uint32_t index = (self->parent_cache_start + self->parent_cache_size) % PARENT_CACHE_CAPACITY;
self->parent_cache[index] = (ParentCacheEntry) {
.child = node->id,
.parent = parent->id,
.position = {
parent->context[0],
{parent->context[1], parent->context[2]}
},
.alias_symbol = parent->context[3],
};
bool ts_tree_eq(const Tree *self, const Tree *other) {
if (self) {
if (!other) return false;
if (self->parent_cache_size == PARENT_CACHE_CAPACITY) {
self->parent_cache_start++;
} else {
return !other;
}
if (self->symbol != other->symbol) return false;
if (self->visible != other->visible) return false;
if (self->named != other->named) return false;
if (self->padding.bytes != other->padding.bytes) return false;
if (self->size.bytes != other->size.bytes) return false;
if (self->symbol == ts_builtin_sym_error) return self->lookahead_char == other->lookahead_char;
if (self->child_count != other->child_count) return false;
if (self->visible_child_count != other->visible_child_count) return false;
if (self->named_child_count != other->named_child_count) return false;
for (uint32_t i = 0; i < self->child_count; i++) {
if (!ts_tree_eq(self->children[i], other->children[i])) {
return false;
}
}
return true;
}
int ts_tree_compare(const Tree *left, const Tree *right) {
if (left->symbol < right->symbol)
return -1;
if (right->symbol < left->symbol)
return 1;
if (left->child_count < right->child_count)
return -1;
if (right->child_count < left->child_count)
return 1;
for (uint32_t i = 0; i < left->child_count; i++) {
Tree *left_child = left->children[i];
Tree *right_child = right->children[i];
switch (ts_tree_compare(left_child, right_child)) {
case -1:
return -1;
case 1:
return 1;
default:
break;
}
}
return 0;
}
// Returns the smaller of two byte counts.
static inline long min_byte(long a, long b) {
  if (b < a) return b;
  return a;
}
bool ts_tree_invalidate_lookahead(Tree *self, uint32_t edit_byte_offset) {
if (edit_byte_offset >= self->bytes_scanned) return false;
self->has_changes = true;
if (self->child_count > 0) {
uint32_t child_start_byte = 0;
for (uint32_t i = 0; i < self->child_count; i++) {
Tree *child = self->children[i];
if (child_start_byte > edit_byte_offset) break;
ts_tree_invalidate_lookahead(child, edit_byte_offset - child_start_byte);
child_start_byte += ts_tree_total_bytes(child);
}
}
return true;
}
static inline TSPoint ts_tree_total_extent(const Tree *self) {
return point_add(self->padding.extent, self->size.extent);
}
void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
uint32_t old_end_byte = edit->start_byte + edit->bytes_removed;
uint32_t new_end_byte = edit->start_byte + edit->bytes_added;
TSPoint old_end_point = point_add(edit->start_point, edit->extent_removed);
TSPoint new_end_point = point_add(edit->start_point, edit->extent_added);
assert(old_end_byte <= ts_tree_total_bytes(self));
self->has_changes = true;
if (edit->start_byte < self->padding.bytes) {
if (self->padding.bytes >= old_end_byte) {
uint32_t trailing_padding_bytes = self->padding.bytes - old_end_byte;
TSPoint trailing_padding_extent = point_sub(self->padding.extent, old_end_point);
self->padding.bytes = new_end_byte + trailing_padding_bytes;
self->padding.extent = point_add(new_end_point, trailing_padding_extent);
} else {
uint32_t removed_content_bytes = old_end_byte - self->padding.bytes;
TSPoint removed_content_extent = point_sub(old_end_point, self->padding.extent);
self->size.bytes = self->size.bytes - removed_content_bytes;
self->size.extent = point_sub(self->size.extent, removed_content_extent);
self->padding.bytes = new_end_byte;
self->padding.extent = new_end_point;
}
} else if (edit->start_byte == self->padding.bytes && edit->bytes_removed == 0) {
self->padding.bytes = self->padding.bytes + edit->bytes_added;
self->padding.extent = point_add(self->padding.extent, edit->extent_added);
} else {
uint32_t trailing_content_bytes = ts_tree_total_bytes(self) - old_end_byte;
TSPoint trailing_content_extent = point_sub(ts_tree_total_extent(self), old_end_point);
self->size.bytes = new_end_byte + trailing_content_bytes - self->padding.bytes;
self->size.extent = point_sub(point_add(new_end_point, trailing_content_extent), self->padding.extent);
}
bool found_first_child = false;
long remaining_bytes_to_delete = 0;
TSPoint remaining_extent_to_delete = {0, 0};
Length child_left, child_right = length_zero();
for (uint32_t i = 0; i < self->child_count; i++) {
Tree *child = self->children[i];
child_left = child_right;
child_right = length_add(child_left, ts_tree_total_size(child));
if (!found_first_child && child_right.bytes >= edit->start_byte) {
found_first_child = true;
TSInputEdit child_edit = {
.start_byte = edit->start_byte - child_left.bytes,
.bytes_added = edit->bytes_added,
.bytes_removed = edit->bytes_removed,
.start_point = point_sub(edit->start_point, child_left.extent),
.extent_added = edit->extent_added,
.extent_removed = edit->extent_removed,
};
if (old_end_byte > child_right.bytes) {
child_edit.bytes_removed = child_right.bytes - edit->start_byte;
child_edit.extent_removed = point_sub(child_right.extent, edit->start_point);
remaining_bytes_to_delete = old_end_byte - child_right.bytes;
remaining_extent_to_delete = point_sub(old_end_point, child_right.extent);
}
ts_tree_edit(child, &child_edit);
} else if (remaining_bytes_to_delete > 0) {
TSInputEdit child_edit = {
.start_byte = 0,
.bytes_added = 0,
.bytes_removed = min_byte(remaining_bytes_to_delete, ts_tree_total_bytes(child)),
.start_point = {0, 0},
.extent_added = {0, 0},
.extent_removed = point_min(remaining_extent_to_delete, ts_tree_total_size(child).extent),
};
remaining_bytes_to_delete -= child_edit.bytes_removed;
remaining_extent_to_delete = point_sub(remaining_extent_to_delete, child_edit.extent_removed);
ts_tree_edit(child, &child_edit);
} else {
ts_tree_invalidate_lookahead(child, edit->start_byte - child_left.bytes);
}
child_right = length_add(child_left, ts_tree_total_size(child));
child->context.offset = child_left;
self->parent_cache_size++;
}
}
Tree *ts_tree_last_external_token(Tree *tree) {
if (!tree->has_external_tokens) return NULL;
while (tree->child_count > 0) {
for (uint32_t i = tree->child_count - 1; i + 1 > 0; i--) {
Tree *child = tree->children[i];
if (child->has_external_tokens) {
tree = child;
break;
}
}
}
return tree;
}
// Formats the lookahead character `c` into `s` (at most `n` bytes, via
// snprintf) and returns the length snprintf reports.
//   0            -> EOF
//   -1           -> INVALID
//   \n, \t, \r   -> the quoted escape sequence
//   printable ASCII -> the quoted character
//   anything else   -> its decimal code
static size_t ts_tree__write_char_to_string(char *s, size_t n, int32_t c) {
  switch (c) {
    case 0:
      return snprintf(s, n, "EOF");
    case -1:
      return snprintf(s, n, "INVALID");
    case '\n':
      return snprintf(s, n, "'\\n'");
    case '\t':
      return snprintf(s, n, "'\\t'");
    case '\r':
      return snprintf(s, n, "'\\r'");
    default:
      break;
  }

  // Only hand isprint() values in the ASCII range.
  if (c > 0 && c < 128 && isprint(c)) {
    return snprintf(s, n, "'%c'", c);
  }
  return snprintf(s, n, "%d", c);
}
// Recursively writes an S-expression for `self` into `string` and returns the
// number of characters the full output needs (excluding the terminator).
//
// - `limit` is the size handed to every snprintf call. With `limit == 0` the
//   function only measures; nothing is stored.
// - `is_root` forces the node to be printed and suppresses the leading space.
// - `include_all` prints every node, not only the named/visible ones.
// A NULL `self` is rendered as "(NULL)".
static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *language,
char *string, size_t limit, bool is_root,
bool include_all) {
if (!self) return snprintf(string, limit, "(NULL)");
char *cursor = string;
// When writing for real, `*writer` is the advancing cursor. When measuring
// (limit == 0), `*writer` stays at `string` while `cursor` still accumulates
// the lengths, so `cursor - string` is the required size in both modes.
// NOTE: `limit` is not reduced as the cursor advances.
char **writer = (limit > 0) ? &cursor : &string;
bool visible =
include_all ||
is_root ||
self->is_missing ||
(self->visible && self->named) ||
self->context.alias_is_named;
// Separate this node from whatever was printed before it.
if (visible && !is_root) {
cursor += snprintf(*writer, limit, " ");
}
if (visible) {
// A childless, non-empty error node is printed with the character stored in lookahead_char.
if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.bytes > 0) {
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char);
} else if (self->is_missing) {
cursor += snprintf(*writer, limit, "(MISSING");
} else {
// Prefer the alias symbol, when one is set, over the node's own symbol.
TSSymbol symbol = self->context.alias_symbol ? self->context.alias_symbol : self->symbol;
const char *symbol_name = ts_language_symbol_name(language, symbol);
cursor += snprintf(*writer, limit, "(%s", symbol_name);
}
}
// Children are always visited, so visible descendants of a hidden node are
// still printed (inline, without wrapping parentheses for the hidden node).
for (uint32_t i = 0; i < self->child_count; i++) {
Tree *child = self->children[i];
cursor += ts_tree__write_to_string(child, language, *writer, limit, false, include_all);
}
if (visible) cursor += snprintf(*writer, limit, ")");
return cursor - string;
}
// Renders `self` as an S-expression in a buffer obtained from ts_malloc and
// returns that buffer. Works in two passes: measure first, then write.
char *ts_tree_string(const Tree *self, const TSLanguage *language, bool include_all) {
  // Pass 1: a zero limit makes the writer report the length only.
  char probe[1];
  size_t length = ts_tree__write_to_string(self, language, probe, 0, true, include_all);

  // Pass 2: allocate room for the text plus its terminator and write it.
  size_t capacity = length + 1;
  char *output = ts_malloc(capacity * sizeof(char));
  ts_tree__write_to_string(self, language, output, capacity, true, include_all);
  return output;
}
// Emits the DOT node statement for `self`, then recurses into each child and
// emits the edge from `self` to that child.
//
// - `byte_offset` is the byte position at which `self` starts; it is advanced
//   by each child's total byte size while iterating the children.
// - Nodes are named after their address. Leaves get `shape=plaintext` and
//   extras get a gray font.
//
// `%p` requires a pointer to void, so every node pointer is cast explicitly
// before being passed to fprintf.
void ts_tree__print_dot_graph(const Tree *self, uint32_t byte_offset,
                              const TSLanguage *language, FILE *f) {
  // Prefer the alias symbol, when one is set, for the label.
  TSSymbol symbol = self->context.alias_symbol ? self->context.alias_symbol : self->symbol;
  fprintf(f, "tree_%p [label=\"%s\"", (const void *)self, ts_language_symbol_name(language, symbol));

  if (self->child_count == 0)
    fprintf(f, ", shape=plaintext");
  if (self->extra)
    fprintf(f, ", fontcolor=gray");

  fprintf(f, ", tooltip=\"address:%p\nrange:%u - %u\nstate:%d\nerror-cost:%u\nrepeat-depth:%u\"]\n",
          (const void *)self, byte_offset, byte_offset + ts_tree_total_bytes(self), self->parse_state,
          self->error_cost, self->repeat_depth);

  for (uint32_t i = 0; i < self->child_count; i++) {
    const Tree *child = self->children[i];
    ts_tree__print_dot_graph(child, byte_offset, language, f);
    fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (const void *)self, (const void *)child, i);
    byte_offset += ts_tree_total_bytes(child);
  }
}
// Writes a complete DOT `digraph` for the tree rooted at `self` to `f`:
// the header, one node/edge set per tree node, then the closing brace.
void ts_tree_print_dot_graph(const Tree *self, const TSLanguage *language,
                             FILE *f) {
  fputs("digraph tree {\n", f);
  fputs("edge [arrowhead=none]\n", f);
  ts_tree__print_dot_graph(self, 0, language, f);
  fputs("}\n", f);
}
static const TSExternalTokenState empty_state = {.length = 0, .short_data = {0}};
// Compares the external token states of two trees. A NULL tree, or one that
// has no external tokens, is compared as if it carried `empty_state`.
bool ts_tree_external_token_state_eq(const Tree *self, const Tree *other) {
  const TSExternalTokenState *lhs =
    (self && self->has_external_tokens) ? &self->external_token_state : &empty_state;
  const TSExternalTokenState *rhs =
    (other && other->has_external_tokens) ? &other->external_token_state : &empty_state;
  return ts_external_token_state_eq(lhs, rhs);
}

View file

@ -5,119 +5,25 @@
extern "C" {
#endif
#include <stdbool.h>
#include "tree_sitter/parser.h"
#include "tree_sitter/runtime.h"
#include "runtime/length.h"
#include "runtime/array.h"
#include <stdio.h>
extern TSStateId TS_TREE_STATE_NONE;
// Serialized state of an external scanner, stored alongside a token.
typedef struct {
// Both members share the same storage.
// NOTE(review): presumably `length` decides which member is active
// (inline buffer for short states, heap pointer otherwise) — confirm in
// ts_external_token_state_init.
union {
char *long_data;
char short_data[sizeof(char *) + sizeof(unsigned)];
};
// Number of bytes of state data.
unsigned length;
} TSExternalTokenState;
const Subtree *child;
const Subtree *parent;
Length position;
TSSymbol alias_symbol;
} ParentCacheEntry;
typedef struct Tree {
struct {
struct Tree *parent;
uint32_t index;
Length offset;
TSSymbol alias_symbol : 15;
bool alias_is_named : 1;
} context;
struct TSTree {
const Subtree *root;
const TSLanguage *language;
ParentCacheEntry *parent_cache;
uint32_t parent_cache_start;
uint32_t parent_cache_size;
};
uint32_t child_count;
union {
struct {
struct Tree **children;
uint32_t visible_child_count;
uint32_t named_child_count;
unsigned short alias_sequence_id;
};
TSExternalTokenState external_token_state;
int32_t lookahead_char;
};
Length padding;
Length size;
uint32_t bytes_scanned;
TSSymbol symbol;
TSStateId parse_state;
unsigned error_cost;
unsigned repeat_depth;
struct {
TSSymbol symbol;
TSLexMode lex_mode;
} first_leaf;
uint32_t ref_count;
int dynamic_precedence;
bool visible : 1;
bool named : 1;
bool extra : 1;
bool fragile_left : 1;
bool fragile_right : 1;
bool has_changes : 1;
bool has_external_tokens : 1;
bool is_missing : 1;
} Tree;
typedef Array(Tree *) TreeArray;
typedef struct {
TreeArray free_trees;
TreeArray tree_stack;
} TreePool;
void ts_external_token_state_init(TSExternalTokenState *, const char *, unsigned);
const char *ts_external_token_state_data(const TSExternalTokenState *);
bool ts_tree_array_copy(TreeArray, TreeArray *);
void ts_tree_array_delete(TreePool *, TreeArray *);
uint32_t ts_tree_array_essential_count(const TreeArray *);
TreeArray ts_tree_array_remove_last_n(TreeArray *, uint32_t);
TreeArray ts_tree_array_remove_trailing_extras(TreeArray *);
void ts_tree_array_reverse(TreeArray *);
void ts_tree_pool_init(TreePool *);
void ts_tree_pool_delete(TreePool *);
Tree *ts_tree_pool_allocate(TreePool *);
void ts_tree_pool_free(TreePool *, Tree *);
Tree *ts_tree_make_leaf(TreePool *, TSSymbol, Length, Length, const TSLanguage *);
Tree *ts_tree_make_node(TreePool *, TSSymbol, uint32_t, Tree **, unsigned, const TSLanguage *);
Tree *ts_tree_make_copy(TreePool *, Tree *child);
Tree *ts_tree_make_error_node(TreePool *, TreeArray *, const TSLanguage *);
Tree *ts_tree_make_error(TreePool *, Length, Length, int32_t, const TSLanguage *);
Tree *ts_tree_make_missing_leaf(TreePool *, TSSymbol, const TSLanguage *);
void ts_tree_retain(Tree *tree);
void ts_tree_release(TreePool *, Tree *tree);
bool ts_tree_eq(const Tree *tree1, const Tree *tree2);
int ts_tree_compare(const Tree *tree1, const Tree *tree2);
uint32_t ts_tree_start_column(const Tree *self);
uint32_t ts_tree_end_column(const Tree *self);
void ts_tree_set_children(Tree *, uint32_t, Tree **, const TSLanguage *);
void ts_tree_assign_parents(Tree *, TreePool *, const TSLanguage *);
void ts_tree_edit(Tree *, const TSInputEdit *edit);
char *ts_tree_string(const Tree *, const TSLanguage *, bool include_all);
void ts_tree_print_dot_graph(const Tree *, const TSLanguage *, FILE *);
Tree *ts_tree_last_external_token(Tree *);
bool ts_tree_external_token_state_eq(const Tree *, const Tree *);
static inline uint32_t ts_tree_total_bytes(const Tree *self) {
return self->padding.bytes + self->size.bytes;
}
// Full length of the node: its padding followed by its own size.
static inline Length ts_tree_total_size(const Tree *self) {
  Length total = length_add(self->padding, self->size);
  return total;
}
TSTree *ts_tree_new(const Subtree *root, const TSLanguage *language);
TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol);
TSNode ts_tree_get_cached_parent(const TSTree *, const TSNode *);
void ts_tree_set_cached_parent(const TSTree *, const TSNode *, const TSNode *);
#ifdef __cplusplus
}

214
src/runtime/tree_cursor.c Normal file
View file

@ -0,0 +1,214 @@
#include "tree_sitter/runtime.h"
#include "runtime/alloc.h"
#include "runtime/tree_cursor.h"
#include "runtime/language.h"
#include "runtime/tree.h"
// Iteration state for walking the direct children of one subtree.
typedef struct {
// Subtree whose `children` array is being iterated.
const Subtree *parent;
// Tree the cursor belongs to.
const TSTree *tree;
// Position of the next child to be yielded; advanced by each child's total size.
Length position;
// Index of the next child in `parent->children`.
uint32_t child_index;
// Like `child_index`, but only counts children that are not `extra`;
// used to index `alias_sequence`.
uint32_t structural_child_index;
// Alias sequence looked up for `parent`; may be NULL.
const TSSymbol *alias_sequence;
} ChildIterator;
// ChildIterator
static inline ChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) {
TreeCursorEntry *last_entry = array_back(&self->stack);
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
last_entry->subtree->alias_sequence_id
);
return (ChildIterator) {
.tree = self->tree,
.parent = last_entry->subtree,
.position = last_entry->position,
.child_index = 0,
.structural_child_index = 0,
.alias_sequence = alias_sequence,
};
}
// Yields the next child of the iterator's parent.
//
// On success, fills `*result` with the child's cursor entry (its position is
// the child's start, before padding), sets `*visible` to whether the child is
// visible either by itself or through a non-zero alias, advances the
// iterator past the child and returns true. Returns false once every child
// has been yielded, leaving `*result` and `*visible` untouched.
static inline bool ts_tree_cursor_child_iterator_next(ChildIterator *self,
                                                      TreeCursorEntry *result,
                                                      bool *visible) {
  const Subtree *parent = self->parent;
  if (self->child_index == parent->children.size) return false;

  const Subtree *child = parent->children.contents[self->child_index];
  *result = (TreeCursorEntry) {
    .subtree = child,
    .position = self->position,
    .child_index = self->child_index,
    .structural_child_index = self->structural_child_index,
  };

  // Extras are not part of the alias sequence, so only structural children
  // can be made visible by an alias.
  bool is_visible = child->visible;
  if (!child->extra && self->alias_sequence) {
    if (self->alias_sequence[self->structural_child_index]) is_visible = true;
  }
  *visible = is_visible;

  // Step past this child.
  self->position = length_add(self->position, ts_subtree_total_size(child));
  self->child_index += 1;
  if (!child->extra) self->structural_child_index += 1;
  return true;
}
// TSTreeCursor - lifecycle
// Returns, by value, a cursor initialised by ts_tree_cursor_init for `tree`.
TSTreeCursor ts_tree_cursor_new(const TSTree *tree) {
  TSTreeCursor cursor;
  // The public TSTreeCursor is used as storage for the internal TreeCursor.
  ts_tree_cursor_init((TreeCursor *)&cursor, tree);
  return cursor;
}
// Initialises `self` for `tree`: the stack starts with a single entry for the
// root subtree at position zero.
void ts_tree_cursor_init(TreeCursor *self, const TSTree *tree) {
  TreeCursorEntry root_entry = {
    .subtree = tree->root,
    .position = length_zero(),
    .child_index = 0,
    .structural_child_index = 0,
  };

  self->tree = tree;
  array_init(&self->stack);
  array_push(&self->stack, root_entry);
}
// Releases the cursor's stack via array_delete.
void ts_tree_cursor_delete(TSTreeCursor *_self) {
  TreeCursor *cursor = (TreeCursor *)_self;
  array_delete(&cursor->stack);
}
// TSTreeCursor - walking the tree
// Moves the cursor to the first visible child of the current node, looking
// through invisible children that themselves contain visible children.
// Returns true if the cursor moved onto a visible node, false otherwise.
bool ts_tree_cursor_goto_first_child(TSTreeCursor *_self) {
  TreeCursor *cursor = (TreeCursor *)_self;

  for (;;) {
    bool descended = false;
    bool is_visible;
    TreeCursorEntry candidate;
    ChildIterator children = ts_tree_cursor_iterate_children(cursor);

    while (ts_tree_cursor_child_iterator_next(&children, &candidate, &is_visible)) {
      if (is_visible) {
        array_push(&cursor->stack, candidate);
        return true;
      }

      // An invisible child is only worth entering if it has visible children.
      bool has_visible_children =
        candidate.subtree->children.size > 0 &&
        candidate.subtree->visible_child_count > 0;
      if (!has_visible_children) continue;

      array_push(&cursor->stack, candidate);
      descended = true;
      break;
    }

    // Nothing to step into at this level: give up.
    if (!descended) return false;
  }
}
// Moves the cursor to the first visible child whose end byte is greater than
// `goal_byte`, looking through invisible children that contain visible ones.
//
// Returns the number of visible children skipped before the chosen one, or -1
// if there is no such child; on -1 the stack is restored to its size on entry.
int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *_self, uint32_t goal_byte) {
TreeCursor *self = (TreeCursor *)_self;
// Remembered so the stack can be rolled back on failure.
uint32_t initial_size = self->stack.size;
// Not reset when descending into an invisible child, so it keeps counting
// across levels.
uint32_t visible_child_index = 0;
bool did_descend;
do {
did_descend = false;
bool visible;
TreeCursorEntry entry;
ChildIterator iterator = ts_tree_cursor_iterate_children(self);
while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
// The iterator has already advanced past `entry`, so its position is the
// end of this child.
bool at_goal = iterator.position.bytes > goal_byte;
// visible_child_count is only read for subtrees that have children.
uint32_t visible_child_count = entry.subtree->children.size > 0
? entry.subtree->visible_child_count
: 0;
if (at_goal) {
if (visible) {
array_push(&self->stack, entry);
return visible_child_index;
}
// Invisible but with visible children: search inside it.
if (visible_child_count > 0) {
array_push(&self->stack, entry);
did_descend = true;
break;
}
// Invisible with no visible children: fall through to the next sibling.
} else if (visible) {
visible_child_index++;
} else {
// A skipped invisible child contributes all of its visible children.
visible_child_index += visible_child_count;
}
}
} while (did_descend);
// No match: drop any entries pushed while descending.
self->stack.size = initial_size;
return -1;
}
// Moves the cursor to the next visible sibling of the current node.
// Returns true if the cursor moved; otherwise restores the stack to its size
// on entry and returns false.
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) {
TreeCursor *self = (TreeCursor *)_self;
// Remembered so the stack can be rolled back on failure.
uint32_t initial_size = self->stack.size;
// The root entry (index 0) is never popped.
while (self->stack.size > 1) {
bool visible;
TreeCursorEntry entry = array_pop(&self->stack);
// Rebuild an iterator over the popped entry's parent, positioned at the
// popped entry itself.
ChildIterator iterator = ts_tree_cursor_iterate_children(self);
iterator.child_index = entry.child_index;
iterator.structural_child_index = entry.structural_child_index;
iterator.position = entry.position;
// Step over the popped entry; this also recomputes its visibility.
ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible);
// Stop climbing once a visible entry other than the starting node has been
// popped. NOTE(review): presumably this is the starting node's visible
// parent, so siblings beyond it are out of scope — confirm.
if (visible && self->stack.size + 1 < initial_size) break;
// Scan the remaining siblings at this level.
while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
if (visible) {
array_push(&self->stack, entry);
return true;
}
// An invisible sibling with visible children: enter it and move to its
// first visible child.
if (entry.subtree->children.size > 0 && entry.subtree->visible_child_count > 0) {
array_push(&self->stack, entry);
ts_tree_cursor_goto_first_child(_self);
return true;
}
}
}
// No sibling found: undo the pops.
self->stack.size = initial_size;
return false;
}
// Moves the cursor to the nearest ancestor entry whose subtree is visible by
// truncating the stack to end at that entry. Returns false, leaving the stack
// untouched, when no such ancestor exists.
bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
  TreeCursor *cursor = (TreeCursor *)_self;

  // Start at the current entry's index and walk towards the root.
  unsigned depth = cursor->stack.size - 1;
  while (depth > 0) {
    depth--;
    const TreeCursorEntry *ancestor = &cursor->stack.contents[depth];
    if (ancestor->subtree->visible) {
      cursor->stack.size = depth + 1;
      return true;
    }
  }
  return false;
}
TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
const TreeCursor *self = (const TreeCursor *)_self;
TreeCursorEntry *last_entry = array_back(&self->stack);
TSSymbol alias_symbol = 0;
if (self->stack.size > 1) {
TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
parent_entry->subtree->alias_sequence_id
);
if (alias_sequence && !last_entry->subtree->extra) {
alias_symbol = alias_sequence[last_entry->structural_child_index];
}
}
return ts_node_new(
self->tree,
last_entry->subtree,
length_add(last_entry->position, last_entry->subtree->padding),
alias_symbol
);
}

20
src/runtime/tree_cursor.h Normal file
View file

@ -0,0 +1,20 @@
#ifndef RUNTIME_TREE_CURSOR_H_
#define RUNTIME_TREE_CURSOR_H_
#include "runtime/subtree.h"
// One level of a tree cursor's stack: a subtree plus where it sits in its parent.
typedef struct {
// The subtree at this level.
const Subtree *subtree;
// Position at which the subtree starts, before its padding.
Length position;
// Index of the subtree within its parent's children.
uint32_t child_index;
// Index among the parent's non-extra children; used to look up aliases.
uint32_t structural_child_index;
} TreeCursorEntry;
// Internal representation of a tree cursor.
typedef struct {
// Path of entries from the root (index 0) to the current node (last element).
Array(TreeCursorEntry) stack;
// The tree being traversed.
const TSTree *tree;
} TreeCursor;
void ts_tree_cursor_init(TreeCursor *, const TSTree *);
#endif // RUNTIME_TREE_CURSOR_H_

View file

@ -43,12 +43,12 @@ int main(int argc, char *arg[]) {
vector<size_t> error_speeds;
vector<size_t> non_error_speeds;
auto document = ts_document_new();
TSParser *parser = ts_parser_new();
if (getenv("TREE_SITTER_BENCHMARK_SVG")) {
ts_document_print_debugging_graphs(document, true);
ts_parser_print_dot_graphs(parser, stderr);
} else if (getenv("TREE_SITTER_BENCHMARK_LOG")) {
ts_document_set_logger(document, stderr_logger_new(false));
ts_parser_set_logger(parser, stderr_logger_new(false));
}
auto language_filter = getenv("TREE_SITTER_BENCHMARK_LANGUAGE");
@ -61,7 +61,7 @@ int main(int argc, char *arg[]) {
for (auto &language_name : language_names) {
if (language_filter && language_name != language_filter) continue;
ts_document_set_language(document, load_real_language(language_name));
ts_parser_set_language(parser, load_real_language(language_name));
printf("%s\n", language_name.c_str());
@ -69,20 +69,16 @@ int main(int argc, char *arg[]) {
if (file_name_filter && example.file_name != file_name_filter) continue;
if (example.input.size() < 256) continue;
ts_document_invalidate(document);
ts_document_set_input_string(document, "");
ts_document_parse(document);
ts_document_invalidate(document);
ts_document_set_input_string(document, example.input.c_str());
clock_t start_time = clock();
ts_document_parse(document);
TSTree *tree = ts_parser_parse_string(parser, nullptr, example.input.c_str(), example.input.size());
clock_t end_time = clock();
unsigned duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC;
assert(!ts_node_has_error(ts_document_root_node(document)));
assert(!ts_node_has_error(ts_tree_root_node(tree)));
ts_tree_delete(tree);
size_t duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC;
size_t speed = static_cast<double>(example.input.size()) / duration;
printf(" %-30s\t%u ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed);
printf(" %-30s\t%lu ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed);
if (speed != 0) non_error_speeds.push_back(speed);
}
@ -93,15 +89,15 @@ int main(int argc, char *arg[]) {
if (file_name_filter && example.file_name != file_name_filter) continue;
if (example.input.size() < 256) continue;
ts_document_invalidate(document);
ts_document_set_input_string(document, example.input.c_str());
clock_t start_time = clock();
ts_document_parse(document);
TSTree *tree = ts_parser_parse_string(parser, nullptr, example.input.c_str(), example.input.size());
clock_t end_time = clock();
unsigned duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC;
ts_tree_delete(tree);
size_t duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC;
size_t speed = static_cast<double>(example.input.size()) / duration;
printf(" %-30s\t%u ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed);
printf(" %-30s\t%lu ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed);
if (speed != 0) error_speeds.push_back(speed);
}
}
@ -118,5 +114,6 @@ int main(int argc, char *arg[]) {
printf(" %-30s\t%lu bytes/ms\n", "average speed", mean(error_speeds));
printf(" %-30s\t%lu bytes/ms\n", "worst speed", min(error_speeds));
ts_parser_delete(parser);
return 0;
}

View file

@ -48,7 +48,7 @@ describe("extract_tokens", []() {
Repeat{Symbol::non_terminal(3)}
},
},
{}, {}, {}, {}
{}, {}, {}, {}, {}
});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
@ -156,7 +156,7 @@ describe("extract_tokens", []() {
})
},
},
{}, {}, {}, {}
{}, {}, {}, {}, {}
});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
@ -203,7 +203,7 @@ describe("extract_tokens", []() {
Rule::seq({ String{"ef"}, String{"cd"} })
},
},
{}, {}, {}, {}
{}, {}, {}, {}, {}
});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
@ -258,7 +258,7 @@ describe("extract_tokens", []() {
String{"a"}
},
},
{}, {}, {}, {}
{}, {}, {}, {}, {}
});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
@ -298,7 +298,7 @@ describe("extract_tokens", []() {
{
{ Symbol::non_terminal(2), Symbol::non_terminal(3) }
},
{}, {}
{}, {}, {}
});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
@ -319,7 +319,7 @@ describe("extract_tokens", []() {
String{"y"},
Pattern{" "},
},
{}, {}, {}
{}, {}, {}, {}
});
AssertThat(get<2>(result), Equals(CompileError::none()));
@ -340,7 +340,7 @@ describe("extract_tokens", []() {
{
String{"y"},
},
{}, {}, {}
{}, {}, {}, {}
});
AssertThat(get<2>(result), Equals(CompileError::none()));
@ -370,7 +370,7 @@ describe("extract_tokens", []() {
{
Symbol::non_terminal(2),
},
{}, {}, {}
{}, {}, {}, {}
});
AssertThat(get<2>(result), Equals(CompileError::none()));
@ -399,7 +399,7 @@ describe("extract_tokens", []() {
{
Symbol::non_terminal(1)
},
{}, {}, {}
{}, {}, {}, {}
});
AssertThat(get<2>(result), Equals(CompileError(
@ -417,7 +417,7 @@ describe("extract_tokens", []() {
{
Rule::choice({ Symbol::non_terminal(1), Blank{} })
},
{}, {}, {}
{}, {}, {}, {}
});
AssertThat(get<2>(result), Equals(CompileError(
@ -446,7 +446,7 @@ describe("extract_tokens", []() {
{
Variable{"rule_A", VariableTypeNamed, Symbol::non_terminal(0)}
},
{}
{}, {}
});
AssertThat(get<2>(result), Equals(CompileError(

View file

@ -110,6 +110,30 @@ describe("parse_regex", []() {
CharacterSet{{'\\'}}
},
{
"dashes",
"a-b",
Rule::seq({
CharacterSet{{'a'}},
CharacterSet{{'-'}},
CharacterSet{{'b'}}
})
},
{
"literal dashes in character classes",
"[a-][\\d-a][\\S-a]",
Rule::seq({
CharacterSet{{'a', '-'}},
CharacterSet().include('0', '9').include('-').include('a'),
CharacterSet().include_all()
.exclude(' ')
.exclude('\t')
.exclude('\r')
.exclude('\n')
})
},
{
"character groups in sequences",
"x([^x]|\\\\x)*x",
@ -150,6 +174,33 @@ describe("parse_regex", []() {
})
},
{
"escaped brackets",
"\\[\\]",
Rule::seq({
CharacterSet{{'['}},
CharacterSet{{']'}},
})
},
{
"escaped brackets in choice",
"[\\[\\]]",
CharacterSet{{'[', ']'}}
},
{
"escaped brackets in range",
"[\\[-\\]]",
CharacterSet{{'[', '\\', ']'}}
},
{
"escaped characters in ranges",
"[\\0-\\n]",
CharacterSet().include(0, '\n')
},
{
"escaped periods",
"a\\.",

View file

@ -81,7 +81,9 @@ int main() {
(function_declarator (identifier) (parameter_list))
(compound_statement
(if_statement
(field_expression (identifier) (MISSING))
(field_expression
(identifier)
(MISSING))
(compound_statement
(expression_statement (call_expression (identifier) (argument_list)))
(expression_statement (call_expression (identifier) (argument_list)))
@ -141,3 +143,24 @@ int y = 5;
(translation_unit
(declaration (primitive_type) (ERROR (identifier)) (identifier))
(declaration (primitive_type) (init_declarator (identifier) (number_literal))))
==========================================
Declarations with missing variable names
==========================================
int a() {
struct x = 1;
int = 2;
}
---
(translation_unit
(function_definition
(primitive_type)
(function_declarator (identifier) (parameter_list))
(compound_statement
(struct_specifier (type_identifier))
(ERROR (number_literal))
(primitive_type)
(ERROR (number_literal)))))

View file

@ -77,14 +77,12 @@ if ({a: 'b'} {c: 'd'}) {
(ERROR (object (pair (property_identifier) (string))))
(object (pair (property_identifier) (string))))
(statement_block
(expression_statement (assignment_expression
(identifier)
(ERROR (function
(formal_parameters (identifier))
(statement_block (expression_statement (identifier)))))
(function
(formal_parameters (identifier))
(statement_block (expression_statement (identifier)))))))))
(expression_statement
(assignment_expression
(identifier)
(function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))
(MISSING))
(function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))))
===================================================
Extra tokens at the end of the file
@ -149,3 +147,33 @@ const a = `b c ${d +} f g`
(variable_declarator
(identifier)
(template_string (template_substitution (identifier) (ERROR))))))
=========================================================
Long sequences of invalid tokens
=========================================================
function main(x) {
console.log('a');
what??????????????????????????????????????????????????
console.log('b');
return {};
}
---
(program
(function
(identifier)
(formal_parameters (identifier))
(statement_block
(expression_statement
(call_expression
(member_expression (identifier) (property_identifier))
(arguments (string))))
(expression_statement
(identifier)
(ERROR
(call_expression
(member_expression (identifier) (property_identifier))
(arguments (string)))))
(return_statement (object)))))

View file

@ -1,27 +1,24 @@
#include <string.h>
#include <cassert>
#include "tree_sitter/runtime.h"
void test_log(void *payload, TSLogType type, const char *string) { }
TSLogger logger = {
.log = test_log,
};
extern "C" const TSLanguage *TS_LANG();
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
const char *str = reinterpret_cast<const char *>(data);
TSDocument *document = ts_document_new();
ts_document_set_language(document, TS_LANG());
ts_document_set_input_string_with_length(document, str, size);
TSParser *parser = ts_parser_new();
TSParseOptions options = {};
options.halt_on_error = TS_HALT_ON_ERROR;
ts_document_parse_with_options(document, options);
// This can fail if the language version doesn't match the runtime version
bool language_ok = ts_parser_set_language(parser, TS_LANG());
assert(language_ok);
TSNode root_node = ts_document_root_node(document);
ts_document_free(document);
ts_parser_halt_on_error(parser, TS_HALT_ON_ERROR);
TSTree *tree = ts_parser_parse_string(parser, NULL, str, size);
TSNode root_node = ts_tree_root_node(tree);
ts_tree_delete(tree);
ts_parser_delete(parser);
return 0;
}

View file

@ -223,7 +223,7 @@ const TSLanguage *load_real_language(const string &language_name) {
printf("\n" "Regenerating the %s parser...\n", language_name.c_str());
string grammar_json = read_file(grammar_filename);
TSCompileResult result = ts_compile_grammar(grammar_json.c_str());
TSCompileResult result = ts_compile_grammar(grammar_json.c_str(), nullptr);
if (result.error_type != TSCompileErrorTypeNone) {
fprintf(stderr, "Failed to compile %s grammar: %s\n", language_name.c_str(), result.error_message);
return nullptr;

View file

@ -1,3 +1,4 @@
#include "helpers/random_helpers.h"
#include <string>
#include <vector>
#include <random>
@ -6,54 +7,69 @@
using std::string;
using std::vector;
static std::default_random_engine engine;
Generator default_generator(0);
unsigned get_time_as_seed() {
return time(nullptr);
}
void random_reseed(unsigned seed) {
void Generator::reseed(unsigned seed) {
engine.seed(seed);
}
unsigned random_unsigned() {
return std::uniform_int_distribution<unsigned>()(engine);
unsigned Generator::operator()() {
return distribution(engine);
}
unsigned random_unsigned(unsigned max) {
return std::uniform_int_distribution<unsigned>(0, max - 1)(engine);
unsigned Generator::operator()(unsigned max) {
return distribution(engine) % max;
}
static string random_string(char min, char max) {
string Generator::str(char min, char max) {
string result;
size_t length = random_unsigned(12);
size_t length = operator()(12);
for (size_t i = 0; i < length; i++) {
result += (min + random_unsigned(max - min));
result += (min + operator()(max - min));
}
return result;
}
static string random_char(string characters) {
size_t index = random_unsigned(characters.size());
return string() + characters[index];
}
static string operator_characters = "!(){}[]<>+-=";
string random_words(size_t count) {
string Generator::words(size_t count) {
string result;
bool just_inserted_word = false;
for (size_t i = 0; i < count; i++) {
if (random_unsigned(10) < 6) {
result += random_char("!(){}[]<>+-=");
if (operator()(10) < 6) {
result += operator_characters[operator()(operator_characters.size())];
} else {
if (just_inserted_word)
result += " ";
result += random_string('a', 'z');
result += str('a', 'z');
just_inserted_word = true;
}
}
return result;
}
string select_random(const vector<string> &list) {
return list[random_unsigned(list.size())];
string Generator::select(const vector<string> &list) {
return list[operator()(list.size())];
}
#ifdef _WIN32
#include <windows.h>
void Generator::sleep_some() {
Sleep(operator()(5));
}
#else
#include <unistd.h>
void Generator::sleep_some() {
usleep(operator()(5 * 1000));
}
#endif

View file

@ -3,12 +3,26 @@
#include <string>
#include <vector>
#include <random>
unsigned get_time_as_seed();
void random_reseed(unsigned);
unsigned random_unsigned();
unsigned random_unsigned(unsigned max);
std::string random_words(size_t count);
std::string select_random(const std::vector<std::string> &);
class Generator {
std::default_random_engine engine;
std::uniform_int_distribution<uint32_t> distribution;
public:
Generator(uint32_t seed) : engine{seed} {}
void reseed(unsigned);
unsigned operator()();
unsigned operator()(unsigned max);
std::string words(size_t count);
std::string str(char min, char max);
std::string select(const std::vector<std::string> &);
void sleep_some();
};
extern Generator default_generator;
#endif // HELPERS_RANDOM_HELPERS_H_

View file

@ -1,6 +1,7 @@
#include <stdlib.h>
#include <map>
#include <vector>
#include <mutex>
using std::map;
using std::vector;
@ -8,13 +9,16 @@ using std::vector;
static bool _enabled = false;
static size_t _allocation_count = 0;
static map<void *, size_t> _outstanding_allocations;
static std::mutex _outstanding_allocations_mutex;
static bool _multi_threaded_mode = false;
namespace record_alloc {
void start() {
void start(bool multi_threaded_mode) {
_enabled = true;
_allocation_count = 0;
_outstanding_allocations.clear();
_multi_threaded_mode = multi_threaded_mode;
}
void stop() {
@ -30,7 +34,11 @@ vector<size_t> outstanding_allocation_indices() {
}
size_t allocation_count() {
return _allocation_count;
size_t result;
_outstanding_allocations_mutex.lock();
result = _allocation_count;
_outstanding_allocations_mutex.unlock();
return result;
}
} // namespace record_alloc
@ -39,16 +47,20 @@ extern "C" {
static void *record_allocation(void *result) {
if (!_enabled) return result;
if (_multi_threaded_mode) _outstanding_allocations_mutex.lock();
_outstanding_allocations[result] = _allocation_count;
_allocation_count++;
if (_multi_threaded_mode) _outstanding_allocations_mutex.unlock();
return result;
}
static void record_deallocation(void *pointer) {
if (_multi_threaded_mode) _outstanding_allocations_mutex.lock();
auto entry = _outstanding_allocations.find(pointer);
if (entry != _outstanding_allocations.end()) {
_outstanding_allocations.erase(entry);
}
if (_multi_threaded_mode) _outstanding_allocations_mutex.unlock();
}
void *ts_record_malloc(size_t size) {

View file

@ -5,7 +5,7 @@
namespace record_alloc {
void start();
void start(bool multi_threaded_mode = false);
void stop();
void fail_at_allocation_index(size_t failure_index);
std::vector<size_t> outstanding_allocation_indices();

View file

@ -21,17 +21,16 @@ static void append_text_to_scope_sequence(ScopeSequence *sequence,
static void append_to_scope_sequence(ScopeSequence *sequence,
ScopeStack *current_scopes,
TSNode node, TSDocument *document,
const std::string &text) {
TSNode node, const std::string &text) {
append_text_to_scope_sequence(
sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size()
);
current_scopes->push_back(ts_node_type(node, document));
current_scopes->push_back(ts_node_type(node));
for (size_t i = 0, n = ts_node_child_count(node); i < n; i++) {
TSNode child = ts_node_child(node, i);
append_to_scope_sequence(sequence, current_scopes, child, document, text);
append_to_scope_sequence(sequence, current_scopes, child, text);
}
append_text_to_scope_sequence(
@ -41,11 +40,11 @@ static void append_to_scope_sequence(ScopeSequence *sequence,
current_scopes->pop_back();
}
ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text) {
ScopeSequence build_scope_sequence(TSTree *tree, const std::string &text) {
ScopeSequence sequence;
ScopeStack current_scopes;
TSNode node = ts_document_root_node(document);
append_to_scope_sequence(&sequence, &current_scopes, node, document, text);
TSNode node = ts_tree_root_node(tree);
append_to_scope_sequence(&sequence, &current_scopes, node, text);
return sequence;
}

View file

@ -9,7 +9,7 @@ typedef std::string Scope;
typedef std::vector<Scope> ScopeStack;
typedef std::vector<ScopeStack> ScopeSequence;
ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text);
ScopeSequence build_scope_sequence(TSTree *tree, const std::string &text);
void verify_changed_ranges(const ScopeSequence &old, const ScopeSequence &new_sequence, const std::string &text, TSRange *ranges, size_t range_count);

View file

@ -1,5 +1,6 @@
#include "helpers/spy_input.h"
#include "helpers/encoding_helpers.h"
#include "runtime/point.h"
#include <string.h>
#include <algorithm>
#include <assert.h>
@ -20,6 +21,14 @@ SpyInput::~SpyInput() {
delete[] buffer;
}
// Adds extent `b` to point `a`. If `b` spans at least one row, the rows add
// and the column is `b`'s column; otherwise only the columns add.
static TSPoint operator+(TSPoint a, TSPoint b) {
  const bool spans_rows = b.row > 0;
  return spans_rows
    ? TSPoint {a.row + b.row, b.column}
    : TSPoint {a.row, a.column + b.column};
}
static void add_byte_range(vector<pair<uint32_t, uint32_t>> *ranges,
uint32_t start, uint32_t count) {
uint32_t end = start + count;
@ -112,11 +121,11 @@ TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string te
undo_stack.push_back(SpyInputEdit{start_byte, bytes_added, swap.first});
TSInputEdit result = {};
result.start_byte = start_byte;
result.bytes_added = bytes_added;
result.bytes_removed = bytes_removed;
result.old_end_byte = start_byte + bytes_removed;
result.new_end_byte = start_byte + bytes_added;
result.start_point = swap.second;
result.extent_removed = get_extent(swap.first);
result.extent_added = get_extent(text);
result.old_end_point = result.start_point + get_extent(swap.first);
result.new_end_point = result.start_point + get_extent(text);
return result;
}
@ -126,11 +135,11 @@ TSInputEdit SpyInput::undo() {
auto swap = swap_substr(entry.start_byte, entry.bytes_removed, entry.text_inserted);
TSInputEdit result;
result.start_byte = entry.start_byte;
result.bytes_removed = entry.bytes_removed;
result.bytes_added = entry.text_inserted.size();
result.old_end_byte = entry.start_byte + entry.bytes_removed;
result.new_end_byte = entry.start_byte + entry.text_inserted.size();
result.start_point = swap.second;
result.extent_removed = get_extent(swap.first);
result.extent_added = get_extent(entry.text_inserted);
result.old_end_point = result.start_point + get_extent(swap.first);
result.new_end_point = result.start_point + get_extent(entry.text_inserted);
return result;
}

View file

@ -1,8 +1,6 @@
#include "bandit/bandit.h"
#include "test_helper.h"
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include "runtime/document.h"
#include "runtime/node.h"
#include <ostream>
using std::string;
@ -16,33 +14,42 @@ const char *symbol_names[24] = {
"twenty-two", "twenty-three"
};
Tree ** tree_array(std::vector<Tree *> trees) {
Tree ** result = (Tree **)calloc(trees.size(), sizeof(Tree *));
for (size_t i = 0; i < trees.size(); i++)
result[i] = trees[i];
return result;
// Packs `trees` into a SubtreeArray whose size and capacity both equal the
// vector's length. The returned pointer refers to a function-local static, so
// each call overwrites the array handed out by the previous call. The
// `contents` buffer is calloc'd here and is not released by this function.
SubtreeArray *tree_array(std::vector<const Subtree *> trees) {
  static SubtreeArray result;
  const size_t count = trees.size();
  result.capacity = count;
  result.size = count;
  result.contents = (const Subtree **)calloc(count, sizeof(Subtree *));
  size_t slot = 0;
  for (const Subtree *tree : trees) {
    result.contents[slot] = tree;
    slot++;
  }
  return &result;
}
ostream &operator<<(std::ostream &stream, const Tree *tree) {
ostream &operator<<(std::ostream &stream, const Subtree *tree) {
static TSLanguage DUMMY_LANGUAGE = {};
static TSDocument DUMMY_DOCUMENT = {};
DUMMY_DOCUMENT.parser.language = &DUMMY_LANGUAGE;
DUMMY_LANGUAGE.symbol_names = symbol_names;
TSNode node;
node.data = tree;
return stream << string(ts_node_string(node, &DUMMY_DOCUMENT));
char *string = ts_subtree_string(tree, &DUMMY_LANGUAGE, false);
stream << string;
ts_free(string);
return stream;
}
ostream &operator<<(ostream &stream, const TSNode &node) {
return stream << string("{") << (const Tree *)node.data <<
string(", ") << to_string(ts_node_start_byte(node)) << string("}");
if (ts_node_is_null(node)) {
return stream << "NULL";
} else {
char *string = ts_node_string(node);
stream << "{" << string << ", " << to_string(ts_node_start_byte(node)) << "}";
ts_free(string);
return stream;
}
}
// Equality for TSNode values, delegating entirely to ts_node_eq so that nodes
// can be compared with `==` and with assertion matchers.
bool operator==(const TSNode &left, const TSNode &right) {
  const bool nodes_match = ts_node_eq(left, right);
  return nodes_match;
}
bool operator==(const std::vector<Tree *> &vec, const TreeArray &array) {
bool operator==(const std::vector<const Subtree *> &vec, const SubtreeArray &array) {
if (vec.size() != array.size)
return false;
for (size_t i = 0; i < array.size; i++)

View file

@ -1,17 +1,17 @@
#ifndef HELPERS_TREE_HELPERS_H_
#define HELPERS_TREE_HELPERS_H_
#include "runtime/tree.h"
#include "runtime/subtree.h"
#include <vector>
#include <string>
extern const char *symbol_names[24];
Tree ** tree_array(std::vector<Tree *> trees);
SubtreeArray *tree_array(std::vector<const Subtree *> trees);
std::ostream &operator<<(std::ostream &stream, const Tree *tree);
std::ostream &operator<<(std::ostream &stream, const Subtree *tree);
std::ostream &operator<<(std::ostream &stream, const TSNode &node);
bool operator==(const TSNode &left, const TSNode &right);
bool operator==(const std::vector<Tree *> &right, const TreeArray &array);
bool operator==(const std::vector<const Subtree *> &right, const SubtreeArray &array);
void assert_consistent_tree_sizes(TSNode node);

View file

@ -29,11 +29,14 @@ describe("examples found via fuzzing", [&]() {
for (unsigned i = 0, n = examples.size(); i < n; i++) {
it(("parses example number " + to_string(i)).c_str(), [&]() {
TSDocument *document = ts_document_new();
// ts_document_print_debugging_graphs(document, true);
TSParser *parser = ts_parser_new();
if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) {
ts_parser_print_dot_graphs(parser, stderr);
}
const string &language_name = examples[i].first;
ts_document_set_language(document, load_real_language(language_name));
ts_parser_set_language(parser, load_real_language(language_name));
string input;
const string &base64_input = examples[i].second;
@ -44,18 +47,12 @@ describe("examples found via fuzzing", [&]() {
base64_input.size()
));
ts_document_set_input_string_with_length(
document,
input.c_str(),
input.size()
);
ts_document_parse(document);
TSNode node = ts_document_root_node(document);
TSTree *tree = ts_parser_parse_string(parser, nullptr, input.c_str(), input.size());
TSNode node = ts_tree_root_node(tree);
assert_consistent_tree_sizes(node);
ts_document_free(document);
ts_tree_delete(tree);
ts_parser_delete(parser);
});
}

View file

@ -12,14 +12,16 @@
#include "helpers/tree_helpers.h"
#include <set>
static void assert_correct_tree_size(TSDocument *document, string content) {
TSNode root_node = ts_document_root_node(document);
static void assert_correct_tree_size(TSTree *tree, string content) {
TSNode root_node = ts_tree_root_node(tree);
AssertThat(ts_node_end_byte(root_node), Equals(content.size()));
assert_consistent_tree_sizes(root_node);
}
START_TEST
if (TREE_SITTER_SEED == -1) return;
vector<string> test_languages({
"javascript",
"json",
@ -31,101 +33,136 @@ vector<string> test_languages({
for (auto &language_name : test_languages) {
describe(("the " + language_name + " language").c_str(), [&]() {
TSDocument *document;
TSParser *parser;
const bool debug_graphs_enabled = getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS");
before_each([&]() {
record_alloc::start();
document = ts_document_new();
ts_document_set_language(document, load_real_language(language_name));
parser = ts_parser_new();
ts_parser_set_language(parser, load_real_language(language_name));
// ts_document_set_logger(document, stderr_logger_new(true));
if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) {
ts_document_print_debugging_graphs(document, true);
// ts_parser_set_logger(parser, stderr_logger_new(true));
if (debug_graphs_enabled) {
ts_parser_print_dot_graphs(parser, stderr);
}
});
after_each([&]() {
ts_document_free(document);
ts_parser_delete(parser);
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
for (auto &entry : read_real_language_corpus(language_name)) {
SpyInput *input;
auto it_handles_edit_sequence = [&](string name, std::function<void()> edit_sequence){
it(("parses " + entry.description + ": " + name).c_str(), [&]() {
input = new SpyInput(entry.input, 3);
ts_document_set_input(document, input->input());
edit_sequence();
it(("parses " + entry.description + ": initial parse").c_str(), [&]() {
input = new SpyInput(entry.input, 3);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
TSNode root_node = ts_document_root_node(document);
const char *node_string = ts_node_string(root_node, document);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
TSTree *tree = ts_parser_parse(parser, nullptr, input->input());
assert_correct_tree_size(tree, input->content);
assert_correct_tree_size(document, input->content);
delete input;
});
};
TSNode root_node = ts_tree_root_node(tree);
const char *node_string = ts_node_string(root_node);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
it_handles_edit_sequence("initial parse", [&]() {
ts_document_parse(document);
ts_tree_delete(tree);
delete input;
});
set<pair<size_t, size_t>> deletions;
set<pair<size_t, string>> insertions;
for (size_t i = 0; i < 60; i++) {
size_t edit_position = random_unsigned(utf8_char_count(entry.input));
size_t deletion_size = random_unsigned(utf8_char_count(entry.input) - edit_position);
string inserted_text = random_words(random_unsigned(4) + 1);
size_t edit_position = default_generator(utf8_char_count(entry.input));
size_t deletion_size = default_generator(utf8_char_count(entry.input) - edit_position);
string inserted_text = default_generator.words(default_generator(4) + 1);
if (insertions.insert({edit_position, inserted_text}).second) {
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
it(("parses " + entry.description +
": repairing an insertion of \"" + inserted_text + "\"" +
" at " + to_string(edit_position)).c_str(), [&]() {
input = new SpyInput(entry.input, 3);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
input->replace(edit_position, 0, inserted_text);
TSTree *tree = ts_parser_parse(parser, nullptr, input->input());
assert_correct_tree_size(tree, input->content);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
TSInputEdit edit = input->undo();
ts_tree_edit(tree, &edit);
assert_correct_tree_size(tree, input->content);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
TSTree *new_tree = ts_parser_parse(parser, tree, input->input());
assert_correct_tree_size(new_tree, input->content);
TSRange *ranges;
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
assert_correct_tree_size(document, input->content);
TSRange *ranges = ts_tree_get_changed_ranges(tree, new_tree, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ScopeSequence old_scope_sequence = build_scope_sequence(tree, input->content);
ScopeSequence new_scope_sequence = build_scope_sequence(new_tree, input->content);
verify_changed_ranges(
old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count
);
ts_free(ranges);
TSNode root_node = ts_tree_root_node(new_tree);
const char *node_string = ts_node_string(root_node);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
ts_tree_delete(tree);
ts_tree_delete(new_tree);
delete input;
});
}
if (deletions.insert({edit_position, deletion_size}).second) {
string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);
it(("parses " + entry.description +
": repairing a deletion of " +
to_string(edit_position) + "-" + to_string(edit_position + deletion_size)).c_str(), [&]() {
input = new SpyInput(entry.input, 3);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
it_handles_edit_sequence("repairing a deletion of " + desription, [&]() {
ts_document_edit(document, input->replace(edit_position, deletion_size, ""));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
input->replace(edit_position, deletion_size, "");
TSTree *tree = ts_parser_parse(parser, nullptr, input->input());
assert_correct_tree_size(tree, input->content);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
TSInputEdit edit = input->undo();
ts_tree_edit(tree, &edit);
assert_correct_tree_size(tree, input->content);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
TSTree *new_tree = ts_parser_parse(parser, tree, input->input());
assert_correct_tree_size(new_tree, input->content);
TSRange *ranges;
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
assert_correct_tree_size(document, input->content);
TSRange *ranges = ts_tree_get_changed_ranges(tree, new_tree, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ScopeSequence old_scope_sequence = build_scope_sequence(tree, input->content);
ScopeSequence new_scope_sequence = build_scope_sequence(new_tree, input->content);
verify_changed_ranges(
old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count
);
ts_free(ranges);
TSNode root_node = ts_tree_root_node(new_tree);
const char *node_string = ts_node_string(root_node);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
ts_tree_delete(tree);
ts_tree_delete(new_tree);
delete input;
});
}
}

View file

@ -9,6 +9,8 @@
START_TEST
if (TREE_SITTER_SEED == -1) return;
string grammars_dir_path = join_path({"test", "fixtures", "test_grammars"});
vector<string> test_languages = list_directory(grammars_dir_path);
@ -25,7 +27,7 @@ for (auto &language_name : test_languages) {
if (file_exists(expected_error_path)) {
it("fails with the correct error message", [&]() {
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str(), nullptr);
string expected_error = read_file(expected_error_path);
AssertThat((void *)compile_result.error_message, !Equals<void *>(nullptr));
AssertThat(compile_result.error_message, Equals(expected_error));
@ -41,7 +43,7 @@ for (auto &language_name : test_languages) {
string external_scanner_path = join_path({directory_path, "scanner.c"});
if (!file_exists(external_scanner_path)) external_scanner_path = "";
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str(), nullptr);
language = load_test_language(
language_name,
@ -50,26 +52,26 @@ for (auto &language_name : test_languages) {
);
}
TSDocument *document = ts_document_new();
ts_document_set_language(document, language);
ts_document_set_input_string_with_length(document, entry.input.c_str(), entry.input.size());
TSParser *parser = ts_parser_new();
ts_parser_set_language(parser, language);
// ts_document_print_debugging_graphs(document, true);
if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) {
ts_document_print_debugging_graphs(document, true);
ts_parser_print_dot_graphs(parser, stderr);
}
ts_document_parse(document);
TSTree *tree = ts_parser_parse_string(parser, nullptr, entry.input.c_str(), entry.input.size());
TSNode root_node = ts_document_root_node(document);
TSNode root_node = ts_tree_root_node(tree);
AssertThat(ts_node_end_byte(root_node), Equals(entry.input.size()));
assert_consistent_tree_sizes(root_node);
const char *node_string = ts_node_string(root_node, document);
const char *node_string = ts_node_string(root_node);
string result(node_string);
ts_free((void *)node_string);
ts_document_free(document);
AssertThat(result, Equals(entry.tree_string));
ts_tree_delete(tree);
ts_parser_delete(parser);
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
}

View file

@ -1,486 +0,0 @@
#include "test_helper.h"
#include "runtime/alloc.h"
#include "helpers/record_alloc.h"
#include "helpers/stream_methods.h"
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include "helpers/spy_logger.h"
#include "helpers/stderr_logger.h"
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
// Convenience constructor for TSPoint: takes size_t coordinates (e.g. the
// result of std::string::find) and narrows each one to uint32_t.
TSPoint point(size_t row, size_t column) {
  const uint32_t narrowed_row = static_cast<uint32_t>(row);
  const uint32_t narrowed_column = static_cast<uint32_t>(column);
  return TSPoint{narrowed_row, narrowed_column};
}
START_TEST
describe("Document", [&]() {
// Document under test; a fresh one is created in before_each and freed in
// after_each.
TSDocument *document;
// Shared slot in which specs store the root node of their latest parse.
TSNode root;
before_each([&]() {
// Allocation recording is started before the document is created, so the
// document's own allocations are part of what after_each checks.
record_alloc::start();
document = ts_document_new();
});
after_each([&]() {
// Free the document first, then stop recording and require that no recorded
// allocation is still outstanding.
ts_document_free(document);
record_alloc::stop();
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
// Asserts that the string form of `node` (as produced by ts_node_string for
// the current document) equals `expected`. The C string is copied into a
// std::string and released with ts_free before the assertion runs.
auto assert_node_string_equals = [&](TSNode node, const string &expected) {
  char *rendered = ts_node_string(node, document);
  string actual = rendered;
  ts_free(rendered);
  AssertThat(actual, Equals(expected));
};
// Specs for replacing the document's input source.
// Every spec starts from a document that has already been parsed from the
// literal string {"key": [1, 2]}, while `spy_input` holds the slightly
// different text {"key": [null, 2]} and is not yet attached to the document.
describe("set_input(input)", [&]() {
SpyInput *spy_input;
before_each([&]() {
// Second constructor argument is 3 — presumably the number of characters the
// spy serves per read; confirm in helpers/spy_input.h.
spy_input = new SpyInput("{\"key\": [null, 2]}", 3);
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, "{\"key\": [1, 2]}");
ts_document_parse(document);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (object (pair (string) (array (number) (number)))))");
});
after_each([&]() {
delete spy_input;
});
it("handles both UTF8 and UTF16 encodings", [&]() {
const char16_t content[] = u"[true, false]";
// sizeof(content) counts the terminating u'\0' as well, so the two bytes of
// the NUL terminator are part of the text handed to the parser.
spy_input->content = string((const char *)content, sizeof(content));
spy_input->encoding = TSInputEncodingUTF16;
ts_document_set_input(document, spy_input->input());
ts_document_invalidate(document);
ts_document_parse(document);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (array (true) (false)))");
});
it("handles truncated UTF16 data", [&]() {
// A single byte is less than one 16-bit code unit. There is no assertion
// here: the spec only requires that the parse call returns.
const char content[1] = { '\0' };
spy_input->content = string(content, sizeof(content));
spy_input->encoding = TSInputEncodingUTF16;
ts_document_set_input(document, spy_input->input());
ts_document_invalidate(document);
ts_document_parse(document);
});
it("measures columns in bytes", [&]() {
const char16_t content[] = u"[true, false]";
spy_input->content = string((const char *)content, sizeof(content));
spy_input->encoding = TSInputEncodingUTF16;
TSInput input = spy_input->input();
ts_document_set_input(document, input);
ts_document_invalidate(document);
ts_document_parse(document);
root = ts_document_root_node(document);
// 13 characters plus the NUL terminator = 14 code units of 2 bytes each,
// hence an end column of 28 rather than 14.
AssertThat(ts_node_end_point(root), Equals<TSPoint>({0, 28}));
});
it("allows the input to be retrieved later", [&]() {
ts_document_set_input(document, spy_input->input());
// The stored TSInput must carry the same payload pointer and callbacks as
// the one the spy produces.
AssertThat(ts_document_input(document).payload, Equals<void *>(spy_input));
AssertThat(ts_document_input(document).read, Equals(spy_input->input().read));
AssertThat(ts_document_input(document).seek, Equals(spy_input->input().seek));
});
it("does not assume that the document's text has changed", [&]() {
// Swapping the input alone must keep the existing root, flag nothing as
// changed, and read nothing from the new input.
ts_document_set_input(document, spy_input->input());
AssertThat(ts_document_root_node(document), Equals<TSNode>(root));
AssertThat(ts_node_has_changes(root), IsFalse());
AssertThat(spy_input->strings_read(), IsEmpty());
});
it("reads text from the new input for future parses", [&]() {
ts_document_set_input(document, spy_input->input());
// Describe the difference between the old text and the spy's text as an
// edit: at the byte offset just past `{"key": [`, one byte ('1') is removed
// and four bytes ('null') are added. Everything is on row 0, so the column
// fields mirror the byte fields.
TSInputEdit edit = {};
edit.start_point.column = edit.start_byte = strlen("{\"key\": [");
edit.extent_added.column = edit.bytes_added = 4;
edit.extent_removed.column = edit.bytes_removed = 1;
ts_document_edit(document, edit);
ts_document_parse(document);
TSNode new_root = ts_document_root_node(document);
assert_node_string_equals(
new_root,
"(value (object (pair (string) (array (null) (number)))))");
// Only the text around the edit is expected to have been requested from the
// spy, not the whole input.
AssertThat(spy_input->strings_read(), Equals(vector<string>({" [null, 2" })));
});
it("allows setting input string with length", [&]() {
// `content` is deliberately not NUL-terminated; the explicit length of 1 is
// the only thing bounding the read.
const char content[] = { '1' };
ts_document_set_input_string_with_length(document, content, 1);
ts_document_parse(document);
TSNode new_root = ts_document_root_node(document);
AssertThat(ts_node_end_byte(new_root), Equals<size_t>(1));
assert_node_string_equals(
new_root,
"(value (number))");
});
it("reads from the new input correctly when the old input was blank", [&]() {
// First parse an empty string: the result is a zero-length ERROR node.
ts_document_set_input_string(document, "");
ts_document_parse(document);
TSNode new_root = ts_document_root_node(document);
AssertThat(ts_node_end_byte(new_root), Equals<size_t>(0));
assert_node_string_equals(
new_root,
"(ERROR)");
// Then switch to a non-empty string and parse again; the new text must be
// picked up in full.
ts_document_set_input_string(document, "1");
ts_document_parse(document);
new_root = ts_document_root_node(document);
AssertThat(ts_node_end_byte(new_root), Equals<size_t>(1));
assert_node_string_equals(
new_root,
"(value (number))");
});
});
// Specs for assigning a language to the document. The input string is set
// before any language is assigned, so each spec chooses the language itself.
describe("set_language(language)", [&]() {
before_each([&]() {
ts_document_set_input_string(document, "{\"key\": [1, 2]}\n");
});
it("uses the given language for future parses", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_document_parse(document);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (object (pair (string) (array (number) (number)))))");
});
it("clears out any previous tree", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_document_parse(document);
// Switching language must drop the JSON tree immediately: the root node's
// data pointer is null until the next parse.
ts_document_set_language(document, load_real_language("javascript"));
AssertThat(ts_document_root_node(document).data, Equals<void *>(nullptr));
// Re-parsing the same text now yields a JavaScript tree.
ts_document_parse(document);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(program (expression_statement "
"(object (pair (string) (array (number) (number))))))");
});
it("does not allow setting a language with a different version number", [&]() {
// Work on a local copy so the shared JSON language object is not modified.
TSLanguage language = *load_real_language("json");
AssertThat(ts_language_version(&language), Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));
language.version++;
AssertThat(ts_language_version(&language), !Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));
// The mismatched language must be rejected, leaving the document with no
// language at all.
ts_document_set_language(document, &language);
AssertThat(ts_document_language(document), Equals<const TSLanguage *>(nullptr));
});
});
// Specs for attaching, retrieving and detaching a logger. Each spec starts
// with a JSON document whose input is "[1, 2]" and a fresh SpyLogger that is
// not yet attached.
describe("set_logger(TSLogger)", [&]() {
SpyLogger *logger;
before_each([&]() {
logger = new SpyLogger();
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, "[1, 2]");
});
after_each([&]() {
delete logger;
});
it("calls the debugger with a message for each parse action", [&]() {
ts_document_set_logger(document, logger->logger());
ts_document_parse(document);
// Spot-check a selection of the messages the spy collected during the parse.
AssertThat(logger->messages, Contains("new_parse"));
AssertThat(logger->messages, Contains("skip character:' '"));
AssertThat(logger->messages, Contains("consume character:'['"));
AssertThat(logger->messages, Contains("consume character:'1'"));
AssertThat(logger->messages, Contains("reduce sym:array, child_count:4"));
AssertThat(logger->messages, Contains("accept"));
});
it("allows the debugger to be retrieved later", [&]() {
ts_document_set_logger(document, logger->logger());
AssertThat(ts_document_logger(document).payload, Equals(logger));
});
describe("disabling debugging", [&]() {
before_each([&]() {
// Attach the spy, then immediately replace it with an all-NULL logger.
ts_document_set_logger(document, logger->logger());
ts_document_set_logger(document, {NULL, NULL});
});
it("does not call the debugger any more", [&]() {
ts_document_parse(document);
AssertThat(logger->messages, IsEmpty());
});
});
});
describe("parse_and_get_changed_ranges()", [&]() {
// Editable input shared by the specs in this group; it is created in
// before_each and deleted in after_each.
SpyInput *input;
before_each([&]() {
ts_document_set_language(document, load_real_language("javascript"));
input = new SpyInput("{a: null};\n", 3);
ts_document_set_input(document, input->input());
// Parse once up front and check the baseline tree, so that every spec's
// edits are measured against a known starting point.
ts_document_parse(document);
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object (pair (property_identifier) (null)))))");
});
after_each([&]() {
delete input;
});
// Runs `callback` to obtain an edit, applies that edit to the document,
// re-parses, and returns the changed ranges reported by the re-parse as a
// vector. The C array filled in by the parse call is copied element by
// element and then released with ts_free.
auto get_invalidated_ranges_for_edit = [&](std::function<TSInputEdit()> callback) -> vector<TSRange> {
  ts_document_edit(document, callback());

  uint32_t changed_count = 0;
  TSRange *changed;
  ts_document_parse_and_get_changed_ranges(document, &changed, &changed_count);

  vector<TSRange> collected;
  for (size_t index = 0; index < changed_count; index++) {
    collected.push_back(changed[index]);
  }

  ts_free(changed);
  return collected;
};
// Changing a single token must report exactly one range, from the start of
// the changed token to the closing brace.
it("reports changes when one token has been updated", [&]() {
// Replace the `u` of `null` with `othing`, so the token `null` becomes the
// longer word `nothingll`.
auto ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find("ull"), 1, "othing");
});
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find("nothing")),
point(0, input->content.find("}"))
},
})));
// Undo the replacement, restoring `null`
ranges = get_invalidated_ranges_for_edit([&]() {
return input->undo();
});
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find("null")),
point(0, input->content.find("}"))
},
})));
});
// Regression spec: adding or removing a leading newline must leave the tree's
// string form unchanged and report no changed ranges, on every repetition.
it("reports no changes when leading whitespace has changed (regression)", [&]() {
// Raise the spy's chunk size from the value given at construction to 80.
input->chars_per_chunk = 80;
// Insert leading whitespace
auto ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(0, 0, "\n");
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object (pair (property_identifier) (null)))))");
AssertThat(ranges, Equals(vector<TSRange>({})));
// Remove leading whitespace
ranges = get_invalidated_ranges_for_edit([&]() {
return input->undo();
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object (pair (property_identifier) (null)))))");
AssertThat(ranges, Equals(vector<TSRange>({})));
// Insert leading whitespace again
ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(0, 0, "\n");
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object (pair (property_identifier) (null)))))");
AssertThat(ranges, Equals(vector<TSRange>({})));
});
// Inserting key-value pairs must report just the inserted span; removing them
// again (via undo) is expected to report no changed ranges at all.
it("reports changes when tokens have been appended", [&]() {
// Add a second key-value pair
auto ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find("}"), 0, ", b: false");
});
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find(",")),
point(0, input->content.find("}"))
},
})));
// Add a third key-value pair in between the first two
ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find(", b"), 0, ", c: 1");
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (property_identifier) (null)) "
"(pair (property_identifier) (number)) "
"(pair (property_identifier) (false)))))");
// The reported range covers only the new `, c: 1` text, ending where the
// pre-existing `, b` begins.
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find(", c")),
point(0, input->content.find(", b"))
},
})));
// Delete the middle pair.
ranges = get_invalidated_ranges_for_edit([&]() {
return input->undo();
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (property_identifier) (null)) "
"(pair (property_identifier) (false)))))");
AssertThat(ranges, IsEmpty());
// Delete the second pair.
ranges = get_invalidated_ranges_for_edit([&]() {
return input->undo();
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (property_identifier) (null)))))");
AssertThat(ranges, IsEmpty());
});
// When an existing node gains a new parent, the reported range must cover the
// whole new parent, not just the inserted text.
it("reports changes when trees have been wrapped", [&]() {
// Insert `b === ` in front of `null`, so the pair's value becomes a binary
// expression that wraps the existing `null` node.
auto ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find("null"), 0, "b === ");
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (property_identifier) (binary_expression (identifier) (null))))))");
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find("b ===")),
point(0, input->content.find("}"))
},
})));
});
});
// Specs for the halt_on_error parse option. The first two specs parse the
// same text twice: once with the flag off to pin down the normal recovery
// result, then (after ts_document_invalidate) with the flag on.
describe("parse_with_options(options)", [&]() {
it("halts as soon as an error is found if the halt_on_error flag is set", [&]() {
string input_string = "[1, null, error, 3]";
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, input_string.c_str());
TSParseOptions options = {};
options.changed_ranges = nullptr;
// Baseline: with the flag off, the bad token is isolated in an ERROR node and
// the trailing number is still parsed.
options.halt_on_error = false;
ts_document_parse_with_options(document, options);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (array (number) (null) (ERROR (UNEXPECTED 'e')) (number)))");
// NOTE(review): invalidate presumably forces the next parse to start from
// scratch instead of reusing the tree above — confirm.
ts_document_invalidate(document);
// With the flag on, the root itself is an ERROR node and nothing after the
// unexpected character appears in it...
options.halt_on_error = true;
ts_document_parse_with_options(document, options);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(ERROR (number) (null) (UNEXPECTED 'e'))");
// ...yet the root must still span the entire input.
AssertThat(ts_node_end_byte(root), Equals(input_string.size()));
});
it("does not insert missing tokens if the halt_on_error flag is set", [&]() {
// The closing bracket is missing from this input.
string input_string = "[1, null, 3";
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, input_string.c_str());
TSParseOptions options = {};
options.changed_ranges = nullptr;
// Baseline: with the flag off, a MISSING node stands in for the absent token.
options.halt_on_error = false;
ts_document_parse_with_options(document, options);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (array (number) (null) (number) (MISSING)))");
ts_document_invalidate(document);
// With the flag on, no MISSING node is produced; the root is a plain ERROR
// node that spans the entire input.
options.halt_on_error = true;
ts_document_parse_with_options(document, options);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(ERROR (number) (null) (number))");
AssertThat(ts_node_end_byte(root), Equals(input_string.size()));
});
it("can parse valid code with the halt_on_error flag set", [&]() {
// The flag must not affect the result for input without errors.
string input_string = "[1, null, 3]";
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, input_string.c_str());
TSParseOptions options = {};
options.changed_ranges = nullptr;
options.halt_on_error = true;
ts_document_parse_with_options(document, options);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (array (number) (null) (number)))");
});
});
});
END_TEST

View file

@ -26,20 +26,19 @@ describe("Language", []() {
"value": "b"
}
}
})JSON");
})JSON", nullptr);
TSDocument *document = ts_document_new();
TSParser *parser = ts_parser_new();
const TSLanguage *language = load_test_language("aliased_rules", compile_result);
ts_document_set_language(document, language);
ts_document_set_input_string(document, "b");
ts_document_parse(document);
ts_parser_set_language(parser, language);
TSTree *tree = ts_parser_parse_string(parser, nullptr, "b", 1);
TSNode root_node = ts_document_root_node(document);
char *string = ts_node_string(root_node, document);
TSNode root_node = ts_tree_root_node(tree);
char *string = ts_node_string(root_node);
AssertThat(string, Equals("(a (c))"));
TSNode aliased_node = ts_node_child(root_node, 0);
AssertThat(ts_node_type(aliased_node, document), Equals("c"));
AssertThat(ts_node_type(aliased_node), Equals("c"));
TSSymbol aliased_symbol = ts_node_symbol(aliased_node);
AssertThat(ts_language_symbol_count(language), IsGreaterThan(aliased_symbol));
@ -47,7 +46,8 @@ describe("Language", []() {
AssertThat(ts_language_symbol_type(language, aliased_symbol), Equals(TSSymbolTypeRegular));
ts_free(string);
ts_document_free(document);
ts_parser_delete(parser);
ts_tree_delete(tree);
});
});
});

View file

@ -52,38 +52,53 @@ string grammar_with_aliases_and_extras = R"JSON({
"named": true,
"content": {"type": "SYMBOL", "name": "b"}
},
{"type": "SYMBOL", "name": "b"}
{
"type": "ALIAS",
"value": "C",
"named": true,
"content": {"type": "SYMBOL", "name": "_c"}
}
]
},
"b": {"type": "STRING", "value": "b"},
"_c": {"type": "STRING", "value": "c"},
"comment": {"type": "STRING", "value": "..."}
}
})JSON";
const TSLanguage *language_with_aliases_and_extras = load_test_language(
"aliases_and_extras",
ts_compile_grammar(grammar_with_aliases_and_extras.c_str(), nullptr)
);
describe("Node", [&]() {
TSDocument *document;
TSParser *parser;
TSTree *tree;
TSNode root_node;
TSNode NULL_NODE = {};
before_each([&]() {
record_alloc::start();
document = ts_document_new();
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, json_string.c_str());
ts_document_parse(document);
root_node = ts_node_child(ts_document_root_node(document), 0);
parser = ts_parser_new();
ts_parser_set_language(parser, load_real_language("json"));
tree = ts_parser_parse_string(parser, nullptr, json_string.c_str(), json_string.size());
root_node = ts_node_child(ts_tree_root_node(tree), 0);
});
after_each([&]() {
ts_document_free(document);
ts_parser_delete(parser);
ts_tree_delete(tree);
record_alloc::stop();
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
it("parses the example as expected (precondition)", [&]() {
char *node_string = ts_node_string(root_node, document);
char *node_string = ts_node_string(root_node);
AssertThat(node_string, Equals(
"(array "
"(number) "
@ -94,7 +109,7 @@ describe("Node", [&]() {
describe("named_child_count(), named_child(i)", [&]() {
it("returns the named child node at the given index", [&]() {
AssertThat(ts_node_type(root_node, document), Equals("array"));
AssertThat(ts_node_type(root_node), Equals("array"));
AssertThat(ts_node_named_child_count(root_node), Equals<size_t>(3));
AssertThat(ts_node_start_byte(root_node), Equals(array_index));
@ -106,9 +121,9 @@ describe("Node", [&]() {
TSNode false_node = ts_node_named_child(root_node, 1);
TSNode object_node = ts_node_named_child(root_node, 2);
AssertThat(ts_node_type(number_node, document), Equals("number"));
AssertThat(ts_node_type(false_node, document), Equals("false"));
AssertThat(ts_node_type(object_node, document), Equals("object"));
AssertThat(ts_node_type(number_node), Equals("number"));
AssertThat(ts_node_type(false_node), Equals("false"));
AssertThat(ts_node_type(object_node), Equals("object"));
AssertThat(ts_node_start_byte(number_node), Equals(number_index));
AssertThat(ts_node_end_byte(number_node), Equals(number_end_index));
@ -128,7 +143,7 @@ describe("Node", [&]() {
TSNode pair_node = ts_node_named_child(object_node, 0);
AssertThat(ts_node_type(pair_node, document), Equals("pair"));
AssertThat(ts_node_type(pair_node), Equals("pair"));
AssertThat(ts_node_start_byte(pair_node), Equals(string_index));
AssertThat(ts_node_end_byte(pair_node), Equals(null_end_index));
AssertThat(ts_node_start_point(pair_node), Equals<TSPoint>({ 6, 4 }));
@ -138,8 +153,8 @@ describe("Node", [&]() {
TSNode string_node = ts_node_named_child(pair_node, 0);
TSNode null_node = ts_node_named_child(pair_node, 1);
AssertThat(ts_node_type(string_node, document), Equals("string"));
AssertThat(ts_node_type(null_node, document), Equals("null"));
AssertThat(ts_node_type(string_node), Equals("string"));
AssertThat(ts_node_type(null_node), Equals("null"));
AssertThat(ts_node_start_byte(string_node), Equals(string_index));
AssertThat(ts_node_end_byte(string_node), Equals(string_end_index));
@ -157,29 +172,30 @@ describe("Node", [&]() {
AssertThat(ts_node_parent(number_node), Equals(root_node));
AssertThat(ts_node_parent(false_node), Equals(root_node));
AssertThat(ts_node_parent(object_node), Equals(root_node));
AssertThat(ts_node_parent(ts_document_root_node(document)).data, Equals<void *>(nullptr));
AssertThat(ts_node_parent(ts_tree_root_node(tree)), Equals(NULL_NODE));
});
it("works correctly when the node contains aliased children and extras", [&]() {
TSCompileResult compile_result = ts_compile_grammar(grammar_with_aliases_and_extras.c_str());
const TSLanguage *language = load_test_language("aliases_and_extras", compile_result);
ts_document_set_language(document, language);
ts_document_set_input_string(document, "b ... b ... b");
ts_document_parse(document);
root_node = ts_document_root_node(document);
ts_parser_set_language(parser, language_with_aliases_and_extras);
ts_tree_delete(tree);
tree = ts_parser_parse_string(parser, nullptr, "b ... b ... c", 13);
root_node = ts_tree_root_node(tree);
char *node_string = ts_node_string(root_node, document);
AssertThat(node_string, Equals("(a (b) (comment) (B) (comment) (b))"));
char *node_string = ts_node_string(root_node);
AssertThat(node_string, Equals("(a (b) (comment) (B) (comment) (C))"));
ts_free(node_string);
AssertThat(ts_node_named_child_count(root_node), Equals(5u));
AssertThat(ts_node_type(ts_node_named_child(root_node, 0), document), Equals("b"));
AssertThat(ts_node_type(ts_node_named_child(root_node, 1), document), Equals("comment"));
AssertThat(ts_node_type(ts_node_named_child(root_node, 2), document), Equals("B"));
AssertThat(ts_node_type(ts_node_named_child(root_node, 3), document), Equals("comment"));
AssertThat(ts_node_type(ts_node_named_child(root_node, 4), document), Equals("b"));
AssertThat(ts_node_type(ts_node_named_child(root_node, 0)), Equals("b"));
AssertThat(ts_node_type(ts_node_named_child(root_node, 1)), Equals("comment"));
AssertThat(ts_node_type(ts_node_named_child(root_node, 2)), Equals("B"));
AssertThat(ts_node_type(ts_node_named_child(root_node, 3)), Equals("comment"));
AssertThat(ts_node_type(ts_node_named_child(root_node, 4)), Equals("C"));
AssertThat(ts_node_symbol(ts_node_named_child(root_node, 0)), !Equals(ts_node_symbol(ts_node_named_child(root_node, 2))));
AssertThat(
ts_node_symbol(ts_node_named_child(root_node, 0)),
!Equals(ts_node_symbol(ts_node_named_child(root_node, 2)))
);
});
});
@ -188,29 +204,29 @@ describe("Node", [&]() {
TSNode child;
child = ts_node_first_child_for_byte(root_node, array_index);
AssertThat(ts_node_type(child, document), Equals("["));
AssertThat(ts_node_type(child), Equals("["));
child = ts_node_first_child_for_byte(root_node, number_index);
AssertThat(ts_node_type(child, document), Equals("number"));
AssertThat(ts_node_type(child), Equals("number"));
child = ts_node_first_child_for_byte(root_node, number_end_index);
AssertThat(ts_node_type(child, document), Equals(","));
AssertThat(ts_node_type(child), Equals(","));
child = ts_node_first_child_for_byte(root_node, number_end_index + 1);
AssertThat(ts_node_type(child, document), Equals("false"));
AssertThat(ts_node_type(child), Equals("false"));
child = ts_node_first_child_for_byte(root_node, false_index - 1);
AssertThat(ts_node_type(child, document), Equals("false"));
AssertThat(ts_node_type(child), Equals("false"));
child = ts_node_first_child_for_byte(root_node, false_index);
AssertThat(ts_node_type(child, document), Equals("false"));
AssertThat(ts_node_type(child), Equals("false"));
child = ts_node_first_child_for_byte(root_node, false_index + 1);
AssertThat(ts_node_type(child, document), Equals("false"));
AssertThat(ts_node_type(child), Equals("false"));
child = ts_node_first_child_for_byte(root_node, false_end_index);
AssertThat(ts_node_type(child, document), Equals(","));
AssertThat(ts_node_type(child), Equals(","));
child = ts_node_first_child_for_byte(root_node, false_end_index);
AssertThat(ts_node_type(child, document), Equals(","));
AssertThat(ts_node_type(child), Equals(","));
child = ts_node_first_child_for_byte(root_node, object_index);
AssertThat(ts_node_type(child, document), Equals("object"));
AssertThat(ts_node_type(child), Equals("object"));
child = ts_node_first_child_for_byte(root_node, object_index + 1);
AssertThat(ts_node_type(child, document), Equals("object"));
AssertThat(ts_node_type(child), Equals("object"));
child = ts_node_first_child_for_byte(root_node, object_end_index);
AssertThat(ts_node_type(child, document), Equals("]"));
AssertThat(ts_node_type(child), Equals("]"));
});
});
@ -219,39 +235,27 @@ describe("Node", [&]() {
TSNode child;
child = ts_node_first_named_child_for_byte(root_node, array_index);
AssertThat(ts_node_type(child, document), Equals("number"));
AssertThat(ts_node_type(child), Equals("number"));
child = ts_node_first_named_child_for_byte(root_node, number_index);
AssertThat(ts_node_type(child, document), Equals("number"));
AssertThat(ts_node_type(child), Equals("number"));
child = ts_node_first_named_child_for_byte(root_node, number_end_index);
AssertThat(ts_node_type(child, document), Equals("false"));
AssertThat(ts_node_type(child), Equals("false"));
child = ts_node_first_named_child_for_byte(root_node, number_end_index + 1);
AssertThat(ts_node_type(child, document), Equals("false"));
AssertThat(ts_node_type(child), Equals("false"));
child = ts_node_first_named_child_for_byte(root_node, false_index - 1);
AssertThat(ts_node_type(child, document), Equals("false"));
AssertThat(ts_node_type(child), Equals("false"));
child = ts_node_first_named_child_for_byte(root_node, false_index);
AssertThat(ts_node_type(child, document), Equals("false"));
AssertThat(ts_node_type(child), Equals("false"));
child = ts_node_first_named_child_for_byte(root_node, false_index + 1);
AssertThat(ts_node_type(child, document), Equals("false"));
AssertThat(ts_node_type(child), Equals("false"));
child = ts_node_first_named_child_for_byte(root_node, false_end_index);
AssertThat(ts_node_type(child, document), Equals("object"));
AssertThat(ts_node_type(child), Equals("object"));
child = ts_node_first_named_child_for_byte(root_node, object_index);
AssertThat(ts_node_type(child, document), Equals("object"));
AssertThat(ts_node_type(child), Equals("object"));
child = ts_node_first_named_child_for_byte(root_node, object_index + 1);
AssertThat(ts_node_type(child, document), Equals("object"));
AssertThat(ts_node_type(child), Equals("object"));
child = ts_node_first_named_child_for_byte(root_node, object_end_index);
AssertThat(child.data, Equals<void *>(nullptr));
});
});
describe("child_index()", [&]() {
it("returns the index of the node within its parent", [&]() {
AssertThat(ts_node_child_index(ts_node_child(root_node, 0)), Equals(0u));
AssertThat(ts_node_child_index(ts_node_child(root_node, 1)), Equals(1u));
AssertThat(ts_node_child_index(ts_node_child(root_node, 2)), Equals(2u));
AssertThat(ts_node_child_index(ts_node_child(root_node, 3)), Equals(3u));
AssertThat(ts_node_child_index(ts_node_child(root_node, 4)), Equals(4u));
AssertThat(ts_node_child_index(ts_node_child(root_node, 5)), Equals(5u));
AssertThat(ts_node_child_index(ts_node_child(root_node, 6)), Equals(6u));
AssertThat(child, Equals(NULL_NODE));
});
});
@ -266,14 +270,14 @@ describe("Node", [&]() {
TSNode child6 = ts_node_child(root_node, 5);
TSNode child7 = ts_node_child(root_node, 6);
AssertThat(ts_node_type(root_node, document), Equals("array"));
AssertThat(ts_node_type(child1, document), Equals("["));
AssertThat(ts_node_type(child2, document), Equals("number"));
AssertThat(ts_node_type(child3, document), Equals(","));
AssertThat(ts_node_type(child4, document), Equals("false"));
AssertThat(ts_node_type(child5, document), Equals(","));
AssertThat(ts_node_type(child6, document), Equals("object"));
AssertThat(ts_node_type(child7, document), Equals("]"));
AssertThat(ts_node_type(root_node), Equals("array"));
AssertThat(ts_node_type(child1), Equals("["));
AssertThat(ts_node_type(child2), Equals("number"));
AssertThat(ts_node_type(child3), Equals(","));
AssertThat(ts_node_type(child4), Equals("false"));
AssertThat(ts_node_type(child5), Equals(","));
AssertThat(ts_node_type(child6), Equals("object"));
AssertThat(ts_node_type(child7), Equals("]"));
AssertThat(ts_node_is_named(root_node), IsTrue());
AssertThat(ts_node_is_named(child1), IsFalse());
@ -314,13 +318,13 @@ describe("Node", [&]() {
TSNode grandchild3 = ts_node_child(pair, 1);
TSNode grandchild4 = ts_node_child(pair, 2);
AssertThat(ts_node_type(left_brace, document), Equals("{"));
AssertThat(ts_node_type(pair, document), Equals("pair"));
AssertThat(ts_node_type(right_brace, document), Equals("}"));
AssertThat(ts_node_type(left_brace), Equals("{"));
AssertThat(ts_node_type(pair), Equals("pair"));
AssertThat(ts_node_type(right_brace), Equals("}"));
AssertThat(ts_node_type(grandchild2, document), Equals("string"));
AssertThat(ts_node_type(grandchild3, document), Equals(":"));
AssertThat(ts_node_type(grandchild4, document), Equals("null"));
AssertThat(ts_node_type(grandchild2), Equals("string"));
AssertThat(ts_node_type(grandchild3), Equals(":"));
AssertThat(ts_node_type(grandchild4), Equals("null"));
AssertThat(ts_node_parent(grandchild2), Equals(pair));
AssertThat(ts_node_parent(grandchild3), Equals(pair));
@ -335,7 +339,7 @@ describe("Node", [&]() {
AssertThat(ts_node_parent(child5), Equals(root_node));
AssertThat(ts_node_parent(child6), Equals(root_node));
AssertThat(ts_node_parent(child7), Equals(root_node));
AssertThat(ts_node_parent(ts_document_root_node(document)).data, Equals<void *>(nullptr));
AssertThat(ts_node_parent(ts_tree_root_node(tree)), Equals(NULL_NODE));
});
});
@ -355,15 +359,16 @@ describe("Node", [&]() {
TSNode brace_node2 = ts_node_child(object_node, 2);
TSNode bracket_node2 = ts_node_child(root_node, 6);
AssertThat(ts_node_parent(bracket_node1), Equals(root_node));
AssertThat(ts_node_next_sibling(bracket_node1), Equals(number_node));
AssertThat(ts_node_next_sibling(number_node), Equals(array_comma_node1));
AssertThat(ts_node_next_sibling(array_comma_node1), Equals(false_node));
AssertThat(ts_node_next_sibling(false_node), Equals(array_comma_node2));
AssertThat(ts_node_next_sibling(array_comma_node2), Equals(object_node));
AssertThat(ts_node_next_sibling(object_node), Equals(bracket_node2));
AssertThat(ts_node_next_sibling(bracket_node2).data, Equals<void *>(nullptr));
AssertThat(ts_node_next_sibling(bracket_node2), Equals(NULL_NODE));
AssertThat(ts_node_prev_sibling(bracket_node1).data, Equals<void *>(nullptr));
AssertThat(ts_node_prev_sibling(bracket_node1), Equals(NULL_NODE));
AssertThat(ts_node_prev_sibling(number_node), Equals(bracket_node1));
AssertThat(ts_node_prev_sibling(array_comma_node1), Equals(number_node));
AssertThat(ts_node_prev_sibling(false_node), Equals(array_comma_node1));
@ -373,24 +378,24 @@ describe("Node", [&]() {
AssertThat(ts_node_next_sibling(brace_node1), Equals(pair_node));
AssertThat(ts_node_next_sibling(pair_node), Equals(brace_node2));
AssertThat(ts_node_next_sibling(brace_node2).data, Equals<void *>(nullptr));
AssertThat(ts_node_next_sibling(brace_node2), Equals(NULL_NODE));
AssertThat(ts_node_prev_sibling(brace_node1).data, Equals<void *>(nullptr));
AssertThat(ts_node_prev_sibling(brace_node1), Equals(NULL_NODE));
AssertThat(ts_node_prev_sibling(pair_node), Equals(brace_node1));
AssertThat(ts_node_prev_sibling(brace_node2), Equals(pair_node));
AssertThat(ts_node_next_sibling(string_node), Equals(colon_node));
AssertThat(ts_node_next_sibling(colon_node), Equals(null_node));
AssertThat(ts_node_next_sibling(null_node).data, Equals<void *>(nullptr));
AssertThat(ts_node_next_sibling(null_node), Equals(NULL_NODE));
AssertThat(ts_node_prev_sibling(string_node).data, Equals<void *>(nullptr));
AssertThat(ts_node_prev_sibling(string_node), Equals(NULL_NODE));
AssertThat(ts_node_prev_sibling(colon_node), Equals(string_node));
AssertThat(ts_node_prev_sibling(null_node), Equals(colon_node));
});
it("returns null when the node has no parent", [&]() {
AssertThat(ts_node_next_named_sibling(root_node).data, Equals<void *>(nullptr));
AssertThat(ts_node_prev_named_sibling(root_node).data, Equals<void *>(nullptr));
AssertThat(ts_node_next_named_sibling(root_node), Equals(NULL_NODE));
AssertThat(ts_node_prev_named_sibling(root_node), Equals(NULL_NODE));
});
});
@ -412,8 +417,8 @@ describe("Node", [&]() {
});
it("returns null when the node has no parent", [&]() {
AssertThat(ts_node_next_named_sibling(root_node).data, Equals<void *>(nullptr));
AssertThat(ts_node_prev_named_sibling(root_node).data, Equals<void *>(nullptr));
AssertThat(ts_node_next_named_sibling(root_node), Equals(NULL_NODE));
AssertThat(ts_node_prev_named_sibling(root_node), Equals(NULL_NODE));
});
});
@ -421,32 +426,39 @@ describe("Node", [&]() {
describe("when there is a leaf node that spans the given range exactly", [&]() {
it("returns that leaf node", [&]() {
TSNode leaf = ts_node_named_descendant_for_byte_range(root_node, string_index, string_end_index - 1);
AssertThat(ts_node_type(leaf, document), Equals("string"));
AssertThat(ts_node_type(leaf), Equals("string"));
AssertThat(ts_node_start_byte(leaf), Equals(string_index));
AssertThat(ts_node_end_byte(leaf), Equals(string_end_index));
AssertThat(ts_node_start_point(leaf), Equals<TSPoint>({ 6, 4 }));
AssertThat(ts_node_end_point(leaf), Equals<TSPoint>({ 6, 7 }));
leaf = ts_node_named_descendant_for_byte_range(root_node, number_index, number_end_index - 1);
AssertThat(ts_node_type(leaf, document), Equals("number"));
AssertThat(ts_node_type(leaf), Equals("number"));
AssertThat(ts_node_start_byte(leaf), Equals(number_index));
AssertThat(ts_node_end_byte(leaf), Equals(number_end_index));
AssertThat(ts_node_start_point(leaf), Equals<TSPoint>({ 3, 2 }));
AssertThat(ts_node_end_point(leaf), Equals<TSPoint>({ 3, 5 }));
TSNode parent = ts_node_parent(leaf);
AssertThat(ts_node_type(parent), Equals("array"));
AssertThat(ts_node_start_byte(parent), Equals(array_index));
parent = ts_node_parent(parent);
AssertThat(ts_node_type(parent), Equals("value"));
AssertThat(ts_node_start_byte(parent), Equals(array_index));
});
});
describe("when there is a leaf node that extends beyond the given range", [&]() {
it("returns that leaf node", [&]() {
TSNode leaf = ts_node_named_descendant_for_byte_range(root_node, string_index, string_index + 1);
AssertThat(ts_node_type(leaf, document), Equals("string"));
AssertThat(ts_node_type(leaf), Equals("string"));
AssertThat(ts_node_start_byte(leaf), Equals(string_index));
AssertThat(ts_node_end_byte(leaf), Equals(string_end_index));
AssertThat(ts_node_start_point(leaf), Equals<TSPoint>({ 6, 4 }));
AssertThat(ts_node_end_point(leaf), Equals<TSPoint>({ 6, 7 }));
leaf = ts_node_named_descendant_for_byte_range(root_node, string_index + 1, string_index + 2);
AssertThat(ts_node_type(leaf, document), Equals("string"));
AssertThat(ts_node_type(leaf), Equals("string"));
AssertThat(ts_node_start_byte(leaf), Equals(string_index));
AssertThat(ts_node_end_byte(leaf), Equals(string_end_index));
AssertThat(ts_node_start_point(leaf), Equals<TSPoint>({ 6, 4 }));
@ -457,7 +469,7 @@ describe("Node", [&]() {
describe("when there is no leaf node that spans the given range", [&]() {
it("returns the smallest node that does span the range", [&]() {
TSNode pair_node = ts_node_named_descendant_for_byte_range(root_node, string_index, string_index + 3);
AssertThat(ts_node_type(pair_node, document), Equals("pair"));
AssertThat(ts_node_type(pair_node), Equals("pair"));
AssertThat(ts_node_start_byte(pair_node), Equals(string_index));
AssertThat(ts_node_end_byte(pair_node), Equals(null_end_index));
AssertThat(ts_node_start_point(pair_node), Equals<TSPoint>({ 6, 4 }));
@ -466,7 +478,7 @@ describe("Node", [&]() {
it("does not return invisible nodes (repeats)", [&]() {
TSNode node = ts_node_named_descendant_for_byte_range(root_node, number_end_index, number_end_index + 1);
AssertThat(ts_node_type(node, document), Equals("array"));
AssertThat(ts_node_type(node), Equals("array"));
AssertThat(ts_node_start_byte(node), Equals(array_index));
AssertThat(ts_node_end_byte(node), Equals(array_end_index));
AssertThat(ts_node_start_point(node), Equals<TSPoint>({ 2, 0 }));
@ -478,32 +490,35 @@ describe("Node", [&]() {
describe("descendant_for_byte_range(start, end)", [&]() {
it("returns the smallest node that spans the given byte offsets", [&]() {
TSNode node1 = ts_node_descendant_for_byte_range(root_node, colon_index, colon_index);
AssertThat(ts_node_type(node1, document), Equals(":"));
AssertThat(ts_node_type(node1), Equals(":"));
AssertThat(ts_node_start_byte(node1), Equals(colon_index));
AssertThat(ts_node_end_byte(node1), Equals(colon_index + 1));
AssertThat(ts_node_start_point(node1), Equals<TSPoint>({ 6, 7 }));
AssertThat(ts_node_end_point(node1), Equals<TSPoint>({ 6, 8 }));
TSNode node2 = ts_node_descendant_for_byte_range(root_node, string_index + 2, string_index + 4);
AssertThat(ts_node_type(node2, document), Equals("pair"));
AssertThat(ts_node_type(node2), Equals("pair"));
AssertThat(ts_node_start_byte(node2), Equals(string_index));
AssertThat(ts_node_end_byte(node2), Equals(null_end_index));
AssertThat(ts_node_start_point(node2), Equals<TSPoint>({ 6, 4 }));
AssertThat(ts_node_end_point(node2), Equals<TSPoint>({ 6, 13 }));
AssertThat(ts_node_parent(node1), Equals(node2));
});
it("works in the presence of multi-byte characters", [&]() {
string input_string = "[\"αβγδ\", \"αβγδ\"]";
ts_document_set_input_string(document, input_string.c_str());
ts_document_parse(document);
TSNode root_node = ts_document_root_node(document);
ts_tree_delete(tree);
tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size());
TSNode root_node = ts_tree_root_node(tree);
uint32_t comma_position = input_string.find(",");
TSNode node1 = ts_node_descendant_for_byte_range(root_node, comma_position, comma_position);
AssertThat(ts_node_type(node1, document), Equals(","));
AssertThat(ts_node_type(node1), Equals(","));
TSNode node2 = ts_node_descendant_for_byte_range(root_node, 6, 10);
AssertThat(ts_node_type(node2, document), Equals("string"));
AssertThat(ts_node_type(node2), Equals("string"));
AssertThat(ts_node_start_byte(node2), Equals<size_t>(1));
AssertThat(ts_node_end_byte(node2), Equals<size_t>(11));
});
@ -512,20 +527,236 @@ describe("Node", [&]() {
describe("descendant_for_point_range(start, end)", [&]() {
it("returns the smallest concrete node that spans the given range", [&]() {
TSNode node1 = ts_node_descendant_for_point_range(root_node, {6, 7}, {6, 7});
AssertThat(ts_node_type(node1, document), Equals(":"));
AssertThat(ts_node_type(node1), Equals(":"));
AssertThat(ts_node_start_byte(node1), Equals(colon_index));
AssertThat(ts_node_end_byte(node1), Equals(colon_index + 1));
AssertThat(ts_node_start_point(node1), Equals<TSPoint>({ 6, 7 }));
AssertThat(ts_node_end_point(node1), Equals<TSPoint>({ 6, 8 }));
TSNode node2 = ts_node_descendant_for_point_range(root_node, {6, 6}, {6, 8});
AssertThat(ts_node_type(node2, document), Equals("pair"));
AssertThat(ts_node_type(node2), Equals("pair"));
AssertThat(ts_node_start_byte(node2), Equals(string_index));
AssertThat(ts_node_end_byte(node2), Equals(null_end_index));
AssertThat(ts_node_start_point(node2), Equals<TSPoint>({ 6, 4 }));
AssertThat(ts_node_end_point(node2), Equals<TSPoint>({ 6, 13 }));
AssertThat(ts_node_parent(node1), Equals(node2));
});
});
});
describe("TreeCursor", [&]() {
// Fixture state for the specs in this group: assigned in before_each and
// released in after_each.
TSParser *parser;
TSTree *tree;
TSTreeCursor cursor;
before_each([&]() {
  // Begin recording allocations before anything is created so that after_each
  // can check that nothing is left outstanding.
  record_alloc::start();

  // A fresh parser configured for the JSON language.
  parser = ts_parser_new();
  const auto json_language = load_real_language("json");
  ts_parser_set_language(parser, json_language);

  // Parse the shared JSON fixture from scratch (no previous tree) and place a
  // cursor on the resulting tree.
  const char *source_text = json_string.c_str();
  const auto source_length = json_string.size();
  tree = ts_parser_parse_string(parser, nullptr, source_text, source_length);
  cursor = ts_tree_cursor_new(tree);
});
after_each([&]() {
  // The cursor was created from `tree`, so release the cursor first and the
  // tree afterwards. This is the same order the aliases-and-extras spec in this
  // group uses when it swaps in a new tree.
  ts_tree_cursor_delete(&cursor);
  ts_tree_delete(tree);
  ts_parser_delete(parser);

  // Stop recording and require that no recorded allocation is still
  // outstanding once the cursor, tree and parser have been released.
  record_alloc::stop();
  AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
// Walks the parsed JSON fixture with a tree cursor: down from the root "value"
// node through the array into the nested object and pair, then back up to the
// root. After every move the current node's type and start byte are checked.
// Moves that are expected to fail must return false and leave the cursor on
// the node it was already on.
it("can walk the tree", [&]() {
// A new cursor starts on the root node.
TSNode node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("value"));
AssertThat(ts_node_start_byte(node), Equals(array_index));
// Descend: value -> array.
AssertThat(ts_tree_cursor_goto_first_child(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("array"));
AssertThat(ts_node_start_byte(node), Equals(array_index));
// Descend: array -> "[" (the array's first child).
AssertThat(ts_tree_cursor_goto_first_child(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("["));
AssertThat(ts_node_start_byte(node), Equals(array_index));
// Cannot descend into a node with no children
AssertThat(ts_tree_cursor_goto_first_child(&cursor), IsFalse());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("["));
AssertThat(ts_node_start_byte(node), Equals(array_index));
// Step across the array's children: number , false , object.
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("number"));
AssertThat(ts_node_start_byte(node), Equals(number_index));
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals(","));
AssertThat(ts_node_start_byte(node), Equals(number_end_index));
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("false"));
AssertThat(ts_node_start_byte(node), Equals(false_index));
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals(","));
AssertThat(ts_node_start_byte(node), Equals(false_end_index));
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("object"));
AssertThat(ts_node_start_byte(node), Equals(object_index));
// Descend into the object: "{" then its pair.
AssertThat(ts_tree_cursor_goto_first_child(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("{"));
AssertThat(ts_node_start_byte(node), Equals(object_index));
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("pair"));
AssertThat(ts_node_start_byte(node), Equals(string_index));
// Descend into the pair: string : null.
AssertThat(ts_tree_cursor_goto_first_child(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("string"));
AssertThat(ts_node_start_byte(node), Equals(string_index));
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals(":"));
AssertThat(ts_node_start_byte(node), Equals(string_end_index));
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("null"));
AssertThat(ts_node_start_byte(node), Equals(null_index));
// Cannot move beyond a node with no next sibling
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsFalse());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("null"));
AssertThat(ts_node_start_byte(node), Equals(null_index));
// Climb back up: null -> pair -> object -> array -> value.
AssertThat(ts_tree_cursor_goto_parent(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("pair"));
AssertThat(ts_node_start_byte(node), Equals(string_index));
AssertThat(ts_tree_cursor_goto_parent(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("object"));
AssertThat(ts_node_start_byte(node), Equals(object_index));
AssertThat(ts_tree_cursor_goto_parent(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("array"));
AssertThat(ts_node_start_byte(node), Equals(array_index));
AssertThat(ts_tree_cursor_goto_parent(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("value"));
AssertThat(ts_node_start_byte(node), Equals(array_index));
// The root node doesn't have a parent.
AssertThat(ts_tree_cursor_goto_parent(&cursor), IsFalse());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("value"));
AssertThat(ts_node_start_byte(node), Equals(array_index));
});
// Exercises ts_tree_cursor_goto_first_child_for_byte. Each call is checked for
// the node the cursor ends up on and for the returned child index. Between
// probes the cursor is moved back to the array so every lookup starts from the
// same parent.
it("can find the first child of a given node which spans the given byte offset", [&]() {
// From the root, the lookup lands on the array, which is child 0.
int64_t child_index = ts_tree_cursor_goto_first_child_for_byte(&cursor, 1);
TSNode node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("array"));
AssertThat(ts_node_start_byte(node), Equals(array_index));
AssertThat(child_index, Equals(0));
// From the array, its own start byte selects the opening bracket (child 0).
child_index = ts_tree_cursor_goto_first_child_for_byte(&cursor, array_index);
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("["));
AssertThat(ts_node_start_byte(node), Equals(array_index));
AssertThat(child_index, Equals(0));
// One byte past the array start selects the number (child 1).
ts_tree_cursor_goto_parent(&cursor);
child_index = ts_tree_cursor_goto_first_child_for_byte(&cursor, array_index + 1);
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("number"));
AssertThat(ts_node_start_byte(node), Equals(number_index));
AssertThat(child_index, Equals(1));
// A byte inside the number still selects the number.
ts_tree_cursor_goto_parent(&cursor);
child_index = ts_tree_cursor_goto_first_child_for_byte(&cursor, number_index + 1);
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("number"));
AssertThat(ts_node_start_byte(node), Equals(number_index));
AssertThat(child_index, Equals(1));
// The byte just before `false` selects `false` (child 3).
ts_tree_cursor_goto_parent(&cursor);
child_index = ts_tree_cursor_goto_first_child_for_byte(&cursor, false_index - 1);
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("false"));
AssertThat(ts_node_start_byte(node), Equals(false_index));
AssertThat(child_index, Equals(3));
// The last byte before the object's end offset selects the object (child 5).
ts_tree_cursor_goto_parent(&cursor);
child_index = ts_tree_cursor_goto_first_child_for_byte(&cursor, object_end_index - 1);
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("object"));
AssertThat(ts_node_start_byte(node), Equals(object_index));
AssertThat(child_index, Equals(5));
// There is no child past the end of the array
// In that case -1 is returned and the cursor stays on the array.
ts_tree_cursor_goto_parent(&cursor);
child_index = ts_tree_cursor_goto_first_child_for_byte(&cursor, array_end_index);
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("array"));
AssertThat(ts_node_start_byte(node), Equals(array_index));
AssertThat(child_index, Equals(-1));
});
// Re-parses a small input with the aliases-and-extras test language and checks
// that stepping through the children of `a` with the cursor yields
// b, comment, B, comment, C in that order.
it("walks the tree correctly when the node contains aliased children and extras", [&]() {
// Replace the JSON fixture set up by before_each: drop the old cursor and
// tree, parse the new input, and put a new cursor on the new tree.
ts_parser_set_language(parser, language_with_aliases_and_extras);
ts_tree_cursor_delete(&cursor);
ts_tree_delete(tree);
// 13 is the byte length of the literal input string.
tree = ts_parser_parse_string(parser, nullptr, "b ... b ... c", 13);
cursor = ts_tree_cursor_new(tree);
// The cursor starts on the root node `a`.
TSNode node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("a"));
AssertThat(ts_tree_cursor_goto_first_child(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("b"));
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("comment"));
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("B"));
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("comment"));
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsTrue());
node = ts_tree_cursor_current_node(&cursor);
AssertThat(ts_node_type(node), Equals("C"));
// `C` is the last child; after returning to `a`, a lookup at byte 0 reports
// child index 0.
AssertThat(ts_tree_cursor_goto_next_sibling(&cursor), IsFalse());
AssertThat(ts_tree_cursor_goto_parent(&cursor), IsTrue());
AssertThat(ts_tree_cursor_goto_first_child_for_byte(&cursor, 0), Equals(0));
});
});
END_TEST

View file

@ -1,17 +1,21 @@
#include "test_helper.h"
#include <future>
#include "runtime/alloc.h"
#include "runtime/language.h"
#include "helpers/record_alloc.h"
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
#include "helpers/record_alloc.h"
#include "helpers/point_helpers.h"
#include "helpers/spy_logger.h"
#include "helpers/stderr_logger.h"
#include "helpers/dedent.h"
START_TEST
describe("Parser", [&]() {
TSDocument *document;
TSParser *parser;
TSTree *tree;
SpyInput *input;
TSNode root;
size_t chunk_size;
@ -21,14 +25,16 @@ describe("Parser", [&]() {
chunk_size = 3;
input = nullptr;
document = ts_document_new();
tree = nullptr;
parser = ts_parser_new();
if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) {
ts_document_print_debugging_graphs(document, true);
ts_parser_print_dot_graphs(parser, stderr);
}
});
after_each([&]() {
if (document) ts_document_free(document);
if (parser) ts_parser_delete(parser);
if (tree) ts_tree_delete(tree);
if (input) delete input;
record_alloc::stop();
@ -37,10 +43,8 @@ describe("Parser", [&]() {
auto set_text = [&](string text) {
input = new SpyInput(text, chunk_size);
ts_document_set_input(document, input->input());
ts_document_parse(document);
root = ts_document_root_node(document);
tree = ts_parser_parse(parser, nullptr, input->input());
root = ts_tree_root_node(tree);
AssertThat(ts_node_end_byte(root), Equals(text.size()));
input->clear();
};
@ -48,10 +52,13 @@ describe("Parser", [&]() {
auto replace_text = [&](size_t position, size_t length, string new_text) {
size_t prev_size = ts_node_end_byte(root);
ts_document_edit(document, input->replace(position, length, new_text));
ts_document_parse(document);
TSInputEdit edit = input->replace(position, length, new_text);
ts_tree_edit(tree, &edit);
TSTree *new_tree = ts_parser_parse(parser, tree, input->input());
ts_tree_delete(tree);
tree = new_tree;
root = ts_document_root_node(document);
root = ts_tree_root_node(tree);
size_t new_size = ts_node_end_byte(root);
AssertThat(new_size, Equals(prev_size - length + new_text.size()));
};
@ -65,13 +72,16 @@ describe("Parser", [&]() {
};
// Test helper: takes the edit returned by input->undo(), applies it to the
// current tree and re-parses with that tree passed as the previous tree.
// Unlike replace_text, `root` is not refreshed here.
// NOTE: this span shows both the ts_document_* calls and the
// ts_parser_*/ts_tree_* calls that perform the same steps.
auto undo = [&]() {
ts_document_edit(document, input->undo());
ts_document_parse(document);
TSInputEdit edit = input->undo();
ts_tree_edit(tree, &edit);
TSTree *new_tree = ts_parser_parse(parser, tree, input->input());
// The old tree is deleted only after the new tree has been produced from it.
ts_tree_delete(tree);
tree = new_tree;
};
auto assert_root_node = [&](const string &expected) {
TSNode node = ts_document_root_node(document);
char *node_string = ts_node_string(node, document);
TSNode node = ts_tree_root_node(tree);
char *node_string = ts_node_string(node);
string actual(node_string);
ts_free(node_string);
AssertThat(actual, Equals(expected));
@ -86,14 +96,12 @@ describe("Parser", [&]() {
describe("handling errors", [&]() {
describe("when there is an invalid substring right before a valid token", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_parser_set_language(parser, load_real_language("json"));
set_text(" [123, @@@@@, true]");
assert_root_node(
"(value (array (number) (ERROR (UNEXPECTED '@')) (true)))");
assert_root_node("(value (array (number) (ERROR (UNEXPECTED '@')) (true)))");
TSNode error = ts_node_named_child(ts_node_child(root, 0), 1);
AssertThat(ts_node_type(error, document), Equals("ERROR"));
AssertThat(ts_node_type(error), Equals("ERROR"));
AssertThat(get_node_text(error), Equals("@@@@@,"));
AssertThat(ts_node_child_count(error), Equals<size_t>(2));
@ -104,79 +112,75 @@ describe("Parser", [&]() {
AssertThat(get_node_text(comma), Equals(","));
TSNode node_after_error = ts_node_next_named_sibling(error);
AssertThat(ts_node_type(node_after_error, document), Equals("true"));
AssertThat(ts_node_type(node_after_error), Equals("true"));
AssertThat(get_node_text(node_after_error), Equals("true"));
});
});
// Parses JSON in which `false` is misspelled as `faaaaalse` and checks the
// shape of the resulting ERROR node: its text, its two children (the garbage
// and the comma) and the named sibling that follows it.
// NOTE: each ts_node_type / set_language call appears twice in this span,
// once in its `document`-based form and once in its parser/tree-based form.
describe("when there is an unexpected string in the middle of a token", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_parser_set_language(parser, load_real_language("json"));
set_text(" [123, faaaaalse, true]");
assert_root_node(
"(value (array (number) (ERROR (UNEXPECTED 'a')) (true)))");
// Second named child of the array is the error node.
TSNode error = ts_node_named_child(ts_node_child(root, 0), 1);
AssertThat(ts_node_type(error, document), Equals("ERROR"));
AssertThat(ts_node_type(error), Equals("ERROR"));
AssertThat(get_node_text(error), Equals("faaaaalse,"));
AssertThat(ts_node_child_count(error), Equals<size_t>(2));
TSNode garbage = ts_node_child(error, 0);
AssertThat(ts_node_type(garbage, document), Equals("ERROR"));
AssertThat(ts_node_type(garbage), Equals("ERROR"));
AssertThat(get_node_text(garbage), Equals("faaaaalse"));
TSNode comma = ts_node_child(error, 1);
AssertThat(ts_node_type(comma, document), Equals(","));
AssertThat(ts_node_type(comma), Equals(","));
AssertThat(get_node_text(comma), Equals(","));
// The node after the error must start right after the text preceding `true`.
TSNode last = ts_node_next_named_sibling(error);
AssertThat(ts_node_type(last, document), Equals("true"));
AssertThat(ts_node_type(last), Equals("true"));
AssertThat(ts_node_start_byte(last), Equals(strlen(" [123, faaaaalse, ")));
});
});
// Parses JSON with a stray `false` between two array elements and checks
// that the stray token is wrapped in an ERROR node with one child, followed
// by the named sibling `true`.
// NOTE: each ts_node_type / set_language / assert_root_node call appears
// twice in this span, once per API variant or formatting.
describe("when there is one unexpected token between two valid tokens", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_parser_set_language(parser, load_real_language("json"));
set_text(" [123, true false, true]");
assert_root_node(
"(value (array (number) (true) (ERROR (false)) (true)))");
assert_root_node("(value (array (number) (true) (ERROR (false)) (true)))");
// Third named child of the array is the error node.
TSNode error = ts_node_named_child(ts_node_child(root, 0), 2);
AssertThat(ts_node_type(error, document), Equals("ERROR"));
AssertThat(ts_node_type(error), Equals("ERROR"));
AssertThat(get_node_text(error), Equals("false"));
AssertThat(ts_node_child_count(error), Equals<size_t>(1));
TSNode last = ts_node_next_named_sibling(error);
AssertThat(ts_node_type(last, document), Equals("true"));
AssertThat(ts_node_type(last), Equals("true"));
AssertThat(get_node_text(last), Equals("true"));
});
});
// Parses JSON containing a string literal that is broken by a newline before
// its closing quote, and checks the S-expression of the resulting tree.
// NOTE: the set_language and assert_root_node calls each appear twice in
// this span, once per API variant or formatting.
describe("when there is an unexpected string at the end of a token", [&]() {
it("computes the error's size and position correctly", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_parser_set_language(parser, load_real_language("json"));
set_text(" [123, \"hi\n, true]");
assert_root_node(
"(value (array (number) (ERROR (UNEXPECTED '\\n')) (true)))");
assert_root_node("(value (array (number) (ERROR (UNEXPECTED '\\n')) (true)))");
});
});
// Parses JavaScript that ends inside an unclosed string literal and checks
// that the tree still has a well-formed shape, with the unterminated part
// reported as an ERROR node at end of input.
// NOTE: the set_language and assert_root_node calls each appear twice in
// this span, once per API variant or formatting.
describe("when there is an unterminated error", [&]() {
it("maintains a consistent tree", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("a; ' this string never ends");
assert_root_node(
"(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))");
assert_root_node("(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))");
});
});
describe("when there are extra tokens at the end of the viable prefix", [&]() {
it("does not include them in the error node", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text(
"var x;\n"
"\n"
@ -186,7 +190,7 @@ describe("Parser", [&]() {
);
TSNode error = ts_node_named_child(root, 1);
AssertThat(ts_node_type(error, document), Equals("ERROR"));
AssertThat(ts_node_type(error), Equals("ERROR"));
AssertThat(ts_node_start_point(error), Equals<TSPoint>({2, 0}));
AssertThat(ts_node_end_point(error), Equals<TSPoint>({2, 2}));
});
@ -196,59 +200,56 @@ describe("Parser", [&]() {
char *string = (char *)malloc(1);
string[0] = '\xdf';
ts_document_set_language(document, load_real_language("javascript"));
ts_document_set_input_string_with_length(document, string, 1);
ts_document_parse(document);
ts_parser_set_language(parser, load_real_language("json"));
tree = ts_parser_parse_string(parser, nullptr, string, 1);
free(string);
assert_root_node("(program (ERROR (UNEXPECTED INVALID)))");
assert_root_node("(ERROR (UNEXPECTED INVALID))");
});
});
describe("handling extra tokens", [&]() {
describe("when the token appears as part of a grammar rule", [&]() {
it("incorporates it into the tree", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
set_text("fn()\n");
describe("when halt_on_error is set to true", [&]() {
it("halts as soon as an error is found if the halt_on_error flag is set", [&]() {
string input_string = "[1, null, error, 3]";
ts_parser_set_language(parser, load_real_language("json"));
assert_root_node(
"(program (expression_statement (call_expression (identifier) (arguments))))");
tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size());
root = ts_tree_root_node(tree);
assert_root_node("(value (array (number) (null) (ERROR (UNEXPECTED 'e')) (number)))");
ts_parser_halt_on_error(parser, true);
ts_tree_delete(tree);
tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size());
root = ts_tree_root_node(tree);
assert_root_node("(ERROR (number) (null))");
AssertThat(ts_node_end_byte(root), Equals(input_string.size()));
});
});
describe("when the token appears somewhere else", [&]() {
it("incorporates it into the tree", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
set_text(
"fn()\n"
" .otherFn();");
it("does not insert missing tokens if the halt_on_error flag is set", [&]() {
string input_string = "[1, null, 3";
ts_parser_set_language(parser, load_real_language("json"));
assert_root_node(
"(program (expression_statement (call_expression "
"(member_expression "
"(call_expression (identifier) (arguments)) "
"(property_identifier)) "
"(arguments))))");
tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size());
root = ts_tree_root_node(tree);
assert_root_node("(value (array (number) (null) (number) (MISSING)))");
ts_parser_halt_on_error(parser, true);
ts_tree_delete(tree);
tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size());
root = ts_tree_root_node(tree);
assert_root_node("(ERROR (number) (null) (number))");
AssertThat(ts_node_end_byte(root), Equals(input_string.size()));
});
});
describe("when several extra tokens appear in a row", [&]() {
it("incorporates them into the tree", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
set_text(
"fn()\n\n"
"// This is a comment"
"\n\n"
".otherFn();");
it("can parse valid code with the halt_on_error flag set", [&]() {
string input_string = "[1, null, 3]";
ts_parser_set_language(parser, load_real_language("json"));
assert_root_node(
"(program (expression_statement (call_expression "
"(member_expression "
"(call_expression (identifier) (arguments)) "
"(comment) "
"(property_identifier)) "
"(arguments))))");
ts_parser_halt_on_error(parser, true);
tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size());
root = ts_tree_root_node(tree);
assert_root_node("(value (array (number) (null) (number)))");
});
});
});
@ -256,7 +257,7 @@ describe("Parser", [&]() {
describe("editing", [&]() {
describe("creating new tokens near the end of the input", [&]() {
it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("x * (100 + abc);");
assert_root_node(
@ -289,7 +290,7 @@ describe("Parser", [&]() {
it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
chunk_size = 2;
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("123 + 456 * (10 + x);");
assert_root_node(
@ -315,7 +316,7 @@ describe("Parser", [&]() {
describe("introducing an error", [&]() {
it("gives the error the right size", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("var x = y;");
assert_root_node(
@ -338,7 +339,7 @@ describe("Parser", [&]() {
describe("into the middle of an existing token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("abc * 123;");
assert_root_node(
@ -350,14 +351,14 @@ describe("Parser", [&]() {
"(program (expression_statement (binary_expression (identifier) (number))))");
TSNode node = ts_node_named_descendant_for_byte_range(root, 1, 1);
AssertThat(ts_node_type(node, document), Equals("identifier"));
AssertThat(ts_node_type(node), Equals("identifier"));
AssertThat(ts_node_end_byte(node), Equals(strlen("abXYZc")));
});
});
describe("at the end of an existing token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("abc * 123;");
assert_root_node(
@ -369,14 +370,14 @@ describe("Parser", [&]() {
"(program (expression_statement (binary_expression (identifier) (number))))");
TSNode node = ts_node_named_descendant_for_byte_range(root, 1, 1);
AssertThat(ts_node_type(node, document), Equals("identifier"));
AssertThat(ts_node_type(node), Equals("identifier"));
AssertThat(ts_node_end_byte(node), Equals(strlen("abcXYZ")));
});
});
describe("inserting text into a node containing a extra token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("123 *\n"
"// a-comment\n"
"abc;");
@ -403,7 +404,7 @@ describe("Parser", [&]() {
describe("when a critical token is removed", [&]() {
it("updates the parse tree, creating an error", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("123 * 456; 789 * 123;");
assert_root_node(
@ -423,7 +424,7 @@ describe("Parser", [&]() {
describe("with external tokens", [&]() {
it("maintains the external scanner's state during incremental parsing", [&]() {
ts_document_set_language(document, load_real_language("python"));
ts_parser_set_language(parser, load_real_language("python"));
string text = dedent(R"PYTHON(
if a:
print b
@ -451,7 +452,7 @@ describe("Parser", [&]() {
});
it("does not try to reuse nodes that are within the edited region", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("{ x: (b.c) };");
assert_root_node(
@ -464,23 +465,12 @@ describe("Parser", [&]() {
"(program (expression_statement (object (pair "
"(property_identifier) (member_expression (identifier) (property_identifier))))))");
});
it("updates the document's parse count", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
AssertThat(ts_document_parse_count(document), Equals<size_t>(0));
set_text("{ x: (b.c) };");
AssertThat(ts_document_parse_count(document), Equals<size_t>(1));
insert_text(strlen("{ x"), "yz");
AssertThat(ts_document_parse_count(document), Equals<size_t>(2));
});
});
describe("lexing", [&]() {
describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() {
it("terminates them at the end of the document", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
it("terminates them at the end of the string", [&]() {
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("x; // this is a comment");
assert_root_node(
@ -495,7 +485,7 @@ describe("Parser", [&]() {
it("recognizes UTF8 characters as single characters", [&]() {
// 'ΩΩΩ — ΔΔ';
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';");
assert_root_node(
@ -507,14 +497,232 @@ describe("Parser", [&]() {
it("handles non-UTF8 characters", [&]() {
const char *string = "cons\xeb\x00e=ls\x83l6hi');\x0a";
ts_document_set_language(document, load_real_language("javascript"));
ts_document_set_input_string(document, string);
ts_document_parse(document);
TSNode root = ts_document_root_node(document);
ts_parser_set_language(parser, load_real_language("javascript"));
tree = ts_parser_parse_string(parser, nullptr, string, strlen(string));
TSNode root = ts_tree_root_node(tree);
AssertThat(ts_node_end_byte(root), Equals(strlen(string)));
});
});
// Exercises parsing through a caller-supplied TSInput (backed by a SpyInput)
// whose encoding is declared as UTF16.
describe("handling TSInputs", [&]() {
  SpyInput *spy_input;

  // Swaps the spy's content for `byte_count` raw bytes and marks the input as
  // UTF16-encoded.
  auto use_utf16_bytes = [&](const char *bytes, size_t byte_count) {
    spy_input->content = string(bytes, byte_count);
    spy_input->encoding = TSInputEncodingUTF16;
  };

  before_each([&]() {
    spy_input = new SpyInput("{\"key\": [null, 2]}", 3);
    ts_parser_set_language(parser, load_real_language("json"));
  });

  after_each([&]() {
    delete spy_input;
  });

  it("handles UTF16 encodings", [&]() {
    const char16_t utf16_text[] = u"[true, false]";
    use_utf16_bytes(reinterpret_cast<const char *>(utf16_text), sizeof(utf16_text));

    tree = ts_parser_parse(parser, nullptr, spy_input->input());
    root = ts_tree_root_node(tree);
    assert_root_node("(value (array (true) (false)))");
  });

  it("handles truncated UTF16 data", [&]() {
    // A single byte of content. There is no assertion: the test passes as
    // long as the parse call returns.
    const char lone_byte[1] = { '\0' };
    use_utf16_bytes(lone_byte, sizeof(lone_byte));

    tree = ts_parser_parse(parser, nullptr, spy_input->input());
  });

  it("measures columns in bytes", [&]() {
    // sizeof() counts the terminating NUL code unit too: 14 code units of
    // 2 bytes each give the 28 expected as the end column below.
    const char16_t utf16_text[] = u"[true, false]";
    use_utf16_bytes(reinterpret_cast<const char *>(utf16_text), sizeof(utf16_text));

    tree = ts_parser_parse(parser, nullptr, spy_input->input());
    root = ts_tree_root_node(tree);
    AssertThat(ts_node_end_point(root), Equals<TSPoint>({0, 28}));
  });
});
// Covers ts_parser_set_language: a language that is accepted, one that is
// rejected because of its version, and parsing with no language set.
describe("set_language(language)", [&]() {
  string json_source = "{\"key\": [1, 2]}\n";

  it("uses the given language for future parses", [&]() {
    ts_parser_set_language(parser, load_real_language("json"));

    tree = ts_parser_parse_string(parser, nullptr, json_source.c_str(), json_source.size());
    root = ts_tree_root_node(tree);
    assert_root_node("(value (object (pair (string) (array (number) (number)))))");
  });

  it("does not allow setting a language with a different version number", [&]() {
    // Work on a private copy so the loaded language itself is not modified.
    TSLanguage mismatched = *load_real_language("json");
    AssertThat(ts_language_version(&mismatched), Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));

    mismatched.version += 1;
    AssertThat(ts_language_version(&mismatched), !Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));

    // The parser must refuse the copy and stay without a language.
    bool accepted = ts_parser_set_language(parser, &mismatched);
    AssertThat(accepted, IsFalse());
    AssertThat(ts_parser_language(parser), Equals<const TSLanguage *>(nullptr));
  });

  it("does nothing when parse is called while the language is null", [&]() {
    // First with no language ever set on the parser...
    tree = ts_parser_parse_string(parser, nullptr, "{}", 2);
    AssertThat(tree, Equals<TSTree *>(nullptr));

    // ...then after explicitly setting a null language.
    ts_parser_set_language(parser, nullptr);
    tree = ts_parser_parse_string(parser, nullptr, "{}", 2);
    AssertThat(tree, Equals<TSTree *>(nullptr));
  });
});
// Covers ts_parser_set_logger / ts_parser_logger using a SpyLogger that
// records the messages it receives.
describe("set_logger(TSLogger)", [&]() {
  SpyLogger *logger;

  before_each([&]() {
    logger = new SpyLogger();
    ts_parser_set_language(parser, load_real_language("json"));
  });

  after_each([&]() {
    delete logger;
  });

  it("calls the debugger with a message for each parse action", [&]() {
    ts_parser_set_logger(parser, logger->logger());

    // sizeof() - 1 drops the terminating NUL, leaving the 11 source bytes.
    const char source[] = "[ 1, 2, 3 ]";
    tree = ts_parser_parse_string(parser, nullptr, source, sizeof(source) - 1);

    AssertThat(logger->messages, Contains("new_parse"));
    AssertThat(logger->messages, Contains("skip character:' '"));
    AssertThat(logger->messages, Contains("consume character:'['"));
    AssertThat(logger->messages, Contains("consume character:'1'"));
    AssertThat(logger->messages, Contains("reduce sym:array, child_count:4"));
    AssertThat(logger->messages, Contains("accept"));
  });

  it("allows the debugger to be retrieved later", [&]() {
    ts_parser_set_logger(parser, logger->logger());
    AssertThat(ts_parser_logger(parser).payload, Equals(logger));
  });

  describe("disabling debugging", [&]() {
    before_each([&]() {
      // Install the spy, then replace it with an all-null logger.
      ts_parser_set_logger(parser, logger->logger());
      TSLogger no_logger = {nullptr, nullptr};
      ts_parser_set_logger(parser, no_logger);
    });

    it("does not call the debugger any more", [&]() {
      const char source[] = "{}";
      tree = ts_parser_parse_string(parser, nullptr, source, sizeof(source) - 1);
      AssertThat(logger->messages, IsEmpty());
    });
  });
});
// Covers ts_parser_set_enabled: disabling the parser while another thread is
// parsing an endless input must make that parse return null, and the parser
// must keep returning null until it is enabled again.
describe("set_enabled(enabled)", [&]() {
  it("stops the in-progress parse if false is passed", [&]() {
    ts_parser_set_language(parser, load_real_language("json"));
    AssertThat(ts_parser_enabled(parser), IsTrue());

    // std::launch::async is requested explicitly. With the default policy the
    // implementation may defer the task until wait()/get(), in which case the
    // parse would only start after the parser had already been disabled and
    // the "in-progress" scenario would never be exercised.
    auto tree_future = std::async(std::launch::async, [parser]() {
      size_t read_count = 0;

      // An input that yields "[" forever; only disabling the parser ends it.
      TSInput infinite_input = {
        &read_count,
        [](void *payload, uint32_t *bytes_read) {
          size_t *read_count = static_cast<size_t *>(payload);
          // Safety net against a runaway parse. The increment is kept outside
          // assert() so the counter still advances when NDEBUG is defined.
          assert(*read_count < 100000);
          (*read_count)++;
          *bytes_read = 1;
          return "[";
        },
        [](void *payload, unsigned byte, TSPoint position) -> int {
          return true;
        },
        TSInputEncodingUTF8
      };

      return ts_parser_parse(parser, nullptr, infinite_input);
    });

    auto cancel_future = std::async(std::launch::async, [parser]() {
      ts_parser_set_enabled(parser, false);
    });

    cancel_future.wait();
    tree_future.wait();
    AssertThat(ts_parser_enabled(parser), IsFalse());
    AssertThat(tree_future.get(), Equals<TSTree *>(nullptr));

    // While disabled, even a trivial parse is refused. The local is named
    // `result` so it does not shadow the fixture's `tree`.
    TSTree *result = ts_parser_parse_string(parser, nullptr, "[]", 2);
    AssertThat(ts_parser_enabled(parser), IsFalse());
    AssertThat(result, Equals<TSTree *>(nullptr));

    // Re-enabling restores normal parsing.
    ts_parser_set_enabled(parser, true);
    AssertThat(ts_parser_enabled(parser), IsTrue());
    result = ts_parser_parse_string(parser, nullptr, "[]", 2);
    AssertThat(result, !Equals<TSTree *>(nullptr));
    ts_tree_delete(result);
  });
});
// Covers ts_parser_set_operation_limit: with a limit set, a parse over an
// endless input must return null after a bounded number of reads, and
// ts_parser_resume must then be able to finish once the input ends.
describe("set_operation_limit(limit)", [&]() {
  it("limits the amount of work the parser does on any given call to parse()", [&]() {
    ts_parser_set_language(parser, load_real_language("json"));

    struct InputState {
      const char *string;
      size_t read_count;
    };

    InputState state = {"[", 0};

    // An input that repeats the given string forever, counting how many times
    // it has been read.
    TSInput infinite_input = {
      &state,
      [](void *payload, uint32_t *bytes_read) {
        InputState *state = static_cast<InputState *>(payload);
        // The check uses the count before this read, as the original did. The
        // increment is kept outside assert() so the counter still advances
        // when NDEBUG is defined.
        assert(state->read_count <= 10);
        state->read_count++;
        *bytes_read = static_cast<uint32_t>(strlen(state->string));
        return state->string;
      },
      [](void *payload, unsigned byte, TSPoint position) -> int {
        return true;
      },
      TSInputEncodingUTF8
    };

    ts_parser_set_operation_limit(parser, 10);

    // The local is named `result` so it does not shadow the fixture's `tree`.
    TSTree *result = ts_parser_parse(parser, nullptr, infinite_input);
    AssertThat(result, Equals<TSTree *>(nullptr));

    // Let the input run dry (empty string) and reset the read budget so the
    // resumed parse can complete.
    state.read_count = 0;
    state.string = "";

    result = ts_parser_resume(parser);
    AssertThat(result, !Equals<TSTree *>(nullptr));
    ts_tree_delete(result);
  });
});
// Covers ts_parser_resume when there is no halted parse to continue: it must
// return null both before any parse and after a parse that ran to completion.
describe("resume()", [&]() {
  it("does nothing unless parsing was previously halted", [&]() {
    ts_parser_set_language(parser, load_real_language("json"));

    // Nothing has been parsed yet, so resuming twice in a row yields nothing.
    for (int attempt = 0; attempt < 2; attempt++) {
      TSTree *resumed = ts_parser_resume(parser);
      AssertThat(resumed, Equals<TSTree *>(nullptr));
    }

    // A parse that completes normally produces a tree...
    TSTree *parsed = ts_parser_parse_string(parser, nullptr, "true", 4);
    AssertThat(parsed, !Equals<TSTree *>(nullptr));
    ts_tree_delete(parsed);

    // ...and leaves nothing behind to resume.
    TSTree *resumed_after_parse = ts_parser_resume(parser);
    AssertThat(resumed_after_parse, Equals<TSTree *>(nullptr));
  });
});
});
END_TEST

View file

@ -4,7 +4,7 @@
#include "helpers/record_alloc.h"
#include "helpers/stream_methods.h"
#include "runtime/stack.h"
#include "runtime/tree.h"
#include "runtime/subtree.h"
#include "runtime/length.h"
#include "runtime/alloc.h"
@ -23,27 +23,31 @@ Length operator*(const Length &length, uint32_t factor) {
return {length.bytes * factor, {0, length.extent.column * factor}};
}
// Test helper: releases every tree held by the slices in `slices` and frees
// each slice's array. Several slices may share one underlying contents array;
// a slice whose contents pointer equals that of an earlier slice is skipped so
// the same trees are not released (and the same array not deleted) twice.
// NOTE: this span shows both the TreePool/trees/ts_tree_release spelling and
// the SubtreePool/subtrees/ts_subtree_release spelling of the same function.
void free_slice_array(TreePool *pool, StackSliceArray *slices) {
void free_slice_array(SubtreePool *pool, StackSliceArray *slices) {
for (size_t i = 0; i < slices->size; i++) {
StackSlice slice = slices->contents[i];
// Look only at earlier slices: the first slice that owns a given contents
// array is the one responsible for freeing it.
bool matches_prior_trees = false;
for (size_t j = 0; j < i; j++) {
StackSlice prior_slice = slices->contents[j];
if (slice.trees.contents == prior_slice.trees.contents) {
if (slice.subtrees.contents == prior_slice.subtrees.contents) {
matches_prior_trees = true;
break;
}
}
if (!matches_prior_trees) {
for (size_t j = 0; j < slice.trees.size; j++)
ts_tree_release(pool, slice.trees.contents[j]);
array_delete(&slice.trees);
for (size_t j = 0; j < slice.subtrees.size; j++)
ts_subtree_release(pool, slice.subtrees.contents[j]);
array_delete(&slice.subtrees);
}
}
}
// Test-only helper: returns a writable pointer to the same Subtree so a test
// can adjust fields (the tests use it for `size` and `extra`) on fixtures that
// are otherwise handled through const pointers.
// const_cast is used instead of a C-style cast so the conversion can only
// remove constness and can never silently reinterpret the pointer type.
Subtree *mutate(const Subtree *subtree) {
  return const_cast<Subtree *>(subtree);
}
struct StackEntry {
TSStateId state;
size_t depth;
@ -54,9 +58,9 @@ vector<StackEntry> get_stack_entries(Stack *stack, StackVersion version) {
ts_stack_iterate(
stack,
version,
[](void *payload, TSStateId state, uint32_t tree_count) {
[](void *payload, TSStateId state, uint32_t subtree_count) {
auto entries = static_cast<vector<StackEntry> *>(payload);
StackEntry entry = {state, tree_count};
StackEntry entry = {state, subtree_count};
if (find(entries->begin(), entries->end(), entry) == entries->end()) {
entries->push_back(entry);
}
@ -68,39 +72,39 @@ START_TEST
describe("Stack", [&]() {
Stack *stack;
const size_t tree_count = 11;
Tree *trees[tree_count];
const size_t subtree_count = 11;
const Subtree *subtrees[subtree_count];
Length tree_len = {3, {0, 3}};
TreePool pool;
SubtreePool pool;
before_each([&]() {
record_alloc::start();
ts_tree_pool_init(&pool);
pool = ts_subtree_pool_new(10);
stack = ts_stack_new(&pool);
TSLanguage dummy_language;
TSSymbolMetadata symbol_metadata[50] = {};
dummy_language.symbol_metadata = symbol_metadata;
for (size_t i = 0; i < tree_count; i++) {
trees[i] = ts_tree_make_leaf(&pool, i, length_zero(), tree_len, &dummy_language);
for (size_t i = 0; i < subtree_count; i++) {
subtrees[i] = ts_subtree_new_leaf(&pool, i + 1, length_zero(), tree_len, &dummy_language);
}
});
after_each([&]() {
ts_stack_delete(stack);
for (size_t i = 0; i < tree_count; i++) {
ts_tree_release(&pool, trees[i]);
for (size_t i = 0; i < subtree_count; i++) {
ts_subtree_release(&pool, subtrees[i]);
}
ts_tree_pool_delete(&pool);
ts_subtree_pool_delete(&pool);
record_alloc::stop();
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
auto push = [&](StackVersion version, Tree *tree, TSStateId state) {
ts_tree_retain(tree);
auto push = [&](StackVersion version, const Subtree *tree, TSStateId state) {
ts_subtree_retain(tree);
ts_stack_push(stack, version, tree, false, state);
};
@ -111,17 +115,17 @@ describe("Stack", [&]() {
AssertThat(ts_stack_position(stack, 0), Equals(length_zero()));
// . <──0── A*
push(0, trees[0], stateA);
push(0, subtrees[0], stateA);
AssertThat(ts_stack_state(stack, 0), Equals(stateA));
AssertThat(ts_stack_position(stack, 0), Equals(tree_len));
// . <──0── A <──1── B*
push(0, trees[1], stateB);
push(0, subtrees[1], stateB);
AssertThat(ts_stack_state(stack, 0), Equals(stateB));
AssertThat(ts_stack_position(stack, 0), Equals(tree_len * 2));
// . <──0── A <──1── B <──2── C*
push(0, trees[2], stateC);
push(0, subtrees[2], stateC);
AssertThat(ts_stack_state(stack, 0), Equals(stateC));
AssertThat(ts_stack_position(stack, 0), Equals(tree_len * 3));
@ -139,7 +143,7 @@ describe("Stack", [&]() {
// . <──0── A <─*
// ↑
// └───*
push(0, trees[0], stateA);
push(0, subtrees[0], stateA);
ts_stack_copy_version(stack, 0);
});
@ -147,10 +151,10 @@ describe("Stack", [&]() {
// . <──0── A <──1── B <──3── D*
// ↑
// └───2─── C <──4── D*
push(0, trees[1], stateB);
push(1, trees[2], stateC);
push(0, trees[3], stateD);
push(1, trees[4], stateD);
push(0, subtrees[1], stateB);
push(1, subtrees[2], stateC);
push(0, subtrees[3], stateD);
push(1, subtrees[4], stateD);
// . <──0── A <──1── B <──3── D*
// ↑ |
@ -170,8 +174,8 @@ describe("Stack", [&]() {
// . <──0── A <──1── B*
// ↑
// └───2─── C*
push(0, trees[1], stateB);
push(1, trees[2], stateC);
push(0, subtrees[1], stateB);
push(1, subtrees[2], stateC);
AssertThat(ts_stack_merge(stack, 0, 1), IsFalse());
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
@ -181,11 +185,11 @@ describe("Stack", [&]() {
// . <──0── A <──1── B <────3──── D*
// ↑
// └───2─── C <──4── D*
trees[3]->size = tree_len * 3;
push(0, trees[1], stateB);
push(1, trees[2], stateC);
push(0, trees[3], stateD);
push(1, trees[4], stateD);
mutate(subtrees[3])->size = tree_len * 3;
push(0, subtrees[1], stateB);
push(1, subtrees[2], stateC);
push(0, subtrees[3], stateD);
push(1, subtrees[4], stateD);
AssertThat(ts_stack_merge(stack, 0, 1), IsFalse());
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
@ -196,12 +200,12 @@ describe("Stack", [&]() {
// . <──0── A <──1── B <──3── D <──5── E*
// ↑
// └───2─── C <──4── D <──5── E*
push(0, trees[1], stateB);
push(1, trees[2], stateC);
push(0, trees[3], stateD);
push(1, trees[4], stateD);
push(0, trees[5], stateE);
push(1, trees[5], stateE);
push(0, subtrees[1], stateB);
push(1, subtrees[2], stateC);
push(0, subtrees[3], stateD);
push(1, subtrees[4], stateD);
push(0, subtrees[5], stateE);
push(1, subtrees[5], stateE);
// . <──0── A <──1── B <──3── D <──5── E*
// ↑ |
@ -224,12 +228,12 @@ describe("Stack", [&]() {
// . <──0── A <────1──── B*
// ↑
// └2─ A <──1── B*
trees[2]->extra = true;
trees[2]->size = tree_len * 0;
mutate(subtrees[2])->extra = true;
mutate(subtrees[2])->size = tree_len * 0;
push(0, trees[1], stateB);
push(1, trees[2], stateA);
push(1, trees[1], stateB);
push(0, subtrees[1], stateB);
push(1, subtrees[2], stateA);
push(1, subtrees[1], stateB);
// . <──0── A <──1── B*
AssertThat(ts_stack_merge(stack, 0, 1), IsTrue());
@ -246,9 +250,9 @@ describe("Stack", [&]() {
describe("pop_count(version, count)", [&]() {
before_each([&]() {
// . <──0── A <──1── B <──2── C*
push(0, trees[0], stateA);
push(0, trees[1], stateB);
push(0, trees[2], stateC);
push(0, subtrees[0], stateA);
push(0, subtrees[1], stateB);
push(0, subtrees[2], stateC);
});
it("creates a new version with the given number of entries removed", [&]() {
@ -261,14 +265,14 @@ describe("Stack", [&]() {
StackSlice slice = pop.contents[0];
AssertThat(slice.version, Equals<StackVersion>(1));
AssertThat(slice.trees, Equals(vector<Tree *>({ trees[1], trees[2] })));
AssertThat(slice.subtrees, Equals(vector<const Subtree *>({ subtrees[1], subtrees[2] })));
AssertThat(ts_stack_state(stack, 1), Equals(stateA));
free_slice_array(&pool,&pop);
});
it("does not count 'extra' trees toward the given count", [&]() {
trees[1]->extra = true;
it("does not count 'extra' subtrees toward the given count", [&]() {
mutate(subtrees[1])->extra = true;
// . <──0── A <──1── B <──2── C*
// ↑
@ -277,7 +281,7 @@ describe("Stack", [&]() {
AssertThat(pop.size, Equals<size_t>(1));
StackSlice slice = pop.contents[0];
AssertThat(slice.trees, Equals(vector<Tree *>({ trees[0], trees[1], trees[2] })));
AssertThat(slice.subtrees, Equals(vector<const Subtree *>({ subtrees[0], subtrees[1], subtrees[2] })));
AssertThat(ts_stack_state(stack, 1), Equals(1));
free_slice_array(&pool,&pop);
@ -288,14 +292,14 @@ describe("Stack", [&]() {
// . <──0── A <──1── B <──2── C <──3── D <──10── I*
// ↑ |
// └───4─── E <──5── F <──6───┘
push(0, trees[3], stateD);
push(0, subtrees[3], stateD);
StackSliceArray pop = ts_stack_pop_count(stack, 0, 3);
free_slice_array(&pool,&pop);
push(1, trees[4], stateE);
push(1, trees[5], stateF);
push(1, trees[6], stateD);
push(1, subtrees[4], stateE);
push(1, subtrees[5], stateF);
push(1, subtrees[6], stateD);
ts_stack_merge(stack, 0, 1);
push(0, trees[10], stateI);
push(0, subtrees[10], stateI);
AssertThat(ts_stack_version_count(stack), Equals<size_t>(1));
AssertThat(get_stack_entries(stack, 0), Equals(vector<StackEntry>({
@ -322,11 +326,11 @@ describe("Stack", [&]() {
StackSlice slice1 = pop.contents[0];
AssertThat(slice1.version, Equals<StackVersion>(1));
AssertThat(slice1.trees, Equals(vector<Tree *>({ trees[2], trees[3], trees[10] })));
AssertThat(slice1.subtrees, Equals(vector<const Subtree *>({ subtrees[2], subtrees[3], subtrees[10] })));
StackSlice slice2 = pop.contents[1];
AssertThat(slice2.version, Equals<StackVersion>(2));
AssertThat(slice2.trees, Equals(vector<Tree *>({ trees[5], trees[6], trees[10] })));
AssertThat(slice2.subtrees, Equals(vector<const Subtree *>({ subtrees[5], subtrees[6], subtrees[10] })));
AssertThat(ts_stack_version_count(stack), Equals<size_t>(3));
AssertThat(get_stack_entries(stack, 0), Equals(vector<StackEntry>({
@ -366,7 +370,7 @@ describe("Stack", [&]() {
StackSlice slice1 = pop.contents[0];
AssertThat(slice1.version, Equals<StackVersion>(1));
AssertThat(slice1.trees, Equals(vector<Tree *>({ trees[10] })));
AssertThat(slice1.subtrees, Equals(vector<const Subtree *>({ subtrees[10] })));
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
AssertThat(ts_stack_state(stack, 0), Equals(stateI));
@ -388,11 +392,11 @@ describe("Stack", [&]() {
StackSlice slice1 = pop.contents[0];
AssertThat(slice1.version, Equals<StackVersion>(1));
AssertThat(slice1.trees, Equals(vector<Tree *>({ trees[1], trees[2], trees[3], trees[10] })));
AssertThat(slice1.subtrees, Equals(vector<const Subtree *>({ subtrees[1], subtrees[2], subtrees[3], subtrees[10] })));
StackSlice slice2 = pop.contents[1];
AssertThat(slice2.version, Equals<StackVersion>(1));
AssertThat(slice2.trees, Equals(vector<Tree *>({ trees[4], trees[5], trees[6], trees[10] })));
AssertThat(slice2.subtrees, Equals(vector<const Subtree *>({ subtrees[4], subtrees[5], subtrees[6], subtrees[10] })));
AssertThat(ts_stack_version_count(stack), Equals<size_t>(2));
AssertThat(ts_stack_state(stack, 0), Equals(stateI));
@ -403,7 +407,7 @@ describe("Stack", [&]() {
});
describe("when there are three paths that lead to three different versions", [&]() {
it("returns three entries with different arrays of trees", [&]() {
it("returns three entries with different arrays of subtrees", [&]() {
// . <──0── A <──1── B <──2── C <──3── D <──10── I*
// ↑ |
// ├───4─── E <──5── F <──6───┘
@ -411,10 +415,10 @@ describe("Stack", [&]() {
// └───7─── G <──8── H <──9───┘
StackSliceArray pop = ts_stack_pop_count(stack, 0, 4);
free_slice_array(&pool,&pop);
push(1, trees[7], stateG);
push(1, trees[8], stateH);
push(1, trees[9], stateD);
push(1, trees[10], stateI);
push(1, subtrees[7], stateG);
push(1, subtrees[8], stateH);
push(1, subtrees[9], stateD);
push(1, subtrees[10], stateI);
ts_stack_merge(stack, 0, 1);
AssertThat(ts_stack_version_count(stack), Equals<size_t>(1));
@ -443,15 +447,15 @@ describe("Stack", [&]() {
StackSlice slice1 = pop.contents[0];
AssertThat(slice1.version, Equals<StackVersion>(1));
AssertThat(slice1.trees, Equals(vector<Tree *>({ trees[3], trees[10] })));
AssertThat(slice1.subtrees, Equals(vector<const Subtree *>({ subtrees[3], subtrees[10] })));
StackSlice slice2 = pop.contents[1];
AssertThat(slice2.version, Equals<StackVersion>(2));
AssertThat(slice2.trees, Equals(vector<Tree *>({ trees[6], trees[10] })));
AssertThat(slice2.subtrees, Equals(vector<const Subtree *>({ subtrees[6], subtrees[10] })));
StackSlice slice3 = pop.contents[2];
AssertThat(slice3.version, Equals<StackVersion>(3));
AssertThat(slice3.trees, Equals(vector<Tree *>({ trees[9], trees[10] })));
AssertThat(slice3.subtrees, Equals(vector<const Subtree *>({ subtrees[9], subtrees[10] })));
AssertThat(ts_stack_version_count(stack), Equals<size_t>(4));
AssertThat(ts_stack_state(stack, 0), Equals(stateI));
@ -467,12 +471,12 @@ describe("Stack", [&]() {
describe("pop_pending(version)", [&]() {
before_each([&]() {
push(0, trees[0], stateA);
push(0, subtrees[0], stateA);
});
it("removes the top node from the stack if it was pushed in pending mode", [&]() {
ts_stack_push(stack, 0, trees[1], true, stateB);
ts_tree_retain(trees[1]);
ts_stack_push(stack, 0, subtrees[1], true, stateB);
ts_subtree_retain(subtrees[1]);
StackSliceArray pop = ts_stack_pop_pending(stack, 0);
AssertThat(pop.size, Equals<size_t>(1));
@ -485,20 +489,20 @@ describe("Stack", [&]() {
free_slice_array(&pool,&pop);
});
it("skips entries whose trees are extra", [&]() {
ts_stack_push(stack, 0, trees[1], true, stateB);
ts_tree_retain(trees[1]);
it("skips entries whose subtrees are extra", [&]() {
ts_stack_push(stack, 0, subtrees[1], true, stateB);
ts_subtree_retain(subtrees[1]);
trees[2]->extra = true;
trees[3]->extra = true;
mutate(subtrees[2])->extra = true;
mutate(subtrees[3])->extra = true;
push(0, trees[2], stateB);
push(0, trees[3], stateB);
push(0, subtrees[2], stateB);
push(0, subtrees[3], stateB);
StackSliceArray pop = ts_stack_pop_pending(stack, 0);
AssertThat(pop.size, Equals<size_t>(1));
AssertThat(pop.contents[0].trees, Equals(vector<Tree *>({ trees[1], trees[2], trees[3] })));
AssertThat(pop.contents[0].subtrees, Equals(vector<const Subtree *>({ subtrees[1], subtrees[2], subtrees[3] })));
AssertThat(get_stack_entries(stack, 0), Equals(vector<StackEntry>({
{stateA, 0},
@ -509,7 +513,7 @@ describe("Stack", [&]() {
});
it("does nothing if the top node was not pushed in pending mode", [&]() {
push(0, trees[1], stateB);
push(0, subtrees[1], stateB);
StackSliceArray pop = ts_stack_pop_pending(stack, 0);
AssertThat(pop.size, Equals<size_t>(0));
@ -526,59 +530,59 @@ describe("Stack", [&]() {
describe("setting external token state", [&]() {
before_each([&]() {
trees[1]->has_external_tokens = true;
trees[2]->has_external_tokens = true;
ts_external_token_state_init(&trees[1]->external_token_state, NULL, 0);
ts_external_token_state_init(&trees[2]->external_token_state, NULL, 0);
mutate(subtrees[1])->has_external_tokens = true;
mutate(subtrees[2])->has_external_tokens = true;
ts_external_scanner_state_init(&mutate(subtrees[1])->external_scanner_state, NULL, 0);
ts_external_scanner_state_init(&mutate(subtrees[2])->external_scanner_state, NULL, 0);
});
it("allows the state to be retrieved", [&]() {
AssertThat(ts_stack_last_external_token(stack, 0), Equals<Tree *>(nullptr));
AssertThat(ts_stack_last_external_token(stack, 0), Equals<Subtree *>(nullptr));
ts_stack_set_last_external_token(stack, 0, trees[1]);
AssertThat(ts_stack_last_external_token(stack, 0), Equals(trees[1]));
ts_stack_set_last_external_token(stack, 0, subtrees[1]);
AssertThat(ts_stack_last_external_token(stack, 0), Equals(subtrees[1]));
ts_stack_copy_version(stack, 0);
AssertThat(ts_stack_last_external_token(stack, 1), Equals(trees[1]));
AssertThat(ts_stack_last_external_token(stack, 1), Equals(subtrees[1]));
ts_stack_set_last_external_token(stack, 0, trees[2]);
AssertThat(ts_stack_last_external_token(stack, 0), Equals(trees[2]));
ts_stack_set_last_external_token(stack, 0, subtrees[2]);
AssertThat(ts_stack_last_external_token(stack, 0), Equals(subtrees[2]));
});
it("does not merge stack versions with different external token states", [&]() {
ts_external_token_state_init(&trees[1]->external_token_state, "abcd", 2);
ts_external_token_state_init(&trees[2]->external_token_state, "ABCD", 2);
ts_external_scanner_state_init(&mutate(subtrees[1])->external_scanner_state, "abcd", 2);
ts_external_scanner_state_init(&mutate(subtrees[2])->external_scanner_state, "ABCD", 2);
ts_stack_copy_version(stack, 0);
push(0, trees[0], 5);
push(1, trees[0], 5);
push(0, subtrees[0], 5);
push(1, subtrees[0], 5);
ts_stack_set_last_external_token(stack, 0, trees[1]);
ts_stack_set_last_external_token(stack, 1, trees[2]);
ts_stack_set_last_external_token(stack, 0, subtrees[1]);
ts_stack_set_last_external_token(stack, 1, subtrees[2]);
AssertThat(ts_stack_merge(stack, 0, 1), IsFalse());
});
it("merges stack versions with identical external token states", [&]() {
ts_external_token_state_init(&trees[1]->external_token_state, "abcd", 2);
ts_external_token_state_init(&trees[2]->external_token_state, "abcd", 2);
ts_external_scanner_state_init(&mutate(subtrees[1])->external_scanner_state, "abcd", 2);
ts_external_scanner_state_init(&mutate(subtrees[2])->external_scanner_state, "abcd", 2);
ts_stack_copy_version(stack, 0);
push(0, trees[0], 5);
push(1, trees[0], 5);
push(0, subtrees[0], 5);
push(1, subtrees[0], 5);
ts_stack_set_last_external_token(stack, 0, trees[1]);
ts_stack_set_last_external_token(stack, 1, trees[2]);
ts_stack_set_last_external_token(stack, 0, subtrees[1]);
ts_stack_set_last_external_token(stack, 1, subtrees[2]);
AssertThat(ts_stack_merge(stack, 0, 1), IsTrue());
});
it("does not distinguish between an *empty* external token state and *no* external token state", [&]() {
ts_stack_copy_version(stack, 0);
push(0, trees[0], 5);
push(1, trees[0], 5);
push(0, subtrees[0], 5);
push(1, subtrees[0], 5);
ts_stack_set_last_external_token(stack, 0, trees[1]);
ts_stack_set_last_external_token(stack, 0, subtrees[1]);
AssertThat(ts_stack_merge(stack, 0, 1), IsTrue());
});
@ -595,7 +599,7 @@ std::ostream &operator<<(std::ostream &stream, const StackEntry &entry) {
return stream << "{" << entry.state << ", " << entry.depth << "}";
}
std::ostream &operator<<(std::ostream &stream, const TreeArray &array) {
std::ostream &operator<<(std::ostream &stream, const SubtreeArray &array) {
stream << "[";
bool first = true;
for (size_t i = 0; i < array.size; i++) {

View file

@ -0,0 +1,511 @@
#include "test_helper.h"
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include "runtime/subtree.h"
#include "runtime/length.h"
void assert_consistent(const Subtree *tree) {
if (tree->children.size == 0) return;
AssertThat(tree->children.contents[0]->padding, Equals<Length>(tree->padding));
Length total_children_size = length_zero();
for (size_t i = 0; i < tree->children.size; i++) {
const Subtree *child = tree->children.contents[i];
assert_consistent(child);
total_children_size = length_add(total_children_size, ts_subtree_total_size(child));
}
AssertThat(total_children_size, Equals<Length>(ts_subtree_total_size(tree)));
};
START_TEST
describe("Subtree", []() {
// Symbol ids used by the tests in this suite. Values start at 1 and
// increase by one per enumerator.
enum {
symbol1 = 1,
symbol2,
symbol3,
symbol4,
symbol5,
symbol6,
symbol7,
symbol8,
symbol9,
};
// Zero-initialized metadata table (30 entries) that backs the stub language.
TSSymbolMetadata metadata_list[30] = {};
// Stub language: only `symbol_metadata` is assigned.
// NOTE(review): the remaining TSLanguage fields are left uninitialized —
// presumably the subtree functions exercised here never read them; confirm.
TSLanguage language;
language.symbol_metadata = metadata_list;
// Pool handed to every subtree constructor/release call in this suite.
SubtreePool pool;
// A fresh pool is created before each test and deleted afterwards.
// NOTE(review): the meaning of the `10` argument is not visible here
// (presumably an initial capacity) — confirm against ts_subtree_pool_new.
before_each([&]() {
pool = ts_subtree_pool_new(10);
});
after_each([&]() {
ts_subtree_pool_delete(&pool);
});
describe("make_leaf", [&]() {
  it("does not mark the tree as fragile", [&]() {
    // A freshly created leaf must not be fragile on either edge.
    const Length leaf_padding = {2, {0, 1}};
    const Length leaf_size = {5, {0, 4}};
    const Subtree *leaf = ts_subtree_new_leaf(&pool, symbol1, leaf_padding, leaf_size, &language);

    AssertThat(leaf->fragile_left, IsFalse());
    AssertThat(leaf->fragile_right, IsFalse());

    ts_subtree_release(&pool, leaf);
  });
});
describe("make_error", [&]() {
  it("marks the tree as fragile", [&]() {
    // An error subtree built from zero lengths must be fragile on both edges.
    const Length zero = length_zero();
    const Subtree *error_subtree = ts_subtree_new_error(&pool, zero, zero, 'z', &language);

    AssertThat(error_subtree->fragile_left, IsTrue());
    AssertThat(error_subtree->fragile_right, IsTrue());

    ts_subtree_release(&pool, error_subtree);
  });
});
describe("make_node", [&]() {
  const Subtree *left_leaf, *right_leaf, *base_parent;

  // Retains both leaves and then builds a symbol3 node over them. Every
  // fixture in this group performs exactly this retain-then-construct
  // sequence (presumably because the new node consumes one reference to
  // each child).
  auto build_parent = [&]() {
    ts_subtree_retain(left_leaf);
    ts_subtree_retain(right_leaf);
    return ts_subtree_new_node(&pool, symbol3, tree_array({
      left_leaf,
      right_leaf,
    }), 0, &language);
  };

  before_each([&]() {
    left_leaf = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language);
    right_leaf = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language);
    base_parent = build_parent();
  });

  after_each([&]() {
    ts_subtree_release(&pool, left_leaf);
    ts_subtree_release(&pool, right_leaf);
    ts_subtree_release(&pool, base_parent);
  });

  it("computes its size and padding based on its child nodes", [&]() {
    // The parent's size covers the first child's content plus the second
    // child's padding and content; its padding is the first child's padding.
    const size_t expected_size =
      left_leaf->size.bytes + right_leaf->padding.bytes + right_leaf->size.bytes;
    AssertThat(base_parent->size.bytes, Equals<size_t>(expected_size));
    AssertThat(base_parent->padding.bytes, Equals<size_t>(left_leaf->padding.bytes));
  });

  describe("when the first node is fragile on the left side", [&]() {
    const Subtree *fragile_parent;

    before_each([&]() {
      // The leaves are handed out as const; strip constness to set flags.
      Subtree *writable_left = const_cast<Subtree *>(left_leaf);
      writable_left->fragile_left = true;
      writable_left->extra = true;
      fragile_parent = build_parent();
    });

    after_each([&]() {
      ts_subtree_release(&pool, fragile_parent);
    });

    it("records that it is fragile on the left side", [&]() {
      AssertThat(fragile_parent->fragile_left, IsTrue());
    });
  });

  describe("when the last node is fragile on the right side", [&]() {
    const Subtree *fragile_parent;

    before_each([&]() {
      Subtree *writable_right = const_cast<Subtree *>(right_leaf);
      writable_right->fragile_right = true;
      writable_right->extra = true;
      fragile_parent = build_parent();
    });

    after_each([&]() {
      ts_subtree_release(&pool, fragile_parent);
    });

    it("records that it is fragile on the right side", [&]() {
      AssertThat(fragile_parent->fragile_right, IsTrue());
    });
  });

  describe("when the outer nodes aren't fragile on their outer side", [&]() {
    const Subtree *sturdy_parent;

    before_each([&]() {
      // Only the inward-facing edges are fragile here: the right edge of the
      // first child and the left edge of the last child.
      const_cast<Subtree *>(left_leaf)->fragile_right = true;
      const_cast<Subtree *>(right_leaf)->fragile_left = true;
      sturdy_parent = build_parent();
    });

    after_each([&]() {
      ts_subtree_release(&pool, sturdy_parent);
    });

    it("records that it is not fragile", [&]() {
      AssertThat(sturdy_parent->fragile_left, IsFalse());
      AssertThat(sturdy_parent->fragile_right, IsFalse());
    });
  });
});
describe("edit", [&]() {
  const Subtree *root;

  // Builds an edit that lies entirely on row 0, so each point's column is
  // identical to the corresponding byte offset.
  auto single_row_edit = [](uint32_t start, uint32_t old_end, uint32_t new_end) {
    TSInputEdit edit;
    edit.start_byte = start;
    edit.start_point = {0, start};
    edit.old_end_byte = old_end;
    edit.old_end_point = {0, old_end};
    edit.new_end_byte = new_end;
    edit.new_end_point = {0, new_end};
    return edit;
  };

  before_each([&]() {
    // Three leaves, each with 2 bytes of padding followed by 3 bytes of
    // content, giving a root with padding 2 and size 13.
    root = ts_subtree_new_node(&pool, symbol1, tree_array({
      ts_subtree_new_leaf(&pool, symbol2, {2, {0, 2}}, {3, {0, 3}}, &language),
      ts_subtree_new_leaf(&pool, symbol3, {2, {0, 2}}, {3, {0, 3}}, &language),
      ts_subtree_new_leaf(&pool, symbol4, {2, {0, 2}}, {3, {0, 3}}, &language),
    }), 0, &language);

    AssertThat(root->padding, Equals<Length>({2, {0, 2}}));
    AssertThat(root->size, Equals<Length>({13, {0, 13}}));
  });

  after_each([&]() {
    ts_subtree_release(&pool, root);
  });

  it("does not mutate the argument", [&]() {
    TSInputEdit edit = single_row_edit(1, 1, 2);

    // Hold an extra reference so the original tree stays observable after
    // the edit call.
    ts_subtree_retain(root);
    const Subtree *edited = ts_subtree_edit(root, &edit, &pool);
    assert_consistent(root);
    assert_consistent(edited);

    AssertThat(root->has_changes, IsFalse());
    AssertThat(root->padding, Equals<Length>({2, {0, 2}}));
    AssertThat(root->size, Equals<Length>({13, {0, 13}}));

    const Subtree *first = root->children.contents[0];
    AssertThat(first->has_changes, IsFalse());
    AssertThat(first->padding, Equals<Length>({2, {0, 2}}));
    AssertThat(first->size, Equals<Length>({3, {0, 3}}));

    const Subtree *second = root->children.contents[1];
    AssertThat(second->has_changes, IsFalse());
    AssertThat(second->padding, Equals<Length>({2, {0, 2}}));
    AssertThat(second->size, Equals<Length>({3, {0, 3}}));

    ts_subtree_release(&pool, edited);
  });

  describe("edits within a tree's padding", [&]() {
    it("resizes the padding of the tree and its leftmost descendants", [&]() {
      TSInputEdit edit = single_row_edit(1, 1, 2);
      root = ts_subtree_edit(root, &edit, &pool);
      assert_consistent(root);

      AssertThat(root->has_changes, IsTrue());
      AssertThat(root->padding, Equals<Length>({3, {0, 3}}));
      AssertThat(root->size, Equals<Length>({13, {0, 13}}));

      const Subtree *first = root->children.contents[0];
      AssertThat(first->has_changes, IsTrue());
      AssertThat(first->padding, Equals<Length>({3, {0, 3}}));
      AssertThat(first->size, Equals<Length>({3, {0, 3}}));

      const Subtree *second = root->children.contents[1];
      AssertThat(second->has_changes, IsFalse());
      AssertThat(second->padding, Equals<Length>({2, {0, 2}}));
      AssertThat(second->size, Equals<Length>({3, {0, 3}}));
    });
  });

  describe("edits that start in a tree's padding but extend into its content", [&]() {
    it("shrinks the content to compensate for the expanded padding", [&]() {
      TSInputEdit edit = single_row_edit(1, 4, 5);
      root = ts_subtree_edit(root, &edit, &pool);
      assert_consistent(root);

      AssertThat(root->has_changes, IsTrue());
      AssertThat(root->padding, Equals<Length>({5, {0, 5}}));
      AssertThat(root->size, Equals<Length>({11, {0, 11}}));

      const Subtree *first = root->children.contents[0];
      AssertThat(first->has_changes, IsTrue());
      AssertThat(first->padding, Equals<Length>({5, {0, 5}}));
      AssertThat(first->size, Equals<Length>({1, {0, 1}}));
    });
  });

  describe("insertions at the edge of a tree's padding", [&]() {
    it("expands the tree's padding", [&]() {
      TSInputEdit edit = single_row_edit(2, 2, 4);
      root = ts_subtree_edit(root, &edit, &pool);
      assert_consistent(root);

      AssertThat(root->has_changes, IsTrue());
      AssertThat(root->padding, Equals<Length>({4, {0, 4}}));
      AssertThat(root->size, Equals<Length>({13, {0, 13}}));

      const Subtree *first = root->children.contents[0];
      AssertThat(first->has_changes, IsTrue());
      AssertThat(first->padding, Equals<Length>({4, {0, 4}}));
      AssertThat(first->size, Equals<Length>({3, {0, 3}}));

      AssertThat(root->children.contents[1]->has_changes, IsFalse());
    });
  });

  describe("replacements starting at the edge of a tree's padding", [&]() {
    it("resizes the content and not the padding", [&]() {
      TSInputEdit edit = single_row_edit(2, 4, 7);
      root = ts_subtree_edit(root, &edit, &pool);
      assert_consistent(root);

      AssertThat(root->has_changes, IsTrue());
      AssertThat(root->padding, Equals<Length>({2, {0, 2}}));
      AssertThat(root->size, Equals<Length>({16, {0, 16}}));

      const Subtree *first = root->children.contents[0];
      AssertThat(first->has_changes, IsTrue());
      AssertThat(first->padding, Equals<Length>({2, {0, 2}}));
      AssertThat(first->size, Equals<Length>({6, {0, 6}}));

      AssertThat(root->children.contents[1]->has_changes, IsFalse());
    });
  });

  describe("deletions that span more than one child node", [&]() {
    it("shrinks subsequent child nodes", [&]() {
      TSInputEdit edit = single_row_edit(1, 11, 4);
      root = ts_subtree_edit(root, &edit, &pool);
      assert_consistent(root);

      AssertThat(root->has_changes, IsTrue());
      AssertThat(root->padding, Equals<Length>({4, {0, 4}}));
      AssertThat(root->size, Equals<Length>({4, {0, 4}}));

      const Subtree *first = root->children.contents[0];
      AssertThat(first->has_changes, IsTrue());
      AssertThat(first->padding, Equals<Length>({4, {0, 4}}));
      AssertThat(first->size, Equals<Length>({0, {0, 0}}));

      const Subtree *second = root->children.contents[1];
      AssertThat(second->has_changes, IsTrue());
      AssertThat(second->padding, Equals<Length>({0, {0, 0}}));
      AssertThat(second->size, Equals<Length>({0, {0, 0}}));

      const Subtree *third = root->children.contents[2];
      AssertThat(third->has_changes, IsTrue());
      AssertThat(third->padding, Equals<Length>({1, {0, 1}}));
      AssertThat(third->size, Equals<Length>({3, {0, 3}}));
    });
  });

  describe("edits within a tree's range of scanned bytes", [&]() {
    it("marks preceding trees as changed", [&]() {
      // Pretend the first child was lexed by scanning 7 bytes, so an edit
      // at byte 6 falls inside the range it looked at.
      const_cast<Subtree *>(root->children.contents[0])->bytes_scanned = 7;

      TSInputEdit edit = single_row_edit(6, 7, 7);
      root = ts_subtree_edit(root, &edit, &pool);
      assert_consistent(root);

      AssertThat(root->children.contents[0]->has_changes, IsTrue());
    });
  });
});
describe("eq", [&]() {
  const Subtree *leaf;

  // Builds a symbol2 node over (first, second) and afterwards retains both
  // children, in that order — the construct-then-retain sequence used by the
  // tests in this group.
  auto pair_node = [&](const Subtree *first, const Subtree *second) {
    const Subtree *node = ts_subtree_new_node(&pool, symbol2, tree_array({
      first,
      second,
    }), 0, &language);
    ts_subtree_retain(first);
    ts_subtree_retain(second);
    return node;
  };

  before_each([&]() {
    leaf = ts_subtree_new_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language);
  });

  after_each([&]() {
    ts_subtree_release(&pool, leaf);
  });

  it("returns true for identical trees", [&]() {
    // Same symbol and byte lengths as `leaf`; only the row of each point
    // differs.
    const Subtree *twin = ts_subtree_new_leaf(&pool, symbol1, {2, {1, 1}}, {5, {1, 4}}, &language);
    AssertThat(ts_subtree_eq(leaf, twin), IsTrue());

    const Subtree *node = pair_node(leaf, twin);
    const Subtree *node_twin = pair_node(leaf, twin);
    AssertThat(ts_subtree_eq(node, node_twin), IsTrue());

    ts_subtree_release(&pool, twin);
    ts_subtree_release(&pool, node);
    ts_subtree_release(&pool, node_twin);
  });

  it("returns false for trees with different symbols", [&]() {
    const Subtree *other = ts_subtree_new_leaf(
      &pool, leaf->symbol + 1, leaf->padding, leaf->size, &language
    );
    AssertThat(ts_subtree_eq(leaf, other), IsFalse());
    ts_subtree_release(&pool, other);
  });

  it("returns false for trees with different options", [&]() {
    const Subtree *other = ts_subtree_new_leaf(
      &pool,
      leaf->symbol,
      leaf->padding,
      leaf->size,
      &language
    );
    // Flip visibility so the two leaves differ only in that flag.
    const_cast<Subtree *>(other)->visible = !leaf->visible;
    AssertThat(ts_subtree_eq(leaf, other), IsFalse());
    ts_subtree_release(&pool, other);
  });

  it("returns false for trees with different paddings or sizes", [&]() {
    // First variant: empty padding, same size.
    const Subtree *other = ts_subtree_new_leaf(&pool, leaf->symbol, {}, leaf->size, &language);
    AssertThat(ts_subtree_eq(leaf, other), IsFalse());
    ts_subtree_release(&pool, other);

    // Second variant: same padding, empty size.
    other = ts_subtree_new_leaf(&pool, symbol1, leaf->padding, {}, &language);
    AssertThat(ts_subtree_eq(leaf, other), IsFalse());
    ts_subtree_release(&pool, other);
  });

  it("returns false for trees with different children", [&]() {
    const Subtree *second_leaf = ts_subtree_new_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language);

    // Same two children, opposite order.
    const Subtree *forward = pair_node(leaf, second_leaf);
    const Subtree *reversed = pair_node(second_leaf, leaf);

    AssertThat(ts_subtree_eq(reversed, forward), IsFalse());
    AssertThat(ts_subtree_eq(forward, reversed), IsFalse());

    ts_subtree_release(&pool, second_leaf);
    ts_subtree_release(&pool, forward);
    ts_subtree_release(&pool, reversed);
  });
});
describe("last_external_token", [&]() {
  Length padding = {1, {0, 1}};
  Length size = {2, {0, 2}};

  // Flags the given subtree as having external tokens and hands it back.
  auto make_external = [](const Subtree *tree) {
    const_cast<Subtree *>(tree)->has_external_tokens = true;
    return tree;
  };

  it("returns the last serialized external token state in the given tree", [&]() {
    auto new_leaf = [&](TSSymbol symbol) {
      return ts_subtree_new_leaf(&pool, symbol, padding, size, &language);
    };

    // Shape of the tree under test (symbol numbers):
    //
    //   1
    //   ├── 2
    //   │   ├── 3   <- the only leaf flagged as external
    //   │   ├── 4
    //   │   └── 5
    //   └── 6
    //       ├── 7
    //       │   └── 8
    //       └── 9
    //
    // Subtrees are created bottom-up, left to right.
    const Subtree *external_leaf = make_external(new_leaf(symbol3));
    const Subtree *left_branch = ts_subtree_new_node(&pool, symbol2, tree_array({
      external_leaf,
      new_leaf(symbol4),
      new_leaf(symbol5),
    }), 0, &language);

    const Subtree *nested = ts_subtree_new_node(&pool, symbol7, tree_array({
      new_leaf(symbol8),
    }), 0, &language);
    const Subtree *right_branch = ts_subtree_new_node(&pool, symbol6, tree_array({
      nested,
      new_leaf(symbol9),
    }), 0, &language);

    const Subtree *root = ts_subtree_new_node(&pool, symbol1, tree_array({
      left_branch,
      right_branch,
    }), 0, &language);

    auto token = ts_subtree_last_external_token(root);
    AssertThat(token, Equals(external_leaf));

    ts_subtree_release(&pool, root);
  });
});
});
END_TEST

View file

@ -1,473 +1,257 @@
#include "test_helper.h"
#include <future>
#include "runtime/alloc.h"
#include "helpers/record_alloc.h"
#include "helpers/stream_methods.h"
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include "runtime/tree.h"
#include "runtime/length.h"
#include "helpers/spy_logger.h"
#include "helpers/stderr_logger.h"
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
#include "helpers/random_helpers.h"
#include "helpers/read_test_entries.h"
#include "helpers/encoding_helpers.h"
#include "helpers/tree_helpers.h"
void assert_consistent(const Tree *tree) {
if (tree->child_count == 0)
return;
AssertThat(tree->children[0]->padding, Equals<Length>(tree->padding));
Length total_children_size = length_zero();
for (size_t i = 0; i < tree->child_count; i++) {
Tree *child = tree->children[i];
AssertThat(child->context.offset, Equals(total_children_size));
assert_consistent(child);
total_children_size = length_add(total_children_size, ts_tree_total_size(child));
}
AssertThat(total_children_size, Equals<Length>(ts_tree_total_size(tree)));
};
TSPoint point(uint32_t row, uint32_t column) {
TSPoint result = {row, column};
return result;
}
START_TEST
describe("Tree", []() {
enum {
symbol1 = 1,
symbol2,
symbol3,
symbol4,
symbol5,
symbol6,
symbol7,
symbol8,
symbol9,
};
TSSymbolMetadata metadata_list[30] = {};
TSLanguage language;
language.symbol_metadata = metadata_list;
TreePool pool;
describe("Tree", [&]() {
TSParser *parser;
SpyInput *input;
TSTree *tree;
before_each([&]() {
ts_tree_pool_init(&pool);
record_alloc::start(true);
parser = ts_parser_new();
tree = nullptr;
input = nullptr;
});
after_each([&]() {
ts_tree_pool_delete(&pool);
if (tree) ts_tree_delete(tree);
if (input) delete input;
ts_parser_delete(parser);
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
describe("make_leaf", [&]() {
it("does not mark the tree as fragile", [&]() {
Tree *tree = ts_tree_make_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language);
AssertThat(tree->fragile_left, IsFalse());
AssertThat(tree->fragile_right, IsFalse());
auto assert_root_node = [&](const string &expected) {
TSNode node = ts_tree_root_node(tree);
char *node_string = ts_node_string(node);
string actual(node_string);
ts_free(node_string);
AssertThat(actual, Equals(expected));
};
ts_tree_release(&pool, tree);
describe("copy()", [&]() {
it("returns a tree that can be safely used while the current tree is edited", [&]() {
const TSLanguage *language = load_real_language("javascript");
ts_parser_set_language(parser, language);
string source_code = examples_for_language("javascript")[0].input;
input = new SpyInput(source_code, 32);
TSTree *original_tree = ts_parser_parse(parser, nullptr, input->input());
vector<future<TSTree *>> new_trees;
for (unsigned i = 0; i < 8; i++) {
TSTree *tree_copy = ts_tree_copy(original_tree);
new_trees.push_back(std::async([i, tree_copy, &source_code, language]() {
Generator random(TREE_SITTER_SEED + i);
TSTree *tree = tree_copy;
TSParser *parser = ts_parser_new();
ts_parser_set_language(parser, language);
SpyInput *input = new SpyInput(source_code, 1024);
for (unsigned j = 0; j < 10; j++) {
random.sleep_some();
size_t edit_position = random(utf8_char_count(input->content));
size_t deletion_size = random(utf8_char_count(input->content) - edit_position);
string inserted_text = random.words(random(4) + 1);
TSInputEdit edit = input->replace(edit_position, deletion_size, inserted_text);
ts_tree_edit(tree, &edit);
TSTree *new_tree = ts_parser_parse(parser, tree, input->input());
ts_tree_delete(tree);
tree = new_tree;
}
ts_parser_delete(parser);
delete input;
return tree;
}));
}
ts_tree_delete(original_tree);
for (auto &future : new_trees) {
future.wait();
TSTree *new_tree = future.get();
assert_consistent_tree_sizes(ts_tree_root_node(new_tree));
ts_tree_delete(new_tree);
}
});
});
describe("make_error", [&]() {
it("marks the tree as fragile", [&]() {
Tree *error_tree = ts_tree_make_error(
&pool,
length_zero(),
length_zero(),
'z',
&language
describe("get_changed_ranges()", [&]() {
before_each([&]() {
ts_parser_set_language(parser, load_real_language("javascript"));
input = new SpyInput("{a: null};\n", 3);
tree = ts_parser_parse(parser, nullptr, input->input());
assert_root_node(
"(program (expression_statement (object (pair (property_identifier) (null)))))"
);
AssertThat(error_tree->fragile_left, IsTrue());
AssertThat(error_tree->fragile_right, IsTrue());
ts_tree_release(&pool, error_tree);
});
});
describe("make_node", [&]() {
Tree *tree1, *tree2, *parent1;
before_each([&]() {
tree1 = ts_tree_make_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language);
tree2 = ts_tree_make_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language);
ts_tree_retain(tree1);
ts_tree_retain(tree2);
parent1 = ts_tree_make_node(&pool, symbol3, 2, tree_array({
tree1,
tree2,
}), 0, &language);
});
after_each([&]() {
ts_tree_release(&pool, tree1);
ts_tree_release(&pool, tree2);
ts_tree_release(&pool, parent1);
});
it("computes its size and padding based on its child nodes", [&]() {
AssertThat(parent1->size.bytes, Equals<size_t>(
tree1->size.bytes + tree2->padding.bytes + tree2->size.bytes
));
AssertThat(parent1->padding.bytes, Equals<size_t>(tree1->padding.bytes));
});
describe("when the first node is fragile on the left side", [&]() {
Tree *parent;
before_each([&]() {
tree1->fragile_left = true;
tree1->extra = true;
ts_tree_retain(tree1);
ts_tree_retain(tree2);
parent = ts_tree_make_node(&pool, symbol3, 2, tree_array({
tree1,
tree2,
}), 0, &language);
});
after_each([&]() {
ts_tree_release(&pool, parent);
});
it("records that it is fragile on the left side", [&]() {
AssertThat(parent->fragile_left, IsTrue());
});
});
describe("when the last node is fragile on the right side", [&]() {
Tree *parent;
before_each([&]() {
tree2->fragile_right = true;
tree2->extra = true;
ts_tree_retain(tree1);
ts_tree_retain(tree2);
parent = ts_tree_make_node(&pool, symbol3, 2, tree_array({
tree1,
tree2,
}), 0, &language);
});
after_each([&]() {
ts_tree_release(&pool, parent);
});
it("records that it is fragile on the right side", [&]() {
AssertThat(parent->fragile_right, IsTrue());
});
});
describe("when the outer nodes aren't fragile on their outer side", [&]() {
Tree *parent;
before_each([&]() {
tree1->fragile_right = true;
tree2->fragile_left = true;
ts_tree_retain(tree1);
ts_tree_retain(tree2);
parent = ts_tree_make_node(&pool, symbol3, 2, tree_array({
tree1,
tree2,
}), 0, &language);
});
after_each([&]() {
ts_tree_release(&pool, parent);
});
it("records that it is not fragile", [&]() {
AssertThat(parent->fragile_left, IsFalse());
AssertThat(parent->fragile_right, IsFalse());
});
});
});
describe("edit", [&]() {
Tree *tree = nullptr;
before_each([&]() {
tree = ts_tree_make_node(&pool, symbol1, 3, tree_array({
ts_tree_make_leaf(&pool, symbol2, {2, {0, 2}}, {3, {0, 3}}, &language),
ts_tree_make_leaf(&pool, symbol3, {2, {0, 2}}, {3, {0, 3}}, &language),
ts_tree_make_leaf(&pool, symbol4, {2, {0, 2}}, {3, {0, 3}}, &language),
}), 0, &language);
AssertThat(tree->padding, Equals<Length>({2, {0, 2}}));
AssertThat(tree->size, Equals<Length>({13, {0, 13}}));
});
after_each([&]() {
ts_tree_release(&pool, tree);
});
describe("edits within a tree's padding", [&]() {
it("resizes the padding of the tree and its leftmost descendants", [&]() {
TSInputEdit edit;
edit.start_byte = 1;
edit.bytes_removed = 0;
edit.bytes_added = 1;
edit.start_point = {0, 1};
edit.extent_removed = {0, 0};
edit.extent_added = {0, 1};
ts_tree_edit(tree, &edit);
assert_consistent(tree);
AssertThat(tree->has_changes, IsTrue());
AssertThat(tree->padding, Equals<Length>({3, {0, 3}}));
AssertThat(tree->size, Equals<Length>({13, {0, 13}}));
AssertThat(tree->children[0]->has_changes, IsTrue());
AssertThat(tree->children[0]->padding, Equals<Length>({3, {0, 3}}));
AssertThat(tree->children[0]->size, Equals<Length>({3, {0, 3}}));
AssertThat(tree->children[1]->has_changes, IsFalse());
AssertThat(tree->children[1]->padding, Equals<Length>({2, {0, 2}}));
AssertThat(tree->children[1]->size, Equals<Length>({3, {0, 3}}));
});
});
describe("edits that start in a tree's padding but extend into its content", [&]() {
it("shrinks the content to compensate for the expanded padding", [&]() {
TSInputEdit edit;
edit.start_byte = 1;
edit.bytes_removed = 3;
edit.bytes_added = 4;
edit.start_point = {0, 1};
edit.extent_removed = {0, 3};
edit.extent_added = {0, 4};
ts_tree_edit(tree, &edit);
assert_consistent(tree);
AssertThat(tree->has_changes, IsTrue());
AssertThat(tree->padding, Equals<Length>({5, {0, 5}}));
AssertThat(tree->size, Equals<Length>({11, {0, 11}}));
AssertThat(tree->children[0]->has_changes, IsTrue());
AssertThat(tree->children[0]->padding, Equals<Length>({5, {0, 5}}));
AssertThat(tree->children[0]->size, Equals<Length>({1, {0, 1}}));
});
});
describe("insertions at the edge of a tree's padding", [&]() {
it("expands the tree's padding", [&]() {
TSInputEdit edit;
edit.start_byte = 2;
edit.bytes_removed = 0;
edit.bytes_added = 2;
edit.start_point = {0, 2};
edit.extent_removed = {0, 0};
edit.extent_added = {0, 2};
ts_tree_edit(tree, &edit);
assert_consistent(tree);
assert_consistent(tree);
AssertThat(tree->has_changes, IsTrue());
AssertThat(tree->padding, Equals<Length>({4, {0, 4}}));
AssertThat(tree->size, Equals<Length>({13, {0, 13}}));
AssertThat(tree->children[0]->has_changes, IsTrue());
AssertThat(tree->children[0]->padding, Equals<Length>({4, {0, 4}}));
AssertThat(tree->children[0]->size, Equals<Length>({3, {0, 3}}));
AssertThat(tree->children[1]->has_changes, IsFalse());
});
});
describe("replacements starting at the edge of a tree's padding", [&]() {
it("resizes the content and not the padding", [&]() {
TSInputEdit edit;
edit.start_byte = 2;
edit.bytes_removed = 2;
edit.bytes_added = 5;
edit.start_point = {0, 2};
edit.extent_removed = {0, 2};
edit.extent_added = {0, 5};
ts_tree_edit(tree, &edit);
assert_consistent(tree);
AssertThat(tree->has_changes, IsTrue());
AssertThat(tree->padding, Equals<Length>({2, {0, 2}}));
AssertThat(tree->size, Equals<Length>({16, {0, 16}}));
AssertThat(tree->children[0]->has_changes, IsTrue());
AssertThat(tree->children[0]->padding, Equals<Length>({2, {0, 2}}));
AssertThat(tree->children[0]->size, Equals<Length>({6, {0, 6}}));
AssertThat(tree->children[1]->has_changes, IsFalse());
});
});
describe("deletions that span more than one child node", [&]() {
  // Replaces 10 bytes starting at byte offset 1 with 3 bytes, then checks the
  // has_changes flag, padding and size of the root and of its first three
  // children.
  it("shrinks subsequent child nodes", [&]() {
    TSInputEdit edit;
    edit.start_byte = 1;
    edit.bytes_removed = 10;
    edit.bytes_added = 3;
    edit.start_point = {0, 1};
    edit.extent_removed = {0, 10};
    edit.extent_added = {0, 3};

    ts_tree_edit(tree, &edit);
    // A single consistency check is enough: nothing mutates the tree between
    // the edit and the assertions below.
    assert_consistent(tree);

    // Root node.
    AssertThat(tree->has_changes, IsTrue());
    AssertThat(tree->padding, Equals<Length>({4, {0, 4}}));
    AssertThat(tree->size, Equals<Length>({4, {0, 4}}));

    // First child: expected to keep only padding.
    AssertThat(tree->children[0]->has_changes, IsTrue());
    AssertThat(tree->children[0]->padding, Equals<Length>({4, {0, 4}}));
    AssertThat(tree->children[0]->size, Equals<Length>({0, {0, 0}}));

    // Second child: expected to be empty.
    AssertThat(tree->children[1]->has_changes, IsTrue());
    AssertThat(tree->children[1]->padding, Equals<Length>({0, {0, 0}}));
    AssertThat(tree->children[1]->size, Equals<Length>({0, {0, 0}}));

    // Third child.
    AssertThat(tree->children[2]->has_changes, IsTrue());
    AssertThat(tree->children[2]->padding, Equals<Length>({1, {0, 1}}));
    AssertThat(tree->children[2]->size, Equals<Length>({3, {0, 3}}));
  });
});
describe("edits within a tree's range of scanned bytes", [&]() {
  // Sets the first child's bytes_scanned to 7, then replaces one byte at
  // offset 6 with one byte and expects that child to be flagged as changed.
  it("marks preceding trees as changed", [&]() {
    // A one-byte-for-one-byte replacement at offset 6.
    TSInputEdit edit;
    edit.start_byte = 6;
    edit.start_point = {0, 6};
    edit.bytes_removed = 1;
    edit.extent_removed = {0, 1};
    edit.bytes_added = 1;
    edit.extent_added = {0, 1};

    // The scanned range must be in place before the edit is applied.
    tree->children[0]->bytes_scanned = 7;

    ts_tree_edit(tree, &edit);
    assert_consistent(tree);

    AssertThat(tree->children[0]->has_changes, IsTrue());
  });
});
});
// Tests for ts_tree_eq: equality of leaves and of parent nodes built from them.
describe("eq", [&]() {
// Shared fixture leaf; created before and released after every test below.
Tree *leaf;
before_each([&]() {
leaf = ts_tree_make_leaf(&pool, symbol1, {2, {0, 1}}, {5, {0, 4}}, &language);
});
after_each([&]() {
ts_tree_release(&pool, leaf);
});
it("returns true for identical trees", [&]() {
// NOTE(review): leaf_copy has the same byte counts as `leaf` but its points
// use row 1 where `leaf` uses row 0, and the test still expects equality.
// Presumably ts_tree_eq does not compare points; confirm.
Tree *leaf_copy = ts_tree_make_leaf(&pool, symbol1, {2, {1, 1}}, {5, {1, 4}}, &language);
AssertThat(ts_tree_eq(leaf, leaf_copy), IsTrue());
Tree *parent = ts_tree_make_node(&pool, symbol2, 2, tree_array({
leaf,
leaf_copy,
}), 0, &language);
// Each leaf is retained once per parent it is placed in; presumably
// ts_tree_make_node takes over one reference per child. TODO confirm.
ts_tree_retain(leaf);
ts_tree_retain(leaf_copy);
Tree *parent_copy = ts_tree_make_node(&pool, symbol2, 2, tree_array({
leaf,
leaf_copy,
}), 0, &language);
ts_tree_retain(leaf);
ts_tree_retain(leaf_copy);
AssertThat(ts_tree_eq(parent, parent_copy), IsTrue());
// `leaf` itself is released by after_each.
ts_tree_release(&pool, leaf_copy);
ts_tree_release(&pool, parent);
ts_tree_release(&pool, parent_copy);
});
it("returns false for trees with different symbols", [&]() {
// Same padding and size as `leaf`; only the symbol differs.
Tree *different_leaf = ts_tree_make_leaf(
&pool,
leaf->symbol + 1,
leaf->padding,
leaf->size,
&language
);
AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
ts_tree_release(&pool, different_leaf);
});
it("returns false for trees with different options", [&]() {
// Built from the same symbol, padding and size as `leaf`; only the
// `visible` flag is flipped afterwards.
Tree *different_leaf = ts_tree_make_leaf(&pool, leaf->symbol, leaf->padding, leaf->size, &language);
different_leaf->visible = !leaf->visible;
AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
ts_tree_release(&pool, different_leaf);
});
it("returns false for trees with different paddings or sizes", [&]() {
// First case: empty padding, same size.
Tree *different_leaf = ts_tree_make_leaf(&pool, leaf->symbol, {}, leaf->size, &language);
AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
ts_tree_release(&pool, different_leaf);
// Second case: same padding, empty size.
different_leaf = ts_tree_make_leaf(&pool, symbol1, leaf->padding, {}, &language);
AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
ts_tree_release(&pool, different_leaf);
});
it("returns false for trees with different children", [&]() {
Tree *leaf2 = ts_tree_make_leaf(&pool, symbol2, {1, {0, 1}}, {3, {0, 3}}, &language);
// Two parents holding the same two leaves in opposite order.
Tree *parent = ts_tree_make_node(&pool, symbol2, 2, tree_array({
leaf,
leaf2,
}), 0, &language);
ts_tree_retain(leaf);
ts_tree_retain(leaf2);
Tree *different_parent = ts_tree_make_node(&pool, symbol2, 2, tree_array({
leaf2,
leaf,
}), 0, &language);
ts_tree_retain(leaf2);
ts_tree_retain(leaf);
// Checked in both argument orders.
AssertThat(ts_tree_eq(different_parent, parent), IsFalse());
AssertThat(ts_tree_eq(parent, different_parent), IsFalse());
ts_tree_release(&pool, leaf2);
ts_tree_release(&pool, parent);
ts_tree_release(&pool, different_parent);
});
});
// Tests for ts_tree_last_external_token.
// NOTE(review): the closing lines of this describe block and of the
// make_external lambda are not visible directly after this span; verify the
// structure against the real file.
describe("last_external_token", [&]() {
// Padding and size shared by every leaf built in this group.
Length padding = {1, {0, 1}};
Length size = {2, {0, 2}};
// Flags the given tree as having external tokens and hands the same pointer back.
auto make_external = [](Tree *tree) {
tree->has_external_tokens = true;
return tree;
// Applies the edit produced by `fn` to the current tree, re-parses the input,
// and returns the ranges that ts_tree_get_changed_ranges reports between the
// edited old tree and the re-parsed one. The old tree is deleted and `tree`
// is rebound to the new tree before returning.
auto get_changed_ranges_for_edit = [&](function<TSInputEdit()> fn) -> vector<TSRange> {
  TSInputEdit applied_edit = fn();
  ts_tree_edit(tree, &applied_edit);

  TSTree *reparsed = ts_parser_parse(parser, tree, input->input());

  uint32_t range_count = 0;
  TSRange *raw_ranges = ts_tree_get_changed_ranges(tree, reparsed, &range_count);

  // The old tree is no longer needed once the comparison has been made.
  ts_tree_delete(tree);
  tree = reparsed;

  // Copy the ranges out of the C array before freeing it.
  vector<TSRange> changed(raw_ranges, raw_ranges + range_count);
  ts_free(raw_ranges);
  return changed;
};
// NOTE(review): two tests appear interleaved in this span, one building a
// Tree hierarchy for ts_tree_last_external_token and one exercising
// get_changed_ranges_for_edit. Presumably these are lines from two different
// files or diff hunks shown together; confirm against the real files before
// relying on statement order here.
it("returns the last serialized external token state in the given tree", [&]() {
Tree *tree1, *tree2, *tree3, *tree4, *tree5, *tree6, *tree7, *tree8, *tree9;
it("reports changes when one token has been updated", [&]() {
// Replace `null` with `nothing`
auto ranges = get_changed_ranges_for_edit([&]() {
return input->replace(input->content.find("ull"), 1, "othing");
});
// Expected: one range from the start of `nothing` to the first `}`.
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find("nothing")),
point(0, input->content.find("}"))
},
})));
// Builds a tree rooted at tree1 with two inner nodes (tree2, tree6); tree3
// is the only leaf passed through make_external.
tree1 = ts_tree_make_node(&pool, symbol1, 2, tree_array({
(tree2 = ts_tree_make_node(&pool, symbol2, 3, tree_array({
(tree3 = make_external(ts_tree_make_leaf(&pool, symbol3, padding, size, &language))),
(tree4 = ts_tree_make_leaf(&pool, symbol4, padding, size, &language)),
(tree5 = ts_tree_make_leaf(&pool, symbol5, padding, size, &language)),
}), 0, &language)),
(tree6 = ts_tree_make_node(&pool, symbol6, 2, tree_array({
(tree7 = ts_tree_make_node(&pool, symbol7, 1, tree_array({
(tree8 = ts_tree_make_leaf(&pool, symbol8, padding, size, &language)),
}), 0, &language)),
(tree9 = ts_tree_make_leaf(&pool, symbol9, padding, size, &language)),
}), 0, &language)),
}), 0, &language);
// Replace `nothing` with `null` again
ranges = get_changed_ranges_for_edit([&]() {
return input->undo();
});
// Expected: one range from the start of `null` to the first `}`.
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find("null")),
point(0, input->content.find("}"))
},
})));
});
// The lookup on the root is expected to yield tree3, the only leaf that was
// passed through make_external above.
auto token = ts_tree_last_external_token(tree1);
AssertThat(token, Equals(tree3));
// Regression test: inserting and removing a leading newline must leave the
// parsed structure unchanged and report no changed ranges.
it("reports no changes when leading whitespace has changed (regression)", [&]() {
// Sets the input's chars_per_chunk to 80 before any edit is made.
// NOTE(review): presumably this controls how much text the input hands to
// the parser per read; confirm.
input->chars_per_chunk = 80;
// NOTE(review): this release of tree1 looks like it belongs to the
// last_external_token test rather than to this one (tree1 is not otherwise
// used here); presumably a stray line from an adjacent diff hunk. Confirm.
ts_tree_release(&pool, tree1);
// Insert leading whitespace
auto ranges = get_changed_ranges_for_edit([&]() {
return input->replace(0, 0, "\n");
});
assert_root_node(
"(program (expression_statement (object (pair (property_identifier) (null)))))"
);
AssertThat(ranges, IsEmpty());
// Remove leading whitespace
ranges = get_changed_ranges_for_edit([&]() {
return input->undo();
});
assert_root_node(
"(program (expression_statement (object (pair (property_identifier) (null)))))"
);
AssertThat(ranges, IsEmpty());
// Insert leading whitespace again
ranges = get_changed_ranges_for_edit([&]() {
return input->replace(0, 0, "\n");
});
assert_root_node(
"(program (expression_statement (object (pair (property_identifier) (null)))))"
);
AssertThat(ranges, IsEmpty());
});
// Appends key-value pairs to the object literal, then undoes the insertions
// one at a time, checking the reported changed ranges (and, from the second
// step on, the parsed structure) after every step.
it("reports changes when tokens have been appended", [&]() {
  // Step 1: add a second key-value pair just before the closing brace.
  auto changed = get_changed_ranges_for_edit([&]() {
    return input->replace(input->content.find("}"), 0, ", b: false");
  });
  vector<TSRange> expected = {
    TSRange{
      point(0, input->content.find(",")),
      point(0, input->content.find("}"))
    },
  };
  AssertThat(changed, Equals(expected));

  // Step 2: add a third key-value pair in between the first two.
  changed = get_changed_ranges_for_edit([&]() {
    return input->replace(input->content.find(", b"), 0, ", c: 1");
  });
  assert_root_node(
    "(program (expression_statement (object "
      "(pair (property_identifier) (null)) "
      "(pair (property_identifier) (number)) "
      "(pair (property_identifier) (false)))))"
  );
  expected = {
    TSRange{
      point(0, input->content.find(", c")),
      point(0, input->content.find(", b"))
    },
  };
  AssertThat(changed, Equals(expected));

  // Step 3: undo the last insertion, which deletes the middle pair.
  changed = get_changed_ranges_for_edit([&]() {
    return input->undo();
  });
  assert_root_node(
    "(program (expression_statement (object "
      "(pair (property_identifier) (null)) "
      "(pair (property_identifier) (false)))))"
  );
  AssertThat(changed, IsEmpty());

  // Step 4: undo the first insertion, which deletes the second pair.
  changed = get_changed_ranges_for_edit([&]() {
    return input->undo();
  });
  assert_root_node(
    "(program (expression_statement (object "
      "(pair (property_identifier) (null)))))"
  );
  AssertThat(changed, IsEmpty());
});
// Inserts `b === ` in front of `null`, so the pair's value parses as a
// binary_expression, and checks the single changed range that is reported.
it("reports changes when trees have been wrapped", [&]() {
  // Wrap the existing value in a binary expression.
  auto changed = get_changed_ranges_for_edit([&]() {
    return input->replace(input->content.find("null"), 0, "b === ");
  });

  assert_root_node(
    "(program (expression_statement (object "
      "(pair (property_identifier) (binary_expression (identifier) (null))))))"
  );

  // Expected: one range from the inserted text up to the closing brace.
  vector<TSRange> expected = {
    TSRange{
      point(0, input->content.find("b ===")),
      point(0, input->content.find("}"))
    },
  };
  AssertThat(changed, Equals(expected));
});
});
});

View file

@ -5,6 +5,8 @@
#include "tree_sitter/compiler.h"
#include "tree_sitter/runtime.h"
// Declaration only; the variable is defined in another translation unit.
// NOTE(review): presumably the seed used for the test run's random generator; confirm.
extern int TREE_SITTER_SEED;
// Opens and closes the namespace with nothing in it, so the name exists.
// NOTE(review): presumably so that a using-directive naming it compiles even
// when nothing else has declared it yet; confirm.
namespace tree_sitter {}
// NOTE(review): a file-scope using-directive affects every file that includes
// this one; tests appear to rely on it (unqualified `vector`, `function`).
using namespace std;

View file

@ -1,17 +1,18 @@
#include "test_helper.h"
#include "helpers/random_helpers.h"
// Global seed for the test run; overwritten in main() before the tests start.
int TREE_SITTER_SEED = 0;
// Test-runner entry point: picks a random seed (from the TREE_SITTER_SEED
// environment variable when set, otherwise from get_time_as_seed()), prints
// it, reseeds the random generator and hands control to bandit.
//
// NOTE(review): both a local `seed` and the global TREE_SITTER_SEED are
// assigned, printed and used for reseeding below. This looks like the old and
// new lines of a diff shown together. If both sets really execute,
// get_time_as_seed() is called twice and the two seeds may differ. Confirm
// which set is intended.
int main(int argc, char *argv[]) {
int seed;
const char *seed_env = getenv("TREE_SITTER_SEED");
if (seed_env) {
// The environment value is parsed with atoi, so non-numeric text yields 0.
seed = atoi(seed_env);
TREE_SITTER_SEED = atoi(seed_env);
} else {
seed = get_time_as_seed();
TREE_SITTER_SEED = get_time_as_seed();
}
// Printed so the seed of a run can be read back from its output.
printf("Random seed: %d\n", seed);
random_reseed(seed);
printf("Random seed: %d\n", TREE_SITTER_SEED);
default_generator.reseed(TREE_SITTER_SEED);
return bandit::run(argc, argv);
}

View file

@ -66,11 +66,11 @@
'test/integration/fuzzing-examples.cc',
'test/integration/real_grammars.cc',
'test/integration/test_grammars.cc',
'test/runtime/document_test.cc',
'test/runtime/language_test.cc',
'test/runtime/node_test.cc',
'test/runtime/parser_test.cc',
'test/runtime/stack_test.cc',
'test/runtime/subtree_test.cc',
'test/runtime/tree_test.cc',
'test/tests.cc',
],
@ -101,7 +101,7 @@
'cflags_cc': ['-std=c++14'],
'conditions': [
['OS=="linux"', {
'libraries': ['-ldl'],
'libraries': ['-ldl', '-lpthread'],
}]
],
'xcode_settings': {