Start fleshing out docs site
This commit is contained in:
parent
fb14cb0737
commit
7ad50f2731
10 changed files with 339 additions and 176 deletions
|
|
@ -1 +1,2 @@
|
|||
markdown: kramdown
|
||||
theme: jekyll-theme-cayman
|
||||
|
|
|
|||
123
docs/_layouts/default.html
Normal file
123
docs/_layouts/default.html
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
|
||||
<link rel="stylesheet" type="text/css" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.1.0/css/bootstrap.min.css" media="screen">
|
||||
<link rel="stylesheet" href="{{ '/assets/css/style.css?v=' | append: site.github.build_revision | relative_url }}" media="screen" type="text/css">
|
||||
<title>{{ page.title }}</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div style="position: fixed; width: 100%;">
|
||||
<div class="container" style="height: 100%;">
|
||||
<div class="row" style="height: 0;">
|
||||
<div class="col-md-4">
|
||||
<nav id="table-of-contents">
|
||||
<a class="logo" href="https://github.com/tree-sitter/tree-sitter">
|
||||
<img src="{{ '/assets/images/tree-sitter-small.png' | relative_url }}" width=200 height=200 />
|
||||
</a>
|
||||
|
||||
{% for other_page in site.html_pages %}
|
||||
{% if page.title == other_page.title %}
|
||||
<li class="toc-section active">
|
||||
<a class="nav-link" href="{{ other_page.url | relative_url }}">
|
||||
{{ other_page.title }}
|
||||
</a>
|
||||
|
||||
<ul id="current-page-table-of-contents" class="nav navbar">
|
||||
{% capture whitespace %}
{% comment %}
Builds a Markdown bullet list (`my_toc`) with one entry per header found in
the rendered page `content`. Everything runs inside `capture whitespace` so
the blank lines produced by the tags below are swallowed rather than emitted.
Only `my_toc` is used afterwards.
{% endcomment %}
{% assign min_header = 2 %}
{% assign max_header = 6 %}
{% assign nodes = content | split: "<h" %}
{% assign first_header = true %}
{% for node in nodes %}
{% if node == "" %}
{% continue %}
{% endif %}

{% comment %}
The first character after "<h" is the header level. For non-header tags such
as "<hr" it is not a digit; `times: 1` then yields 0, which the range check
below rejects.
{% endcomment %}
{% assign header_level = node | replace: '"', '' | slice: 0, 1 | times: 1 %}

{% if header_level < min_header or header_level > max_header %}
{% continue %}
{% endif %}

{% comment %}
The first accepted header sets the baseline level for indentation.
{% endcomment %}
{% if first_header %}
{% assign first_header = false %}
{% assign min_header = header_level %}
{% endif %}

{% assign indent_level = header_level | minus: min_header | add: 1 %}
{% assign header_content = node | split: '</h' %}
{% assign header_content = header_content[0] %}

{% comment %}
Extract the value of the header's id="..." attribute for the link target.
{% endcomment %}
{% assign html_id = header_content | split: 'id="' %}
{% assign html_id = html_id[1] | split: '"' %}
{% assign html_id = html_id[0] %}

{% comment %}
Drop everything up to and including the first ">" (the remainder of the
opening tag) so that only the header's inner content is left.
{% endcomment %}
{% capture header_attrs_to_strip %}{{ header_content | split: '>' | first }}>{% endcapture %}
{% assign header = header_content | replace: header_attrs_to_strip, '' %}

{% assign space = '' %}
{% for i in (1..indent_level) %}
{% assign space = space | prepend: ' ' %}
{% endfor %}

{% comment %}
Append one list item per header. The line break inside the capture is
significant: it separates the Markdown list items.
{% endcomment %}
{% capture my_toc %}{{ my_toc }}
{{ space }}- [{{ header }}](#{{ html_id }}){: .nav-link}{% endcapture %}

{% endfor %}
{% endcapture %}
|
||||
{{ my_toc | strip | markdownify | strip }}
|
||||
</ul>
|
||||
</li>
|
||||
{% else %}
|
||||
<li class="toc-section">
|
||||
<a class="nav-link" href="{{ other_page.url | relative_url }}">
|
||||
{{ other_page.title }}
|
||||
</a>
|
||||
</li>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<main class="container">
|
||||
<div class="row">
|
||||
<div class="col-md-4" style="pointer-events: none;">
|
||||
</div>
|
||||
|
||||
<div class="col-md-8 content">
|
||||
<div id="main-content">
|
||||
{{ content }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<script
|
||||
src="https://code.jquery.com/jquery-3.3.1.min.js"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script
|
||||
src="https://maxcdn.bootstrapcdn.com/bootstrap/4.1.0/js/bootstrap.bundle.min.js">
|
||||
</script>
|
||||
|
||||
<script>
|
||||
// Activate Bootstrap's scrollspy on the current page's table of contents,
// but only when the document is taller than the viewport.
if (document.body.scrollHeight > window.innerHeight) {
  $(document.body).scrollspy({
    target: '#current-page-table-of-contents',
    offset: 40
  });
}

// Turn every heading that carries an id into a link to itself.
// The existing child nodes are wrapped in place instead of rebuilding the
// heading from an HTML string: re-inserting `.text()` as markup would let a
// heading containing "<", ">" or "&" be parsed as HTML, and would discard
// inline formatting such as <code>.
$('h1, h2, h3, h4, h5, h6').filter('[id]').each(function() {
  var heading = $(this);
  heading.wrapInner($('<a>').attr('href', '#' + heading.attr('id')));
});
|
||||
</script>
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
|
||||
<link rel="stylesheet" type="text/css" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" media="screen">
|
||||
<link rel="stylesheet" type="text/css" href="{{ site.baseurl }}/css/style.css" media="screen">
|
||||
<title>{{ page.title }}</title>
|
||||
</head>
|
||||
|
||||
<body data-spy="scroll" data-target="#table-of-contents" data-offset="40">
|
||||
|
||||
<!-- Fixed sidebar -->
|
||||
<div style="position: fixed; width: 100%;">
|
||||
<div class="container" style="height: 0;">
|
||||
<div class="row" style="height: 0;">
|
||||
<div class="col-md-3">
|
||||
<nav class="nav navbar navbar-light" id="table-of-contents"></nav>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Main content -->
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col-md-3" style="pointer-events: none;">
|
||||
</div>
|
||||
|
||||
<div class="col-md-9 content">
|
||||
<div id="main-content">
|
||||
{{ content }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- Generate a table of contents based on header elements -->
|
||||
<script type="text/javascript">
|
||||
// Build a two-level table of contents from the h2/h3 headings inside
// #main-content and append it to #table-of-contents. Headings without an id
// are skipped because they cannot be linked to.
var mainContent = document.getElementById('main-content');
var tableOfContents = document.getElementById('table-of-contents');
var headers = mainContent.querySelectorAll('h2, h3');
// Sub-list of the most recent h2; h3 entries are nested under it.
var lastSubnav = null;
for (let i = 0, length = headers.length; i < length; i++) {
  var header = headers[i];
  if (!header.id) continue;

  var li = document.createElement('li');
  li.className = 'navbar-item';
  var link = document.createElement('a');
  link.href = '#' + header.id;
  link.innerText = header.innerText;
  link.className = 'nav-link';
  li.appendChild(link);

  if (header.tagName === 'H2') {
    lastSubnav = document.createElement('ul');
    lastSubnav.className = 'nav navbar';
    li.appendChild(lastSubnav);
    tableOfContents.appendChild(li);
  } else if (lastSubnav) {
    lastSubnav.appendChild(li);
  } else {
    // An h3 that precedes every h2 has no parent section yet; list it at the
    // top level instead of throwing on a missing sub-list.
    tableOfContents.appendChild(li);
  }
}
|
||||
</script>
|
||||
|
||||
<script
|
||||
src="https://code.jquery.com/jquery-3.3.1.min.js"
|
||||
crossorigin="anonymous"></script>
|
||||
<script
|
||||
src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.bundle.min.js"></script>
|
||||
39
docs/assets/css/style.scss
Normal file
39
docs/assets/css/style.scss
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
---
|
||||
---
|
||||
|
||||
@import 'jekyll-theme-cayman';
|
||||
|
||||
#main-content, #table-of-contents {
|
||||
padding-top: 20px;
|
||||
}
|
||||
|
||||
#table-of-contents {
|
||||
border-right: 1px solid #ddd;
|
||||
}
|
||||
|
||||
.nav-link.active {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.logo {
|
||||
padding: 20px;
|
||||
padding-top: 0;
|
||||
display: block;
|
||||
}
|
||||
|
||||
.toc-section, .logo {
|
||||
border-bottom: 1px solid #ccc;
|
||||
}
|
||||
|
||||
.toc-section.active {
|
||||
background-color: #edffcb;
|
||||
}
|
||||
|
||||
li {
|
||||
display: block;
|
||||
}
|
||||
|
||||
body {
|
||||
overflow-y: scroll;
|
||||
padding-bottom: 100px;
|
||||
}
|
||||
BIN
docs/assets/images/tree-sitter-small.png
Normal file
BIN
docs/assets/images/tree-sitter-small.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 112 KiB |
|
|
@ -1,13 +0,0 @@
|
|||
#main-content, #table-of-contents {
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
#table-of-contents {
|
||||
padding: 10px;
|
||||
border-radius: 10px;
|
||||
border: 1px solid #ddd;
|
||||
}
|
||||
|
||||
.nav-link.active {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
|
@ -1,10 +1,50 @@
|
|||
---
|
||||
title: Introduction
|
||||
---
|
||||
|
||||
# Introduction
|
||||
|
||||
Tree-sitter is a library for parsing source code. It aims to be:
|
||||
|
||||
* **General** enough to parse any programming language
|
||||
* **Dependency-free** and written in pure C so that it can be embedded in any application
|
||||
* **Fast** and incremental so that it can be used in a text editor
|
||||
* **Robust** enough to provide useful results even in the presence of syntax errors
|
||||
* **General** enough to parse any programming language
|
||||
* **Dependency-free** (and written in pure C) so that it can be embedded in any application
|
||||
|
||||
## Table of contents
|
||||
### Language Bindings
|
||||
|
||||
1. [Creating parsers](creating-parsers.md)
|
||||
There are currently bindings that allow Tree-sitter to be used from the following languages:
|
||||
|
||||
* [JavaScript](https://github.com/tree-sitter/node-tree-sitter)
|
||||
* [Rust](https://github.com/tree-sitter/rust-tree-sitter)
|
||||
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
|
||||
* [Ruby](https://github.com/tree-sitter/ruby-tree-sitter)
|
||||
|
||||
### Available Parsers
|
||||
|
||||
There are fairly complete parsers for the following languages:
|
||||
|
||||
* [Bash](https://github.com/tree-sitter/tree-sitter-bash)
|
||||
* [C](https://github.com/tree-sitter/tree-sitter-c)
|
||||
* [C++](https://github.com/tree-sitter/tree-sitter-cpp)
|
||||
* [Go](https://github.com/tree-sitter/tree-sitter-go)
|
||||
* [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript)
|
||||
* [PHP](https://github.com/tree-sitter/tree-sitter-php)
|
||||
* [Python](https://github.com/tree-sitter/tree-sitter-python)
|
||||
* [Ruby](https://github.com/tree-sitter/tree-sitter-ruby)
|
||||
* [Rust](https://github.com/tree-sitter/tree-sitter-rust)
|
||||
* [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript)
|
||||
|
||||
There are parsers in development for these languages:
|
||||
|
||||
* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell)
|
||||
* [Java](https://github.com/tree-sitter/tree-sitter-java)
|
||||
* [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml)
|
||||
* [C-sharp](https://github.com/tree-sitter/tree-sitter-c-sharp)
|
||||
* [Julia](https://github.com/tree-sitter/tree-sitter-julia)
|
||||
* [Scala](https://github.com/tree-sitter/tree-sitter-scala)
|
||||
|
||||
### Talks on Tree-sitter
|
||||
|
||||
* [FOSDEM 2018](https://www.youtube.com/watch?v=0CGzC_iss-8)
|
||||
* [GitHub Universe 2017](https://www.youtube.com/watch?v=a1rC79DHpmY)
|
||||
|
|
|
|||
18
docs/section-2-architecture.md
Normal file
18
docs/section-2-architecture.md
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
---
|
||||
title: Architecture
|
||||
permalink: architecture
|
||||
---
|
||||
|
||||
# Architecture
|
||||
|
||||
Tree-sitter consists of two separate libraries, both of which expose C APIs.
|
||||
|
||||
The first library, `libcompiler`, is
|
||||
used to generate a parser for a language by supplying a [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) describing the
|
||||
language. `libcompiler` is a build tool; once the parser has been generated, it is no longer needed. Its public interface is specified in the header file [`compiler.h`](https://github.com/tree-sitter/tree-sitter/blob/master/include/tree_sitter/compiler.h).
|
||||
|
||||
The second library, `libruntime`, is used in combination with the parsers
|
||||
generated by `libcompiler`, to produce syntax trees from source code and keep the
|
||||
syntax trees up-to-date as the source code changes. `libruntime` is designed to be embedded in applications. Its interface is specified in the header file [`runtime.h`](https://github.com/tree-sitter/tree-sitter/blob/master/include/tree_sitter/runtime.h).
|
||||
|
||||
## The Compiler
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: table-of-contents
|
||||
title: Creating Parsers
|
||||
permalink: creating-parsers
|
||||
---
|
||||
|
||||
# Creating parsers
|
||||
|
|
@ -57,59 +58,63 @@ It's usually a good idea to find a formal specification for the language you're
|
|||
Although languages have very different constructs, their constructs can often be categorized into similar groups like *Declarations*, *Definitions*, *Statements*, *Expressions*, *Types*, and *Patterns*. In writing your grammar, a good first step is to create just enough structure to include all of these basic *groups* of symbols. For an imaginary C-like language, this might look something like this:
|
||||
|
||||
```js
|
||||
rules: $ => {
|
||||
source_file: $ => repeat($._definition),
|
||||
{
|
||||
// ...
|
||||
|
||||
_definition: $ => choice(
|
||||
$.function_definition
|
||||
// TODO: other kinds of definitions
|
||||
),
|
||||
rules: {
|
||||
source_file: $ => repeat($._definition),
|
||||
|
||||
function_definition: $ => seq(
|
||||
'func',
|
||||
$.identifier,
|
||||
$.parameter_list,
|
||||
$._type,
|
||||
$.block
|
||||
),
|
||||
_definition: $ => choice(
|
||||
$.function_definition
|
||||
// TODO: other kinds of definitions
|
||||
),
|
||||
|
||||
parameter_list: $ => seq(
|
||||
'(',
|
||||
// TODO: parameters
|
||||
')'
|
||||
),
|
||||
function_definition: $ => seq(
|
||||
'func',
|
||||
$.identifier,
|
||||
$.parameter_list,
|
||||
$._type,
|
||||
$.block
|
||||
),
|
||||
|
||||
_type: $ => choice(
|
||||
'bool'
|
||||
// TODO: other kinds of types
|
||||
),
|
||||
parameter_list: $ => seq(
|
||||
'(',
|
||||
// TODO: parameters
|
||||
')'
|
||||
),
|
||||
|
||||
block: $ => seq(
|
||||
'{',
|
||||
repeat($._statement),
|
||||
'}'
|
||||
),
|
||||
_type: $ => choice(
|
||||
'bool'
|
||||
// TODO: other kinds of types
|
||||
),
|
||||
|
||||
_statement: $ => choice(
|
||||
$.return_statement
|
||||
// TODO: other kinds of statements
|
||||
),
|
||||
block: $ => seq(
|
||||
'{',
|
||||
repeat($._statement),
|
||||
'}'
|
||||
),
|
||||
|
||||
return_statement: $ => seq(
|
||||
'return',
|
||||
$._expression,
|
||||
';'
|
||||
),
|
||||
_statement: $ => choice(
|
||||
$.return_statement
|
||||
// TODO: other kinds of statements
|
||||
),
|
||||
|
||||
_expression: $ => choice(
|
||||
$.identifier,
|
||||
$.number
|
||||
// TODO: other kinds of expressions
|
||||
),
|
||||
return_statement: $ => seq(
|
||||
'return',
|
||||
$._expression,
|
||||
';'
|
||||
),
|
||||
|
||||
identifier: $ => /[a-z]+/,
|
||||
_expression: $ => choice(
|
||||
$.identifier,
|
||||
$.number
|
||||
// TODO: other kinds of expressions
|
||||
),
|
||||
|
||||
number: $ => /\d+/
|
||||
identifier: $ => /[a-z]+/,
|
||||
|
||||
number: $ => /\d+/
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
|
@ -118,27 +123,31 @@ Some of the details of this grammar will be explained in more depth later on, bu
|
|||
With this structure in place, you can now freely decide what part of the grammar to flesh out next. For example, you might decide to start with *types*. One-by-one, you could define the rules for writing basic types and composing them into more complex types:
|
||||
|
||||
```js
|
||||
_type: $ => choice(
|
||||
$.primitive_type,
|
||||
$.array_type,
|
||||
$.pointer_type
|
||||
),
|
||||
{
|
||||
// ...
|
||||
|
||||
primitive_type: $ => choice(
|
||||
'bool',
|
||||
'int'
|
||||
),
|
||||
_type: $ => choice(
|
||||
$.primitive_type,
|
||||
$.array_type,
|
||||
$.pointer_type
|
||||
),
|
||||
|
||||
array_type: $ => seq(
|
||||
'[',
|
||||
']',
|
||||
$._type
|
||||
),
|
||||
primitive_type: $ => choice(
|
||||
'bool',
|
||||
'int'
|
||||
),
|
||||
|
||||
pointer_type: $ => seq(
|
||||
'*',
|
||||
$._type
|
||||
),
|
||||
array_type: $ => seq(
|
||||
'[',
|
||||
']',
|
||||
$._type
|
||||
),
|
||||
|
||||
pointer_type: $ => seq(
|
||||
'*',
|
||||
$._type
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
After developing the *type* sublanguage a bit further, you might decide to switch to working on *statements* or *expressions* instead. It's often useful to check your progress by trying to parse some real code using `tree-sitter parse`.
|
||||
|
|
@ -250,24 +259,28 @@ The language spec encodes the 20 precedence levels of JavaScript expressions usi
|
|||
To produce a readable syntax tree, we'd like to model JavaScript expressions using a much flatter structure like this:
|
||||
|
||||
```js
|
||||
_expression: $ => choice(
|
||||
$.identifier,
|
||||
$.unary_expression,
|
||||
$.binary_expression,
|
||||
{
|
||||
// ...
|
||||
),
|
||||
|
||||
unary_expression: $ => choice(
|
||||
seq('-', $._expression),
|
||||
seq('!', $._expression),
|
||||
// ...
|
||||
),
|
||||
_expression: $ => choice(
|
||||
$.identifier,
|
||||
$.unary_expression,
|
||||
$.binary_expression,
|
||||
// ...
|
||||
),
|
||||
|
||||
binary_expression: $ => choice(
|
||||
seq($._expression, '*', $._expression),
|
||||
seq($._expression, '+', $._expression),
|
||||
// ...
|
||||
),
|
||||
unary_expression: $ => choice(
|
||||
seq('-', $._expression),
|
||||
seq('!', $._expression),
|
||||
// ...
|
||||
),
|
||||
|
||||
binary_expression: $ => choice(
|
||||
seq($._expression, '*', $._expression),
|
||||
seq($._expression, '+', $._expression),
|
||||
// ...
|
||||
),
|
||||
}
|
||||
```
|
||||
|
||||
Of course, this flat structure is highly ambiguous. If we try to generate a parser, Tree-sitter gives us an error message:
|
||||
|
|
@ -293,11 +306,15 @@ Possible resolutions:
|
|||
For an expression like `-a * b`, it's not clear whether the `-` operator applies to the `a * b` or just to the `a`. This is where the `prec` function described above comes into play. By wrapping a rule with `prec`, we can indicate that certain sequences of symbols should *bind to each other more tightly* than others. For example, the `'-', $._expression` sequence in `unary_expression` should bind more tightly than the `$._expression, '+', $._expression` sequence in `binary_expression`:
|
||||
|
||||
```js
|
||||
unary_expression: $ => prec(2, choice(
|
||||
seq('-', $._expression),
|
||||
seq('!', $._expression),
|
||||
{
|
||||
// ...
|
||||
))
|
||||
|
||||
unary_expression: $ => prec(2, choice(
|
||||
seq('-', $._expression),
|
||||
seq('!', $._expression),
|
||||
// ...
|
||||
))
|
||||
}
|
||||
```
|
||||
|
||||
### Using associativity
|
||||
|
|
@ -323,11 +340,15 @@ Possible resolutions:
|
|||
For an expression like `a * b * c`, it's not clear whether we mean `a * (b * c)` or `(a * b) * c`. This is where `prec.left` and `prec.right` come into use. We want to select the second interpretation, so we use `prec.left`.
|
||||
|
||||
```js
|
||||
binary_expression: $ => choice(
|
||||
prec.left(2, seq($._expression, '*', $._expression)),
|
||||
prec.left(1, seq($._expression, '+', $._expression)),
|
||||
{
|
||||
// ...
|
||||
),
|
||||
|
||||
binary_expression: $ => choice(
|
||||
prec.left(2, seq($._expression, '*', $._expression)),
|
||||
prec.left(1, seq($._expression, '+', $._expression)),
|
||||
// ...
|
||||
),
|
||||
}
|
||||
```
|
||||
|
||||
### Hiding rules
|
||||
8
docs/section-4-using-parsers.md
Normal file
8
docs/section-4-using-parsers.md
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
title: Using Parsers
|
||||
permalink: using-parsers
|
||||
---
|
||||
|
||||
# Using Parsers
|
||||
|
||||
WIP
|
||||
Loading…
Add table
Add a link
Reference in a new issue