Start fleshing out docs site

This commit is contained in:
Max Brunsfeld 2018-06-10 09:54:59 -07:00
parent fb14cb0737
commit 7ad50f2731
10 changed files with 339 additions and 176 deletions

View file

@ -1 +1,2 @@
# Markdown engine used to render the site's pages.
markdown: kramdown
# Site theme; the stylesheet imports a partial with this same name.
theme: jekyll-theme-cayman

123
docs/_layouts/default.html Normal file
View file

@ -0,0 +1,123 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <!-- Zoom is intentionally left enabled: disabling it (maximum-scale / user-scalable=no)
         prevents low-vision users from enlarging the page. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link rel="stylesheet" type="text/css" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.1.0/css/bootstrap.min.css" media="screen">
    <link rel="stylesheet" href="{{ '/assets/css/style.css?v=' | append: site.github.build_revision | relative_url }}" media="screen" type="text/css">
    <title>{{ page.title }}</title>
  </head>
  <body>
    <!-- Fixed sidebar: logo, one entry per site page, and (for the current page)
         a table of contents generated from the page's headings. -->
    <div style="position: fixed; width: 100%;">
      <div class="container" style="height: 100%;">
        <div class="row" style="height: 0;">
          <div class="col-md-4">
            <nav id="table-of-contents">
              <a class="logo" href="https://github.com/tree-sitter/tree-sitter">
                <img src="{{ '/assets/images/tree-sitter-small.png' | relative_url }}" width="200" height="200" alt="Tree-sitter on GitHub">
              </a>
              <!-- List items need a list parent; Bootstrap's utility classes remove
                   the default bullet padding and bottom margin. -->
              <ul class="list-unstyled mb-0">
                {% for other_page in site.html_pages %}
                  {% if page.title == other_page.title %}
                    <li class="toc-section active">
                      <a class="nav-link" href="{{ other_page.url | relative_url }}">
                        {{ other_page.title }}
                      </a>
                      <ul id="current-page-table-of-contents" class="nav navbar">
                        {% capture whitespace %}
                          {% comment %}
                            Everything inside this capture is discarded; it exists only so
                            the bookkeeping tags below emit no whitespace into the page.
                            The TOC itself is accumulated in my_toc as a Markdown list.
                          {% endcomment %}
                          {% assign min_header = 2 %}
                          {% assign max_header = 6 %}
                          {% comment %}
                            Splitting the rendered content on "<h" leaves each heading's
                            level digit as the first character of its node.
                          {% endcomment %}
                          {% assign nodes = content | split: "<h" %}
                          {% assign first_header = true %}
                          {% for node in nodes %}
                            {% if node == "" %}
                              {% continue %}
                            {% endif %}
                            {% comment %}
                              "times: 1" coerces the first character to a number; anything
                              that is not a digit becomes 0 and is skipped below.
                            {% endcomment %}
                            {% assign header_level = node | replace: '"', '' | slice: 0, 1 | times: 1 %}
                            {% if header_level < min_header or header_level > max_header %}
                              {% continue %}
                            {% endif %}
                            {% comment %} The first accepted heading sets the top TOC level. {% endcomment %}
                            {% if first_header %}
                              {% assign first_header = false %}
                              {% assign min_header = header_level %}
                            {% endif %}
                            {% comment %}
                              Nesting depth relative to the top TOC level (0 for top-level
                              headings, which therefore get no indentation).
                            {% endcomment %}
                            {% assign indent_level = header_level | minus: min_header %}
                            {% assign header_content = node | split: '</h' %}
                            {% assign header_content = header_content[0] %}
                            {% assign html_id = header_content | split: 'id="' %}
                            {% assign html_id = html_id[1] | split: '"' %}
                            {% assign html_id = html_id[0] %}
                            {% comment %} Drop the remainder of the opening tag to get the heading's inner HTML. {% endcomment %}
                            {% capture header_attrs_to_strip %}{{ header_content | split: '>' | first }}>{% endcapture %}
                            {% assign header = header_content | replace: header_attrs_to_strip, '' %}
                            {% assign space = '' %}
                            {% for i in (1..indent_level) %}
                              {% assign space = space | prepend: ' ' %}
                            {% endfor %}
                            {% comment %}
                              The next two lines are whitespace-sensitive: each list item
                              must start at column 0, indented only by "space".
                            {% endcomment %}
                            {% capture my_toc %}{{ my_toc }}
{{ space }}- [{{ header }}](#{{ html_id }}){: .nav-link}{% endcapture %}
                          {% endfor %}
                        {% endcapture %}
                        {{ my_toc | strip | markdownify | strip }}
                      </ul>
                    </li>
                  {% else %}
                    <li class="toc-section">
                      <a class="nav-link" href="{{ other_page.url | relative_url }}">
                        {{ other_page.title }}
                      </a>
                    </li>
                  {% endif %}
                {% endfor %}
              </ul>
            </nav>
          </div>
        </div>
      </div>
    </div>
    <!-- Main content; the empty first column reserves the space under the fixed sidebar. -->
    <main class="container">
      <div class="row">
        <div class="col-md-4" style="pointer-events: none;">
        </div>
        <div class="col-md-8 content">
          <div id="main-content">
            {{ content }}
          </div>
        </div>
      </div>
    </main>
  </body>
</html>
<!-- jQuery plus the Bootstrap bundle, which supplies the scrollspy plugin used below. -->
<script
  src="https://code.jquery.com/jquery-3.3.1.min.js"
  crossorigin="anonymous">
</script>
<script
  src="https://maxcdn.bootstrapcdn.com/bootstrap/4.1.0/js/bootstrap.bundle.min.js">
</script>
<script>
  // Highlight the current section in the sidebar while scrolling, but only
  // when the page is taller than the viewport (i.e. it can actually scroll).
  if (document.body.scrollHeight > window.innerHeight) {
    $(document.body).scrollspy({
      target: '#current-page-table-of-contents',
      offset: 40
    });
  }

  // Replace the contents of every heading that has an id with a link to itself.
  // The link is built with .attr()/.text() rather than string concatenation so
  // that heading text containing "<" or "&" stays literal text instead of being
  // re-parsed as markup.
  $('h1, h2, h3, h4, h5, h6').filter('[id]').each(function() {
    var heading = $(this);
    var selfLink = $('<a>')
      .attr('href', '#' + heading.attr('id'))
      .text(heading.text());
    heading.empty().append(selfLink);
  });
</script>

View file

@ -1,74 +0,0 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <!-- Zoom is intentionally left enabled: disabling it (maximum-scale / user-scalable=no)
         prevents low-vision users from enlarging the page. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link rel="stylesheet" type="text/css" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" media="screen">
    <link rel="stylesheet" type="text/css" href="{{ site.baseurl }}/css/style.css" media="screen">
    <title>{{ page.title }}</title>
  </head>
  <!-- The data-* attributes enable scrollspy against the sidebar nav. -->
  <body data-spy="scroll" data-target="#table-of-contents" data-offset="40">
    <!-- Fixed sidebar; the nav starts empty and is filled in by script. -->
    <div style="position: fixed; width: 100%;">
      <div class="container" style="height: 0;">
        <div class="row" style="height: 0;">
          <div class="col-md-3">
            <nav class="nav navbar navbar-light" id="table-of-contents"></nav>
          </div>
        </div>
      </div>
    </div>
    <!-- Main content; the empty first column reserves the space under the fixed sidebar. -->
    <div class="container">
      <div class="row">
        <div class="col-md-3" style="pointer-events: none;">
        </div>
        <div class="col-md-9 content">
          <div id="main-content">
            {{ content }}
          </div>
        </div>
      </div>
    </div>
  </body>
</html>
<!-- Generate a table of contents based on header elements -->
<script type="text/javascript">
  // Builds a two-level list in #table-of-contents from the h2/h3 elements of
  // #main-content. Headers without an id are skipped because they cannot be
  // linked to. Each h2 becomes a top-level entry that owns a nested list; each
  // h3 is appended to the nested list of the most recent h2.
  var mainContent = document.getElementById('main-content');
  var tableOfContents = document.getElementById('table-of-contents');
  var headers = mainContent.querySelectorAll('h2, h3');
  var lastSubnav = null;

  for (let i = 0, length = headers.length; i < length; i++) {
    var header = headers[i];
    if (!header.id) continue;

    var li = document.createElement('li');
    li.className = 'navbar-item';

    var link = document.createElement('a');
    link.href = '#' + header.id;
    link.innerText = header.innerText;
    link.className = 'nav-link';
    li.appendChild(link);

    if (header.tagName === 'H2') {
      lastSubnav = document.createElement('ul');
      lastSubnav.className = 'nav navbar';
      li.appendChild(lastSubnav);
      tableOfContents.appendChild(li);
    } else if (lastSubnav) {
      lastSubnav.appendChild(li);
    } else {
      // An h3 that appears before any h2 has no parent entry yet; list it at
      // the top level instead of dereferencing a missing sub-list.
      tableOfContents.appendChild(li);
    }
  }
</script>
<!-- Third-party scripts loaded from CDNs: jQuery 3.3.1, then the Bootstrap 4.0.0
     bundle. Presumably these back the data-spy="scroll" attribute on <body>;
     confirm before removing either. -->
<script
src="https://code.jquery.com/jquery-3.3.1.min.js"
crossorigin="anonymous"></script>
<script
src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.bundle.min.js"></script>

View file

@ -0,0 +1,39 @@
---
---

@import 'jekyll-theme-cayman';

/* ---- Page-wide rules ---- */

body {
  padding-bottom: 100px;
  overflow-y: scroll;
}

li {
  display: block;
}

/* ---- Layout columns ---- */

#main-content,
#table-of-contents {
  padding-top: 20px;
}

#table-of-contents {
  border-right: 1px solid #ddd;
}

/* ---- Sidebar contents ---- */

.logo {
  display: block;
  padding: 0 20px 20px;
  border-bottom: 1px solid #ccc;
}

.toc-section {
  border-bottom: 1px solid #ccc;
}

.toc-section.active {
  background-color: #edffcb;
}

.nav-link.active {
  text-decoration: underline;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

View file

@ -1,13 +0,0 @@
/* Content column: offset from the top of the page. */
#main-content {
  margin-top: 20px;
}

/* Sidebar: same top offset, drawn as a rounded, bordered box. */
#table-of-contents {
  margin-top: 20px;
  padding: 10px;
  border: 1px solid #ddd;
  border-radius: 10px;
}

/* Entry for the section currently in view. */
.nav-link.active {
  text-decoration: underline;
}

View file

@ -1,10 +1,50 @@
---
title: Introduction
---
# Introduction
Tree-sitter is a library for parsing source code. It aims to be:
* **General** enough to parse any programming language
* **Dependency-free** and written in pure C so that it can be embedded in any application
* **Fast** and incremental so that it can be used in a text editor
* **Robust** enough to provide useful results even in the presence of syntax errors
* **General** enough to parse any programming language
* **Dependency-free** (and written in pure C) so that it can be embedded in any application
## Table of contents
### Language Bindings
1. [Creating parsers](creating-parsers.md)
There are currently bindings that allow Tree-sitter to be used from the following languages:
* [JavaScript](https://github.com/tree-sitter/node-tree-sitter)
* [Rust](https://github.com/tree-sitter/rust-tree-sitter)
* [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
* [Ruby](https://github.com/tree-sitter/ruby-tree-sitter)
### Available Parsers
There are fairly complete parsers for the following languages:
* [Bash](https://github.com/tree-sitter/tree-sitter-bash)
* [C](https://github.com/tree-sitter/tree-sitter-c)
* [C++](https://github.com/tree-sitter/tree-sitter-cpp)
* [Go](https://github.com/tree-sitter/tree-sitter-go)
* [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript)
* [PHP](https://github.com/tree-sitter/tree-sitter-php)
* [Python](https://github.com/tree-sitter/tree-sitter-python)
* [Ruby](https://github.com/tree-sitter/tree-sitter-ruby)
* [Rust](https://github.com/tree-sitter/tree-sitter-rust)
* [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript)
There are parsers in development for these languages:
* [Haskell](https://github.com/tree-sitter/tree-sitter-haskell)
* [Java](https://github.com/tree-sitter/tree-sitter-java)
* [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml)
* [C-sharp](https://github.com/tree-sitter/tree-sitter-c-sharp)
* [Julia](https://github.com/tree-sitter/tree-sitter-julia)
* [Scala](https://github.com/tree-sitter/tree-sitter-scala)
### Talks on Tree-sitter
* [FOSDEM 2018](https://www.youtube.com/watch?v=0CGzC_iss-8)
* [GitHub Universe 2017](https://www.youtube.com/watch?v=a1rC79DHpmY)

View file

@ -0,0 +1,18 @@
---
title: Architecture
permalink: architecture
---
# Architecture
Tree-sitter consists of two separate libraries, both of which expose C APIs.
The first library, `libcompiler`, is
used to generate a parser for a language by supplying a [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) describing the
language. `libcompiler` is a build tool; once the parser has been generated, it is no longer needed. Its public interface is specified in the header file [`compiler.h`](https://github.com/tree-sitter/tree-sitter/blob/master/include/tree_sitter/compiler.h).
The second library, `libruntime`, is used in combination with the parsers
generated by `libcompiler`, to produce syntax trees from source code and keep the
syntax trees up-to-date as the source code changes. `libruntime` is designed to be embedded in applications. Its interface is specified in the header file [`runtime.h`](https://github.com/tree-sitter/tree-sitter/blob/master/include/tree_sitter/runtime.h).
## The Compiler

View file

@ -1,5 +1,6 @@
---
layout: table-of-contents
title: Creating Parsers
permalink: creating-parsers
---
# Creating parsers
@ -57,59 +58,63 @@ It's usually a good idea to find a formal specification for the language you're
Although languages have very different constructs, their constructs can often be categorized into similar groups like *Declarations*, *Definitions*, *Statements*, *Expressions*, *Types*, and *Patterns*. In writing your grammar, a good first step is to create just enough structure to include all of these basic *groups* of symbols. For an imaginary C-like language, this might look something like this:
```js
rules: $ => {
source_file: $ => repeat($._definition),
{
// ...
_definition: $ => choice(
$.function_definition
// TODO: other kinds of definitions
),
rules: $ => {
source_file: $ => repeat($._definition),
function_definition: $ => seq(
'func',
$.identifier,
$.parameter_list,
$._type,
$.block
),
_definition: $ => choice(
$.function_definition
// TODO: other kinds of definitions
),
parameter_list: $ => seq(
'(',
// TODO: parameters
')'
),
function_definition: $ => seq(
'func',
$.identifier,
$.parameter_list,
$._type,
$.block
),
_type: $ => choice(
'bool'
// TODO: other kinds of types
),
parameter_list: $ => seq(
'(',
// TODO: parameters
')'
),
block: $ => seq(
'{',
repeat($._statement),
'}'
),
_type: $ => choice(
'bool'
// TODO: other kinds of types
),
_statement: $ => choice(
$.return_statement
// TODO: other kinds of statements
),
block: $ => seq(
'{',
repeat($._statement),
'}'
),
return_statement: $ => seq(
'return',
$._expression,
';'
),
_statement: $ => choice(
$.return_statement
// TODO: other kinds of statements
),
_expression: $ => choice(
$.identifier,
$.number
// TODO: other kinds of expressions
),
return_statement: $ => seq(
'return',
$._expression,
';'
),
identifier: $ => /[a-z]+/,
_expression: $ => choice(
$.identifier,
$.number
// TODO: other kinds of expressions
),
number: $ => /\d+/
identifier: $ => /[a-z]+/,
number: $ => /\d+/
}
}
```
@ -118,27 +123,31 @@ Some of the details of this grammar will be explained in more depth later on, bu
With this structure in place, you can now freely decide what part of the grammar to flesh out next. For example, you might decide to start with *types*. One-by-one, you could define the rules for writing basic types and composing them into more complex types:
```js
_type: $ => choice(
$.primitive_type,
$.array_type,
$.pointer_type
),
{
// ...
primitive_type: $ => choice(
'bool',
'int'
),
_type: $ => choice(
$.primitive_type,
$.array_type,
$.pointer_type
),
array_type: $ => seq(
'[',
']',
$._type
),
primitive_type: $ => choice(
'bool',
'int'
),
pointer_type: $ => seq(
'*',
$._type
),
array_type: $ => seq(
'[',
']',
$._type
),
pointer_type: $ => seq(
'*',
$._type
)
}
```
After developing the *type* sublanguage a bit further, you might decide to switch to working on *statements* or *expressions* instead. It's often useful to check your progress by trying to parse some real code using `tree-sitter parse`.
@ -250,24 +259,28 @@ The language spec encodes the 20 precedence levels of JavaScript expressions usi
To produce a readable syntax tree, we'd like to model JavaScript expressions using a much flatter structure like this:
```js
_expression: $ => choice(
$.identifier,
$.unary_expression,
$.binary_expression,
{
// ...
),
unary_expression: $ => choice(
seq('-', $._expression),
seq('!', $._expression),
// ...
),
_expression: $ => choice(
$.identifier,
$.unary_expression,
$.binary_expression,
// ...
),
binary_expression: $ => choice(
seq($._expression, '*', $._expression),
seq($._expression, '+', $._expression),
// ...
),
unary_expression: $ => choice(
seq('-', $._expression),
seq('!', $._expression),
// ...
),
binary_expression: $ => choice(
seq($._expression, '*', $._expression),
seq($._expression, '+', $._expression),
// ...
),
}
```
Of course, this flat structure is highly ambiguous. If we try to generate a parser, Tree-sitter gives us an error message:
@ -293,11 +306,15 @@ Possible resolutions:
For an expression like `-a * b`, it's not clear whether the `-` operator applies to the `a * b` or just to the `a`. This is where the `prec` function described above comes into play. By wrapping a rule with `prec`, we can indicate that certain sequences of symbols should *bind to each other more tightly* than others. For example, the `'-', $._expression` sequence in `unary_expression` should bind more tightly than the `$._expression, '+', $._expression` sequence in `binary_expression`:
```js
unary_expression: $ => prec(2, choice(
seq('-', $._expression),
seq('!', $._expression),
{
// ...
))
unary_expression: $ => prec(2, choice(
seq('-', $._expression),
seq('!', $._expression),
// ...
))
}
```
### Using associativity
@ -323,11 +340,15 @@ Possible resolutions:
For an expression like `a * b * c`, it's not clear whether we mean `a * (b * c)` or `(a * b) * c`. This is where `prec.left` and `prec.right` come into use. We want to select the second interpretation, so we use `prec.left`.
```js
binary_expression: $ => choice(
prec.left(2, seq($._expression, '*', $._expression)),
prec.left(1, seq($._expression, '+', $._expression)),
{
// ...
),
binary_expression: $ => choice(
prec.left(2, seq($._expression, '*', $._expression)),
prec.left(1, seq($._expression, '+', $._expression)),
// ...
),
}
```
### Hiding rules

View file

@ -0,0 +1,8 @@
---
title: Using Parsers
permalink: using-parsers
---
# Using Parsers
WIP