Add documentation about included ranges

2018-09-12 17:54:09 -07:00 · 2018-09-12 17:54:09 -07:00 · 1878b425de
commit 1878b425de
parent 78b54810a6
1 changed files with 105 additions and 1 deletions
--- a/docs/section-2-using-parsers.md
+++ b/docs/section-2-using-parsers.md
@ -27,7 +27,7 @@ Here's an example of a simple C program that uses the Tree-sitter [JSON parser](
 #include <assert.h>
 #include <string.h>
 #include <stdio.h>
-#include "tree_sitter/runtime.h"
+#include <tree_sitter/runtime.h>

 // Declare the `tree_sitter_json` function, which is
 // implemented by the `tree-sitter-json` library.
@ -236,6 +236,110 @@ void ts_node_edit(TSNode *, const TSInputEdit *);

 Then, you can call `ts_parser_parse` again, passing in the old tree. This will create a new tree that internally shares structure with the old tree.

+## Multi-language Documents
+
+Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](http://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby.
+
+Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain *ranges* of a file.
+
+```c
+typedef struct {
+  TSPoint start_point;
+  TSPoint end_point;
+  uint32_t start_byte;
+  uint32_t end_byte;
+} TSRange;
+
+void ts_parser_set_included_ranges(
+  TSParser *self,
+  const TSRange *ranges,
+  uint32_t range_count
+);
+```
+
+For example, consider this ERB document:
+
+```erb
+<ul>
+  <% people.each do |person| %>
+    <li><%= person.name %></li>
+  <% end %>
+</ul>
+```
+
+Conceptually, it can be represented by three syntax trees with overlapping ranges: an ERB syntax tree, a Ruby syntax tree, and an HTML syntax tree. You could generate these syntax trees as follows:
+
+```c
+#include <string.h>
+#include <tree_sitter/runtime.h>
+
+// These functions are each implemented in their own repo.
+const TSLanguage *tree_sitter_embedded_template();
+const TSLanguage *tree_sitter_html();
+const TSLanguage *tree_sitter_ruby();
+
+int main(int argc, const char **argv) {
+  const char *text = argv[1];
+  unsigned len = strlen(text);
+
+  // Parse the entire text as ERB.
+  TSParser *parser = ts_parser_new();
+  ts_parser_set_language(parser, tree_sitter_embedded_template());
+  TSTree *erb_tree = ts_parser_parse_string(parser, NULL, text, len);
+  TSNode erb_root_node = ts_tree_root_node(erb_tree);
+
+  // Find the ranges of the `content` nodes, which represent
+  // the underlying HTML, and the `code` nodes, which represent
+  // the interpolated Ruby.
+  TSRange html_ranges[10];
+  TSRange ruby_ranges[10];
+  unsigned html_range_count = 0;
+  unsigned ruby_range_count = 0;
+  unsigned child_count = ts_node_child_count(erb_root_node);
+
+  for (unsigned i = 0; i < child_count; i++) {
+    TSNode node = ts_node_child(erb_root_node, i);
+    if (strcmp(ts_node_type(node), "content") == 0) {
+      html_ranges[html_range_count++] = (TSRange) {
+        ts_node_start_point(node),
+        ts_node_end_point(node),
+        ts_node_start_byte(node),
+        ts_node_end_byte(node),
+      };
+    } else {
+      TSNode code_node = ts_node_named_child(node, 0);
+      ruby_ranges[ruby_range_count++] = (TSRange) {
+        ts_node_start_point(code_node),
+        ts_node_end_point(code_node),
+        ts_node_start_byte(code_node),
+        ts_node_end_byte(code_node),
+      };
+    }
+  }
+
+  // Use the HTML ranges to parse the HTML.
+  ts_parser_set_language(parser, tree_sitter_html());
+  ts_parser_set_included_ranges(parser, html_ranges, html_range_count);
+  TSTree *html_tree = ts_parser_parse_string(parser, NULL, text, len);
+  TSNode html_root_node = ts_tree_root_node(html_tree);
+
+  // Use the Ruby ranges to parse the Ruby.
+  ts_parser_set_language(parser, tree_sitter_ruby());
+  ts_parser_set_included_ranges(parser, ruby_ranges, ruby_range_count);
+  TSTree *ruby_tree = ts_parser_parse_string(parser, NULL, text, len);
+  TSNode ruby_root_node = ts_tree_root_node(ruby_tree);
+
+  // Print all three trees.
+  char *erb_sexp = ts_node_string(erb_root_node);
+  char *html_sexp = ts_node_string(html_root_node);
+  char *ruby_sexp = ts_node_string(ruby_root_node);
+  printf("ERB: %s\n", erb_sexp);
+  printf("HTML: %s\n", html_sexp);
+  printf("Ruby: %s\n", ruby_sexp);
+  return 0;
+}
+```
+
 ## Concurrency

 Tree-sitter supports multi-threaded use cases by making syntax trees very cheap to copy.