Merge pull request #22 from maxbrunsfeld/c-compiler-api

Simplify the compiler API
This commit is contained in:
Max Brunsfeld 2016-01-13 21:08:41 -08:00
commit 49f393b75e
67 changed files with 1083 additions and 375 deletions

3
.gitmodules vendored
View file

@ -7,3 +7,6 @@
[submodule "externals/utf8proc"]
path = externals/utf8proc
url = https://github.com/julialang/utf8proc
[submodule "externals/json-parser"]
path = externals/json-parser
url = https://github.com/udp/json-parser.git

211
README.md
View file

@ -2,9 +2,11 @@
[![Build Status](https://travis-ci.org/maxbrunsfeld/tree-sitter.png?branch=master)](https://travis-ci.org/maxbrunsfeld/tree-sitter)
Tree-sitter is an incremental parsing library in C and C++, intended to be used via [bindings](https://github.com/maxbrunsfeld/node-tree-sitter) to higher-level
languages. It allows documents to be efficiently re-parsed after localized
edits, making it suitable for use in performance-intensive text-editing programs.
Tree-sitter is a C library for incremental parsing, intended to be used via
[bindings](https://github.com/maxbrunsfeld/node-tree-sitter) to higher-level
languages. It can be used to build a concrete syntax tree for a program and
efficiently update the syntax tree as the program is edited. This makes it suitable
for use in text-editing programs.
Tree-sitter uses a sentential-form incremental [LR parsing](https://en.wikipedia.org/wiki/LR_parser)
algorithm, as described in the paper *[Efficient and Flexible Incremental Parsing](http://harmonia.cs.berkeley.edu/papers/twagner-parsing.ps.gz)*
@ -15,142 +17,176 @@ This allows it to generate a fast parser for any context-free grammar.
### Installation
```sh
script/configure.sh # Generate a Makefile using gyp
script/configure.sh # Generate a Makefile
make # Build static libraries for the compiler and runtime
```
### Overview
Tree-sitter consists of two libraries. The first library, `libcompiler`, can be
used to generate a parser for a language by supplying a [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) describing the
language. Once the parser has been generated, `libcompiler` is no longer needed.
The second library, `libruntime`, is used in combination with the parsers
generated by `libcompiler`, to generate syntax trees based on text documents, and keep the
syntax trees up-to-date as changes are made to the documents.
### Writing a grammar
Tree-sitter's interface for creating grammars is a C++ library, `libcompiler`.
This allows grammars and rules to be defined, manipulated and
extended as simple values in high-level languages like [javascript](https://github.com/maxbrunsfeld/node-tree-sitter-compiler),
and then converted into tree-sitter's native representation and compiled to C
parsers. These parsers can then be used from any language that has a binding to
tree-sitter's runtime library, `libruntime`.
Tree-sitter's grammars are specified as JSON strings. This format allows them
to be easily created and manipulated in high-level languages like [JavaScript](https://github.com/maxbrunsfeld/node-tree-sitter-compiler).
The structure of a grammar is formally specified by [this JSON schema](./doc/grammar-schema.json).
You can generate a parser for a grammar using the `ts_compile_grammar` function
provided by `libcompiler`.
Here's a simple example that uses `libcompiler` directly:
Here's a simple example of using `ts_compile_grammar` to create a parser for basic
arithmetic expressions. It uses C++11 raw string literals for readability.
```cpp
// arithmetic_grammar.cc
#include <assert.h>
#include <stdio.h>
#include "tree_sitter/compiler.h"
using namespace tree_sitter;
int main() {
auto arithmetic_grammar = Grammar({
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "arithmetic",
// The first rule listed in a grammar becomes the 'start rule'.
{ "expression", choice({
sym("sum"),
sym("product"),
sym("number"),
sym("variable"),
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
// Error recovery is controlled by wrapping rule subtrees with `err`.
seq({
str("("),
err(sym("expression")),
str(")") }) }) },
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "product"},
{"type": "SYMBOL", "name": "number"},
{"type": "SYMBOL", "name": "variable"},
{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "("},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": ")"}
]
}
]
},
// Tokens like '+' and '*' are described directly within the grammar's rules,
// as opposed to in a separate lexer description.
{ "sum", prec_left(1, seq({
sym("expression"),
str("+"),
sym("expression") })) },
"sum": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Ambiguities can be resolved at compile time by assigning precedence
// values to rule subtrees.
{ "product", prec_left(2, seq({
sym("expression"),
str("*"),
sym("expression") })) },
"product": {
"type": "PREC_LEFT",
"value": 2,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Tokens can be specified using ECMAScript regexps.
{ "number", pattern("\\d+") },
{ "variable", pattern("[a-zA-Z]+\\w*") },
{ "comment", pattern("//.*") },
"number": {"type": "PATTERN", "value": "\\d+"},
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
}
}
)JSON");
}).extra_tokens({
if (result.error_type != TSCompileErrorTypeNone) {
fprintf(stderr, "Compilation failed: %s\n", result.error_message);
return 1;
}
// Things that can appear anywhere in the language are expressed as
// 'extra tokens'.
sym("comment"),
pattern("\\s+")
});
// Generate C code for parsing this language.
auto output = compile(arithmetic_grammar, "arithmetic");
std::string c_code = output.first;
const GrammarError *error = output.second;
assert(!error);
puts(c_code.c_str());
puts(result.code);
return 0;
}
```
To create a parser for this language, compile and run this grammar like this:
To create the parser, compile this file like this:
```sh
clang++ -stdlib=libc++ -std=c++11 \
-I tree-sitter/include -L tree-sitter/out/Debug -l compiler \
arithmetic_grammar.cc -o arithmetic_grammar
clang++ -std=c++11 \
-I tree-sitter/include \
-L tree-sitter/out/Release \
-l compiler \
arithmetic_grammar.cc \
-o arithmetic_grammar
```
Then run the executable to print out the C code for the parser:
```sh
./arithmetic_grammar > arithmetic_parser.c
```
### Using the parser
The `tree_sitter/runtime` C library exposes a DOM-style interface for inspecting
documents.
#### Providing the text to parse
Functions like `ts_node_child(node, index)` and `ts_node_next_sibling(node)`
Text input is provided to a tree-sitter parser via a `TSInput` struct, which
contains function pointers for seeking to positions in the text, and for reading
chunks of text. The text can be encoded in either UTF8 or UTF16. This interface
allows you to efficiently parse text that is stored in your own data structure.
#### Querying the syntax tree
The `libruntime` API provides a DOM-style interface for inspecting
syntax trees. Functions like `ts_node_child(node, index)` and `ts_node_next_sibling(node)`
expose every node in the concrete syntax tree. This is useful for operations
like syntax-highlighting, that operate on a token-by-token basis. You can also
like syntax-highlighting, which operate on a token-by-token basis. You can also
traverse the tree in a more abstract way by using functions like
`ts_node_named_child(node, index)` and `ts_node_next_named_sibling(node)`. These
functions don't expose nodes that were specified in the grammar as anonymous
tokens, like `(` and `+`. This is useful when analyzing the meaning of a document.
```c
// test_parser.c
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include "tree_sitter/runtime.h"
// Declare the language constructor that was generated from your grammar.
// Declare the language function that was generated from your grammar.
TSLanguage *ts_language_arithmetic();
int main() {
TSDocument *document = ts_document_make();
ts_document_set_language(document, ts_language_arithmetic());
// Usually, you would use the more general `ts_document_set_input`, which
// takes a struct with function pointers for seeking to positions in the text,
// and reading chunks of text. This allows you to efficiently parse text
// stored in your own data structure.
ts_document_set_input_string(document, "a + b * 5");
ts_document_parse(document);
TSNode root_node = ts_document_root_node(document);
printf(
"Root name: %s, start: %lu, end: %lu\n",
ts_node_name(root_node, document),
ts_node_start_char(root_node),
ts_node_end_char(root_node)
);
assert(!strcmp(ts_node_name(root_node, document), "expression"));
assert(ts_node_named_child_count(root_node) == 1);
TSNode product_node = ts_node_named_child(ts_node_child(root_node, 0), 1);
printf(
"Child name: %s, start: %lu, end: %lu\n",
ts_node_name(product_node, document),
ts_node_start_char(product_node),
ts_node_end_char(product_node)
);
TSNode sum_node = ts_node_named_child(root_node, 0);
assert(!strcmp(ts_node_name(sum_node, document), "sum"));
assert(ts_node_named_child_count(sum_node) == 2);
TSNode product_node = ts_node_child(ts_node_named_child(sum_node, 1), 0);
assert(!strcmp(ts_node_name(product_node, document), "product"));
assert(ts_node_named_child_count(product_node) == 2);
printf("Syntax tree: %s\n", ts_node_string(root_node, document));
ts_document_free(document);
return 0;
}
@ -159,9 +195,12 @@ int main() {
To demo this parser's capabilities, compile this program like this:
```sh
clang \
-I tree-sitter/include -L tree-sitter/out/Debug -l runtime \
arithmetic_parser.c test_parser.c -o test_parser
clang \
-I tree-sitter/include \
-L tree-sitter/out/Debug \
-l runtime \
arithmetic_parser.c test_parser.c \
-o test_parser
./test_parser
```

256
doc/grammar-schema.json Normal file
View file

@ -0,0 +1,256 @@
{
"type": "object",
"required": [
"name",
"rules"
],
"additionalProperties": false,
"properties": {
"name": {
"type": "string",
"pattern": "^[a-zA-Z_]\\w*$"
},
"rules": {
"type": "object",
"patternProperties": {
"^[a-zA-Z_]\\w*$": {
"$ref": "#/definitions/rule"
}
},
"additionalProperties": false
},
"extras": {
"type": "array",
"items": {
"$ref": "#/definitions/rule"
}
},
"conflicts": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[a-zA-Z_]\\w*$"
}
}
}
},
"definitions": {
"blank-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^BLANK$"
}
},
"required": ["type"]
},
"string-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^STRING$"
},
"value": {
"type": "string"
}
},
"required": ["type", "value"]
},
"pattern-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^PATTERN$"
},
"value": {"type": "string"}
},
"required": ["type", "value"]
},
"symbol-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^SYMBOL$"
},
"name": {"type": "string"}
},
"required": ["type", "name"]
},
"seq-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^SEQ$"
},
"members": {
"type": "array",
"items": {
"$ref": "#/definitions/rule"
}
}
},
"required": ["type", "members"]
},
"choice-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^CHOICE$"
},
"members": {
"type": "array",
"items": {
"$ref": "#/definitions/rule"
}
}
},
"required": ["type", "members"]
},
"repeat-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^REPEAT$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content"]
},
"repeat1-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^REPEAT1$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content"]
},
"token-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^TOKEN$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content"]
},
"error-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^ERROR$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content"]
},
"prec-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^PREC$"
},
"value": {
"type": "integer"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content", "value"]
},
"prec-left-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^PREC_LEFT$"
},
"value": {
"type": "integer"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content", "value"]
},
"prec-right-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^PREC_RIGHT$"
},
"value": {
"type": "integer"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content", "value"]
},
"rule": {
"oneOf": [
{ "$ref": "#/definitions/blank-rule" },
{ "$ref": "#/definitions/string-rule" },
{ "$ref": "#/definitions/pattern-rule" },
{ "$ref": "#/definitions/symbol-rule" },
{ "$ref": "#/definitions/seq-rule" },
{ "$ref": "#/definitions/choice-rule" },
{ "$ref": "#/definitions/repeat1-rule" },
{ "$ref": "#/definitions/repeat-rule" },
{ "$ref": "#/definitions/token-rule" },
{ "$ref": "#/definitions/error-rule" },
{ "$ref": "#/definitions/prec-rule" },
{ "$ref": "#/definitions/prec-left-rule" },
{ "$ref": "#/definitions/prec-right-rule" }
]
}
}
}

1
externals/json-parser vendored Submodule

@ -0,0 +1 @@
Subproject commit 70533215eea575e40a0b91a34ae01a779641d73a

View file

@ -1,65 +1,30 @@
#ifndef TREE_SITTER_COMPILER_H_
#define TREE_SITTER_COMPILER_H_
#include <memory>
#include <string>
#include <utility>
#include <vector>
#ifdef __cplusplus
extern "C" {
#endif
namespace tree_sitter {
typedef enum {
TSCompileErrorTypeNone,
TSCompileErrorTypeInvalidGrammar,
TSCompileErrorTypeInvalidRegex,
TSCompileErrorTypeUndefinedSymbol,
TSCompileErrorTypeInvalidUbiquitousToken,
TSCompileErrorTypeLexConflict,
TSCompileErrorTypeParseConflict,
} TSCompileErrorType;
class Rule;
typedef std::shared_ptr<Rule> rule_ptr;
typedef struct {
const char *code;
const char *error_message;
TSCompileErrorType error_type;
} TSCompileResult;
rule_ptr blank();
rule_ptr choice(const std::vector<rule_ptr> &);
rule_ptr repeat(const rule_ptr &);
rule_ptr repeat1(const rule_ptr &);
rule_ptr seq(const std::vector<rule_ptr> &);
rule_ptr sym(const std::string &);
rule_ptr pattern(const std::string &);
rule_ptr str(const std::string &);
rule_ptr err(const rule_ptr &);
rule_ptr prec(int precedence, const rule_ptr &);
rule_ptr prec_left(const rule_ptr &);
rule_ptr prec_left(int precedence, const rule_ptr &);
rule_ptr prec_right(const rule_ptr &);
rule_ptr prec_right(int precedence, const rule_ptr &);
rule_ptr token(const rule_ptr &rule);
TSCompileResult ts_compile_grammar(const char *input);
class Grammar {
const std::vector<std::pair<std::string, rule_ptr>> rules_;
std::vector<rule_ptr> extra_tokens_;
std::vector<std::vector<std::string>> expected_conflicts_;
public:
explicit Grammar(const std::vector<std::pair<std::string, rule_ptr>> &);
Grammar &extra_tokens(const std::vector<rule_ptr> &);
Grammar &expected_conflicts(const std::vector<std::vector<std::string>> &);
const std::vector<std::pair<std::string, rule_ptr>> &rules() const;
const std::vector<rule_ptr> &extra_tokens() const;
const std::vector<std::vector<std::string>> &expected_conflicts() const;
};
enum GrammarErrorType {
GrammarErrorTypeRegex,
GrammarErrorTypeUndefinedSymbol,
GrammarErrorTypeInvalidUbiquitousToken,
GrammarErrorTypeLexConflict,
GrammarErrorTypeParseConflict,
};
class GrammarError {
public:
GrammarError(GrammarErrorType type, std::string message);
bool operator==(const GrammarError &other) const;
GrammarErrorType type;
std::string message;
};
std::pair<std::string, const GrammarError *> compile(const Grammar &,
std::string);
} // namespace tree_sitter
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_COMPILER_H_

View file

@ -94,17 +94,17 @@ struct TSLanguage {
* Lexer Macros
*/
#define START_LEXER() \
lexer->start_fn(lexer, state); \
int32_t lookahead; \
next_state: \
#define START_LEXER() \
lexer->start_fn(lexer, state); \
int32_t lookahead; \
next_state: \
lookahead = lexer->lookahead;
#define START_TOKEN() lexer->start_token_fn(lexer);
#define GO_TO_STATE(state_value) \
{ \
state = state_value; \
state = state_value; \
goto next_state; \
}

View file

@ -8,6 +8,7 @@
'include',
'src',
'externals/utf8proc',
'externals/json-parser',
],
'sources': [
'src/compiler/build_tables/build_lex_table.cc',
@ -24,8 +25,8 @@
'src/compiler/build_tables/rule_can_be_blank.cc',
'src/compiler/compile.cc',
'src/compiler/generate_code/c_code.cc',
'src/compiler/grammar.cc',
'src/compiler/lex_table.cc',
'src/compiler/parse_grammar.cc',
'src/compiler/parse_table.cc',
'src/compiler/precedence_range.cc',
'src/compiler/prepare_grammar/expand_repeats.cc',
@ -58,6 +59,7 @@
'src/compiler/rules/visitor.cc',
'src/compiler/util/string_helpers.cc',
'externals/utf8proc/utf8proc.c',
'externals/json-parser/json.c',
],
'cflags_cc': [
'-std=c++0x',

View file

@ -1,4 +1,5 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/compile.h"
#include <fstream>
#include <iostream>
@ -29,10 +30,10 @@ describe("compiling the example grammars", []() {
it(("compiles the " + language + " grammar").c_str(), [&]() {
auto result = compile(grammar, language);
string code = result.first;
const GrammarError *error = result.second;
const CompileError error = result.second;
if (error)
AssertThat(error->message, Equals(""));
if (error.type)
AssertThat(error.message, Equals(""));
ofstream file(example_parser_dir + language + ".c");
file << get<0>(result);

View file

@ -1,32 +1,45 @@
#include "compiler/compiler_spec_helper.h"
#include "tree_sitter/compiler.h"
#include "compiler/compile.h"
using namespace rules;
START_TEST
describe("Compile", []() {
describe("compile_grammar", []() {
describe("when the grammar's start symbol is a token", [&]() {
it("does not fail", [&]() {
Grammar grammar({
{ "rule1", str("the-value") }
});
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "the_grammar",
"rules": {
"rule1": {
"type": "STRING",
"value": "hello"
}
}
}
)JSON");
auto result = compile(grammar, "test_grammar");
const GrammarError *error = result.second;
AssertThat(error, Equals<const GrammarError *>(nullptr));
AssertThat(string(result.error_message), IsEmpty());
AssertThat(string(result.code), !IsEmpty());
});
});
describe("when the grammar's start symbol is blank", [&]() {
it("does not fail", [&]() {
Grammar grammar({
{ "rule1", blank() }
});
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "the_grammar",
"rules": {
"rule1": {
"type": "BLANK"
}
}
}
)JSON");
auto result = compile(grammar, "test_grammar");
const GrammarError *error = result.second;
AssertThat(error, Equals<const GrammarError *>(nullptr));
AssertThat(string(result.error_message), IsEmpty());
AssertThat(string(result.code), !IsEmpty());
});
});
});

View file

@ -5,7 +5,7 @@
#include "compiler/helpers/stream_methods.h"
#include "compiler/helpers/equals_pointer.h"
#include "compiler/helpers/rule_helpers.h"
#include "tree_sitter/compiler.h"
#include "compiler/rules.h"
using namespace tree_sitter;
using namespace std;

View file

@ -12,7 +12,7 @@ ostream &operator<<(ostream &stream, const Grammar &grammar) {
stream << string("#<grammar");
stream << string(" rules: {");
bool started = false;
for (auto pair : grammar.rules()) {
for (auto pair : grammar.rules) {
if (started)
stream << string(", ");
stream << pair.first;
@ -23,11 +23,11 @@ ostream &operator<<(ostream &stream, const Grammar &grammar) {
return stream << string("}>");
}
ostream &operator<<(ostream &stream, const GrammarError *error) {
if (error)
return stream << (string("#<grammar-error '") + error->message + "'>");
ostream &operator<<(ostream &stream, const CompileError &error) {
if (error.type)
return stream << (string("#<compile-error '") + error.message + "'>");
else
return stream << string("#<null>");
return stream << string("#<no-compile-error>");
}
ostream &operator<<(ostream &stream, const Rule &rule) {

View file

@ -7,7 +7,8 @@
#include <map>
#include <unordered_set>
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/grammar.h"
#include "compiler/compile_error.h"
using std::cout;
@ -98,7 +99,7 @@ struct ProductionStep;
struct PrecedenceRange;
ostream &operator<<(ostream &, const Grammar &);
ostream &operator<<(ostream &, const GrammarError &);
ostream &operator<<(ostream &, const CompileError &);
ostream &operator<<(ostream &, const Rule &);
ostream &operator<<(ostream &, const rule_ptr &);
ostream &operator<<(ostream &, const Variable &);

View file

@ -20,7 +20,7 @@ describe("expand_tokens", []() {
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, seq({
i_sym(10),
@ -69,7 +69,7 @@ describe("expand_tokens", []() {
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, seq({
i_sym(10),
@ -102,7 +102,7 @@ describe("expand_tokens", []() {
auto result = expand_tokens(grammar);
AssertThat(result.second, EqualsPointer(new GrammarError(GrammarErrorTypeRegex, "unmatched open paren")));
AssertThat(result.second, Equals(CompileError(TSCompileErrorTypeInvalidRegex, "unmatched open paren")));
});
});
});

View file

@ -30,9 +30,9 @@ describe("extract_tokens", []() {
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
const GrammarError *error = get<2>(result);
CompileError error = get<2>(result);
AssertThat(error, Equals<const GrammarError *>(nullptr));
AssertThat(error, Equals(CompileError::none()));
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, repeat1(seq({
@ -150,7 +150,7 @@ describe("extract_tokens", []() {
pattern("\\s+"),
}, {}});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), Equals(CompileError::none()));
AssertThat(get<1>(result).separators.size(), Equals<size_t>(2));
AssertThat(get<1>(result).separators[0], EqualsPointer(str("y")));
@ -167,7 +167,7 @@ describe("extract_tokens", []() {
str("y"),
}, {}});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), Equals(CompileError::none()));
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, true) })));
});
@ -181,7 +181,7 @@ describe("extract_tokens", []() {
i_sym(2),
}, {}});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), Equals(CompileError::none()));
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
{ Symbol(3, true) },
@ -196,9 +196,9 @@ describe("extract_tokens", []() {
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
}, { i_sym(1) }, {}});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), EqualsPointer(
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
AssertThat(get<2>(result), !Equals(CompileError::none()));
AssertThat(get<2>(result), Equals(
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
"Not a token: rule_B")));
});
@ -208,9 +208,9 @@ describe("extract_tokens", []() {
Variable("rule_B", VariableTypeNamed, str("y")),
}, { choice({ i_sym(1), blank() }) }, {}});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), EqualsPointer(
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
AssertThat(get<2>(result), !Equals(CompileError::none()));
AssertThat(get<2>(result), Equals(
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
"Not a token: (choice (sym 1) (blank))")));
});
});

View file

@ -10,15 +10,15 @@ using prepare_grammar::intern_symbols;
describe("intern_symbols", []() {
it("replaces named symbols with numerically-indexed symbols", [&]() {
Grammar grammar({
Grammar grammar{{
{ "x", choice({ sym("y"), sym("_z") }) },
{ "y", sym("_z") },
{ "_z", str("stuff") }
});
}, {}, {}};
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.variables, Equals(vector<Variable>({
Variable("x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })),
Variable("y", VariableTypeNamed, i_sym(2)),
@ -28,26 +28,28 @@ describe("intern_symbols", []() {
describe("when there are symbols that reference undefined rules", [&]() {
it("returns an error", []() {
Grammar grammar({
Grammar grammar{{
{ "x", sym("y") },
});
}, {}, {}};
auto result = intern_symbols(grammar);
AssertThat(result.second->message, Equals("Undefined rule 'y'"));
AssertThat(result.second.message, Equals("Undefined rule 'y'"));
});
});
it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() {
auto grammar = Grammar({
Grammar grammar{{
{ "x", choice({ sym("y"), sym("z") }) },
{ "y", sym("z") },
{ "z", str("stuff") }
}).extra_tokens({ sym("z") });
}, {
sym("z")
}, {}};
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.extra_tokens.size(), Equals<size_t>(1));
AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
});

View file

@ -222,8 +222,8 @@ describe("parse_regex", []() {
for (auto &row : invalid_inputs) {
it(("handles invalid regexes with " + row.description).c_str(), [&]() {
auto result = parse_regex(row.pattern);
AssertThat(result.second, !Equals((const GrammarError *)nullptr));
AssertThat(result.second->message, Contains(row.message));
AssertThat(result.second.type, Equals(TSCompileErrorTypeInvalidRegex));
AssertThat(result.second.message, Contains(row.message));
});
}
});

View file

@ -3,14 +3,14 @@
namespace tree_sitter_examples {
extern const Grammar anonymous_tokens = Grammar({
extern const Grammar anonymous_tokens{{
{ "program", choice({
str("\n"),
str("\r"),
pattern("\\d"),
str("\"hello\"") }) },
}).extra_tokens({
}, {
pattern("\\s"),
});
}, {}};
} // namespace tree_sitter_examples

View file

@ -3,7 +3,7 @@
namespace tree_sitter_examples {
extern const Grammar arithmetic = Grammar({
extern const Grammar arithmetic{{
{ "program", sym("_expression") },
{ "_expression", choice({
@ -37,9 +37,9 @@ extern const Grammar arithmetic = Grammar({
pattern("[0-9]") })) })) },
{ "comment", pattern("#.*") },
}).extra_tokens({
}, {
sym("comment"),
pattern("\\s"),
});
}, {}};
} // namespace tree_sitter_examples

View file

@ -5,7 +5,7 @@ namespace tree_sitter_examples {
// http://slps.github.io/zoo/c/iso-9899-tc3.html
extern const Grammar c = Grammar({
extern const Grammar c{{
{ "translation_unit", repeat(choice({
sym("preproc_define"),
sym("preproc_call"),
@ -258,13 +258,13 @@ extern const Grammar c = Grammar({
pattern("[^\\*]"),
pattern("\\*[^/]") })),
str("*/") }) })) },
}).extra_tokens({
}, {
sym("comment"),
pattern("[ \t\r\n]"),
}).expected_conflicts({
}, {
{ "_type_specifier", "_expression" },
{ "_type_specifier", "_expression", "macro_type" },
{ "_type_specifier", "macro_type" },
});
}};
} // namespace tree_sitter_examples

View file

@ -5,7 +5,7 @@ namespace tree_sitter_examples {
// http://slps.github.io/zoo/cpp/iso-n2723.html
extern const Grammar cpp = Grammar({
extern const Grammar cpp{{
{ "translation_unit", repeat(sym("_declaration")) },
{ "_declaration", choice({
@ -211,13 +211,13 @@ extern const Grammar cpp = Grammar({
{ "number", pattern("\\d+(\\.\\d+)?") },
{ "comment", pattern("//[^\n]*") },
}).extra_tokens({
}, {
sym("comment"),
pattern("[ \t\r\n]"),
}).expected_conflicts({
}, {
{ "type_specifier", "_expression" },
{ "template_call", "_expression" },
{ "template_call", "relational_expression" },
});
}};
} // namespace tree_sitter_examples

View file

@ -9,7 +9,7 @@ static rule_ptr terminated(rule_ptr rule) {
str(";") }) });
}
extern const Grammar golang = Grammar({
extern const Grammar golang{{
{ "program", seq({
sym("package_directive"),
repeat(sym("imports_block")),
@ -203,10 +203,10 @@ extern const Grammar golang = Grammar({
{ "comment", pattern("//[^\n]*") },
}).extra_tokens({
}, {
sym("comment"),
sym("_line_break"),
pattern("[ \t\r]"),
});
}, {}};
} // namespace tree_sitter_examples

View file

@ -1,4 +1,4 @@
#include "tree_sitter/compiler.h"
#include "compiler/rules.h"
namespace tree_sitter_examples {

View file

@ -1,7 +1,8 @@
#ifndef TREESITTER_EXAMPLES_HELPERS_
#define TREESITTER_EXAMPLES_HELPERS_
#include "tree_sitter/compiler.h"
#include "compiler/rules.h"
#include "compiler/grammar.h"
namespace tree_sitter_examples {

View file

@ -30,7 +30,7 @@ enum {
PREC_ARGS = 16,
};
extern const Grammar javascript = Grammar({
extern const Grammar javascript{{
{ "program", repeat(sym("_statement")) },
/*
@ -349,13 +349,13 @@ extern const Grammar javascript = Grammar({
str(")"),
sym("statement_block") }) },
}).extra_tokens({
}, {
sym("comment"),
sym("_line_break"),
pattern("[ \t\r]"),
}).expected_conflicts({
}, {
{ "for_in_statement", "_expression" },
{ "method_definition", "_expression" },
});
}};
} // namespace tree_sitter_examples

View file

@ -3,7 +3,7 @@
namespace tree_sitter_examples {
extern const Grammar json = Grammar({
extern const Grammar json{{
{ "_value", choice({
sym("object"),
sym("array"),
@ -22,8 +22,8 @@ extern const Grammar json = Grammar({
{ "null", str("null") },
{ "true", str("true") },
{ "false", str("false") },
}).extra_tokens({
}, {
pattern("\\s"),
});
}, {}};
} // namespace tree_sitter_examples

View file

@ -1,7 +1,6 @@
#ifndef COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
#define COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
#include "tree_sitter/compiler.h"
#include "compiler/lex_table.h"
namespace tree_sitter {

View file

@ -45,7 +45,7 @@ class ParseTableBuilder {
const LexicalGrammar &lex_grammar)
: grammar(grammar), lexical_grammar(lex_grammar) {}
pair<ParseTable, const GrammarError *> build() {
pair<ParseTable, CompileError> build() {
Symbol start_symbol = Symbol(0, grammar.variables.empty());
Production start_production({
ProductionStep(start_symbol, 0, rules::AssociativityNone),
@ -68,9 +68,9 @@ class ParseTableBuilder {
add_shift_actions(item_set, state_id);
if (!conflicts.empty())
return { parse_table, new GrammarError(GrammarErrorTypeParseConflict,
"Unresolved conflict.\n\n" +
*conflicts.begin()) };
return { parse_table,
CompileError(TSCompileErrorTypeParseConflict,
"Unresolved conflict.\n\n" + *conflicts.begin()) };
}
for (ParseStateId state = 0; state < parse_table.states.size(); state++) {
@ -83,7 +83,7 @@ class ParseTableBuilder {
parse_table.symbols.insert({ rules::ERROR(), {} });
return { parse_table, nullptr };
return { parse_table, CompileError::none() };
}
private:
@ -370,7 +370,7 @@ class ParseTableBuilder {
}
};
pair<ParseTable, const GrammarError *> build_parse_table(
pair<ParseTable, CompileError> build_parse_table(
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
return ParseTableBuilder(grammar, lex_grammar).build();
}

View file

@ -4,7 +4,7 @@
#include <utility>
#include <vector>
#include "compiler/parse_table.h"
#include "tree_sitter/compiler.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
@ -13,8 +13,8 @@ struct LexicalGrammar;
namespace build_tables {
std::pair<ParseTable, const GrammarError *> build_parse_table(
const SyntaxGrammar &, const LexicalGrammar &);
std::pair<ParseTable, CompileError> build_parse_table(const SyntaxGrammar &,
const LexicalGrammar &);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -4,6 +4,7 @@
#include "compiler/build_tables/build_parse_table.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
namespace build_tables {
@ -13,11 +14,11 @@ using std::tuple;
using std::vector;
using std::make_tuple;
tuple<ParseTable, LexTable, const GrammarError *> build_tables(
tuple<ParseTable, LexTable, CompileError> build_tables(
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
auto parse_table_result = build_parse_table(grammar, lex_grammar);
ParseTable parse_table = parse_table_result.first;
const GrammarError *error = parse_table_result.second;
const CompileError error = parse_table_result.second;
LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
return make_tuple(parse_table, lex_table, error);
}

View file

@ -4,9 +4,9 @@
#include <string>
#include <tuple>
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/parse_table.h"
#include "compiler/lex_table.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
@ -15,7 +15,7 @@ struct LexicalGrammar;
namespace build_tables {
std::tuple<ParseTable, LexTable, const GrammarError *> build_tables(
std::tuple<ParseTable, LexTable, CompileError> build_tables(
const SyntaxGrammar &, const LexicalGrammar &);
} // namespace build_tables

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
#define COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace build_tables {

View file

@ -2,7 +2,6 @@
#include <set>
#include <vector>
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {

View file

@ -2,7 +2,6 @@
#define COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/symbol.h"

View file

@ -2,7 +2,6 @@
#define COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/syntax_grammar.h"
#include "compiler/build_tables/parse_item.h"

View file

@ -1,7 +1,6 @@
#include "compiler/build_tables/parse_item.h"
#include <string>
#include "compiler/syntax_grammar.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace build_tables {

View file

@ -1,5 +1,4 @@
#include "compiler/build_tables/rule_can_be_blank.h"
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_
#define COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace build_tables {

View file

@ -4,6 +4,8 @@
#include "compiler/generate_code/c_code.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/parse_grammar.h"
#include "json.h"
namespace tree_sitter {
@ -13,14 +15,44 @@ using std::vector;
using std::get;
using std::make_tuple;
pair<string, const GrammarError *> compile(const Grammar &grammar,
std::string name) {
extern "C" TSCompileResult ts_compile_grammar(const char *input) {
ParseGrammarResult parse_result = parse_grammar(string(input));
if (!parse_result.error_message.empty()) {
return { "", strdup(parse_result.error_message.c_str()),
TSCompileErrorTypeInvalidGrammar };
}
auto prepare_grammar_result =
prepare_grammar::prepare_grammar(parse_result.grammar);
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
CompileError error = get<2>(prepare_grammar_result);
if (error.type) {
return { "", strdup(error.message.c_str()), error.type };
}
auto table_build_result =
build_tables::build_tables(syntax_grammar, lexical_grammar);
const ParseTable &parse_table = get<0>(table_build_result);
const LexTable &lex_table = get<1>(table_build_result);
error = get<2>(table_build_result);
if (error.type) {
return { "", strdup(error.message.c_str()), error.type };
}
string code = generate_code::c_code(parse_result.name, parse_table, lex_table,
syntax_grammar, lexical_grammar);
return { strdup(code.c_str()), "", TSCompileErrorTypeNone };
}
pair<string, const CompileError> compile(const Grammar &grammar,
std::string name) {
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
const GrammarError *error = get<2>(prepare_grammar_result);
if (error)
CompileError error = get<2>(prepare_grammar_result);
if (error.type)
return { "", error };
auto table_build_result =
@ -28,14 +60,13 @@ pair<string, const GrammarError *> compile(const Grammar &grammar,
const ParseTable &parse_table = get<0>(table_build_result);
const LexTable &lex_table = get<1>(table_build_result);
error = get<2>(table_build_result);
if (error)
if (error.type)
return { "", error };
string code = generate_code::c_code(name, parse_table, lex_table,
syntax_grammar, lexical_grammar);
return { code, nullptr };
return { code, CompileError::none() };
}
} // namespace tree_sitter

16
src/compiler/compile.h Normal file
View file

@ -0,0 +1,16 @@
#ifndef COMPILER_COMPILE_H_
#define COMPILER_COMPILE_H_

#include <string>
#include <utility>

#include "compiler/compile_error.h"

namespace tree_sitter {

struct Grammar;

// Compiles `grammar` into generated C parser source for the language named
// by the second argument. On success the returned string holds the generated
// code and the error is CompileError::none(); on failure the string is empty
// and the error's `type`/`message` describe the problem.
std::pair<std::string, CompileError> compile(const Grammar &, std::string);

} // namespace tree_sitter

#endif // COMPILER_COMPILE_H_

View file

@ -0,0 +1,28 @@
#ifndef COMPILER_COMPILE_ERROR_H_
#define COMPILER_COMPILE_ERROR_H_

#include <string>

#include "tree_sitter/compiler.h"

namespace tree_sitter {

// Value type describing the outcome of a compilation step. Replaces the old
// heap-allocated `const GrammarError *`, removing the need for callers to
// manage its lifetime. Callers test `error.type` to detect failure.
class CompileError {
 public:
  CompileError(TSCompileErrorType type, std::string message)
      : type(type), message(message) {}

  // The "no error" sentinel: type is TSCompileErrorTypeNone and the
  // message is empty.
  static CompileError none() {
    return CompileError(TSCompileErrorTypeNone, "");
  }

  bool operator==(const CompileError &other) const {
    return type == other.type && message == other.message;
  }

  TSCompileErrorType type;
  std::string message;
};

} // namespace tree_sitter

#endif // COMPILER_COMPILE_ERROR_H_

View file

@ -188,7 +188,9 @@ class CCodeGenerator {
}
void add_lex_function() {
line("static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {");
line(
"static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) "
"{");
indent([&]() {
line("START_LEXER();");
_switch("state", [&]() {

View file

@ -1,44 +0,0 @@
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {
using std::ostream;
using std::pair;
using std::string;
using std::vector;
Grammar::Grammar(const vector<pair<string, rule_ptr>> &rules)
: rules_(rules), extra_tokens_({}) {}
const vector<pair<string, rule_ptr>> &Grammar::rules() const {
return rules_;
}
const vector<rule_ptr> &Grammar::extra_tokens() const {
return extra_tokens_;
}
const vector<vector<string>> &Grammar::expected_conflicts() const {
return expected_conflicts_;
}
Grammar &Grammar::extra_tokens(const vector<rule_ptr> &extra_tokens) {
extra_tokens_ = extra_tokens;
return *this;
}
Grammar &Grammar::expected_conflicts(
const vector<vector<string>> &expected_conflicts) {
expected_conflicts_ = expected_conflicts;
return *this;
}
GrammarError::GrammarError(GrammarErrorType type, string message)
: type(type), message(message) {}
bool GrammarError::operator==(const GrammarError &other) const {
return type == other.type && message == other.message;
}
} // namespace tree_sitter

19
src/compiler/grammar.h Normal file
View file

@ -0,0 +1,19 @@
#ifndef COMPILER_GRAMMAR_H_
#define COMPILER_GRAMMAR_H_

#include <vector>
#include <string>
#include <utility>

#include "compiler/rule.h"

namespace tree_sitter {

// Plain-data input grammar, built by parse_grammar() from JSON (or
// constructed directly) and consumed by prepare_grammar(). Replaces the
// previous Grammar class that exposed these fields via accessor methods.
struct Grammar {
  // Named rules in definition order; symbol indices produced during
  // interning refer to positions in this vector.
  std::vector<std::pair<std::string, rule_ptr>> rules;

  // Tokens allowed to appear anywhere between other tokens
  // (e.g. whitespace).
  std::vector<rule_ptr> extra_tokens;

  // Groups of rule names whose parse conflicts are expected and should not
  // be reported as errors.
  std::vector<std::vector<std::string>> expected_conflicts;
};

} // namespace tree_sitter

#endif // COMPILER_GRAMMAR_H_

View file

@ -4,7 +4,7 @@
#include <vector>
#include <string>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
#include "compiler/variable.h"
namespace tree_sitter {

View file

@ -0,0 +1,326 @@
#include "compiler/parse_grammar.h"
#include <string>
#include <vector>
#include <utility>
#include "json.h"
#include "compiler/rule.h"
#include "compiler/rules.h"
namespace tree_sitter {
using std::string;
using std::vector;
using std::pair;
// Result of parse_rule(). On success `rule` is non-null and `error_message`
// is empty; on failure `rule` is null and `error_message` describes the
// problem (callers test `result.rule.get()`).
struct ParseRuleResult {
  rule_ptr rule;
  string error_message;
};
ParseRuleResult parse_rule(json_value *rule_json) {
string error_message;
json_value rule_type_json;
string type;
if (!rule_json) {
error_message = "Rule cannot be null";
goto error;
}
if (rule_json->type != json_object) {
error_message = "Rule type must be an object";
goto error;
}
rule_type_json = rule_json->operator[]("type");
if (rule_type_json.type != json_string) {
error_message = "Rule type must be a string";
goto error;
}
type = rule_type_json.u.string.ptr;
if (type == "BLANK") {
return { blank(), "" };
}
if (type == "CHOICE") {
json_value members_json = rule_json->operator[]("members");
if (members_json.type != json_array) {
error_message = "Choice members must be an array";
goto error;
}
vector<rule_ptr> members;
for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
json_value *member_json = members_json.u.array.values[i];
ParseRuleResult member = parse_rule(member_json);
if (member.rule.get()) {
members.push_back(member.rule);
} else {
error_message = "Invalid choice member: " + member.error_message;
goto error;
}
}
return { choice(members), "" };
}
if (type == "SEQ") {
json_value members_json = rule_json->operator[]("members");
if (members_json.type != json_array) {
error_message = "Seq members must be an array";
goto error;
}
vector<rule_ptr> members;
for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
json_value *member_json = members_json.u.array.values[i];
ParseRuleResult member = parse_rule(member_json);
if (member.rule.get()) {
members.push_back(member.rule);
} else {
error_message = "Invalid seq member: " + member.error_message;
goto error;
}
}
return { seq(members), "" };
}
if (type == "ERROR") {
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (content.rule.get()) {
return { err(content.rule), "" };
} else {
error_message = "Invalid error content: " + content.error_message;
goto error;
}
}
if (type == "REPEAT") {
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (content.rule.get()) {
return { repeat(content.rule), "" };
} else {
error_message = "Invalid repeat content: " + content.error_message;
goto error;
}
}
if (type == "REPEAT1") {
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (content.rule.get()) {
return { repeat1(content.rule), "" };
} else {
error_message = "Invalid repeat1 content: " + content.error_message;
goto error;
}
}
if (type == "TOKEN") {
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (content.rule.get()) {
return { token(content.rule), "" };
} else {
error_message = "Invalid token content: " + content.error_message;
goto error;
}
}
if (type == "PATTERN") {
json_value value_json = rule_json->operator[]("value");
if (value_json.type == json_string) {
return { pattern(value_json.u.string.ptr), "" };
} else {
error_message = "Pattern value must be a string";
goto error;
}
}
if (type == "STRING") {
json_value value_json = rule_json->operator[]("value");
if (value_json.type == json_string) {
return { str(value_json.u.string.ptr), "" };
} else {
error_message = "String rule value must be a string";
goto error;
}
}
if (type == "SYMBOL") {
json_value value_json = rule_json->operator[]("name");
if (value_json.type == json_string) {
return { sym(value_json.u.string.ptr), "" };
} else {
error_message = "Symbol value must be a string";
goto error;
}
}
if (type == "PREC") {
json_value precedence_json = rule_json->operator[]("value");
if (precedence_json.type != json_integer) {
error_message = "Precedence value must be an integer";
goto error;
}
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (!content.rule.get()) {
error_message = "Invalid precedence content: " + content.error_message;
goto error;
}
return { prec(precedence_json.u.integer, content.rule), "" };
}
if (type == "PREC_LEFT") {
json_value precedence_json = rule_json->operator[]("value");
if (precedence_json.type != json_integer) {
error_message = "Precedence value must be an integer";
goto error;
}
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (!content.rule.get()) {
error_message = "Invalid precedence content: " + content.error_message;
goto error;
}
return { prec_left(precedence_json.u.integer, content.rule), "" };
}
if (type == "PREC_RIGHT") {
json_value precedence_json = rule_json->operator[]("value");
if (precedence_json.type != json_integer) {
error_message = "Precedence value must be an integer";
goto error;
}
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (!content.rule.get()) {
error_message = "Invalid precedence content: " + content.error_message;
goto error;
}
return { prec_right(precedence_json.u.integer, content.rule), "" };
}
error_message = "Unknown rule type " + type;
error:
return { rule_ptr(), error_message };
}
// Parses a complete JSON grammar document into a ParseGrammarResult.
// On success `error_message` is empty and `name`/`grammar` are populated;
// on failure only `error_message` is meaningful.
//
// Fix: the json-parser tree (`grammar_json`) was previously freed only on
// the error path, leaking it on every successful parse. It is now freed on
// both paths. This is safe because every piece of JSON string data is
// copied into std::strings / rules before the success return.
ParseGrammarResult parse_grammar(const string &input) {
  string error_message;
  string name;
  Grammar grammar;

  // These are declared before the first `goto error` so that no jump
  // crosses a variable's initialization.
  json_value name_json, rules_json, extras_json, conflicts_json;
  json_settings settings = { 0, 0, 0, 0, 0, 0 };
  char parse_error[json_error_max];
  json_value *grammar_json =
    json_parse_ex(&settings, input.c_str(), input.size(), parse_error);

  if (!grammar_json) {
    error_message = string("Invalid JSON at ") + parse_error;
    goto error;
  }

  if (grammar_json->type != json_object) {
    error_message = "Body must be an object";
    goto error;
  }

  // "name" is required and names the generated language.
  name_json = grammar_json->operator[]("name");
  if (name_json.type != json_string) {
    error_message = "Name must be a string";
    goto error;
  }
  name = name_json.u.string.ptr;

  // "rules" is required: an object mapping rule names to rule bodies.
  rules_json = grammar_json->operator[]("rules");
  if (rules_json.type != json_object) {
    error_message = "Rules must be an object";
    goto error;
  }
  for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) {
    json_object_entry entry_json = rules_json.u.object.values[i];
    ParseRuleResult entry = parse_rule(entry_json.value);
    if (!entry.rule.get()) {
      error_message =
        string("Invalid rule '") + entry_json.name + "' " + entry.error_message;
      goto error;
    }
    grammar.rules.push_back({ string(entry_json.name), entry.rule });
  }

  // "extras" is optional; when present it must be an array of token rules.
  extras_json = grammar_json->operator[]("extras");
  if (extras_json.type != json_none) {
    if (extras_json.type != json_array) {
      error_message = "Extras must be an array";
      goto error;
    }
    for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) {
      json_value *extra_json = extras_json.u.array.values[i];
      ParseRuleResult extra = parse_rule(extra_json);
      if (!extra.rule.get()) {
        error_message = string("Invalid extra token: ") + extra.error_message;
        goto error;
      }
      grammar.extra_tokens.push_back(extra.rule);
    }
  }

  // "conflicts" is optional; when present it must be an array of arrays of
  // rule-name strings.
  conflicts_json = grammar_json->operator[]("conflicts");
  if (conflicts_json.type != json_none) {
    if (conflicts_json.type != json_array) {
      error_message = "Conflicts must be an array";
      goto error;
    }
    for (size_t i = 0, length = conflicts_json.u.array.length; i < length; i++) {
      json_value *conflict_json = conflicts_json.u.array.values[i];
      if (conflict_json->type != json_array) {
        error_message = "Each conflict entry must be an array";
        goto error;
      }
      vector<string> conflict;
      for (size_t j = 0, conflict_length = conflict_json->u.array.length;
           j < conflict_length; j++) {
        json_value *conflict_entry_json = conflict_json->u.array.values[j];
        if (conflict_entry_json->type != json_string) {
          error_message = "Each conflict entry must be an array of strings";
          goto error;
        }
        conflict.push_back(string(conflict_entry_json->u.string.ptr));
      }
      grammar.expected_conflicts.push_back(conflict);
    }
  }

  // Success: release the JSON tree (previously leaked on this path).
  json_value_free(grammar_json);
  return { name, grammar, "" };

error:
  if (grammar_json) {
    json_value_free(grammar_json);
  }
  return { "", Grammar{}, error_message };
}
} // namespace tree_sitter

View file

@ -0,0 +1,20 @@
#ifndef COMPILER_GRAMMAR_JSON_H_
#define COMPILER_GRAMMAR_JSON_H_

#include <string>

#include "tree_sitter/compiler.h"
#include "compiler/grammar.h"

namespace tree_sitter {

// Result of parse_grammar(): on success `error_message` is empty and
// `name`/`grammar` hold the parsed language name and rules; on failure only
// `error_message` is meaningful.
struct ParseGrammarResult {
  std::string name;
  Grammar grammar;
  std::string error_message;
};

// Parses a JSON grammar document into a Grammar plus its language name.
// Errors are reported through `error_message`, not exceptions.
// NOTE(review): the include guard says GRAMMAR_JSON but the file is
// parse_grammar.h — consider renaming the guard for consistency.
ParseGrammarResult parse_grammar(const std::string &);

} // namespace tree_sitter

#endif // COMPILER_GRAMMAR_JSON_H_

View file

@ -3,6 +3,7 @@
#include <string>
#include <utility>
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/rule.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/symbol.h"

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_
#define COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
namespace tree_sitter {
namespace prepare_grammar {

View file

@ -53,36 +53,35 @@ class ExpandTokens : public rules::IdentityRuleFn {
rule_ptr apply_to(const Pattern *rule) {
auto pair = parse_regex(rule->value);
if (!error)
if (!error.type)
error = pair.second;
return pair.first;
}
public:
const GrammarError *error;
ExpandTokens() : error(nullptr) {}
CompileError error;
ExpandTokens() : error(CompileError::none()) {}
};
pair<LexicalGrammar, const GrammarError *> expand_tokens(
const LexicalGrammar &grammar) {
pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &grammar) {
LexicalGrammar result;
ExpandTokens expander;
for (const Variable &variable : grammar.variables) {
auto rule = expander.apply(variable.rule);
if (expander.error)
if (expander.error.type)
return { result, expander.error };
result.variables.push_back(Variable(variable.name, variable.type, rule));
}
for (auto &sep : grammar.separators) {
auto rule = expander.apply(sep);
if (expander.error)
if (expander.error.type)
return { result, expander.error };
result.separators.push_back(rule);
}
return { result, nullptr };
return { result, CompileError::none() };
}
} // namespace prepare_grammar

View file

@ -2,7 +2,7 @@
#define COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
@ -10,8 +10,7 @@ struct LexicalGrammar;
namespace prepare_grammar {
std::pair<LexicalGrammar, const GrammarError *> expand_tokens(
const LexicalGrammar &);
std::pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -2,7 +2,7 @@
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {

View file

@ -90,12 +90,12 @@ class TokenExtractor : public rules::IdentityRuleFn {
vector<Variable> tokens;
};
static const GrammarError *ubiq_token_err(const string &message) {
return new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
"Not a token: " + message);
static CompileError ubiq_token_err(const string &message) {
return CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
"Not a token: " + message);
}
tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
const InternedGrammar &grammar) {
InitialSyntaxGrammar syntax_grammar;
LexicalGrammar lexical_grammar;
@ -186,7 +186,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens
syntax_grammar.extra_tokens.insert(new_symbol);
}
return make_tuple(syntax_grammar, lexical_grammar, nullptr);
return make_tuple(syntax_grammar, lexical_grammar, CompileError::none());
}
} // namespace prepare_grammar

View file

@ -2,7 +2,7 @@
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_
#include <tuple>
#include "tree_sitter/compiler.h"
#include "compiler/compile_error.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/prepare_grammar/interned_grammar.h"
@ -10,8 +10,8 @@
namespace tree_sitter {
namespace prepare_grammar {
std::tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *>
extract_tokens(const InternedGrammar &);
std::tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
const InternedGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -3,6 +3,7 @@
#include <vector>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/named_symbol.h"
@ -31,8 +32,8 @@ class InternSymbols : public rules::IdentityRuleFn {
public:
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
for (size_t i = 0; i < grammar.rules().size(); i++)
if (grammar.rules()[i].first == rule_name)
for (size_t i = 0; i < grammar.rules.size(); i++)
if (grammar.rules[i].first == rule_name)
return make_shared<rules::Symbol>(i);
return nullptr;
}
@ -42,16 +43,16 @@ class InternSymbols : public rules::IdentityRuleFn {
string missing_rule_name;
};
const GrammarError *missing_rule_error(string rule_name) {
return new GrammarError(GrammarErrorTypeUndefinedSymbol,
"Undefined rule '" + rule_name + "'");
CompileError missing_rule_error(string rule_name) {
return CompileError(TSCompileErrorTypeUndefinedSymbol,
"Undefined rule '" + rule_name + "'");
}
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
pair<InternedGrammar, CompileError> intern_symbols(const Grammar &grammar) {
InternedGrammar result;
InternSymbols interner(grammar);
for (auto &pair : grammar.rules()) {
for (auto &pair : grammar.rules) {
auto new_rule = interner.apply(pair.second);
if (!interner.missing_rule_name.empty())
return { result, missing_rule_error(interner.missing_rule_name) };
@ -61,14 +62,14 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &gramma
new_rule));
}
for (auto &rule : grammar.extra_tokens()) {
for (auto &rule : grammar.extra_tokens) {
auto new_rule = interner.apply(rule);
if (!interner.missing_rule_name.empty())
return { result, missing_rule_error(interner.missing_rule_name) };
result.extra_tokens.push_back(new_rule);
}
for (auto &names : grammar.expected_conflicts()) {
for (auto &names : grammar.expected_conflicts) {
set<rules::Symbol> entry;
for (auto &name : names) {
auto symbol = interner.symbol_for_rule_name(name);
@ -78,7 +79,7 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &gramma
result.expected_conflicts.insert(entry);
}
return { result, nullptr };
return { result, CompileError::none() };
}
} // namespace prepare_grammar

View file

@ -3,13 +3,16 @@
#include <utility>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/compile_error.h"
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
struct Grammar;
namespace prepare_grammar {
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
std::pair<InternedGrammar, CompileError> intern_symbols(const Grammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
#define COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {

View file

@ -32,7 +32,7 @@ class PatternParser {
next();
}
pair<rule_ptr, const GrammarError *> rule(bool nested) {
pair<rule_ptr, CompileError> rule(bool nested) {
vector<rule_ptr> choices = {};
do {
if (!choices.empty()) {
@ -42,17 +42,17 @@ class PatternParser {
break;
}
auto pair = term(nested);
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
choices.push_back(pair.first);
} while (has_more_input());
auto rule =
(choices.size() > 1) ? make_shared<Choice>(choices) : choices.front();
return { rule, nullptr };
return { rule, CompileError::none() };
}
private:
pair<rule_ptr, const GrammarError *> term(bool nested) {
pair<rule_ptr, CompileError> term(bool nested) {
rule_ptr result = Blank::build();
do {
if (peek() == '|')
@ -60,16 +60,16 @@ class PatternParser {
if (nested && peek() == ')')
break;
auto pair = factor();
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
result = Seq::build({ result, pair.first });
} while (has_more_input());
return { result, nullptr };
return { result, CompileError::none() };
}
pair<rule_ptr, const GrammarError *> factor() {
pair<rule_ptr, CompileError> factor() {
auto pair = atom();
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
rule_ptr result = pair.first;
if (has_more_input()) {
@ -88,30 +88,30 @@ class PatternParser {
break;
}
}
return { result, nullptr };
return { result, CompileError::none() };
}
pair<rule_ptr, const GrammarError *> atom() {
pair<rule_ptr, CompileError> atom() {
switch (peek()) {
case '(': {
next();
auto pair = rule(true);
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
if (peek() != ')')
return error("unmatched open paren");
next();
return { pair.first, nullptr };
return { pair.first, CompileError::none() };
}
case '[': {
next();
auto pair = char_set();
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
if (peek() != ']')
return error("unmatched open square bracket");
next();
return { pair.first.copy(), nullptr };
return { pair.first.copy(), CompileError::none() };
}
case ')': {
return error("unmatched close paren");
@ -121,18 +121,19 @@ class PatternParser {
}
case '.': {
next();
return { CharacterSet().include_all().exclude('\n').copy(), nullptr };
return { CharacterSet().include_all().exclude('\n').copy(),
CompileError::none() };
}
default: {
auto pair = single_char();
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
return { pair.first.copy(), nullptr };
return { pair.first.copy(), CompileError::none() };
}
}
}
pair<CharacterSet, const GrammarError *> char_set() {
pair<CharacterSet, CompileError> char_set() {
CharacterSet result;
bool is_affirmative = true;
if (peek() == '^') {
@ -143,7 +144,7 @@ class PatternParser {
while (has_more_input() && (peek() != ']')) {
auto pair = single_char();
if (pair.second)
if (pair.second.type)
return { CharacterSet(), pair.second };
if (is_affirmative)
result.add_set(pair.first);
@ -151,10 +152,10 @@ class PatternParser {
result.remove_set(pair.first);
}
return { result, nullptr };
return { result, CompileError::none() };
}
pair<CharacterSet, const GrammarError *> single_char() {
pair<CharacterSet, CompileError> single_char() {
CharacterSet value;
switch (peek()) {
case '\\':
@ -173,7 +174,7 @@ class PatternParser {
value = CharacterSet().include(first_char);
}
}
return { value, nullptr };
return { value, CompileError::none() };
}
CharacterSet escaped_char(uint32_t value) {
@ -217,8 +218,8 @@ class PatternParser {
return lookahead && iter <= end;
}
pair<rule_ptr, const GrammarError *> error(string msg) {
return { Blank::build(), new GrammarError(GrammarErrorTypeRegex, msg) };
pair<rule_ptr, CompileError> error(string msg) {
return { Blank::build(), CompileError(TSCompileErrorTypeInvalidRegex, msg) };
}
string input;
@ -227,7 +228,7 @@ class PatternParser {
int32_t lookahead;
};
pair<rule_ptr, const GrammarError *> parse_regex(const std::string &input) {
pair<rule_ptr, CompileError> parse_regex(const std::string &input) {
return PatternParser(input.c_str()).rule(false);
}

View file

@ -3,12 +3,13 @@
#include <string>
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
namespace prepare_grammar {
std::pair<rule_ptr, const GrammarError *> parse_regex(const std::string &);
std::pair<rule_ptr, CompileError> parse_regex(const std::string &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -16,14 +16,14 @@ using std::tuple;
using std::get;
using std::make_tuple;
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
const Grammar &input_grammar) {
/*
* Convert all string-based `NamedSymbols` into numerical `Symbols`
*/
auto intern_result = intern_symbols(input_grammar);
const GrammarError *error = intern_result.second;
if (error)
CompileError error = intern_result.second;
if (error.type)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
/*
@ -31,7 +31,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
*/
auto extract_result = extract_tokens(intern_result.first);
error = get<2>(extract_result);
if (error)
if (error.type)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
/*
@ -45,7 +45,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
auto expand_tokens_result = expand_tokens(get<1>(extract_result));
LexicalGrammar lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;
if (error)
if (error.type)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
/*
@ -58,7 +58,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
*/
lex_grammar = normalize_rules(lex_grammar);
return make_tuple(syntax_grammar, lex_grammar, nullptr);
return make_tuple(syntax_grammar, lex_grammar, CompileError::none());
}
} // namespace prepare_grammar

View file

@ -4,15 +4,15 @@
#include <tuple>
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
class Grammar;
class GrammarError;
struct Grammar;
namespace prepare_grammar {
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
std::tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
const Grammar &);
} // namespace prepare_grammar

View file

@ -1,5 +1,4 @@
#include "compiler/prepare_grammar/token_description.h"
#include "tree_sitter/compiler.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/seq.h"

View file

@ -2,7 +2,7 @@
#define COMPILER_PREPARE_GRAMMAR_TOKEN_DESCRIPTION_H_
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {

29
src/compiler/rules.h Normal file
View file

@ -0,0 +1,29 @@
#ifndef COMPILER_RULES_H_
#define COMPILER_RULES_H_

#include <string>
#include <vector>
#include <memory>

#include "compiler/rule.h"

namespace tree_sitter {

// Free-function constructors for grammar rules. These are the building
// blocks used by parse_grammar() when converting JSON rule objects into
// rule_ptrs.

rule_ptr blank();
rule_ptr choice(const std::vector<rule_ptr> &);
rule_ptr repeat(const rule_ptr &);
rule_ptr repeat1(const rule_ptr &);
rule_ptr seq(const std::vector<rule_ptr> &);
rule_ptr sym(const std::string &);      // reference to a named rule
rule_ptr pattern(const std::string &);  // regex-based token
rule_ptr str(const std::string &);      // literal string token
rule_ptr err(const rule_ptr &);
rule_ptr prec(int precedence, const rule_ptr &);
rule_ptr prec_left(const rule_ptr &);
rule_ptr prec_left(int precedence, const rule_ptr &);
rule_ptr prec_right(const rule_ptr &);
rule_ptr prec_right(int precedence, const rule_ptr &);
rule_ptr token(const rule_ptr &rule);

} // namespace tree_sitter

#endif // COMPILER_RULES_H_

View file

@ -3,7 +3,6 @@
#include <string>
#include <map>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {

View file

@ -2,8 +2,8 @@
#include <map>
#include <set>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
#include "compiler/rules.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/choice.h"

View file

@ -4,7 +4,6 @@
#include <vector>
#include <string>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/variable.h"

View file

@ -2,7 +2,7 @@
#define COMPILER_VARIABLE_H_
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {