Merge pull request #22 from maxbrunsfeld/c-compiler-api
Simplify the compiler API
This commit is contained in:
commit
49f393b75e
67 changed files with 1083 additions and 375 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
|
@ -7,3 +7,6 @@
|
|||
[submodule "externals/utf8proc"]
|
||||
path = externals/utf8proc
|
||||
url = https://github.com/julialang/utf8proc
|
||||
[submodule "externals/json-parser"]
|
||||
path = externals/json-parser
|
||||
url = https://github.com/udp/json-parser.git
|
||||
|
|
|
|||
211
README.md
211
README.md
|
|
@ -2,9 +2,11 @@
|
|||
|
||||
[](https://travis-ci.org/maxbrunsfeld/tree-sitter)
|
||||
|
||||
Tree-sitter is an incremental parsing library in C and C++, intended to be used via [bindings](https://github.com/maxbrunsfeld/node-tree-sitter) to higher-level
|
||||
languages. It allows documents to be efficiently re-parsed after localized
|
||||
edits, making it suitable for use in performance-intensive text-editing programs.
|
||||
Tree-sitter is a C library for incremental parsing, intended to be used via
|
||||
[bindings](https://github.com/maxbrunsfeld/node-tree-sitter) to higher-level
|
||||
languages. It can be used to build a concrete syntax tree for a program and
|
||||
efficiently update the syntax tree as the program is edited. This makes it suitable
|
||||
for use in text-editing programs.
|
||||
|
||||
Tree-sitter uses a sentential-form incremental [LR parsing](https://en.wikipedia.org/wiki/LR_parser)
|
||||
algorithm, as described in the paper *[Efficient and Flexible Incremental Parsing](http://harmonia.cs.berkeley.edu/papers/twagner-parsing.ps.gz)*
|
||||
|
|
@ -15,142 +17,176 @@ This allows it to generate a fast parser for any context-free grammar.
|
|||
### Installation
|
||||
|
||||
```sh
|
||||
script/configure.sh # Generate a Makefile using gyp
|
||||
script/configure.sh # Generate a Makefile
|
||||
make # Build static libraries for the compiler and runtime
|
||||
```
|
||||
|
||||
### Overview
|
||||
|
||||
Tree-sitter consists of two libraries. The first library, `libcompiler`, can be
|
||||
used to generate a parser for a language by supplying a [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) describing the
|
||||
language. Once the parser has been generated, `libcompiler` is no longer needed.
|
||||
|
||||
The second library, `libruntime`, is used in combination with the parsers
|
||||
generated by `libcompiler`, to produce syntax trees from text documents, and keep the
|
||||
syntax trees up-to-date as changes are made to the documents.
|
||||
|
||||
|
||||
### Writing a grammar
|
||||
|
||||
Tree-sitter's interface for creating grammars is a C++ library, `libcompiler`.
|
||||
This allows grammars and rules to be defined, manipulated and
|
||||
extended as simple values in high-level languages like [javascript](https://github.com/maxbrunsfeld/node-tree-sitter-compiler),
|
||||
and then converted into tree-sitter's native representation and compiled to C
|
||||
parsers. These parsers can then be used from any language that has a binding to
|
||||
tree-sitter's runtime library, `libruntime`.
|
||||
Tree-sitter's grammars are specified as JSON strings. This format allows them
|
||||
to be easily created and manipulated in high-level languages like [JavaScript](https://github.com/maxbrunsfeld/node-tree-sitter-compiler).
|
||||
The structure of a grammar is formally specified by [this JSON schema](./doc/grammar-schema.json).
|
||||
You can generate a parser for a grammar using the `ts_compile_grammar` function
|
||||
provided by `libcompiler`.
|
||||
|
||||
Here's a simple example that uses `libcompiler` directly:
|
||||
Here's a simple example of using `ts_compile_grammar` to create a parser for basic
|
||||
arithmetic expressions. It uses a C++11 raw string literal so that the JSON can span multiple lines.
|
||||
|
||||
```cpp
|
||||
// arithmetic_grammar.cc
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
using namespace tree_sitter;
|
||||
|
||||
int main() {
|
||||
auto arithmetic_grammar = Grammar({
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON(
|
||||
{
|
||||
"name": "arithmetic",
|
||||
|
||||
// The first rule listed in a grammar becomes the 'start rule'.
|
||||
{ "expression", choice({
|
||||
sym("sum"),
|
||||
sym("product"),
|
||||
sym("number"),
|
||||
sym("variable"),
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
// Error recovery is controlled by wrapping rule subtrees with `err`.
|
||||
seq({
|
||||
str("("),
|
||||
err(sym("expression")),
|
||||
str(")") }) }) },
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "sum"},
|
||||
{"type": "SYMBOL", "name": "product"},
|
||||
{"type": "SYMBOL", "name": "number"},
|
||||
{"type": "SYMBOL", "name": "variable"},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "("},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": ")"}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
// Tokens like '+' and '*' are described directly within the grammar's rules,
|
||||
// as opposed to in a separate lexer description.
|
||||
{ "sum", prec_left(1, seq({
|
||||
sym("expression"),
|
||||
str("+"),
|
||||
sym("expression") })) },
|
||||
"sum": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
// Ambiguities can be resolved at compile time by assigning precedence
|
||||
// values to rule subtrees.
|
||||
{ "product", prec_left(2, seq({
|
||||
sym("expression"),
|
||||
str("*"),
|
||||
sym("expression") })) },
|
||||
"product": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 2,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
// Tokens can be specified using ECMAScript regexps.
|
||||
{ "number", pattern("\\d+") },
|
||||
{ "variable", pattern("[a-zA-Z]+\\w*") },
|
||||
{ "comment", pattern("//.*") },
|
||||
"number": {"type": "PATTERN", "value": "\\d+"},
|
||||
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
|
||||
}
|
||||
}
|
||||
)JSON");
|
||||
|
||||
}).extra_tokens({
|
||||
if (result.error_type != TSCompileErrorTypeNone) {
|
||||
fprintf(stderr, "Compilation failed: %s\n", result.error_message);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Things that can appear anywhere in the language are expressed as
|
||||
// 'extra tokens'.
|
||||
sym("comment"),
|
||||
pattern("\\s+")
|
||||
});
|
||||
|
||||
// Generate C code for parsing this language.
|
||||
auto output = compile(arithmetic_grammar, "arithmetic");
|
||||
std::string c_code = output.first;
|
||||
const GrammarError *error = output.second;
|
||||
|
||||
assert(!error);
|
||||
puts(c_code.c_str());
|
||||
puts(result.code);
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
To create a parser for this language, compile and run this grammar like this:
|
||||
To create the parser, compile this file like this:
|
||||
|
||||
```sh
|
||||
clang++ -stdlib=libc++ -std=c++11 \
|
||||
-I tree-sitter/include -L tree-sitter/out/Debug -l compiler \
|
||||
arithmetic_grammar.cc -o arithmetic_grammar
|
||||
clang++ -std=c++11 \
|
||||
-I tree-sitter/include \
|
||||
-L tree-sitter/out/Release \
|
||||
-l compiler \
|
||||
arithmetic_grammar.cc \
|
||||
-o arithmetic_grammar
|
||||
```
|
||||
|
||||
Then run the executable to print out the C code for the parser:
|
||||
|
||||
```sh
|
||||
./arithmetic_grammar > arithmetic_parser.c
|
||||
```
|
||||
|
||||
### Using the parser
|
||||
|
||||
The `tree_sitter/runtime` C library exposes a DOM-style interface for inspecting
|
||||
documents.
|
||||
#### Providing the text to parse
|
||||
|
||||
Functions like `ts_node_child(node, index)` and `ts_node_next_sibling(node)`
|
||||
Text input is provided to a tree-sitter parser via a `TSInput` struct, which
|
||||
contains function pointers for seeking to positions in the text, and for reading
|
||||
chunks of text. The text can be encoded in either UTF8 or UTF16. This interface
|
||||
allows you to efficiently parse text that is stored in your own data structure.
|
||||
|
||||
#### Querying the syntax tree
|
||||
|
||||
The `libruntime` API provides a DOM-style interface for inspecting
|
||||
syntax trees. Functions like `ts_node_child(node, index)` and `ts_node_next_sibling(node)`
|
||||
expose every node in the concrete syntax tree. This is useful for operations
|
||||
like syntax-highlighting, that operate on a token-by-token basis. You can also
|
||||
like syntax-highlighting, which operate on a token-by-token basis. You can also
|
||||
traverse the tree in a more abstract way by using functions like
|
||||
`ts_node_named_child(node, index)` and `ts_node_next_named_sibling(node)`. These
|
||||
functions don't expose nodes that were specified in the grammar as anonymous
|
||||
tokens, like `(` and `+`. This is useful when analyzing the meaning of a document.
|
||||
|
||||
```c
|
||||
// test_parser.c
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "tree_sitter/runtime.h"
|
||||
|
||||
// Declare the language constructor that was generated from your grammar.
|
||||
// Declare the language function that was generated from your grammar.
|
||||
TSLanguage *ts_language_arithmetic();
|
||||
|
||||
int main() {
|
||||
TSDocument *document = ts_document_make();
|
||||
ts_document_set_language(document, ts_language_arithmetic());
|
||||
|
||||
// Usually, you would use the more general `ts_document_set_input`, which
|
||||
// takes a struct with function pointers for seeking to positions in the text,
|
||||
// and reading chunks of text. This allows you to efficiently parse text
|
||||
// stored in your own data structure.
|
||||
ts_document_set_input_string(document, "a + b * 5");
|
||||
ts_document_parse(document);
|
||||
|
||||
TSNode root_node = ts_document_root_node(document);
|
||||
printf(
|
||||
"Root name: %s, start: %lu, end: %lu\n",
|
||||
ts_node_name(root_node, document),
|
||||
ts_node_start_char(root_node),
|
||||
ts_node_end_char(root_node)
|
||||
);
|
||||
assert(!strcmp(ts_node_name(root_node, document), "expression"));
|
||||
assert(ts_node_named_child_count(root_node) == 1);
|
||||
|
||||
TSNode product_node = ts_node_named_child(ts_node_child(root_node, 0), 1);
|
||||
printf(
|
||||
"Child name: %s, start: %lu, end: %lu\n",
|
||||
ts_node_name(product_node, document),
|
||||
ts_node_start_char(product_node),
|
||||
ts_node_end_char(product_node)
|
||||
);
|
||||
TSNode sum_node = ts_node_named_child(root_node, 0);
|
||||
assert(!strcmp(ts_node_name(sum_node, document), "sum"));
|
||||
assert(ts_node_named_child_count(sum_node) == 2);
|
||||
|
||||
TSNode product_node = ts_node_child(ts_node_named_child(sum_node, 1), 0);
|
||||
assert(!strcmp(ts_node_name(product_node, document), "product"));
|
||||
assert(ts_node_named_child_count(product_node) == 2);
|
||||
|
||||
printf("Syntax tree: %s\n", ts_node_string(root_node, document));
|
||||
ts_document_free(document);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -159,9 +195,12 @@ int main() {
|
|||
To demo this parser's capabilities, compile this program like this:
|
||||
|
||||
```sh
|
||||
clang \
|
||||
-I tree-sitter/include -L tree-sitter/out/Debug -l runtime \
|
||||
arithmetic_parser.c test_parser.c -o test_parser
|
||||
clang \
|
||||
-I tree-sitter/include \
|
||||
-L tree-sitter/out/Debug \
|
||||
-l runtime \
|
||||
arithmetic_parser.c test_parser.c \
|
||||
-o test_parser
|
||||
|
||||
./test_parser
|
||||
```
|
||||
|
|
|
|||
256
doc/grammar-schema.json
Normal file
256
doc/grammar-schema.json
Normal file
|
|
@ -0,0 +1,256 @@
|
|||
{
|
||||
"type": "object",
|
||||
|
||||
"required": [
|
||||
"name",
|
||||
"rules"
|
||||
],
|
||||
|
||||
"additionalProperties": false,
|
||||
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"pattern": "^[a-zA-Z_]\\w*"
|
||||
},
|
||||
|
||||
"rules": {
|
||||
"type": "object",
|
||||
"patternProperties": {
|
||||
"^[a-zA-Z_]\\w*$": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
|
||||
"extras": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
|
||||
"conflicts": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"pattern": "^[a-zA-Z_]\\w*$"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"definitions": {
|
||||
"blank-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^BLANK$"
|
||||
}
|
||||
},
|
||||
"required": ["type"]
|
||||
},
|
||||
|
||||
"string-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^STRING$"
|
||||
},
|
||||
"value": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["type", "value"]
|
||||
},
|
||||
|
||||
"pattern-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^PATTERN$"
|
||||
},
|
||||
"value": {"type": "string"}
|
||||
},
|
||||
"required": ["type", "value"]
|
||||
},
|
||||
|
||||
"symbol-rule": {
|
||||
"required": ["name"],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^SYMBOL$"
|
||||
},
|
||||
"name": {"type": "string"}
|
||||
},
|
||||
"required": ["type", "name"]
|
||||
},
|
||||
|
||||
"seq-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^SEQ$"
|
||||
},
|
||||
"members": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["type", "members"]
|
||||
},
|
||||
|
||||
"choice-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^CHOICE$"
|
||||
},
|
||||
"members": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["type", "members"]
|
||||
},
|
||||
|
||||
"repeat-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^REPEAT$"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["type", "content"]
|
||||
},
|
||||
|
||||
"repeat1-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^REPEAT1$"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["type", "content"]
|
||||
},
|
||||
|
||||
"token-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^TOKEN$"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["type", "content"]
|
||||
},
|
||||
|
||||
"error-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^ERROR$"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["type", "content"]
|
||||
},
|
||||
|
||||
"prec-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^PREC$"
|
||||
},
|
||||
"value": {
|
||||
"type": "integer"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["type", "content", "value"]
|
||||
},
|
||||
|
||||
"prec-left-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^PREC_LEFT$"
|
||||
},
|
||||
"value": {
|
||||
"type": "integer"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["type", "content", "value"]
|
||||
},
|
||||
|
||||
"prec-right-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^PREC_RIGHT$"
|
||||
},
|
||||
"value": {
|
||||
"type": "integer"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["type", "content", "value"]
|
||||
},
|
||||
|
||||
"rule": {
|
||||
"oneOf": [
|
||||
{ "$ref": "#/definitions/blank-rule" },
|
||||
{ "$ref": "#/definitions/string-rule" },
|
||||
{ "$ref": "#/definitions/pattern-rule" },
|
||||
{ "$ref": "#/definitions/symbol-rule" },
|
||||
{ "$ref": "#/definitions/seq-rule" },
|
||||
{ "$ref": "#/definitions/choice-rule" },
|
||||
{ "$ref": "#/definitions/repeat1-rule" },
|
||||
{ "$ref": "#/definitions/repeat-rule" },
|
||||
{ "$ref": "#/definitions/token-rule" },
|
||||
{ "$ref": "#/definitions/error-rule" },
|
||||
{ "$ref": "#/definitions/prec-rule" },
|
||||
{ "$ref": "#/definitions/prec-left-rule" },
|
||||
{ "$ref": "#/definitions/prec-right-rule" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
1
externals/json-parser
vendored
Submodule
1
externals/json-parser
vendored
Submodule
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 70533215eea575e40a0b91a34ae01a779641d73a
|
||||
|
|
@ -1,65 +1,30 @@
|
|||
#ifndef TREE_SITTER_COMPILER_H_
|
||||
#define TREE_SITTER_COMPILER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
namespace tree_sitter {
|
||||
typedef enum {
|
||||
TSCompileErrorTypeNone,
|
||||
TSCompileErrorTypeInvalidGrammar,
|
||||
TSCompileErrorTypeInvalidRegex,
|
||||
TSCompileErrorTypeUndefinedSymbol,
|
||||
TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
TSCompileErrorTypeLexConflict,
|
||||
TSCompileErrorTypeParseConflict,
|
||||
} TSCompileErrorType;
|
||||
|
||||
class Rule;
|
||||
typedef std::shared_ptr<Rule> rule_ptr;
|
||||
typedef struct {
|
||||
const char *code;
|
||||
const char *error_message;
|
||||
TSCompileErrorType error_type;
|
||||
} TSCompileResult;
|
||||
|
||||
rule_ptr blank();
|
||||
rule_ptr choice(const std::vector<rule_ptr> &);
|
||||
rule_ptr repeat(const rule_ptr &);
|
||||
rule_ptr repeat1(const rule_ptr &);
|
||||
rule_ptr seq(const std::vector<rule_ptr> &);
|
||||
rule_ptr sym(const std::string &);
|
||||
rule_ptr pattern(const std::string &);
|
||||
rule_ptr str(const std::string &);
|
||||
rule_ptr err(const rule_ptr &);
|
||||
rule_ptr prec(int precedence, const rule_ptr &);
|
||||
rule_ptr prec_left(const rule_ptr &);
|
||||
rule_ptr prec_left(int precedence, const rule_ptr &);
|
||||
rule_ptr prec_right(const rule_ptr &);
|
||||
rule_ptr prec_right(int precedence, const rule_ptr &);
|
||||
rule_ptr token(const rule_ptr &rule);
|
||||
TSCompileResult ts_compile_grammar(const char *input);
|
||||
|
||||
class Grammar {
|
||||
const std::vector<std::pair<std::string, rule_ptr>> rules_;
|
||||
std::vector<rule_ptr> extra_tokens_;
|
||||
std::vector<std::vector<std::string>> expected_conflicts_;
|
||||
|
||||
public:
|
||||
explicit Grammar(const std::vector<std::pair<std::string, rule_ptr>> &);
|
||||
Grammar &extra_tokens(const std::vector<rule_ptr> &);
|
||||
Grammar &expected_conflicts(const std::vector<std::vector<std::string>> &);
|
||||
const std::vector<std::pair<std::string, rule_ptr>> &rules() const;
|
||||
const std::vector<rule_ptr> &extra_tokens() const;
|
||||
const std::vector<std::vector<std::string>> &expected_conflicts() const;
|
||||
};
|
||||
|
||||
enum GrammarErrorType {
|
||||
GrammarErrorTypeRegex,
|
||||
GrammarErrorTypeUndefinedSymbol,
|
||||
GrammarErrorTypeInvalidUbiquitousToken,
|
||||
GrammarErrorTypeLexConflict,
|
||||
GrammarErrorTypeParseConflict,
|
||||
};
|
||||
|
||||
class GrammarError {
|
||||
public:
|
||||
GrammarError(GrammarErrorType type, std::string message);
|
||||
bool operator==(const GrammarError &other) const;
|
||||
GrammarErrorType type;
|
||||
std::string message;
|
||||
};
|
||||
|
||||
std::pair<std::string, const GrammarError *> compile(const Grammar &,
|
||||
std::string);
|
||||
|
||||
} // namespace tree_sitter
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_COMPILER_H_
|
||||
|
|
|
|||
|
|
@ -94,17 +94,17 @@ struct TSLanguage {
|
|||
* Lexer Macros
|
||||
*/
|
||||
|
||||
#define START_LEXER() \
|
||||
lexer->start_fn(lexer, state); \
|
||||
int32_t lookahead; \
|
||||
next_state: \
|
||||
#define START_LEXER() \
|
||||
lexer->start_fn(lexer, state); \
|
||||
int32_t lookahead; \
|
||||
next_state: \
|
||||
lookahead = lexer->lookahead;
|
||||
|
||||
#define START_TOKEN() lexer->start_token_fn(lexer);
|
||||
|
||||
#define GO_TO_STATE(state_value) \
|
||||
{ \
|
||||
state = state_value; \
|
||||
state = state_value; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
'include',
|
||||
'src',
|
||||
'externals/utf8proc',
|
||||
'externals/json-parser',
|
||||
],
|
||||
'sources': [
|
||||
'src/compiler/build_tables/build_lex_table.cc',
|
||||
|
|
@ -24,8 +25,8 @@
|
|||
'src/compiler/build_tables/rule_can_be_blank.cc',
|
||||
'src/compiler/compile.cc',
|
||||
'src/compiler/generate_code/c_code.cc',
|
||||
'src/compiler/grammar.cc',
|
||||
'src/compiler/lex_table.cc',
|
||||
'src/compiler/parse_grammar.cc',
|
||||
'src/compiler/parse_table.cc',
|
||||
'src/compiler/precedence_range.cc',
|
||||
'src/compiler/prepare_grammar/expand_repeats.cc',
|
||||
|
|
@ -58,6 +59,7 @@
|
|||
'src/compiler/rules/visitor.cc',
|
||||
'src/compiler/util/string_helpers.cc',
|
||||
'externals/utf8proc/utf8proc.c',
|
||||
'externals/json-parser/json.c',
|
||||
],
|
||||
'cflags_cc': [
|
||||
'-std=c++0x',
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/compile.h"
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
|
|
@ -29,10 +30,10 @@ describe("compiling the example grammars", []() {
|
|||
it(("compiles the " + language + " grammar").c_str(), [&]() {
|
||||
auto result = compile(grammar, language);
|
||||
string code = result.first;
|
||||
const GrammarError *error = result.second;
|
||||
const CompileError error = result.second;
|
||||
|
||||
if (error)
|
||||
AssertThat(error->message, Equals(""));
|
||||
if (error.type)
|
||||
AssertThat(error.message, Equals(""));
|
||||
|
||||
ofstream file(example_parser_dir + language + ".c");
|
||||
file << get<0>(result);
|
||||
|
|
|
|||
|
|
@ -1,32 +1,45 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/compile.h"
|
||||
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("Compile", []() {
|
||||
describe("compile_grammar", []() {
|
||||
describe("when the grammar's start symbol is a token", [&]() {
|
||||
it("does not fail", [&]() {
|
||||
Grammar grammar({
|
||||
{ "rule1", str("the-value") }
|
||||
});
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON(
|
||||
{
|
||||
"name": "the_grammar",
|
||||
"rules": {
|
||||
"rule1": {
|
||||
"type": "STRING",
|
||||
"value": "hello"
|
||||
}
|
||||
}
|
||||
}
|
||||
)JSON");
|
||||
|
||||
auto result = compile(grammar, "test_grammar");
|
||||
const GrammarError *error = result.second;
|
||||
AssertThat(error, Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(string(result.error_message), IsEmpty());
|
||||
AssertThat(string(result.code), !IsEmpty());
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the grammar's start symbol is blank", [&]() {
|
||||
it("does not fail", [&]() {
|
||||
Grammar grammar({
|
||||
{ "rule1", blank() }
|
||||
});
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON(
|
||||
{
|
||||
"name": "the_grammar",
|
||||
"rules": {
|
||||
"rule1": {
|
||||
"type": "BLANK"
|
||||
}
|
||||
}
|
||||
}
|
||||
)JSON");
|
||||
|
||||
auto result = compile(grammar, "test_grammar");
|
||||
const GrammarError *error = result.second;
|
||||
AssertThat(error, Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(string(result.error_message), IsEmpty());
|
||||
AssertThat(string(result.code), !IsEmpty());
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
#include "compiler/helpers/stream_methods.h"
|
||||
#include "compiler/helpers/equals_pointer.h"
|
||||
#include "compiler/helpers/rule_helpers.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules.h"
|
||||
|
||||
using namespace tree_sitter;
|
||||
using namespace std;
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ ostream &operator<<(ostream &stream, const Grammar &grammar) {
|
|||
stream << string("#<grammar");
|
||||
stream << string(" rules: {");
|
||||
bool started = false;
|
||||
for (auto pair : grammar.rules()) {
|
||||
for (auto pair : grammar.rules) {
|
||||
if (started)
|
||||
stream << string(", ");
|
||||
stream << pair.first;
|
||||
|
|
@ -23,11 +23,11 @@ ostream &operator<<(ostream &stream, const Grammar &grammar) {
|
|||
return stream << string("}>");
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const GrammarError *error) {
|
||||
if (error)
|
||||
return stream << (string("#<grammar-error '") + error->message + "'>");
|
||||
ostream &operator<<(ostream &stream, const CompileError &error) {
|
||||
if (error.type)
|
||||
return stream << (string("#<compile-error '") + error.message + "'>");
|
||||
else
|
||||
return stream << string("#<null>");
|
||||
return stream << string("#<no-compile-error>");
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const Rule &rule) {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@
|
|||
#include <map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
using std::cout;
|
||||
|
||||
|
|
@ -98,7 +99,7 @@ struct ProductionStep;
|
|||
struct PrecedenceRange;
|
||||
|
||||
ostream &operator<<(ostream &, const Grammar &);
|
||||
ostream &operator<<(ostream &, const GrammarError &);
|
||||
ostream &operator<<(ostream &, const CompileError &);
|
||||
ostream &operator<<(ostream &, const Rule &);
|
||||
ostream &operator<<(ostream &, const rule_ptr &);
|
||||
ostream &operator<<(ostream &, const Variable &);
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ describe("expand_tokens", []() {
|
|||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
|
|
@ -69,7 +69,7 @@ describe("expand_tokens", []() {
|
|||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
|
|
@ -102,7 +102,7 @@ describe("expand_tokens", []() {
|
|||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, EqualsPointer(new GrammarError(GrammarErrorTypeRegex, "unmatched open paren")));
|
||||
AssertThat(result.second, Equals(CompileError(TSCompileErrorTypeInvalidRegex, "unmatched open paren")));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -30,9 +30,9 @@ describe("extract_tokens", []() {
|
|||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
const GrammarError *error = get<2>(result);
|
||||
CompileError error = get<2>(result);
|
||||
|
||||
AssertThat(error, Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(error, Equals(CompileError::none()));
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, repeat1(seq({
|
||||
|
|
@ -150,7 +150,7 @@ describe("extract_tokens", []() {
|
|||
pattern("\\s+"),
|
||||
}, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(2));
|
||||
AssertThat(get<1>(result).separators[0], EqualsPointer(str("y")));
|
||||
|
|
@ -167,7 +167,7 @@ describe("extract_tokens", []() {
|
|||
str("y"),
|
||||
}, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, true) })));
|
||||
});
|
||||
|
|
@ -181,7 +181,7 @@ describe("extract_tokens", []() {
|
|||
i_sym(2),
|
||||
}, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
|
||||
{ Symbol(3, true) },
|
||||
|
|
@ -196,9 +196,9 @@ describe("extract_tokens", []() {
|
|||
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
|
||||
}, { i_sym(1) }, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), EqualsPointer(
|
||||
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
AssertThat(get<2>(result), Equals(
|
||||
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: rule_B")));
|
||||
});
|
||||
|
||||
|
|
@ -208,9 +208,9 @@ describe("extract_tokens", []() {
|
|||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
}, { choice({ i_sym(1), blank() }) }, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), EqualsPointer(
|
||||
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
AssertThat(get<2>(result), Equals(
|
||||
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: (choice (sym 1) (blank))")));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -10,15 +10,15 @@ using prepare_grammar::intern_symbols;
|
|||
|
||||
describe("intern_symbols", []() {
|
||||
it("replaces named symbols with numerically-indexed symbols", [&]() {
|
||||
Grammar grammar({
|
||||
Grammar grammar{{
|
||||
{ "x", choice({ sym("y"), sym("_z") }) },
|
||||
{ "y", sym("_z") },
|
||||
{ "_z", str("stuff") }
|
||||
});
|
||||
}, {}, {}};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })),
|
||||
Variable("y", VariableTypeNamed, i_sym(2)),
|
||||
|
|
@ -28,26 +28,28 @@ describe("intern_symbols", []() {
|
|||
|
||||
describe("when there are symbols that reference undefined rules", [&]() {
|
||||
it("returns an error", []() {
|
||||
Grammar grammar({
|
||||
Grammar grammar{{
|
||||
{ "x", sym("y") },
|
||||
});
|
||||
}, {}, {}};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second->message, Equals("Undefined rule 'y'"));
|
||||
AssertThat(result.second.message, Equals("Undefined rule 'y'"));
|
||||
});
|
||||
});
|
||||
|
||||
it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() {
|
||||
auto grammar = Grammar({
|
||||
Grammar grammar{{
|
||||
{ "x", choice({ sym("y"), sym("z") }) },
|
||||
{ "y", sym("z") },
|
||||
{ "z", str("stuff") }
|
||||
}).extra_tokens({ sym("z") });
|
||||
}, {
|
||||
sym("z")
|
||||
}, {}};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.extra_tokens.size(), Equals<size_t>(1));
|
||||
AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -222,8 +222,8 @@ describe("parse_regex", []() {
|
|||
for (auto &row : invalid_inputs) {
|
||||
it(("handles invalid regexes with " + row.description).c_str(), [&]() {
|
||||
auto result = parse_regex(row.pattern);
|
||||
AssertThat(result.second, !Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.second->message, Contains(row.message));
|
||||
AssertThat(result.second.type, Equals(TSCompileErrorTypeInvalidRegex));
|
||||
AssertThat(result.second.message, Contains(row.message));
|
||||
});
|
||||
}
|
||||
});
|
||||
|
|
|
|||
6
spec/fixtures/grammars/anonymous_tokens.cc
vendored
6
spec/fixtures/grammars/anonymous_tokens.cc
vendored
|
|
@ -3,14 +3,14 @@
|
|||
|
||||
namespace tree_sitter_examples {
|
||||
|
||||
extern const Grammar anonymous_tokens = Grammar({
|
||||
extern const Grammar anonymous_tokens{{
|
||||
{ "program", choice({
|
||||
str("\n"),
|
||||
str("\r"),
|
||||
pattern("\\d"),
|
||||
str("\"hello\"") }) },
|
||||
}).extra_tokens({
|
||||
}, {
|
||||
pattern("\\s"),
|
||||
});
|
||||
}, {}};
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
6
spec/fixtures/grammars/arithmetic.cc
vendored
6
spec/fixtures/grammars/arithmetic.cc
vendored
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
namespace tree_sitter_examples {
|
||||
|
||||
extern const Grammar arithmetic = Grammar({
|
||||
extern const Grammar arithmetic{{
|
||||
{ "program", sym("_expression") },
|
||||
|
||||
{ "_expression", choice({
|
||||
|
|
@ -37,9 +37,9 @@ extern const Grammar arithmetic = Grammar({
|
|||
pattern("[0-9]") })) })) },
|
||||
|
||||
{ "comment", pattern("#.*") },
|
||||
}).extra_tokens({
|
||||
}, {
|
||||
sym("comment"),
|
||||
pattern("\\s"),
|
||||
});
|
||||
}, {}};
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
8
spec/fixtures/grammars/c.cc
vendored
8
spec/fixtures/grammars/c.cc
vendored
|
|
@ -5,7 +5,7 @@ namespace tree_sitter_examples {
|
|||
|
||||
// http://slps.github.io/zoo/c/iso-9899-tc3.html
|
||||
|
||||
extern const Grammar c = Grammar({
|
||||
extern const Grammar c{{
|
||||
{ "translation_unit", repeat(choice({
|
||||
sym("preproc_define"),
|
||||
sym("preproc_call"),
|
||||
|
|
@ -258,13 +258,13 @@ extern const Grammar c = Grammar({
|
|||
pattern("[^\\*]"),
|
||||
pattern("\\*[^/]") })),
|
||||
str("*/") }) })) },
|
||||
}).extra_tokens({
|
||||
}, {
|
||||
sym("comment"),
|
||||
pattern("[ \t\r\n]"),
|
||||
}).expected_conflicts({
|
||||
}, {
|
||||
{ "_type_specifier", "_expression" },
|
||||
{ "_type_specifier", "_expression", "macro_type" },
|
||||
{ "_type_specifier", "macro_type" },
|
||||
});
|
||||
}};
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
8
spec/fixtures/grammars/cpp.cc
vendored
8
spec/fixtures/grammars/cpp.cc
vendored
|
|
@ -5,7 +5,7 @@ namespace tree_sitter_examples {
|
|||
|
||||
// http://slps.github.io/zoo/cpp/iso-n2723.html
|
||||
|
||||
extern const Grammar cpp = Grammar({
|
||||
extern const Grammar cpp{{
|
||||
{ "translation_unit", repeat(sym("_declaration")) },
|
||||
|
||||
{ "_declaration", choice({
|
||||
|
|
@ -211,13 +211,13 @@ extern const Grammar cpp = Grammar({
|
|||
{ "number", pattern("\\d+(\\.\\d+)?") },
|
||||
|
||||
{ "comment", pattern("//[^\n]*") },
|
||||
}).extra_tokens({
|
||||
}, {
|
||||
sym("comment"),
|
||||
pattern("[ \t\r\n]"),
|
||||
}).expected_conflicts({
|
||||
}, {
|
||||
{ "type_specifier", "_expression" },
|
||||
{ "template_call", "_expression" },
|
||||
{ "template_call", "relational_expression" },
|
||||
});
|
||||
}};
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
6
spec/fixtures/grammars/golang.cc
vendored
6
spec/fixtures/grammars/golang.cc
vendored
|
|
@ -9,7 +9,7 @@ static rule_ptr terminated(rule_ptr rule) {
|
|||
str(";") }) });
|
||||
}
|
||||
|
||||
extern const Grammar golang = Grammar({
|
||||
extern const Grammar golang{{
|
||||
{ "program", seq({
|
||||
sym("package_directive"),
|
||||
repeat(sym("imports_block")),
|
||||
|
|
@ -203,10 +203,10 @@ extern const Grammar golang = Grammar({
|
|||
|
||||
{ "comment", pattern("//[^\n]*") },
|
||||
|
||||
}).extra_tokens({
|
||||
}, {
|
||||
sym("comment"),
|
||||
sym("_line_break"),
|
||||
pattern("[ \t\r]"),
|
||||
});
|
||||
}, {}};
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
2
spec/fixtures/grammars/helpers.cc
vendored
2
spec/fixtures/grammars/helpers.cc
vendored
|
|
@ -1,4 +1,4 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules.h"
|
||||
|
||||
namespace tree_sitter_examples {
|
||||
|
||||
|
|
|
|||
3
spec/fixtures/grammars/helpers.h
vendored
3
spec/fixtures/grammars/helpers.h
vendored
|
|
@ -1,7 +1,8 @@
|
|||
#ifndef TREESITTER_EXAMPLES_HELPERS_
|
||||
#define TREESITTER_EXAMPLES_HELPERS_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules.h"
|
||||
#include "compiler/grammar.h"
|
||||
|
||||
namespace tree_sitter_examples {
|
||||
|
||||
|
|
|
|||
8
spec/fixtures/grammars/javascript.cc
vendored
8
spec/fixtures/grammars/javascript.cc
vendored
|
|
@ -30,7 +30,7 @@ enum {
|
|||
PREC_ARGS = 16,
|
||||
};
|
||||
|
||||
extern const Grammar javascript = Grammar({
|
||||
extern const Grammar javascript{{
|
||||
{ "program", repeat(sym("_statement")) },
|
||||
|
||||
/*
|
||||
|
|
@ -349,13 +349,13 @@ extern const Grammar javascript = Grammar({
|
|||
str(")"),
|
||||
sym("statement_block") }) },
|
||||
|
||||
}).extra_tokens({
|
||||
}, {
|
||||
sym("comment"),
|
||||
sym("_line_break"),
|
||||
pattern("[ \t\r]"),
|
||||
}).expected_conflicts({
|
||||
}, {
|
||||
{ "for_in_statement", "_expression" },
|
||||
{ "method_definition", "_expression" },
|
||||
});
|
||||
}};
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
6
spec/fixtures/grammars/json.cc
vendored
6
spec/fixtures/grammars/json.cc
vendored
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
namespace tree_sitter_examples {
|
||||
|
||||
extern const Grammar json = Grammar({
|
||||
extern const Grammar json{{
|
||||
{ "_value", choice({
|
||||
sym("object"),
|
||||
sym("array"),
|
||||
|
|
@ -22,8 +22,8 @@ extern const Grammar json = Grammar({
|
|||
{ "null", str("null") },
|
||||
{ "true", str("true") },
|
||||
{ "false", str("false") },
|
||||
}).extra_tokens({
|
||||
}, {
|
||||
pattern("\\s"),
|
||||
});
|
||||
}, {}};
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
#define COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ class ParseTableBuilder {
|
|||
const LexicalGrammar &lex_grammar)
|
||||
: grammar(grammar), lexical_grammar(lex_grammar) {}
|
||||
|
||||
pair<ParseTable, const GrammarError *> build() {
|
||||
pair<ParseTable, CompileError> build() {
|
||||
Symbol start_symbol = Symbol(0, grammar.variables.empty());
|
||||
Production start_production({
|
||||
ProductionStep(start_symbol, 0, rules::AssociativityNone),
|
||||
|
|
@ -68,9 +68,9 @@ class ParseTableBuilder {
|
|||
add_shift_actions(item_set, state_id);
|
||||
|
||||
if (!conflicts.empty())
|
||||
return { parse_table, new GrammarError(GrammarErrorTypeParseConflict,
|
||||
"Unresolved conflict.\n\n" +
|
||||
*conflicts.begin()) };
|
||||
return { parse_table,
|
||||
CompileError(TSCompileErrorTypeParseConflict,
|
||||
"Unresolved conflict.\n\n" + *conflicts.begin()) };
|
||||
}
|
||||
|
||||
for (ParseStateId state = 0; state < parse_table.states.size(); state++) {
|
||||
|
|
@ -83,7 +83,7 @@ class ParseTableBuilder {
|
|||
|
||||
parse_table.symbols.insert({ rules::ERROR(), {} });
|
||||
|
||||
return { parse_table, nullptr };
|
||||
return { parse_table, CompileError::none() };
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
@ -370,7 +370,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
};
|
||||
|
||||
pair<ParseTable, const GrammarError *> build_parse_table(
|
||||
pair<ParseTable, CompileError> build_parse_table(
|
||||
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
|
||||
return ParseTableBuilder(grammar, lex_grammar).build();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/parse_table.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -13,8 +13,8 @@ struct LexicalGrammar;
|
|||
|
||||
namespace build_tables {
|
||||
|
||||
std::pair<ParseTable, const GrammarError *> build_parse_table(
|
||||
const SyntaxGrammar &, const LexicalGrammar &);
|
||||
std::pair<ParseTable, CompileError> build_parse_table(const SyntaxGrammar &,
|
||||
const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -13,11 +14,11 @@ using std::tuple;
|
|||
using std::vector;
|
||||
using std::make_tuple;
|
||||
|
||||
tuple<ParseTable, LexTable, const GrammarError *> build_tables(
|
||||
tuple<ParseTable, LexTable, CompileError> build_tables(
|
||||
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
|
||||
auto parse_table_result = build_parse_table(grammar, lex_grammar);
|
||||
ParseTable parse_table = parse_table_result.first;
|
||||
const GrammarError *error = parse_table_result.second;
|
||||
const CompileError error = parse_table_result.second;
|
||||
LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
|
||||
return make_tuple(parse_table, lex_table, error);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@
|
|||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lex_table.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -15,7 +15,7 @@ struct LexicalGrammar;
|
|||
|
||||
namespace build_tables {
|
||||
|
||||
std::tuple<ParseTable, LexTable, const GrammarError *> build_tables(
|
||||
std::tuple<ParseTable, LexTable, CompileError> build_tables(
|
||||
const SyntaxGrammar &, const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
|
||||
#define COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#include <set>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#define COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#define COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_
|
||||
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
#include <string>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_
|
||||
#define COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
#include "compiler/generate_code/c_code.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/parse_grammar.h"
|
||||
#include "json.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -13,14 +15,44 @@ using std::vector;
|
|||
using std::get;
|
||||
using std::make_tuple;
|
||||
|
||||
pair<string, const GrammarError *> compile(const Grammar &grammar,
|
||||
std::string name) {
|
||||
// C entry point: compiles a grammar supplied as JSON text.
//
// The pipeline is parse_grammar -> prepare_grammar -> build_tables -> c_code.
// The first stage that reports an error stops the pipeline. A failure result
// is { "", heap copy of the message, error type }; a success result is
// { heap copy of the generated code, "", TSCompileErrorTypeNone }. The heap
// copies are made with strdup.
extern "C" TSCompileResult ts_compile_grammar(const char *input) {
  // Builds the failure result shared by every stage.
  auto failure = [](const string &message,
                    TSCompileErrorType type) -> TSCompileResult {
    return { "", strdup(message.c_str()), type };
  };

  // Stage 1: JSON text -> Grammar.
  ParseGrammarResult parsed = parse_grammar(string(input));
  if (!parsed.error_message.empty())
    return failure(parsed.error_message, TSCompileErrorTypeInvalidGrammar);

  // Stage 2: Grammar -> syntax grammar + lexical grammar.
  auto prepared = prepare_grammar::prepare_grammar(parsed.grammar);
  const CompileError &prepare_error = get<2>(prepared);
  if (prepare_error.type)
    return failure(prepare_error.message, prepare_error.type);

  const SyntaxGrammar &syntax_grammar = get<0>(prepared);
  const LexicalGrammar &lexical_grammar = get<1>(prepared);

  // Stage 3: grammars -> parse table + lex table.
  auto tables = build_tables::build_tables(syntax_grammar, lexical_grammar);
  const CompileError &table_error = get<2>(tables);
  if (table_error.type)
    return failure(table_error.message, table_error.type);

  // Stage 4: tables -> C source text.
  const string code =
    generate_code::c_code(parsed.name, get<0>(tables), get<1>(tables),
                          syntax_grammar, lexical_grammar);

  return { strdup(code.c_str()), "", TSCompileErrorTypeNone };
}
|
||||
|
||||
pair<string, const CompileError> compile(const Grammar &grammar,
|
||||
std::string name) {
|
||||
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
|
||||
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
|
||||
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
|
||||
const GrammarError *error = get<2>(prepare_grammar_result);
|
||||
|
||||
if (error)
|
||||
CompileError error = get<2>(prepare_grammar_result);
|
||||
if (error.type)
|
||||
return { "", error };
|
||||
|
||||
auto table_build_result =
|
||||
|
|
@ -28,14 +60,13 @@ pair<string, const GrammarError *> compile(const Grammar &grammar,
|
|||
const ParseTable &parse_table = get<0>(table_build_result);
|
||||
const LexTable &lex_table = get<1>(table_build_result);
|
||||
error = get<2>(table_build_result);
|
||||
|
||||
if (error)
|
||||
if (error.type)
|
||||
return { "", error };
|
||||
|
||||
string code = generate_code::c_code(name, parse_table, lex_table,
|
||||
syntax_grammar, lexical_grammar);
|
||||
|
||||
return { code, nullptr };
|
||||
return { code, CompileError::none() };
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
16
src/compiler/compile.h
Normal file
16
src/compiler/compile.h
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#ifndef COMPILER_COMPILE_H_
|
||||
#define COMPILER_COMPILE_H_
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct Grammar;
|
||||
|
||||
std::pair<std::string, CompileError> compile(const Grammar &, std::string);
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_COMPILE_H_
|
||||
28
src/compiler/compile_error.h
Normal file
28
src/compiler/compile_error.h
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#ifndef COMPILER_COMPILE_ERROR_H_
|
||||
#define COMPILER_COMPILE_ERROR_H_
|
||||
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
class CompileError {
|
||||
public:
|
||||
CompileError(TSCompileErrorType type, std::string message)
|
||||
: type(type), message(message) {}
|
||||
|
||||
static CompileError none() {
|
||||
return CompileError(TSCompileErrorTypeNone, "");
|
||||
}
|
||||
|
||||
bool operator==(const CompileError &other) const {
|
||||
return type == other.type && message == other.message;
|
||||
}
|
||||
|
||||
TSCompileErrorType type;
|
||||
std::string message;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_COMPILE_ERROR_H_
|
||||
|
|
@ -188,7 +188,9 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
void add_lex_function() {
|
||||
line("static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {");
|
||||
line(
|
||||
"static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) "
|
||||
"{");
|
||||
indent([&]() {
|
||||
line("START_LEXER();");
|
||||
_switch("state", [&]() {
|
||||
|
|
|
|||
|
|
@ -1,44 +0,0 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::ostream;
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
Grammar::Grammar(const vector<pair<string, rule_ptr>> &rules)
|
||||
: rules_(rules), extra_tokens_({}) {}
|
||||
|
||||
const vector<pair<string, rule_ptr>> &Grammar::rules() const {
|
||||
return rules_;
|
||||
}
|
||||
|
||||
const vector<rule_ptr> &Grammar::extra_tokens() const {
|
||||
return extra_tokens_;
|
||||
}
|
||||
|
||||
const vector<vector<string>> &Grammar::expected_conflicts() const {
|
||||
return expected_conflicts_;
|
||||
}
|
||||
|
||||
Grammar &Grammar::extra_tokens(const vector<rule_ptr> &extra_tokens) {
|
||||
extra_tokens_ = extra_tokens;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Grammar &Grammar::expected_conflicts(
|
||||
const vector<vector<string>> &expected_conflicts) {
|
||||
expected_conflicts_ = expected_conflicts;
|
||||
return *this;
|
||||
}
|
||||
|
||||
GrammarError::GrammarError(GrammarErrorType type, string message)
|
||||
: type(type), message(message) {}
|
||||
|
||||
bool GrammarError::operator==(const GrammarError &other) const {
|
||||
return type == other.type && message == other.message;
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
19
src/compiler/grammar.h
Normal file
19
src/compiler/grammar.h
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
#ifndef COMPILER_GRAMMAR_H_
|
||||
#define COMPILER_GRAMMAR_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct Grammar {
|
||||
std::vector<std::pair<std::string, rule_ptr>> rules;
|
||||
std::vector<rule_ptr> extra_tokens;
|
||||
std::vector<std::vector<std::string>> expected_conflicts;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_GRAMMAR_H_
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/variable.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
326
src/compiler/parse_grammar.cc
Normal file
326
src/compiler/parse_grammar.cc
Normal file
|
|
@ -0,0 +1,326 @@
|
|||
#include "compiler/parse_grammar.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "json.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
|
||||
struct ParseRuleResult {
|
||||
rule_ptr rule;
|
||||
string error_message;
|
||||
};
|
||||
|
||||
ParseRuleResult parse_rule(json_value *rule_json) {
|
||||
string error_message;
|
||||
json_value rule_type_json;
|
||||
string type;
|
||||
|
||||
if (!rule_json) {
|
||||
error_message = "Rule cannot be null";
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (rule_json->type != json_object) {
|
||||
error_message = "Rule type must be an object";
|
||||
goto error;
|
||||
}
|
||||
|
||||
rule_type_json = rule_json->operator[]("type");
|
||||
if (rule_type_json.type != json_string) {
|
||||
error_message = "Rule type must be a string";
|
||||
goto error;
|
||||
}
|
||||
|
||||
type = rule_type_json.u.string.ptr;
|
||||
|
||||
if (type == "BLANK") {
|
||||
return { blank(), "" };
|
||||
}
|
||||
|
||||
if (type == "CHOICE") {
|
||||
json_value members_json = rule_json->operator[]("members");
|
||||
if (members_json.type != json_array) {
|
||||
error_message = "Choice members must be an array";
|
||||
goto error;
|
||||
}
|
||||
|
||||
vector<rule_ptr> members;
|
||||
for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
|
||||
json_value *member_json = members_json.u.array.values[i];
|
||||
ParseRuleResult member = parse_rule(member_json);
|
||||
if (member.rule.get()) {
|
||||
members.push_back(member.rule);
|
||||
} else {
|
||||
error_message = "Invalid choice member: " + member.error_message;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
return { choice(members), "" };
|
||||
}
|
||||
|
||||
if (type == "SEQ") {
|
||||
json_value members_json = rule_json->operator[]("members");
|
||||
if (members_json.type != json_array) {
|
||||
error_message = "Seq members must be an array";
|
||||
goto error;
|
||||
}
|
||||
|
||||
vector<rule_ptr> members;
|
||||
for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
|
||||
json_value *member_json = members_json.u.array.values[i];
|
||||
ParseRuleResult member = parse_rule(member_json);
|
||||
if (member.rule.get()) {
|
||||
members.push_back(member.rule);
|
||||
} else {
|
||||
error_message = "Invalid seq member: " + member.error_message;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
return { seq(members), "" };
|
||||
}
|
||||
|
||||
if (type == "ERROR") {
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (content.rule.get()) {
|
||||
return { err(content.rule), "" };
|
||||
} else {
|
||||
error_message = "Invalid error content: " + content.error_message;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if (type == "REPEAT") {
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (content.rule.get()) {
|
||||
return { repeat(content.rule), "" };
|
||||
} else {
|
||||
error_message = "Invalid repeat content: " + content.error_message;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if (type == "REPEAT1") {
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (content.rule.get()) {
|
||||
return { repeat1(content.rule), "" };
|
||||
} else {
|
||||
error_message = "Invalid repeat1 content: " + content.error_message;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if (type == "TOKEN") {
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (content.rule.get()) {
|
||||
return { token(content.rule), "" };
|
||||
} else {
|
||||
error_message = "Invalid token content: " + content.error_message;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if (type == "PATTERN") {
|
||||
json_value value_json = rule_json->operator[]("value");
|
||||
if (value_json.type == json_string) {
|
||||
return { pattern(value_json.u.string.ptr), "" };
|
||||
} else {
|
||||
error_message = "Pattern value must be a string";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if (type == "STRING") {
|
||||
json_value value_json = rule_json->operator[]("value");
|
||||
if (value_json.type == json_string) {
|
||||
return { str(value_json.u.string.ptr), "" };
|
||||
} else {
|
||||
error_message = "String rule value must be a string";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if (type == "SYMBOL") {
|
||||
json_value value_json = rule_json->operator[]("name");
|
||||
if (value_json.type == json_string) {
|
||||
return { sym(value_json.u.string.ptr), "" };
|
||||
} else {
|
||||
error_message = "Symbol value must be a string";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if (type == "PREC") {
|
||||
json_value precedence_json = rule_json->operator[]("value");
|
||||
if (precedence_json.type != json_integer) {
|
||||
error_message = "Precedence value must be an integer";
|
||||
goto error;
|
||||
}
|
||||
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (!content.rule.get()) {
|
||||
error_message = "Invalid precedence content: " + content.error_message;
|
||||
goto error;
|
||||
}
|
||||
|
||||
return { prec(precedence_json.u.integer, content.rule), "" };
|
||||
}
|
||||
|
||||
if (type == "PREC_LEFT") {
|
||||
json_value precedence_json = rule_json->operator[]("value");
|
||||
if (precedence_json.type != json_integer) {
|
||||
error_message = "Precedence value must be an integer";
|
||||
goto error;
|
||||
}
|
||||
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (!content.rule.get()) {
|
||||
error_message = "Invalid precedence content: " + content.error_message;
|
||||
goto error;
|
||||
}
|
||||
|
||||
return { prec_left(precedence_json.u.integer, content.rule), "" };
|
||||
}
|
||||
|
||||
if (type == "PREC_RIGHT") {
|
||||
json_value precedence_json = rule_json->operator[]("value");
|
||||
if (precedence_json.type != json_integer) {
|
||||
error_message = "Precedence value must be an integer";
|
||||
goto error;
|
||||
}
|
||||
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (!content.rule.get()) {
|
||||
error_message = "Invalid precedence content: " + content.error_message;
|
||||
goto error;
|
||||
}
|
||||
|
||||
return { prec_right(precedence_json.u.integer, content.rule), "" };
|
||||
}
|
||||
|
||||
error_message = "Unknown rule type " + type;
|
||||
|
||||
error:
|
||||
return { rule_ptr(), error_message };
|
||||
}
|
||||
|
||||
// Parses a JSON grammar description into a Grammar.
//
// The document must be an object with a string "name" and an object "rules"
// mapping rule names to rule nodes. Two fields are optional: "extras", an
// array of rule nodes, and "conflicts", an array of arrays of strings.
//
// On success the result holds the grammar name and the grammar, with an empty
// error_message. On failure it holds an empty name, an empty Grammar and a
// message describing the first problem found. The parsed JSON tree is
// released on both paths.
ParseGrammarResult parse_grammar(const string &input) {
  // All locals are declared up front because the `goto error` jumps below
  // must not cross an initialization.
  string error_message;
  string name;
  Grammar grammar;
  json_value name_json, rules_json, extras_json, conflicts_json;

  json_settings settings = { 0, 0, 0, 0, 0, 0 };
  char parse_error[json_error_max];
  json_value *grammar_json =
    json_parse_ex(&settings, input.c_str(), input.size(), parse_error);
  if (!grammar_json) {
    error_message = string("Invalid JSON at ") + parse_error;
    goto error;
  }

  if (grammar_json->type != json_object) {
    error_message = "Body must be an object";
    goto error;
  }

  name_json = grammar_json->operator[]("name");
  if (name_json.type != json_string) {
    error_message = "Name must be a string";
    goto error;
  }

  name = name_json.u.string.ptr;

  rules_json = grammar_json->operator[]("rules");
  if (rules_json.type != json_object) {
    error_message = "Rules must be an object";
    goto error;
  }

  for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) {
    json_object_entry entry_json = rules_json.u.object.values[i];
    ParseRuleResult entry = parse_rule(entry_json.value);

    if (!entry.rule.get()) {
      error_message =
        string("Invalid rule '") + entry_json.name + "' " + entry.error_message;
      goto error;
    }

    grammar.rules.push_back({ string(entry_json.name), entry.rule });
  }

  // "extras" is optional; a missing field has type json_none.
  extras_json = grammar_json->operator[]("extras");
  if (extras_json.type != json_none) {
    if (extras_json.type != json_array) {
      error_message = "Extras must be an array";
      goto error;
    }

    for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) {
      json_value *extra_json = extras_json.u.array.values[i];
      ParseRuleResult extra = parse_rule(extra_json);
      if (!extra.rule.get()) {
        error_message = string("Invalid extra token: ") + extra.error_message;
        goto error;
      }

      grammar.extra_tokens.push_back(extra.rule);
    }
  }

  // "conflicts" is optional as well.
  conflicts_json = grammar_json->operator[]("conflicts");
  if (conflicts_json.type != json_none) {
    if (conflicts_json.type != json_array) {
      error_message = "Conflicts must be an array";
      goto error;
    }

    for (size_t i = 0, length = conflicts_json.u.array.length; i < length; i++) {
      json_value *conflict_json = conflicts_json.u.array.values[i];
      if (conflict_json->type != json_array) {
        error_message = "Each conflict entry must be an array";
        goto error;
      }

      vector<string> conflict;
      for (size_t j = 0, conflict_length = conflict_json->u.array.length;
           j < conflict_length; j++) {
        json_value *conflict_entry_json = conflict_json->u.array.values[j];
        if (conflict_entry_json->type != json_string) {
          error_message = "Each conflict entry must be an array of strings";
          goto error;
        }

        conflict.push_back(string(conflict_entry_json->u.string.ptr));
      }

      grammar.expected_conflicts.push_back(conflict);
    }
  }

  // The name, rule names and conflict names were copied into std::string
  // objects above, so the JSON tree is no longer needed. Previously it was
  // freed only on the error path and leaked on every successful parse.
  json_value_free(grammar_json);

  return { name, grammar, "" };

error:
  if (grammar_json) {
    json_value_free(grammar_json);
  }

  return { "", Grammar{}, error_message };
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
20
src/compiler/parse_grammar.h
Normal file
20
src/compiler/parse_grammar.h
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
#ifndef COMPILER_GRAMMAR_JSON_H_
|
||||
#define COMPILER_GRAMMAR_JSON_H_
|
||||
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct ParseGrammarResult {
|
||||
std::string name;
|
||||
Grammar grammar;
|
||||
std::string error_message;
|
||||
};
|
||||
|
||||
ParseGrammarResult parse_grammar(const std::string &);
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_GRAMMAR_JSON_H_
|
||||
|
|
@ -3,6 +3,7 @@
|
|||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
|
|||
|
|
@ -53,36 +53,35 @@ class ExpandTokens : public rules::IdentityRuleFn {
|
|||
|
||||
// Parses the pattern's source text (`rule->value`) with parse_regex and
// returns the resulting rule. The error half of the parse result is stored in
// `error` only when no error has been stored yet, so an earlier error is not
// overwritten by later calls.
// NOTE(review): the two consecutive `if` lines look like the removed and
// added versions of one statement in this diff view - confirm against the
// real file.
rule_ptr apply_to(const Pattern *rule) {
|
||||
auto pair = parse_regex(rule->value);
|
||||
if (!error)
|
||||
if (!error.type)
|
||||
error = pair.second;
|
||||
return pair.first;
|
||||
}
|
||||
|
||||
public:
|
||||
const GrammarError *error;
|
||||
ExpandTokens() : error(nullptr) {}
|
||||
CompileError error;
|
||||
ExpandTokens() : error(CompileError::none()) {}
|
||||
};
|
||||
|
||||
// Builds a new LexicalGrammar by running an ExpandTokens visitor over every
// variable rule and every separator of `grammar`.
//
// Returns the new grammar paired with an error value. As soon as the visitor
// has an error recorded, the function returns immediately with the
// partially-filled result and that error; otherwise it returns the complete
// result with the "no error" value.
// NOTE(review): near-duplicate adjacent lines (signature, `if`, final
// `return`) look like the removed and added versions from this diff view -
// confirm against the real file.
pair<LexicalGrammar, const GrammarError *> expand_tokens(
|
||||
const LexicalGrammar &grammar) {
|
||||
pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &grammar) {
|
||||
LexicalGrammar result;
|
||||
ExpandTokens expander;
|
||||
|
||||
// Expand each variable's rule, keeping its name and type unchanged.
for (const Variable &variable : grammar.variables) {
|
||||
auto rule = expander.apply(variable.rule);
|
||||
if (expander.error)
|
||||
if (expander.error.type)
|
||||
return { result, expander.error };
|
||||
result.variables.push_back(Variable(variable.name, variable.type, rule));
|
||||
}
|
||||
|
||||
// Expand each separator rule with the same visitor.
for (auto &sep : grammar.separators) {
|
||||
auto rule = expander.apply(sep);
|
||||
if (expander.error)
|
||||
if (expander.error.type)
|
||||
return { result, expander.error };
|
||||
result.separators.push_back(rule);
|
||||
}
|
||||
|
||||
return { result, nullptr };
|
||||
return { result, CompileError::none() };
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_
|
||||
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -10,8 +10,7 @@ struct LexicalGrammar;
|
|||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<LexicalGrammar, const GrammarError *> expand_tokens(
|
||||
const LexicalGrammar &);
|
||||
std::pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
|
||||
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
|
|||
|
|
@ -90,12 +90,12 @@ class TokenExtractor : public rules::IdentityRuleFn {
|
|||
vector<Variable> tokens;
|
||||
};
|
||||
|
||||
// Builds the "invalid ubiquitous token" error whose text is "Not a token: "
// followed by `message`.
// NOTE(review): the pointer-returning and value-returning variants shown
// back to back look like the removed and added versions from this diff view -
// confirm against the real file.
static const GrammarError *ubiq_token_err(const string &message) {
|
||||
return new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: " + message);
|
||||
static CompileError ubiq_token_err(const string &message) {
|
||||
return CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: " + message);
|
||||
}
|
||||
|
||||
tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
||||
tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
||||
const InternedGrammar &grammar) {
|
||||
InitialSyntaxGrammar syntax_grammar;
|
||||
LexicalGrammar lexical_grammar;
|
||||
|
|
@ -186,7 +186,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens
|
|||
syntax_grammar.extra_tokens.insert(new_symbol);
|
||||
}
|
||||
|
||||
return make_tuple(syntax_grammar, lexical_grammar, nullptr);
|
||||
return make_tuple(syntax_grammar, lexical_grammar, CompileError::none());
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_
|
||||
|
||||
#include <tuple>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/compile_error.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
|
@ -10,8 +10,8 @@
|
|||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *>
|
||||
extract_tokens(const InternedGrammar &);
|
||||
std::tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
||||
const InternedGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include <vector>
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
|
|
@ -31,8 +32,8 @@ class InternSymbols : public rules::IdentityRuleFn {
|
|||
|
||||
public:
|
||||
// Scans the grammar's rules in order and returns a Symbol constructed from
// the index of the first entry whose `.first` equals `rule_name`. Returns
// nullptr when no entry matches.
// NOTE(review): the `grammar.rules()` and `grammar.rules` loops look like the
// removed and added versions from this diff view - confirm against the real
// file.
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
|
||||
for (size_t i = 0; i < grammar.rules().size(); i++)
|
||||
if (grammar.rules()[i].first == rule_name)
|
||||
for (size_t i = 0; i < grammar.rules.size(); i++)
|
||||
if (grammar.rules[i].first == rule_name)
|
||||
return make_shared<rules::Symbol>(i);
|
||||
return nullptr;
|
||||
}
|
||||
|
|
@ -42,16 +43,16 @@ class InternSymbols : public rules::IdentityRuleFn {
|
|||
string missing_rule_name;
|
||||
};
|
||||
|
||||
// Builds the "undefined symbol" error whose text is
// "Undefined rule '<rule_name>'".
// NOTE(review): the pointer-returning and value-returning variants shown
// back to back look like the removed and added versions from this diff view -
// confirm against the real file.
const GrammarError *missing_rule_error(string rule_name) {
|
||||
return new GrammarError(GrammarErrorTypeUndefinedSymbol,
|
||||
"Undefined rule '" + rule_name + "'");
|
||||
CompileError missing_rule_error(string rule_name) {
|
||||
return CompileError(TSCompileErrorTypeUndefinedSymbol,
|
||||
"Undefined rule '" + rule_name + "'");
|
||||
}
|
||||
|
||||
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
|
||||
pair<InternedGrammar, CompileError> intern_symbols(const Grammar &grammar) {
|
||||
InternedGrammar result;
|
||||
InternSymbols interner(grammar);
|
||||
|
||||
for (auto &pair : grammar.rules()) {
|
||||
for (auto &pair : grammar.rules) {
|
||||
auto new_rule = interner.apply(pair.second);
|
||||
if (!interner.missing_rule_name.empty())
|
||||
return { result, missing_rule_error(interner.missing_rule_name) };
|
||||
|
|
@ -61,14 +62,14 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &gramma
|
|||
new_rule));
|
||||
}
|
||||
|
||||
for (auto &rule : grammar.extra_tokens()) {
|
||||
for (auto &rule : grammar.extra_tokens) {
|
||||
auto new_rule = interner.apply(rule);
|
||||
if (!interner.missing_rule_name.empty())
|
||||
return { result, missing_rule_error(interner.missing_rule_name) };
|
||||
result.extra_tokens.push_back(new_rule);
|
||||
}
|
||||
|
||||
for (auto &names : grammar.expected_conflicts()) {
|
||||
for (auto &names : grammar.expected_conflicts) {
|
||||
set<rules::Symbol> entry;
|
||||
for (auto &name : names) {
|
||||
auto symbol = interner.symbol_for_rule_name(name);
|
||||
|
|
@ -78,7 +79,7 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &gramma
|
|||
result.expected_conflicts.insert(entry);
|
||||
}
|
||||
|
||||
return { result, nullptr };
|
||||
return { result, CompileError::none() };
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -3,13 +3,16 @@
|
|||
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/compile_error.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct Grammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
|
||||
std::pair<InternedGrammar, CompileError> intern_symbols(const Grammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ class PatternParser {
|
|||
next();
|
||||
}
|
||||
|
||||
pair<rule_ptr, const GrammarError *> rule(bool nested) {
|
||||
pair<rule_ptr, CompileError> rule(bool nested) {
|
||||
vector<rule_ptr> choices = {};
|
||||
do {
|
||||
if (!choices.empty()) {
|
||||
|
|
@ -42,17 +42,17 @@ class PatternParser {
|
|||
break;
|
||||
}
|
||||
auto pair = term(nested);
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
choices.push_back(pair.first);
|
||||
} while (has_more_input());
|
||||
auto rule =
|
||||
(choices.size() > 1) ? make_shared<Choice>(choices) : choices.front();
|
||||
return { rule, nullptr };
|
||||
return { rule, CompileError::none() };
|
||||
}
|
||||
|
||||
private:
|
||||
pair<rule_ptr, const GrammarError *> term(bool nested) {
|
||||
pair<rule_ptr, CompileError> term(bool nested) {
|
||||
rule_ptr result = Blank::build();
|
||||
do {
|
||||
if (peek() == '|')
|
||||
|
|
@ -60,16 +60,16 @@ class PatternParser {
|
|||
if (nested && peek() == ')')
|
||||
break;
|
||||
auto pair = factor();
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
result = Seq::build({ result, pair.first });
|
||||
} while (has_more_input());
|
||||
return { result, nullptr };
|
||||
return { result, CompileError::none() };
|
||||
}
|
||||
|
||||
pair<rule_ptr, const GrammarError *> factor() {
|
||||
pair<rule_ptr, CompileError> factor() {
|
||||
auto pair = atom();
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
rule_ptr result = pair.first;
|
||||
if (has_more_input()) {
|
||||
|
|
@ -88,30 +88,30 @@ class PatternParser {
|
|||
break;
|
||||
}
|
||||
}
|
||||
return { result, nullptr };
|
||||
return { result, CompileError::none() };
|
||||
}
|
||||
|
||||
pair<rule_ptr, const GrammarError *> atom() {
|
||||
pair<rule_ptr, CompileError> atom() {
|
||||
switch (peek()) {
|
||||
case '(': {
|
||||
next();
|
||||
auto pair = rule(true);
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
if (peek() != ')')
|
||||
return error("unmatched open paren");
|
||||
next();
|
||||
return { pair.first, nullptr };
|
||||
return { pair.first, CompileError::none() };
|
||||
}
|
||||
case '[': {
|
||||
next();
|
||||
auto pair = char_set();
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
if (peek() != ']')
|
||||
return error("unmatched open square bracket");
|
||||
next();
|
||||
return { pair.first.copy(), nullptr };
|
||||
return { pair.first.copy(), CompileError::none() };
|
||||
}
|
||||
case ')': {
|
||||
return error("unmatched close paren");
|
||||
|
|
@ -121,18 +121,19 @@ class PatternParser {
|
|||
}
|
||||
case '.': {
|
||||
next();
|
||||
return { CharacterSet().include_all().exclude('\n').copy(), nullptr };
|
||||
return { CharacterSet().include_all().exclude('\n').copy(),
|
||||
CompileError::none() };
|
||||
}
|
||||
default: {
|
||||
auto pair = single_char();
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
return { pair.first.copy(), nullptr };
|
||||
return { pair.first.copy(), CompileError::none() };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pair<CharacterSet, const GrammarError *> char_set() {
|
||||
pair<CharacterSet, CompileError> char_set() {
|
||||
CharacterSet result;
|
||||
bool is_affirmative = true;
|
||||
if (peek() == '^') {
|
||||
|
|
@ -143,7 +144,7 @@ class PatternParser {
|
|||
|
||||
while (has_more_input() && (peek() != ']')) {
|
||||
auto pair = single_char();
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { CharacterSet(), pair.second };
|
||||
if (is_affirmative)
|
||||
result.add_set(pair.first);
|
||||
|
|
@ -151,10 +152,10 @@ class PatternParser {
|
|||
result.remove_set(pair.first);
|
||||
}
|
||||
|
||||
return { result, nullptr };
|
||||
return { result, CompileError::none() };
|
||||
}
|
||||
|
||||
pair<CharacterSet, const GrammarError *> single_char() {
|
||||
pair<CharacterSet, CompileError> single_char() {
|
||||
CharacterSet value;
|
||||
switch (peek()) {
|
||||
case '\\':
|
||||
|
|
@ -173,7 +174,7 @@ class PatternParser {
|
|||
value = CharacterSet().include(first_char);
|
||||
}
|
||||
}
|
||||
return { value, nullptr };
|
||||
return { value, CompileError::none() };
|
||||
}
|
||||
|
||||
CharacterSet escaped_char(uint32_t value) {
|
||||
|
|
@ -217,8 +218,8 @@ class PatternParser {
|
|||
return lookahead && iter <= end;
|
||||
}
|
||||
|
||||
// Produces the failure result used by the parsing methods: a blank rule
// paired with a regex error carrying `msg`.
// NOTE(review): the two signature/return pairs look like the removed and
// added versions from this diff view - confirm against the real file.
pair<rule_ptr, const GrammarError *> error(string msg) {
|
||||
return { Blank::build(), new GrammarError(GrammarErrorTypeRegex, msg) };
|
||||
pair<rule_ptr, CompileError> error(string msg) {
|
||||
return { Blank::build(), CompileError(TSCompileErrorTypeInvalidRegex, msg) };
|
||||
}
|
||||
|
||||
string input;
|
||||
|
|
@ -227,7 +228,7 @@ class PatternParser {
|
|||
int32_t lookahead;
|
||||
};
|
||||
|
||||
// Entry point for regex parsing: constructs a PatternParser over `input` and
// returns the result of its top-level rule(false) call (the argument is the
// parser's `nested` flag), i.e. a rule paired with an error value.
// NOTE(review): the two signatures look like the removed and added versions
// from this diff view - confirm against the real file.
pair<rule_ptr, const GrammarError *> parse_regex(const std::string &input) {
|
||||
pair<rule_ptr, CompileError> parse_regex(const std::string &input) {
|
||||
return PatternParser(input.c_str()).rule(false);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,12 +3,13 @@
|
|||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<rule_ptr, const GrammarError *> parse_regex(const std::string &);
|
||||
std::pair<rule_ptr, CompileError> parse_regex(const std::string &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -16,14 +16,14 @@ using std::tuple;
|
|||
using std::get;
|
||||
using std::make_tuple;
|
||||
|
||||
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
||||
tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
|
||||
const Grammar &input_grammar) {
|
||||
/*
|
||||
* Convert all string-based `NamedSymbols` into numerical `Symbols`
|
||||
*/
|
||||
auto intern_result = intern_symbols(input_grammar);
|
||||
const GrammarError *error = intern_result.second;
|
||||
if (error)
|
||||
CompileError error = intern_result.second;
|
||||
if (error.type)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
/*
|
||||
|
|
@ -31,7 +31,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
|||
*/
|
||||
auto extract_result = extract_tokens(intern_result.first);
|
||||
error = get<2>(extract_result);
|
||||
if (error)
|
||||
if (error.type)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
/*
|
||||
|
|
@ -45,7 +45,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
|||
auto expand_tokens_result = expand_tokens(get<1>(extract_result));
|
||||
LexicalGrammar lex_grammar = expand_tokens_result.first;
|
||||
error = expand_tokens_result.second;
|
||||
if (error)
|
||||
if (error.type)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
/*
|
||||
|
|
@ -58,7 +58,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
|||
*/
|
||||
lex_grammar = normalize_rules(lex_grammar);
|
||||
|
||||
return make_tuple(syntax_grammar, lex_grammar, nullptr);
|
||||
return make_tuple(syntax_grammar, lex_grammar, CompileError::none());
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -4,15 +4,15 @@
|
|||
#include <tuple>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
class Grammar;
|
||||
class GrammarError;
|
||||
struct Grammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
|
||||
const Grammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#include "compiler/prepare_grammar/token_description.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_TOKEN_DESCRIPTION_H_
|
||||
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
|
|||
29
src/compiler/rules.h
Normal file
29
src/compiler/rules.h
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
#ifndef COMPILER_RULES_H_
|
||||
#define COMPILER_RULES_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
// Declarations of free functions that each return a rule_ptr. Only the
// signatures are visible here; the definitions live elsewhere.
namespace tree_sitter {
|
||||
|
||||
rule_ptr blank();
|
||||
rule_ptr choice(const std::vector<rule_ptr> &);
|
||||
rule_ptr repeat(const rule_ptr &);
|
||||
rule_ptr repeat1(const rule_ptr &);
|
||||
rule_ptr seq(const std::vector<rule_ptr> &);
|
||||
rule_ptr sym(const std::string &);
|
||||
rule_ptr pattern(const std::string &);
|
||||
rule_ptr str(const std::string &);
|
||||
rule_ptr err(const rule_ptr &);
|
||||
rule_ptr prec(int precedence, const rule_ptr &);
|
||||
rule_ptr prec_left(const rule_ptr &);
|
||||
rule_ptr prec_left(int precedence, const rule_ptr &);
|
||||
rule_ptr prec_right(const rule_ptr &);
|
||||
rule_ptr prec_right(int precedence, const rule_ptr &);
|
||||
rule_ptr token(const rule_ptr &rule);
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_H_
|
||||
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@
|
|||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/variable.h"
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_VARIABLE_H_
|
||||
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue