refactor: improve the grammar schema

- Publish the schema on the GitHub Pages site
- Reference the schema via `$schema` in the generated grammar file
- Use const/enum instead of pattern for rule type names
This commit is contained in:
ObserverOfTime 2024-09-28 13:01:16 +03:00 committed by Amaan Qureshi
parent ffc942a95b
commit f212605dda
3 changed files with 32 additions and 24 deletions

View file

@ -485,7 +485,11 @@ globalThis.grammar = grammar;
globalThis.field = field;
const result = await import(getEnv("TREE_SITTER_GRAMMAR_PATH"));
const output = JSON.stringify(result.default?.grammar ?? result.grammar);
const object = {
"$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json",
...(result.default?.grammar ?? result.grammar)
};
const output = JSON.stringify(object);
if (globalThis.process) { // Node/Bun
process.stdout.write(output);

View file

@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "tree-sitter grammar specification",
"title": "Tree-sitter grammar specification",
"type": "object",
"required": ["name", "rules"],
@ -9,13 +9,13 @@
"properties": {
"name": {
"description": "the name of the grammar",
"description": "The name of the grammar",
"type": "string",
"pattern": "^[a-zA-Z_]\\w*"
},
"inherits": {
"description": "the name of the parent grammar",
"description": "The name of the parent grammar",
"type": "string",
"pattern": "^[a-zA-Z_]\\w*"
},
@ -93,7 +93,7 @@
"type": "array",
"uniqueItems": true,
"items": {
"description": "the name of a rule in `rules` or `extras`",
"description": "The name of a rule in `rules` or `extras`",
"type": "string"
}
}
@ -105,7 +105,7 @@
"properties": {
"type": {
"type": "string",
"pattern": "^BLANK$"
"const": "BLANK"
}
},
"required": ["type"]
@ -116,7 +116,7 @@
"properties": {
"type": {
"type": "string",
"pattern": "^STRING$"
"const": "STRING"
},
"value": {
"type": "string"
@ -130,7 +130,7 @@
"properties": {
"type": {
"type": "string",
"pattern": "^PATTERN$"
"const": "PATTERN"
},
"value": { "type": "string" },
"flags": { "type": "string" }
@ -143,7 +143,7 @@
"properties": {
"type": {
"type": "string",
"pattern": "^SYMBOL$"
"const": "SYMBOL"
},
"name": { "type": "string" }
},
@ -155,7 +155,7 @@
"properties": {
"type": {
"type": "string",
"pattern": "^SEQ$"
"const": "SEQ"
},
"members": {
"type": "array",
@ -172,7 +172,7 @@
"properties": {
"type": {
"type": "string",
"pattern": "^CHOICE$"
"const": "CHOICE"
},
"members": {
"type": "array",
@ -189,14 +189,10 @@
"properties": {
"type": {
"type": "string",
"pattern": "^ALIAS$"
},
"value": {
"type": "string"
},
"named": {
"type": "boolean"
"const": "ALIAS"
},
"value": { "type": "string" },
"named": { "type": "boolean" },
"content": {
"$ref": "#/definitions/rule"
}
@ -209,7 +205,7 @@
"properties": {
"type": {
"type": "string",
"pattern": "^REPEAT$"
"const": "REPEAT"
},
"content": {
"$ref": "#/definitions/rule"
@ -223,7 +219,7 @@
"properties": {
"type": {
"type": "string",
"pattern": "^REPEAT1$"
"const": "REPEAT1"
},
"content": {
"$ref": "#/definitions/rule"
@ -237,7 +233,10 @@
"properties": {
"type": {
"type": "string",
"pattern": "^(TOKEN|IMMEDIATE_TOKEN)$"
"enum": [
"TOKEN",
"IMMEDIATE_TOKEN"
]
},
"content": {
"$ref": "#/definitions/rule"
@ -251,7 +250,7 @@
"name": { "type": "string" },
"type": {
"type": "string",
"pattern": "^FIELD$"
"const": "FIELD"
},
"content": {
"$ref": "#/definitions/rule"
@ -265,7 +264,12 @@
"properties": {
"type": {
"type": "string",
"pattern": "^(PREC|PREC_LEFT|PREC_RIGHT|PREC_DYNAMIC)$"
"enum": [
"PREC",
"PREC_LEFT",
"PREC_RIGHT",
"PREC_DYNAMIC"
]
},
"value": {
"oneOf": [

View file

@ -21,7 +21,7 @@ The `tree-sitter` CLI's most important feature is the `generate` subcommand. Thi
### Parsing a Grammar
First, Tree-sitter must evaluate the JavaScript code in `grammar.js` and convert the grammar to a JSON format. It does this by shelling out to `node`. The format of the grammars is formally specified by the JSON schema in [grammar-schema.json](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/grammar-schema.json). The parsing is implemented in [parse_grammar.rs](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/parse_grammar.rs).
First, Tree-sitter must evaluate the JavaScript code in `grammar.js` and convert the grammar to a JSON format. It does this by shelling out to `node`. The format of the grammars is formally specified by the JSON schema in [grammar.schema.json](https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json). The parsing is implemented in [parse_grammar.rs](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/parse_grammar.rs).
### Grammar Rules