refactor: improve the grammar schema

- Publish on the GitHub page
- Specify in the generated file
- Use const/enum instead of pattern
This commit is contained in:
ObserverOfTime 2024-09-28 13:01:16 +03:00 committed by Amaan Qureshi
parent ffc942a95b
commit f212605dda
3 changed files with 32 additions and 24 deletions

View file

@ -0,0 +1,304 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Tree-sitter grammar specification",
"type": "object",
"required": ["name", "rules"],
"additionalProperties": false,
"properties": {
"name": {
  "description": "The name of the grammar. The whole value must be an identifier: a letter or underscore followed by word characters.",
  "type": "string",
  "pattern": "^[a-zA-Z_]\\w*$"
},
"inherits": {
  "description": "The name of the parent grammar. The whole value must be an identifier: a letter or underscore followed by word characters.",
  "type": "string",
  "pattern": "^[a-zA-Z_]\\w*$"
},
"rules": {
  "description": "Object mapping identifier-like keys to rule definitions; keys of any other form are rejected.",
  "type": "object",
  "additionalProperties": false,
  "patternProperties": {
    "^[a-zA-Z_]\\w*$": { "$ref": "#/definitions/rule" }
  }
},
"extras": {
  "description": "Array of distinct rule definitions.",
  "type": "array",
  "items": { "$ref": "#/definitions/rule" },
  "uniqueItems": true
},
"precedences": {
  "description": "Array of distinct lists; every list holds distinct entries, each either a plain string or a SYMBOL rule.",
  "type": "array",
  "items": {
    "type": "array",
    "items": {
      "oneOf": [
        { "type": "string" },
        { "$ref": "#/definitions/symbol-rule" }
      ]
    },
    "uniqueItems": true
  },
  "uniqueItems": true
},
"externals": {
  "description": "Array of distinct rule definitions.",
  "type": "array",
  "items": { "$ref": "#/definitions/rule" },
  "uniqueItems": true
},
"inline": {
  "description": "Array of distinct identifier-like names.",
  "type": "array",
  "items": {
    "type": "string",
    "pattern": "^[a-zA-Z_]\\w*$"
  },
  "uniqueItems": true
},
"conflicts": {
  "description": "Array of distinct groups; every group is an array of distinct identifier-like names.",
  "type": "array",
  "items": {
    "type": "array",
    "items": {
      "type": "string",
      "pattern": "^[a-zA-Z_]\\w*$"
    },
    "uniqueItems": true
  },
  "uniqueItems": true
},
"word": {
  "description": "An identifier-like name, anchored at both ends so it has the same form as the keys accepted by `rules`.",
  "type": "string",
  "pattern": "^[a-zA-Z_]\\w*$"
},
"supertypes": {
  "type": "array",
  "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
  "items": {
    "type": "string",
    "description": "The name of a rule in `rules` or `extras`"
  },
  "uniqueItems": true
}
},
"definitions": {
"blank-rule": {
  "description": "Object whose `type` is the constant `BLANK`.",
  "type": "object",
  "required": ["type"],
  "properties": {
    "type": { "type": "string", "const": "BLANK" }
  }
},
"string-rule": {
  "description": "Object whose `type` is the constant `STRING`, with a required string `value`.",
  "type": "object",
  "required": ["type", "value"],
  "properties": {
    "type": { "type": "string", "const": "STRING" },
    "value": { "type": "string" }
  }
},
"pattern-rule": {
  "description": "Object whose `type` is the constant `PATTERN`, with a required string `value` and an optional string `flags`.",
  "type": "object",
  "required": ["type", "value"],
  "properties": {
    "type": { "type": "string", "const": "PATTERN" },
    "value": {
      "type": "string"
    },
    "flags": {
      "type": "string"
    }
  }
},
"symbol-rule": {
  "description": "Object whose `type` is the constant `SYMBOL`, with a required string `name`.",
  "type": "object",
  "required": ["type", "name"],
  "properties": {
    "type": { "type": "string", "const": "SYMBOL" },
    "name": {
      "type": "string"
    }
  }
},
"seq-rule": {
  "description": "Object whose `type` is the constant `SEQ`, with a required `members` array of rule definitions.",
  "type": "object",
  "required": ["type", "members"],
  "properties": {
    "type": { "type": "string", "const": "SEQ" },
    "members": {
      "type": "array",
      "items": { "$ref": "#/definitions/rule" }
    }
  }
},
"choice-rule": {
  "description": "Object whose `type` is the constant `CHOICE`, with a required `members` array of rule definitions.",
  "type": "object",
  "required": ["type", "members"],
  "properties": {
    "type": { "type": "string", "const": "CHOICE" },
    "members": {
      "type": "array",
      "items": { "$ref": "#/definitions/rule" }
    }
  }
},
"alias-rule": {
  "description": "Object whose `type` is the constant `ALIAS`, with a required string `value`, boolean `named`, and a nested rule as `content`.",
  "type": "object",
  "required": ["type", "named", "content", "value"],
  "properties": {
    "type": { "type": "string", "const": "ALIAS" },
    "value": {
      "type": "string"
    },
    "named": {
      "type": "boolean"
    },
    "content": { "$ref": "#/definitions/rule" }
  }
},
"repeat-rule": {
  "description": "Object whose `type` is the constant `REPEAT`, with a required nested rule as `content`.",
  "type": "object",
  "required": ["type", "content"],
  "properties": {
    "type": { "type": "string", "const": "REPEAT" },
    "content": { "$ref": "#/definitions/rule" }
  }
},
"repeat1-rule": {
  "description": "Object whose `type` is the constant `REPEAT1`, with a required nested rule as `content`.",
  "type": "object",
  "required": ["type", "content"],
  "properties": {
    "type": { "type": "string", "const": "REPEAT1" },
    "content": { "$ref": "#/definitions/rule" }
  }
},
"token-rule": {
  "description": "Object whose `type` is either `TOKEN` or `IMMEDIATE_TOKEN`, with a required nested rule as `content`.",
  "type": "object",
  "required": ["type", "content"],
  "properties": {
    "type": {
      "type": "string",
      "enum": ["TOKEN", "IMMEDIATE_TOKEN"]
    },
    "content": { "$ref": "#/definitions/rule" }
  }
},
"field-rule": {
  "description": "Object whose `type` is the constant `FIELD`, with a required string `name` and a nested rule as `content`. Declared as an object so that non-object values cannot satisfy this definition vacuously.",
  "type": "object",
  "properties": {
    "name": { "type": "string" },
    "type": {
      "type": "string",
      "const": "FIELD"
    },
    "content": {
      "$ref": "#/definitions/rule"
    }
  },
  "required": ["name", "type", "content"]
},
"prec-rule": {
  "description": "Object whose `type` is one of `PREC`, `PREC_LEFT`, `PREC_RIGHT` or `PREC_DYNAMIC`, with a required `value` (integer or string) and a nested rule as `content`.",
  "type": "object",
  "properties": {
    "type": {
      "type": "string",
      "enum": [
        "PREC",
        "PREC_LEFT",
        "PREC_RIGHT",
        "PREC_DYNAMIC"
      ]
    },
    "value": {
      "description": "Either an integer or a string.",
      "oneOf": [
        { "type": "integer" },
        { "type": "string" }
      ]
    },
    "content": {
      "$ref": "#/definitions/rule"
    }
  },
  "required": ["type", "content", "value"]
},
"rule": {
  "description": "A value that matches exactly one of the rule definitions listed here.",
  "oneOf": [
    { "$ref": "#/definitions/blank-rule" },
    { "$ref": "#/definitions/string-rule" },
    { "$ref": "#/definitions/pattern-rule" },
    { "$ref": "#/definitions/symbol-rule" },
    { "$ref": "#/definitions/seq-rule" },
    { "$ref": "#/definitions/choice-rule" },
    { "$ref": "#/definitions/alias-rule" },
    { "$ref": "#/definitions/repeat-rule" },
    { "$ref": "#/definitions/repeat1-rule" },
    { "$ref": "#/definitions/token-rule" },
    { "$ref": "#/definitions/field-rule" },
    { "$ref": "#/definitions/prec-rule" }
  ]
}
}
}

View file

@ -21,7 +21,7 @@ The `tree-sitter` CLI's most important feature is the `generate` subcommand. Thi
### Parsing a Grammar
First, Tree-sitter must evaluate the JavaScript code in `grammar.js` and convert the grammar to a JSON format. It does this by shelling out to `node`. The format of the grammars is formally specified by the JSON schema in [grammar-schema.json](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/grammar-schema.json). The parsing is implemented in [parse_grammar.rs](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/parse_grammar.rs).
First, Tree-sitter must evaluate the JavaScript code in `grammar.js` and convert the grammar to a JSON format. It does this by shelling out to `node`. The format of the grammars is formally specified by the JSON schema in [grammar.schema.json](https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json). The parsing is implemented in [parse_grammar.rs](https://github.com/tree-sitter/tree-sitter/blob/master/cli/src/generate/parse_grammar.rs).
### Grammar Rules