feat: support passing in a Rust regex in the grammar dsl
This commit is contained in:
parent
4170f71dbc
commit
68e707eb4f
4 changed files with 35 additions and 4 deletions
|
|
@ -211,6 +211,11 @@ function normalize(value) {
|
|||
type: 'PATTERN',
|
||||
value: value.source
|
||||
};
|
||||
case RustRegex:
|
||||
return {
|
||||
type: 'PATTERN',
|
||||
value: value.value
|
||||
};
|
||||
case ReferenceError:
|
||||
throw value
|
||||
default:
|
||||
|
|
@ -483,6 +488,12 @@ function grammar(baseGrammar, options) {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Marks a regular expression that is written in Rust regex syntax rather
 * than as a JavaScript `RegExp` literal.
 *
 * `normalize` turns an instance into a `{ type: 'PATTERN', value }` rule,
 * using the stored string as the pattern source unchanged.
 */
class RustRegex {
  /**
   * @param {string} value - The regex pattern. There is no separate flags
   *   argument; the whole pattern is this single string.
   * @throws {TypeError} If `value` is not a string.
   */
  constructor(value) {
    // Fail early: a non-string (e.g. a JavaScript RegExp) would otherwise be
    // copied verbatim into the PATTERN rule produced by `normalize`.
    if (typeof value !== 'string') {
      throw new TypeError(
        `RustRegex pattern must be a string, got ${typeof value}`
      );
    }
    this.value = value;
  }
}
|
||||
|
||||
function checkArguments(args, ruleCount, caller, callerName, suffix = '', argType = 'rule') {
|
||||
// Allow for .map() usage where additional arguments are index and the entire array.
|
||||
const isMapCall = ruleCount === 3 && typeof args[1] === 'number' && Array.isArray(args[2]);
|
||||
|
|
@ -524,6 +535,7 @@ globalThis.sym = sym;
|
|||
globalThis.token = token;
|
||||
globalThis.grammar = grammar;
|
||||
globalThis.field = field;
|
||||
globalThis.RustRegex = RustRegex;
|
||||
|
||||
const result = await import(getEnv("TREE_SITTER_GRAMMAR_PATH"));
|
||||
const object = {
|
||||
|
|
|
|||
10
cli/npm/dsl.d.ts
vendored
10
cli/npm/dsl.d.ts
vendored
|
|
@ -33,7 +33,15 @@ type Rule =
|
|||
| SymbolRule<string>
|
||||
| TokenRule;
|
||||
|
||||
type RuleOrLiteral = Rule | RegExp | string;
|
||||
/**
 * A regular expression given as a pattern string instead of a JavaScript
 * `RegExp` literal. Accepted anywhere a `RuleOrLiteral` is expected.
 *
 * Declared as an ambient class: a declaration file may only describe the
 * shape of the class, so the constructor has a signature but no body.
 */
declare class RustRegex {
  /** The pattern string that was passed to the constructor. */
  value: string;

  /**
   * @param pattern - The regex pattern. There is no separate flags
   *   parameter; the pattern is the only argument.
   */
  constructor(pattern: string);
}
|
||||
|
||||
type RuleOrLiteral = Rule | RegExp | RustRegex | string;
|
||||
|
||||
type GrammarSymbols<RuleName extends string> = {
|
||||
[name in RuleName]: SymbolRule<name>;
|
||||
|
|
|
|||
|
|
@ -8,8 +8,18 @@ called `$`. The syntax `$.identifier` is how you refer to another grammar symbol
|
|||
or `$.UNEXPECTED` should be avoided as they have special meaning for the `tree-sitter test` command.
|
||||
- **String and Regex literals** — The terminal symbols in a grammar are described using JavaScript strings and regular
|
||||
expressions. Of course during parsing, Tree-sitter does not actually use JavaScript's regex engine to evaluate these regexes;
|
||||
it generates its own regex-matching logic as part of each parser. Regex literals are just used as a convenient way of writing
|
||||
regular expressions in your grammar.
|
||||
it generates its own regex-matching logic based on the Rust regex syntax as part of each parser. Regex literals are just
|
||||
used as a convenient way of writing regular expressions in your grammar. You can use Rust regular expressions in your grammar
|
||||
DSL through the `RustRegex` class. Simply pass your regex pattern as a string:
|
||||
|
||||
```js
|
||||
new RustRegex('(?i)[a-z_][a-z0-9_]*') // matches a simple identifier
|
||||
```
|
||||
|
||||
Unlike JavaScript's built-in `RegExp` class, which takes a pattern and flags as separate arguments, `RustRegex` only
|
||||
accepts a single pattern string. While it doesn't support separate flags, you can use inline flags within the pattern itself.
|
||||
For more details about Rust's regex syntax and capabilities, check out the [Rust regex documentation][rust regex].
|
||||
|
||||
- **Regex Limitations** — Only a subset of the Regex engine is actually
|
||||
supported. This is due to certain features like lookahead and other lookaround assertions being
|
||||
not feasible to use in an LR(1) grammar, as well as certain flags being unnecessary
|
||||
|
|
@ -128,5 +138,6 @@ object that corresponds to an empty array, signifying *no* keywords are reserved.
|
|||
[keyword-extraction]: ./3-writing-the-grammar.md#keyword-extraction
|
||||
[lr-conflict]: https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables
|
||||
[named-vs-anonymous-nodes]: ../using-parsers/2-basic-parsing.md#named-vs-anonymous-nodes
|
||||
[rust regex]: https://docs.rs/regex/1.1.8/regex/#grouping-and-flags
|
||||
[static-node-types]: ../using-parsers/6-static-node-types.md
|
||||
[yacc-prec]: https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
||||
|
|
|
|||
|
|
@ -31,6 +31,6 @@ module.exports = grammar({
|
|||
|
||||
comment: _ => /#.*/,
|
||||
|
||||
variable: _ => /[a-zA-Z]\w*/,
|
||||
variable: _ => new RustRegex('(?i:[a-z])\\w*'),
|
||||
},
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue