/**
 * TypeScript definitions for the Tree-sitter grammar DSL (`cli/npm/dsl.d.ts`).
 *
 * Provenance: added by commit 86a5dabbcbdac650c53a889183bf56d7e721e09e,
 * "Add TypeScript definition for DSL (#658)", Jacob Gillespie,
 * Tue, 7 Jul 2020 16:45:23 +0100.
 */

/*
 * Rule nodes.
 *
 * Every node is a plain object discriminated by its `type` tag. Wrapper nodes
 * carry the wrapped rule in `content`, n-ary nodes carry their operands in
 * `members`, and the precedence nodes store their weight in `value`.
 */
type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string};
type BlankRule = {type: 'BLANK'};
type ChoiceRule = {type: 'CHOICE'; members: Rule[]};
type FieldRule = {type: 'FIELD'; name: string; content: Rule};
type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule};
// NOTE(review): PATTERN and STRING are not returned by any function declared
// in this file; presumably they are the normalized forms of RegExp and string
// literals -- confirm against the DSL runtime.
type PatternRule = {type: 'PATTERN'; value: string};
type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number};
type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number};
type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number};
type PrecRule = {type: 'PREC'; content: Rule; value: number};
type Repeat1Rule = {type: 'REPEAT1'; content: Rule};
type RepeatRule = {type: 'REPEAT'; content: Rule};
type SeqRule = {type: 'SEQ'; members: Rule[]};
type StringRule = {type: 'STRING'; value: string};
/**
 * Reference to another rule by name. `Name` is kept as a string-literal type
 * so that lookups on the `$` object stay precisely typed.
 */
type SymbolRule<Name extends string> = {type: 'SYMBOL'; name: Name};
type TokenRule = {type: 'TOKEN'; content: Rule};

/** Union of every rule node; narrow it by switching on `type`. */
type Rule =
  | AliasRule
  | BlankRule
  | ChoiceRule
  | FieldRule
  | ImmediateTokenRule
  | PatternRule
  | PrecDynamicRule
  | PrecLeftRule
  | PrecRightRule
  | PrecRule
  | Repeat1Rule
  | RepeatRule
  | SeqRule
  | StringRule
  | SymbolRule<string>
  | TokenRule;

/** Anything accepted where a rule is expected: a rule node or a bare literal. */
type RuleOrLiteral = Rule | RegExp | string;

/**
 * Type of the `$` object handed to grammar callbacks.
 *
 * Each known rule name maps to a `SymbolRule` carrying that exact name. The
 * intersection with `Record<string, ...>` means any other property access
 * also type-checks as a symbol.
 */
type GrammarSymbols<RuleName extends string> = {
  [name in RuleName]: SymbolRule<name>;
} &
  Record<string, SymbolRule<string>>;

/** Function that builds the body of one rule from the grammar's symbols. */
type RuleBuilder<RuleName extends string> = (
  $: GrammarSymbols<RuleName>,
) => RuleOrLiteral;

/**
 * One builder per rule in `RuleName`. Each builder may reference both the
 * grammar's own rules and the rules of the base grammar.
 */
type RuleBuilders<
  RuleName extends string,
  BaseGrammarRuleName extends string
> = {
  [name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
};

/**
 * Options object passed to `grammar()`.
 *
 * @typeParam RuleName union of the rule names defined by this grammar
 * @typeParam BaseGrammarRuleName union of the rule names of the base grammar
 *   being extended; `never` when there is none
 * @typeParam Rules concrete type of the `rules` map
 */
interface Grammar<
  RuleName extends string,
  BaseGrammarRuleName extends string = never,
  Rules extends RuleBuilders<RuleName, BaseGrammarRuleName> = RuleBuilders<
    RuleName,
    BaseGrammarRuleName
  >
> {
  /**
   * Name of the grammar language.
   */
  name: string;

  /** Mapping of grammar rule names to rule builder functions. */
  rules: Rules;

  /**
   * An array of arrays of rule names. Each inner array represents a set of
   * rules that's involved in an _LR(1) conflict_ that is _intended to exist_
   * in the grammar. When these conflicts occur at runtime, Tree-sitter will
   * use the GLR algorithm to explore all of the possible interpretations. If
   * _multiple_ parses end up succeeding, Tree-sitter will pick the subtree
   * whose corresponding rule has the highest total _dynamic precedence_.
   *
   * @param $ grammar rules
   */
  conflicts?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
  ) => RuleOrLiteral[][];

  /**
   * An array of token names which can be returned by an _external scanner_.
   * External scanners allow you to write custom C code which runs during the
   * lexing process in order to handle lexical rules (e.g. Python's indentation
   * tokens) that cannot be described by regular expressions.
   *
   * @param $ grammar rules
   * @param previous array of externals from the base schema, if any
   *
   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
   */
  externals?: (
    $: Record<string, SymbolRule<string>>,
    previous: Rule[],
  ) => SymbolRule<string>[];

  /**
   * An array of tokens that may appear anywhere in the language. This
   * is often used for whitespace and comments. The default value of
   * extras is to accept whitespace. To control whitespace explicitly,
   * specify extras: `$ => []` in your grammar.
   *
   * @param $ grammar rules
   */
  extras?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
  ) => RuleOrLiteral[];

  /**
   * An array of rules that should be automatically removed from the
   * grammar by replacing all of their usages with a copy of their definition.
   * This is useful for rules that are used in multiple places but for which
   * you don't want to create syntax tree nodes at runtime.
   *
   * @param $ grammar rules
   */
  inline?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
  ) => RuleOrLiteral[];

  /**
   * A list of hidden rule names that should be considered supertypes in the
   * generated node types file.
   *
   * @param $ grammar rules
   *
   * @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
   */
  supertypes?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
  ) => RuleOrLiteral[];

  /**
   * The name of a token that will match keywords for the purpose of the
   * keyword extraction optimization.
   *
   * @param $ grammar rules
   *
   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction
   */
  word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;
}

/**
 * Result type of `grammar()`: the same fields as `Grammar<RuleName>`, except
 * that `rules` maps each rule name to a `Rule` node instead of a builder
 * function.
 */
type GrammarSchema<RuleName extends string> = {
  [K in keyof Grammar<RuleName>]: K extends 'rules'
    ? Record<RuleName, Rule>
    : Grammar<RuleName>[K];
};

/**
 * Causes the given rule to appear with an alternative name in the syntax tree.
 * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an
 * anonymous node, as if the rule had been written as the simple string.
 *
 * @param rule rule that will be aliased
 * @param name target name for the alias
 */
declare function alias(rule: RuleOrLiteral, name: string): AliasRule;

/**
 * Causes the given rule to appear as an alternative named node, for instance
 * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named
 * node called `bar`.
 *
 * @param rule rule that will be aliased
 * @param symbol target symbol for the alias
 */
declare function alias(
  rule: RuleOrLiteral,
  symbol: SymbolRule<string>,
): AliasRule;

/**
 * Creates a blank rule, matching nothing.
 */
declare function blank(): BlankRule;

/**
 * Assigns a field name to the child node(s) matched by the given rule.
 * In the resulting syntax tree, you can then use that field name to
 * access specific children.
 *
 * @param name name of the field
 * @param rule rule the field should match
 */
declare function field(name: string, rule: RuleOrLiteral): FieldRule;

/**
 * Creates a rule that matches one of a set of possible rules. The order
 * of the arguments does not matter. This is analogous to the `|` (pipe)
 * operator in EBNF notation.
 *
 * @param options possible rule choices
 */
declare function choice(...options: RuleOrLiteral[]): ChoiceRule;

/**
 * Creates a rule that matches zero or one occurrence of a given rule.
 * It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
 *
 * @param rule rule to be made optional
 */
declare function optional(rule: RuleOrLiteral): ChoiceRule;

/**
 * Marks the given rule with a numerical precedence which will be used to
 * resolve LR(1) conflicts at parser-generation time. When two rules overlap
 * in a way that represents either a true ambiguity or a _local_ ambiguity
 * given one token of lookahead, Tree-sitter will try to resolve the conflict by
 * matching the rule with the higher precedence. The default precedence of all
 * rules is zero. This works similarly to the precedence directives in Yacc grammars.
 *
 * @param number precedence weight
 * @param rule rule being weighted
 *
 * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables
 * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
 */
declare const prec: {
  (number: number, rule: RuleOrLiteral): PrecRule;

  /**
   * Marks the given rule as left-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a left-associative rule, Tree-sitter
   * will prefer matching a rule that ends _earlier_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param number (optional) precedence weight
   * @param rule rule to mark as left-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  left(rule: RuleOrLiteral): PrecLeftRule;
  left(number: number, rule: RuleOrLiteral): PrecLeftRule;

  /**
   * Marks the given rule as right-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a right-associative rule, Tree-sitter
   * will prefer matching a rule that ends _later_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param number (optional) precedence weight
   * @param rule rule to mark as right-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  right(rule: RuleOrLiteral): PrecRightRule;
  right(number: number, rule: RuleOrLiteral): PrecRightRule;

  /**
   * Marks the given rule with a numerical precedence which will be used to
   * resolve LR(1) conflicts at _runtime_ instead of parser-generation time.
   * This is only necessary when handling a conflict dynamically using the
   * `conflicts` field in the grammar, and when there is a genuine _ambiguity_:
   * multiple rules correctly match a given piece of code. In that event,
   * Tree-sitter compares the total dynamic precedence associated with each
   * rule, and selects the one with the highest total. This is similar to
   * dynamic precedence directives in Bison grammars.
   *
   * @param number precedence weight
   * @param rule rule being weighted
   *
   * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
   */
  dynamic(number: number, rule: RuleOrLiteral): PrecDynamicRule;
};

/**
 * Creates a rule that matches _zero-or-more_ occurrences of a given rule.
 * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This
 * rule is implemented in terms of `repeat1` but is included because it
 * is very commonly used.
 *
 * @param rule rule to repeat, zero or more times
 */
declare function repeat(rule: RuleOrLiteral): RepeatRule;

/**
 * Creates a rule that matches one-or-more occurrences of a given rule.
 *
 * @param rule rule to repeat, one or more times
 */
declare function repeat1(rule: RuleOrLiteral): Repeat1Rule;

/**
 * Creates a rule that matches any number of other rules, one after another.
 * It is analogous to simply writing multiple symbols next to each other
 * in EBNF notation.
 *
 * @param rules ordered rules that comprise the sequence
 */
declare function seq(...rules: RuleOrLiteral[]): SeqRule;

/**
 * Creates a symbol rule, representing another rule in the grammar by name.
 *
 * @param name name of the target rule
 */
declare function sym<Name extends string>(name: Name): SymbolRule<Name>;

/**
 * Marks the given rule as producing only a single token. Tree-sitter's
 * default is to treat each String or RegExp literal in the grammar as a
 * separate token. Each token is matched separately by the lexer and
 * returned as its own leaf node in the tree. The token function allows
 * you to express a complex rule using the DSL functions (rather
 * than as a single regular expression) but still have Tree-sitter treat
 * it as a single token.
 *
 * @param rule rule to represent as a single token
 */
declare const token: {
  (rule: RuleOrLiteral): TokenRule;

  /**
   * Marks the given rule as producing an immediate token. This allows
   * the parser to produce a different token based on whether or not
   * there are `extras` preceding the token's main content. When there
   * are _no_ leading `extras`, an immediate token is preferred over a
   * normal token which would otherwise match.
   *
   * @param rule rule to represent as an immediate token
   */
  immediate(rule: RuleOrLiteral): ImmediateTokenRule;
};

/**
 * Creates a new language grammar with the provided schema.
 *
 * @param options grammar options
 */
declare function grammar<RuleName extends string>(
  options: Grammar<RuleName>,
): GrammarSchema<RuleName>;

/**
 * Extends an existing language grammar with the provided options,
 * creating a new language.
 *
 * @param baseGrammar base grammar schema to extend from
 * @param options grammar options for the new extended language
 */
declare function grammar<
  BaseGrammarRuleName extends string,
  RuleName extends string
>(
  baseGrammar: GrammarSchema<BaseGrammarRuleName>,
  options: Grammar<RuleName, BaseGrammarRuleName>,
): GrammarSchema<RuleName | BaseGrammarRuleName>;