Merge branch 'master' into query-pattern-is-definite
This commit is contained in:
commit
1ea29053e1
33 changed files with 2004 additions and 763 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -740,7 +740,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tree-sitter-cli"
|
||||
version = "0.16.8"
|
||||
version = "0.16.9"
|
||||
dependencies = [
|
||||
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "tree-sitter-cli"
|
||||
description = "CLI tool for developing, testing, and using Tree-sitter parsers"
|
||||
version = "0.16.8"
|
||||
version = "0.16.9"
|
||||
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
|
||||
edition = "2018"
|
||||
license = "MIT"
|
||||
|
|
|
|||
356
cli/npm/dsl.d.ts
vendored
Normal file
356
cli/npm/dsl.d.ts
vendored
Normal file
|
|
@ -0,0 +1,356 @@
|
|||
type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string};
|
||||
type BlankRule = {type: 'BLANK'};
|
||||
type ChoiceRule = {type: 'CHOICE'; members: Rule[]};
|
||||
type FieldRule = {type: 'FIELD'; name: string; content: Rule};
|
||||
type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule};
|
||||
type PatternRule = {type: 'PATTERN'; value: string};
|
||||
type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number};
|
||||
type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number};
|
||||
type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number};
|
||||
type PrecRule = {type: 'PREC'; content: Rule; value: number};
|
||||
type Repeat1Rule = {type: 'REPEAT1'; content: Rule};
|
||||
type RepeatRule = {type: 'REPEAT'; content: Rule};
|
||||
type SeqRule = {type: 'SEQ'; members: Rule[]};
|
||||
type StringRule = {type: 'STRING'; value: string};
|
||||
type SymbolRule<Name extends string> = {type: 'SYMBOL'; name: Name};
|
||||
type TokenRule = {type: 'TOKEN'; content: Rule};
|
||||
|
||||
type Rule =
|
||||
| AliasRule
|
||||
| BlankRule
|
||||
| ChoiceRule
|
||||
| FieldRule
|
||||
| ImmediateTokenRule
|
||||
| PatternRule
|
||||
| PrecDynamicRule
|
||||
| PrecLeftRule
|
||||
| PrecRightRule
|
||||
| PrecRule
|
||||
| Repeat1Rule
|
||||
| RepeatRule
|
||||
| SeqRule
|
||||
| StringRule
|
||||
| SymbolRule<string>
|
||||
| TokenRule;
|
||||
|
||||
type RuleOrLiteral = Rule | RegExp | string;
|
||||
|
||||
type GrammarSymbols<RuleName extends string> = {
|
||||
[name in RuleName]: SymbolRule<name>;
|
||||
} &
|
||||
Record<string, SymbolRule<string>>;
|
||||
|
||||
type RuleBuilder<RuleName extends string> = (
|
||||
$: GrammarSymbols<RuleName>,
|
||||
) => RuleOrLiteral;
|
||||
|
||||
type RuleBuilders<
|
||||
RuleName extends string,
|
||||
BaseGrammarRuleName extends string
|
||||
> = {
|
||||
[name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
|
||||
};
|
||||
|
||||
interface Grammar<
|
||||
RuleName extends string,
|
||||
BaseGrammarRuleName extends string = never,
|
||||
Rules extends RuleBuilders<RuleName, BaseGrammarRuleName> = RuleBuilders<
|
||||
RuleName,
|
||||
BaseGrammarRuleName
|
||||
>
|
||||
> {
|
||||
/**
|
||||
* Name of the grammar language.
|
||||
*/
|
||||
name: string;
|
||||
|
||||
/** Mapping of grammar rule names to rule builder functions. */
|
||||
rules: Rules;
|
||||
|
||||
/**
|
||||
* An array of arrays of rule names. Each inner array represents a set of
|
||||
* rules that's involved in an _LR(1) conflict_ that is _intended to exist_
|
||||
* in the grammar. When these conflicts occur at runtime, Tree-sitter will
|
||||
* use the GLR algorithm to explore all of the possible interpretations. If
|
||||
* _multiple_ parses end up succeeding, Tree-sitter will pick the subtree
|
||||
* whose corresponding rule has the highest total _dynamic precedence_.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
*/
|
||||
conflicts?: (
|
||||
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
|
||||
) => RuleOrLiteral[][];
|
||||
|
||||
/**
|
||||
* An array of token names which can be returned by an _external scanner_.
|
||||
* External scanners allow you to write custom C code which runs during the
|
||||
* lexing process in order to handle lexical rules (e.g. Python's indentation
|
||||
* tokens) that cannot be described by regular expressions.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
* @param previous array of externals from the base schema, if any
|
||||
*
|
||||
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
|
||||
*/
|
||||
externals?: (
|
||||
$: Record<string, SymbolRule<string>>,
|
||||
previous: Rule[],
|
||||
) => SymbolRule<string>[];
|
||||
|
||||
/**
|
||||
* An array of tokens that may appear anywhere in the language. This
|
||||
* is often used for whitespace and comments. The default value of
|
||||
* extras is to accept whitespace. To control whitespace explicitly,
|
||||
* specify `extras: $ => []` in your grammar.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
*/
|
||||
extras?: (
|
||||
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
|
||||
) => RuleOrLiteral[];
|
||||
|
||||
/**
|
||||
* An array of rules that should be automatically removed from the
|
||||
* grammar by replacing all of their usages with a copy of their definition.
|
||||
* This is useful for rules that are used in multiple places but for which
|
||||
* you don't want to create syntax tree nodes at runtime.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
*/
|
||||
inline?: (
|
||||
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
|
||||
) => RuleOrLiteral[];
|
||||
|
||||
/**
|
||||
* A list of hidden rule names that should be considered supertypes in the
|
||||
* generated node types file.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
*
|
||||
* @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
|
||||
*/
|
||||
supertypes?: (
|
||||
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
|
||||
) => RuleOrLiteral[];
|
||||
|
||||
/**
|
||||
* The name of a token that will match keywords for the purpose of the
|
||||
* keyword extraction optimization.
|
||||
*
|
||||
* @param $ grammar rules
|
||||
*
|
||||
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction
|
||||
*/
|
||||
word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;
|
||||
}
|
||||
|
||||
type GrammarSchema<RuleName extends string> = {
|
||||
[K in keyof Grammar<RuleName>]: K extends 'rules'
|
||||
? Record<RuleName, Rule>
|
||||
: Grammar<RuleName>[K];
|
||||
};
|
||||
|
||||
/**
|
||||
* Causes the given rule to appear with an alternative name in the syntax tree.
|
||||
* For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an
|
||||
* anonymous node, as if the rule had been written as the simple string.
|
||||
*
|
||||
* @param rule rule that will be aliased
|
||||
* @param name target name for the alias
|
||||
*/
|
||||
declare function alias(rule: RuleOrLiteral, name: string): AliasRule;
|
||||
|
||||
/**
|
||||
* Causes the given rule to appear as an alternative named node. For instance,
|
||||
* with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named
|
||||
* node called `bar`.
|
||||
*
|
||||
* @param rule rule that will be aliased
|
||||
* @param symbol target symbol for the alias
|
||||
*/
|
||||
declare function alias(
|
||||
rule: RuleOrLiteral,
|
||||
symbol: SymbolRule<string>,
|
||||
): AliasRule;
|
||||
|
||||
/**
|
||||
* Creates a blank rule, matching nothing.
|
||||
*/
|
||||
declare function blank(): BlankRule;
|
||||
|
||||
/**
|
||||
* Assigns a field name to the child node(s) matched by the given rule.
|
||||
* In the resulting syntax tree, you can then use that field name to
|
||||
* access specific children.
|
||||
*
|
||||
* @param name name of the field
|
||||
* @param rule rule the field should match
|
||||
*/
|
||||
declare function field(name: string, rule: RuleOrLiteral): FieldRule;
|
||||
|
||||
/**
|
||||
* Creates a rule that matches one of a set of possible rules. The order
|
||||
* of the arguments does not matter. This is analogous to the `|` (pipe)
|
||||
* operator in EBNF notation.
|
||||
*
|
||||
* @param options possible rule choices
|
||||
*/
|
||||
declare function choice(...options: RuleOrLiteral[]): ChoiceRule;
|
||||
|
||||
/**
|
||||
* Creates a rule that matches zero or one occurrence of a given rule.
|
||||
* It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
|
||||
*
|
||||
* @param rule rule to be made optional
|
||||
*/
|
||||
declare function optional(rule: RuleOrLiteral): ChoiceRule;
|
||||
|
||||
/**
|
||||
* Marks the given rule with a numerical precedence which will be used to
|
||||
* resolve LR(1) conflicts at parser-generation time. When two rules overlap
|
||||
* in a way that represents either a true ambiguity or a _local_ ambiguity
|
||||
* given one token of lookahead, Tree-sitter will try to resolve the conflict by
|
||||
* matching the rule with the higher precedence. The default precedence of all
|
||||
* rules is zero. This works similarly to the precedence directives in Yacc grammars.
|
||||
*
|
||||
* @param number precedence weight
|
||||
* @param rule rule being weighted
|
||||
*
|
||||
* @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables
|
||||
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
||||
*/
|
||||
declare const prec: {
|
||||
(number: number, rule: RuleOrLiteral): PrecRule;
|
||||
|
||||
/**
|
||||
* Marks the given rule as left-associative (and optionally applies a
|
||||
* numerical precedence). When an LR(1) conflict arises in which all of the
|
||||
* rules have the same numerical precedence, Tree-sitter will consult the
|
||||
* rules' associativity. If there is a left-associative rule, Tree-sitter
|
||||
* will prefer matching a rule that ends _earlier_. This works similarly to
|
||||
* associativity directives in Yacc grammars.
|
||||
*
|
||||
* @param number (optional) precedence weight
|
||||
* @param rule rule to mark as left-associative
|
||||
*
|
||||
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
||||
*/
|
||||
left(rule: RuleOrLiteral): PrecLeftRule;
|
||||
left(number: number, rule: RuleOrLiteral): PrecLeftRule;
|
||||
|
||||
/**
|
||||
* Marks the given rule as right-associative (and optionally applies a
|
||||
* numerical precedence). When an LR(1) conflict arises in which all of the
|
||||
* rules have the same numerical precedence, Tree-sitter will consult the
|
||||
* rules' associativity. If there is a right-associative rule, Tree-sitter
|
||||
* will prefer matching a rule that ends _later_. This works similarly to
|
||||
* associativity directives in Yacc grammars.
|
||||
*
|
||||
* @param number (optional) precedence weight
|
||||
* @param rule rule to mark as right-associative
|
||||
*
|
||||
* @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
|
||||
*/
|
||||
right(rule: RuleOrLiteral): PrecRightRule;
|
||||
right(number: number, rule: RuleOrLiteral): PrecRightRule;
|
||||
|
||||
/**
|
||||
* Marks the given rule with a numerical precedence which will be used to
|
||||
* resolve LR(1) conflicts at _runtime_ instead of parser-generation time.
|
||||
* This is only necessary when handling a conflict dynamically using the
|
||||
* `conflicts` field in the grammar, and when there is a genuine _ambiguity_:
|
||||
* multiple rules correctly match a given piece of code. In that event,
|
||||
* Tree-sitter compares the total dynamic precedence associated with each
|
||||
* rule, and selects the one with the highest total. This is similar to
|
||||
* dynamic precedence directives in Bison grammars.
|
||||
*
|
||||
* @param number precedence weight
|
||||
* @param rule rule being weighted
|
||||
*
|
||||
* @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
|
||||
*/
|
||||
dynamic(number: number, rule: RuleOrLiteral): PrecDynamicRule;
|
||||
};
|
||||
|
||||
/**
|
||||
* Creates a rule that matches _zero-or-more_ occurrences of a given rule.
|
||||
* It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This
|
||||
* rule is implemented in terms of `repeat1` but is included because it
|
||||
* is very commonly used.
|
||||
*
|
||||
* @param rule rule to repeat, zero or more times
|
||||
*/
|
||||
declare function repeat(rule: RuleOrLiteral): RepeatRule;
|
||||
|
||||
/**
|
||||
* Creates a rule that matches one-or-more occurrences of a given rule.
|
||||
*
|
||||
* @param rule rule to repeat, one or more times
|
||||
*/
|
||||
declare function repeat1(rule: RuleOrLiteral): Repeat1Rule;
|
||||
|
||||
/**
|
||||
* Creates a rule that matches any number of other rules, one after another.
|
||||
* It is analogous to simply writing multiple symbols next to each other
|
||||
* in EBNF notation.
|
||||
*
|
||||
* @param rules ordered rules that comprise the sequence
|
||||
*/
|
||||
declare function seq(...rules: RuleOrLiteral[]): SeqRule;
|
||||
|
||||
/**
|
||||
* Creates a symbol rule, representing another rule in the grammar by name.
|
||||
*
|
||||
* @param name name of the target rule
|
||||
*/
|
||||
declare function sym<Name extends string>(name: Name): SymbolRule<Name>;
|
||||
|
||||
/**
|
||||
* Marks the given rule as producing only a single token. Tree-sitter's
|
||||
* default is to treat each String or RegExp literal in the grammar as a
|
||||
* separate token. Each token is matched separately by the lexer and
|
||||
* returned as its own leaf node in the tree. The token function allows
|
||||
* you to express a complex rule using the DSL functions (rather
|
||||
* than as a single regular expression) but still have Tree-sitter treat
|
||||
* it as a single token.
|
||||
*
|
||||
* @param rule rule to represent as a single token
|
||||
*/
|
||||
declare const token: {
|
||||
(rule: RuleOrLiteral): TokenRule;
|
||||
|
||||
/**
|
||||
* Marks the given rule as producing an immediate token. This allows
|
||||
* the parser to produce a different token based on whether or not
|
||||
* there are `extras` preceding the token's main content. When there
|
||||
* are _no_ leading `extras`, an immediate token is preferred over a
|
||||
* normal token which would otherwise match.
|
||||
*
|
||||
* @param rule rule to represent as an immediate token
|
||||
*/
|
||||
immediate(rule: RuleOrLiteral): ImmediateTokenRule;
|
||||
};
|
||||
|
||||
/**
|
||||
* Creates a new language grammar with the provided schema.
|
||||
*
|
||||
* @param options grammar options
|
||||
*/
|
||||
declare function grammar<RuleName extends string>(
|
||||
options: Grammar<RuleName>,
|
||||
): GrammarSchema<RuleName>;
|
||||
|
||||
/**
|
||||
* Extends an existing language grammar with the provided options,
|
||||
* creating a new language.
|
||||
*
|
||||
* @param baseGrammar base grammar schema to extend from
|
||||
* @param options grammar options for the new extended language
|
||||
*/
|
||||
declare function grammar<
|
||||
BaseGrammarRuleName extends string,
|
||||
RuleName extends string
|
||||
>(
|
||||
baseGrammar: GrammarSchema<BaseGrammarRuleName>,
|
||||
options: Grammar<RuleName, BaseGrammarRuleName>,
|
||||
): GrammarSchema<RuleName | BaseGrammarRuleName>;
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "tree-sitter-cli",
|
||||
"version": "0.16.8",
|
||||
"version": "0.16.9",
|
||||
"author": "Max Brunsfeld",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ impl<'a> From<tree_sitter_highlight::Error> for Error {
|
|||
|
||||
impl<'a> From<tree_sitter_tags::Error> for Error {
|
||||
fn from(error: tree_sitter_tags::Error) -> Self {
|
||||
Error::new(format!("{:?}", error))
|
||||
Error::new(format!("{}", error))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -199,6 +199,9 @@ impl<'a> Minimizer<'a> {
|
|||
right_state: &ParseState,
|
||||
group_ids_by_state_id: &Vec<ParseStateId>,
|
||||
) -> bool {
|
||||
if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra {
|
||||
return true;
|
||||
}
|
||||
for (token, left_entry) in &left_state.terminal_entries {
|
||||
if let Some(right_entry) = right_state.terminal_entries.get(token) {
|
||||
if self.entries_conflict(
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ pub(crate) struct FieldInfo {
|
|||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct VariableInfo {
|
||||
pub fields: HashMap<String, FieldInfo>,
|
||||
pub child_types: Vec<ChildType>,
|
||||
pub children: FieldInfo,
|
||||
pub children_without_fields: FieldInfo,
|
||||
pub has_multi_step_production: bool,
|
||||
}
|
||||
|
|
@ -70,7 +70,7 @@ impl Default for FieldInfoJSON {
|
|||
|
||||
impl Default for ChildQuantity {
|
||||
fn default() -> Self {
|
||||
Self::zero()
|
||||
Self::one()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -158,7 +158,7 @@ pub(crate) fn get_variable_info(
|
|||
|
||||
// Each variable's summary can depend on the summaries of other hidden variables,
|
||||
// and variables can have mutually recursive structure. So we compute the summaries
|
||||
// iteratively, in a loop that terminates only when more changes are possible.
|
||||
// iteratively, in a loop that terminates only when no more changes are possible.
|
||||
let mut did_change = true;
|
||||
let mut all_initialized = false;
|
||||
let mut result = vec![VariableInfo::default(); syntax_grammar.variables.len()];
|
||||
|
|
@ -168,13 +168,14 @@ pub(crate) fn get_variable_info(
|
|||
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
|
||||
let mut variable_info = result[i].clone();
|
||||
|
||||
// Within a variable, consider each production separately. For each
|
||||
// production, determine which children and fields can occur, and how many
|
||||
// times they can occur.
|
||||
for (production_index, production) in variable.productions.iter().enumerate() {
|
||||
let mut field_quantities = HashMap::new();
|
||||
let mut children_without_fields_quantity = ChildQuantity::zero();
|
||||
let mut has_uninitialized_invisible_children = false;
|
||||
// Examine each of the variable's productions. The variable's child types can be
|
||||
// immediately combined across all productions, but the child quantities must be
|
||||
// recorded separately for each production.
|
||||
for production in &variable.productions {
|
||||
let mut production_field_quantities = HashMap::new();
|
||||
let mut production_children_quantity = ChildQuantity::zero();
|
||||
let mut production_children_without_fields_quantity = ChildQuantity::zero();
|
||||
let mut production_has_uninitialized_invisible_children = false;
|
||||
|
||||
if production.steps.len() > 1 {
|
||||
variable_info.has_multi_step_production = true;
|
||||
|
|
@ -190,111 +191,97 @@ pub(crate) fn get_variable_info(
|
|||
ChildType::Normal(child_symbol)
|
||||
};
|
||||
|
||||
// Record all of the types of direct children.
|
||||
did_change |= sorted_vec_insert(&mut variable_info.child_types, &child_type);
|
||||
let child_is_hidden = !child_type_is_visible(&child_type)
|
||||
&& !syntax_grammar.supertype_symbols.contains(&child_symbol);
|
||||
|
||||
// Record all of the field names that occur.
|
||||
// Maintain the set of all child types for this variable, and the quantity of
|
||||
// visible children in this production.
|
||||
did_change |=
|
||||
extend_sorted(&mut variable_info.children.types, Some(&child_type));
|
||||
if !child_is_hidden {
|
||||
production_children_quantity.append(ChildQuantity::one());
|
||||
}
|
||||
|
||||
// Maintain the set of child types associated with each field, and the quantity
|
||||
// of children associated with each field in this production.
|
||||
if let Some(field_name) = &step.field_name {
|
||||
// Record how many times each field occurs in this production.
|
||||
field_quantities
|
||||
let field_info = variable_info
|
||||
.fields
|
||||
.entry(field_name.clone())
|
||||
.or_insert(FieldInfo::default());
|
||||
did_change |= extend_sorted(&mut field_info.types, Some(&child_type));
|
||||
|
||||
let production_field_quantity = production_field_quantities
|
||||
.entry(field_name)
|
||||
.or_insert(ChildQuantity::zero())
|
||||
.append(ChildQuantity::one());
|
||||
.or_insert(ChildQuantity::zero());
|
||||
|
||||
// Record the types of children for this field.
|
||||
let field_info =
|
||||
variable_info.fields.entry(field_name.clone()).or_insert({
|
||||
let mut info = FieldInfo {
|
||||
types: Vec::new(),
|
||||
quantity: ChildQuantity::one(),
|
||||
};
|
||||
|
||||
// If this field did *not* occur in an earlier production,
|
||||
// then it is not required.
|
||||
if production_index > 0 {
|
||||
info.quantity.required = false;
|
||||
}
|
||||
info
|
||||
});
|
||||
did_change |= sorted_vec_insert(&mut field_info.types, &child_type);
|
||||
}
|
||||
// Record named children without fields.
|
||||
else if child_type_is_named(&child_type) {
|
||||
// Record how many named children without fields occur in this production.
|
||||
children_without_fields_quantity.append(ChildQuantity::one());
|
||||
|
||||
// Record the types of all of the named children without fields.
|
||||
let children_info = &mut variable_info.children_without_fields;
|
||||
if children_info.types.is_empty() {
|
||||
children_info.quantity = ChildQuantity::one();
|
||||
// Inherit the types and quantities of hidden children associated with fields.
|
||||
if child_is_hidden && child_symbol.is_non_terminal() {
|
||||
let child_variable_info = &result[child_symbol.index];
|
||||
did_change |= extend_sorted(
|
||||
&mut field_info.types,
|
||||
&child_variable_info.children.types,
|
||||
);
|
||||
production_field_quantity.append(child_variable_info.children.quantity);
|
||||
} else {
|
||||
production_field_quantity.append(ChildQuantity::one());
|
||||
}
|
||||
did_change |= sorted_vec_insert(&mut children_info.types, &child_type);
|
||||
}
|
||||
// Maintain the set of named children without fields within this variable.
|
||||
else if child_type_is_named(&child_type) {
|
||||
production_children_without_fields_quantity.append(ChildQuantity::one());
|
||||
did_change |= extend_sorted(
|
||||
&mut variable_info.children_without_fields.types,
|
||||
Some(&child_type),
|
||||
);
|
||||
}
|
||||
|
||||
// Inherit information from any hidden children.
|
||||
if child_symbol.is_non_terminal()
|
||||
&& !syntax_grammar.supertype_symbols.contains(&child_symbol)
|
||||
&& step.alias.is_none()
|
||||
&& !child_type_is_visible(&child_type)
|
||||
{
|
||||
// Inherit all child information from hidden children.
|
||||
if child_is_hidden && child_symbol.is_non_terminal() {
|
||||
let child_variable_info = &result[child_symbol.index];
|
||||
|
||||
// If a hidden child can have multiple children, then this
|
||||
// node can appear to have multiple children.
|
||||
// If a hidden child can have multiple children, then its parent node can
|
||||
// appear to have multiple children.
|
||||
if child_variable_info.has_multi_step_production {
|
||||
variable_info.has_multi_step_production = true;
|
||||
}
|
||||
|
||||
// Inherit fields from this hidden child
|
||||
// If a hidden child has fields, then the parent node can appear to have
|
||||
// those same fields.
|
||||
for (field_name, child_field_info) in &child_variable_info.fields {
|
||||
field_quantities
|
||||
production_field_quantities
|
||||
.entry(field_name)
|
||||
.or_insert(ChildQuantity::zero())
|
||||
.append(child_field_info.quantity);
|
||||
let field_info = variable_info
|
||||
.fields
|
||||
.entry(field_name.clone())
|
||||
.or_insert(FieldInfo {
|
||||
types: Vec::new(),
|
||||
quantity: ChildQuantity::one(),
|
||||
});
|
||||
for child_type in &child_field_info.types {
|
||||
sorted_vec_insert(&mut field_info.types, &child_type);
|
||||
}
|
||||
did_change |= extend_sorted(
|
||||
&mut variable_info
|
||||
.fields
|
||||
.entry(field_name.clone())
|
||||
.or_insert(FieldInfo::default())
|
||||
.types,
|
||||
&child_field_info.types,
|
||||
);
|
||||
}
|
||||
|
||||
// Inherit child types from this hidden child
|
||||
for child_type in &child_variable_info.child_types {
|
||||
did_change |=
|
||||
sorted_vec_insert(&mut variable_info.child_types, child_type);
|
||||
}
|
||||
// If a hidden child has children, then the parent node can appear to have
|
||||
// those same children.
|
||||
production_children_quantity.append(child_variable_info.children.quantity);
|
||||
did_change |= extend_sorted(
|
||||
&mut variable_info.children.types,
|
||||
&child_variable_info.children.types,
|
||||
);
|
||||
|
||||
// If any field points to this hidden child, inherit child types
|
||||
// for the field.
|
||||
if let Some(field_name) = &step.field_name {
|
||||
let field_info = variable_info.fields.get_mut(field_name).unwrap();
|
||||
for child_type in &child_variable_info.child_types {
|
||||
did_change |= sorted_vec_insert(&mut field_info.types, &child_type);
|
||||
}
|
||||
}
|
||||
// Inherit info about children without fields from this hidden child.
|
||||
else {
|
||||
// If a hidden child can have named children without fields, then the parent
|
||||
// node can appear to have those same children.
|
||||
if step.field_name.is_none() {
|
||||
let grandchildren_info = &child_variable_info.children_without_fields;
|
||||
if !grandchildren_info.types.is_empty() {
|
||||
children_without_fields_quantity
|
||||
.append(grandchildren_info.quantity);
|
||||
|
||||
if variable_info.children_without_fields.types.is_empty() {
|
||||
variable_info.children_without_fields.quantity =
|
||||
ChildQuantity::one();
|
||||
}
|
||||
|
||||
for child_type in &grandchildren_info.types {
|
||||
did_change |= sorted_vec_insert(
|
||||
&mut variable_info.children_without_fields.types,
|
||||
&child_type,
|
||||
);
|
||||
}
|
||||
production_children_without_fields_quantity
|
||||
.append(child_variable_info.children_without_fields.quantity);
|
||||
did_change |= extend_sorted(
|
||||
&mut variable_info.children_without_fields.types,
|
||||
&child_variable_info.children_without_fields.types,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -302,22 +289,27 @@ pub(crate) fn get_variable_info(
|
|||
// Note whether or not this production contains children whose summaries
|
||||
// have not yet been computed.
|
||||
if child_symbol.index >= i && !all_initialized {
|
||||
has_uninitialized_invisible_children = true;
|
||||
production_has_uninitialized_invisible_children = true;
|
||||
}
|
||||
}
|
||||
|
||||
// If this production's children all have had their summaries initialized,
|
||||
// then expand the quantity information with all of the possibilities introduced
|
||||
// by this production.
|
||||
if !has_uninitialized_invisible_children {
|
||||
if !production_has_uninitialized_invisible_children {
|
||||
did_change |= variable_info
|
||||
.children
|
||||
.quantity
|
||||
.union(production_children_quantity);
|
||||
|
||||
did_change |= variable_info
|
||||
.children_without_fields
|
||||
.quantity
|
||||
.union(children_without_fields_quantity);
|
||||
.union(production_children_without_fields_quantity);
|
||||
|
||||
for (field_name, info) in variable_info.fields.iter_mut() {
|
||||
did_change |= info.quantity.union(
|
||||
field_quantities
|
||||
production_field_quantities
|
||||
.get(field_name)
|
||||
.cloned()
|
||||
.unwrap_or(ChildQuantity::zero()),
|
||||
|
|
@ -352,13 +344,15 @@ pub(crate) fn get_variable_info(
|
|||
// Update all of the node type lists to eliminate hidden nodes.
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
result[supertype_symbol.index]
|
||||
.child_types
|
||||
.children
|
||||
.types
|
||||
.retain(child_type_is_visible);
|
||||
}
|
||||
for variable_info in result.iter_mut() {
|
||||
for (_, field_info) in variable_info.fields.iter_mut() {
|
||||
field_info.types.retain(child_type_is_visible);
|
||||
}
|
||||
variable_info.fields.retain(|_, v| !v.types.is_empty());
|
||||
variable_info
|
||||
.children_without_fields
|
||||
.types
|
||||
|
|
@ -467,7 +461,8 @@ pub(crate) fn generate_node_types_json(
|
|||
subtypes: None,
|
||||
});
|
||||
let mut subtypes = info
|
||||
.child_types
|
||||
.children
|
||||
.types
|
||||
.iter()
|
||||
.map(child_type_to_node_type)
|
||||
.collect::<Vec<_>>();
|
||||
|
|
@ -686,16 +681,19 @@ fn variable_type_for_child_type(
|
|||
}
|
||||
}
|
||||
|
||||
fn sorted_vec_insert<T>(vec: &mut Vec<T>, value: &T) -> bool
|
||||
fn extend_sorted<'a, T>(vec: &mut Vec<T>, values: impl IntoIterator<Item = &'a T>) -> bool
|
||||
where
|
||||
T: Clone + Eq + Ord,
|
||||
T: 'a,
|
||||
{
|
||||
if let Err(i) = vec.binary_search(&value) {
|
||||
vec.insert(i, value.clone());
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
values.into_iter().any(|value| {
|
||||
if let Err(i) = vec.binary_search(&value) {
|
||||
vec.insert(i, value.clone());
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -1177,6 +1175,38 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_types_with_fields_on_hidden_tokens() {
|
||||
let node_types = get_node_types(InputGrammar {
|
||||
name: String::new(),
|
||||
extra_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
word_token: None,
|
||||
supertype_symbols: vec![],
|
||||
variables: vec![Variable {
|
||||
name: "script".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::seq(vec![
|
||||
Rule::field("a".to_string(), Rule::pattern("hi")),
|
||||
Rule::field("b".to_string(), Rule::pattern("bye")),
|
||||
]),
|
||||
}],
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
node_types,
|
||||
[NodeInfoJSON {
|
||||
kind: "script".to_string(),
|
||||
named: true,
|
||||
fields: Some(BTreeMap::new()),
|
||||
children: None,
|
||||
subtypes: None
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_types_with_multiple_rules_same_alias_name() {
|
||||
let node_types = get_node_types(InputGrammar {
|
||||
|
|
@ -1461,6 +1491,71 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_variable_info_with_repetitions_inside_fields() {
|
||||
let variable_info = get_variable_info(
|
||||
&build_syntax_grammar(
|
||||
vec![
|
||||
// Field associated with a repetition.
|
||||
SyntaxVariable {
|
||||
name: "rule0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::non_terminal(1))
|
||||
.with_field_name("field1")],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![],
|
||||
},
|
||||
],
|
||||
},
|
||||
// Repetition node
|
||||
SyntaxVariable {
|
||||
name: "_rule0_repeat".to_string(),
|
||||
kind: VariableType::Hidden,
|
||||
productions: vec![
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(1))],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::non_terminal(1)),
|
||||
ProductionStep::new(Symbol::non_terminal(1)),
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
&build_lexical_grammar(),
|
||||
&AliasMap::new(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
variable_info[0].fields,
|
||||
vec![(
|
||||
"field1".to_string(),
|
||||
FieldInfo {
|
||||
quantity: ChildQuantity {
|
||||
exists: true,
|
||||
required: false,
|
||||
multiple: true,
|
||||
},
|
||||
types: vec![ChildType::Normal(Symbol::terminal(1))],
|
||||
}
|
||||
)]
|
||||
.into_iter()
|
||||
.collect::<HashMap<_, _>>()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_variable_info_with_inherited_fields() {
|
||||
let variable_info = get_variable_info(
|
||||
|
|
|
|||
|
|
@ -127,6 +127,9 @@ impl InlinedProductionMapBuilder {
|
|||
last_inserted_step.associativity = removed_step.associativity;
|
||||
}
|
||||
}
|
||||
if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() {
|
||||
production.dynamic_precedence = p.dynamic_precedence;
|
||||
}
|
||||
production
|
||||
}),
|
||||
);
|
||||
|
|
@ -226,7 +229,7 @@ mod tests {
|
|||
],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
dynamic_precedence: -2,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(14))],
|
||||
},
|
||||
],
|
||||
|
|
@ -258,7 +261,7 @@ mod tests {
|
|||
],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
dynamic_precedence: -2,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(10)),
|
||||
ProductionStep::new(Symbol::terminal(14)),
|
||||
|
|
|
|||
|
|
@ -160,7 +160,9 @@ impl Loader {
|
|||
// If multiple language configurations match, then determine which
|
||||
// one to use by applying the configurations' content regexes.
|
||||
else {
|
||||
let file_contents = fs::read_to_string(path)?;
|
||||
let file_contents = fs::read(path)
|
||||
.map_err(Error::wrap(|| format!("Failed to read path {:?}", path)))?;
|
||||
let file_contents = String::from_utf8_lossy(&file_contents);
|
||||
let mut best_score = -2isize;
|
||||
let mut best_configuration_id = None;
|
||||
for configuration_id in configuration_ids {
|
||||
|
|
|
|||
140
cli/src/main.rs
140
cli/src/main.rs
|
|
@ -53,11 +53,12 @@ fn run() -> error::Result<()> {
|
|||
.subcommand(
|
||||
SubCommand::with_name("parse")
|
||||
.about("Parse files")
|
||||
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("path")
|
||||
Arg::with_name("paths")
|
||||
.index(1)
|
||||
.multiple(true)
|
||||
.required(true),
|
||||
.required(false),
|
||||
)
|
||||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("debug").long("debug").short("d"))
|
||||
|
|
@ -79,37 +80,33 @@ fn run() -> error::Result<()> {
|
|||
SubCommand::with_name("query")
|
||||
.about("Search files using a syntax tree query")
|
||||
.arg(Arg::with_name("query-path").index(1).required(true))
|
||||
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("path")
|
||||
Arg::with_name("paths")
|
||||
.index(2)
|
||||
.multiple(true)
|
||||
.required(true),
|
||||
.required(false),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("byte-range")
|
||||
.help("The range of byte offsets in which the query will be executed")
|
||||
.long("byte-range")
|
||||
.takes_value(true),
|
||||
)
|
||||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("captures").long("captures").short("c")),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("tags")
|
||||
.arg(
|
||||
Arg::with_name("format")
|
||||
.short("f")
|
||||
.long("format")
|
||||
.value_name("json|protobuf")
|
||||
.help("Determine output format (default: json)"),
|
||||
)
|
||||
.arg(Arg::with_name("quiet").long("quiet").short("q"))
|
||||
.arg(Arg::with_name("time").long("time").short("t"))
|
||||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("inputs")
|
||||
Arg::with_name("paths")
|
||||
.help("The source file to use")
|
||||
.index(1)
|
||||
.required(true)
|
||||
.multiple(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("v")
|
||||
.short("v")
|
||||
.multiple(true)
|
||||
.help("Sets the level of verbosity"),
|
||||
),
|
||||
)
|
||||
.subcommand(
|
||||
|
|
@ -127,11 +124,12 @@ fn run() -> error::Result<()> {
|
|||
.subcommand(
|
||||
SubCommand::with_name("highlight")
|
||||
.about("Highlight a file")
|
||||
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("path")
|
||||
Arg::with_name("paths")
|
||||
.index(1)
|
||||
.multiple(true)
|
||||
.required(true),
|
||||
.required(false),
|
||||
)
|
||||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("html").long("html").short("h"))
|
||||
|
|
@ -230,7 +228,9 @@ fn run() -> error::Result<()> {
|
|||
let timeout = matches
|
||||
.value_of("timeout")
|
||||
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
|
||||
let paths = collect_paths(matches.values_of("path").unwrap())?;
|
||||
|
||||
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
|
||||
|
||||
let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap();
|
||||
let mut has_error = false;
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
|
|
@ -256,31 +256,36 @@ fn run() -> error::Result<()> {
|
|||
}
|
||||
} else if let Some(matches) = matches.subcommand_matches("query") {
|
||||
let ordered_captures = matches.values_of("captures").is_some();
|
||||
let paths = matches
|
||||
.values_of("path")
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.map(Path::new)
|
||||
.collect::<Vec<&Path>>();
|
||||
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
let language = select_language(
|
||||
&mut loader,
|
||||
paths[0],
|
||||
Path::new(&paths[0]),
|
||||
¤t_dir,
|
||||
matches.value_of("scope"),
|
||||
)?;
|
||||
let query_path = Path::new(matches.value_of("query-path").unwrap());
|
||||
query::query_files_at_paths(language, paths, query_path, ordered_captures)?;
|
||||
let range = matches.value_of("byte-range").map(|br| {
|
||||
let r: Vec<&str> = br.split(":").collect();
|
||||
(r[0].parse().unwrap(), r[1].parse().unwrap())
|
||||
});
|
||||
query::query_files_at_paths(language, paths, query_path, ordered_captures, range)?;
|
||||
} else if let Some(matches) = matches.subcommand_matches("tags") {
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
let paths = collect_paths(matches.values_of("inputs").unwrap())?;
|
||||
tags::generate_tags(&loader, matches.value_of("scope"), &paths)?;
|
||||
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
|
||||
tags::generate_tags(
|
||||
&loader,
|
||||
matches.value_of("scope"),
|
||||
&paths,
|
||||
matches.is_present("quiet"),
|
||||
matches.is_present("time"),
|
||||
)?;
|
||||
} else if let Some(matches) = matches.subcommand_matches("highlight") {
|
||||
loader.configure_highlights(&config.theme.highlight_names);
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
|
||||
let time = matches.is_present("time");
|
||||
let paths = collect_paths(matches.values_of("path").unwrap())?;
|
||||
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
|
||||
let html_mode = matches.is_present("html");
|
||||
if html_mode {
|
||||
println!("{}", highlight::HTML_HEADER);
|
||||
|
|
@ -353,39 +358,58 @@ fn run() -> error::Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn collect_paths<'a>(paths: impl Iterator<Item = &'a str>) -> error::Result<Vec<String>> {
|
||||
let mut result = Vec::new();
|
||||
fn collect_paths<'a>(
|
||||
paths_file: Option<&str>,
|
||||
paths: Option<impl Iterator<Item = &'a str>>,
|
||||
) -> error::Result<Vec<String>> {
|
||||
if let Some(paths_file) = paths_file {
|
||||
return Ok(fs::read_to_string(paths_file)
|
||||
.map_err(Error::wrap(|| {
|
||||
format!("Failed to read paths file {}", paths_file)
|
||||
}))?
|
||||
.trim()
|
||||
.split_ascii_whitespace()
|
||||
.map(String::from)
|
||||
.collect::<Vec<_>>());
|
||||
}
|
||||
|
||||
let mut incorporate_path = |path: &str, positive| {
|
||||
if positive {
|
||||
result.push(path.to_string());
|
||||
} else {
|
||||
if let Some(index) = result.iter().position(|p| p == path) {
|
||||
result.remove(index);
|
||||
if let Some(paths) = paths {
|
||||
let mut result = Vec::new();
|
||||
|
||||
let mut incorporate_path = |path: &str, positive| {
|
||||
if positive {
|
||||
result.push(path.to_string());
|
||||
} else {
|
||||
if let Some(index) = result.iter().position(|p| p == path) {
|
||||
result.remove(index);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
for mut path in paths {
|
||||
let mut positive = true;
|
||||
if path.starts_with("!") {
|
||||
positive = false;
|
||||
path = path.trim_start_matches("!");
|
||||
}
|
||||
for mut path in paths {
|
||||
let mut positive = true;
|
||||
if path.starts_with("!") {
|
||||
positive = false;
|
||||
path = path.trim_start_matches("!");
|
||||
}
|
||||
|
||||
if Path::new(path).exists() {
|
||||
incorporate_path(path, positive);
|
||||
} else {
|
||||
let paths =
|
||||
glob(path).map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?;
|
||||
for path in paths {
|
||||
if let Some(path) = path?.to_str() {
|
||||
incorporate_path(path, positive);
|
||||
if Path::new(path).exists() {
|
||||
incorporate_path(path, positive);
|
||||
} else {
|
||||
let paths = glob(path)
|
||||
.map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?;
|
||||
for path in paths {
|
||||
if let Some(path) = path?.to_str() {
|
||||
incorporate_path(path, positive);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(result);
|
||||
}
|
||||
Ok(result)
|
||||
|
||||
Err(Error::new("Must provide one or more paths".to_string()))
|
||||
}
|
||||
|
||||
fn select_language(
|
||||
|
|
|
|||
|
|
@ -6,9 +6,10 @@ use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
|
|||
|
||||
pub fn query_files_at_paths(
|
||||
language: Language,
|
||||
paths: Vec<&Path>,
|
||||
paths: Vec<String>,
|
||||
query_path: &Path,
|
||||
ordered_captures: bool,
|
||||
range: Option<(usize, usize)>,
|
||||
) -> Result<()> {
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
|
|
@ -20,14 +21,17 @@ pub fn query_files_at_paths(
|
|||
.map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?;
|
||||
|
||||
let mut query_cursor = QueryCursor::new();
|
||||
if let Some((beg, end)) = range {
|
||||
query_cursor.set_byte_range(beg, end);
|
||||
}
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).map_err(|e| e.to_string())?;
|
||||
|
||||
for path in paths {
|
||||
writeln!(&mut stdout, "{}", path.to_str().unwrap())?;
|
||||
writeln!(&mut stdout, "{}", path)?;
|
||||
|
||||
let source_code = fs::read(path).map_err(Error::wrap(|| {
|
||||
let source_code = fs::read(&path).map_err(Error::wrap(|| {
|
||||
format!("Error reading source file {:?}", path)
|
||||
}))?;
|
||||
let text_callback = |n: Node| &source_code[n.byte_range()];
|
||||
|
|
|
|||
|
|
@ -3,10 +3,17 @@ use super::util;
|
|||
use crate::error::{Error, Result};
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
use std::{fs, str};
|
||||
use tree_sitter_tags::TagsContext;
|
||||
|
||||
pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> Result<()> {
|
||||
pub fn generate_tags(
|
||||
loader: &Loader,
|
||||
scope: Option<&str>,
|
||||
paths: &[String],
|
||||
quiet: bool,
|
||||
time: bool,
|
||||
) -> Result<()> {
|
||||
let mut lang = None;
|
||||
if let Some(scope) = scope {
|
||||
lang = loader.language_configuration_for_scope(scope)?;
|
||||
|
|
@ -34,28 +41,50 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) ->
|
|||
};
|
||||
|
||||
if let Some(tags_config) = language_config.tags_config(language)? {
|
||||
let path_str = format!("{:?}", path);
|
||||
writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?;
|
||||
let indent;
|
||||
if paths.len() > 1 {
|
||||
if !quiet {
|
||||
writeln!(&mut stdout, "{}", path.to_string_lossy())?;
|
||||
}
|
||||
indent = "\t"
|
||||
} else {
|
||||
indent = "";
|
||||
};
|
||||
|
||||
let source = fs::read(path)?;
|
||||
for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? {
|
||||
let t0 = Instant::now();
|
||||
for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))?.0 {
|
||||
let tag = tag?;
|
||||
write!(
|
||||
&mut stdout,
|
||||
" {:<8} {:<40}\t{:>9}-{:<9}",
|
||||
tag.kind,
|
||||
str::from_utf8(&source[tag.name_range]).unwrap_or(""),
|
||||
tag.span.start,
|
||||
tag.span.end,
|
||||
)?;
|
||||
if let Some(docs) = tag.docs {
|
||||
if docs.len() > 120 {
|
||||
write!(&mut stdout, "\t{:?}...", &docs[0..120])?;
|
||||
} else {
|
||||
write!(&mut stdout, "\t{:?}", &docs)?;
|
||||
if !quiet {
|
||||
write!(
|
||||
&mut stdout,
|
||||
"{}{:<10}\t | {:<8}\t{} {} - {} `{}`",
|
||||
indent,
|
||||
str::from_utf8(&source[tag.name_range]).unwrap_or(""),
|
||||
&tags_config.syntax_type_name(tag.syntax_type_id),
|
||||
if tag.is_definition { "def" } else { "ref" },
|
||||
tag.span.start,
|
||||
tag.span.end,
|
||||
str::from_utf8(&source[tag.line_range]).unwrap_or(""),
|
||||
)?;
|
||||
if let Some(docs) = tag.docs {
|
||||
if docs.len() > 120 {
|
||||
write!(&mut stdout, "\t{:?}...", &docs[0..120])?;
|
||||
} else {
|
||||
write!(&mut stdout, "\t{:?}", &docs)?;
|
||||
}
|
||||
}
|
||||
writeln!(&mut stdout, "")?;
|
||||
}
|
||||
writeln!(&mut stdout, "")?;
|
||||
}
|
||||
|
||||
if time {
|
||||
writeln!(
|
||||
&mut stdout,
|
||||
"{}time: {}ms",
|
||||
indent,
|
||||
t0.elapsed().as_millis(),
|
||||
)?;
|
||||
}
|
||||
} else {
|
||||
eprintln!("No tags config found for path {:?}", path);
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ mod helpers;
|
|||
mod highlight_test;
|
||||
mod node_test;
|
||||
mod parser_test;
|
||||
mod pathological_test;
|
||||
mod query_test;
|
||||
mod tags_test;
|
||||
mod test_highlight_test;
|
||||
|
|
|
|||
15
cli/src/tests/pathological_test.rs
Normal file
15
cli/src/tests/pathological_test.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
use super::helpers::allocations;
|
||||
use super::helpers::fixtures::get_language;
|
||||
use tree_sitter::Parser;
|
||||
|
||||
#[test]
|
||||
fn test_pathological_example_1() {
|
||||
let language = "cpp";
|
||||
let source = r#"*ss<s"ss<sqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<qssqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<sqss<sqss<s._<s<sq>(qqX<sqss<s.ss<sqsssq<(qss<sq&=ss<s<sqss<s._<s<sq<(qqX<sqss<s.ss<sqs"#;
|
||||
|
||||
allocations::record(|| {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language(language)).unwrap();
|
||||
parser.parse(source, None).unwrap();
|
||||
});
|
||||
}
|
||||
|
|
@ -408,7 +408,7 @@ fn test_query_matches_with_many_overlapping_results() {
|
|||
)
|
||||
.unwrap();
|
||||
|
||||
let count = 80;
|
||||
let count = 1024;
|
||||
|
||||
// Deeply nested chained function calls:
|
||||
// a
|
||||
|
|
@ -573,8 +573,8 @@ fn test_query_matches_with_immediate_siblings() {
|
|||
&[
|
||||
(0, vec![("parent", "a"), ("child", "b")]),
|
||||
(0, vec![("parent", "b"), ("child", "c")]),
|
||||
(1, vec![("last-child", "d")]),
|
||||
(0, vec![("parent", "c"), ("child", "d")]),
|
||||
(1, vec![("last-child", "d")]),
|
||||
(2, vec![("first-element", "w")]),
|
||||
(2, vec![("first-element", "1")]),
|
||||
],
|
||||
|
|
@ -758,6 +758,55 @@ fn test_query_matches_with_nested_repetitions() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pattern() {
|
||||
allocations::record(|| {
|
||||
let language = get_language("javascript");
|
||||
|
||||
// When this query sees a comment, it must keep track of several potential
|
||||
// matches: up to two for each pattern that begins with a comment.
|
||||
let query = Query::new(
|
||||
language,
|
||||
r#"
|
||||
(call_expression
|
||||
function: (member_expression
|
||||
property: (property_identifier) @name)) @ref.method
|
||||
|
||||
((comment)* @doc (function_declaration))
|
||||
((comment)* @doc (generator_function_declaration))
|
||||
((comment)* @doc (class_declaration))
|
||||
((comment)* @doc (lexical_declaration))
|
||||
((comment)* @doc (variable_declaration))
|
||||
((comment)* @doc (method_definition))
|
||||
|
||||
(comment) @comment
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Here, a series of comments occurs in the middle of a match of the first
|
||||
// pattern. To avoid exceeding the storage limits and discarding that outer
|
||||
// match, the comment-related matches need to be managed efficiently.
|
||||
let source = format!(
|
||||
"theObject\n{}\n.theMethod()",
|
||||
" // the comment\n".repeat(64)
|
||||
);
|
||||
|
||||
assert_query_matches(
|
||||
language,
|
||||
&query,
|
||||
&source,
|
||||
&vec![(7, vec![("comment", "// the comment")]); 64]
|
||||
.into_iter()
|
||||
.chain(vec![(
|
||||
0,
|
||||
vec![("ref.method", source.as_str()), ("name", "theMethod")],
|
||||
)])
|
||||
.collect::<Vec<_>>(),
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() {
|
||||
allocations::record(|| {
|
||||
|
|
@ -1161,6 +1210,43 @@ fn test_query_matches_with_too_many_permutations_to_track() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() {
|
||||
allocations::record(|| {
|
||||
let language = get_language("javascript");
|
||||
let query = Query::new(
|
||||
language,
|
||||
"
|
||||
(
|
||||
(comment) @doc
|
||||
; not immediate
|
||||
(class_declaration) @class
|
||||
)
|
||||
|
||||
(call_expression
|
||||
function: [
|
||||
(identifier) @function
|
||||
(member_expression property: (property_identifier) @method)
|
||||
])
|
||||
",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let source = "/* hi */ a.b(); ".repeat(50);
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(&source, None).unwrap();
|
||||
let mut cursor = QueryCursor::new();
|
||||
let matches = cursor.matches(&query, tree.root_node(), to_callback(&source));
|
||||
|
||||
assert_eq!(
|
||||
collect_matches(matches, &query, source.as_str()),
|
||||
vec![(1, vec![("method", "b")]); 50],
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_with_anonymous_tokens() {
|
||||
allocations::record(|| {
|
||||
|
|
@ -1215,6 +1301,45 @@ fn test_query_matches_within_byte_range() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_captures_within_byte_range() {
|
||||
allocations::record(|| {
|
||||
let language = get_language("c");
|
||||
let query = Query::new(
|
||||
language,
|
||||
"
|
||||
(call_expression
|
||||
function: (identifier) @function
|
||||
arguments: (argument_list (string_literal) @string.arg))
|
||||
|
||||
(string_literal) @string
|
||||
",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let source = r#"DEFUN ("safe-length", Fsafe_length, Ssafe_length, 1, 1, 0)"#;
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(&source, None).unwrap();
|
||||
|
||||
let mut cursor = QueryCursor::new();
|
||||
let captures =
|
||||
cursor
|
||||
.set_byte_range(3, 27)
|
||||
.captures(&query, tree.root_node(), to_callback(source));
|
||||
|
||||
assert_eq!(
|
||||
collect_captures(captures, &query, source),
|
||||
&[
|
||||
("function", "DEFUN"),
|
||||
("string.arg", "\"safe-length\""),
|
||||
("string", "\"safe-length\""),
|
||||
]
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_different_queries_same_cursor() {
|
||||
allocations::record(|| {
|
||||
|
|
@ -1420,12 +1545,17 @@ fn test_query_captures_with_text_conditions() {
|
|||
((identifier) @function.builtin
|
||||
(#eq? @function.builtin "require"))
|
||||
|
||||
(identifier) @variable
|
||||
((identifier) @variable
|
||||
(#not-match? @variable "^(lambda|load)$"))
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let source = "
|
||||
toad
|
||||
load
|
||||
panda
|
||||
lambda
|
||||
const ab = require('./ab');
|
||||
new Cd(EF);
|
||||
";
|
||||
|
|
@ -1439,6 +1569,8 @@ fn test_query_captures_with_text_conditions() {
|
|||
assert_eq!(
|
||||
collect_captures(captures, &query, source),
|
||||
&[
|
||||
("variable", "toad"),
|
||||
("variable", "panda"),
|
||||
("variable", "ab"),
|
||||
("function.builtin", "require"),
|
||||
("variable", "require"),
|
||||
|
|
@ -2074,6 +2206,39 @@ fn test_query_disable_pattern() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_alternative_predicate_prefix() {
|
||||
allocations::record(|| {
|
||||
let language = get_language("c");
|
||||
let query = Query::new(
|
||||
language,
|
||||
r#"
|
||||
((call_expression
|
||||
function: (identifier) @keyword
|
||||
arguments: (argument_list
|
||||
(string_literal) @function))
|
||||
(.eq? @keyword "DEFUN"))
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let source = r#"
|
||||
DEFUN ("identity", Fidentity, Sidentity, 1, 1, 0,
|
||||
doc: /* Return the argument unchanged. */
|
||||
attributes: const)
|
||||
(Lisp_Object arg)
|
||||
{
|
||||
return arg;
|
||||
}
|
||||
"#;
|
||||
assert_query_matches(
|
||||
language,
|
||||
&query,
|
||||
source,
|
||||
&[(0, vec![("keyword", "DEFUN"), ("function", "\"identity\"")])],
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_is_definite() {
|
||||
struct Row {
|
||||
|
|
@ -2086,10 +2251,7 @@ fn test_query_is_definite() {
|
|||
Row {
|
||||
language: get_language("python"),
|
||||
pattern: r#"(expression_statement (string))"#,
|
||||
results_by_symbol: &[
|
||||
("expression_statement", false),
|
||||
("string", false),
|
||||
],
|
||||
results_by_symbol: &[("expression_statement", false), ("string", false)],
|
||||
},
|
||||
Row {
|
||||
language: get_language("javascript"),
|
||||
|
|
@ -2102,30 +2264,17 @@ fn test_query_is_definite() {
|
|||
Row {
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(object "{" "}")"#,
|
||||
results_by_symbol: &[
|
||||
("object", false),
|
||||
("{", true),
|
||||
("}", true),
|
||||
],
|
||||
results_by_symbol: &[("object", false), ("{", true), ("}", true)],
|
||||
},
|
||||
Row {
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(pair (property_identifier) ":")"#,
|
||||
results_by_symbol: &[
|
||||
("pair", false),
|
||||
("property_identifier", false),
|
||||
(":", true),
|
||||
],
|
||||
results_by_symbol: &[("pair", false), ("property_identifier", false), (":", true)],
|
||||
},
|
||||
Row {
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"(object "{" (_) "}")"#,
|
||||
results_by_symbol: &[
|
||||
("object", false),
|
||||
("{", false),
|
||||
("", false),
|
||||
("}", true),
|
||||
],
|
||||
results_by_symbol: &[("object", false), ("{", false), ("", false), ("}", true)],
|
||||
},
|
||||
Row {
|
||||
language: get_language("javascript"),
|
||||
|
|
|
|||
|
|
@ -1,73 +1,81 @@
|
|||
use super::helpers::allocations;
|
||||
use super::helpers::fixtures::{get_language, get_language_queries_path};
|
||||
use std::ffi::CStr;
|
||||
use std::ffi::CString;
|
||||
use std::{fs, ptr, slice, str};
|
||||
use tree_sitter::Point;
|
||||
use tree_sitter_tags::c_lib as c;
|
||||
use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext};
|
||||
use tree_sitter_tags::{Error, TagsConfiguration, TagsContext};
|
||||
|
||||
const PYTHON_TAG_QUERY: &'static str = r#"
|
||||
(
|
||||
(function_definition
|
||||
name: (identifier) @name
|
||||
body: (block . (expression_statement (string) @doc))) @function
|
||||
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
|
||||
(function_definition
|
||||
name: (identifier) @name
|
||||
body: (block . (expression_statement (string) @doc))) @definition.function
|
||||
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
|
||||
)
|
||||
|
||||
(function_definition
|
||||
name: (identifier) @name) @function
|
||||
name: (identifier) @name) @definition.function
|
||||
|
||||
(
|
||||
(class_definition
|
||||
name: (identifier) @name
|
||||
body: (block
|
||||
. (expression_statement (string) @doc))) @class
|
||||
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
|
||||
(class_definition
|
||||
name: (identifier) @name
|
||||
body: (block
|
||||
. (expression_statement (string) @doc))) @definition.class
|
||||
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
|
||||
)
|
||||
|
||||
(class_definition
|
||||
name: (identifier) @name) @class
|
||||
name: (identifier) @name) @definition.class
|
||||
|
||||
(call
|
||||
function: (identifier) @name) @call
|
||||
function: (identifier) @name) @reference.call
|
||||
|
||||
(call
|
||||
function: (attribute
|
||||
attribute: (identifier) @name)) @reference.call
|
||||
"#;
|
||||
|
||||
const JS_TAG_QUERY: &'static str = r#"
|
||||
(
|
||||
(comment)* @doc .
|
||||
(class_declaration
|
||||
name: (identifier) @name) @class
|
||||
(#select-adjacent! @doc @class)
|
||||
name: (identifier) @name) @definition.class
|
||||
(#select-adjacent! @doc @definition.class)
|
||||
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
|
||||
)
|
||||
|
||||
(
|
||||
(comment)* @doc .
|
||||
(method_definition
|
||||
name: (property_identifier) @name) @method
|
||||
(#select-adjacent! @doc @method)
|
||||
name: (property_identifier) @name) @definition.method
|
||||
(#select-adjacent! @doc @definition.method)
|
||||
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
|
||||
)
|
||||
|
||||
(
|
||||
(comment)* @doc .
|
||||
(function_declaration
|
||||
name: (identifier) @name) @function
|
||||
(#select-adjacent! @doc @function)
|
||||
name: (identifier) @name) @definition.function
|
||||
(#select-adjacent! @doc @definition.function)
|
||||
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
|
||||
)
|
||||
|
||||
(call_expression
|
||||
function: (identifier) @name) @call
|
||||
function: (identifier) @name) @reference.call
|
||||
"#;
|
||||
|
||||
const RUBY_TAG_QUERY: &'static str = r#"
|
||||
(method
|
||||
name: (identifier) @name) @method
|
||||
name: (_) @name) @definition.method
|
||||
|
||||
(method_call
|
||||
method: (identifier) @name) @call
|
||||
method: (identifier) @name) @reference.call
|
||||
|
||||
((identifier) @name @call
|
||||
(setter (identifier) @ignore)
|
||||
|
||||
((identifier) @name @reference.call
|
||||
(#is-not? local))
|
||||
"#;
|
||||
|
||||
|
|
@ -94,25 +102,26 @@ fn test_tags_python() {
|
|||
let tags = tag_context
|
||||
.generate_tags(&tags_config, source, None)
|
||||
.unwrap()
|
||||
.0
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
tags.iter()
|
||||
.map(|t| (substr(source, &t.name_range), t.kind))
|
||||
.map(|t| (
|
||||
substr(source, &t.name_range),
|
||||
tags_config.syntax_type_name(t.syntax_type_id)
|
||||
))
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
("Customer", TagKind::Class),
|
||||
("age", TagKind::Function),
|
||||
("compute_age", TagKind::Call),
|
||||
("Customer", "class"),
|
||||
("age", "function"),
|
||||
("compute_age", "call"),
|
||||
]
|
||||
);
|
||||
|
||||
assert_eq!(substr(source, &tags[0].line_range), " class Customer:");
|
||||
assert_eq!(
|
||||
substr(source, &tags[1].line_range),
|
||||
" def age(self):"
|
||||
);
|
||||
assert_eq!(substr(source, &tags[0].line_range), "class Customer:");
|
||||
assert_eq!(substr(source, &tags[1].line_range), "def age(self):");
|
||||
assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer");
|
||||
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
|
||||
}
|
||||
|
|
@ -145,17 +154,22 @@ fn test_tags_javascript() {
|
|||
let tags = tag_context
|
||||
.generate_tags(&tags_config, source, None)
|
||||
.unwrap()
|
||||
.0
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
tags.iter()
|
||||
.map(|t| (substr(source, &t.name_range), t.kind))
|
||||
.map(|t| (
|
||||
substr(source, &t.name_range),
|
||||
t.span.clone(),
|
||||
tags_config.syntax_type_name(t.syntax_type_id)
|
||||
))
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
("Customer", TagKind::Class),
|
||||
("getAge", TagKind::Method),
|
||||
("Agent", TagKind::Class)
|
||||
("Customer", Point::new(5, 10)..Point::new(5, 18), "class",),
|
||||
("getAge", Point::new(9, 8)..Point::new(9, 14), "method",),
|
||||
("Agent", Point::new(15, 10)..Point::new(15, 15), "class",)
|
||||
]
|
||||
);
|
||||
assert_eq!(
|
||||
|
|
@ -166,6 +180,27 @@ fn test_tags_javascript() {
|
|||
assert_eq!(tags[2].docs, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tags_columns_measured_in_utf16_code_units() {
|
||||
let language = get_language("python");
|
||||
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
|
||||
let mut tag_context = TagsContext::new();
|
||||
|
||||
let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes();
|
||||
|
||||
let tag = tag_context
|
||||
.generate_tags(&tags_config, source, None)
|
||||
.unwrap()
|
||||
.0
|
||||
.next()
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(substr(source, &tag.name_range), "hello_α_ω");
|
||||
assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32));
|
||||
assert_eq!(tag.utf16_column_range, 9..18);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tags_ruby() {
|
||||
let language = get_language("ruby");
|
||||
|
|
@ -177,7 +212,7 @@ fn test_tags_ruby() {
|
|||
"
|
||||
b = 1
|
||||
|
||||
def foo()
|
||||
def foo=()
|
||||
c = 1
|
||||
|
||||
# a is a method because it is not in scope
|
||||
|
|
@ -197,6 +232,7 @@ fn test_tags_ruby() {
|
|||
let tags = tag_context
|
||||
.generate_tags(&tags_config, source.as_bytes(), None)
|
||||
.unwrap()
|
||||
.0
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.unwrap();
|
||||
|
||||
|
|
@ -204,18 +240,18 @@ fn test_tags_ruby() {
|
|||
tags.iter()
|
||||
.map(|t| (
|
||||
substr(source.as_bytes(), &t.name_range),
|
||||
t.kind,
|
||||
tags_config.syntax_type_name(t.syntax_type_id),
|
||||
(t.span.start.row, t.span.start.column),
|
||||
))
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
("foo", TagKind::Method, (2, 0)),
|
||||
("bar", TagKind::Call, (7, 4)),
|
||||
("a", TagKind::Call, (7, 8)),
|
||||
("b", TagKind::Call, (7, 11)),
|
||||
("each", TagKind::Call, (9, 14)),
|
||||
("baz", TagKind::Call, (13, 8)),
|
||||
("b", TagKind::Call, (13, 15),),
|
||||
("foo=", "method", (2, 4)),
|
||||
("bar", "call", (7, 4)),
|
||||
("a", "call", (7, 8)),
|
||||
("b", "call", (7, 11)),
|
||||
("each", "call", (9, 14)),
|
||||
("baz", "call", (13, 8)),
|
||||
("b", "call", (13, 15),),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
|
@ -239,7 +275,7 @@ fn test_tags_cancellation() {
|
|||
.generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag))
|
||||
.unwrap();
|
||||
|
||||
for (i, tag) in tags.enumerate() {
|
||||
for (i, tag) in tags.0.enumerate() {
|
||||
if i == 150 {
|
||||
cancellation_flag.store(1, Ordering::SeqCst);
|
||||
}
|
||||
|
|
@ -253,6 +289,47 @@ fn test_tags_cancellation() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_capture() {
|
||||
let language = get_language("python");
|
||||
let e = TagsConfiguration::new(language, "(identifier) @method", "")
|
||||
.expect_err("expected InvalidCapture error");
|
||||
assert_eq!(e, Error::InvalidCapture("method".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tags_with_parse_error() {
|
||||
let language = get_language("python");
|
||||
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
|
||||
let mut tag_context = TagsContext::new();
|
||||
|
||||
let source = br#"
|
||||
class Fine: pass
|
||||
class Bad
|
||||
"#;
|
||||
|
||||
let (tags, failed) = tag_context
|
||||
.generate_tags(&tags_config, source, None)
|
||||
.unwrap();
|
||||
|
||||
let newtags = tags.collect::<Result<Vec<_>, _>>().unwrap();
|
||||
|
||||
assert!(failed, "syntax error should have been detected");
|
||||
|
||||
assert_eq!(
|
||||
newtags.iter()
|
||||
.map(|t| (
|
||||
substr(source, &t.name_range),
|
||||
tags_config.syntax_type_name(t.syntax_type_id)
|
||||
))
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
("Fine", "class"),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_tags_via_c_api() {
|
||||
allocations::record(|| {
|
||||
|
|
@ -316,29 +393,29 @@ fn test_tags_via_c_api() {
|
|||
})
|
||||
.unwrap();
|
||||
|
||||
let syntax_types: Vec<&str> = unsafe {
|
||||
let mut len: u32 = 0;
|
||||
let ptr =
|
||||
c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len);
|
||||
slice::from_raw_parts(ptr, len as usize)
|
||||
.iter()
|
||||
.map(|i| CStr::from_ptr(*i).to_str().unwrap())
|
||||
.collect()
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
tags.iter()
|
||||
.map(|tag| (
|
||||
tag.kind,
|
||||
syntax_types[tag.syntax_type_id as usize],
|
||||
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize],
|
||||
&source_code[tag.line_start_byte as usize..tag.line_end_byte as usize],
|
||||
&docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize],
|
||||
))
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
(
|
||||
c::TSTagKind::Function,
|
||||
"b",
|
||||
"function b() {",
|
||||
"one\ntwo\nthree"
|
||||
),
|
||||
(
|
||||
c::TSTagKind::Class,
|
||||
"C",
|
||||
"class C extends D {",
|
||||
"four\nfive"
|
||||
),
|
||||
(c::TSTagKind::Call, "b", "b(a);", "")
|
||||
("function", "b", "function b() {", "one\ntwo\nthree"),
|
||||
("class", "C", "class C extends D {", "four\nfive"),
|
||||
("call", "b", "b(a);", "")
|
||||
]
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
use super::error::{Error, Result};
|
||||
use std::io;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
|
@ -31,12 +32,12 @@ pub struct LogSession();
|
|||
pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result<LogSession> {
|
||||
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result<LogSession> {
|
||||
Ok(LogSession())
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession> {
|
||||
pub fn log_graphs(parser: &mut Parser, path: &str) -> Result<LogSession> {
|
||||
use std::io::Write;
|
||||
|
||||
let mut dot_file = std::fs::File::create(path)?;
|
||||
|
|
@ -46,11 +47,13 @@ pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession
|
|||
.stdin(Stdio::piped())
|
||||
.stdout(dot_file)
|
||||
.spawn()
|
||||
.expect("Failed to run Dot");
|
||||
.map_err(Error::wrap(|| {
|
||||
"Failed to run the `dot` command. Check that graphviz is installed."
|
||||
}))?;
|
||||
let dot_stdin = dot_process
|
||||
.stdin
|
||||
.take()
|
||||
.expect("Failed to open stdin for Dot");
|
||||
.ok_or_else(|| Error::new("Failed to open stdin for `dot` process.".to_string()))?;
|
||||
parser.print_dot_graphs(&dot_stdin);
|
||||
Ok(LogSession(
|
||||
PathBuf::from(path),
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ Developing Tree-sitter grammars can have a difficult learning curve, but once yo
|
|||
|
||||
In order to develop a Tree-sitter parser, there are two dependencies that you need to install:
|
||||
|
||||
* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. It shouldn't matter what version of Node you have.
|
||||
* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. You'll need Node.js version 6.0 or greater.
|
||||
* **A C Compiler** - Tree-sitter creates parsers that are written in C. In order to run and test these parsers with the `tree-sitter parse` or `tree-sitter test` commands, you must have a C/C++ compiler installed. Tree-sitter will try to look for these compilers in the standard places for each platform.
|
||||
|
||||
### Installation
|
||||
|
|
@ -505,6 +505,8 @@ Grammars often contain multiple tokens that can match the same characters. For e
|
|||
|
||||
4. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`.
|
||||
|
||||
5. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar.
|
||||
|
||||
### Keywords
|
||||
|
||||
Many languages have a set of *keyword* tokens (e.g. `if`, `for`, `return`), as well as a more general token (e.g. `identifier`) that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which is used as a binary operator, like this:
|
||||
|
|
|
|||
|
|
@ -385,6 +385,14 @@ The following query would specify that the contents of the heredoc should be par
|
|||
(heredoc_end) @injection.language) @injection.content
|
||||
```
|
||||
|
||||
You can also force the language using the `#set!` predicate.
|
||||
For example, this will force the language to always be `ruby`.
|
||||
|
||||
```
|
||||
((heredoc_body) @injection.content
|
||||
(#set! injection.language "ruby"))
|
||||
```
|
||||
|
||||
## Unit Testing
|
||||
|
||||
Tree-sitter has a built-in way to verify the results of syntax highlighting. The interface is based on [Sublime Text's system](https://www.sublimetext.com/docs/3/syntax.html#testing) for testing highlighting.
|
||||
|
|
|
|||
|
|
@ -10,6 +10,8 @@ use tree_sitter::{
|
|||
};
|
||||
|
||||
const CANCELLATION_CHECK_INTERVAL: usize = 100;
|
||||
const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
|
||||
const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;
|
||||
|
||||
/// Indicates which highlight should be applied to a region of source code.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
|
|
@ -620,7 +622,7 @@ where
|
|||
type Item = Result<HighlightEvent, Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
'main: loop {
|
||||
// If we've already determined the next highlight boundary, just return it.
|
||||
if let Some(e) = self.next_event.take() {
|
||||
return Some(Ok(e));
|
||||
|
|
@ -640,29 +642,34 @@ where
|
|||
|
||||
// If none of the layers have any more highlight boundaries, terminate.
|
||||
if self.layers.is_empty() {
|
||||
if self.byte_offset < self.source.len() {
|
||||
return if self.byte_offset < self.source.len() {
|
||||
let result = Some(Ok(HighlightEvent::Source {
|
||||
start: self.byte_offset,
|
||||
end: self.source.len(),
|
||||
}));
|
||||
self.byte_offset = self.source.len();
|
||||
return result;
|
||||
result
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
None
|
||||
};
|
||||
}
|
||||
|
||||
// Get the next capture from whichever layer has the earliest highlight boundary.
|
||||
let match_;
|
||||
let mut captures;
|
||||
let mut capture;
|
||||
let mut pattern_index;
|
||||
let range;
|
||||
let layer = &mut self.layers[0];
|
||||
if let Some((m, capture_index)) = layer.captures.peek() {
|
||||
match_ = m;
|
||||
captures = match_.captures;
|
||||
pattern_index = match_.pattern_index;
|
||||
capture = captures[*capture_index];
|
||||
if let Some((next_match, capture_index)) = layer.captures.peek() {
|
||||
let next_capture = next_match.captures[*capture_index];
|
||||
range = next_capture.node.byte_range();
|
||||
|
||||
// If any previous highlight ends before this node starts, then before
|
||||
// processing this capture, emit the source code up until the end of the
|
||||
// previous highlight, and an end event for that highlight.
|
||||
if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
|
||||
if end_byte <= range.start {
|
||||
layer.highlight_end_stack.pop();
|
||||
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
|
||||
}
|
||||
}
|
||||
}
|
||||
// If there are no more captures, then emit any remaining highlight end events.
|
||||
// And if there are none of those, then just advance to the end of the document.
|
||||
|
|
@ -673,30 +680,17 @@ where
|
|||
return self.emit_event(self.source.len(), None);
|
||||
};
|
||||
|
||||
// If any previous highlight ends before this node starts, then before
|
||||
// processing this capture, emit the source code up until the end of the
|
||||
// previous highlight, and an end event for that highlight.
|
||||
let range = capture.node.byte_range();
|
||||
if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
|
||||
if end_byte <= range.start {
|
||||
layer.highlight_end_stack.pop();
|
||||
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from the local scope stack any local scopes that have already ended.
|
||||
while range.start > layer.scope_stack.last().unwrap().range.end {
|
||||
layer.scope_stack.pop();
|
||||
}
|
||||
let (mut match_, capture_index) = layer.captures.next().unwrap();
|
||||
let mut capture = match_.captures[capture_index];
|
||||
|
||||
// If this capture represents an injection, then process the injection.
|
||||
if pattern_index < layer.config.locals_pattern_index {
|
||||
if match_.pattern_index < layer.config.locals_pattern_index {
|
||||
let (language_name, content_node, include_children) =
|
||||
injection_for_match(&layer.config, &layer.config.query, match_, &self.source);
|
||||
injection_for_match(&layer.config, &layer.config.query, &match_, &self.source);
|
||||
|
||||
// Explicitly remove this match so that none of its other captures will remain
|
||||
// in the stream of captures. The `unwrap` is ok because
|
||||
layer.captures.next().unwrap().0.remove();
|
||||
// in the stream of captures.
|
||||
match_.remove();
|
||||
|
||||
// If a language is found with the given name, then add a new language layer
|
||||
// to the highlighted document.
|
||||
|
|
@ -729,16 +723,19 @@ where
|
|||
}
|
||||
|
||||
self.sort_layers();
|
||||
continue;
|
||||
continue 'main;
|
||||
}
|
||||
|
||||
layer.captures.next();
|
||||
// Remove from the local scope stack any local scopes that have already ended.
|
||||
while range.start > layer.scope_stack.last().unwrap().range.end {
|
||||
layer.scope_stack.pop();
|
||||
}
|
||||
|
||||
// If this capture is for tracking local variables, then process the
|
||||
// local variable info.
|
||||
let mut reference_highlight = None;
|
||||
let mut definition_highlight = None;
|
||||
while pattern_index < layer.config.highlights_pattern_index {
|
||||
while match_.pattern_index < layer.config.highlights_pattern_index {
|
||||
// If the node represents a local scope, push a new local scope onto
|
||||
// the scope stack.
|
||||
if Some(capture.index) == layer.config.local_scope_capture_index {
|
||||
|
|
@ -748,7 +745,7 @@ where
|
|||
range: range.clone(),
|
||||
local_defs: Vec::new(),
|
||||
};
|
||||
for prop in layer.config.query.property_settings(pattern_index) {
|
||||
for prop in layer.config.query.property_settings(match_.pattern_index) {
|
||||
match prop.key.as_ref() {
|
||||
"local.scope-inherits" => {
|
||||
scope.inherits =
|
||||
|
|
@ -767,7 +764,7 @@ where
|
|||
let scope = layer.scope_stack.last_mut().unwrap();
|
||||
|
||||
let mut value_range = 0..0;
|
||||
for capture in captures {
|
||||
for capture in match_.captures {
|
||||
if Some(capture.index) == layer.config.local_def_value_capture_index {
|
||||
value_range = capture.node.byte_range();
|
||||
}
|
||||
|
|
@ -810,84 +807,76 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
// Continue processing any additional local-variable-tracking patterns
|
||||
// for the same node.
|
||||
// Continue processing any additional matches for the same node.
|
||||
if let Some((next_match, next_capture_index)) = layer.captures.peek() {
|
||||
let next_capture = next_match.captures[*next_capture_index];
|
||||
if next_capture.node == capture.node {
|
||||
pattern_index = next_match.pattern_index;
|
||||
captures = next_match.captures;
|
||||
capture = next_capture;
|
||||
layer.captures.next();
|
||||
match_ = layer.captures.next().unwrap().0;
|
||||
continue;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
self.sort_layers();
|
||||
continue 'main;
|
||||
}
|
||||
|
||||
// Otherwise, this capture must represent a highlight.
|
||||
let mut has_highlight = true;
|
||||
|
||||
// If this exact range has already been highlighted by an earlier pattern, or by
|
||||
// a different layer, then skip over this one.
|
||||
if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
|
||||
if range.start == last_start && range.end == last_end && layer.depth < last_depth {
|
||||
has_highlight = false;
|
||||
self.sort_layers();
|
||||
continue 'main;
|
||||
}
|
||||
}
|
||||
|
||||
// If the current node was found to be a local variable, then skip over any
|
||||
// highlighting patterns that are disabled for local variables.
|
||||
while has_highlight
|
||||
&& (definition_highlight.is_some() || reference_highlight.is_some())
|
||||
&& layer.config.non_local_variable_patterns[pattern_index]
|
||||
{
|
||||
has_highlight = false;
|
||||
if let Some((next_match, next_capture_index)) = layer.captures.peek() {
|
||||
let next_capture = next_match.captures[*next_capture_index];
|
||||
if next_capture.node == capture.node {
|
||||
capture = next_capture;
|
||||
has_highlight = true;
|
||||
pattern_index = next_match.pattern_index;
|
||||
layer.captures.next();
|
||||
continue;
|
||||
if definition_highlight.is_some() || reference_highlight.is_some() {
|
||||
while layer.config.non_local_variable_patterns[match_.pattern_index] {
|
||||
if let Some((next_match, next_capture_index)) = layer.captures.peek() {
|
||||
let next_capture = next_match.captures[*next_capture_index];
|
||||
if next_capture.node == capture.node {
|
||||
capture = next_capture;
|
||||
match_ = layer.captures.next().unwrap().0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
self.sort_layers();
|
||||
continue 'main;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if has_highlight {
|
||||
// Once a highlighting pattern is found for the current node, skip over
|
||||
// any later highlighting patterns that also match this node. Captures
|
||||
// for a given node are ordered by pattern index, so these subsequent
|
||||
// captures are guaranteed to be for highlighting, not injections or
|
||||
// local variables.
|
||||
while let Some((next_match, next_capture_index)) = layer.captures.peek() {
|
||||
if next_match.captures[*next_capture_index].node == capture.node {
|
||||
layer.captures.next();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
// Once a highlighting pattern is found for the current node, skip over
|
||||
// any later highlighting patterns that also match this node. Captures
|
||||
// for a given node are ordered by pattern index, so these subsequent
|
||||
// captures are guaranteed to be for highlighting, not injections or
|
||||
// local variables.
|
||||
while let Some((next_match, next_capture_index)) = layer.captures.peek() {
|
||||
let next_capture = next_match.captures[*next_capture_index];
|
||||
if next_capture.node == capture.node {
|
||||
layer.captures.next();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let current_highlight = layer.config.highlight_indices[capture.index as usize];
|
||||
let current_highlight = layer.config.highlight_indices[capture.index as usize];
|
||||
|
||||
// If this node represents a local definition, then store the current
|
||||
// highlight value on the local scope entry representing this node.
|
||||
if let Some(definition_highlight) = definition_highlight {
|
||||
*definition_highlight = current_highlight;
|
||||
}
|
||||
// If this node represents a local definition, then store the current
|
||||
// highlight value on the local scope entry representing this node.
|
||||
if let Some(definition_highlight) = definition_highlight {
|
||||
*definition_highlight = current_highlight;
|
||||
}
|
||||
|
||||
// Emit a scope start event and push the node's end position to the stack.
|
||||
if let Some(highlight) = reference_highlight.or(current_highlight) {
|
||||
self.last_highlight_range = Some((range.start, range.end, layer.depth));
|
||||
layer.highlight_end_stack.push(range.end);
|
||||
return self
|
||||
.emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
|
||||
}
|
||||
// Emit a scope start event and push the node's end position to the stack.
|
||||
if let Some(highlight) = reference_highlight.or(current_highlight) {
|
||||
self.last_highlight_range = Some((range.start, range.end, layer.depth));
|
||||
layer.highlight_end_stack.push(range.end);
|
||||
return self
|
||||
.emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
|
||||
}
|
||||
|
||||
self.sort_layers();
|
||||
|
|
@ -897,11 +886,13 @@ where
|
|||
|
||||
impl HtmlRenderer {
|
||||
pub fn new() -> Self {
|
||||
HtmlRenderer {
|
||||
html: Vec::new(),
|
||||
line_offsets: vec![0],
|
||||
let mut result = HtmlRenderer {
|
||||
html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY),
|
||||
line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY),
|
||||
carriage_return_highlight: None,
|
||||
}
|
||||
};
|
||||
result.line_offsets.push(0);
|
||||
result
|
||||
}
|
||||
|
||||
pub fn set_carriage_return_highlight(&mut self, highlight: Option<Highlight>) {
|
||||
|
|
@ -909,8 +900,8 @@ impl HtmlRenderer {
|
|||
}
|
||||
|
||||
pub fn reset(&mut self) {
|
||||
self.html.clear();
|
||||
self.line_offsets.clear();
|
||||
shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY);
|
||||
shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY);
|
||||
self.line_offsets.push(0);
|
||||
}
|
||||
|
||||
|
|
@ -1074,3 +1065,11 @@ fn injection_for_match<'a>(
|
|||
|
||||
(language_name, content_node, include_children)
|
||||
}
|
||||
|
||||
/// Empties `vec` and makes sure it does not keep more than roughly `capacity`
/// elements' worth of allocated storage afterwards.
///
/// Buffers whose allocation is already at or below `capacity` are only
/// cleared, so their storage is reused as-is. Larger allocations are released
/// and replaced by a fresh buffer sized for `capacity` elements.
///
/// The decision is based on the *allocated* capacity rather than the current
/// length: a vector that was grown far beyond `capacity` but currently holds
/// few elements would otherwise keep all of its memory.
fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
    // Drop the contents first so that shrinking never has to move elements
    // that are about to be discarded anyway.
    vec.clear();
    if vec.capacity() > capacity {
        *vec = Vec::with_capacity(capacity);
    }
}
|
||||
|
|
|
|||
|
|
@ -170,7 +170,7 @@ pub enum QueryError {
|
|||
enum TextPredicate {
|
||||
CaptureEqString(u32, String, bool),
|
||||
CaptureEqCapture(u32, u32, bool),
|
||||
CaptureMatchString(u32, regex::bytes::Regex),
|
||||
CaptureMatchString(u32, regex::bytes::Regex, bool),
|
||||
}
|
||||
|
||||
impl Language {
|
||||
|
|
@ -1314,7 +1314,7 @@ impl Query {
|
|||
});
|
||||
}
|
||||
|
||||
"match?" => {
|
||||
"match?" | "not-match?" => {
|
||||
if p.len() != 3 {
|
||||
return Err(QueryError::Predicate(format!(
|
||||
"Wrong number of arguments to #match? predicate. Expected 2, got {}.",
|
||||
|
|
@ -1334,12 +1334,14 @@ impl Query {
|
|||
)));
|
||||
}
|
||||
|
||||
let is_positive = operator_name == "match?";
|
||||
let regex = &string_values[p[2].value_id as usize];
|
||||
text_predicates.push(TextPredicate::CaptureMatchString(
|
||||
p[1].value_id,
|
||||
regex::bytes::Regex::new(regex).map_err(|_| {
|
||||
QueryError::Predicate(format!("Invalid regex '{}'", regex))
|
||||
})?,
|
||||
is_positive,
|
||||
));
|
||||
}
|
||||
|
||||
|
|
@ -1631,9 +1633,9 @@ impl<'a> QueryMatch<'a> {
|
|||
let node = self.capture_for_index(*i).unwrap();
|
||||
(text_callback(node).as_ref() == s.as_bytes()) == *is_positive
|
||||
}
|
||||
TextPredicate::CaptureMatchString(i, r) => {
|
||||
TextPredicate::CaptureMatchString(i, r, is_positive) => {
|
||||
let node = self.capture_for_index(*i).unwrap();
|
||||
r.is_match(text_callback(node).as_ref())
|
||||
r.is_match(text_callback(node).as_ref()) == *is_positive
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -787,6 +787,8 @@ class Language {
|
|||
}
|
||||
break;
|
||||
|
||||
case 'not-match?':
|
||||
isPositive = false;
|
||||
case 'match?':
|
||||
if (steps.length !== 3) throw new Error(
|
||||
`Wrong number of arguments to \`#match?\` predicate. Expected 2, got ${steps.length - 1}.`
|
||||
|
|
@ -801,7 +803,7 @@ class Language {
|
|||
const regex = new RegExp(steps[2].value);
|
||||
textPredicates[i].push(function(captures) {
|
||||
for (const c of captures) {
|
||||
if (c.name === captureName) return regex.test(c.node.text);
|
||||
if (c.name === captureName) return regex.test(c.node.text) === isPositive;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
|
|
|||
|
|
@ -126,12 +126,17 @@ describe("Query", () => {
|
|||
|
||||
it("handles conditions that compare the text of capture to literal strings", () => {
|
||||
tree = parser.parse(`
|
||||
lambda
|
||||
panda
|
||||
load
|
||||
toad
|
||||
const ab = require('./ab');
|
||||
new Cd(EF);
|
||||
`);
|
||||
|
||||
query = JavaScript.query(`
|
||||
(identifier) @variable
|
||||
((identifier) @variable
|
||||
(#not-match? @variable "^(lambda|load)$"))
|
||||
|
||||
((identifier) @function.builtin
|
||||
(#eq? @function.builtin "require"))
|
||||
|
|
@ -145,6 +150,8 @@ describe("Query", () => {
|
|||
|
||||
const captures = query.captures(tree.rootNode);
|
||||
assert.deepEqual(formatCaptures(captures), [
|
||||
{ name: "variable", text: "panda" },
|
||||
{ name: "variable", text: "toad" },
|
||||
{ name: "variable", text: "ab" },
|
||||
{ name: "variable", text: "require" },
|
||||
{ name: "function.builtin", text: "require" },
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ static inline bool ts_toggle_allocation_recording(bool value) {
|
|||
static inline void *ts_malloc(size_t size) {
|
||||
void *result = malloc(size);
|
||||
if (size > 0 && !result) {
|
||||
fprintf(stderr, "tree-sitter failed to allocate %lu bytes", size);
|
||||
fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
|
||||
exit(1);
|
||||
}
|
||||
return result;
|
||||
|
|
@ -54,7 +54,7 @@ static inline void *ts_malloc(size_t size) {
|
|||
static inline void *ts_calloc(size_t count, size_t size) {
|
||||
void *result = calloc(count, size);
|
||||
if (count > 0 && !result) {
|
||||
fprintf(stderr, "tree-sitter failed to allocate %lu bytes", count * size);
|
||||
fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
|
||||
exit(1);
|
||||
}
|
||||
return result;
|
||||
|
|
@ -63,7 +63,7 @@ static inline void *ts_calloc(size_t count, size_t size) {
|
|||
static inline void *ts_realloc(void *buffer, size_t size) {
|
||||
void *result = realloc(buffer, size);
|
||||
if (size > 0 && !result) {
|
||||
fprintf(stderr, "tree-sitter failed to reallocate %lu bytes", size);
|
||||
fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
|
||||
exit(1);
|
||||
}
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -355,10 +355,14 @@ static Subtree ts_parser__lex(
|
|||
StackVersion version,
|
||||
TSStateId parse_state
|
||||
) {
|
||||
TSLexMode lex_mode = self->language->lex_modes[parse_state];
|
||||
if (lex_mode.lex_state == (uint16_t)-1) {
|
||||
LOG("no_lookahead_after_non_terminal_extra");
|
||||
return NULL_SUBTREE;
|
||||
}
|
||||
|
||||
Length start_position = ts_stack_position(self->stack, version);
|
||||
Subtree external_token = ts_stack_last_external_token(self->stack, version);
|
||||
TSLexMode lex_mode = self->language->lex_modes[parse_state];
|
||||
if (lex_mode.lex_state == (uint16_t)-1) return NULL_SUBTREE;
|
||||
const bool *valid_external_tokens = ts_language_enabled_external_tokens(
|
||||
self->language,
|
||||
lex_mode.external_lex_state
|
||||
|
|
@ -761,20 +765,26 @@ static StackVersion ts_parser__reduce(
|
|||
int dynamic_precedence,
|
||||
uint16_t production_id,
|
||||
bool is_fragile,
|
||||
bool is_extra
|
||||
bool end_of_non_terminal_extra
|
||||
) {
|
||||
uint32_t initial_version_count = ts_stack_version_count(self->stack);
|
||||
uint32_t removed_version_count = 0;
|
||||
StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
|
||||
|
||||
// Pop the given number of nodes from the given version of the parse stack.
|
||||
// If stack versions have previously merged, then there may be more than one
|
||||
// path back through the stack. For each path, create a new parent node to
|
||||
// contain the popped children, and push it onto the stack in place of the
|
||||
// children.
|
||||
StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
|
||||
uint32_t removed_version_count = 0;
|
||||
for (uint32_t i = 0; i < pop.size; i++) {
|
||||
StackSlice slice = pop.contents[i];
|
||||
StackVersion slice_version = slice.version - removed_version_count;
|
||||
|
||||
// Error recovery can sometimes cause lots of stack versions to merge,
|
||||
// such that a single pop operation can produce a lots of slices.
|
||||
// Avoid creating too many stack versions in that situation.
|
||||
if (i > 0 && slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
|
||||
// This is where new versions are added to the parse stack. The versions
|
||||
// will all be sorted and truncated at the end of the outer parsing loop.
|
||||
// Allow the maximum version count to be temporarily exceeded, but only
|
||||
// by a limited threshold.
|
||||
if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
|
||||
ts_stack_remove_version(self->stack, slice_version);
|
||||
ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
|
||||
removed_version_count++;
|
||||
|
|
@ -826,7 +836,9 @@ static StackVersion ts_parser__reduce(
|
|||
|
||||
TSStateId state = ts_stack_state(self->stack, slice_version);
|
||||
TSStateId next_state = ts_language_next_state(self->language, state, symbol);
|
||||
if (is_extra) parent.ptr->extra = true;
|
||||
if (end_of_non_terminal_extra && next_state == state) {
|
||||
parent.ptr->extra = true;
|
||||
}
|
||||
if (is_fragile || pop.size > 1 || initial_version_count > 1) {
|
||||
parent.ptr->fragile_left = true;
|
||||
parent.ptr->fragile_right = true;
|
||||
|
|
@ -1339,24 +1351,26 @@ static bool ts_parser__advance(
|
|||
);
|
||||
}
|
||||
|
||||
lex:
|
||||
// Otherwise, re-run the lexer.
|
||||
if (!lookahead.ptr) {
|
||||
lookahead = ts_parser__lex(self, version, state);
|
||||
if (lookahead.ptr) {
|
||||
ts_parser__set_cached_token(self, position, last_external_token, lookahead);
|
||||
ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
|
||||
}
|
||||
|
||||
// When parsing a non-terminal extra, a null lookahead indicates the
|
||||
// end of the rule. The reduction is stored in the EOF table entry.
|
||||
// After the reduction, the lexer needs to be run again.
|
||||
else {
|
||||
ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
|
||||
}
|
||||
}
|
||||
|
||||
bool needs_lex = !lookahead.ptr;
|
||||
for (;;) {
|
||||
// Otherwise, re-run the lexer.
|
||||
if (needs_lex) {
|
||||
needs_lex = false;
|
||||
lookahead = ts_parser__lex(self, version, state);
|
||||
|
||||
if (lookahead.ptr) {
|
||||
ts_parser__set_cached_token(self, position, last_external_token, lookahead);
|
||||
ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
|
||||
}
|
||||
|
||||
// When parsing a non-terminal extra, a null lookahead indicates the
|
||||
// end of the rule. The reduction is stored in the EOF table entry.
|
||||
// After the reduction, the lexer needs to be run again.
|
||||
else {
|
||||
ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
|
||||
}
|
||||
}
|
||||
|
||||
// If a cancellation flag or a timeout was provided, then check every
|
||||
// time a fixed number of parse actions has been processed.
|
||||
if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
|
||||
|
|
@ -1408,12 +1422,12 @@ lex:
|
|||
|
||||
case TSParseActionTypeReduce: {
|
||||
bool is_fragile = table_entry.action_count > 1;
|
||||
bool is_extra = lookahead.ptr == NULL;
|
||||
bool end_of_non_terminal_extra = lookahead.ptr == NULL;
|
||||
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.reduce.symbol), action.params.reduce.child_count);
|
||||
StackVersion reduction_version = ts_parser__reduce(
|
||||
self, version, action.params.reduce.symbol, action.params.reduce.child_count,
|
||||
action.params.reduce.dynamic_precedence, action.params.reduce.production_id,
|
||||
is_fragile, is_extra
|
||||
is_fragile, end_of_non_terminal_extra
|
||||
);
|
||||
if (reduction_version != STACK_VERSION_NONE) {
|
||||
last_reduction_version = reduction_version;
|
||||
|
|
@ -1453,8 +1467,10 @@ lex:
|
|||
// (and completing the non-terminal extra rule) run the lexer again based
|
||||
// on the current parse state.
|
||||
if (!lookahead.ptr) {
|
||||
lookahead = ts_parser__lex(self, version, state);
|
||||
needs_lex = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
ts_language_table_entry(
|
||||
self->language,
|
||||
state,
|
||||
|
|
@ -1464,6 +1480,11 @@ lex:
|
|||
continue;
|
||||
}
|
||||
|
||||
if (!lookahead.ptr) {
|
||||
ts_stack_pause(self->stack, version, ts_builtin_sym_end);
|
||||
return true;
|
||||
}
|
||||
|
||||
// If there were no parse actions for the current lookahead token, then
|
||||
// it is not valid in this state. If the current lookahead token is a
|
||||
// keyword, then switch to treating it as the normal word token if that
|
||||
|
|
@ -1503,8 +1524,7 @@ lex:
|
|||
if (ts_parser__breakdown_top_of_stack(self, version)) {
|
||||
state = ts_stack_state(self->stack, version);
|
||||
ts_subtree_release(&self->tree_pool, lookahead);
|
||||
lookahead = NULL_SUBTREE;
|
||||
goto lex;
|
||||
needs_lex = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
|||
386
lib/src/query.c
386
lib/src/query.c
|
|
@ -11,7 +11,6 @@
|
|||
// #define LOG(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define LOG(...)
|
||||
|
||||
#define MAX_STATE_COUNT 256
|
||||
#define MAX_CAPTURE_LIST_COUNT 32
|
||||
#define MAX_STEP_CAPTURE_COUNT 3
|
||||
#define MAX_STATE_PREDECESSOR_COUNT 100
|
||||
|
|
@ -51,7 +50,6 @@ typedef struct {
|
|||
uint16_t alternative_index;
|
||||
uint16_t depth;
|
||||
bool contains_captures: 1;
|
||||
bool is_pattern_start: 1;
|
||||
bool is_immediate: 1;
|
||||
bool is_last_child: 1;
|
||||
bool is_pass_through: 1;
|
||||
|
|
@ -128,9 +126,10 @@ typedef struct {
|
|||
uint16_t step_index;
|
||||
uint16_t pattern_index;
|
||||
uint16_t capture_list_id;
|
||||
uint16_t consumed_capture_count: 14;
|
||||
uint16_t consumed_capture_count: 12;
|
||||
bool seeking_immediate_match: 1;
|
||||
bool has_in_progress_alternatives: 1;
|
||||
bool dead: 1;
|
||||
} QueryState;
|
||||
|
||||
typedef Array(TSQueryCapture) CaptureList;
|
||||
|
|
@ -224,6 +223,7 @@ struct TSQueryCursor {
|
|||
TSPoint start_point;
|
||||
TSPoint end_point;
|
||||
bool ascending;
|
||||
bool halted;
|
||||
};
|
||||
|
||||
static const TSQueryError PARENT_DONE = -1;
|
||||
|
|
@ -500,7 +500,6 @@ static QueryStep query_step__new(
|
|||
.alternative_index = NONE,
|
||||
.contains_captures = false,
|
||||
.is_last_child = false,
|
||||
.is_pattern_start = false,
|
||||
.is_pass_through = false,
|
||||
.is_dead_end = false,
|
||||
.is_definite = false,
|
||||
|
|
@ -692,6 +691,23 @@ static inline void ts_query__pattern_map_insert(
|
|||
) {
|
||||
uint32_t index;
|
||||
ts_query__pattern_map_search(self, symbol, &index);
|
||||
|
||||
// Ensure that the entries are sorted not only by symbol, but also
|
||||
// by pattern_index. This way, states for earlier patterns will be
|
||||
// initiated first, which allows the ordering of the states array
|
||||
// to be maintained more efficiently.
|
||||
while (index < self->pattern_map.size) {
|
||||
PatternEntry *entry = &self->pattern_map.contents[index];
|
||||
if (
|
||||
self->steps.contents[entry->step_index].symbol == symbol &&
|
||||
entry->pattern_index < pattern_index
|
||||
) {
|
||||
index++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
array_insert(&self->pattern_map, index, ((PatternEntry) {
|
||||
.step_index = start_step_index,
|
||||
.pattern_index = pattern_index,
|
||||
|
|
@ -1438,8 +1454,8 @@ static TSQueryError ts_query__parse_pattern(
|
|||
}
|
||||
}
|
||||
|
||||
// A pound character indicates the start of a predicate.
|
||||
else if (stream->next == '#') {
|
||||
// A dot/pound character indicates the start of a predicate.
|
||||
else if (stream->next == '.' || stream->next == '#') {
|
||||
stream_advance(stream);
|
||||
return ts_query__parse_predicate(self, stream);
|
||||
}
|
||||
|
|
@ -1796,7 +1812,6 @@ TSQuery *ts_query_new(
|
|||
// Maintain a map that can look up patterns for a given root symbol.
|
||||
for (;;) {
|
||||
QueryStep *step = &self->steps.contents[start_step_index];
|
||||
step->is_pattern_start = true;
|
||||
ts_query__pattern_map_insert(self, step->symbol, start_step_index, pattern_index);
|
||||
if (step->symbol == WILDCARD_SYMBOL) {
|
||||
self->wildcard_root_pattern_count++;
|
||||
|
|
@ -1806,6 +1821,7 @@ TSQuery *ts_query_new(
|
|||
// then add multiple entries to the pattern map.
|
||||
if (step->alternative_index != NONE) {
|
||||
start_step_index = step->alternative_index;
|
||||
step->alternative_index = NONE;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
|
@ -1944,6 +1960,7 @@ TSQueryCursor *ts_query_cursor_new(void) {
|
|||
TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
|
||||
*self = (TSQueryCursor) {
|
||||
.ascending = false,
|
||||
.halted = false,
|
||||
.states = array_new(),
|
||||
.finished_states = array_new(),
|
||||
.capture_list_pool = capture_list_pool_new(),
|
||||
|
|
@ -1952,8 +1969,8 @@ TSQueryCursor *ts_query_cursor_new(void) {
|
|||
.start_point = {0, 0},
|
||||
.end_point = POINT_MAX,
|
||||
};
|
||||
array_reserve(&self->states, MAX_STATE_COUNT);
|
||||
array_reserve(&self->finished_states, MAX_CAPTURE_LIST_COUNT);
|
||||
array_reserve(&self->states, 8);
|
||||
array_reserve(&self->finished_states, 8);
|
||||
return self;
|
||||
}
|
||||
|
||||
|
|
@ -1977,6 +1994,7 @@ void ts_query_cursor_exec(
|
|||
self->next_state_id = 0;
|
||||
self->depth = 0;
|
||||
self->ascending = false;
|
||||
self->halted = false;
|
||||
self->query = query;
|
||||
}
|
||||
|
||||
|
|
@ -2020,6 +2038,7 @@ static bool ts_query_cursor__first_in_progress_capture(
|
|||
*pattern_index = UINT32_MAX;
|
||||
for (unsigned i = 0; i < self->states.size; i++) {
|
||||
const QueryState *state = &self->states.contents[i];
|
||||
if (state->dead) continue;
|
||||
const CaptureList *captures = capture_list_pool_get(
|
||||
&self->capture_list_pool,
|
||||
state->capture_list_id
|
||||
|
|
@ -2114,65 +2133,138 @@ void ts_query_cursor__compare_captures(
|
|||
}
|
||||
}
|
||||
|
||||
static bool ts_query_cursor__add_state(
|
||||
static void ts_query_cursor__add_state(
|
||||
TSQueryCursor *self,
|
||||
const PatternEntry *pattern
|
||||
) {
|
||||
if (self->states.size >= MAX_STATE_COUNT) {
|
||||
LOG(" too many states");
|
||||
return false;
|
||||
QueryStep *step = &self->query->steps.contents[pattern->step_index];
|
||||
uint32_t start_depth = self->depth - step->depth;
|
||||
|
||||
// Keep the states array in ascending order of start_depth and pattern_index,
|
||||
// so that it can be processed more efficiently elsewhere. Usually, there is
|
||||
// no work to do here because of two facts:
|
||||
// * States with lower start_depth are naturally added first due to the
|
||||
// order in which nodes are visited.
|
||||
// * Earlier patterns are naturally added first because of the ordering of the
|
||||
// pattern_map data structure that's used to initiate matches.
|
||||
//
|
||||
// This loop is only needed in cases where two conditions hold:
|
||||
// * A pattern consists of more than one sibling node, so that its states
|
||||
// remain in progress after exiting the node that started the match.
|
||||
// * The first node in the pattern matches against multiple nodes at the
|
||||
// same depth.
|
||||
//
|
||||
// An example of this is the pattern '((comment)* (function))'. If multiple
|
||||
// `comment` nodes appear in a row, then we may initiate a new state for this
|
||||
// pattern while another state for the same pattern is already in progress.
|
||||
// If there are multiple patterns like this in a query, then this loop will
|
||||
// need to execute in order to keep the states ordered by pattern_index.
|
||||
uint32_t index = self->states.size;
|
||||
while (index > 0) {
|
||||
QueryState *prev_state = &self->states.contents[index - 1];
|
||||
if (prev_state->start_depth < start_depth) break;
|
||||
if (prev_state->start_depth == start_depth) {
|
||||
if (prev_state->pattern_index < pattern->pattern_index) break;
|
||||
if (prev_state->pattern_index == pattern->pattern_index) {
|
||||
// Avoid inserting an unnecessary duplicate state,
|
||||
// which would be immediately pruned by the longest-match criteria.
|
||||
if (prev_state->step_index == pattern->step_index) return;
|
||||
}
|
||||
}
|
||||
index--;
|
||||
}
|
||||
|
||||
LOG(
|
||||
" start state. pattern:%u, step:%u\n",
|
||||
pattern->pattern_index,
|
||||
pattern->step_index
|
||||
);
|
||||
QueryStep *step = &self->query->steps.contents[pattern->step_index];
|
||||
array_push(&self->states, ((QueryState) {
|
||||
array_insert(&self->states, index, ((QueryState) {
|
||||
.capture_list_id = NONE,
|
||||
.step_index = pattern->step_index,
|
||||
.pattern_index = pattern->pattern_index,
|
||||
.start_depth = self->depth - step->depth,
|
||||
.start_depth = start_depth,
|
||||
.consumed_capture_count = 0,
|
||||
.seeking_immediate_match = false,
|
||||
.seeking_immediate_match = true,
|
||||
.has_in_progress_alternatives = false,
|
||||
.dead = false,
|
||||
}));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Acquire a capture list for this state. If there are no capture lists left in the
|
||||
// pool, this will steal the capture list from another existing state, and mark that
|
||||
// other state as 'dead'.
|
||||
static CaptureList *ts_query_cursor__prepare_to_capture(
|
||||
TSQueryCursor *self,
|
||||
QueryState *state,
|
||||
unsigned state_index_to_preserve
|
||||
) {
|
||||
if (state->capture_list_id == NONE) {
|
||||
state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
|
||||
|
||||
// If there are no capture lists left in the pool, then terminate whichever
|
||||
// state has captured the earliest node in the document, and steal its
|
||||
// capture list.
|
||||
if (state->capture_list_id == NONE) {
|
||||
uint32_t state_index, byte_offset, pattern_index;
|
||||
if (
|
||||
ts_query_cursor__first_in_progress_capture(
|
||||
self,
|
||||
&state_index,
|
||||
&byte_offset,
|
||||
&pattern_index
|
||||
) &&
|
||||
state_index != state_index_to_preserve
|
||||
) {
|
||||
LOG(
|
||||
" abandon state. index:%u, pattern:%u, offset:%u.\n",
|
||||
state_index, pattern_index, byte_offset
|
||||
);
|
||||
QueryState *other_state = &self->states.contents[state_index];
|
||||
state->capture_list_id = other_state->capture_list_id;
|
||||
other_state->capture_list_id = NONE;
|
||||
other_state->dead = true;
|
||||
CaptureList *list = capture_list_pool_get_mut(
|
||||
&self->capture_list_pool,
|
||||
state->capture_list_id
|
||||
);
|
||||
array_clear(list);
|
||||
return list;
|
||||
} else {
|
||||
LOG(" ran out of capture lists");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
|
||||
}
|
||||
|
||||
// Duplicate the given state and insert the newly-created state immediately after
|
||||
// the given state in the `states` array.
|
||||
static QueryState *ts_query__cursor_copy_state(
|
||||
// the given state in the `states` array. Ensures that the given state reference is
|
||||
// still valid, even if the states array is reallocated.
|
||||
static QueryState *ts_query_cursor__copy_state(
|
||||
TSQueryCursor *self,
|
||||
const QueryState *state
|
||||
QueryState **state_ref
|
||||
) {
|
||||
if (self->states.size >= MAX_STATE_COUNT) {
|
||||
LOG(" too many states");
|
||||
return NULL;
|
||||
}
|
||||
const QueryState *state = *state_ref;
|
||||
uint32_t state_index = state - self->states.contents;
|
||||
QueryState copy = *state;
|
||||
copy.capture_list_id = NONE;
|
||||
|
||||
// If the state has captures, copy its capture list.
|
||||
QueryState copy = *state;
|
||||
copy.capture_list_id = state->capture_list_id;
|
||||
if (state->capture_list_id != NONE) {
|
||||
copy.capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
|
||||
if (copy.capture_list_id == NONE) {
|
||||
LOG(" too many capture lists");
|
||||
return NULL;
|
||||
}
|
||||
CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, ©, state_index);
|
||||
if (!new_captures) return NULL;
|
||||
const CaptureList *old_captures = capture_list_pool_get(
|
||||
&self->capture_list_pool,
|
||||
state->capture_list_id
|
||||
);
|
||||
CaptureList *new_captures = capture_list_pool_get_mut(
|
||||
&self->capture_list_pool,
|
||||
copy.capture_list_id
|
||||
);
|
||||
array_push_all(new_captures, old_captures);
|
||||
}
|
||||
|
||||
uint32_t index = (state - self->states.contents) + 1;
|
||||
array_insert(&self->states, index, copy);
|
||||
return &self->states.contents[index];
|
||||
array_insert(&self->states, state_index + 1, copy);
|
||||
*state_ref = &self->states.contents[state_index];
|
||||
return &self->states.contents[state_index + 1];
|
||||
}
|
||||
|
||||
// Walk the tree, processing patterns until at least one pattern finishes,
|
||||
|
|
@ -2180,18 +2272,30 @@ static QueryState *ts_query__cursor_copy_state(
|
|||
// `finished_states` array. Multiple patterns can finish on the same node. If
|
||||
// there are no more matches, return `false`.
|
||||
static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
||||
do {
|
||||
bool did_match = false;
|
||||
for (;;) {
|
||||
if (self->halted) {
|
||||
while (self->states.size > 0) {
|
||||
QueryState state = array_pop(&self->states);
|
||||
capture_list_pool_release(
|
||||
&self->capture_list_pool,
|
||||
state.capture_list_id
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (did_match || self->halted) return did_match;
|
||||
|
||||
if (self->ascending) {
|
||||
LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor)));
|
||||
|
||||
// Leave this node by stepping to its next sibling or to its parent.
|
||||
bool did_move = true;
|
||||
if (ts_tree_cursor_goto_next_sibling(&self->cursor)) {
|
||||
self->ascending = false;
|
||||
} else if (ts_tree_cursor_goto_parent(&self->cursor)) {
|
||||
self->depth--;
|
||||
} else {
|
||||
did_move = false;
|
||||
self->halted = true;
|
||||
}
|
||||
|
||||
// After leaving a node, remove any states that cannot make further progress.
|
||||
|
|
@ -2203,10 +2307,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
// If a state completed its pattern inside of this node, but was deferred from finishing
|
||||
// in order to search for longer matches, mark it as finished.
|
||||
if (step->depth == PATTERN_DONE_MARKER) {
|
||||
if (state->start_depth > self->depth || !did_move) {
|
||||
if (state->start_depth > self->depth || self->halted) {
|
||||
LOG(" finish pattern %u\n", state->pattern_index);
|
||||
state->id = self->next_state_id++;
|
||||
array_push(&self->finished_states, *state);
|
||||
did_match = true;
|
||||
deleted_count++;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -2233,10 +2338,6 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
}
|
||||
}
|
||||
self->states.size -= deleted_count;
|
||||
|
||||
if (!did_move) {
|
||||
return self->finished_states.size > 0;
|
||||
}
|
||||
} else {
|
||||
// If this node is before the selected range, then avoid descending into it.
|
||||
TSNode node = ts_tree_cursor_current_node(&self->cursor);
|
||||
|
|
@ -2254,7 +2355,10 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
if (
|
||||
self->end_byte <= ts_node_start_byte(node) ||
|
||||
point_lte(self->end_point, ts_node_start_point(node))
|
||||
) return false;
|
||||
) {
|
||||
self->halted = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the properties of the current node.
|
||||
TSSymbol symbol = ts_node_symbol(node);
|
||||
|
|
@ -2286,7 +2390,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
// If this node matches the first step of the pattern, then add a new
|
||||
// state at the start of this pattern.
|
||||
if (step->field && field_id != step->field) continue;
|
||||
if (!ts_query_cursor__add_state(self, pattern)) break;
|
||||
ts_query_cursor__add_state(self, pattern);
|
||||
}
|
||||
|
||||
// Add new states for any patterns whose root node matches this node.
|
||||
|
|
@ -2298,7 +2402,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
// If this node matches the first step of the pattern, then add a new
|
||||
// state at the start of this pattern.
|
||||
if (step->field && field_id != step->field) continue;
|
||||
if (!ts_query_cursor__add_state(self, pattern)) break;
|
||||
ts_query_cursor__add_state(self, pattern);
|
||||
|
||||
// Advance to the next pattern whose root node matches this node.
|
||||
i++;
|
||||
|
|
@ -2366,12 +2470,8 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
// parent, then this query state cannot simply be updated in place. It must be
|
||||
// split into two states: one that matches this node, and one which skips over
|
||||
// this node, to preserve the possibility of matching later siblings.
|
||||
if (
|
||||
later_sibling_can_match &&
|
||||
!step->is_pattern_start &&
|
||||
step->contains_captures
|
||||
) {
|
||||
if (ts_query__cursor_copy_state(self, state)) {
|
||||
if (later_sibling_can_match && step->contains_captures) {
|
||||
if (ts_query_cursor__copy_state(self, &state)) {
|
||||
LOG(
|
||||
" split state for capture. pattern:%u, step:%u\n",
|
||||
state->pattern_index,
|
||||
|
|
@ -2382,45 +2482,14 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
}
|
||||
|
||||
// If the current node is captured in this pattern, add it to the capture list.
|
||||
// For the first capture in a pattern, lazily acquire a capture list.
|
||||
if (step->capture_ids[0] != NONE) {
|
||||
if (state->capture_list_id == NONE) {
|
||||
state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
|
||||
|
||||
// If there are no capture lists left in the pool, then terminate whichever
|
||||
// state has captured the earliest node in the document, and steal its
|
||||
// capture list.
|
||||
if (state->capture_list_id == NONE) {
|
||||
uint32_t state_index, byte_offset, pattern_index;
|
||||
if (ts_query_cursor__first_in_progress_capture(
|
||||
self,
|
||||
&state_index,
|
||||
&byte_offset,
|
||||
&pattern_index
|
||||
)) {
|
||||
LOG(
|
||||
" abandon state. index:%u, pattern:%u, offset:%u.\n",
|
||||
state_index, pattern_index, byte_offset
|
||||
);
|
||||
state->capture_list_id = self->states.contents[state_index].capture_list_id;
|
||||
array_erase(&self->states, state_index);
|
||||
if (state_index < i) {
|
||||
i--;
|
||||
state--;
|
||||
}
|
||||
} else {
|
||||
LOG(" too many finished states.\n");
|
||||
array_erase(&self->states, i);
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
|
||||
if (!capture_list) {
|
||||
array_erase(&self->states, i);
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
|
||||
CaptureList *capture_list = capture_list_pool_get_mut(
|
||||
&self->capture_list_pool,
|
||||
state->capture_list_id
|
||||
);
|
||||
for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
|
||||
uint16_t capture_id = step->capture_ids[j];
|
||||
if (step->capture_ids[j] == NONE) break;
|
||||
|
|
@ -2443,10 +2512,9 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
state->step_index
|
||||
);
|
||||
|
||||
// If this state's next step has an 'alternative' step (the step is either optional,
|
||||
// or is the end of a repetition), then copy the state in order to pursue both
|
||||
// alternatives. The alternative step itself may have an alternative, so this is
|
||||
// an interative process.
|
||||
// If this state's next step has an alternative step, then copy the state in order
|
||||
// to pursue both alternatives. The alternative step itself may have an alternative,
|
||||
// so this is an iterative process.
|
||||
unsigned end_index = i + 1;
|
||||
for (unsigned j = i; j < end_index; j++) {
|
||||
QueryState *state = &self->states.contents[j];
|
||||
|
|
@ -2458,25 +2526,27 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
continue;
|
||||
}
|
||||
|
||||
QueryState *copy = ts_query__cursor_copy_state(self, state);
|
||||
if (next_step->is_pass_through) {
|
||||
state->step_index++;
|
||||
j--;
|
||||
}
|
||||
|
||||
QueryState *copy = ts_query_cursor__copy_state(self, &state);
|
||||
if (copy) {
|
||||
copy_count++;
|
||||
LOG(
|
||||
" split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
|
||||
copy->pattern_index,
|
||||
copy->step_index,
|
||||
next_step->alternative_index,
|
||||
next_step->alternative_is_immediate,
|
||||
capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
|
||||
);
|
||||
end_index++;
|
||||
copy_count++;
|
||||
copy->step_index = next_step->alternative_index;
|
||||
if (next_step->alternative_is_immediate) {
|
||||
copy->seeking_immediate_match = true;
|
||||
}
|
||||
LOG(
|
||||
" split state for branch. pattern:%u, step:%u, step:%u, immediate:%d\n",
|
||||
copy->pattern_index,
|
||||
state->step_index,
|
||||
copy->step_index,
|
||||
copy->seeking_immediate_match
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2484,59 +2554,77 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
|
||||
for (unsigned i = 0; i < self->states.size; i++) {
|
||||
QueryState *state = &self->states.contents[i];
|
||||
bool did_remove = false;
|
||||
if (state->dead) {
|
||||
array_erase(&self->states, i);
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Enforce the longest-match criteria. When a query pattern contains optional or
|
||||
// repeated nodes, this is necesssary to avoid multiple redundant states, where
|
||||
// repeated nodes, this is necessary to avoid multiple redundant states, where
|
||||
// one state has a strict subset of another state's captures.
|
||||
bool did_remove = false;
|
||||
for (unsigned j = i + 1; j < self->states.size; j++) {
|
||||
QueryState *other_state = &self->states.contents[j];
|
||||
|
||||
// Query states are kept in ascending order of start_depth and pattern_index.
|
||||
// Since the longest-match criteria is only used for deduping matches of the same
|
||||
// pattern and root node, we only need to perform pairwise comparisons within a
|
||||
// small slice of the states array.
|
||||
if (
|
||||
state->pattern_index == other_state->pattern_index &&
|
||||
state->start_depth == other_state->start_depth
|
||||
) {
|
||||
bool left_contains_right, right_contains_left;
|
||||
ts_query_cursor__compare_captures(
|
||||
self,
|
||||
state,
|
||||
other_state,
|
||||
&left_contains_right,
|
||||
&right_contains_left
|
||||
);
|
||||
if (left_contains_right) {
|
||||
if (state->step_index == other_state->step_index) {
|
||||
LOG(
|
||||
" drop shorter state. pattern: %u, step_index: %u\n",
|
||||
state->pattern_index,
|
||||
state->step_index
|
||||
);
|
||||
capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
|
||||
array_erase(&self->states, j);
|
||||
j--;
|
||||
continue;
|
||||
}
|
||||
other_state->has_in_progress_alternatives = true;
|
||||
other_state->start_depth != state->start_depth ||
|
||||
other_state->pattern_index != state->pattern_index
|
||||
) break;
|
||||
|
||||
bool left_contains_right, right_contains_left;
|
||||
ts_query_cursor__compare_captures(
|
||||
self,
|
||||
state,
|
||||
other_state,
|
||||
&left_contains_right,
|
||||
&right_contains_left
|
||||
);
|
||||
if (left_contains_right) {
|
||||
if (state->step_index == other_state->step_index) {
|
||||
LOG(
|
||||
" drop shorter state. pattern: %u, step_index: %u\n",
|
||||
state->pattern_index,
|
||||
state->step_index
|
||||
);
|
||||
capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
|
||||
array_erase(&self->states, j);
|
||||
j--;
|
||||
continue;
|
||||
}
|
||||
if (right_contains_left) {
|
||||
if (state->step_index == other_state->step_index) {
|
||||
LOG(
|
||||
" drop shorter state. pattern: %u, step_index: %u\n",
|
||||
state->pattern_index,
|
||||
state->step_index
|
||||
);
|
||||
capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
|
||||
array_erase(&self->states, i);
|
||||
did_remove = true;
|
||||
break;
|
||||
}
|
||||
state->has_in_progress_alternatives = true;
|
||||
other_state->has_in_progress_alternatives = true;
|
||||
}
|
||||
if (right_contains_left) {
|
||||
if (state->step_index == other_state->step_index) {
|
||||
LOG(
|
||||
" drop shorter state. pattern: %u, step_index: %u\n",
|
||||
state->pattern_index,
|
||||
state->step_index
|
||||
);
|
||||
capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
|
||||
array_erase(&self->states, i);
|
||||
i--;
|
||||
did_remove = true;
|
||||
break;
|
||||
}
|
||||
state->has_in_progress_alternatives = true;
|
||||
}
|
||||
}
|
||||
|
||||
// If the state is at the end of its pattern, remove it from the list
|
||||
// of in-progress states and add it to the list of finished states.
|
||||
if (!did_remove) {
|
||||
LOG(
|
||||
" keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
|
||||
state->pattern_index,
|
||||
state->start_depth,
|
||||
state->step_index,
|
||||
capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
|
||||
);
|
||||
QueryStep *next_step = &self->query->steps.contents[state->step_index];
|
||||
if (next_step->depth == PATTERN_DONE_MARKER) {
|
||||
if (state->has_in_progress_alternatives) {
|
||||
|
|
@ -2546,6 +2634,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
state->id = self->next_state_id++;
|
||||
array_push(&self->finished_states, *state);
|
||||
array_erase(&self->states, state - self->states.contents);
|
||||
did_match = true;
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
|
@ -2559,9 +2648,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
|
|||
self->ascending = true;
|
||||
}
|
||||
}
|
||||
} while (self->finished_states.size == 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool ts_query_cursor_next_match(
|
||||
|
|
@ -2701,7 +2788,10 @@ bool ts_query_cursor_next_capture(
|
|||
|
||||
// If there are no finished matches that are ready to be returned, then
|
||||
// continue finding more matches.
|
||||
if (!ts_query_cursor__advance(self)) return false;
|
||||
if (
|
||||
!ts_query_cursor__advance(self) &&
|
||||
self->finished_states.size == 0
|
||||
) return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,18 +16,10 @@ typedef enum {
|
|||
TSTagsInvalidUtf8,
|
||||
TSTagsInvalidRegex,
|
||||
TSTagsInvalidQuery,
|
||||
TSTagsInvalidCapture,
|
||||
} TSTagsError;
|
||||
|
||||
typedef enum {
|
||||
TSTagKindFunction,
|
||||
TSTagKindMethod,
|
||||
TSTagKindClass,
|
||||
TSTagKindModule,
|
||||
TSTagKindCall,
|
||||
} TSTagKind;
|
||||
|
||||
typedef struct {
|
||||
TSTagKind kind;
|
||||
uint32_t start_byte;
|
||||
uint32_t end_byte;
|
||||
uint32_t name_start_byte;
|
||||
|
|
@ -36,8 +28,12 @@ typedef struct {
|
|||
uint32_t line_end_byte;
|
||||
TSPoint start_point;
|
||||
TSPoint end_point;
|
||||
uint32_t utf16_start_column;
|
||||
uint32_t utf16_end_column;
|
||||
uint32_t docs_start_byte;
|
||||
uint32_t docs_end_byte;
|
||||
uint32_t syntax_type_id;
|
||||
bool is_definition;
|
||||
} TSTag;
|
||||
|
||||
typedef struct TSTagger TSTagger;
|
||||
|
|
@ -89,6 +85,12 @@ uint32_t ts_tags_buffer_tags_len(const TSTagsBuffer *);
|
|||
const char *ts_tags_buffer_docs(const TSTagsBuffer *);
|
||||
uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *);
|
||||
|
||||
// Get the syntax kinds for a scope.
|
||||
const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len);
|
||||
|
||||
// Determine whether a parse error was encountered while tagging.
|
||||
bool ts_tags_buffer_found_parse_error(const TSTagsBuffer*);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use super::{Error, TagKind, TagsConfiguration, TagsContext};
|
||||
use super::{Error, TagsConfiguration, TagsContext};
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::CStr;
|
||||
use std::process::abort;
|
||||
|
|
@ -6,6 +6,9 @@ use std::sync::atomic::AtomicUsize;
|
|||
use std::{fmt, slice, str};
|
||||
use tree_sitter::Language;
|
||||
|
||||
const BUFFER_TAGS_RESERVE_CAPACITY: usize = 100;
|
||||
const BUFFER_DOCS_RESERVE_CAPACITY: usize = 1024;
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum TSTagsError {
|
||||
|
|
@ -16,19 +19,10 @@ pub enum TSTagsError {
|
|||
InvalidUtf8,
|
||||
InvalidRegex,
|
||||
InvalidQuery,
|
||||
InvalidCapture,
|
||||
Unknown,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum TSTagKind {
|
||||
Function,
|
||||
Method,
|
||||
Class,
|
||||
Module,
|
||||
Call,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct TSPoint {
|
||||
row: u32,
|
||||
|
|
@ -37,7 +31,6 @@ pub struct TSPoint {
|
|||
|
||||
#[repr(C)]
|
||||
pub struct TSTag {
|
||||
pub kind: TSTagKind,
|
||||
pub start_byte: u32,
|
||||
pub end_byte: u32,
|
||||
pub name_start_byte: u32,
|
||||
|
|
@ -46,8 +39,12 @@ pub struct TSTag {
|
|||
pub line_end_byte: u32,
|
||||
pub start_point: TSPoint,
|
||||
pub end_point: TSPoint,
|
||||
pub utf16_start_colum: u32,
|
||||
pub utf16_end_colum: u32,
|
||||
pub docs_start_byte: u32,
|
||||
pub docs_end_byte: u32,
|
||||
pub syntax_type_id: u32,
|
||||
pub is_definition: bool,
|
||||
}
|
||||
|
||||
pub struct TSTagger {
|
||||
|
|
@ -58,6 +55,7 @@ pub struct TSTagsBuffer {
|
|||
context: TagsContext,
|
||||
tags: Vec<TSTag>,
|
||||
docs: Vec<u8>,
|
||||
errors_present: bool,
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
|
@ -102,7 +100,9 @@ pub extern "C" fn ts_tagger_add_language(
|
|||
}
|
||||
Err(Error::Query(_)) => TSTagsError::InvalidQuery,
|
||||
Err(Error::Regex(_)) => TSTagsError::InvalidRegex,
|
||||
Err(_) => TSTagsError::Unknown,
|
||||
Err(Error::Cancelled) => TSTagsError::Timeout,
|
||||
Err(Error::InvalidLanguage) => TSTagsError::InvalidLanguage,
|
||||
Err(Error::InvalidCapture(_)) => TSTagsError::InvalidCapture,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -120,8 +120,9 @@ pub extern "C" fn ts_tagger_tag(
|
|||
let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) };
|
||||
|
||||
if let Some(config) = tagger.languages.get(scope_name) {
|
||||
buffer.tags.clear();
|
||||
buffer.docs.clear();
|
||||
shrink_and_clear(&mut buffer.tags, BUFFER_TAGS_RESERVE_CAPACITY);
|
||||
shrink_and_clear(&mut buffer.docs, BUFFER_DOCS_RESERVE_CAPACITY);
|
||||
|
||||
let source_code = unsafe { slice::from_raw_parts(source_code, source_code_len as usize) };
|
||||
let cancellation_flag = unsafe { cancellation_flag.as_ref() };
|
||||
|
||||
|
|
@ -129,7 +130,10 @@ pub extern "C" fn ts_tagger_tag(
|
|||
.context
|
||||
.generate_tags(config, source_code, cancellation_flag)
|
||||
{
|
||||
Ok(tags) => tags,
|
||||
Ok((tags, found_error)) => {
|
||||
buffer.errors_present = found_error;
|
||||
tags
|
||||
}
|
||||
Err(e) => {
|
||||
return match e {
|
||||
Error::InvalidLanguage => TSTagsError::InvalidLanguage,
|
||||
|
|
@ -153,13 +157,6 @@ pub extern "C" fn ts_tagger_tag(
|
|||
buffer.docs.extend_from_slice(docs.as_bytes());
|
||||
}
|
||||
buffer.tags.push(TSTag {
|
||||
kind: match tag.kind {
|
||||
TagKind::Function => TSTagKind::Function,
|
||||
TagKind::Method => TSTagKind::Method,
|
||||
TagKind::Class => TSTagKind::Class,
|
||||
TagKind::Module => TSTagKind::Module,
|
||||
TagKind::Call => TSTagKind::Call,
|
||||
},
|
||||
start_byte: tag.range.start as u32,
|
||||
end_byte: tag.range.end as u32,
|
||||
name_start_byte: tag.name_range.start as u32,
|
||||
|
|
@ -174,8 +171,12 @@ pub extern "C" fn ts_tagger_tag(
|
|||
row: tag.span.end.row as u32,
|
||||
column: tag.span.end.column as u32,
|
||||
},
|
||||
utf16_start_colum: tag.utf16_column_range.start as u32,
|
||||
utf16_end_colum: tag.utf16_column_range.end as u32,
|
||||
docs_start_byte: prev_docs_len as u32,
|
||||
docs_end_byte: buffer.docs.len() as u32,
|
||||
syntax_type_id: tag.syntax_type_id,
|
||||
is_definition: tag.is_definition,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -189,8 +190,9 @@ pub extern "C" fn ts_tagger_tag(
|
|||
pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer {
|
||||
Box::into_raw(Box::new(TSTagsBuffer {
|
||||
context: TagsContext::new(),
|
||||
tags: Vec::with_capacity(64),
|
||||
docs: Vec::with_capacity(64),
|
||||
tags: Vec::with_capacity(BUFFER_TAGS_RESERVE_CAPACITY),
|
||||
docs: Vec::with_capacity(BUFFER_DOCS_RESERVE_CAPACITY),
|
||||
errors_present: false,
|
||||
}))
|
||||
}
|
||||
|
||||
|
|
@ -223,6 +225,30 @@ pub extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 {
|
|||
buffer.docs.len() as u32
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool {
|
||||
let buffer = unwrap_ptr(this);
|
||||
buffer.errors_present
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name(
|
||||
this: *mut TSTagger,
|
||||
scope_name: *const i8,
|
||||
len: *mut u32,
|
||||
) -> *const *const i8 {
|
||||
let tagger = unwrap_mut_ptr(this);
|
||||
let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) };
|
||||
let len = unwrap_mut_ptr(len);
|
||||
|
||||
*len = 0;
|
||||
if let Some(config) = tagger.languages.get(scope_name) {
|
||||
*len = config.c_syntax_type_names.len() as u32;
|
||||
return config.c_syntax_type_names.as_ptr() as *const *const i8;
|
||||
}
|
||||
std::ptr::null()
|
||||
}
|
||||
|
||||
fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
|
||||
unsafe { result.as_ref() }.unwrap_or_else(|| {
|
||||
eprintln!("{}:{} - pointer must not be null", file!(), line!());
|
||||
|
|
@ -243,3 +269,11 @@ fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
|
|||
abort();
|
||||
})
|
||||
}
|
||||
|
||||
fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
|
||||
if vec.len() > capacity {
|
||||
vec.truncate(capacity);
|
||||
vec.shrink_to_fit();
|
||||
}
|
||||
vec.clear();
|
||||
}
|
||||
|
|
|
|||
532
tags/src/lib.rs
532
tags/src/lib.rs
|
|
@ -1,10 +1,12 @@
|
|||
pub mod c_lib;
|
||||
|
||||
use memchr::{memchr, memrchr};
|
||||
use memchr::memchr;
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::{CStr, CString};
|
||||
use std::ops::Range;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::{fmt, mem, str};
|
||||
use std::{char, fmt, mem, str};
|
||||
use tree_sitter::{
|
||||
Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree,
|
||||
};
|
||||
|
|
@ -18,19 +20,24 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100;
|
|||
pub struct TagsConfiguration {
|
||||
pub language: Language,
|
||||
pub query: Query,
|
||||
call_capture_index: Option<u32>,
|
||||
class_capture_index: Option<u32>,
|
||||
syntax_type_names: Vec<Box<[u8]>>,
|
||||
c_syntax_type_names: Vec<*const u8>,
|
||||
capture_map: HashMap<u32, NamedCapture>,
|
||||
doc_capture_index: Option<u32>,
|
||||
function_capture_index: Option<u32>,
|
||||
method_capture_index: Option<u32>,
|
||||
module_capture_index: Option<u32>,
|
||||
name_capture_index: Option<u32>,
|
||||
ignore_capture_index: Option<u32>,
|
||||
local_scope_capture_index: Option<u32>,
|
||||
local_definition_capture_index: Option<u32>,
|
||||
tags_pattern_index: usize,
|
||||
pattern_info: Vec<PatternInfo>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct NamedCapture {
|
||||
pub syntax_type_id: u32,
|
||||
pub is_definition: bool,
|
||||
}
|
||||
|
||||
pub struct TagsContext {
|
||||
parser: Parser,
|
||||
cursor: QueryCursor,
|
||||
|
|
@ -38,21 +45,14 @@ pub struct TagsContext {
|
|||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Tag {
|
||||
pub kind: TagKind,
|
||||
pub range: Range<usize>,
|
||||
pub name_range: Range<usize>,
|
||||
pub line_range: Range<usize>,
|
||||
pub span: Range<Point>,
|
||||
pub utf16_column_range: Range<usize>,
|
||||
pub docs: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub enum TagKind {
|
||||
Function,
|
||||
Method,
|
||||
Class,
|
||||
Module,
|
||||
Call,
|
||||
pub is_definition: bool,
|
||||
pub syntax_type_id: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
|
|
@ -61,6 +61,7 @@ pub enum Error {
|
|||
Regex(regex::Error),
|
||||
Cancelled,
|
||||
InvalidLanguage,
|
||||
InvalidCapture(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
|
|
@ -91,6 +92,7 @@ where
|
|||
matches: I,
|
||||
_tree: Tree,
|
||||
source: &'a [u8],
|
||||
prev_line_info: Option<LineInfo>,
|
||||
config: &'a TagsConfiguration,
|
||||
cancellation_flag: Option<&'a AtomicUsize>,
|
||||
iter_count: usize,
|
||||
|
|
@ -98,6 +100,18 @@ where
|
|||
scopes: Vec<LocalScope<'a>>,
|
||||
}
|
||||
|
||||
/// Position reached at the end of the most recently processed tag name.
///
/// It is kept so that a following tag on the same row can reuse the line range
/// and continue the UTF-16 column count instead of recomputing them.
struct LineInfo {
|
||||
/// Row/column at which the previous tag's name ended.
utf8_position: Point,
|
||||
/// Byte offset at which the previous tag's name ended.
utf8_byte: usize,
|
||||
/// UTF-16 column at which the previous tag's name ended.
utf16_column: usize,
|
||||
/// Byte range of the line, as computed for the previous tag.
line_range: Range<usize>,
|
||||
}
|
||||
|
||||
/// Iterator state for decoding a byte slice as UTF-8 in `&str` chunks, yielding
/// `"\u{fffd}"` in place of invalid byte sequences.
struct LossyUtf8<'a> {
|
||||
/// Bytes that have not been consumed yet.
bytes: &'a [u8],
|
||||
/// Set after yielding a valid chunk that was followed by an invalid sequence;
/// the next item produced is the replacement character.
in_replacement: bool,
|
||||
}
|
||||
|
||||
impl TagsConfiguration {
|
||||
pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result<Self, Error> {
|
||||
let query = Query::new(language, &format!("{}{}", locals_query, tags_query))?;
|
||||
|
|
@ -111,31 +125,57 @@ impl TagsConfiguration {
|
|||
}
|
||||
}
|
||||
|
||||
let mut call_capture_index = None;
|
||||
let mut class_capture_index = None;
|
||||
let mut capture_map = HashMap::new();
|
||||
let mut syntax_type_names = Vec::new();
|
||||
let mut doc_capture_index = None;
|
||||
let mut function_capture_index = None;
|
||||
let mut method_capture_index = None;
|
||||
let mut module_capture_index = None;
|
||||
let mut name_capture_index = None;
|
||||
let mut ignore_capture_index = None;
|
||||
let mut local_scope_capture_index = None;
|
||||
let mut local_definition_capture_index = None;
|
||||
for (i, name) in query.capture_names().iter().enumerate() {
|
||||
let index = match name.as_str() {
|
||||
"call" => &mut call_capture_index,
|
||||
"class" => &mut class_capture_index,
|
||||
"doc" => &mut doc_capture_index,
|
||||
"function" => &mut function_capture_index,
|
||||
"method" => &mut method_capture_index,
|
||||
"module" => &mut module_capture_index,
|
||||
"name" => &mut name_capture_index,
|
||||
"local.scope" => &mut local_scope_capture_index,
|
||||
"local.definition" => &mut local_definition_capture_index,
|
||||
_ => continue,
|
||||
};
|
||||
*index = Some(i as u32);
|
||||
match name.as_str() {
|
||||
"" => continue,
|
||||
"name" => name_capture_index = Some(i as u32),
|
||||
"ignore" => ignore_capture_index = Some(i as u32),
|
||||
"doc" => doc_capture_index = Some(i as u32),
|
||||
"local.scope" => local_scope_capture_index = Some(i as u32),
|
||||
"local.definition" => local_definition_capture_index = Some(i as u32),
|
||||
"local.reference" => continue,
|
||||
_ => {
|
||||
let mut is_definition = false;
|
||||
|
||||
let kind = if name.starts_with("definition.") {
|
||||
is_definition = true;
|
||||
name.trim_start_matches("definition.")
|
||||
} else if name.starts_with("reference.") {
|
||||
name.trim_start_matches("reference.")
|
||||
} else {
|
||||
return Err(Error::InvalidCapture(name.to_string()));
|
||||
};
|
||||
|
||||
if let Ok(cstr) = CString::new(kind) {
|
||||
let c_kind = cstr.to_bytes_with_nul().to_vec().into_boxed_slice();
|
||||
let syntax_type_id = syntax_type_names
|
||||
.iter()
|
||||
.position(|n| n == &c_kind)
|
||||
.unwrap_or_else(|| {
|
||||
syntax_type_names.push(c_kind);
|
||||
syntax_type_names.len() - 1
|
||||
}) as u32;
|
||||
capture_map.insert(
|
||||
i as u32,
|
||||
NamedCapture {
|
||||
syntax_type_id,
|
||||
is_definition,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let c_syntax_type_names = syntax_type_names.iter().map(|s| s.as_ptr()).collect();
|
||||
|
||||
let pattern_info = (0..query.pattern_count())
|
||||
.map(|pattern_index| {
|
||||
let mut info = PatternInfo::default();
|
||||
|
|
@ -180,19 +220,26 @@ impl TagsConfiguration {
|
|||
Ok(TagsConfiguration {
|
||||
language,
|
||||
query,
|
||||
function_capture_index,
|
||||
class_capture_index,
|
||||
method_capture_index,
|
||||
module_capture_index,
|
||||
syntax_type_names,
|
||||
c_syntax_type_names,
|
||||
capture_map,
|
||||
doc_capture_index,
|
||||
call_capture_index,
|
||||
name_capture_index,
|
||||
ignore_capture_index,
|
||||
tags_pattern_index,
|
||||
local_scope_capture_index,
|
||||
local_definition_capture_index,
|
||||
pattern_info,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn syntax_type_name(&self, id: u32) -> &str {
|
||||
unsafe {
|
||||
let cstr = CStr::from_ptr(self.syntax_type_names[id as usize].as_ptr() as *const i8)
|
||||
.to_bytes();
|
||||
str::from_utf8(cstr).expect("syntax type name was not valid utf-8")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TagsContext {
|
||||
|
|
@ -208,7 +255,7 @@ impl TagsContext {
|
|||
config: &'a TagsConfiguration,
|
||||
source: &'a [u8],
|
||||
cancellation_flag: Option<&'a AtomicUsize>,
|
||||
) -> Result<impl Iterator<Item = Result<Tag, Error>> + 'a, Error> {
|
||||
) -> Result<(impl Iterator<Item = Result<Tag, Error>> + 'a, bool), Error> {
|
||||
self.parser
|
||||
.set_language(config.language)
|
||||
.map_err(|_| Error::InvalidLanguage)?;
|
||||
|
|
@ -224,12 +271,13 @@ impl TagsContext {
|
|||
.matches(&config.query, tree_ref.root_node(), move |node| {
|
||||
&source[node.byte_range()]
|
||||
});
|
||||
Ok(TagsIter {
|
||||
Ok((TagsIter {
|
||||
_tree: tree,
|
||||
matches,
|
||||
source,
|
||||
config,
|
||||
cancellation_flag,
|
||||
prev_line_info: None,
|
||||
tag_queue: Vec::new(),
|
||||
iter_count: 0,
|
||||
scopes: vec![LocalScope {
|
||||
|
|
@ -237,7 +285,7 @@ impl TagsContext {
|
|||
inherits: false,
|
||||
local_defs: Vec::new(),
|
||||
}],
|
||||
})
|
||||
}, tree_ref.root_node().has_error()))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -267,7 +315,12 @@ where
|
|||
if self.tag_queue.len() > 1
|
||||
&& self.tag_queue[0].0.name_range.end < last_entry.0.name_range.start
|
||||
{
|
||||
return Some(Ok(self.tag_queue.remove(0).0));
|
||||
let tag = self.tag_queue.remove(0).0;
|
||||
if tag.is_ignored() {
|
||||
continue;
|
||||
} else {
|
||||
return Some(Ok(tag));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -300,141 +353,185 @@ where
|
|||
continue;
|
||||
}
|
||||
|
||||
let mut name_range = None;
|
||||
let mut name_node = None;
|
||||
let mut doc_nodes = Vec::new();
|
||||
let mut tag_node = None;
|
||||
let mut kind = TagKind::Call;
|
||||
let mut syntax_type_id = 0;
|
||||
let mut is_definition = false;
|
||||
let mut docs_adjacent_node = None;
|
||||
let mut is_ignored = false;
|
||||
|
||||
for capture in mat.captures {
|
||||
let index = Some(capture.index);
|
||||
|
||||
if index == self.config.ignore_capture_index {
|
||||
is_ignored = true;
|
||||
name_node = Some(capture.node);
|
||||
}
|
||||
|
||||
if index == self.config.pattern_info[mat.pattern_index].docs_adjacent_capture {
|
||||
docs_adjacent_node = Some(capture.node);
|
||||
}
|
||||
|
||||
if index == self.config.name_capture_index {
|
||||
name_range = Some(capture.node.byte_range());
|
||||
name_node = Some(capture.node);
|
||||
} else if index == self.config.doc_capture_index {
|
||||
doc_nodes.push(capture.node);
|
||||
} else if index == self.config.call_capture_index {
|
||||
}
|
||||
|
||||
if let Some(named_capture) = self.config.capture_map.get(&capture.index) {
|
||||
tag_node = Some(capture.node);
|
||||
kind = TagKind::Call;
|
||||
} else if index == self.config.class_capture_index {
|
||||
tag_node = Some(capture.node);
|
||||
kind = TagKind::Class;
|
||||
} else if index == self.config.function_capture_index {
|
||||
tag_node = Some(capture.node);
|
||||
kind = TagKind::Function;
|
||||
} else if index == self.config.method_capture_index {
|
||||
tag_node = Some(capture.node);
|
||||
kind = TagKind::Method;
|
||||
} else if index == self.config.module_capture_index {
|
||||
tag_node = Some(capture.node);
|
||||
kind = TagKind::Module;
|
||||
syntax_type_id = named_capture.syntax_type_id;
|
||||
is_definition = named_capture.is_definition;
|
||||
}
|
||||
}
|
||||
|
||||
if let (Some(tag_node), Some(name_range)) = (tag_node, name_range) {
|
||||
if pattern_info.name_must_be_non_local {
|
||||
let mut is_local = false;
|
||||
for scope in self.scopes.iter().rev() {
|
||||
if scope.range.start <= name_range.start
|
||||
&& scope.range.end >= name_range.end
|
||||
{
|
||||
if scope
|
||||
.local_defs
|
||||
.iter()
|
||||
.any(|d| d.name == &self.source[name_range.clone()])
|
||||
{
|
||||
is_local = true;
|
||||
break;
|
||||
}
|
||||
if !scope.inherits {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if is_local {
|
||||
if let Some(name_node) = name_node {
|
||||
let name_range = name_node.byte_range();
|
||||
|
||||
let tag;
|
||||
if let Some(tag_node) = tag_node {
|
||||
if name_node.has_error() {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// If needed, filter the doc nodes based on their ranges, selecting
|
||||
// only the slice that are adjacent to some specified node.
|
||||
let mut docs_start_index = 0;
|
||||
if let (Some(docs_adjacent_node), false) =
|
||||
(docs_adjacent_node, doc_nodes.is_empty())
|
||||
{
|
||||
docs_start_index = doc_nodes.len();
|
||||
let mut start_row = docs_adjacent_node.start_position().row;
|
||||
while docs_start_index > 0 {
|
||||
let doc_node = &doc_nodes[docs_start_index - 1];
|
||||
let prev_doc_end_row = doc_node.end_position().row;
|
||||
if prev_doc_end_row + 1 >= start_row {
|
||||
docs_start_index -= 1;
|
||||
start_row = doc_node.start_position().row;
|
||||
} else {
|
||||
break;
|
||||
if pattern_info.name_must_be_non_local {
|
||||
let mut is_local = false;
|
||||
for scope in self.scopes.iter().rev() {
|
||||
if scope.range.start <= name_range.start
|
||||
&& scope.range.end >= name_range.end
|
||||
{
|
||||
if scope
|
||||
.local_defs
|
||||
.iter()
|
||||
.any(|d| d.name == &self.source[name_range.clone()])
|
||||
{
|
||||
is_local = true;
|
||||
break;
|
||||
}
|
||||
if !scope.inherits {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if is_local {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generate a doc string from all of the doc nodes, applying any strip regexes.
|
||||
let mut docs = None;
|
||||
for doc_node in &doc_nodes[docs_start_index..] {
|
||||
if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()]) {
|
||||
let content = if let Some(regex) = &pattern_info.doc_strip_regex {
|
||||
regex.replace_all(content, "").to_string()
|
||||
} else {
|
||||
content.to_string()
|
||||
};
|
||||
match &mut docs {
|
||||
None => docs = Some(content),
|
||||
Some(d) => {
|
||||
d.push('\n');
|
||||
d.push_str(&content);
|
||||
// If needed, filter the doc nodes based on their ranges, selecting
|
||||
// only the slice that are adjacent to some specified node.
|
||||
let mut docs_start_index = 0;
|
||||
if let (Some(docs_adjacent_node), false) =
|
||||
(docs_adjacent_node, doc_nodes.is_empty())
|
||||
{
|
||||
docs_start_index = doc_nodes.len();
|
||||
let mut start_row = docs_adjacent_node.start_position().row;
|
||||
while docs_start_index > 0 {
|
||||
let doc_node = &doc_nodes[docs_start_index - 1];
|
||||
let prev_doc_end_row = doc_node.end_position().row;
|
||||
if prev_doc_end_row + 1 >= start_row {
|
||||
docs_start_index -= 1;
|
||||
start_row = doc_node.start_position().row;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generate a doc string from all of the doc nodes, applying any strip regexes.
|
||||
let mut docs = None;
|
||||
for doc_node in &doc_nodes[docs_start_index..] {
|
||||
if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()])
|
||||
{
|
||||
let content = if let Some(regex) = &pattern_info.doc_strip_regex {
|
||||
regex.replace_all(content, "").to_string()
|
||||
} else {
|
||||
content.to_string()
|
||||
};
|
||||
match &mut docs {
|
||||
None => docs = Some(content),
|
||||
Some(d) => {
|
||||
d.push('\n');
|
||||
d.push_str(&content);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let rng = tag_node.byte_range();
|
||||
let range = rng.start.min(name_range.start)..rng.end.max(name_range.end);
|
||||
let span = name_node.start_position()..name_node.end_position();
|
||||
|
||||
// Compute tag properties that depend on the text of the containing line. If the
|
||||
// previous tag occurred on the same line, then reuse results from the previous tag.
|
||||
let line_range;
|
||||
let mut prev_utf16_column = 0;
|
||||
let mut prev_utf8_byte = name_range.start - span.start.column;
|
||||
let line_info = self.prev_line_info.as_ref().and_then(|info| {
|
||||
if info.utf8_position.row == span.start.row {
|
||||
Some(info)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
if let Some(line_info) = line_info {
|
||||
line_range = line_info.line_range.clone();
|
||||
if line_info.utf8_position.column <= span.start.column {
|
||||
prev_utf8_byte = line_info.utf8_byte;
|
||||
prev_utf16_column = line_info.utf16_column;
|
||||
}
|
||||
} else {
|
||||
line_range = self::line_range(
|
||||
self.source,
|
||||
name_range.start,
|
||||
span.start,
|
||||
MAX_LINE_LEN,
|
||||
);
|
||||
}
|
||||
|
||||
let utf16_start_column = prev_utf16_column
|
||||
+ utf16_len(&self.source[prev_utf8_byte..name_range.start]);
|
||||
let utf16_end_column =
|
||||
utf16_start_column + utf16_len(&self.source[name_range.clone()]);
|
||||
let utf16_column_range = utf16_start_column..utf16_end_column;
|
||||
|
||||
self.prev_line_info = Some(LineInfo {
|
||||
utf8_position: span.end,
|
||||
utf8_byte: name_range.end,
|
||||
utf16_column: utf16_end_column,
|
||||
line_range: line_range.clone(),
|
||||
});
|
||||
tag = Tag {
|
||||
line_range,
|
||||
span,
|
||||
utf16_column_range,
|
||||
range,
|
||||
name_range,
|
||||
docs,
|
||||
is_definition,
|
||||
syntax_type_id,
|
||||
};
|
||||
} else if is_ignored {
|
||||
tag = Tag::ignored(name_range);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Only create one tag per node. The tag queue is sorted by node position
|
||||
// to allow for fast lookup.
|
||||
let range = tag_node.byte_range();
|
||||
match self
|
||||
.tag_queue
|
||||
.binary_search_by_key(&(name_range.end, name_range.start), |(tag, _)| {
|
||||
(tag.name_range.end, tag.name_range.start)
|
||||
}) {
|
||||
match self.tag_queue.binary_search_by_key(
|
||||
&(tag.name_range.end, tag.name_range.start),
|
||||
|(tag, _)| (tag.name_range.end, tag.name_range.start),
|
||||
) {
|
||||
Ok(i) => {
|
||||
let (tag, pattern_index) = &mut self.tag_queue[i];
|
||||
let (existing_tag, pattern_index) = &mut self.tag_queue[i];
|
||||
if *pattern_index > mat.pattern_index {
|
||||
*pattern_index = mat.pattern_index;
|
||||
*tag = Tag {
|
||||
line_range: line_range(self.source, range.start, MAX_LINE_LEN),
|
||||
span: tag_node.start_position()..tag_node.end_position(),
|
||||
kind,
|
||||
range,
|
||||
name_range,
|
||||
docs,
|
||||
};
|
||||
*existing_tag = tag;
|
||||
}
|
||||
}
|
||||
Err(i) => self.tag_queue.insert(
|
||||
i,
|
||||
(
|
||||
Tag {
|
||||
line_range: line_range(self.source, range.start, MAX_LINE_LEN),
|
||||
span: tag_node.start_position()..tag_node.end_position(),
|
||||
kind,
|
||||
range,
|
||||
name_range,
|
||||
docs,
|
||||
},
|
||||
mat.pattern_index,
|
||||
),
|
||||
),
|
||||
Err(i) => self.tag_queue.insert(i, (tag, mat.pattern_index)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -448,16 +545,31 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for TagKind {
|
||||
impl Tag {
|
||||
fn ignored(name_range: Range<usize>) -> Self {
|
||||
Tag {
|
||||
name_range,
|
||||
line_range: 0..0,
|
||||
span: Point::new(0, 0)..Point::new(0, 0),
|
||||
utf16_column_range: 0..0,
|
||||
range: usize::MAX..usize::MAX,
|
||||
docs: None,
|
||||
is_definition: false,
|
||||
syntax_type_id: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_ignored(&self) -> bool {
|
||||
self.range.start == usize::MAX
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
TagKind::Call => "Call",
|
||||
TagKind::Module => "Module",
|
||||
TagKind::Class => "Class",
|
||||
TagKind::Method => "Method",
|
||||
TagKind::Function => "Function",
|
||||
Error::InvalidCapture(name) => write!(f, "Invalid capture @{}. Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).", name),
|
||||
_ => write!(f, "{:?}", self)
|
||||
}
|
||||
.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -473,11 +585,90 @@ impl From<QueryError> for Error {
|
|||
}
|
||||
}
|
||||
|
||||
fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range<usize> {
|
||||
let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1);
|
||||
let max_line_len = max_line_len.min(text.len() - start);
|
||||
let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len);
|
||||
start..end
|
||||
// TODO: Remove this struct at some point. If `core::str::lossy::Utf8Lossy`
|
||||
// is ever stabilized, we should use that. Otherwise, this struct could be moved
|
||||
// into some module that's shared between `tree-sitter-tags` and `tree-sitter-highlight`.
|
||||
impl<'a> LossyUtf8<'a> {
|
||||
fn new(bytes: &'a [u8]) -> Self {
|
||||
LossyUtf8 {
|
||||
bytes,
|
||||
in_replacement: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for LossyUtf8<'a> {
|
||||
type Item = &'a str;
|
||||
|
||||
fn next(&mut self) -> Option<&'a str> {
|
||||
if self.bytes.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if self.in_replacement {
|
||||
self.in_replacement = false;
|
||||
return Some("\u{fffd}");
|
||||
}
|
||||
match str::from_utf8(self.bytes) {
|
||||
Ok(valid) => {
|
||||
self.bytes = &[];
|
||||
Some(valid)
|
||||
}
|
||||
Err(error) => {
|
||||
if let Some(error_len) = error.error_len() {
|
||||
let error_start = error.valid_up_to();
|
||||
if error_start > 0 {
|
||||
let result =
|
||||
unsafe { str::from_utf8_unchecked(&self.bytes[..error_start]) };
|
||||
self.bytes = &self.bytes[(error_start + error_len)..];
|
||||
self.in_replacement = true;
|
||||
Some(result)
|
||||
} else {
|
||||
self.bytes = &self.bytes[error_len..];
|
||||
Some("\u{fffd}")
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn line_range(
|
||||
text: &[u8],
|
||||
start_byte: usize,
|
||||
start_point: Point,
|
||||
max_line_len: usize,
|
||||
) -> Range<usize> {
|
||||
// Trim leading whitespace
|
||||
let mut line_start_byte = start_byte - start_point.column;
|
||||
while line_start_byte < text.len() && text[line_start_byte].is_ascii_whitespace() {
|
||||
line_start_byte += 1;
|
||||
}
|
||||
|
||||
let max_line_len = max_line_len.min(text.len() - line_start_byte);
|
||||
let text_after_line_start = &text[line_start_byte..(line_start_byte + max_line_len)];
|
||||
let line_len = if let Some(len) = memchr(b'\n', text_after_line_start) {
|
||||
len
|
||||
} else if let Err(e) = str::from_utf8(text_after_line_start) {
|
||||
e.valid_up_to()
|
||||
} else {
|
||||
max_line_len
|
||||
};
|
||||
|
||||
// Trim trailing whitespace
|
||||
let mut line_end_byte = line_start_byte + line_len;
|
||||
while line_end_byte > line_start_byte && text[line_end_byte - 1].is_ascii_whitespace() {
|
||||
line_end_byte -= 1;
|
||||
}
|
||||
|
||||
line_start_byte..line_end_byte
|
||||
}
|
||||
|
||||
fn utf16_len(bytes: &[u8]) -> usize {
|
||||
LossyUtf8::new(bytes)
|
||||
.flat_map(|chunk| chunk.chars().map(char::len_utf16))
|
||||
.sum()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -486,14 +677,27 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_get_line() {
|
||||
let text = b"abc\ndefg\nhijkl";
|
||||
assert_eq!(line_range(text, 0, 10), 0..3);
|
||||
assert_eq!(line_range(text, 1, 10), 0..3);
|
||||
assert_eq!(line_range(text, 2, 10), 0..3);
|
||||
assert_eq!(line_range(text, 3, 10), 0..3);
|
||||
assert_eq!(line_range(text, 1, 2), 0..2);
|
||||
assert_eq!(line_range(text, 4, 10), 4..8);
|
||||
assert_eq!(line_range(text, 5, 10), 4..8);
|
||||
assert_eq!(line_range(text, 11, 10), 9..14);
|
||||
let text = "abc\ndefg❤hij\nklmno".as_bytes();
|
||||
assert_eq!(line_range(text, 5, Point::new(1, 1), 30), 4..14);
|
||||
assert_eq!(line_range(text, 5, Point::new(1, 1), 6), 4..8);
|
||||
assert_eq!(line_range(text, 17, Point::new(2, 2), 30), 15..20);
|
||||
assert_eq!(line_range(text, 17, Point::new(2, 2), 4), 15..19);
|
||||
}
|
||||
|
||||
/// Checks that `line_range` strips leading and trailing ASCII whitespace from
/// the reported line.
#[test]
fn test_get_line_trims() {
    // Three leading spaces: the trimmed line "foo" occupies bytes 3..6.
    let text = b"   foo\nbar\n";
    assert_eq!(line_range(text, 0, Point::new(0, 0), 10), 3..6);

    // A tab and a space before "func foo", and a single space after it.
    let text = b"\t func foo \nbar\n";
    assert_eq!(line_range(text, 0, Point::new(0, 0), 10), 2..10);

    let r = line_range(text, 0, Point::new(0, 0), 14);
    assert_eq!(r, 2..10);
    assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "func foo");

    let r = line_range(text, 12, Point::new(1, 0), 14);
    assert_eq!(r, 12..15);
    assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "bar");
}
|
||||
}
|
||||
|
|
|
|||
19
test/fixtures/error_corpus/ruby_errors.txt
vendored
Normal file
19
test/fixtures/error_corpus/ruby_errors.txt
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
==========================
|
||||
Heredocs with errors
|
||||
==========================
|
||||
|
||||
joins(<<~SQL(
|
||||
b
|
||||
SQL
|
||||
c
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(method_call
|
||||
method: (identifier)
|
||||
(ERROR (heredoc_beginning))
|
||||
arguments: (argument_list
|
||||
(heredoc_body (heredoc_end))
|
||||
(identifier)
|
||||
(MISSING ")"))))
|
||||
23
test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt
vendored
Normal file
23
test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
=====
|
||||
Extras
|
||||
=====
|
||||
|
||||
;
|
||||
%;
|
||||
%foo:;
|
||||
;
|
||||
bar: baz:;
|
||||
;
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(statement)
|
||||
(macro_statement (statement))
|
||||
(macro_statement (statement
|
||||
(label_declaration (identifier))))
|
||||
(statement)
|
||||
(statement
|
||||
(label_declaration (identifier))
|
||||
(label_declaration (identifier)))
|
||||
(statement))
|
||||
68
test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.json
vendored
Normal file
68
test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
{
|
||||
"name": "extra_non_terminals_with_shared_rules",
|
||||
|
||||
"extras": [
|
||||
{ "type": "PATTERN", "value": "\\s+" },
|
||||
{ "type": "SYMBOL", "name": "macro_statement" }
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"program": {
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "statement"
|
||||
}
|
||||
},
|
||||
"statement": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "label_declaration"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ";"
|
||||
}
|
||||
]
|
||||
},
|
||||
"macro_statement": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "%"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "statement"
|
||||
}
|
||||
]
|
||||
},
|
||||
"label_declaration": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ":"
|
||||
}
|
||||
]
|
||||
},
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
},
|
||||
"conflicts": [],
|
||||
"externals": [],
|
||||
"inline": [],
|
||||
"supertypes": []
|
||||
}
|
||||
|
|
@ -22,10 +22,10 @@ The fuzzers can then be built with:
|
|||
export CLANG_DIR=$HOME/src/third_party/llvm-build/Release+Asserts/bin
|
||||
CC="$CLANG_DIR/clang" CXX="$CLANG_DIR/clang++" LINK="$CLANG_DIR/clang++" \
|
||||
LIB_FUZZER_PATH=$HOME/src/compiler-rt/lib/fuzzer/libFuzzer.a \
|
||||
./script/build_fuzzers
|
||||
./script/build-fuzzers
|
||||
```
|
||||
|
||||
This will generate a separate fuzzer for each grammar defined in `test/fixtures/grammars` and will be instrumented with [AddressSanitizer](https://clang.llvm.org/docs/AddressSanitizer.html) and [UndefinedBehaviorSanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html). Individual fuzzers can be built with, for example, `./script/build_fuzzers python ruby`.
|
||||
This will generate a separate fuzzer for each grammar defined in `test/fixtures/grammars` and will be instrumented with [AddressSanitizer](https://clang.llvm.org/docs/AddressSanitizer.html) and [UndefinedBehaviorSanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html). Individual fuzzers can be built with, for example, `./script/build-fuzzers python ruby`.
|
||||
|
||||
The `run-fuzzer` script handles running an individual fuzzer with a sensible default set of arguments:
|
||||
```
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue