Merge branch 'master' into query-pattern-is-definite

This commit is contained in:
Max Brunsfeld 2020-08-14 09:31:55 -07:00
commit 1ea29053e1
33 changed files with 2004 additions and 763 deletions

2
Cargo.lock generated
View file

@ -740,7 +740,7 @@ dependencies = [
[[package]]
name = "tree-sitter-cli"
version = "0.16.8"
version = "0.16.9"
dependencies = [
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",

View file

@ -1,7 +1,7 @@
[package]
name = "tree-sitter-cli"
description = "CLI tool for developing, testing, and using Tree-sitter parsers"
version = "0.16.8"
version = "0.16.9"
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
edition = "2018"
license = "MIT"

356
cli/npm/dsl.d.ts vendored Normal file
View file

@ -0,0 +1,356 @@
// Serialized rule representations produced by the grammar DSL. Each variant
// mirrors one DSL combinator below (alias, blank, choice, field, …) and
// carries the data that combinator records.
type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string};
type BlankRule = {type: 'BLANK'};
type ChoiceRule = {type: 'CHOICE'; members: Rule[]};
type FieldRule = {type: 'FIELD'; name: string; content: Rule};
type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule};
type PatternRule = {type: 'PATTERN'; value: string};
type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number};
type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number};
type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number};
type PrecRule = {type: 'PREC'; content: Rule; value: number};
type Repeat1Rule = {type: 'REPEAT1'; content: Rule};
type RepeatRule = {type: 'REPEAT'; content: Rule};
type SeqRule = {type: 'SEQ'; members: Rule[]};
type StringRule = {type: 'STRING'; value: string};
type SymbolRule<Name extends string> = {type: 'SYMBOL'; name: Name};
type TokenRule = {type: 'TOKEN'; content: Rule};

// Discriminated union of every rule variant, keyed on the `type` tag.
type Rule =
  | AliasRule
  | BlankRule
  | ChoiceRule
  | FieldRule
  | ImmediateTokenRule
  | PatternRule
  | PrecDynamicRule
  | PrecLeftRule
  | PrecRightRule
  | PrecRule
  | Repeat1Rule
  | RepeatRule
  | SeqRule
  | StringRule
  | SymbolRule<string>
  | TokenRule;

// Anywhere a rule is expected, a RegExp or string literal is also accepted
// and treated as a pattern/string token rule.
type RuleOrLiteral = Rule | RegExp | string;

// The `$` object passed to rule builders: known rule names map to precisely
// typed symbols, while the string index fallback permits forward references
// to rules not captured in the `RuleName` parameter.
type GrammarSymbols<RuleName extends string> = {
  [name in RuleName]: SymbolRule<name>;
} &
  Record<string, SymbolRule<string>>;

// A function that builds one rule, given symbols for every rule in scope.
type RuleBuilder<RuleName extends string> = (
  $: GrammarSymbols<RuleName>,
) => RuleOrLiteral;

// Mapping from each rule name to its builder; builders may reference rules
// from this grammar as well as from the base grammar being extended.
type RuleBuilders<
  RuleName extends string,
  BaseGrammarRuleName extends string
> = {
  [name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
};
/**
 * Describes a language grammar as accepted by the `grammar()` function.
 *
 * `RuleName` is the union of rule names defined in this grammar;
 * `BaseGrammarRuleName` is the union of rule names inherited from a base
 * grammar (when extending one), so `$.someRule` references in the builder
 * callbacks below are statically checked.
 */
interface Grammar<
  RuleName extends string,
  BaseGrammarRuleName extends string = never,
  Rules extends RuleBuilders<RuleName, BaseGrammarRuleName> = RuleBuilders<
    RuleName,
    BaseGrammarRuleName
  >
> {
  /**
   * Name of the grammar language.
   */
  name: string;

  /** Mapping of grammar rule names to rule builder functions. */
  rules: Rules;

  /**
   * An array of arrays of rule names. Each inner array represents a set of
   * rules that's involved in an _LR(1) conflict_ that is _intended to exist_
   * in the grammar. When these conflicts occur at runtime, Tree-sitter will
   * use the GLR algorithm to explore all of the possible interpretations. If
   * _multiple_ parses end up succeeding, Tree-sitter will pick the subtree
   * whose corresponding rule has the highest total _dynamic precedence_.
   *
   * @param $ grammar rules
   */
  conflicts?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
  ) => RuleOrLiteral[][];

  /**
   * An array of token names which can be returned by an _external scanner_.
   * External scanners allow you to write custom C code which runs during the
   * lexing process in order to handle lexical rules (e.g. Python's indentation
   * tokens) that cannot be described by regular expressions.
   *
   * @param $ grammar rules
   * @param previous array of externals from the base schema, if any
   *
   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
   */
  externals?: (
    $: Record<string, SymbolRule<string>>,
    previous: Rule[],
  ) => SymbolRule<string>[];

  /**
   * An array of tokens that may appear anywhere in the language. This
   * is often used for whitespace and comments. The default value of
   * extras is to accept whitespace. To control whitespace explicitly,
   * specify extras: `$ => []` in your grammar.
   *
   * @param $ grammar rules
   */
  extras?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
  ) => RuleOrLiteral[];

  /**
   * An array of rules that should be automatically removed from the
   * grammar by replacing all of their usages with a copy of their definition.
   * This is useful for rules that are used in multiple places but for which
   * you don't want to create syntax tree nodes at runtime.
   *
   * @param $ grammar rules
   */
  inline?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
  ) => RuleOrLiteral[];

  /**
   * A list of hidden rule names that should be considered supertypes in the
   * generated node types file.
   *
   * @param $ grammar rules
   *
   * @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
   */
  supertypes?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
  ) => RuleOrLiteral[];

  /**
   * The name of a token that will match keywords for the purpose of the
   * keyword extraction optimization.
   *
   * @param $ grammar rules
   *
   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction
   */
  word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;
}
/**
 * The resolved form of a grammar as returned by `grammar()`: identical to
 * `Grammar`, except that the `rules` builders have been evaluated into
 * concrete serialized `Rule` objects.
 */
type GrammarSchema<RuleName extends string> = {
  [K in keyof Grammar<RuleName>]: K extends 'rules'
    ? Record<RuleName, Rule>
    : Grammar<RuleName>[K];
};
/**
 * Causes the given rule to appear with an alternative name in the syntax tree.
 * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an
 * anonymous node, as if the rule had been written as the simple string.
 *
 * @param rule rule that will be aliased
 * @param name target name for the alias
 */
declare function alias(rule: RuleOrLiteral, name: string): AliasRule;

/**
 * Causes the given rule to appear as an alternative named node, for instance
 * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named
 * node called `bar`.
 *
 * @param rule rule that will be aliased
 * @param symbol target symbol for the alias
 */
declare function alias(
  rule: RuleOrLiteral,
  symbol: SymbolRule<string>,
): AliasRule;

/**
 * Creates a blank rule, matching nothing.
 */
declare function blank(): BlankRule;

/**
 * Assigns a field name to the child node(s) matched by the given rule.
 * In the resulting syntax tree, you can then use that field name to
 * access specific children.
 *
 * @param name name of the field
 * @param rule rule the field should match
 */
declare function field(name: string, rule: RuleOrLiteral): FieldRule;

/**
 * Creates a rule that matches one of a set of possible rules. The order
 * of the arguments does not matter. This is analogous to the `|` (pipe)
 * operator in EBNF notation.
 *
 * @param options possible rule choices
 */
declare function choice(...options: RuleOrLiteral[]): ChoiceRule;
/**
 * Creates a rule that matches zero or one occurrence of a given rule.
 * It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
 *
 * @param rule rule to be made optional
 */
declare function optional(rule: RuleOrLiteral): ChoiceRule;
/**
 * Marks the given rule with a numerical precedence which will be used to
 * resolve LR(1) conflicts at parser-generation time. When two rules overlap
 * in a way that represents either a true ambiguity or a _local_ ambiguity
 * given one token of lookahead, Tree-sitter will try to resolve the conflict by
 * matching the rule with the higher precedence. The default precedence of all
 * rules is zero. This works similarly to the precedence directives in Yacc grammars.
 *
 * @param number precedence weight
 * @param rule rule being weighted
 *
 * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables
 * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
 */
declare const prec: {
  (number: number, rule: RuleOrLiteral): PrecRule;

  /**
   * Marks the given rule as left-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a left-associative rule, Tree-sitter
   * will prefer matching a rule that ends _earlier_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param number (optional) precedence weight
   * @param rule rule to mark as left-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  left(rule: RuleOrLiteral): PrecLeftRule;
  left(number: number, rule: RuleOrLiteral): PrecLeftRule;

  /**
   * Marks the given rule as right-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a right-associative rule, Tree-sitter
   * will prefer matching a rule that ends _later_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param number (optional) precedence weight
   * @param rule rule to mark as right-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  right(rule: RuleOrLiteral): PrecRightRule;
  right(number: number, rule: RuleOrLiteral): PrecRightRule;

  /**
   * Marks the given rule with a numerical precedence which will be used to
   * resolve LR(1) conflicts at _runtime_ instead of parser-generation time.
   * This is only necessary when handling a conflict dynamically using the
   * `conflicts` field in the grammar, and when there is a genuine _ambiguity_:
   * multiple rules correctly match a given piece of code. In that event,
   * Tree-sitter compares the total dynamic precedence associated with each
   * rule, and selects the one with the highest total. This is similar to
   * dynamic precedence directives in Bison grammars.
   *
   * @param number precedence weight
   * @param rule rule being weighted
   *
   * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
   */
  dynamic(number: number, rule: RuleOrLiteral): PrecDynamicRule;
};
/**
 * Creates a rule that matches _zero-or-more_ occurrences of a given rule.
 * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This
 * rule is implemented in terms of `repeat1` but is included because it
 * is very commonly used.
 *
 * @param rule rule to repeat, zero or more times
 */
declare function repeat(rule: RuleOrLiteral): RepeatRule;

/**
 * Creates a rule that matches one-or-more occurrences of a given rule.
 *
 * @param rule rule to repeat, one or more times
 */
declare function repeat1(rule: RuleOrLiteral): Repeat1Rule;

/**
 * Creates a rule that matches any number of other rules, one after another.
 * It is analogous to simply writing multiple symbols next to each other
 * in EBNF notation.
 *
 * @param rules ordered rules that comprise the sequence
 */
declare function seq(...rules: RuleOrLiteral[]): SeqRule;

/**
 * Creates a symbol rule, representing another rule in the grammar by name.
 *
 * @param name name of the target rule
 */
declare function sym<Name extends string>(name: Name): SymbolRule<Name>;
/**
 * Marks the given rule as producing only a single token. Tree-sitter's
 * default is to treat each String or RegExp literal in the grammar as a
 * separate token. Each token is matched separately by the lexer and
 * returned as its own leaf node in the tree. The token function allows
 * you to express a complex rule using the DSL functions (rather
 * than as a single regular expression) but still have Tree-sitter treat
 * it as a single token.
 *
 * @param rule rule to represent as a single token
 */
declare const token: {
  (rule: RuleOrLiteral): TokenRule;

  /**
   * Marks the given rule as producing an immediate token. This allows
   * the parser to produce a different token based on whether or not
   * there are `extras` preceding the token's main content. When there
   * are _no_ leading `extras`, an immediate token is preferred over a
   * normal token which would otherwise match.
   *
   * @param rule rule to represent as an immediate token
   */
  immediate(rule: RuleOrLiteral): ImmediateTokenRule;
};
/**
 * Creates a new language grammar with the provided schema.
 *
 * @param options grammar options
 */
declare function grammar<RuleName extends string>(
  options: Grammar<RuleName>,
): GrammarSchema<RuleName>;

/**
 * Extends an existing language grammar with the provided options,
 * creating a new language.
 *
 * @param baseGrammar base grammar schema to extend from
 * @param options grammar options for the new extended language
 */
declare function grammar<
  BaseGrammarRuleName extends string,
  RuleName extends string
>(
  baseGrammar: GrammarSchema<BaseGrammarRuleName>,
  options: Grammar<RuleName, BaseGrammarRuleName>,
): GrammarSchema<RuleName | BaseGrammarRuleName>;

View file

@ -1,6 +1,6 @@
{
"name": "tree-sitter-cli",
"version": "0.16.8",
"version": "0.16.9",
"author": "Max Brunsfeld",
"license": "MIT",
"repository": {

View file

@ -87,7 +87,7 @@ impl<'a> From<tree_sitter_highlight::Error> for Error {
impl<'a> From<tree_sitter_tags::Error> for Error {
fn from(error: tree_sitter_tags::Error) -> Self {
Error::new(format!("{:?}", error))
Error::new(format!("{}", error))
}
}

View file

@ -199,6 +199,9 @@ impl<'a> Minimizer<'a> {
right_state: &ParseState,
group_ids_by_state_id: &Vec<ParseStateId>,
) -> bool {
if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra {
return true;
}
for (token, left_entry) in &left_state.terminal_entries {
if let Some(right_entry) = right_state.terminal_entries.get(token) {
if self.entries_conflict(

View file

@ -19,7 +19,7 @@ pub(crate) struct FieldInfo {
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct VariableInfo {
pub fields: HashMap<String, FieldInfo>,
pub child_types: Vec<ChildType>,
pub children: FieldInfo,
pub children_without_fields: FieldInfo,
pub has_multi_step_production: bool,
}
@ -70,7 +70,7 @@ impl Default for FieldInfoJSON {
impl Default for ChildQuantity {
fn default() -> Self {
Self::zero()
Self::one()
}
}
@ -158,7 +158,7 @@ pub(crate) fn get_variable_info(
// Each variable's summary can depend on the summaries of other hidden variables,
// and variables can have mutually recursive structure. So we compute the summaries
// iteratively, in a loop that terminates only when more changes are possible.
// iteratively, in a loop that terminates only when no more changes are possible.
let mut did_change = true;
let mut all_initialized = false;
let mut result = vec![VariableInfo::default(); syntax_grammar.variables.len()];
@ -168,13 +168,14 @@ pub(crate) fn get_variable_info(
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
let mut variable_info = result[i].clone();
// Within a variable, consider each production separately. For each
// production, determine which children and fields can occur, and how many
// times they can occur.
for (production_index, production) in variable.productions.iter().enumerate() {
let mut field_quantities = HashMap::new();
let mut children_without_fields_quantity = ChildQuantity::zero();
let mut has_uninitialized_invisible_children = false;
// Examine each of the variable's productions. The variable's child types can be
// immediately combined across all productions, but the child quantities must be
// recorded separately for each production.
for production in &variable.productions {
let mut production_field_quantities = HashMap::new();
let mut production_children_quantity = ChildQuantity::zero();
let mut production_children_without_fields_quantity = ChildQuantity::zero();
let mut production_has_uninitialized_invisible_children = false;
if production.steps.len() > 1 {
variable_info.has_multi_step_production = true;
@ -190,111 +191,97 @@ pub(crate) fn get_variable_info(
ChildType::Normal(child_symbol)
};
// Record all of the types of direct children.
did_change |= sorted_vec_insert(&mut variable_info.child_types, &child_type);
let child_is_hidden = !child_type_is_visible(&child_type)
&& !syntax_grammar.supertype_symbols.contains(&child_symbol);
// Record all of the field names that occur.
// Maintain the set of all child types for this variable, and the quantity of
// visible children in this production.
did_change |=
extend_sorted(&mut variable_info.children.types, Some(&child_type));
if !child_is_hidden {
production_children_quantity.append(ChildQuantity::one());
}
// Maintain the set of child types associated with each field, and the quantity
// of children associated with each field in this production.
if let Some(field_name) = &step.field_name {
// Record how many times each field occurs in this production.
field_quantities
let field_info = variable_info
.fields
.entry(field_name.clone())
.or_insert(FieldInfo::default());
did_change |= extend_sorted(&mut field_info.types, Some(&child_type));
let production_field_quantity = production_field_quantities
.entry(field_name)
.or_insert(ChildQuantity::zero())
.append(ChildQuantity::one());
.or_insert(ChildQuantity::zero());
// Record the types of children for this field.
let field_info =
variable_info.fields.entry(field_name.clone()).or_insert({
let mut info = FieldInfo {
types: Vec::new(),
quantity: ChildQuantity::one(),
};
// If this field did *not* occur in an earlier production,
// then it is not required.
if production_index > 0 {
info.quantity.required = false;
}
info
});
did_change |= sorted_vec_insert(&mut field_info.types, &child_type);
}
// Record named children without fields.
else if child_type_is_named(&child_type) {
// Record how many named children without fields occur in this production.
children_without_fields_quantity.append(ChildQuantity::one());
// Record the types of all of the named children without fields.
let children_info = &mut variable_info.children_without_fields;
if children_info.types.is_empty() {
children_info.quantity = ChildQuantity::one();
// Inherit the types and quantities of hidden children associated with fields.
if child_is_hidden && child_symbol.is_non_terminal() {
let child_variable_info = &result[child_symbol.index];
did_change |= extend_sorted(
&mut field_info.types,
&child_variable_info.children.types,
);
production_field_quantity.append(child_variable_info.children.quantity);
} else {
production_field_quantity.append(ChildQuantity::one());
}
did_change |= sorted_vec_insert(&mut children_info.types, &child_type);
}
// Maintain the set of named children without fields within this variable.
else if child_type_is_named(&child_type) {
production_children_without_fields_quantity.append(ChildQuantity::one());
did_change |= extend_sorted(
&mut variable_info.children_without_fields.types,
Some(&child_type),
);
}
// Inherit information from any hidden children.
if child_symbol.is_non_terminal()
&& !syntax_grammar.supertype_symbols.contains(&child_symbol)
&& step.alias.is_none()
&& !child_type_is_visible(&child_type)
{
// Inherit all child information from hidden children.
if child_is_hidden && child_symbol.is_non_terminal() {
let child_variable_info = &result[child_symbol.index];
// If a hidden child can have multiple children, then this
// node can appear to have multiple children.
// If a hidden child can have multiple children, then its parent node can
// appear to have multiple children.
if child_variable_info.has_multi_step_production {
variable_info.has_multi_step_production = true;
}
// Inherit fields from this hidden child
// If a hidden child has fields, then the parent node can appear to have
// those same fields.
for (field_name, child_field_info) in &child_variable_info.fields {
field_quantities
production_field_quantities
.entry(field_name)
.or_insert(ChildQuantity::zero())
.append(child_field_info.quantity);
let field_info = variable_info
.fields
.entry(field_name.clone())
.or_insert(FieldInfo {
types: Vec::new(),
quantity: ChildQuantity::one(),
});
for child_type in &child_field_info.types {
sorted_vec_insert(&mut field_info.types, &child_type);
}
did_change |= extend_sorted(
&mut variable_info
.fields
.entry(field_name.clone())
.or_insert(FieldInfo::default())
.types,
&child_field_info.types,
);
}
// Inherit child types from this hidden child
for child_type in &child_variable_info.child_types {
did_change |=
sorted_vec_insert(&mut variable_info.child_types, child_type);
}
// If a hidden child has children, then the parent node can appear to have
// those same children.
production_children_quantity.append(child_variable_info.children.quantity);
did_change |= extend_sorted(
&mut variable_info.children.types,
&child_variable_info.children.types,
);
// If any field points to this hidden child, inherit child types
// for the field.
if let Some(field_name) = &step.field_name {
let field_info = variable_info.fields.get_mut(field_name).unwrap();
for child_type in &child_variable_info.child_types {
did_change |= sorted_vec_insert(&mut field_info.types, &child_type);
}
}
// Inherit info about children without fields from this hidden child.
else {
// If a hidden child can have named children without fields, then the parent
// node can appear to have those same children.
if step.field_name.is_none() {
let grandchildren_info = &child_variable_info.children_without_fields;
if !grandchildren_info.types.is_empty() {
children_without_fields_quantity
.append(grandchildren_info.quantity);
if variable_info.children_without_fields.types.is_empty() {
variable_info.children_without_fields.quantity =
ChildQuantity::one();
}
for child_type in &grandchildren_info.types {
did_change |= sorted_vec_insert(
&mut variable_info.children_without_fields.types,
&child_type,
);
}
production_children_without_fields_quantity
.append(child_variable_info.children_without_fields.quantity);
did_change |= extend_sorted(
&mut variable_info.children_without_fields.types,
&child_variable_info.children_without_fields.types,
);
}
}
}
@ -302,22 +289,27 @@ pub(crate) fn get_variable_info(
// Note whether or not this production contains children whose summaries
// have not yet been computed.
if child_symbol.index >= i && !all_initialized {
has_uninitialized_invisible_children = true;
production_has_uninitialized_invisible_children = true;
}
}
// If this production's children all have had their summaries initialized,
// then expand the quantity information with all of the possibilities introduced
// by this production.
if !has_uninitialized_invisible_children {
if !production_has_uninitialized_invisible_children {
did_change |= variable_info
.children
.quantity
.union(production_children_quantity);
did_change |= variable_info
.children_without_fields
.quantity
.union(children_without_fields_quantity);
.union(production_children_without_fields_quantity);
for (field_name, info) in variable_info.fields.iter_mut() {
did_change |= info.quantity.union(
field_quantities
production_field_quantities
.get(field_name)
.cloned()
.unwrap_or(ChildQuantity::zero()),
@ -352,13 +344,15 @@ pub(crate) fn get_variable_info(
// Update all of the node type lists to eliminate hidden nodes.
for supertype_symbol in &syntax_grammar.supertype_symbols {
result[supertype_symbol.index]
.child_types
.children
.types
.retain(child_type_is_visible);
}
for variable_info in result.iter_mut() {
for (_, field_info) in variable_info.fields.iter_mut() {
field_info.types.retain(child_type_is_visible);
}
variable_info.fields.retain(|_, v| !v.types.is_empty());
variable_info
.children_without_fields
.types
@ -467,7 +461,8 @@ pub(crate) fn generate_node_types_json(
subtypes: None,
});
let mut subtypes = info
.child_types
.children
.types
.iter()
.map(child_type_to_node_type)
.collect::<Vec<_>>();
@ -686,16 +681,19 @@ fn variable_type_for_child_type(
}
}
fn sorted_vec_insert<T>(vec: &mut Vec<T>, value: &T) -> bool
fn extend_sorted<'a, T>(vec: &mut Vec<T>, values: impl IntoIterator<Item = &'a T>) -> bool
where
T: Clone + Eq + Ord,
T: 'a,
{
if let Err(i) = vec.binary_search(&value) {
vec.insert(i, value.clone());
true
} else {
false
}
values.into_iter().any(|value| {
if let Err(i) = vec.binary_search(&value) {
vec.insert(i, value.clone());
true
} else {
false
}
})
}
#[cfg(test)]
@ -1177,6 +1175,38 @@ mod tests {
);
}
#[test]
fn test_node_types_with_fields_on_hidden_tokens() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
supertype_symbols: vec![],
variables: vec![Variable {
name: "script".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::field("a".to_string(), Rule::pattern("hi")),
Rule::field("b".to_string(), Rule::pattern("bye")),
]),
}],
});
assert_eq!(
node_types,
[NodeInfoJSON {
kind: "script".to_string(),
named: true,
fields: Some(BTreeMap::new()),
children: None,
subtypes: None
}]
);
}
#[test]
fn test_node_types_with_multiple_rules_same_alias_name() {
let node_types = get_node_types(InputGrammar {
@ -1461,6 +1491,71 @@ mod tests {
);
}
#[test]
fn test_get_variable_info_with_repetitions_inside_fields() {
let variable_info = get_variable_info(
&build_syntax_grammar(
vec![
// Field associated with a repetition.
SyntaxVariable {
name: "rule0".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::non_terminal(1))
.with_field_name("field1")],
},
Production {
dynamic_precedence: 0,
steps: vec![],
},
],
},
// Repetition node
SyntaxVariable {
name: "_rule0_repeat".to_string(),
kind: VariableType::Hidden,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(1))],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(1)),
],
},
],
},
],
vec![],
),
&build_lexical_grammar(),
&AliasMap::new(),
)
.unwrap();
assert_eq!(
variable_info[0].fields,
vec![(
"field1".to_string(),
FieldInfo {
quantity: ChildQuantity {
exists: true,
required: false,
multiple: true,
},
types: vec![ChildType::Normal(Symbol::terminal(1))],
}
)]
.into_iter()
.collect::<HashMap<_, _>>()
);
}
#[test]
fn test_get_variable_info_with_inherited_fields() {
let variable_info = get_variable_info(

View file

@ -127,6 +127,9 @@ impl InlinedProductionMapBuilder {
last_inserted_step.associativity = removed_step.associativity;
}
}
if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() {
production.dynamic_precedence = p.dynamic_precedence;
}
production
}),
);
@ -226,7 +229,7 @@ mod tests {
],
},
Production {
dynamic_precedence: 0,
dynamic_precedence: -2,
steps: vec![ProductionStep::new(Symbol::terminal(14))],
},
],
@ -258,7 +261,7 @@ mod tests {
],
},
Production {
dynamic_precedence: 0,
dynamic_precedence: -2,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(14)),

View file

@ -160,7 +160,9 @@ impl Loader {
// If multiple language configurations match, then determine which
// one to use by applying the configurations' content regexes.
else {
let file_contents = fs::read_to_string(path)?;
let file_contents = fs::read(path)
.map_err(Error::wrap(|| format!("Failed to read path {:?}", path)))?;
let file_contents = String::from_utf8_lossy(&file_contents);
let mut best_score = -2isize;
let mut best_configuration_id = None;
for configuration_id in configuration_ids {

View file

@ -53,11 +53,12 @@ fn run() -> error::Result<()> {
.subcommand(
SubCommand::with_name("parse")
.about("Parse files")
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("path")
Arg::with_name("paths")
.index(1)
.multiple(true)
.required(true),
.required(false),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("debug").long("debug").short("d"))
@ -79,37 +80,33 @@ fn run() -> error::Result<()> {
SubCommand::with_name("query")
.about("Search files using a syntax tree query")
.arg(Arg::with_name("query-path").index(1).required(true))
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("path")
Arg::with_name("paths")
.index(2)
.multiple(true)
.required(true),
.required(false),
)
.arg(
Arg::with_name("byte-range")
.help("The range of byte offsets in which the query will be executed")
.long("byte-range")
.takes_value(true),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("captures").long("captures").short("c")),
)
.subcommand(
SubCommand::with_name("tags")
.arg(
Arg::with_name("format")
.short("f")
.long("format")
.value_name("json|protobuf")
.help("Determine output format (default: json)"),
)
.arg(Arg::with_name("quiet").long("quiet").short("q"))
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("inputs")
Arg::with_name("paths")
.help("The source file to use")
.index(1)
.required(true)
.multiple(true),
)
.arg(
Arg::with_name("v")
.short("v")
.multiple(true)
.help("Sets the level of verbosity"),
),
)
.subcommand(
@ -127,11 +124,12 @@ fn run() -> error::Result<()> {
.subcommand(
SubCommand::with_name("highlight")
.about("Highlight a file")
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("path")
Arg::with_name("paths")
.index(1)
.multiple(true)
.required(true),
.required(false),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("html").long("html").short("h"))
@ -230,7 +228,9 @@ fn run() -> error::Result<()> {
let timeout = matches
.value_of("timeout")
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
let paths = collect_paths(matches.values_of("path").unwrap())?;
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap();
let mut has_error = false;
loader.find_all_languages(&config.parser_directories)?;
@ -256,31 +256,36 @@ fn run() -> error::Result<()> {
}
} else if let Some(matches) = matches.subcommand_matches("query") {
let ordered_captures = matches.values_of("captures").is_some();
let paths = matches
.values_of("path")
.unwrap()
.into_iter()
.map(Path::new)
.collect::<Vec<&Path>>();
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
loader.find_all_languages(&config.parser_directories)?;
let language = select_language(
&mut loader,
paths[0],
Path::new(&paths[0]),
&current_dir,
matches.value_of("scope"),
)?;
let query_path = Path::new(matches.value_of("query-path").unwrap());
query::query_files_at_paths(language, paths, query_path, ordered_captures)?;
let range = matches.value_of("byte-range").map(|br| {
let r: Vec<&str> = br.split(":").collect();
(r[0].parse().unwrap(), r[1].parse().unwrap())
});
query::query_files_at_paths(language, paths, query_path, ordered_captures, range)?;
} else if let Some(matches) = matches.subcommand_matches("tags") {
loader.find_all_languages(&config.parser_directories)?;
let paths = collect_paths(matches.values_of("inputs").unwrap())?;
tags::generate_tags(&loader, matches.value_of("scope"), &paths)?;
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
tags::generate_tags(
&loader,
matches.value_of("scope"),
&paths,
matches.is_present("quiet"),
matches.is_present("time"),
)?;
} else if let Some(matches) = matches.subcommand_matches("highlight") {
loader.configure_highlights(&config.theme.highlight_names);
loader.find_all_languages(&config.parser_directories)?;
let time = matches.is_present("time");
let paths = collect_paths(matches.values_of("path").unwrap())?;
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let html_mode = matches.is_present("html");
if html_mode {
println!("{}", highlight::HTML_HEADER);
@ -353,39 +358,58 @@ fn run() -> error::Result<()> {
Ok(())
}
fn collect_paths<'a>(paths: impl Iterator<Item = &'a str>) -> error::Result<Vec<String>> {
let mut result = Vec::new();
fn collect_paths<'a>(
paths_file: Option<&str>,
paths: Option<impl Iterator<Item = &'a str>>,
) -> error::Result<Vec<String>> {
if let Some(paths_file) = paths_file {
return Ok(fs::read_to_string(paths_file)
.map_err(Error::wrap(|| {
format!("Failed to read paths file {}", paths_file)
}))?
.trim()
.split_ascii_whitespace()
.map(String::from)
.collect::<Vec<_>>());
}
let mut incorporate_path = |path: &str, positive| {
if positive {
result.push(path.to_string());
} else {
if let Some(index) = result.iter().position(|p| p == path) {
result.remove(index);
if let Some(paths) = paths {
let mut result = Vec::new();
let mut incorporate_path = |path: &str, positive| {
if positive {
result.push(path.to_string());
} else {
if let Some(index) = result.iter().position(|p| p == path) {
result.remove(index);
}
}
}
};
};
for mut path in paths {
let mut positive = true;
if path.starts_with("!") {
positive = false;
path = path.trim_start_matches("!");
}
for mut path in paths {
let mut positive = true;
if path.starts_with("!") {
positive = false;
path = path.trim_start_matches("!");
}
if Path::new(path).exists() {
incorporate_path(path, positive);
} else {
let paths =
glob(path).map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?;
for path in paths {
if let Some(path) = path?.to_str() {
incorporate_path(path, positive);
if Path::new(path).exists() {
incorporate_path(path, positive);
} else {
let paths = glob(path)
.map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?;
for path in paths {
if let Some(path) = path?.to_str() {
incorporate_path(path, positive);
}
}
}
}
return Ok(result);
}
Ok(result)
Err(Error::new("Must provide one or more paths".to_string()))
}
fn select_language(

View file

@ -6,9 +6,10 @@ use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
pub fn query_files_at_paths(
language: Language,
paths: Vec<&Path>,
paths: Vec<String>,
query_path: &Path,
ordered_captures: bool,
range: Option<(usize, usize)>,
) -> Result<()> {
let stdout = io::stdout();
let mut stdout = stdout.lock();
@ -20,14 +21,17 @@ pub fn query_files_at_paths(
.map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?;
let mut query_cursor = QueryCursor::new();
if let Some((beg, end)) = range {
query_cursor.set_byte_range(beg, end);
}
let mut parser = Parser::new();
parser.set_language(language).map_err(|e| e.to_string())?;
for path in paths {
writeln!(&mut stdout, "{}", path.to_str().unwrap())?;
writeln!(&mut stdout, "{}", path)?;
let source_code = fs::read(path).map_err(Error::wrap(|| {
let source_code = fs::read(&path).map_err(Error::wrap(|| {
format!("Error reading source file {:?}", path)
}))?;
let text_callback = |n: Node| &source_code[n.byte_range()];

View file

@ -3,10 +3,17 @@ use super::util;
use crate::error::{Error, Result};
use std::io::{self, Write};
use std::path::Path;
use std::time::Instant;
use std::{fs, str};
use tree_sitter_tags::TagsContext;
pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> Result<()> {
pub fn generate_tags(
loader: &Loader,
scope: Option<&str>,
paths: &[String],
quiet: bool,
time: bool,
) -> Result<()> {
let mut lang = None;
if let Some(scope) = scope {
lang = loader.language_configuration_for_scope(scope)?;
@ -34,28 +41,50 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) ->
};
if let Some(tags_config) = language_config.tags_config(language)? {
let path_str = format!("{:?}", path);
writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?;
let indent;
if paths.len() > 1 {
if !quiet {
writeln!(&mut stdout, "{}", path.to_string_lossy())?;
}
indent = "\t"
} else {
indent = "";
};
let source = fs::read(path)?;
for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? {
let t0 = Instant::now();
for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))?.0 {
let tag = tag?;
write!(
&mut stdout,
" {:<8} {:<40}\t{:>9}-{:<9}",
tag.kind,
str::from_utf8(&source[tag.name_range]).unwrap_or(""),
tag.span.start,
tag.span.end,
)?;
if let Some(docs) = tag.docs {
if docs.len() > 120 {
write!(&mut stdout, "\t{:?}...", &docs[0..120])?;
} else {
write!(&mut stdout, "\t{:?}", &docs)?;
if !quiet {
write!(
&mut stdout,
"{}{:<10}\t | {:<8}\t{} {} - {} `{}`",
indent,
str::from_utf8(&source[tag.name_range]).unwrap_or(""),
&tags_config.syntax_type_name(tag.syntax_type_id),
if tag.is_definition { "def" } else { "ref" },
tag.span.start,
tag.span.end,
str::from_utf8(&source[tag.line_range]).unwrap_or(""),
)?;
if let Some(docs) = tag.docs {
if docs.len() > 120 {
write!(&mut stdout, "\t{:?}...", &docs[0..120])?;
} else {
write!(&mut stdout, "\t{:?}", &docs)?;
}
}
writeln!(&mut stdout, "")?;
}
writeln!(&mut stdout, "")?;
}
if time {
writeln!(
&mut stdout,
"{}time: {}ms",
indent,
t0.elapsed().as_millis(),
)?;
}
} else {
eprintln!("No tags config found for path {:?}", path);

View file

@ -3,6 +3,7 @@ mod helpers;
mod highlight_test;
mod node_test;
mod parser_test;
mod pathological_test;
mod query_test;
mod tags_test;
mod test_highlight_test;

View file

@ -0,0 +1,15 @@
use super::helpers::allocations;
use super::helpers::fixtures::get_language;
use tree_sitter::Parser;
#[test]
fn test_pathological_example_1() {
let language = "cpp";
let source = r#"*ss<s"ss<sqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<qssqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<sqss<sqss<s._<s<sq>(qqX<sqss<s.ss<sqsssq<(qss<sq&=ss<s<sqss<s._<s<sq<(qqX<sqss<s.ss<sqs"#;
allocations::record(|| {
let mut parser = Parser::new();
parser.set_language(get_language(language)).unwrap();
parser.parse(source, None).unwrap();
});
}

View file

@ -408,7 +408,7 @@ fn test_query_matches_with_many_overlapping_results() {
)
.unwrap();
let count = 80;
let count = 1024;
// Deeply nested chained function calls:
// a
@ -573,8 +573,8 @@ fn test_query_matches_with_immediate_siblings() {
&[
(0, vec![("parent", "a"), ("child", "b")]),
(0, vec![("parent", "b"), ("child", "c")]),
(1, vec![("last-child", "d")]),
(0, vec![("parent", "c"), ("child", "d")]),
(1, vec![("last-child", "d")]),
(2, vec![("first-element", "w")]),
(2, vec![("first-element", "1")]),
],
@ -758,6 +758,55 @@ fn test_query_matches_with_nested_repetitions() {
});
}
#[test]
fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pattern() {
allocations::record(|| {
let language = get_language("javascript");
// When this query sees a comment, it must keep track of several potential
// matches: up to two for each pattern that begins with a comment.
let query = Query::new(
language,
r#"
(call_expression
function: (member_expression
property: (property_identifier) @name)) @ref.method
((comment)* @doc (function_declaration))
((comment)* @doc (generator_function_declaration))
((comment)* @doc (class_declaration))
((comment)* @doc (lexical_declaration))
((comment)* @doc (variable_declaration))
((comment)* @doc (method_definition))
(comment) @comment
"#,
)
.unwrap();
// Here, a series of comments occurs in the middle of a match of the first
// pattern. To avoid exceeding the storage limits and discarding that outer
// match, the comment-related matches need to be managed efficiently.
let source = format!(
"theObject\n{}\n.theMethod()",
" // the comment\n".repeat(64)
);
assert_query_matches(
language,
&query,
&source,
&vec![(7, vec![("comment", "// the comment")]); 64]
.into_iter()
.chain(vec![(
0,
vec![("ref.method", source.as_str()), ("name", "theMethod")],
)])
.collect::<Vec<_>>(),
);
});
}
#[test]
fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() {
allocations::record(|| {
@ -1161,6 +1210,43 @@ fn test_query_matches_with_too_many_permutations_to_track() {
});
}
#[test]
fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
"
(
(comment) @doc
; not immediate
(class_declaration) @class
)
(call_expression
function: [
(identifier) @function
(member_expression property: (property_identifier) @method)
])
",
)
.unwrap();
let source = "/* hi */ a.b(); ".repeat(50);
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(&source));
assert_eq!(
collect_matches(matches, &query, source.as_str()),
vec![(1, vec![("method", "b")]); 50],
);
});
}
#[test]
fn test_query_matches_with_anonymous_tokens() {
allocations::record(|| {
@ -1215,6 +1301,45 @@ fn test_query_matches_within_byte_range() {
});
}
#[test]
fn test_query_captures_within_byte_range() {
allocations::record(|| {
let language = get_language("c");
let query = Query::new(
language,
"
(call_expression
function: (identifier) @function
arguments: (argument_list (string_literal) @string.arg))
(string_literal) @string
",
)
.unwrap();
let source = r#"DEFUN ("safe-length", Fsafe_length, Ssafe_length, 1, 1, 0)"#;
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
let captures =
cursor
.set_byte_range(3, 27)
.captures(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_captures(captures, &query, source),
&[
("function", "DEFUN"),
("string.arg", "\"safe-length\""),
("string", "\"safe-length\""),
]
);
});
}
#[test]
fn test_query_matches_different_queries_same_cursor() {
allocations::record(|| {
@ -1420,12 +1545,17 @@ fn test_query_captures_with_text_conditions() {
((identifier) @function.builtin
(#eq? @function.builtin "require"))
(identifier) @variable
((identifier) @variable
(#not-match? @variable "^(lambda|load)$"))
"#,
)
.unwrap();
let source = "
toad
load
panda
lambda
const ab = require('./ab');
new Cd(EF);
";
@ -1439,6 +1569,8 @@ fn test_query_captures_with_text_conditions() {
assert_eq!(
collect_captures(captures, &query, source),
&[
("variable", "toad"),
("variable", "panda"),
("variable", "ab"),
("function.builtin", "require"),
("variable", "require"),
@ -2074,6 +2206,39 @@ fn test_query_disable_pattern() {
});
}
#[test]
fn test_query_alternative_predicate_prefix() {
allocations::record(|| {
let language = get_language("c");
let query = Query::new(
language,
r#"
((call_expression
function: (identifier) @keyword
arguments: (argument_list
(string_literal) @function))
(.eq? @keyword "DEFUN"))
"#,
)
.unwrap();
let source = r#"
DEFUN ("identity", Fidentity, Sidentity, 1, 1, 0,
doc: /* Return the argument unchanged. */
attributes: const)
(Lisp_Object arg)
{
return arg;
}
"#;
assert_query_matches(
language,
&query,
source,
&[(0, vec![("keyword", "DEFUN"), ("function", "\"identity\"")])],
);
});
}
#[test]
fn test_query_is_definite() {
struct Row {
@ -2086,10 +2251,7 @@ fn test_query_is_definite() {
Row {
language: get_language("python"),
pattern: r#"(expression_statement (string))"#,
results_by_symbol: &[
("expression_statement", false),
("string", false),
],
results_by_symbol: &[("expression_statement", false), ("string", false)],
},
Row {
language: get_language("javascript"),
@ -2102,30 +2264,17 @@ fn test_query_is_definite() {
Row {
language: get_language("javascript"),
pattern: r#"(object "{" "}")"#,
results_by_symbol: &[
("object", false),
("{", true),
("}", true),
],
results_by_symbol: &[("object", false), ("{", true), ("}", true)],
},
Row {
language: get_language("javascript"),
pattern: r#"(pair (property_identifier) ":")"#,
results_by_symbol: &[
("pair", false),
("property_identifier", false),
(":", true),
],
results_by_symbol: &[("pair", false), ("property_identifier", false), (":", true)],
},
Row {
language: get_language("javascript"),
pattern: r#"(object "{" (_) "}")"#,
results_by_symbol: &[
("object", false),
("{", false),
("", false),
("}", true),
],
results_by_symbol: &[("object", false), ("{", false), ("", false), ("}", true)],
},
Row {
language: get_language("javascript"),

View file

@ -1,73 +1,81 @@
use super::helpers::allocations;
use super::helpers::fixtures::{get_language, get_language_queries_path};
use std::ffi::CStr;
use std::ffi::CString;
use std::{fs, ptr, slice, str};
use tree_sitter::Point;
use tree_sitter_tags::c_lib as c;
use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext};
use tree_sitter_tags::{Error, TagsConfiguration, TagsContext};
const PYTHON_TAG_QUERY: &'static str = r#"
(
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @definition.function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(function_definition
name: (identifier) @name) @function
name: (identifier) @name) @definition.function
(
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @definition.class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(class_definition
name: (identifier) @name) @class
name: (identifier) @name) @definition.class
(call
function: (identifier) @name) @call
function: (identifier) @name) @reference.call
(call
function: (attribute
attribute: (identifier) @name)) @reference.call
"#;
const JS_TAG_QUERY: &'static str = r#"
(
(comment)* @doc .
(class_declaration
name: (identifier) @name) @class
(#select-adjacent! @doc @class)
name: (identifier) @name) @definition.class
(#select-adjacent! @doc @definition.class)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(method_definition
name: (property_identifier) @name) @method
(#select-adjacent! @doc @method)
name: (property_identifier) @name) @definition.method
(#select-adjacent! @doc @definition.method)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(function_declaration
name: (identifier) @name) @function
(#select-adjacent! @doc @function)
name: (identifier) @name) @definition.function
(#select-adjacent! @doc @definition.function)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(call_expression
function: (identifier) @name) @call
function: (identifier) @name) @reference.call
"#;
const RUBY_TAG_QUERY: &'static str = r#"
(method
name: (identifier) @name) @method
name: (_) @name) @definition.method
(method_call
method: (identifier) @name) @call
method: (identifier) @name) @reference.call
((identifier) @name @call
(setter (identifier) @ignore)
((identifier) @name @reference.call
(#is-not? local))
"#;
@ -94,25 +102,26 @@ fn test_tags_python() {
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (substr(source, &t.name_range), t.kind))
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", TagKind::Class),
("age", TagKind::Function),
("compute_age", TagKind::Call),
("Customer", "class"),
("age", "function"),
("compute_age", "call"),
]
);
assert_eq!(substr(source, &tags[0].line_range), " class Customer:");
assert_eq!(
substr(source, &tags[1].line_range),
" def age(self):"
);
assert_eq!(substr(source, &tags[0].line_range), "class Customer:");
assert_eq!(substr(source, &tags[1].line_range), "def age(self):");
assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer");
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
}
@ -145,17 +154,22 @@ fn test_tags_javascript() {
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (substr(source, &t.name_range), t.kind))
.map(|t| (
substr(source, &t.name_range),
t.span.clone(),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", TagKind::Class),
("getAge", TagKind::Method),
("Agent", TagKind::Class)
("Customer", Point::new(5, 10)..Point::new(5, 18), "class",),
("getAge", Point::new(9, 8)..Point::new(9, 14), "method",),
("Agent", Point::new(15, 10)..Point::new(15, 15), "class",)
]
);
assert_eq!(
@ -166,6 +180,27 @@ fn test_tags_javascript() {
assert_eq!(tags[2].docs, None);
}
#[test]
fn test_tags_columns_measured_in_utf16_code_units() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes();
let tag = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.next()
.unwrap()
.unwrap();
assert_eq!(substr(source, &tag.name_range), "hello_α");
assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32));
assert_eq!(tag.utf16_column_range, 9..18);
}
#[test]
fn test_tags_ruby() {
let language = get_language("ruby");
@ -177,7 +212,7 @@ fn test_tags_ruby() {
"
b = 1
def foo()
def foo=()
c = 1
# a is a method because it is not in scope
@ -197,6 +232,7 @@ fn test_tags_ruby() {
let tags = tag_context
.generate_tags(&tags_config, source.as_bytes(), None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
@ -204,18 +240,18 @@ fn test_tags_ruby() {
tags.iter()
.map(|t| (
substr(source.as_bytes(), &t.name_range),
t.kind,
tags_config.syntax_type_name(t.syntax_type_id),
(t.span.start.row, t.span.start.column),
))
.collect::<Vec<_>>(),
&[
("foo", TagKind::Method, (2, 0)),
("bar", TagKind::Call, (7, 4)),
("a", TagKind::Call, (7, 8)),
("b", TagKind::Call, (7, 11)),
("each", TagKind::Call, (9, 14)),
("baz", TagKind::Call, (13, 8)),
("b", TagKind::Call, (13, 15),),
("foo=", "method", (2, 4)),
("bar", "call", (7, 4)),
("a", "call", (7, 8)),
("b", "call", (7, 11)),
("each", "call", (9, 14)),
("baz", "call", (13, 8)),
("b", "call", (13, 15),),
]
);
}
@ -239,7 +275,7 @@ fn test_tags_cancellation() {
.generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag))
.unwrap();
for (i, tag) in tags.enumerate() {
for (i, tag) in tags.0.enumerate() {
if i == 150 {
cancellation_flag.store(1, Ordering::SeqCst);
}
@ -253,6 +289,47 @@ fn test_tags_cancellation() {
});
}
#[test]
fn test_invalid_capture() {
let language = get_language("python");
let e = TagsConfiguration::new(language, "(identifier) @method", "")
.expect_err("expected InvalidCapture error");
assert_eq!(e, Error::InvalidCapture("method".to_string()));
}
#[test]
fn test_tags_with_parse_error() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = br#"
class Fine: pass
class Bad
"#;
let (tags, failed) = tag_context
.generate_tags(&tags_config, source, None)
.unwrap();
let newtags = tags.collect::<Result<Vec<_>, _>>().unwrap();
assert!(failed, "syntax error should have been detected");
assert_eq!(
newtags.iter()
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Fine", "class"),
]
);
}
#[test]
fn test_tags_via_c_api() {
allocations::record(|| {
@ -316,29 +393,29 @@ fn test_tags_via_c_api() {
})
.unwrap();
let syntax_types: Vec<&str> = unsafe {
let mut len: u32 = 0;
let ptr =
c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len);
slice::from_raw_parts(ptr, len as usize)
.iter()
.map(|i| CStr::from_ptr(*i).to_str().unwrap())
.collect()
};
assert_eq!(
tags.iter()
.map(|tag| (
tag.kind,
syntax_types[tag.syntax_type_id as usize],
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize],
&source_code[tag.line_start_byte as usize..tag.line_end_byte as usize],
&docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize],
))
.collect::<Vec<_>>(),
&[
(
c::TSTagKind::Function,
"b",
"function b() {",
"one\ntwo\nthree"
),
(
c::TSTagKind::Class,
"C",
"class C extends D {",
"four\nfive"
),
(c::TSTagKind::Call, "b", "b(a);", "")
("function", "b", "function b() {", "one\ntwo\nthree"),
("class", "C", "class C extends D {", "four\nfive"),
("call", "b", "b(a);", "")
]
);

View file

@ -1,3 +1,4 @@
use super::error::{Error, Result};
use std::io;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
@ -31,12 +32,12 @@ pub struct LogSession();
pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
#[cfg(windows)]
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result<LogSession> {
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result<LogSession> {
Ok(LogSession())
}
#[cfg(unix)]
pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession> {
pub fn log_graphs(parser: &mut Parser, path: &str) -> Result<LogSession> {
use std::io::Write;
let mut dot_file = std::fs::File::create(path)?;
@ -46,11 +47,13 @@ pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession
.stdin(Stdio::piped())
.stdout(dot_file)
.spawn()
.expect("Failed to run Dot");
.map_err(Error::wrap(|| {
"Failed to run the `dot` command. Check that graphviz is installed."
}))?;
let dot_stdin = dot_process
.stdin
.take()
.expect("Failed to open stdin for Dot");
.ok_or_else(|| Error::new("Failed to open stdin for `dot` process.".to_string()))?;
parser.print_dot_graphs(&dot_stdin);
Ok(LogSession(
PathBuf::from(path),

View file

@ -13,7 +13,7 @@ Developing Tree-sitter grammars can have a difficult learning curve, but once yo
In order to develop a Tree-sitter parser, there are two dependencies that you need to install:
* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. It shouldn't matter what version of Node you have.
* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. You'll need Node.js version 6.0 or greater.
* **A C Compiler** - Tree-sitter creates parsers that are written in C. In order to run and test these parsers with the `tree-sitter parse` or `tree-sitter test` commands, you must have a C/C++ compiler installed. Tree-sitter will try to look for these compilers in the standard places for each platform.
### Installation
@ -505,6 +505,8 @@ Grammars often contain multiple tokens that can match the same characters. For e
4. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`.
5. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar.
### Keywords
Many languages have a set of *keyword* tokens (e.g. `if`, `for`, `return`), as well as a more general token (e.g. `identifier`) that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which is used as a binary operator, like this:

View file

@ -385,6 +385,14 @@ The following query would specify that the contents of the heredoc should be par
(heredoc_end) @injection.language) @injection.content
```
You can also force the language using the `#set!` predicate.
For example, this will force the language to be always `ruby`.
```
((heredoc_body) @injection.content
(#set! injection.language "ruby"))
```
## Unit Testing
Tree-sitter has a built-in way to verify the results of syntax highlighting. The interface is based on [Sublime Text's system](https://www.sublimetext.com/docs/3/syntax.html#testing) for testing highlighting.

View file

@ -10,6 +10,8 @@ use tree_sitter::{
};
const CANCELLATION_CHECK_INTERVAL: usize = 100;
const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;
/// Indicates which highlight should be applied to a region of source code.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
@ -620,7 +622,7 @@ where
type Item = Result<HighlightEvent, Error>;
fn next(&mut self) -> Option<Self::Item> {
loop {
'main: loop {
// If we've already determined the next highlight boundary, just return it.
if let Some(e) = self.next_event.take() {
return Some(Ok(e));
@ -640,29 +642,34 @@ where
// If none of the layers have any more highlight boundaries, terminate.
if self.layers.is_empty() {
if self.byte_offset < self.source.len() {
return if self.byte_offset < self.source.len() {
let result = Some(Ok(HighlightEvent::Source {
start: self.byte_offset,
end: self.source.len(),
}));
self.byte_offset = self.source.len();
return result;
result
} else {
return None;
}
None
};
}
// Get the next capture from whichever layer has the earliest highlight boundary.
let match_;
let mut captures;
let mut capture;
let mut pattern_index;
let range;
let layer = &mut self.layers[0];
if let Some((m, capture_index)) = layer.captures.peek() {
match_ = m;
captures = match_.captures;
pattern_index = match_.pattern_index;
capture = captures[*capture_index];
if let Some((next_match, capture_index)) = layer.captures.peek() {
let next_capture = next_match.captures[*capture_index];
range = next_capture.node.byte_range();
// If any previous highlight ends before this node starts, then before
// processing this capture, emit the source code up until the end of the
// previous highlight, and an end event for that highlight.
if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
if end_byte <= range.start {
layer.highlight_end_stack.pop();
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
}
}
}
// If there are no more captures, then emit any remaining highlight end events.
// And if there are none of those, then just advance to the end of the document.
@ -673,30 +680,17 @@ where
return self.emit_event(self.source.len(), None);
};
// If any previous highlight ends before this node starts, then before
// processing this capture, emit the source code up until the end of the
// previous highlight, and an end event for that highlight.
let range = capture.node.byte_range();
if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
if end_byte <= range.start {
layer.highlight_end_stack.pop();
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
}
}
// Remove from the local scope stack any local scopes that have already ended.
while range.start > layer.scope_stack.last().unwrap().range.end {
layer.scope_stack.pop();
}
let (mut match_, capture_index) = layer.captures.next().unwrap();
let mut capture = match_.captures[capture_index];
// If this capture represents an injection, then process the injection.
if pattern_index < layer.config.locals_pattern_index {
if match_.pattern_index < layer.config.locals_pattern_index {
let (language_name, content_node, include_children) =
injection_for_match(&layer.config, &layer.config.query, match_, &self.source);
injection_for_match(&layer.config, &layer.config.query, &match_, &self.source);
// Explicitly remove this match so that none of its other captures will remain
// in the stream of captures. The `unwrap` is ok because
layer.captures.next().unwrap().0.remove();
// in the stream of captures.
match_.remove();
// If a language is found with the given name, then add a new language layer
// to the highlighted document.
@ -729,16 +723,19 @@ where
}
self.sort_layers();
continue;
continue 'main;
}
layer.captures.next();
// Remove from the local scope stack any local scopes that have already ended.
while range.start > layer.scope_stack.last().unwrap().range.end {
layer.scope_stack.pop();
}
// If this capture is for tracking local variables, then process the
// local variable info.
let mut reference_highlight = None;
let mut definition_highlight = None;
while pattern_index < layer.config.highlights_pattern_index {
while match_.pattern_index < layer.config.highlights_pattern_index {
// If the node represents a local scope, push a new local scope onto
// the scope stack.
if Some(capture.index) == layer.config.local_scope_capture_index {
@ -748,7 +745,7 @@ where
range: range.clone(),
local_defs: Vec::new(),
};
for prop in layer.config.query.property_settings(pattern_index) {
for prop in layer.config.query.property_settings(match_.pattern_index) {
match prop.key.as_ref() {
"local.scope-inherits" => {
scope.inherits =
@ -767,7 +764,7 @@ where
let scope = layer.scope_stack.last_mut().unwrap();
let mut value_range = 0..0;
for capture in captures {
for capture in match_.captures {
if Some(capture.index) == layer.config.local_def_value_capture_index {
value_range = capture.node.byte_range();
}
@ -810,84 +807,76 @@ where
}
}
// Continue processing any additional local-variable-tracking patterns
// for the same node.
// Continue processing any additional matches for the same node.
if let Some((next_match, next_capture_index)) = layer.captures.peek() {
let next_capture = next_match.captures[*next_capture_index];
if next_capture.node == capture.node {
pattern_index = next_match.pattern_index;
captures = next_match.captures;
capture = next_capture;
layer.captures.next();
match_ = layer.captures.next().unwrap().0;
continue;
} else {
break;
}
}
break;
self.sort_layers();
continue 'main;
}
// Otherwise, this capture must represent a highlight.
let mut has_highlight = true;
// If this exact range has already been highlighted by an earlier pattern, or by
// a different layer, then skip over this one.
if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
if range.start == last_start && range.end == last_end && layer.depth < last_depth {
has_highlight = false;
self.sort_layers();
continue 'main;
}
}
// If the current node was found to be a local variable, then skip over any
// highlighting patterns that are disabled for local variables.
while has_highlight
&& (definition_highlight.is_some() || reference_highlight.is_some())
&& layer.config.non_local_variable_patterns[pattern_index]
{
has_highlight = false;
if let Some((next_match, next_capture_index)) = layer.captures.peek() {
let next_capture = next_match.captures[*next_capture_index];
if next_capture.node == capture.node {
capture = next_capture;
has_highlight = true;
pattern_index = next_match.pattern_index;
layer.captures.next();
continue;
if definition_highlight.is_some() || reference_highlight.is_some() {
while layer.config.non_local_variable_patterns[match_.pattern_index] {
if let Some((next_match, next_capture_index)) = layer.captures.peek() {
let next_capture = next_match.captures[*next_capture_index];
if next_capture.node == capture.node {
capture = next_capture;
match_ = layer.captures.next().unwrap().0;
continue;
}
}
self.sort_layers();
continue 'main;
}
break;
}
if has_highlight {
// Once a highlighting pattern is found for the current node, skip over
// any later highlighting patterns that also match this node. Captures
// for a given node are ordered by pattern index, so these subsequent
// captures are guaranteed to be for highlighting, not injections or
// local variables.
while let Some((next_match, next_capture_index)) = layer.captures.peek() {
if next_match.captures[*next_capture_index].node == capture.node {
layer.captures.next();
} else {
break;
}
// Once a highlighting pattern is found for the current node, skip over
// any later highlighting patterns that also match this node. Captures
// for a given node are ordered by pattern index, so these subsequent
// captures are guaranteed to be for highlighting, not injections or
// local variables.
while let Some((next_match, next_capture_index)) = layer.captures.peek() {
let next_capture = next_match.captures[*next_capture_index];
if next_capture.node == capture.node {
layer.captures.next();
} else {
break;
}
}
let current_highlight = layer.config.highlight_indices[capture.index as usize];
let current_highlight = layer.config.highlight_indices[capture.index as usize];
// If this node represents a local definition, then store the current
// highlight value on the local scope entry representing this node.
if let Some(definition_highlight) = definition_highlight {
*definition_highlight = current_highlight;
}
// If this node represents a local definition, then store the current
// highlight value on the local scope entry representing this node.
if let Some(definition_highlight) = definition_highlight {
*definition_highlight = current_highlight;
}
// Emit a scope start event and push the node's end position to the stack.
if let Some(highlight) = reference_highlight.or(current_highlight) {
self.last_highlight_range = Some((range.start, range.end, layer.depth));
layer.highlight_end_stack.push(range.end);
return self
.emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
}
// Emit a scope start event and push the node's end position to the stack.
if let Some(highlight) = reference_highlight.or(current_highlight) {
self.last_highlight_range = Some((range.start, range.end, layer.depth));
layer.highlight_end_stack.push(range.end);
return self
.emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
}
self.sort_layers();
@ -897,11 +886,13 @@ where
impl HtmlRenderer {
pub fn new() -> Self {
HtmlRenderer {
html: Vec::new(),
line_offsets: vec![0],
let mut result = HtmlRenderer {
html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY),
line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY),
carriage_return_highlight: None,
}
};
result.line_offsets.push(0);
result
}
pub fn set_carriage_return_highlight(&mut self, highlight: Option<Highlight>) {
@ -909,8 +900,8 @@ impl HtmlRenderer {
}
pub fn reset(&mut self) {
self.html.clear();
self.line_offsets.clear();
shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY);
shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY);
self.line_offsets.push(0);
}
@ -1074,3 +1065,11 @@ fn injection_for_match<'a>(
(language_name, content_node, include_children)
}
fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
if vec.len() > capacity {
vec.truncate(capacity);
vec.shrink_to_fit();
}
vec.clear();
}

View file

@ -170,7 +170,7 @@ pub enum QueryError {
enum TextPredicate {
CaptureEqString(u32, String, bool),
CaptureEqCapture(u32, u32, bool),
CaptureMatchString(u32, regex::bytes::Regex),
CaptureMatchString(u32, regex::bytes::Regex, bool),
}
impl Language {
@ -1314,7 +1314,7 @@ impl Query {
});
}
"match?" => {
"match?" | "not-match?" => {
if p.len() != 3 {
return Err(QueryError::Predicate(format!(
"Wrong number of arguments to #match? predicate. Expected 2, got {}.",
@ -1334,12 +1334,14 @@ impl Query {
)));
}
let is_positive = operator_name == "match?";
let regex = &string_values[p[2].value_id as usize];
text_predicates.push(TextPredicate::CaptureMatchString(
p[1].value_id,
regex::bytes::Regex::new(regex).map_err(|_| {
QueryError::Predicate(format!("Invalid regex '{}'", regex))
})?,
is_positive,
));
}
@ -1631,9 +1633,9 @@ impl<'a> QueryMatch<'a> {
let node = self.capture_for_index(*i).unwrap();
(text_callback(node).as_ref() == s.as_bytes()) == *is_positive
}
TextPredicate::CaptureMatchString(i, r) => {
TextPredicate::CaptureMatchString(i, r, is_positive) => {
let node = self.capture_for_index(*i).unwrap();
r.is_match(text_callback(node).as_ref())
r.is_match(text_callback(node).as_ref()) == *is_positive
}
})
}

View file

@ -787,6 +787,8 @@ class Language {
}
break;
case 'not-match?':
isPositive = false;
case 'match?':
if (steps.length !== 3) throw new Error(
`Wrong number of arguments to \`#match?\` predicate. Expected 2, got ${steps.length - 1}.`
@ -801,7 +803,7 @@ class Language {
const regex = new RegExp(steps[2].value);
textPredicates[i].push(function(captures) {
for (const c of captures) {
if (c.name === captureName) return regex.test(c.node.text);
if (c.name === captureName) return regex.test(c.node.text) === isPositive;
}
return false;
});

View file

@ -126,12 +126,17 @@ describe("Query", () => {
it("handles conditions that compare the text of capture to literal strings", () => {
tree = parser.parse(`
lambda
panda
load
toad
const ab = require('./ab');
new Cd(EF);
`);
query = JavaScript.query(`
(identifier) @variable
((identifier) @variable
(#not-match? @variable "^(lambda|load)$"))
((identifier) @function.builtin
(#eq? @function.builtin "require"))
@ -145,6 +150,8 @@ describe("Query", () => {
const captures = query.captures(tree.rootNode);
assert.deepEqual(formatCaptures(captures), [
{ name: "variable", text: "panda" },
{ name: "variable", text: "toad" },
{ name: "variable", text: "ab" },
{ name: "variable", text: "require" },
{ name: "function.builtin", text: "require" },

View file

@ -45,7 +45,7 @@ static inline bool ts_toggle_allocation_recording(bool value) {
static inline void *ts_malloc(size_t size) {
void *result = malloc(size);
if (size > 0 && !result) {
fprintf(stderr, "tree-sitter failed to allocate %lu bytes", size);
fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
exit(1);
}
return result;
@ -54,7 +54,7 @@ static inline void *ts_malloc(size_t size) {
static inline void *ts_calloc(size_t count, size_t size) {
void *result = calloc(count, size);
if (count > 0 && !result) {
fprintf(stderr, "tree-sitter failed to allocate %lu bytes", count * size);
fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
exit(1);
}
return result;
@ -63,7 +63,7 @@ static inline void *ts_calloc(size_t count, size_t size) {
static inline void *ts_realloc(void *buffer, size_t size) {
void *result = realloc(buffer, size);
if (size > 0 && !result) {
fprintf(stderr, "tree-sitter failed to reallocate %lu bytes", size);
fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
exit(1);
}
return result;

View file

@ -355,10 +355,14 @@ static Subtree ts_parser__lex(
StackVersion version,
TSStateId parse_state
) {
TSLexMode lex_mode = self->language->lex_modes[parse_state];
if (lex_mode.lex_state == (uint16_t)-1) {
LOG("no_lookahead_after_non_terminal_extra");
return NULL_SUBTREE;
}
Length start_position = ts_stack_position(self->stack, version);
Subtree external_token = ts_stack_last_external_token(self->stack, version);
TSLexMode lex_mode = self->language->lex_modes[parse_state];
if (lex_mode.lex_state == (uint16_t)-1) return NULL_SUBTREE;
const bool *valid_external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_lex_state
@ -761,20 +765,26 @@ static StackVersion ts_parser__reduce(
int dynamic_precedence,
uint16_t production_id,
bool is_fragile,
bool is_extra
bool end_of_non_terminal_extra
) {
uint32_t initial_version_count = ts_stack_version_count(self->stack);
uint32_t removed_version_count = 0;
StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
// Pop the given number of nodes from the given version of the parse stack.
// If stack versions have previously merged, then there may be more than one
// path back through the stack. For each path, create a new parent node to
// contain the popped children, and push it onto the stack in place of the
// children.
StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
uint32_t removed_version_count = 0;
for (uint32_t i = 0; i < pop.size; i++) {
StackSlice slice = pop.contents[i];
StackVersion slice_version = slice.version - removed_version_count;
// Error recovery can sometimes cause lots of stack versions to merge,
// such that a single pop operation can produce a lots of slices.
// Avoid creating too many stack versions in that situation.
if (i > 0 && slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
// This is where new versions are added to the parse stack. The versions
// will all be sorted and truncated at the end of the outer parsing loop.
// Allow the maximum version count to be temporarily exceeded, but only
// by a limited threshold.
if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
ts_stack_remove_version(self->stack, slice_version);
ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
removed_version_count++;
@ -826,7 +836,9 @@ static StackVersion ts_parser__reduce(
TSStateId state = ts_stack_state(self->stack, slice_version);
TSStateId next_state = ts_language_next_state(self->language, state, symbol);
if (is_extra) parent.ptr->extra = true;
if (end_of_non_terminal_extra && next_state == state) {
parent.ptr->extra = true;
}
if (is_fragile || pop.size > 1 || initial_version_count > 1) {
parent.ptr->fragile_left = true;
parent.ptr->fragile_right = true;
@ -1339,24 +1351,26 @@ static bool ts_parser__advance(
);
}
lex:
// Otherwise, re-run the lexer.
if (!lookahead.ptr) {
lookahead = ts_parser__lex(self, version, state);
if (lookahead.ptr) {
ts_parser__set_cached_token(self, position, last_external_token, lookahead);
ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
}
// When parsing a non-terminal extra, a null lookahead indicates the
// end of the rule. The reduction is stored in the EOF table entry.
// After the reduction, the lexer needs to be run again.
else {
ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
}
}
bool needs_lex = !lookahead.ptr;
for (;;) {
// Otherwise, re-run the lexer.
if (needs_lex) {
needs_lex = false;
lookahead = ts_parser__lex(self, version, state);
if (lookahead.ptr) {
ts_parser__set_cached_token(self, position, last_external_token, lookahead);
ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
}
// When parsing a non-terminal extra, a null lookahead indicates the
// end of the rule. The reduction is stored in the EOF table entry.
// After the reduction, the lexer needs to be run again.
else {
ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
}
}
// If a cancellation flag or a timeout was provided, then check every
// time a fixed number of parse actions has been processed.
if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
@ -1408,12 +1422,12 @@ lex:
case TSParseActionTypeReduce: {
bool is_fragile = table_entry.action_count > 1;
bool is_extra = lookahead.ptr == NULL;
bool end_of_non_terminal_extra = lookahead.ptr == NULL;
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.reduce.symbol), action.params.reduce.child_count);
StackVersion reduction_version = ts_parser__reduce(
self, version, action.params.reduce.symbol, action.params.reduce.child_count,
action.params.reduce.dynamic_precedence, action.params.reduce.production_id,
is_fragile, is_extra
is_fragile, end_of_non_terminal_extra
);
if (reduction_version != STACK_VERSION_NONE) {
last_reduction_version = reduction_version;
@ -1453,8 +1467,10 @@ lex:
// (and completing the non-terminal extra rule) run the lexer again based
// on the current parse state.
if (!lookahead.ptr) {
lookahead = ts_parser__lex(self, version, state);
needs_lex = true;
continue;
}
ts_language_table_entry(
self->language,
state,
@ -1464,6 +1480,11 @@ lex:
continue;
}
if (!lookahead.ptr) {
ts_stack_pause(self->stack, version, ts_builtin_sym_end);
return true;
}
// If there were no parse actions for the current lookahead token, then
// it is not valid in this state. If the current lookahead token is a
// keyword, then switch to treating it as the normal word token if that
@ -1503,8 +1524,7 @@ lex:
if (ts_parser__breakdown_top_of_stack(self, version)) {
state = ts_stack_state(self->stack, version);
ts_subtree_release(&self->tree_pool, lookahead);
lookahead = NULL_SUBTREE;
goto lex;
needs_lex = true;
continue;
}

View file

@ -11,7 +11,6 @@
// #define LOG(...) fprintf(stderr, __VA_ARGS__)
#define LOG(...)
#define MAX_STATE_COUNT 256
#define MAX_CAPTURE_LIST_COUNT 32
#define MAX_STEP_CAPTURE_COUNT 3
#define MAX_STATE_PREDECESSOR_COUNT 100
@ -51,7 +50,6 @@ typedef struct {
uint16_t alternative_index;
uint16_t depth;
bool contains_captures: 1;
bool is_pattern_start: 1;
bool is_immediate: 1;
bool is_last_child: 1;
bool is_pass_through: 1;
@ -128,9 +126,10 @@ typedef struct {
uint16_t step_index;
uint16_t pattern_index;
uint16_t capture_list_id;
uint16_t consumed_capture_count: 14;
uint16_t consumed_capture_count: 12;
bool seeking_immediate_match: 1;
bool has_in_progress_alternatives: 1;
bool dead: 1;
} QueryState;
typedef Array(TSQueryCapture) CaptureList;
@ -224,6 +223,7 @@ struct TSQueryCursor {
TSPoint start_point;
TSPoint end_point;
bool ascending;
bool halted;
};
static const TSQueryError PARENT_DONE = -1;
@ -500,7 +500,6 @@ static QueryStep query_step__new(
.alternative_index = NONE,
.contains_captures = false,
.is_last_child = false,
.is_pattern_start = false,
.is_pass_through = false,
.is_dead_end = false,
.is_definite = false,
@ -692,6 +691,23 @@ static inline void ts_query__pattern_map_insert(
) {
uint32_t index;
ts_query__pattern_map_search(self, symbol, &index);
// Ensure that the entries are sorted not only by symbol, but also
// by pattern_index. This way, states for earlier patterns will be
// initiated first, which allows the ordering of the states array
// to be maintained more efficiently.
while (index < self->pattern_map.size) {
PatternEntry *entry = &self->pattern_map.contents[index];
if (
self->steps.contents[entry->step_index].symbol == symbol &&
entry->pattern_index < pattern_index
) {
index++;
} else {
break;
}
}
array_insert(&self->pattern_map, index, ((PatternEntry) {
.step_index = start_step_index,
.pattern_index = pattern_index,
@ -1438,8 +1454,8 @@ static TSQueryError ts_query__parse_pattern(
}
}
// A pound character indicates the start of a predicate.
else if (stream->next == '#') {
// A dot/pound character indicates the start of a predicate.
else if (stream->next == '.' || stream->next == '#') {
stream_advance(stream);
return ts_query__parse_predicate(self, stream);
}
@ -1796,7 +1812,6 @@ TSQuery *ts_query_new(
// Maintain a map that can look up patterns for a given root symbol.
for (;;) {
QueryStep *step = &self->steps.contents[start_step_index];
step->is_pattern_start = true;
ts_query__pattern_map_insert(self, step->symbol, start_step_index, pattern_index);
if (step->symbol == WILDCARD_SYMBOL) {
self->wildcard_root_pattern_count++;
@ -1806,6 +1821,7 @@ TSQuery *ts_query_new(
// then add multiple entries to the pattern map.
if (step->alternative_index != NONE) {
start_step_index = step->alternative_index;
step->alternative_index = NONE;
} else {
break;
}
@ -1944,6 +1960,7 @@ TSQueryCursor *ts_query_cursor_new(void) {
TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
*self = (TSQueryCursor) {
.ascending = false,
.halted = false,
.states = array_new(),
.finished_states = array_new(),
.capture_list_pool = capture_list_pool_new(),
@ -1952,8 +1969,8 @@ TSQueryCursor *ts_query_cursor_new(void) {
.start_point = {0, 0},
.end_point = POINT_MAX,
};
array_reserve(&self->states, MAX_STATE_COUNT);
array_reserve(&self->finished_states, MAX_CAPTURE_LIST_COUNT);
array_reserve(&self->states, 8);
array_reserve(&self->finished_states, 8);
return self;
}
@ -1977,6 +1994,7 @@ void ts_query_cursor_exec(
self->next_state_id = 0;
self->depth = 0;
self->ascending = false;
self->halted = false;
self->query = query;
}
@ -2020,6 +2038,7 @@ static bool ts_query_cursor__first_in_progress_capture(
*pattern_index = UINT32_MAX;
for (unsigned i = 0; i < self->states.size; i++) {
const QueryState *state = &self->states.contents[i];
if (state->dead) continue;
const CaptureList *captures = capture_list_pool_get(
&self->capture_list_pool,
state->capture_list_id
@ -2114,65 +2133,138 @@ void ts_query_cursor__compare_captures(
}
}
static bool ts_query_cursor__add_state(
static void ts_query_cursor__add_state(
TSQueryCursor *self,
const PatternEntry *pattern
) {
if (self->states.size >= MAX_STATE_COUNT) {
LOG(" too many states");
return false;
QueryStep *step = &self->query->steps.contents[pattern->step_index];
uint32_t start_depth = self->depth - step->depth;
// Keep the states array in ascending order of start_depth and pattern_index,
// so that it can be processed more efficiently elsewhere. Usually, there is
// no work to do here because of two facts:
// * States with lower start_depth are naturally added first due to the
// order in which nodes are visited.
// * Earlier patterns are naturally added first because of the ordering of the
// pattern_map data structure that's used to initiate matches.
//
// This loop is only needed in cases where two conditions hold:
// * A pattern consists of more than one sibling node, so that its states
// remain in progress after exiting the node that started the match.
// * The first node in the pattern matches against multiple nodes at the
// same depth.
//
// An example of this is the pattern '((comment)* (function))'. If multiple
// `comment` nodes appear in a row, then we may initiate a new state for this
// pattern while another state for the same pattern is already in progress.
// If there are multiple patterns like this in a query, then this loop will
// need to execute in order to keep the states ordered by pattern_index.
uint32_t index = self->states.size;
while (index > 0) {
QueryState *prev_state = &self->states.contents[index - 1];
if (prev_state->start_depth < start_depth) break;
if (prev_state->start_depth == start_depth) {
if (prev_state->pattern_index < pattern->pattern_index) break;
if (prev_state->pattern_index == pattern->pattern_index) {
// Avoid unnecessarily inserting an unnecessary duplicate state,
// which would be immediately pruned by the longest-match criteria.
if (prev_state->step_index == pattern->step_index) return;
}
}
index--;
}
LOG(
" start state. pattern:%u, step:%u\n",
pattern->pattern_index,
pattern->step_index
);
QueryStep *step = &self->query->steps.contents[pattern->step_index];
array_push(&self->states, ((QueryState) {
array_insert(&self->states, index, ((QueryState) {
.capture_list_id = NONE,
.step_index = pattern->step_index,
.pattern_index = pattern->pattern_index,
.start_depth = self->depth - step->depth,
.start_depth = start_depth,
.consumed_capture_count = 0,
.seeking_immediate_match = false,
.seeking_immediate_match = true,
.has_in_progress_alternatives = false,
.dead = false,
}));
return true;
}
// Acquire a capture list for this state. If there are no capture lists left in the
// pool, this will steal the capture list from another existing state, and mark that
// other state as 'dead'.
static CaptureList *ts_query_cursor__prepare_to_capture(
TSQueryCursor *self,
QueryState *state,
unsigned state_index_to_preserve
) {
if (state->capture_list_id == NONE) {
state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
// If there are no capture lists left in the pool, then terminate whichever
// state has captured the earliest node in the document, and steal its
// capture list.
if (state->capture_list_id == NONE) {
uint32_t state_index, byte_offset, pattern_index;
if (
ts_query_cursor__first_in_progress_capture(
self,
&state_index,
&byte_offset,
&pattern_index
) &&
state_index != state_index_to_preserve
) {
LOG(
" abandon state. index:%u, pattern:%u, offset:%u.\n",
state_index, pattern_index, byte_offset
);
QueryState *other_state = &self->states.contents[state_index];
state->capture_list_id = other_state->capture_list_id;
other_state->capture_list_id = NONE;
other_state->dead = true;
CaptureList *list = capture_list_pool_get_mut(
&self->capture_list_pool,
state->capture_list_id
);
array_clear(list);
return list;
} else {
LOG(" ran out of capture lists");
return NULL;
}
}
}
return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
}
// Duplicate the given state and insert the newly-created state immediately after
// the given state in the `states` array.
static QueryState *ts_query__cursor_copy_state(
// the given state in the `states` array. Ensures that the given state reference is
// still valid, even if the states array is reallocated.
static QueryState *ts_query_cursor__copy_state(
TSQueryCursor *self,
const QueryState *state
QueryState **state_ref
) {
if (self->states.size >= MAX_STATE_COUNT) {
LOG(" too many states");
return NULL;
}
const QueryState *state = *state_ref;
uint32_t state_index = state - self->states.contents;
QueryState copy = *state;
copy.capture_list_id = NONE;
// If the state has captures, copy its capture list.
QueryState copy = *state;
copy.capture_list_id = state->capture_list_id;
if (state->capture_list_id != NONE) {
copy.capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
if (copy.capture_list_id == NONE) {
LOG(" too many capture lists");
return NULL;
}
CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, &copy, state_index);
if (!new_captures) return NULL;
const CaptureList *old_captures = capture_list_pool_get(
&self->capture_list_pool,
state->capture_list_id
);
CaptureList *new_captures = capture_list_pool_get_mut(
&self->capture_list_pool,
copy.capture_list_id
);
array_push_all(new_captures, old_captures);
}
uint32_t index = (state - self->states.contents) + 1;
array_insert(&self->states, index, copy);
return &self->states.contents[index];
array_insert(&self->states, state_index + 1, copy);
*state_ref = &self->states.contents[state_index];
return &self->states.contents[state_index + 1];
}
// Walk the tree, processing patterns until at least one pattern finishes,
@ -2180,18 +2272,30 @@ static QueryState *ts_query__cursor_copy_state(
// `finished_states` array. Multiple patterns can finish on the same node. If
// there are no more matches, return `false`.
static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
do {
bool did_match = false;
for (;;) {
if (self->halted) {
while (self->states.size > 0) {
QueryState state = array_pop(&self->states);
capture_list_pool_release(
&self->capture_list_pool,
state.capture_list_id
);
}
}
if (did_match || self->halted) return did_match;
if (self->ascending) {
LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor)));
// Leave this node by stepping to its next sibling or to its parent.
bool did_move = true;
if (ts_tree_cursor_goto_next_sibling(&self->cursor)) {
self->ascending = false;
} else if (ts_tree_cursor_goto_parent(&self->cursor)) {
self->depth--;
} else {
did_move = false;
self->halted = true;
}
// After leaving a node, remove any states that cannot make further progress.
@ -2203,10 +2307,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
// If a state completed its pattern inside of this node, but was deferred from finishing
// in order to search for longer matches, mark it as finished.
if (step->depth == PATTERN_DONE_MARKER) {
if (state->start_depth > self->depth || !did_move) {
if (state->start_depth > self->depth || self->halted) {
LOG(" finish pattern %u\n", state->pattern_index);
state->id = self->next_state_id++;
array_push(&self->finished_states, *state);
did_match = true;
deleted_count++;
continue;
}
@ -2233,10 +2338,6 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
}
}
self->states.size -= deleted_count;
if (!did_move) {
return self->finished_states.size > 0;
}
} else {
// If this node is before the selected range, then avoid descending into it.
TSNode node = ts_tree_cursor_current_node(&self->cursor);
@ -2254,7 +2355,10 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
if (
self->end_byte <= ts_node_start_byte(node) ||
point_lte(self->end_point, ts_node_start_point(node))
) return false;
) {
self->halted = true;
continue;
}
// Get the properties of the current node.
TSSymbol symbol = ts_node_symbol(node);
@ -2286,7 +2390,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
// If this node matches the first step of the pattern, then add a new
// state at the start of this pattern.
if (step->field && field_id != step->field) continue;
if (!ts_query_cursor__add_state(self, pattern)) break;
ts_query_cursor__add_state(self, pattern);
}
// Add new states for any patterns whose root node matches this node.
@ -2298,7 +2402,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
// If this node matches the first step of the pattern, then add a new
// state at the start of this pattern.
if (step->field && field_id != step->field) continue;
if (!ts_query_cursor__add_state(self, pattern)) break;
ts_query_cursor__add_state(self, pattern);
// Advance to the next pattern whose root node matches this node.
i++;
@ -2366,12 +2470,8 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
// parent, then this query state cannot simply be updated in place. It must be
// split into two states: one that matches this node, and one which skips over
// this node, to preserve the possibility of matching later siblings.
if (
later_sibling_can_match &&
!step->is_pattern_start &&
step->contains_captures
) {
if (ts_query__cursor_copy_state(self, state)) {
if (later_sibling_can_match && step->contains_captures) {
if (ts_query_cursor__copy_state(self, &state)) {
LOG(
" split state for capture. pattern:%u, step:%u\n",
state->pattern_index,
@ -2382,45 +2482,14 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
}
// If the current node is captured in this pattern, add it to the capture list.
// For the first capture in a pattern, lazily acquire a capture list.
if (step->capture_ids[0] != NONE) {
if (state->capture_list_id == NONE) {
state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
// If there are no capture lists left in the pool, then terminate whichever
// state has captured the earliest node in the document, and steal its
// capture list.
if (state->capture_list_id == NONE) {
uint32_t state_index, byte_offset, pattern_index;
if (ts_query_cursor__first_in_progress_capture(
self,
&state_index,
&byte_offset,
&pattern_index
)) {
LOG(
" abandon state. index:%u, pattern:%u, offset:%u.\n",
state_index, pattern_index, byte_offset
);
state->capture_list_id = self->states.contents[state_index].capture_list_id;
array_erase(&self->states, state_index);
if (state_index < i) {
i--;
state--;
}
} else {
LOG(" too many finished states.\n");
array_erase(&self->states, i);
i--;
continue;
}
}
CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
if (!capture_list) {
array_erase(&self->states, i);
i--;
continue;
}
CaptureList *capture_list = capture_list_pool_get_mut(
&self->capture_list_pool,
state->capture_list_id
);
for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
uint16_t capture_id = step->capture_ids[j];
if (step->capture_ids[j] == NONE) break;
@ -2443,10 +2512,9 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
state->step_index
);
// If this state's next step has an 'alternative' step (the step is either optional,
// or is the end of a repetition), then copy the state in order to pursue both
// alternatives. The alternative step itself may have an alternative, so this is
// an interative process.
// If this state's next step has an alternative step, then copy the state in order
// to pursue both alternatives. The alternative step itself may have an alternative,
// so this is an interative process.
unsigned end_index = i + 1;
for (unsigned j = i; j < end_index; j++) {
QueryState *state = &self->states.contents[j];
@ -2458,25 +2526,27 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
continue;
}
QueryState *copy = ts_query__cursor_copy_state(self, state);
if (next_step->is_pass_through) {
state->step_index++;
j--;
}
QueryState *copy = ts_query_cursor__copy_state(self, &state);
if (copy) {
copy_count++;
LOG(
" split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
copy->pattern_index,
copy->step_index,
next_step->alternative_index,
next_step->alternative_is_immediate,
capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
);
end_index++;
copy_count++;
copy->step_index = next_step->alternative_index;
if (next_step->alternative_is_immediate) {
copy->seeking_immediate_match = true;
}
LOG(
" split state for branch. pattern:%u, step:%u, step:%u, immediate:%d\n",
copy->pattern_index,
state->step_index,
copy->step_index,
copy->seeking_immediate_match
);
}
}
}
@ -2484,59 +2554,77 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
for (unsigned i = 0; i < self->states.size; i++) {
QueryState *state = &self->states.contents[i];
bool did_remove = false;
if (state->dead) {
array_erase(&self->states, i);
i--;
continue;
}
// Enfore the longest-match criteria. When a query pattern contains optional or
// repeated nodes, this is necesssary to avoid multiple redundant states, where
// repeated nodes, this is necessary to avoid multiple redundant states, where
// one state has a strict subset of another state's captures.
bool did_remove = false;
for (unsigned j = i + 1; j < self->states.size; j++) {
QueryState *other_state = &self->states.contents[j];
// Query states are kept in ascending order of start_depth and pattern_index.
// Since the longest-match criteria is only used for deduping matches of the same
// pattern and root node, we only need to perform pairwise comparisons within a
// small slice of the states array.
if (
state->pattern_index == other_state->pattern_index &&
state->start_depth == other_state->start_depth
) {
bool left_contains_right, right_contains_left;
ts_query_cursor__compare_captures(
self,
state,
other_state,
&left_contains_right,
&right_contains_left
);
if (left_contains_right) {
if (state->step_index == other_state->step_index) {
LOG(
" drop shorter state. pattern: %u, step_index: %u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
array_erase(&self->states, j);
j--;
continue;
}
other_state->has_in_progress_alternatives = true;
other_state->start_depth != state->start_depth ||
other_state->pattern_index != state->pattern_index
) break;
bool left_contains_right, right_contains_left;
ts_query_cursor__compare_captures(
self,
state,
other_state,
&left_contains_right,
&right_contains_left
);
if (left_contains_right) {
if (state->step_index == other_state->step_index) {
LOG(
" drop shorter state. pattern: %u, step_index: %u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
array_erase(&self->states, j);
j--;
continue;
}
if (right_contains_left) {
if (state->step_index == other_state->step_index) {
LOG(
" drop shorter state. pattern: %u, step_index: %u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
array_erase(&self->states, i);
did_remove = true;
break;
}
state->has_in_progress_alternatives = true;
other_state->has_in_progress_alternatives = true;
}
if (right_contains_left) {
if (state->step_index == other_state->step_index) {
LOG(
" drop shorter state. pattern: %u, step_index: %u\n",
state->pattern_index,
state->step_index
);
capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
array_erase(&self->states, i);
i--;
did_remove = true;
break;
}
state->has_in_progress_alternatives = true;
}
}
// If there the state is at the end of its pattern, remove it from the list
// of in-progress states and add it to the list of finished states.
if (!did_remove) {
LOG(
" keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
state->pattern_index,
state->start_depth,
state->step_index,
capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
);
QueryStep *next_step = &self->query->steps.contents[state->step_index];
if (next_step->depth == PATTERN_DONE_MARKER) {
if (state->has_in_progress_alternatives) {
@ -2546,6 +2634,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
state->id = self->next_state_id++;
array_push(&self->finished_states, *state);
array_erase(&self->states, state - self->states.contents);
did_match = true;
i--;
}
}
@ -2559,9 +2648,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
self->ascending = true;
}
}
} while (self->finished_states.size == 0);
return true;
}
}
bool ts_query_cursor_next_match(
@ -2701,7 +2788,10 @@ bool ts_query_cursor_next_capture(
// If there are no finished matches that are ready to be returned, then
// continue finding more matches.
if (!ts_query_cursor__advance(self)) return false;
if (
!ts_query_cursor__advance(self) &&
self->finished_states.size == 0
) return false;
}
}

View file

@ -16,18 +16,10 @@ typedef enum {
TSTagsInvalidUtf8,
TSTagsInvalidRegex,
TSTagsInvalidQuery,
TSTagsInvalidCapture,
} TSTagsError;
typedef enum {
TSTagKindFunction,
TSTagKindMethod,
TSTagKindClass,
TSTagKindModule,
TSTagKindCall,
} TSTagKind;
typedef struct {
TSTagKind kind;
uint32_t start_byte;
uint32_t end_byte;
uint32_t name_start_byte;
@ -36,8 +28,12 @@ typedef struct {
uint32_t line_end_byte;
TSPoint start_point;
TSPoint end_point;
uint32_t utf16_start_column;
uint32_t utf16_end_column;
uint32_t docs_start_byte;
uint32_t docs_end_byte;
uint32_t syntax_type_id;
bool is_definition;
} TSTag;
typedef struct TSTagger TSTagger;
@ -89,6 +85,12 @@ uint32_t ts_tags_buffer_tags_len(const TSTagsBuffer *);
const char *ts_tags_buffer_docs(const TSTagsBuffer *);
uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *);
// Get the syntax kinds for a scope.
const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len);
// Determine whether a parse error was encountered while tagging.
bool ts_tags_buffer_found_parse_error(const TSTagsBuffer*);
#ifdef __cplusplus
}
#endif

View file

@ -1,4 +1,4 @@
use super::{Error, TagKind, TagsConfiguration, TagsContext};
use super::{Error, TagsConfiguration, TagsContext};
use std::collections::HashMap;
use std::ffi::CStr;
use std::process::abort;
@ -6,6 +6,9 @@ use std::sync::atomic::AtomicUsize;
use std::{fmt, slice, str};
use tree_sitter::Language;
const BUFFER_TAGS_RESERVE_CAPACITY: usize = 100;
const BUFFER_DOCS_RESERVE_CAPACITY: usize = 1024;
#[repr(C)]
#[derive(Debug, PartialEq, Eq)]
pub enum TSTagsError {
@ -16,19 +19,10 @@ pub enum TSTagsError {
InvalidUtf8,
InvalidRegex,
InvalidQuery,
InvalidCapture,
Unknown,
}
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TSTagKind {
Function,
Method,
Class,
Module,
Call,
}
#[repr(C)]
pub struct TSPoint {
row: u32,
@ -37,7 +31,6 @@ pub struct TSPoint {
#[repr(C)]
pub struct TSTag {
pub kind: TSTagKind,
pub start_byte: u32,
pub end_byte: u32,
pub name_start_byte: u32,
@ -46,8 +39,12 @@ pub struct TSTag {
pub line_end_byte: u32,
pub start_point: TSPoint,
pub end_point: TSPoint,
pub utf16_start_colum: u32,
pub utf16_end_colum: u32,
pub docs_start_byte: u32,
pub docs_end_byte: u32,
pub syntax_type_id: u32,
pub is_definition: bool,
}
pub struct TSTagger {
@ -58,6 +55,7 @@ pub struct TSTagsBuffer {
context: TagsContext,
tags: Vec<TSTag>,
docs: Vec<u8>,
errors_present: bool,
}
#[no_mangle]
@ -102,7 +100,9 @@ pub extern "C" fn ts_tagger_add_language(
}
Err(Error::Query(_)) => TSTagsError::InvalidQuery,
Err(Error::Regex(_)) => TSTagsError::InvalidRegex,
Err(_) => TSTagsError::Unknown,
Err(Error::Cancelled) => TSTagsError::Timeout,
Err(Error::InvalidLanguage) => TSTagsError::InvalidLanguage,
Err(Error::InvalidCapture(_)) => TSTagsError::InvalidCapture,
}
}
@ -120,8 +120,9 @@ pub extern "C" fn ts_tagger_tag(
let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) };
if let Some(config) = tagger.languages.get(scope_name) {
buffer.tags.clear();
buffer.docs.clear();
shrink_and_clear(&mut buffer.tags, BUFFER_TAGS_RESERVE_CAPACITY);
shrink_and_clear(&mut buffer.docs, BUFFER_DOCS_RESERVE_CAPACITY);
let source_code = unsafe { slice::from_raw_parts(source_code, source_code_len as usize) };
let cancellation_flag = unsafe { cancellation_flag.as_ref() };
@ -129,7 +130,10 @@ pub extern "C" fn ts_tagger_tag(
.context
.generate_tags(config, source_code, cancellation_flag)
{
Ok(tags) => tags,
Ok((tags, found_error)) => {
buffer.errors_present = found_error;
tags
}
Err(e) => {
return match e {
Error::InvalidLanguage => TSTagsError::InvalidLanguage,
@ -153,13 +157,6 @@ pub extern "C" fn ts_tagger_tag(
buffer.docs.extend_from_slice(docs.as_bytes());
}
buffer.tags.push(TSTag {
kind: match tag.kind {
TagKind::Function => TSTagKind::Function,
TagKind::Method => TSTagKind::Method,
TagKind::Class => TSTagKind::Class,
TagKind::Module => TSTagKind::Module,
TagKind::Call => TSTagKind::Call,
},
start_byte: tag.range.start as u32,
end_byte: tag.range.end as u32,
name_start_byte: tag.name_range.start as u32,
@ -174,8 +171,12 @@ pub extern "C" fn ts_tagger_tag(
row: tag.span.end.row as u32,
column: tag.span.end.column as u32,
},
utf16_start_colum: tag.utf16_column_range.start as u32,
utf16_end_colum: tag.utf16_column_range.end as u32,
docs_start_byte: prev_docs_len as u32,
docs_end_byte: buffer.docs.len() as u32,
syntax_type_id: tag.syntax_type_id,
is_definition: tag.is_definition,
});
}
@ -189,8 +190,9 @@ pub extern "C" fn ts_tagger_tag(
pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer {
Box::into_raw(Box::new(TSTagsBuffer {
context: TagsContext::new(),
tags: Vec::with_capacity(64),
docs: Vec::with_capacity(64),
tags: Vec::with_capacity(BUFFER_TAGS_RESERVE_CAPACITY),
docs: Vec::with_capacity(BUFFER_DOCS_RESERVE_CAPACITY),
errors_present: false,
}))
}
@ -223,6 +225,30 @@ pub extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 {
buffer.docs.len() as u32
}
#[no_mangle]
pub extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool {
let buffer = unwrap_ptr(this);
buffer.errors_present
}
#[no_mangle]
pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name(
this: *mut TSTagger,
scope_name: *const i8,
len: *mut u32,
) -> *const *const i8 {
let tagger = unwrap_mut_ptr(this);
let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) };
let len = unwrap_mut_ptr(len);
*len = 0;
if let Some(config) = tagger.languages.get(scope_name) {
*len = config.c_syntax_type_names.len() as u32;
return config.c_syntax_type_names.as_ptr() as *const *const i8;
}
std::ptr::null()
}
fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
unsafe { result.as_ref() }.unwrap_or_else(|| {
eprintln!("{}:{} - pointer must not be null", file!(), line!());
@ -243,3 +269,11 @@ fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
abort();
})
}
fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
if vec.len() > capacity {
vec.truncate(capacity);
vec.shrink_to_fit();
}
vec.clear();
}

View file

@ -1,10 +1,12 @@
pub mod c_lib;
use memchr::{memchr, memrchr};
use memchr::memchr;
use regex::Regex;
use std::collections::HashMap;
use std::ffi::{CStr, CString};
use std::ops::Range;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{fmt, mem, str};
use std::{char, fmt, mem, str};
use tree_sitter::{
Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree,
};
@ -18,19 +20,24 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100;
pub struct TagsConfiguration {
pub language: Language,
pub query: Query,
call_capture_index: Option<u32>,
class_capture_index: Option<u32>,
syntax_type_names: Vec<Box<[u8]>>,
c_syntax_type_names: Vec<*const u8>,
capture_map: HashMap<u32, NamedCapture>,
doc_capture_index: Option<u32>,
function_capture_index: Option<u32>,
method_capture_index: Option<u32>,
module_capture_index: Option<u32>,
name_capture_index: Option<u32>,
ignore_capture_index: Option<u32>,
local_scope_capture_index: Option<u32>,
local_definition_capture_index: Option<u32>,
tags_pattern_index: usize,
pattern_info: Vec<PatternInfo>,
}
#[derive(Debug)]
pub struct NamedCapture {
pub syntax_type_id: u32,
pub is_definition: bool,
}
pub struct TagsContext {
parser: Parser,
cursor: QueryCursor,
@ -38,21 +45,14 @@ pub struct TagsContext {
#[derive(Debug, Clone)]
pub struct Tag {
pub kind: TagKind,
pub range: Range<usize>,
pub name_range: Range<usize>,
pub line_range: Range<usize>,
pub span: Range<Point>,
pub utf16_column_range: Range<usize>,
pub docs: Option<String>,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum TagKind {
Function,
Method,
Class,
Module,
Call,
pub is_definition: bool,
pub syntax_type_id: u32,
}
#[derive(Debug, PartialEq)]
@ -61,6 +61,7 @@ pub enum Error {
Regex(regex::Error),
Cancelled,
InvalidLanguage,
InvalidCapture(String),
}
#[derive(Debug, Default)]
@ -91,6 +92,7 @@ where
matches: I,
_tree: Tree,
source: &'a [u8],
prev_line_info: Option<LineInfo>,
config: &'a TagsConfiguration,
cancellation_flag: Option<&'a AtomicUsize>,
iter_count: usize,
@ -98,6 +100,18 @@ where
scopes: Vec<LocalScope<'a>>,
}
struct LineInfo {
utf8_position: Point,
utf8_byte: usize,
utf16_column: usize,
line_range: Range<usize>,
}
struct LossyUtf8<'a> {
bytes: &'a [u8],
in_replacement: bool,
}
impl TagsConfiguration {
pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result<Self, Error> {
let query = Query::new(language, &format!("{}{}", locals_query, tags_query))?;
@ -111,31 +125,57 @@ impl TagsConfiguration {
}
}
let mut call_capture_index = None;
let mut class_capture_index = None;
let mut capture_map = HashMap::new();
let mut syntax_type_names = Vec::new();
let mut doc_capture_index = None;
let mut function_capture_index = None;
let mut method_capture_index = None;
let mut module_capture_index = None;
let mut name_capture_index = None;
let mut ignore_capture_index = None;
let mut local_scope_capture_index = None;
let mut local_definition_capture_index = None;
for (i, name) in query.capture_names().iter().enumerate() {
let index = match name.as_str() {
"call" => &mut call_capture_index,
"class" => &mut class_capture_index,
"doc" => &mut doc_capture_index,
"function" => &mut function_capture_index,
"method" => &mut method_capture_index,
"module" => &mut module_capture_index,
"name" => &mut name_capture_index,
"local.scope" => &mut local_scope_capture_index,
"local.definition" => &mut local_definition_capture_index,
_ => continue,
};
*index = Some(i as u32);
match name.as_str() {
"" => continue,
"name" => name_capture_index = Some(i as u32),
"ignore" => ignore_capture_index = Some(i as u32),
"doc" => doc_capture_index = Some(i as u32),
"local.scope" => local_scope_capture_index = Some(i as u32),
"local.definition" => local_definition_capture_index = Some(i as u32),
"local.reference" => continue,
_ => {
let mut is_definition = false;
let kind = if name.starts_with("definition.") {
is_definition = true;
name.trim_start_matches("definition.")
} else if name.starts_with("reference.") {
name.trim_start_matches("reference.")
} else {
return Err(Error::InvalidCapture(name.to_string()));
};
if let Ok(cstr) = CString::new(kind) {
let c_kind = cstr.to_bytes_with_nul().to_vec().into_boxed_slice();
let syntax_type_id = syntax_type_names
.iter()
.position(|n| n == &c_kind)
.unwrap_or_else(|| {
syntax_type_names.push(c_kind);
syntax_type_names.len() - 1
}) as u32;
capture_map.insert(
i as u32,
NamedCapture {
syntax_type_id,
is_definition,
},
);
}
}
}
}
let c_syntax_type_names = syntax_type_names.iter().map(|s| s.as_ptr()).collect();
let pattern_info = (0..query.pattern_count())
.map(|pattern_index| {
let mut info = PatternInfo::default();
@ -180,19 +220,26 @@ impl TagsConfiguration {
Ok(TagsConfiguration {
language,
query,
function_capture_index,
class_capture_index,
method_capture_index,
module_capture_index,
syntax_type_names,
c_syntax_type_names,
capture_map,
doc_capture_index,
call_capture_index,
name_capture_index,
ignore_capture_index,
tags_pattern_index,
local_scope_capture_index,
local_definition_capture_index,
pattern_info,
})
}
pub fn syntax_type_name(&self, id: u32) -> &str {
unsafe {
let cstr = CStr::from_ptr(self.syntax_type_names[id as usize].as_ptr() as *const i8)
.to_bytes();
str::from_utf8(cstr).expect("syntax type name was not valid utf-8")
}
}
}
impl TagsContext {
@ -208,7 +255,7 @@ impl TagsContext {
config: &'a TagsConfiguration,
source: &'a [u8],
cancellation_flag: Option<&'a AtomicUsize>,
) -> Result<impl Iterator<Item = Result<Tag, Error>> + 'a, Error> {
) -> Result<(impl Iterator<Item = Result<Tag, Error>> + 'a, bool), Error> {
self.parser
.set_language(config.language)
.map_err(|_| Error::InvalidLanguage)?;
@ -224,12 +271,13 @@ impl TagsContext {
.matches(&config.query, tree_ref.root_node(), move |node| {
&source[node.byte_range()]
});
Ok(TagsIter {
Ok((TagsIter {
_tree: tree,
matches,
source,
config,
cancellation_flag,
prev_line_info: None,
tag_queue: Vec::new(),
iter_count: 0,
scopes: vec![LocalScope {
@ -237,7 +285,7 @@ impl TagsContext {
inherits: false,
local_defs: Vec::new(),
}],
})
}, tree_ref.root_node().has_error()))
}
}
@ -267,7 +315,12 @@ where
if self.tag_queue.len() > 1
&& self.tag_queue[0].0.name_range.end < last_entry.0.name_range.start
{
return Some(Ok(self.tag_queue.remove(0).0));
let tag = self.tag_queue.remove(0).0;
if tag.is_ignored() {
continue;
} else {
return Some(Ok(tag));
}
}
}
@ -300,141 +353,185 @@ where
continue;
}
let mut name_range = None;
let mut name_node = None;
let mut doc_nodes = Vec::new();
let mut tag_node = None;
let mut kind = TagKind::Call;
let mut syntax_type_id = 0;
let mut is_definition = false;
let mut docs_adjacent_node = None;
let mut is_ignored = false;
for capture in mat.captures {
let index = Some(capture.index);
if index == self.config.ignore_capture_index {
is_ignored = true;
name_node = Some(capture.node);
}
if index == self.config.pattern_info[mat.pattern_index].docs_adjacent_capture {
docs_adjacent_node = Some(capture.node);
}
if index == self.config.name_capture_index {
name_range = Some(capture.node.byte_range());
name_node = Some(capture.node);
} else if index == self.config.doc_capture_index {
doc_nodes.push(capture.node);
} else if index == self.config.call_capture_index {
}
if let Some(named_capture) = self.config.capture_map.get(&capture.index) {
tag_node = Some(capture.node);
kind = TagKind::Call;
} else if index == self.config.class_capture_index {
tag_node = Some(capture.node);
kind = TagKind::Class;
} else if index == self.config.function_capture_index {
tag_node = Some(capture.node);
kind = TagKind::Function;
} else if index == self.config.method_capture_index {
tag_node = Some(capture.node);
kind = TagKind::Method;
} else if index == self.config.module_capture_index {
tag_node = Some(capture.node);
kind = TagKind::Module;
syntax_type_id = named_capture.syntax_type_id;
is_definition = named_capture.is_definition;
}
}
if let (Some(tag_node), Some(name_range)) = (tag_node, name_range) {
if pattern_info.name_must_be_non_local {
let mut is_local = false;
for scope in self.scopes.iter().rev() {
if scope.range.start <= name_range.start
&& scope.range.end >= name_range.end
{
if scope
.local_defs
.iter()
.any(|d| d.name == &self.source[name_range.clone()])
{
is_local = true;
break;
}
if !scope.inherits {
break;
}
}
}
if is_local {
if let Some(name_node) = name_node {
let name_range = name_node.byte_range();
let tag;
if let Some(tag_node) = tag_node {
if name_node.has_error() {
continue;
}
}
// If needed, filter the doc nodes based on their ranges, selecting
// only the slice that are adjacent to some specified node.
let mut docs_start_index = 0;
if let (Some(docs_adjacent_node), false) =
(docs_adjacent_node, doc_nodes.is_empty())
{
docs_start_index = doc_nodes.len();
let mut start_row = docs_adjacent_node.start_position().row;
while docs_start_index > 0 {
let doc_node = &doc_nodes[docs_start_index - 1];
let prev_doc_end_row = doc_node.end_position().row;
if prev_doc_end_row + 1 >= start_row {
docs_start_index -= 1;
start_row = doc_node.start_position().row;
} else {
break;
if pattern_info.name_must_be_non_local {
let mut is_local = false;
for scope in self.scopes.iter().rev() {
if scope.range.start <= name_range.start
&& scope.range.end >= name_range.end
{
if scope
.local_defs
.iter()
.any(|d| d.name == &self.source[name_range.clone()])
{
is_local = true;
break;
}
if !scope.inherits {
break;
}
}
}
if is_local {
continue;
}
}
}
// Generate a doc string from all of the doc nodes, applying any strip regexes.
let mut docs = None;
for doc_node in &doc_nodes[docs_start_index..] {
if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()]) {
let content = if let Some(regex) = &pattern_info.doc_strip_regex {
regex.replace_all(content, "").to_string()
} else {
content.to_string()
};
match &mut docs {
None => docs = Some(content),
Some(d) => {
d.push('\n');
d.push_str(&content);
// If needed, filter the doc nodes based on their ranges, selecting
// only the slice that are adjacent to some specified node.
let mut docs_start_index = 0;
if let (Some(docs_adjacent_node), false) =
(docs_adjacent_node, doc_nodes.is_empty())
{
docs_start_index = doc_nodes.len();
let mut start_row = docs_adjacent_node.start_position().row;
while docs_start_index > 0 {
let doc_node = &doc_nodes[docs_start_index - 1];
let prev_doc_end_row = doc_node.end_position().row;
if prev_doc_end_row + 1 >= start_row {
docs_start_index -= 1;
start_row = doc_node.start_position().row;
} else {
break;
}
}
}
// Generate a doc string from all of the doc nodes, applying any strip regexes.
let mut docs = None;
for doc_node in &doc_nodes[docs_start_index..] {
if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()])
{
let content = if let Some(regex) = &pattern_info.doc_strip_regex {
regex.replace_all(content, "").to_string()
} else {
content.to_string()
};
match &mut docs {
None => docs = Some(content),
Some(d) => {
d.push('\n');
d.push_str(&content);
}
}
}
}
let rng = tag_node.byte_range();
let range = rng.start.min(name_range.start)..rng.end.max(name_range.end);
let span = name_node.start_position()..name_node.end_position();
// Compute tag properties that depend on the text of the containing line. If the
// previous tag occurred on the same line, then reuse results from the previous tag.
let line_range;
let mut prev_utf16_column = 0;
let mut prev_utf8_byte = name_range.start - span.start.column;
let line_info = self.prev_line_info.as_ref().and_then(|info| {
if info.utf8_position.row == span.start.row {
Some(info)
} else {
None
}
});
if let Some(line_info) = line_info {
line_range = line_info.line_range.clone();
if line_info.utf8_position.column <= span.start.column {
prev_utf8_byte = line_info.utf8_byte;
prev_utf16_column = line_info.utf16_column;
}
} else {
line_range = self::line_range(
self.source,
name_range.start,
span.start,
MAX_LINE_LEN,
);
}
let utf16_start_column = prev_utf16_column
+ utf16_len(&self.source[prev_utf8_byte..name_range.start]);
let utf16_end_column =
utf16_start_column + utf16_len(&self.source[name_range.clone()]);
let utf16_column_range = utf16_start_column..utf16_end_column;
self.prev_line_info = Some(LineInfo {
utf8_position: span.end,
utf8_byte: name_range.end,
utf16_column: utf16_end_column,
line_range: line_range.clone(),
});
tag = Tag {
line_range,
span,
utf16_column_range,
range,
name_range,
docs,
is_definition,
syntax_type_id,
};
} else if is_ignored {
tag = Tag::ignored(name_range);
} else {
continue;
}
// Only create one tag per node. The tag queue is sorted by node position
// to allow for fast lookup.
let range = tag_node.byte_range();
match self
.tag_queue
.binary_search_by_key(&(name_range.end, name_range.start), |(tag, _)| {
(tag.name_range.end, tag.name_range.start)
}) {
match self.tag_queue.binary_search_by_key(
&(tag.name_range.end, tag.name_range.start),
|(tag, _)| (tag.name_range.end, tag.name_range.start),
) {
Ok(i) => {
let (tag, pattern_index) = &mut self.tag_queue[i];
let (existing_tag, pattern_index) = &mut self.tag_queue[i];
if *pattern_index > mat.pattern_index {
*pattern_index = mat.pattern_index;
*tag = Tag {
line_range: line_range(self.source, range.start, MAX_LINE_LEN),
span: tag_node.start_position()..tag_node.end_position(),
kind,
range,
name_range,
docs,
};
*existing_tag = tag;
}
}
Err(i) => self.tag_queue.insert(
i,
(
Tag {
line_range: line_range(self.source, range.start, MAX_LINE_LEN),
span: tag_node.start_position()..tag_node.end_position(),
kind,
range,
name_range,
docs,
},
mat.pattern_index,
),
),
Err(i) => self.tag_queue.insert(i, (tag, mat.pattern_index)),
}
}
}
@ -448,16 +545,31 @@ where
}
}
impl fmt::Display for TagKind {
impl Tag {
fn ignored(name_range: Range<usize>) -> Self {
Tag {
name_range,
line_range: 0..0,
span: Point::new(0, 0)..Point::new(0, 0),
utf16_column_range: 0..0,
range: usize::MAX..usize::MAX,
docs: None,
is_definition: false,
syntax_type_id: 0,
}
}
fn is_ignored(&self) -> bool {
self.range.start == usize::MAX
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TagKind::Call => "Call",
TagKind::Module => "Module",
TagKind::Class => "Class",
TagKind::Method => "Method",
TagKind::Function => "Function",
Error::InvalidCapture(name) => write!(f, "Invalid capture @{}. Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).", name),
_ => write!(f, "{:?}", self)
}
.fmt(f)
}
}
@ -473,11 +585,90 @@ impl From<QueryError> for Error {
}
}
fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range<usize> {
let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1);
let max_line_len = max_line_len.min(text.len() - start);
let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len);
start..end
// TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy`
// is ever stabilized, we should use that. Otherwise, this struct could be moved
// into some module that's shared between `tree-sitter-tags` and `tree-sitter-highlight`.
impl<'a> LossyUtf8<'a> {
fn new(bytes: &'a [u8]) -> Self {
LossyUtf8 {
bytes,
in_replacement: false,
}
}
}
impl<'a> Iterator for LossyUtf8<'a> {
type Item = &'a str;
fn next(&mut self) -> Option<&'a str> {
if self.bytes.is_empty() {
return None;
}
if self.in_replacement {
self.in_replacement = false;
return Some("\u{fffd}");
}
match str::from_utf8(self.bytes) {
Ok(valid) => {
self.bytes = &[];
Some(valid)
}
Err(error) => {
if let Some(error_len) = error.error_len() {
let error_start = error.valid_up_to();
if error_start > 0 {
let result =
unsafe { str::from_utf8_unchecked(&self.bytes[..error_start]) };
self.bytes = &self.bytes[(error_start + error_len)..];
self.in_replacement = true;
Some(result)
} else {
self.bytes = &self.bytes[error_len..];
Some("\u{fffd}")
}
} else {
None
}
}
}
}
}
fn line_range(
text: &[u8],
start_byte: usize,
start_point: Point,
max_line_len: usize,
) -> Range<usize> {
// Trim leading whitespace
let mut line_start_byte = start_byte - start_point.column;
while line_start_byte < text.len() && text[line_start_byte].is_ascii_whitespace() {
line_start_byte += 1;
}
let max_line_len = max_line_len.min(text.len() - line_start_byte);
let text_after_line_start = &text[line_start_byte..(line_start_byte + max_line_len)];
let line_len = if let Some(len) = memchr(b'\n', text_after_line_start) {
len
} else if let Err(e) = str::from_utf8(text_after_line_start) {
e.valid_up_to()
} else {
max_line_len
};
// Trim trailing whitespace
let mut line_end_byte = line_start_byte + line_len;
while line_end_byte > line_start_byte && text[line_end_byte - 1].is_ascii_whitespace() {
line_end_byte -= 1;
}
line_start_byte..line_end_byte
}
fn utf16_len(bytes: &[u8]) -> usize {
LossyUtf8::new(bytes)
.flat_map(|chunk| chunk.chars().map(char::len_utf16))
.sum()
}
#[cfg(test)]
@ -486,14 +677,27 @@ mod tests {
#[test]
fn test_get_line() {
let text = b"abc\ndefg\nhijkl";
assert_eq!(line_range(text, 0, 10), 0..3);
assert_eq!(line_range(text, 1, 10), 0..3);
assert_eq!(line_range(text, 2, 10), 0..3);
assert_eq!(line_range(text, 3, 10), 0..3);
assert_eq!(line_range(text, 1, 2), 0..2);
assert_eq!(line_range(text, 4, 10), 4..8);
assert_eq!(line_range(text, 5, 10), 4..8);
assert_eq!(line_range(text, 11, 10), 9..14);
let text = "abc\ndefg❤hij\nklmno".as_bytes();
assert_eq!(line_range(text, 5, Point::new(1, 1), 30), 4..14);
assert_eq!(line_range(text, 5, Point::new(1, 1), 6), 4..8);
assert_eq!(line_range(text, 17, Point::new(2, 2), 30), 15..20);
assert_eq!(line_range(text, 17, Point::new(2, 2), 4), 15..19);
}
#[test]
fn test_get_line_trims() {
let text = b" foo\nbar\n";
assert_eq!(line_range(text, 0, Point::new(0, 0), 10), 3..6);
let text = b"\t func foo \nbar\n";
assert_eq!(line_range(text, 0, Point::new(0, 0), 10), 2..10);
let r = line_range(text, 0, Point::new(0, 0), 14);
assert_eq!(r, 2..10);
assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "func foo");
let r = line_range(text, 12, Point::new(1, 0), 14);
assert_eq!(r, 12..15);
assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "bar");
}
}

View file

@ -0,0 +1,19 @@
==========================
Heredocs with errors
==========================
joins(<<~SQL(
b
SQL
c
---
(program
(method_call
method: (identifier)
(ERROR (heredoc_beginning))
arguments: (argument_list
(heredoc_body (heredoc_end))
(identifier)
(MISSING ")"))))

View file

@ -0,0 +1,23 @@
=====
Extras
=====
;
%;
%foo:;
;
bar: baz:;
;
---
(program
(statement)
(macro_statement (statement))
(macro_statement (statement
(label_declaration (identifier))))
(statement)
(statement
(label_declaration (identifier))
(label_declaration (identifier)))
(statement))

View file

@ -0,0 +1,68 @@
{
"name": "extra_non_terminals_with_shared_rules",
"extras": [
{ "type": "PATTERN", "value": "\\s+" },
{ "type": "SYMBOL", "name": "macro_statement" }
],
"rules": {
"program": {
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "statement"
}
},
"statement": {
"type": "SEQ",
"members": [
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "label_declaration"
}
},
{
"type": "STRING",
"value": ";"
}
]
},
"macro_statement": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "%"
},
{
"type": "SYMBOL",
"name": "statement"
}
]
},
"label_declaration": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "STRING",
"value": ":"
}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
},
"conflicts": [],
"externals": [],
"inline": [],
"supertypes": []
}

View file

@ -22,10 +22,10 @@ The fuzzers can then be built with:
export CLANG_DIR=$HOME/src/third_party/llvm-build/Release+Asserts/bin
CC="$CLANG_DIR/clang" CXX="$CLANG_DIR/clang++" LINK="$CLANG_DIR/clang++" \
LIB_FUZZER_PATH=$HOME/src/compiler-rt/lib/fuzzer/libFuzzer.a \
./script/build_fuzzers
./script/build-fuzzers
```
This will generate a separate fuzzer for each grammar defined in `test/fixtures/grammars` and will be instrumented with [AddressSanitizer](https://clang.llvm.org/docs/AddressSanitizer.html) and [UndefinedBehaviorSanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html). Individual fuzzers can be built with, for example, `./script/build_fuzzers python ruby`.
This will generate a separate fuzzer for each grammar defined in `test/fixtures/grammars` and will be instrumented with [AddressSanitizer](https://clang.llvm.org/docs/AddressSanitizer.html) and [UndefinedBehaviorSanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html). Individual fuzzers can be built with, for example, `./script/build-fuzzers python ruby`.
The `run-fuzzer` script handles running an individual fuzzer with a sensible default set of arguments:
```