tree-sitter/lib/include/tree_sitter/api.h
Max Brunsfeld a1fec71b19 Tweak QueryCursor to allow iterating either matches or captures
For syntax highlighting, we want to iterate over all of the captures in 
order, and don't care about grouping the captures by pattern.
2019-09-13 15:19:04 -07:00

790 lines
23 KiB
C

#ifndef TREE_SITTER_API_H_
#define TREE_SITTER_API_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
/****************************/
/* Section - ABI Versioning */
/****************************/
#define TREE_SITTER_LANGUAGE_VERSION 11
#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9
/*******************/
/* Section - Types */
/*******************/
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
typedef struct TSParser TSParser;
typedef struct TSTree TSTree;
typedef struct TSQuery TSQuery;
typedef struct TSQueryCursor TSQueryCursor;
typedef enum {
TSInputEncodingUTF8,
TSInputEncodingUTF16,
} TSInputEncoding;
typedef enum {
TSSymbolTypeRegular,
TSSymbolTypeAnonymous,
TSSymbolTypeAuxiliary,
} TSSymbolType;
typedef struct {
uint32_t row;
uint32_t column;
} TSPoint;
typedef struct {
TSPoint start_point;
TSPoint end_point;
uint32_t start_byte;
uint32_t end_byte;
} TSRange;
typedef struct {
void *payload;
const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read);
TSInputEncoding encoding;
} TSInput;
typedef enum {
TSLogTypeParse,
TSLogTypeLex,
} TSLogType;
typedef struct {
void *payload;
void (*log)(void *payload, TSLogType, const char *);
} TSLogger;
typedef struct {
uint32_t start_byte;
uint32_t old_end_byte;
uint32_t new_end_byte;
TSPoint start_point;
TSPoint old_end_point;
TSPoint new_end_point;
} TSInputEdit;
typedef struct {
uint32_t context[4];
const void *id;
const TSTree *tree;
} TSNode;
typedef struct {
const void *tree;
const void *id;
uint32_t context[2];
} TSTreeCursor;
typedef struct {
TSNode node;
uint32_t index;
} TSQueryCapture;
typedef enum {
TSQueryErrorNone = 0,
TSQueryErrorSyntax,
TSQueryErrorNodeType,
TSQueryErrorField,
} TSQueryError;
/********************/
/* Section - Parser */
/********************/
/**
* Create a new parser.
*/
TSParser *ts_parser_new(void);
/**
* Delete the parser, freeing all of the memory that it used.
*/
void ts_parser_delete(TSParser *parser);
/**
* Set the language that the parser should use for parsing.
*
* Returns a boolean indicating whether or not the language was successfully
* assigned. True means assignment succeeded. False means there was a version
* mismatch: the language was generated with an incompatible version of the
* Tree-sitter CLI. Check the language's version using `ts_language_version`
* and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and
* `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants.
*/
bool ts_parser_set_language(TSParser *self, const TSLanguage *language);
/**
* Get the parser's current language.
*/
const TSLanguage *ts_parser_language(const TSParser *self);
/**
* Set the spans of text that the parser should include when parsing.
*
* By default, the parser will always include entire documents. This function
* allows you to parse only a *portion* of a document but still return a syntax
* tree whose ranges match up with the document as a whole. You can also pass
* multiple disjoint ranges.
*
* The second and third parameters specify the location and length of an array
* of ranges. The parser does *not* take ownership of these ranges; it copies
* the data, so it doesn't matter how these ranges are allocated.
*/
void ts_parser_set_included_ranges(
TSParser *self,
const TSRange *ranges,
uint32_t length
);
/**
* Get the ranges of text that the parser will include when parsing.
*
* The returned pointer is owned by the parser. The caller should not free it
* or write to it. The length of the array will be written to the given
* `length` pointer.
*/
const TSRange *ts_parser_included_ranges(
const TSParser *self,
uint32_t *length
);
/**
* Use the parser to parse some source code and create a syntax tree.
*
* If you are parsing this document for the first time, pass `NULL` for the
* `old_tree` parameter. Otherwise, if you have already parsed an earlier
* version of this document and the document has since been edited, pass the
* previous syntax tree so that the unchanged parts of it can be reused.
* This will save time and memory. For this to work correctly, you must have
* already edited the old syntax tree using the `ts_tree_edit` function in a
* way that exactly matches the source code changes.
*
* The `TSInput` parameter lets you specify how to read the text. It has the
* following three fields:
* 1. `read`: A function to retrieve a chunk of text at a given byte offset
* and (row, column) position. The function should return a pointer to the
* text and write its length to the the `bytes_read` pointer. The parser
* does not take ownership of this buffer; it just borrows it until it has
* finished reading it. The function should write a zero value to the
* `bytes_read` pointer to indicate the end of the document.
* 2. `payload`: An arbitrary pointer that will be passed to each invocation
* of the `read` function.
* 3. `encoding`: An indication of how the text is encoded. Either
* `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.
*
* This function returns a syntax tree on success, and `NULL` on failure. There
* are three possible reasons for failure:
* 1. The parser does not have a language assigned. Check for this using the
`ts_parser_language` function.
* 2. Parsing was cancelled due to a timeout that was set by an earlier call to
* the `ts_parser_set_timeout_micros` function. You can resume parsing from
* where the parser left out by calling `ts_parser_parse` again with the
* same arguments. Or you can start parsing from scratch by first calling
* `ts_parser_reset`.
* 3. Parsing was cancelled using a cancellation flag that was set by an
* earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing
* from where the parser left out by calling `ts_parser_parse` again with
* the same arguments.
*/
TSTree *ts_parser_parse(
TSParser *self,
const TSTree *old_tree,
TSInput input
);
/**
* Use the parser to parse some source code stored in one contiguous buffer.
* The first two parameters are the same as in the `ts_parser_parse` function
* above. The second two parameters indicate the location of the buffer and its
* length in bytes.
*/
TSTree *ts_parser_parse_string(
TSParser *self,
const TSTree *old_tree,
const char *string,
uint32_t length
);
/**
* Use the parser to parse some source code stored in one contiguous buffer with
* a given encoding. The first four parameters work the same as in the
* `ts_parser_parse_string` method above. The final parameter indicates whether
* the text is encoded as UTF8 or UTF16.
*/
TSTree *ts_parser_parse_string_encoding(
TSParser *self,
const TSTree *old_tree,
const char *string,
uint32_t length,
TSInputEncoding encoding
);
/**
* Instruct the parser to start the next parse from the beginning.
*
* If the parser previously failed because of a timeout or a cancellation, then
* by default, it will resume where it left off on the next call to
* `ts_parser_parse` or other parsing functions. If you don't want to resume,
* and instead intend to use this parser to parse some other document, you must
* call this `ts_parser_reset` first.
*/
void ts_parser_reset(TSParser *self);
/**
* Set the maximum duration in microseconds that parsing should be allowed to
* take before halting. If parsing takes longer than this, it will halt early,
* returning NULL. See `ts_parser_parse` for more information.
*/
void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout);
/**
* Get the duration in microseconds that parsing is allowed to take.
*/
uint64_t ts_parser_timeout_micros(const TSParser *self);
/**
* Set the parser's current cancellation flag pointer. If a non-null pointer is
* assigned, then the parser will periodically read from this pointer during
* parsing. If it reads a non-zero value, it will halt early, returning NULL.
* See `ts_parser_parse` for more information.
*/
void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag);
/**
* Get the parser's current cancellation flag pointer.
*/
const size_t *ts_parser_cancellation_flag(const TSParser *self);
/**
* Set the logger that a parser should use during parsing.
*
* The parser does not take ownership over the logger payload. If a logger was
* previously assigned, the caller is responsible for releasing any memory
* owned by the previous logger.
*/
void ts_parser_set_logger(TSParser *self, TSLogger logger);
/**
* Get the parser's current logger.
*/
TSLogger ts_parser_logger(const TSParser *self);
/**
* Set the file descriptor to which the parser should write debugging graphs
* during parsing. The graphs are formatted in the DOT language. You may want
* to pipe these graphs directly to a `dot(1)` process in order to generate
* SVG output. You can turn off this logging by passing a negative number.
*/
void ts_parser_print_dot_graphs(TSParser *self, int file);
/**
* Set whether or not the parser should halt immediately upon detecting an
* error. This will generally result in a syntax tree with an error at the
* root, and one or more partial syntax trees within the error. This behavior
* may not be supported long-term.
*/
void ts_parser_halt_on_error(TSParser *self, bool halt);
/******************/
/* Section - Tree */
/******************/
/**
* Create a shallow copy of the syntax tree. This is very fast.
*
* You need to copy a syntax tree in order to use it on more than one thread at
* a time, as syntax trees are not thread safe.
*/
TSTree *ts_tree_copy(const TSTree *self);
/**
* Delete the syntax tree, freeing all of the memory that it used.
*/
void ts_tree_delete(TSTree *self);
/**
* Get the root node of the syntax tree.
*/
TSNode ts_tree_root_node(const TSTree *self);
/**
* Get the language that was used to parse the syntax tree.
*/
const TSLanguage *ts_tree_language(const TSTree *);
/**
* Edit the syntax tree to keep it in sync with source code that has been
* edited.
*
* You must describe the edit both in terms of byte offsets and in terms of
* (row, column) coordinates.
*/
void ts_tree_edit(TSTree *self, const TSInputEdit *edit);
/**
* Compare a new syntax tree to a previous syntax tree representing the same
* document, returning an array of ranges whose syntactic structure has changed.
*
* For this to work correctly, the old syntax tree must have been edited such
* that its ranges match up to the new tree. Generally, you'll want to call
* this function right after calling one of the `ts_parser_parse` functions,
* passing in the new tree that was returned from `ts_parser_parse` and the old
* tree that was passed as a parameter.
*
* The returned array is allocated using `malloc` and the caller is responsible
* for freeing it using `free`. The length of the array will be written to the
* given `length` pointer.
*/
TSRange *ts_tree_get_changed_ranges(
const TSTree *self,
const TSTree *old_tree,
uint32_t *length
);
/**
* Write a DOT graph describing the syntax tree to the given file.
*/
void ts_tree_print_dot_graph(const TSTree *, FILE *);
/******************/
/* Section - Node */
/******************/
/**
* Get the node's type as a null-terminated string.
*/
const char *ts_node_type(TSNode);
/**
* Get the node's type as a numerical id.
*/
TSSymbol ts_node_symbol(TSNode);
/**
* Get the node's start byte.
*/
uint32_t ts_node_start_byte(TSNode);
/**
* Get the node's start position in terms of rows and columns.
*/
TSPoint ts_node_start_point(TSNode);
/**
* Get the node's end byte.
*/
uint32_t ts_node_end_byte(TSNode);
/**
* Get the node's end position in terms of rows and columns.
*/
TSPoint ts_node_end_point(TSNode);
/**
* Get an S-expression representing the node as a string.
*
* This string is allocated with `malloc` and the caller is responsible for
* freeing it using `free`.
*/
char *ts_node_string(TSNode);
/**
* Check if the node is null. Functions like `ts_node_child` and
* `ts_node_next_sibling` will return a null node to indicate that no such node
* was found.
*/
bool ts_node_is_null(TSNode);
/**
* Check if the node is *named*. Named nodes correspond to named rules in the
* grammar, whereas *anonymous* nodes correspond to string literals in the
* grammar.
*/
bool ts_node_is_named(TSNode);
/**
* Check if the node is *missing*. Missing nodes are inserted by the parser in
* order to recover from certain kinds of syntax errors.
*/
bool ts_node_is_missing(TSNode);
/**
* Check if the node is *missing*. Missing nodes are inserted by the parser in
* order to recover from certain kinds of syntax errors.
*/
bool ts_node_is_extra(TSNode);
/**
* Check if a syntax node has been edited.
*/
bool ts_node_has_changes(TSNode);
/**
* Check if the node is a syntax error or contains any syntax errors.
*/
bool ts_node_has_error(TSNode);
/**
* Get the node's immediate parent.
*/
TSNode ts_node_parent(TSNode);
/**
* Get the node's child at the given index, where zero represents the first
* child.
*/
TSNode ts_node_child(TSNode, uint32_t);
/**
* Get the node's number of children.
*/
uint32_t ts_node_child_count(TSNode);
/**
* Get the node's *named* child at the given index.
*
* See also `ts_node_is_named`.
*/
TSNode ts_node_named_child(TSNode, uint32_t);
/**
* Get the node's number of *named* children.
*
* See also `ts_node_is_named`.
*/
uint32_t ts_node_named_child_count(TSNode);
/**
* Get the node's child with the given field name.
*/
TSNode ts_node_child_by_field_name(
TSNode self,
const char *field_name,
uint32_t field_name_length
);
/**
* Get the node's child with the given numerical field id.
*
* You can convert a field name to an id using the
* `ts_language_field_id_for_name` function.
*/
TSNode ts_node_child_by_field_id(TSNode, TSFieldId);
/**
* Get the node's next / previous sibling.
*/
TSNode ts_node_next_sibling(TSNode);
TSNode ts_node_prev_sibling(TSNode);
/**
* Get the node's next / previous *named* sibling.
*/
TSNode ts_node_next_named_sibling(TSNode);
TSNode ts_node_prev_named_sibling(TSNode);
/**
* Get the node's first child that extends beyond the given byte offset.
*/
TSNode ts_node_first_child_for_byte(TSNode, uint32_t);
/**
* Get the node's first named child that extends beyond the given byte offset.
*/
TSNode ts_node_first_named_child_for_byte(TSNode, uint32_t);
/**
* Get the smallest node within this node that spans the given range of bytes
* or (row, column) positions.
*/
TSNode ts_node_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint);
/**
* Get the smallest named node within this node that spans the given range of
* bytes or (row, column) positions.
*/
TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
TSNode ts_node_named_descendant_for_point_range(TSNode, TSPoint, TSPoint);
/**
* Edit the node to keep it in-sync with source code that has been edited.
*
* This function is only rarely needed. When you edit a syntax tree with the
* `ts_tree_edit` function, all of the nodes that you retrieve from the tree
* afterward will already reflect the edit. You only need to use `ts_node_edit`
* when you have a `TSNode` instance that you want to keep and continue to use
* after an edit.
*/
void ts_node_edit(TSNode *, const TSInputEdit *);
/**
* Check if two nodes are identical.
*/
bool ts_node_eq(TSNode, TSNode);
/************************/
/* Section - TreeCursor */
/************************/
/**
* Create a new tree cursor starting from the given node.
*
* A tree cursor allows you to walk a syntax tree more efficiently than is
* possible using the `TSNode` functions. It is a mutable object that is always
* on a certain syntax node, and can be moved imperatively to different nodes.
*/
TSTreeCursor ts_tree_cursor_new(TSNode);
/**
* Delete a tree cursor, freeing all of the memory that it used.
*/
void ts_tree_cursor_delete(TSTreeCursor *);
/**
* Re-initialize a tree cursor to start at a different ndoe.
*/
void ts_tree_cursor_reset(TSTreeCursor *, TSNode);
/**
* Get the tree cursor's current node.
*/
TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
/**
* Get the field name of the tree cursor's current node.
*
* This returns `NULL` if the current node doesn't have a field.
* See also `ts_node_child_by_field_name`.
*/
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);
/**
* Get the field name of the tree cursor's current node.
*
* This returns zero if the current node doesn't have a field.
* See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`.
*/
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *);
/**
* Move the cursor to the parent of its current node.
*
* This returns `true` if the cursor successfully moved, and returns `false`
* if there was no parent node (the cursor was already on the root node).
*/
bool ts_tree_cursor_goto_parent(TSTreeCursor *);
/**
* Move the cursor to the next sibling of its current node.
*
* This returns `true` if the cursor successfully moved, and returns `false`
* if there was no next sibling node.
*/
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *);
/**
* Move the cursor to the first schild of its current node.
*
* This returns `true` if the cursor successfully moved, and returns `false`
* if there were no children.
*/
bool ts_tree_cursor_goto_first_child(TSTreeCursor *);
/**
* Move the cursor to the first schild of its current node that extends beyond
* the given byte offset.
*
* This returns the index of the child node if one was found, and returns -1
* if no such child was found.
*/
int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t);
TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *);
/*******************/
/* Section - Query */
/*******************/
/**
* Create a new query from a string containing one or more S-expression
* patterns. The query is associated with a particular language, and can
* only be run on syntax nodes parsed with that language.
*
* If all of the given patterns are valid, this returns a `TSQuery`.
* If a pattern is invalid, this returns `NULL`, and provides two pieces
* of information about the problem:
* 1. The byte offset of the error is written to the `error_offset` parameter.
* 2. The type of error is written to the `error_type` parameter.
*/
TSQuery *ts_query_new(
const TSLanguage *language,
const char *source,
uint32_t source_len,
uint32_t *error_offset,
TSQueryError *error_type
);
/**
* Delete a query, freeing all of the memory that it used.
*/
void ts_query_delete(TSQuery *);
/**
* Get the number of distinct capture names in the query.
*/
uint32_t ts_query_capture_count(const TSQuery *);
/**
* Get the name and length of one of the query's capture. Each capture
* is associated with a numeric id based on the order that it appeared
* in the query's source.
*/
const char *ts_query_capture_name_for_id(
const TSQuery *self,
uint32_t index,
uint32_t *length
);
/**
* Get the numeric id of the capture with the given name.
*/
int ts_query_capture_id_for_name(
const TSQuery *self,
const char *name,
uint32_t length
);
/**
* Create a new cursor for executing a given query.
*
* The cursor stores the state that is needed to iteratively search
* for matches. To use the query cursor, first call `ts_query_cursor_exec`
* to start running a given query on a given syntax node. Then, there are
* two options for consuming the results of the query:
* 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the
* the *matches* in the order that they were found. Each match contains the
* index of the pattern that matched, and an array of captures. Because
* multiple patterns can match the same set of nodes, one match may contain
* captures that appear *before* some of the captures from a previous match.
* 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the
* individual *captures* in the order that they appear. This is useful if
* don't care about which pattern matched, and just want a single ordered
* sequence of captures.
*
* If you don't care about consuming all of the results, you can stop calling
* `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point.
* You can then start executing another query on another node by calling
* `ts_query_cursor_exec` again.
*/
TSQueryCursor *ts_query_cursor_new();
/**
* Delete a query cursor, freeing all of the memory that it used.
*/
void ts_query_cursor_delete(TSQueryCursor *);
/**
* Start running a given query on a given node.
*/
void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode);
/**
* Set the range of bytes or (row, column) positions in which the query
* will be executed.
*/
void ts_query_cursor_set_byte_range(TSQueryCursor *, uint32_t, uint32_t);
void ts_query_cursor_set_point_range(TSQueryCursor *, TSPoint, TSPoint);
/**
* Advance to the next match of the currently running query.
*
* If there is another match, write its pattern index to `pattern_index`,
* the number of captures to `capture_count`, and the captures themselves
* to `*captures`, and return `true`. Otherwise, return `false`.
*/
bool ts_query_cursor_next_match(
TSQueryCursor *self,
uint32_t *pattern_index,
uint32_t *capture_count,
const TSQueryCapture **captures
);
/**
* Advance to the next capture of the currently running query.
*
* If there is another capture, write it to `capture` and return `true`.
* Otherwise, return `false`.
*/
bool ts_query_cursor_next_capture(TSQueryCursor *, TSQueryCapture *capture);
/**********************/
/* Section - Language */
/**********************/
/**
* Get the number of distinct node types in the language.
*/
uint32_t ts_language_symbol_count(const TSLanguage *);
/**
* Get a node type string for the given numerical id.
*/
const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);
/**
* Get the numerical id for the given node type string.
*/
TSSymbol ts_language_symbol_for_name(const TSLanguage *, const char *);
/**
* Get the number of distinct field names in the language.
*/
uint32_t ts_language_field_count(const TSLanguage *);
/**
* Get the field name string for the given numerical id.
*/
const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId);
/**
* Get the numerical id for the given field name string.
*/
TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t);
/**
* Check whether the given node type id belongs to named nodes, anonymous nodes,
* or a hidden nodes.
*
* See also `ts_node_is_named`. Hidden nodes are never returned from the API.
*/
TSSymbolType ts_language_symbol_type(const TSLanguage *, TSSymbol);
/**
* Get the ABI version number for this language. This version number is used
* to ensure that languages were generated by a compatible version of
* Tree-sitter.
*
* See also `ts_parser_set_language`.
*/
uint32_t ts_language_version(const TSLanguage *);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_API_H_