tree-sitter/lib/src/stack.h

#ifndef TREE_SITTER_PARSE_STACK_H_
#define TREE_SITTER_PARSE_STACK_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "./array.h"
#include "./subtree.h"
#include "./error_costs.h"
#include <stdio.h>

// Opaque parse stack type. Only this forward declaration appears in the
// header, so users of the header handle a Stack exclusively through pointers
// passed to the ts_stack_* functions declared below.
typedef struct Stack Stack;

// Identifies one version of a stack. Most functions below take a
// StackVersion to select the version they operate on.
typedef unsigned StackVersion;
// The all-ones StackVersion value (-1 converted to the unsigned type).
// NOTE(review): presumably a sentinel meaning "no version" — confirm at the
// call sites.
#define STACK_VERSION_NONE ((StackVersion)-1)

// One element of the result of a pop operation: a stack version paired with
// the trees that were removed from that version.
typedef struct {
  // The trees that were removed from `version`.
  SubtreeArray subtrees;
  // The stack version these trees were removed from.
  StackVersion version;
} StackSlice;
// Array of StackSlice; the return type of ts_stack_pop_count,
// ts_stack_pop_pending and ts_stack_pop_all.
typedef Array(StackSlice) StackSliceArray;

// One entry of a stack summary (see ts_stack_record_summary and
// ts_stack_get_summary).
// NOTE(review): the field meanings below are inferred from their names and
// from the summary being described as covering "the parse states near the top"
// of a stack version — confirm against stack.c.
typedef struct {
  // Presumably the document position associated with this entry.
  Length position;
  // Presumably how far below the top of the stack this entry was found.
  unsigned depth;
  // Presumably the parse state recorded for this entry.
  TSStateId state;
} StackSummaryEntry;
// Array of StackSummaryEntry; ts_stack_get_summary returns a pointer to one.
typedef Array(StackSummaryEntry) StackSummary;

// Create a stack. The result is passed to the other ts_stack_* functions and
// is released with ts_stack_delete.
// NOTE(review): the SubtreePool argument is presumably the pool the stack
// uses when it allocates or releases subtrees — confirm against stack.c.
Stack *ts_stack_new(SubtreePool *);

// Release the memory reserved for a given stack.
void ts_stack_delete(Stack *);

// Get the stack's current number of versions.
// NOTE(review): StackVersion arguments elsewhere in this header are
// presumably indices in the range [0, version count) — confirm.
uint32_t ts_stack_version_count(const Stack *);

// Get the state at the top of the given version of the stack. If the stack is
// empty, this returns the initial state, 0.
TSStateId ts_stack_state(const Stack *, StackVersion);

// Get the last external token associated with a given version of the stack.
// NOTE(review): whether the returned Subtree is retained on behalf of the
// caller is not stated here — confirm before releasing it.
Subtree ts_stack_last_external_token(const Stack *, StackVersion);

// Set the last external token associated with a given version of the stack.
// NOTE(review): whether the stack takes ownership of the Subtree (as
// ts_stack_push is documented to do) is not stated here — confirm.
void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );

// Get the position of the given version of the stack within the document.
Length ts_stack_position(const Stack *, StackVersion);

// Push a tree and state onto the given version of the stack.
//
// This transfers ownership of the tree to the Stack. Callers that
// need to retain ownership of the tree for their own purposes should
// first retain the tree.
//
// NOTE(review): the unnamed bool parameter is not documented in this header.
// It presumably marks the pushed tree as "pending" (compare
// ts_stack_pop_pending) — confirm against stack.c.
void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId);

// Pop the given number of entries from the given version of the stack. This
// operation can increase the number of stack versions by revealing multiple
// versions which had previously been merged. It returns an array that
// specifies the index of each revealed version and the trees that were
// removed from that version.
StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count);

// Remove an error at the top of the given version of the stack.
// Unlike the other pop functions, this returns a single SubtreeArray rather
// than a StackSliceArray.
// NOTE(review): the result when there is no error at the top of the version
// is not documented here — confirm against stack.c.
SubtreeArray ts_stack_pop_error(Stack *, StackVersion);

// Remove any pending trees from the top of the given version of the stack.
StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);

// Remove all trees from the given version of the stack.
StackSliceArray ts_stack_pop_all(Stack *, StackVersion);

// Get the maximum number of tree nodes reachable from this version of the stack
// since the last error was detected.
unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);

// NOTE(review): undocumented in this header. Judging by the name, this
// returns the dynamic precedence accumulated on the given version of the
// stack — confirm against stack.c. Unlike the neighbouring getters, it takes
// a non-const Stack *.
int ts_stack_dynamic_precedence(Stack *, StackVersion);

// NOTE(review): undocumented in this header. Judging by the name, this
// reports whether the given version of the stack has made progress since the
// last error was detected; what counts as progress must be confirmed against
// stack.c.
bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);

// Compute a summary of all the parse states near the top of the given
// version of the stack and store the summary for later retrieval.
// NOTE(review): max_depth presumably bounds how far below the top of the
// stack the summary extends — confirm against stack.c.
void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);

// Retrieve a summary of all the parse states near the top of the
// given version of the stack.
// Returns a pointer to a StackSummary (an array of StackSummaryEntry).
// NOTE(review): this is presumably the summary stored by
// ts_stack_record_summary for that version; the result when no summary has
// been recorded, and the lifetime of the pointed-to array, are not stated
// here — confirm before storing or freeing the pointer.
StackSummary *ts_stack_get_summary(Stack *, StackVersion);

// Get the total cost of all errors on the given version of the stack.
unsigned ts_stack_error_cost(const Stack *, StackVersion version);

// Merge the given two stack versions if possible, returning true
// if they were successfully merged and false otherwise.
// NOTE(review): which of the two versions survives a successful merge, and
// how the remaining versions are renumbered, is not stated here — confirm
// against stack.c.
bool ts_stack_merge(Stack *, StackVersion, StackVersion);

// Determine whether the given two stack versions can be merged.
bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);

// NOTE(review): the six declarations from here through ts_stack_is_halted are
// undocumented in this header. The descriptions on them are inferred from
// names and signatures only and must be confirmed against stack.c.
//
// Presumably makes a paused version active again. Returns a TSSymbol,
// presumably the one that was supplied to ts_stack_pause for that version.
TSSymbol ts_stack_resume(Stack *, StackVersion);

// Presumably marks the given version as paused, recording the given symbol
// with it.
void ts_stack_pause(Stack *, StackVersion, TSSymbol);

// Presumably marks the given version as halted.
void ts_stack_halt(Stack *, StackVersion);

// Presumably true when the given version is neither paused nor halted.
bool ts_stack_is_active(const Stack *, StackVersion);

// Presumably true when the given version has been paused and not yet resumed.
bool ts_stack_is_paused(const Stack *, StackVersion);

// Presumably true when the given version has been halted.
bool ts_stack_is_halted(const Stack *, StackVersion);

// NOTE(review): undocumented in this header. Judging by the name, this moves
// one version to the index of another; which argument is the source and which
// the destination, and what happens to the version previously at the
// destination, must be confirmed against stack.c.
void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);

// NOTE(review): undocumented in this header. Presumably exchanges the two
// given versions — confirm against stack.c.
void ts_stack_swap_versions(Stack *, StackVersion, StackVersion);

// NOTE(review): undocumented in this header. Presumably duplicates the given
// version and returns the StackVersion of the copy — confirm against stack.c.
StackVersion ts_stack_copy_version(Stack *, StackVersion);

// Remove the given version from the stack.
void ts_stack_remove_version(Stack *, StackVersion);

// NOTE(review): undocumented in this header. Presumably resets the stack to
// its initial state — confirm against stack.c.
void ts_stack_clear(Stack *);

// NOTE(review): undocumented in this header. Judging by the name and the
// FILE * parameter, this writes a DOT-format graph of the stack to the given
// stream, with the TSLanguage presumably used to obtain names for the output.
// The meaning of the bool result (presumably success or failure) must be
// confirmed against stack.c.
bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *);

// Pointer to a function that takes (void *, TSStateId, uint32_t) and returns
// nothing.
// NOTE(review): no declaration in this header takes or returns this type, and
// the meaning of its three arguments is not stated here — look for its users
// (possibly test-only code) before relying on it.
typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t);

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_PARSE_STACK_H_
Just call the C lib 'the library' everywhere, don't call it a 'runtime' 2019-01-10 15:22:39 -08:00			`#ifndef TREE_SITTER_PARSE_STACK_H_`
			`#define TREE_SITTER_PARSE_STACK_H_`
Start work on graph-structured stack 2015-05-25 20:21:13 -07:00
			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

Update include paths to not reference 'runtime' directory 2019-01-04 17:33:34 -08:00			`#include "./array.h"`
			`#include "./subtree.h"`
			`#include "./error_costs.h"`
Write to file directly from stack debugging function 2016-04-02 22:18:44 -07:00			`#include <stdio.h>`
Start work on graph-structured stack 2015-05-25 20:21:13 -07:00
Rename ParseStack -> Stack 2015-09-18 18:04:52 -07:00			`typedef struct Stack Stack;`
Start work on graph-structured stack 2015-05-25 20:21:13 -07:00
Remove unneeded parameters from public interface of stack_iterate callback 2017-06-29 16:43:56 -07:00			`typedef unsigned StackVersion;`
Merge equivalent stacks in a separate stage of parsing * No more automatic merging every time a state is pushed to the stack * When popping from the stack, the current version is always preserved 2016-04-10 14:12:24 -07:00			`#define STACK_VERSION_NONE ((StackVersion)-1)`
Rename stack heads to versions 2016-04-04 12:25:57 -07:00
Encapsulate ParseStackNodes 2015-06-03 09:44:13 -07:00			`typedef struct {`
Rename Tree -> Subtree 2018-05-10 15:11:14 -07:00			`SubtreeArray subtrees;`
Rename stack heads to versions 2016-04-04 12:25:57 -07:00			`StackVersion version;`
Rename StackPopResult -> StackSlice 2016-03-03 10:16:10 -08:00			`} StackSlice;`
Give StackPushResult enumerators shorter names 2016-03-03 10:20:05 -08:00			`typedef Array(StackSlice) StackSliceArray;`

Halt stack pops at all error states, not just error trees 2016-03-03 11:05:37 -08:00			`typedef struct {`
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`Length position;`
			`unsigned depth;`
			`TSStateId state;`
			`} StackSummaryEntry;`
			`typedef Array(StackSummaryEntry) StackSummary;`
Halt stack pops at all error states, not just error trees 2016-03-03 11:05:37 -08:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Create a stack.`
Rename Tree -> Subtree 2018-05-10 15:11:14 -07:00			`Stack *ts_stack_new(SubtreePool *);`
Encapsulate ParseStackNodes 2015-06-03 09:44:13 -07:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Release the memory reserved for a given stack.`
Rename ParseStack -> Stack 2015-09-18 18:04:52 -07:00			`void ts_stack_delete(Stack *);`
Start work on graph-structured stack 2015-05-25 20:21:13 -07:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Get the stack's current number of versions.`
Represent byte, char and tree counts as 32 bit numbers The parser spends the majority of its time allocating and freeing trees and stack nodes. Also, the memory footprint of the AST is a significant concern when using tree-sitter with large files. This library is already unlikely to work very well with source files larger than 4GB, so representing rows, columns, byte lengths and child indices as unsigned 32 bit integers seems like the right choice. 2016-11-14 12:15:24 -08:00			`uint32_t ts_stack_version_count(const Stack *);`
Start work on graph-structured stack 2015-05-25 20:21:13 -07:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Get the state at the top of the given version of the stack. If the stack is`
			`// empty, this returns the initial state, 0.`
			`TSStateId ts_stack_state(const Stack *, StackVersion);`
Encapsulate ParseStackNodes 2015-06-03 09:44:13 -07:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Get the last external token associated with a given version of the stack.`
Cram terminal subtree data into a 64-bit integer when possible 2018-09-17 13:12:13 -07:00			`Subtree ts_stack_last_external_token(const Stack *, StackVersion);`
Add ability to store external token state per stack version 2017-01-04 21:22:23 -08:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Set the last external token associated with a given version of the stack.`
Cram terminal subtree data into a 64-bit integer when possible 2018-09-17 13:12:13 -07:00			`void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );`
Update parse count correctly when repairing errors & undoing reductions 2016-09-01 10:04:20 -07:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Get the position of the given version of the stack within the document.`
			`Length ts_stack_position(const Stack *, StackVersion);`
Don't include trailing ubiquitous tokens as children when reducing 2015-12-02 07:53:15 -08:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Push a tree and state onto the given version of the stack.`
			`//`
			`// This transfers ownership of the tree to the Stack. Callers that`
			`// need to retain ownership of the tree for their own purposes should`
			`// first retain the tree.`
Cram terminal subtree data into a 64-bit integer when possible 2018-09-17 13:12:13 -07:00			`void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId);`
Encapsulate ParseStackNodes 2015-06-03 09:44:13 -07:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Pop the given number of entries from the given version of the stack. This`
			`// operation can increase the number of stack versions by revealing multiple`
			`// versions which had previously been merged. It returns an array that`
			`// specifies the index of each revealed version and the trees that were`
			`// removed from that version.`
			`StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count);`
Use graph-structured parse stack in parser Not using the splitting feature yet. 2015-06-18 15:04:03 -07:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Remove an error at the top of the given version of the stack.`
Rename Tree -> Subtree 2018-05-10 15:11:14 -07:00			`SubtreeArray ts_stack_pop_error(Stack *, StackVersion);`
Simplify error recovery; eliminate recovery states The previous approach to error recovery relied on special error-recovery states in the parse table. For each token T, there was an error recovery state in which the parser looked for any token that could follow T. Unfortunately, sometimes the set of tokens that could follow T contained conflicts. For example, in JS, the token '}' can be followed by the open-ended 'template_chars' token, but also by ordinary tokens like 'identifier'. So with the old algorithm, when recovering from an unexpected '}' token, the lexer had no way to distinguish identifiers from template_chars. This commit drops the error recovery states. Instead, when we encounter an unexpected token T, we recover from the error by finding a previous state S in the stack in which T would be valid, popping all of the nodes after S, and wrapping them in an error. This way, the lexer is always invoked in a normal parse state, in which it is looking for a non-conflicting set of tokens. Eliminating the error recovery states also shrinks the lex state machine significantly. Signed-off-by: Rick Winfrey <rewinfrey@github.com> 2017-09-11 15:22:52 -07:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Remove any pending trees from the top of the given version of the stack.`
			`StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);`
Store `verifying` flag within parse stack 2016-03-31 12:03:07 -07:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Remove any all trees from the given version of the stack.`
			`StackSliceArray ts_stack_pop_all(Stack *, StackVersion);`
Add function for popping all nodes from the stack 2016-04-04 11:44:45 -07:00
Remove ts_stack_force_merge function 2018-04-02 11:57:26 -07:00			`// Get the maximum number of tree nodes reachable from this version of the stack`
			`// since the last error was detected.`
Maintain a total node count on every tree This simplifies (and fixes bugs in) the parse stack's tracking of its total node count since the last error, which is needed for error recovery. 2018-04-02 10:57:44 -07:00			`unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);`
Simplify error recovery; eliminate recovery states The previous approach to error recovery relied on special error-recovery states in the parse table. For each token T, there was an error recovery state in which the parser looked for any token that could follow T. Unfortunately, sometimes the set of tokens that could follow T contained conflicts. For example, in JS, the token '}' can be followed by the open-ended 'template_chars' token, but also by ordinary tokens like 'identifier'. So with the old algorithm, when recovering from an unexpected '}' token, the lexer had no way to distinguish identifiers from template_chars. This commit drops the error recovery states. Instead, when we encounter an unexpected token T, we recover from the error by finding a previous state S in the stack in which T would be valid, popping all of the nodes after S, and wrapping them in an error. This way, the lexer is always invoked in a normal parse state, in which it is looking for a non-conflicting set of tokens. Eliminating the error recovery states also shrinks the lex state machine significantly. Signed-off-by: Rick Winfrey <rewinfrey@github.com> 2017-09-11 15:22:52 -07:00
Take total dynamic precedence into account in stack version sorting Signed-off-by: Josh Vera <vera@github.com> 2017-10-09 15:51:22 -07:00			`int ts_stack_dynamic_precedence(Stack *, StackVersion);`

Add guard to prevent infinite loops in error recovery 2018-11-08 11:29:21 -08:00			`bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);`

Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Compute a summary of all the parse states near the top of the given`
			`// version of the stack and store the summary for later retrieval.`
Limit the number of stack nodes that are included in a summary 2017-09-12 12:00:00 -07:00			`void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);`
Simplify error recovery; eliminate recovery states The previous approach to error recovery relied on special error-recovery states in the parse table. For each token T, there was an error recovery state in which the parser looked for any token that could follow T. Unfortunately, sometimes the set of tokens that could follow T contained conflicts. For example, in JS, the token '}' can be followed by the open-ended 'template_chars' token, but also by ordinary tokens like 'identifier'. So with the old algorithm, when recovering from an unexpected '}' token, the lexer had no way to distinguish identifiers from template_chars. This commit drops the error recovery states. Instead, when we encounter an unexpected token T, we recover from the error by finding a previous state S in the stack in which T would be valid, popping all of the nodes after S, and wrapping them in an error. This way, the lexer is always invoked in a normal parse state, in which it is looking for a non-conflicting set of tokens. Eliminating the error recovery states also shrinks the lex state machine significantly. Signed-off-by: Rick Winfrey <rewinfrey@github.com> 2017-09-11 15:22:52 -07:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Retrieve a summary of all the parse states near the top of the`
			`// given version of the stack.`
Simplify error recovery; eliminate recovery states The previous approach to error recovery relied on special error-recovery states in the parse table. For each token T, there was an error recovery state in which the parser looked for any token that could follow T. Unfortunately, sometimes the set of tokens that could follow T contained conflicts. For example, in JS, the token '}' can be followed by the open-ended 'template_chars' token, but also by ordinary tokens like 'identifier'. So with the old algorithm, when recovering from an unexpected '}' token, the lexer had no way to distinguish identifiers from template_chars. This commit drops the error recovery states. Instead, when we encounter an unexpected token T, we recover from the error by finding a previous state S in the stack in which T would be valid, popping all of the nodes after S, and wrapping them in an error. This way, the lexer is always invoked in a normal parse state, in which it is looking for a non-conflicting set of tokens. Eliminating the error recovery states also shrinks the lex state machine significantly. Signed-off-by: Rick Winfrey <rewinfrey@github.com> 2017-09-11 15:22:52 -07:00			`StackSummary *ts_stack_get_summary(Stack *, StackVersion);`

Remove ts_stack_force_merge function 2018-04-02 11:57:26 -07:00			`// Get the total cost of all errors on the given version of the stack.`
Refactor error comparisons * Deal with mergeability outside of error comparison function * Make `better_version_exists` function pure (don't halt other versions as a side effect). * Tweak error comparison logic Signed-off-by: Rick Winfrey <rewinfrey@github.com> 2017-09-13 16:38:15 -07:00			`unsigned ts_stack_error_cost(const Stack *, StackVersion version);`
Replace stack_merge_new function with two simpler functions - merge(version1, version2) - split(version) 2016-05-28 21:22:10 -07:00
Remove ts_stack_force_merge function 2018-04-02 11:57:26 -07:00			`// Merge the given two stack versions if possible, returning true`
			`// if they were successfully merged and false otherwise.`
Swap two incorrectly placed comments 2019-03-26 16:53:35 +01:00			`bool ts_stack_merge(Stack *, StackVersion, StackVersion);`

			`// Determine whether the given two stack versions can be merged.`
Improve heuristics for pruning parse versions based on errors * Rewrite the error cost comparison in terms of explicit, discrete conditions. * Allow merging versions have different error costs. * Store the depth of each stack version since the last error. Use this state to prevent incorrect merging. * Sort the stack versions in order of preference and put a hard limit on the version count. 2017-06-29 14:58:20 -07:00			`bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);`

Allow stack versions to be temporarily paused This way, when detecting an error, we can defer the decision about whether to bail or recover until all stack versions are processed. 2018-04-02 09:47:01 -07:00			`TSSymbol ts_stack_resume(Stack *, StackVersion);`

			`void ts_stack_pause(Stack *, StackVersion, TSSymbol);`

Abort erroneous parse versions more eagerly 2016-06-02 14:04:48 -07:00			`void ts_stack_halt(Stack *, StackVersion);`
Merge equivalent stacks in a separate stage of parsing * No more automatic merging every time a state is pushed to the stack * When popping from the stack, the current version is always preserved 2016-04-10 14:12:24 -07:00
Allow stack versions to be temporarily paused This way, when detecting an error, we can defer the decision about whether to bail or recover until all stack versions are processed. 2018-04-02 09:47:01 -07:00			`bool ts_stack_is_active(const Stack *, StackVersion);`

			`bool ts_stack_is_paused(const Stack *, StackVersion);`

			`bool ts_stack_is_halted(const Stack *, StackVersion);`
Rework logic for when to abandon parses with errors 2016-05-29 22:36:47 -07:00
Merge equivalent stacks in a separate stage of parsing * No more automatic merging every time a state is pushed to the stack * When popping from the stack, the current version is always preserved 2016-04-10 14:12:24 -07:00			`void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);`
Start work on graph-structured stack 2015-05-25 20:21:13 -07:00
Improve heuristics for pruning parse versions based on errors * Rewrite the error cost comparison in terms of explicit, discrete conditions. * Allow merging versions have different error costs. * Store the depth of each stack version since the last error. Use this state to prevent incorrect merging. * Sort the stack versions in order of preference and put a hard limit on the version count. 2017-06-29 14:58:20 -07:00			`void ts_stack_swap_versions(Stack *, StackVersion, StackVersion);`

Clean up some methods in parser.c 2016-11-14 17:25:55 -08:00			`StackVersion ts_stack_copy_version(Stack *, StackVersion);`
Discard tokens after error detection to find the best repair * Use GLR stack-splitting to try all numbers of tokens to discard until a repair is found. * Check the validity of repairs by looking at the child trees, rather than the statically-computed 'in-progress symbols' list 2016-05-09 14:31:44 -07:00
Clean up Stack API * Remove StackPopResult * Rename top_state() -> state() * Rename top_position() -> position() * Improve docs 2018-03-29 17:37:54 -07:00			`// Remove the given version from the stack.`
Rename stack heads to versions 2016-04-04 12:25:57 -07:00			`void ts_stack_remove_version(Stack *, StackVersion);`
Split parse stack when there are multiple parse actions 2015-07-08 17:34:21 -07:00
Rename ParseStack -> Stack 2015-09-18 18:04:52 -07:00			`void ts_stack_clear(Stack *);`
Use graph-structured parse stack in parser Not using the splitting feature yet. 2015-06-18 15:04:03 -07:00
Make stack_print_dot_graph function take a language as an argument 2018-04-08 13:49:20 -07:00			`bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *);`
Simplify testing-only ts_stack_iterate function 2018-03-29 17:50:07 -07:00
			`typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t);`

Start work on graph-structured stack 2015-05-25 20:21:13 -07:00			`#ifdef __cplusplus`
			`}`
			`#endif`

Just call the C lib 'the library' everywhere, don't call it a 'runtime' 2019-01-10 15:22:39 -08:00			`#endif // TREE_SITTER_PARSE_STACK_H_`