2015-05-25 20:21:13 -07:00
|
|
|
#ifndef RUNTIME_PARSE_STACK_H_
|
|
|
|
|
#define RUNTIME_PARSE_STACK_H_
|
|
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
extern "C" {
|
|
|
|
|
#endif
|
|
|
|
|
|
2016-02-17 20:41:29 -08:00
|
|
|
#include "runtime/array.h"
|
2016-02-22 09:23:25 -08:00
|
|
|
#include "runtime/tree.h"
|
2016-08-31 10:51:59 -07:00
|
|
|
#include "runtime/error_costs.h"
|
2016-04-02 22:18:44 -07:00
|
|
|
#include <stdio.h>
|
2015-05-25 20:21:13 -07:00
|
|
|
|
2015-09-18 18:04:52 -07:00
|
|
|
/*
 * Opaque parse-stack handle. The struct is only forward-declared in this
 * header, so callers work with it exclusively through `Stack *`.
 */
typedef struct Stack Stack;
|
2015-05-25 20:21:13 -07:00
|
|
|
|
2017-06-29 16:43:56 -07:00
|
|
|
/*
 * Identifies one version of a stack. Most ts_stack_* functions take a
 * StackVersion next to the Stack pointer to select the version they act on.
 */
typedef unsigned StackVersion;
|
2016-04-10 14:12:24 -07:00
|
|
|
/*
 * Sentinel StackVersion: -1 converted to the unsigned StackVersion type, i.e.
 * the largest value that type can hold.
 * NOTE(review): presumably means "no version"; confirm against callers.
 */
#define STACK_VERSION_NONE ((StackVersion)-1)
|
2016-04-04 12:25:57 -07:00
|
|
|
|
2015-06-03 09:44:13 -07:00
|
|
|
/*
 * A set of trees paired with a stack version. Per the pop documentation in
 * this header, a pop reports, for each revealed version, that version's index
 * and the trees removed from it; StackPopResult carries an array of these.
 *   trees   - the trees belonging to this slice.
 *   version - the stack version the slice refers to.
 */
typedef struct {
|
2016-02-21 22:31:04 -08:00
|
|
|
TreeArray trees;
|
2016-04-04 12:25:57 -07:00
|
|
|
StackVersion version;
|
2016-03-03 10:16:10 -08:00
|
|
|
} StackSlice;
|
2015-06-03 09:44:13 -07:00
|
|
|
|
2016-03-03 10:20:05 -08:00
|
|
|
/*
 * Array of StackSlice values, declared through the Array() macro.
 * NOTE(review): Array() is presumably provided by runtime/array.h.
 */
typedef Array(StackSlice) StackSliceArray;
|
|
|
|
|
|
2016-03-03 11:05:37 -08:00
|
|
|
/*
 * Return type of the ts_stack_pop_* functions and ts_stack_iterate. It wraps
 * a single array of StackSlice values.
 * NOTE(review): who owns and frees `slices` is not visible in this header.
 */
typedef struct {
|
|
|
|
|
StackSliceArray slices;
|
|
|
|
|
} StackPopResult;
|
|
|
|
|
|
2017-06-29 16:43:56 -07:00
|
|
|
/*
 * Return type of a StackIterateCallback. Values are taken from the
 * StackIterate* enumerators declared in this header.
 */
typedef unsigned StackIterateAction;
|
2016-04-15 21:28:00 -07:00
|
|
|
/*
 * Values for StackIterateAction: StackIterateNone is 0 (implicit),
 * StackIterateStop is 1 and StackIteratePop is 2.
 * NOTE(review): the explicit 1 and 2 look like bit flags that may be OR-ed
 * together; confirm in the implementation of ts_stack_iterate.
 */
enum {
|
|
|
|
|
StackIterateNone,
|
2017-06-29 16:43:56 -07:00
|
|
|
StackIterateStop = 1,
|
|
|
|
|
StackIteratePop = 2,
|
2016-04-15 21:28:00 -07:00
|
|
|
};
|
|
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
/*
 * One entry of a StackSummary: a position, a depth and a parse state.
 * NOTE(review): presumably describes the state found `depth` entries below
 * the top of a stack version, for use when searching the stack for a state
 * to recover to; confirm in the implementation.
 */
typedef struct {
|
2017-09-13 09:56:51 -07:00
|
|
|
Length position;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
unsigned depth;
|
|
|
|
|
TSStateId state;
|
|
|
|
|
} StackSummaryEntry;
|
|
|
|
|
|
|
|
|
|
/*
 * Array of StackSummaryEntry values; the type returned (by pointer) from
 * ts_stack_get_summary.
 */
typedef Array(StackSummaryEntry) StackSummary;
|
|
|
|
|
|
2016-03-07 16:03:23 -08:00
|
|
|
/*
 * Callback type accepted by ts_stack_iterate. It receives an untyped pointer,
 * a parse state, a tree array and a tree count, and returns a
 * StackIterateAction.
 * NOTE(review): the void * is presumably the payload handed to
 * ts_stack_iterate, and the remaining arguments presumably describe the
 * stack entry currently being visited; confirm in the implementation.
 */
typedef StackIterateAction (*StackIterateCallback)(void *, TSStateId state,
|
2017-06-29 16:43:56 -07:00
|
|
|
const TreeArray *trees,
|
|
|
|
|
uint32_t tree_count);
|
2016-03-07 16:03:23 -08:00
|
|
|
|
2015-06-03 09:44:13 -07:00
|
|
|
/*
|
2015-08-16 19:53:34 -07:00
|
|
|
* Create a parse stack.
|
2015-06-03 09:44:13 -07:00
|
|
|
*/
|
2017-10-05 17:32:21 -07:00
|
|
|
/*
 * Takes a TreePool pointer and returns the new stack.
 * NOTE(review): how the pool is used, and whether NULL can be returned, is
 * not visible in this header.
 */
Stack *ts_stack_new(TreePool *);
|
2015-06-03 09:44:13 -07:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Release any resources reserved by a parse stack.
|
|
|
|
|
*/
|
2015-09-18 18:04:52 -07:00
|
|
|
/*
 * NOTE(review): presumably the counterpart of ts_stack_new; whether a NULL
 * stack is accepted is not visible in this header.
 */
void ts_stack_delete(Stack *);
|
2015-05-25 20:21:13 -07:00
|
|
|
|
2015-06-03 09:44:13 -07:00
|
|
|
/*
|
2016-04-04 12:25:57 -07:00
|
|
|
* Get the stack's current number of versions.
|
2015-06-03 09:44:13 -07:00
|
|
|
*/
|
2016-11-14 12:15:24 -08:00
|
|
|
/* The stack is taken through a const pointer; the count is a uint32_t. */
uint32_t ts_stack_version_count(const Stack *);
|
2015-05-25 20:21:13 -07:00
|
|
|
|
2015-06-03 09:44:13 -07:00
|
|
|
/*
|
2016-04-04 12:25:57 -07:00
|
|
|
* Get the state at the top of the given version of the stack. If the stack is
|
|
|
|
|
* empty, this returns the initial state (0).
|
2015-06-03 09:44:13 -07:00
|
|
|
*/
|
2016-04-04 12:25:57 -07:00
|
|
|
/* Arguments: the stack (const) and the version whose top state is wanted. */
TSStateId ts_stack_top_state(const Stack *, StackVersion);
|
2015-06-03 09:44:13 -07:00
|
|
|
|
2016-08-31 17:29:14 -07:00
|
|
|
/*
 * Get the push count of the given stack version.
 * NOTE(review): what exactly is counted (presumably pushes onto that
 * version) is defined by the implementation, not by this header.
 */
unsigned ts_stack_push_count(const Stack *, StackVersion);
|
|
|
|
|
|
2017-01-04 21:22:23 -08:00
|
|
|
/*
 * Decrease the push count of the given stack version.
 * NOTE(review): the unsigned argument is presumably the amount to subtract;
 * confirm in the implementation.
 */
void ts_stack_decrease_push_count(Stack *, StackVersion, unsigned);
|
|
|
|
|
|
2017-06-27 14:30:46 -07:00
|
|
|
/*
 * Get the last external token (a Tree pointer) of the given stack version.
 * NOTE(review): ownership of the returned tree, and whether it can be NULL,
 * is not visible in this header.
 */
Tree *ts_stack_last_external_token(const Stack *, StackVersion);
|
2017-01-04 21:22:23 -08:00
|
|
|
|
2017-06-27 14:30:46 -07:00
|
|
|
/*
 * Set the last external token of the given stack version.
 * NOTE(review): whether the stack retains/releases the Tree is not visible
 * in this header.
 */
void ts_stack_set_last_external_token(Stack *, StackVersion, Tree *);
|
2016-09-01 10:04:20 -07:00
|
|
|
|
2015-12-02 07:53:15 -08:00
|
|
|
/*
|
2016-04-04 12:25:57 -07:00
|
|
|
* Get the position at the top of the given version of the stack. If the stack
|
|
|
|
|
* is empty, this returns zero.
|
2015-12-02 07:53:15 -08:00
|
|
|
*/
|
2016-11-09 20:59:05 -08:00
|
|
|
/* The stack is taken through a const pointer; the Length is returned by value. */
Length ts_stack_top_position(const Stack *, StackVersion);
|
2015-12-02 07:53:15 -08:00
|
|
|
|
2015-06-03 09:44:13 -07:00
|
|
|
/*
|
2017-06-29 10:43:20 -07:00
|
|
|
* Push a tree and state onto the given head of the stack.
|
2015-06-03 09:44:13 -07:00
|
|
|
*/
|
2017-06-29 10:43:20 -07:00
|
|
|
/*
 * Arguments, in order: stack, version (the "head"), tree, a bool flag, state.
 * NOTE(review): the meaning of the bool is not documented in this header; it
 * possibly marks the pushed entry as pending (cf. ts_stack_pop_pending).
 * Confirm in the implementation.
 */
void ts_stack_push(Stack *, StackVersion, Tree *, bool, TSStateId);
|
2015-06-03 09:44:13 -07:00
|
|
|
|
|
|
|
|
/*
|
2016-04-04 12:25:57 -07:00
|
|
|
* Pop the given number of entries from the given version of the stack. This
|
|
|
|
|
* operation can increase the number of stack versions by revealing multiple
|
|
|
|
|
* versions which had previously been merged. It returns a struct that
|
|
|
|
|
* indicates the index of each revealed version and the trees removed from that
|
|
|
|
|
* version.
|
2015-06-03 09:44:13 -07:00
|
|
|
*/
|
2016-11-14 12:15:24 -08:00
|
|
|
/*
 * `count` is the number of entries to pop. The returned StackPopResult holds
 * the StackSlice values (version plus removed trees) for the revealed
 * versions.
 */
StackPopResult ts_stack_pop_count(Stack *, StackVersion, uint32_t count);
|
2016-03-07 16:03:23 -08:00
|
|
|
|
2017-06-29 16:43:56 -07:00
|
|
|
/*
 * Iterate over the given stack version using a StackIterateCallback and
 * return a StackPopResult.
 * NOTE(review): the trailing void * is presumably passed through as the
 * callback's first argument, and the callback's StackIterateAction result
 * presumably controls stopping and popping; confirm in the implementation.
 */
StackPopResult ts_stack_iterate(Stack *, StackVersion, StackIterateCallback, void *);
|
2015-06-18 15:04:03 -07:00
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
/*
 * Pop variant concerned with error entries on the given stack version;
 * returns a StackPopResult like ts_stack_pop_count.
 * NOTE(review): the exact condition under which anything is popped is not
 * visible in this header.
 */
StackPopResult ts_stack_pop_error(Stack *, StackVersion);
|
|
|
|
|
|
2016-04-04 12:25:57 -07:00
|
|
|
/*
 * Pop variant concerned with pending entries on the given stack version;
 * returns a StackPopResult like ts_stack_pop_count.
 * NOTE(review): what makes an entry "pending" is not visible in this header.
 */
StackPopResult ts_stack_pop_pending(Stack *, StackVersion);
|
2016-03-31 12:03:07 -07:00
|
|
|
|
2016-04-24 00:54:20 -07:00
|
|
|
/*
 * Pop variant that, going by its name, removes every entry of the given
 * stack version; returns a StackPopResult like ts_stack_pop_count.
 * NOTE(review): confirm the exact behavior in the implementation.
 */
StackPopResult ts_stack_pop_all(Stack *, StackVersion);
|
2016-04-04 11:44:45 -07:00
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
/*
 * Returns an unsigned depth for the given stack version.
 * NOTE(review): presumably the number of entries above the most recent
 * error; confirm in the implementation.
 */
unsigned ts_stack_depth_since_error(Stack *, StackVersion);
|
|
|
|
|
|
2017-10-09 15:51:22 -07:00
|
|
|
/*
 * Returns the dynamic precedence (a signed int) of the given stack version.
 * NOTE(review): how the value is accumulated is not visible in this header.
 */
int ts_stack_dynamic_precedence(Stack *, StackVersion);
|
|
|
|
|
|
2017-09-12 12:00:00 -07:00
|
|
|
/*
 * Record a summary for the given stack version, limited by `max_depth`.
 * NOTE(review): the result is presumably what ts_stack_get_summary returns;
 * confirm in the implementation.
 */
void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
|
|
|
|
|
/*
 * Returns a pointer to the StackSummary of the given stack version.
 * NOTE(review): whether this can be NULL (e.g. when no summary was
 * recorded) and who owns the summary is not visible in this header.
 */
StackSummary *ts_stack_get_summary(Stack *, StackVersion);
|
|
|
|
|
|
2017-09-13 16:38:15 -07:00
|
|
|
/*
 * Returns the error cost of the given stack version.
 * NOTE(review): the cost units presumably come from runtime/error_costs.h,
 * which this header includes; confirm.
 */
unsigned ts_stack_error_cost(const Stack *, StackVersion version);
|
2016-05-28 21:22:10 -07:00
|
|
|
|
2016-06-02 14:04:48 -07:00
|
|
|
/*
 * Merge operation on two versions of the stack; returns a bool.
 * NOTE(review): presumably returns true only when the two versions were
 * actually merged; argument order and merge direction are not visible here.
 */
bool ts_stack_merge(Stack *, StackVersion, StackVersion);
|
2016-04-24 00:55:19 -07:00
|
|
|
|
2017-06-29 14:58:20 -07:00
|
|
|
/*
 * Query whether two versions of the stack can be merged.
 * NOTE(review): the criteria for mergeability are defined in the
 * implementation, not in this header.
 */
bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
|
|
|
|
|
|
|
|
|
|
/*
 * Merge two versions of the stack; unlike ts_stack_merge this returns
 * nothing.
 * NOTE(review): presumably skips the check done by ts_stack_can_merge;
 * confirm in the implementation.
 */
void ts_stack_force_merge(Stack *, StackVersion, StackVersion);
|
|
|
|
|
|
2016-06-02 14:04:48 -07:00
|
|
|
/*
 * Halt the given stack version; see ts_stack_is_halted for the query.
 * NOTE(review): what halting implies for later operations on the version is
 * not visible in this header.
 */
void ts_stack_halt(Stack *, StackVersion);
|
2016-04-10 14:12:24 -07:00
|
|
|
|
2016-06-02 14:04:48 -07:00
|
|
|
/*
 * Query whether the given stack version is halted.
 * NOTE(review): presumably true after ts_stack_halt was called on it.
 */
bool ts_stack_is_halted(Stack *, StackVersion);
|
2016-05-29 22:36:47 -07:00
|
|
|
|
2016-04-10 14:12:24 -07:00
|
|
|
/*
 * Renumber a stack version; takes two StackVersion values.
 * NOTE(review): which argument is the source and which the destination, and
 * what happens to a version already at the destination, is not visible here.
 */
void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);
|
2015-05-25 20:21:13 -07:00
|
|
|
|
2017-06-29 14:58:20 -07:00
|
|
|
/* Swap the two given versions of the stack. */
void ts_stack_swap_versions(Stack *, StackVersion, StackVersion);
|
|
|
|
|
|
2016-11-14 17:25:55 -08:00
|
|
|
/*
 * Copy the given stack version and return a StackVersion.
 * NOTE(review): the return value is presumably the identifier of the new
 * copy; confirm in the implementation.
 */
StackVersion ts_stack_copy_version(Stack *, StackVersion);
|
2016-05-09 14:31:44 -07:00
|
|
|
|
2015-07-08 17:34:21 -07:00
|
|
|
/*
|
2016-04-04 12:25:57 -07:00
|
|
|
* Remove the given version from the stack.
|
2015-07-08 17:34:21 -07:00
|
|
|
*/
|
2016-04-04 12:25:57 -07:00
|
|
|
/*
 * NOTE(review): whether the identifiers of the remaining versions shift
 * after a removal is not visible in this header; confirm before holding on
 * to StackVersion values across this call.
 */
void ts_stack_remove_version(Stack *, StackVersion);
|
2015-07-08 17:34:21 -07:00
|
|
|
|
2015-06-18 15:04:03 -07:00
|
|
|
/*
|
|
|
|
|
* Remove all entries from the stack.
|
|
|
|
|
*/
|
2015-09-18 18:04:52 -07:00
|
|
|
/*
 * NOTE(review): the number of versions left after clearing is not visible
 * in this header.
 */
void ts_stack_clear(Stack *);
|
2015-06-18 15:04:03 -07:00
|
|
|
|
2016-05-16 10:44:19 -07:00
|
|
|
/*
 * Print the stack as a DOT graph to the given FILE stream.
 * NOTE(review): the const char ** is presumably a table of symbol names used
 * for labels, and the bool presumably reports success; confirm in the
 * implementation.
 */
bool ts_stack_print_dot_graph(Stack *, const char **, FILE *);
|
2016-02-23 00:08:55 -08:00
|
|
|
|
2015-05-25 20:21:13 -07:00
|
|
|
#ifdef __cplusplus
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#endif // RUNTIME_PARSE_STACK_H_
|