2016-01-15 15:08:42 -08:00
|
|
|
#include "runtime/alloc.h"
|
2015-05-25 20:21:13 -07:00
|
|
|
#include "runtime/tree.h"
|
2016-02-17 20:41:29 -08:00
|
|
|
#include "runtime/array.h"
|
2015-09-18 18:04:52 -07:00
|
|
|
#include "runtime/stack.h"
|
2015-05-28 15:06:39 -07:00
|
|
|
#include "runtime/length.h"
|
2015-05-25 20:21:13 -07:00
|
|
|
#include <assert.h>
|
2016-01-28 21:18:57 -08:00
|
|
|
#include <stdio.h>
|
2015-05-25 20:21:13 -07:00
|
|
|
|
2016-04-11 22:41:06 -07:00
|
|
|
// Maximum number of alternative backward links (ambiguous parses) that a
// single stack node may hold.
#define MAX_LINK_COUNT 8

// Maximum number of released stack nodes kept for reuse in the node pool
// before falling back to ts_free.
#define MAX_NODE_POOL_SIZE 50

// Upper bound on simultaneous traversal iterators, limiting the cost of
// walking highly ambiguous stacks.
#define MAX_ITERATOR_COUNT 64
|
2015-06-03 09:44:13 -07:00
|
|
|
|
2017-07-24 21:06:41 -07:00
|
|
|
// Portability shim: force inlining of the hot helper functions below.
// NOTE(review): redefining the `inline` keyword is unconventional — on
// non-Windows builds it expands to `static inline always_inline`, so
// functions declared `inline` here also get internal linkage.
#ifdef _WIN32
#define inline __forceinline
#else
#define inline static inline __attribute__((always_inline))
#endif
|
2016-03-07 16:03:23 -08:00
|
|
|
|
2016-02-23 17:35:50 -08:00
|
|
|
typedef struct StackNode StackNode;
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
StackNode *node;
|
2016-11-09 20:59:05 -08:00
|
|
|
Tree *tree;
|
2016-03-31 12:03:07 -07:00
|
|
|
bool is_pending;
|
2016-02-23 17:35:50 -08:00
|
|
|
} StackLink;
|
|
|
|
|
|
|
|
|
|
struct StackNode {
|
2016-03-07 16:03:23 -08:00
|
|
|
TSStateId state;
|
2016-11-09 20:59:05 -08:00
|
|
|
Length position;
|
2016-04-11 22:41:06 -07:00
|
|
|
StackLink links[MAX_LINK_COUNT];
|
|
|
|
|
short unsigned int link_count;
|
2017-06-29 06:35:26 -07:00
|
|
|
uint32_t ref_count;
|
2016-06-12 17:27:08 -07:00
|
|
|
unsigned error_cost;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
unsigned depth;
|
2016-02-23 17:35:50 -08:00
|
|
|
};
|
2015-05-25 20:21:13 -07:00
|
|
|
|
2015-11-20 00:01:53 -08:00
|
|
|
typedef struct {
|
2017-06-29 14:58:20 -07:00
|
|
|
StackNode *node;
|
2016-02-17 20:41:29 -08:00
|
|
|
TreeArray trees;
|
2016-11-14 12:15:24 -08:00
|
|
|
uint32_t tree_count;
|
2016-03-31 12:03:07 -07:00
|
|
|
bool is_pending;
|
2016-09-01 10:04:20 -07:00
|
|
|
} Iterator;
|
2015-11-20 00:01:53 -08:00
|
|
|
|
2017-06-29 16:43:56 -07:00
|
|
|
typedef struct {
|
|
|
|
|
void *payload;
|
|
|
|
|
StackIterateCallback callback;
|
|
|
|
|
} StackIterateSession;
|
|
|
|
|
|
2016-02-25 21:46:13 -08:00
|
|
|
// Pool of released stack nodes awaiting reuse (see MAX_NODE_POOL_SIZE).
typedef Array(StackNode *) StackNodeArray;
|
|
|
|
|
|
2016-05-10 15:24:06 -07:00
|
|
|
typedef struct {
|
|
|
|
|
StackNode *node;
|
2017-06-27 14:30:46 -07:00
|
|
|
Tree *last_external_token;
|
2017-06-29 14:58:20 -07:00
|
|
|
uint32_t push_count;
|
|
|
|
|
bool is_halted;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
StackSummary *summary;
|
2016-05-10 15:24:06 -07:00
|
|
|
} StackHead;
|
|
|
|
|
|
2016-02-17 14:45:00 -08:00
|
|
|
struct Stack {
|
2016-05-10 15:24:06 -07:00
|
|
|
Array(StackHead) heads;
|
2016-03-03 10:16:10 -08:00
|
|
|
StackSliceArray slices;
|
2016-09-01 10:04:20 -07:00
|
|
|
Array(Iterator) iterators;
|
2016-02-25 21:46:13 -08:00
|
|
|
StackNodeArray node_pool;
|
2016-03-07 16:03:23 -08:00
|
|
|
StackNode *base_node;
|
|
|
|
|
};
|
|
|
|
|
|
2017-06-29 16:43:56 -07:00
|
|
|
// Internal traversal callback: invoked at each node visited by stack__iter
// to decide whether to pop a slice and/or stop following this path.
typedef StackIterateAction (*StackIterateInternalCallback)(void *, const Iterator *);
|
|
|
|
|
|
2016-03-07 16:03:23 -08:00
|
|
|
// Increment a node's reference count. A NULL node (the absent predecessor of
// the base sentinel) is silently ignored.
static void stack_node_retain(StackNode *self) {
  if (!self)
    return;
  assert(self->ref_count > 0);
  self->ref_count++;
  assert(self->ref_count != 0);  // detect counter overflow
}
|
|
|
|
|
|
|
|
|
|
// Decrement a node's reference count, destroying it when the count reaches
// zero: release each outgoing link's tree and predecessor, then recycle the
// node into `pool` (or free it if the pool is full). The FIRST predecessor
// is handled iteratively via `goto recur` rather than by recursion, so that
// releasing a long linear chain of nodes cannot overflow the C call stack;
// only the (rare) additional ambiguity links recurse.
static void stack_node_release(StackNode *self, StackNodeArray *pool) {
recur:
  assert(self->ref_count != 0);
  self->ref_count--;
  if (self->ref_count > 0) return;

  StackNode *first_predecessor = NULL;
  if (self->link_count > 0) {
    // Recursively release links 1..n-1; remember link 0 for the tail loop.
    for (unsigned i = self->link_count - 1; i > 0; i--) {
      if (self->links[i].tree) ts_tree_release(self->links[i].tree);
      stack_node_release(self->links[i].node, pool);
    }
    if (self->links[0].tree) ts_tree_release(self->links[0].tree);
    first_predecessor = self->links[0].node;
  }

  // Recycle the node if the pool has room; otherwise free it outright.
  if (pool->size < MAX_NODE_POOL_SIZE) {
    array_push(pool, self);
  } else {
    ts_free(self);
  }

  if (first_predecessor) {
    self = first_predecessor;
    goto recur;
  }
}
|
|
|
|
|
|
2017-06-29 10:43:20 -07:00
|
|
|
static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_pending,
|
|
|
|
|
TSStateId state, StackNodeArray *pool) {
|
|
|
|
|
StackNode *node = pool->size > 0 ?
|
|
|
|
|
array_pop(pool) :
|
|
|
|
|
ts_malloc(sizeof(StackNode));
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
*node = (StackNode){.ref_count = 1, .link_count = 0, .state = state, .depth = 0};
|
2016-03-07 16:03:23 -08:00
|
|
|
|
2017-06-29 10:43:20 -07:00
|
|
|
if (previous_node) {
|
|
|
|
|
stack_node_retain(previous_node);
|
2016-05-29 22:36:47 -07:00
|
|
|
|
|
|
|
|
node->link_count = 1;
|
2016-09-01 10:04:20 -07:00
|
|
|
node->links[0] = (StackLink){
|
2017-06-29 10:43:20 -07:00
|
|
|
.node = previous_node,
|
|
|
|
|
.tree = tree,
|
|
|
|
|
.is_pending = is_pending,
|
2016-09-01 10:04:20 -07:00
|
|
|
};
|
2016-06-26 22:45:19 -07:00
|
|
|
|
2017-06-29 10:43:20 -07:00
|
|
|
node->position = previous_node->position;
|
|
|
|
|
node->error_cost = previous_node->error_cost;
|
2016-05-29 22:36:47 -07:00
|
|
|
|
|
|
|
|
if (tree) {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
node->depth = previous_node->depth;
|
|
|
|
|
if (!tree->extra) node->depth++;
|
2016-05-29 22:36:47 -07:00
|
|
|
ts_tree_retain(tree);
|
2016-08-31 10:51:59 -07:00
|
|
|
node->error_cost += tree->error_cost;
|
2017-06-29 10:43:20 -07:00
|
|
|
node->position = length_add(node->position, ts_tree_total_size(tree));
|
|
|
|
|
if (state == ERROR_STATE && !tree->extra) {
|
|
|
|
|
node->error_cost +=
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) +
|
|
|
|
|
ERROR_COST_PER_SKIPPED_CHAR * tree->size.chars +
|
|
|
|
|
ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row;
|
|
|
|
|
if (previous_node->links[0].tree) {
|
|
|
|
|
node->error_cost +=
|
|
|
|
|
ERROR_COST_PER_SKIPPED_CHAR * tree->padding.chars +
|
|
|
|
|
ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row;
|
|
|
|
|
}
|
2016-06-26 22:45:19 -07:00
|
|
|
}
|
2016-05-29 22:36:47 -07:00
|
|
|
}
|
2016-08-31 10:51:59 -07:00
|
|
|
} else {
|
2017-06-29 10:43:20 -07:00
|
|
|
node->position = length_zero();
|
2016-08-31 10:51:59 -07:00
|
|
|
node->error_cost = 0;
|
2016-03-07 16:03:23 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return node;
|
|
|
|
|
}
|
|
|
|
|
|
2017-06-29 14:58:20 -07:00
|
|
|
static bool stack__tree_is_equivalent(const Tree *left, const Tree *right) {
|
|
|
|
|
return left == right || (
|
|
|
|
|
left &&
|
|
|
|
|
right &&
|
|
|
|
|
left->child_count == 0 && right->child_count == 0 &&
|
|
|
|
|
left->symbol == right->symbol &&
|
|
|
|
|
left->padding.bytes == right->padding.bytes &&
|
|
|
|
|
left->size.bytes == right->size.bytes &&
|
|
|
|
|
left->extra == right->extra &&
|
|
|
|
|
ts_tree_external_token_state_eq(left, right));
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-10 14:12:24 -07:00
|
|
|
// Add a backward link to `self`, deduplicating against existing links.
// Retains the link's node and tree when the link is actually stored; links
// beyond MAX_LINK_COUNT are silently dropped (bounding local ambiguity).
static void stack_node_add_link(StackNode *self, StackLink link) {
  for (int i = 0; i < self->link_count; i++) {
    StackLink existing_link = self->links[i];
    if (stack__tree_is_equivalent(existing_link.tree, link.tree)) {
      // Exact duplicate: nothing to do.
      if (existing_link.node == link.node) return;

      // Same tree and same target state: merge the new node's links into
      // the existing predecessor instead of adding a parallel edge.
      if (existing_link.node->state == link.node->state) {
        for (int j = 0; j < link.node->link_count; j++) {
          stack_node_add_link(existing_link.node, link.node->links[j]);
        }
        return;
      }
    }
  }

  if (self->link_count < MAX_LINK_COUNT) {
    stack_node_retain(link.node);
    if (link.tree) ts_tree_retain(link.tree);
    self->links[self->link_count++] = link;
  }
}
|
|
|
|
|
|
2017-06-27 16:01:07 -07:00
|
|
|
// Release everything a stack head owns: its external token, its cached
// summary (array contents and the struct itself), and its node. A head with
// a NULL node is treated as already-deleted and skipped.
static void stack_head_delete(StackHead *self, StackNodeArray *pool) {
  if (self->node) {
    if (self->last_external_token) {
      ts_tree_release(self->last_external_token);
    }
    if (self->summary) {
      array_delete(self->summary);
      ts_free(self->summary);
    }
    stack_node_release(self->node, pool);
  }
}
|
|
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version,
|
|
|
|
|
StackNode *node, Tree *last_external_token) {
|
2016-09-01 10:04:20 -07:00
|
|
|
StackHead head = {
|
2016-11-04 09:18:38 -07:00
|
|
|
.node = node,
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
.push_count = self->heads.contents[original_version].push_count,
|
2017-06-27 14:30:46 -07:00
|
|
|
.last_external_token = last_external_token,
|
2017-06-29 14:58:20 -07:00
|
|
|
.is_halted = false,
|
2016-09-01 10:04:20 -07:00
|
|
|
};
|
2016-11-04 09:18:38 -07:00
|
|
|
array_push(&self->heads, head);
|
2016-04-15 21:28:00 -07:00
|
|
|
stack_node_retain(node);
|
2017-06-27 14:30:46 -07:00
|
|
|
if (last_external_token) ts_tree_retain(last_external_token);
|
2016-04-15 21:28:00 -07:00
|
|
|
return (StackVersion)(self->heads.size - 1);
|
|
|
|
|
}
|
|
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
static void ts_stack__add_slice(Stack *self, StackVersion original_version, StackNode *node,
|
|
|
|
|
TreeArray *trees, Tree *last_external_token) {
|
2016-11-14 12:15:24 -08:00
|
|
|
for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) {
|
2016-04-25 21:59:40 -07:00
|
|
|
StackVersion version = self->slices.contents[i].version;
|
2016-05-10 15:24:06 -07:00
|
|
|
if (self->heads.contents[version].node == node) {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
StackSlice slice = {*trees, version};
|
2016-11-04 09:18:38 -07:00
|
|
|
array_insert(&self->slices, i + 1, slice);
|
|
|
|
|
return;
|
2016-04-15 21:28:00 -07:00
|
|
|
}
|
|
|
|
|
}
|
2015-07-08 17:34:21 -07:00
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
StackVersion version = ts_stack__add_version(self, original_version, node, last_external_token);
|
2016-04-25 21:59:40 -07:00
|
|
|
StackSlice slice = { *trees, version };
|
2016-11-04 09:18:38 -07:00
|
|
|
array_push(&self->slices, slice);
|
2015-06-03 09:44:13 -07:00
|
|
|
}
|
|
|
|
|
|
2017-06-29 16:49:59 -07:00
|
|
|
inline StackPopResult stack__iter(Stack *self, StackVersion version,
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
StackIterateInternalCallback callback, void *payload,
|
|
|
|
|
bool include_trees) {
|
2016-04-15 21:28:00 -07:00
|
|
|
array_clear(&self->slices);
|
2016-09-01 10:04:20 -07:00
|
|
|
array_clear(&self->iterators);
|
2016-04-15 21:28:00 -07:00
|
|
|
|
2016-08-31 17:29:14 -07:00
|
|
|
StackHead *head = array_get(&self->heads, version);
|
2017-06-27 14:30:46 -07:00
|
|
|
Tree *last_external_token = head->last_external_token;
|
2016-09-01 10:04:20 -07:00
|
|
|
Iterator iterator = {
|
2016-08-31 17:29:14 -07:00
|
|
|
.node = head->node,
|
2016-03-31 12:03:07 -07:00
|
|
|
.trees = array_new(),
|
2016-04-15 21:28:00 -07:00
|
|
|
.tree_count = 0,
|
2016-03-31 12:03:07 -07:00
|
|
|
.is_pending = true,
|
2015-11-20 00:01:53 -08:00
|
|
|
};
|
2016-11-04 09:18:38 -07:00
|
|
|
array_push(&self->iterators, iterator);
|
2015-05-25 20:21:13 -07:00
|
|
|
|
2016-09-01 10:04:20 -07:00
|
|
|
while (self->iterators.size > 0) {
|
2016-11-14 12:15:24 -08:00
|
|
|
for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
|
2016-09-01 10:04:20 -07:00
|
|
|
Iterator *iterator = &self->iterators.contents[i];
|
|
|
|
|
StackNode *node = iterator->node;
|
2016-04-15 21:28:00 -07:00
|
|
|
|
2017-06-29 16:43:56 -07:00
|
|
|
StackIterateAction action = callback(payload, iterator);
|
2016-04-15 21:28:00 -07:00
|
|
|
bool should_pop = action & StackIteratePop;
|
|
|
|
|
bool should_stop = action & StackIterateStop || node->link_count == 0;
|
|
|
|
|
|
|
|
|
|
if (should_pop) {
|
2016-09-01 10:04:20 -07:00
|
|
|
TreeArray trees = iterator->trees;
|
2016-06-14 14:46:49 -07:00
|
|
|
if (!should_stop)
|
2016-11-04 09:18:38 -07:00
|
|
|
ts_tree_array_copy(trees, &trees);
|
2017-07-24 21:02:26 -07:00
|
|
|
ts_tree_array_reverse(&trees);
|
2017-06-29 14:58:20 -07:00
|
|
|
ts_stack__add_slice(
|
|
|
|
|
self,
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
version,
|
2017-06-29 14:58:20 -07:00
|
|
|
node,
|
|
|
|
|
&trees,
|
|
|
|
|
last_external_token
|
|
|
|
|
);
|
2016-03-07 16:03:23 -08:00
|
|
|
}
|
2015-11-20 00:01:53 -08:00
|
|
|
|
2016-04-15 21:28:00 -07:00
|
|
|
if (should_stop) {
|
|
|
|
|
if (!should_pop)
|
2016-09-01 10:04:20 -07:00
|
|
|
ts_tree_array_delete(&iterator->trees);
|
|
|
|
|
array_erase(&self->iterators, i);
|
2016-04-11 23:12:50 -07:00
|
|
|
i--, size--;
|
2015-05-30 20:26:45 -07:00
|
|
|
continue;
|
2016-03-07 16:03:23 -08:00
|
|
|
}
|
|
|
|
|
|
2016-11-14 12:15:24 -08:00
|
|
|
for (uint32_t j = 1; j <= node->link_count; j++) {
|
2016-09-01 10:04:20 -07:00
|
|
|
Iterator *next_iterator;
|
2016-04-11 22:41:06 -07:00
|
|
|
StackLink link;
|
2016-04-15 21:28:00 -07:00
|
|
|
if (j == node->link_count) {
|
2016-04-11 22:41:06 -07:00
|
|
|
link = node->links[0];
|
2016-09-01 10:04:20 -07:00
|
|
|
next_iterator = &self->iterators.contents[i];
|
2016-02-23 17:35:50 -08:00
|
|
|
} else {
|
2017-02-20 14:34:10 -08:00
|
|
|
if (self->iterators.size >= MAX_ITERATOR_COUNT) continue;
|
2016-04-11 22:41:06 -07:00
|
|
|
link = node->links[j];
|
2017-06-27 11:38:11 -07:00
|
|
|
Iterator current_iterator = self->iterators.contents[i];
|
|
|
|
|
array_push(&self->iterators, current_iterator);
|
2016-09-01 10:04:20 -07:00
|
|
|
next_iterator = array_back(&self->iterators);
|
2016-11-04 09:18:38 -07:00
|
|
|
ts_tree_array_copy(next_iterator->trees, &next_iterator->trees);
|
2016-02-23 17:35:50 -08:00
|
|
|
}
|
2015-11-20 00:01:53 -08:00
|
|
|
|
2016-09-01 10:04:20 -07:00
|
|
|
next_iterator->node = link.node;
|
2016-05-26 13:20:53 -07:00
|
|
|
if (link.tree) {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
if (include_trees) {
|
|
|
|
|
array_push(&next_iterator->trees, link.tree);
|
|
|
|
|
ts_tree_retain(link.tree);
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-14 20:25:33 -07:00
|
|
|
if (!link.tree->extra) {
|
2016-09-01 10:04:20 -07:00
|
|
|
next_iterator->tree_count++;
|
2017-07-03 16:18:29 -07:00
|
|
|
if (!link.is_pending) {
|
2016-09-01 10:04:20 -07:00
|
|
|
next_iterator->is_pending = false;
|
2017-07-03 16:18:29 -07:00
|
|
|
}
|
2016-06-14 20:25:33 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
next_iterator->tree_count++;
|
2016-09-01 10:04:20 -07:00
|
|
|
next_iterator->is_pending = false;
|
2016-05-26 13:20:53 -07:00
|
|
|
}
|
2015-05-30 20:26:45 -07:00
|
|
|
}
|
|
|
|
|
}
|
2015-05-25 20:21:13 -07:00
|
|
|
}
|
|
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
return (StackPopResult){self->slices};
|
2016-03-07 16:03:23 -08:00
|
|
|
}
|
|
|
|
|
|
2016-04-15 21:33:31 -07:00
|
|
|
// Allocate and initialize a new parse stack.
//
// The stack owns four arrays: the live version heads, a scratch array of
// pop slices, a scratch array of iterators, and a pool of recycled nodes.
// Returns a heap-allocated Stack; release it with ts_stack_delete.
Stack *ts_stack_new() {
  Stack *self = ts_calloc(1, sizeof(Stack));

  array_init(&self->heads);
  array_init(&self->slices);
  array_init(&self->iterators);
  array_init(&self->node_pool);
  // Pre-reserve capacity so early pushes don't reallocate.
  array_grow(&self->heads, 4);
  array_grow(&self->slices, 4);
  array_grow(&self->iterators, 4);
  array_grow(&self->node_pool, MAX_NODE_POOL_SIZE);

  // The base node (no predecessor, no tree, state 1) anchors every version.
  self->base_node = stack_node_new(NULL, NULL, false, 1, &self->node_pool);
  ts_stack_clear(self);

  return self;
}
|
|
|
|
|
|
|
|
|
|
// Free a stack and everything it owns: the scratch arrays, the base node,
// every remaining version head, the pooled nodes, and the Stack itself.
void ts_stack_delete(Stack *self) {
  if (self->slices.contents)
    array_delete(&self->slices);
  if (self->iterators.contents)
    array_delete(&self->iterators);
  stack_node_release(self->base_node, &self->node_pool);
  // Release each remaining head before touching the node pool, since
  // released nodes may be parked in the pool for reuse.
  for (uint32_t i = 0; i < self->heads.size; i++) {
    stack_head_delete(&self->heads.contents[i], &self->node_pool);
  }
  array_clear(&self->heads);
  // All node references are dropped now; free whatever ended up pooled.
  if (self->node_pool.contents) {
    for (uint32_t i = 0; i < self->node_pool.size; i++)
      ts_free(self->node_pool.contents[i]);
    array_delete(&self->node_pool);
  }
  array_delete(&self->heads);
  ts_free(self);
}
|
|
|
|
|
|
2016-11-14 12:15:24 -08:00
|
|
|
uint32_t ts_stack_version_count(const Stack *self) {
|
2016-04-15 21:33:31 -07:00
|
|
|
return self->heads.size;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Parse state of the topmost node of the given version.
TSStateId ts_stack_top_state(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->node->state;
}
|
|
|
|
|
|
2016-11-09 20:59:05 -08:00
|
|
|
// Source position of the topmost node of the given version.
Length ts_stack_top_position(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->node->position;
}
|
|
|
|
|
|
2016-08-31 17:29:14 -07:00
|
|
|
// Number of (non-extra) pushes recorded on this version since its push
// counter was last reset.
unsigned ts_stack_push_count(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->push_count;
}
|
|
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
// Reduce this version's push counter by the given amount.
void ts_stack_decrease_push_count(Stack *self, StackVersion version, unsigned decrement) {
  StackHead *head = array_get(&self->heads, version);
  head->push_count -= decrement;
}
|
|
|
|
|
|
2017-06-27 14:30:46 -07:00
|
|
|
// The most recent external-scanner token recorded for this version
// (may be NULL).
Tree *ts_stack_last_external_token(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->last_external_token;
}
|
|
|
|
|
|
2017-06-27 14:30:46 -07:00
|
|
|
// Replace this version's last external token. `token` may be NULL to clear.
// The new token is retained *before* the old one is released, so replacing
// a token with itself cannot momentarily drop its refcount to zero.
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Tree *token) {
  StackHead *head = array_get(&self->heads, version);
  if (token) ts_tree_retain(token);
  if (head->last_external_token) ts_tree_release(head->last_external_token);
  head->last_external_token = token;
}
|
|
|
|
|
|
2016-08-31 10:51:59 -07:00
|
|
|
// Summarize the error-related state of a version: the accumulated error
// cost of its topmost node, whether it currently sits in the error state,
// and the push count (reset to zero by ts_stack_push on entering the
// error state).
ErrorStatus ts_stack_error_status(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return (ErrorStatus){
    .cost = head->node->error_cost,
    .recovering = head->node->state == ERROR_STATE,
    .push_count = head->push_count,
  };
}
|
|
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
// Push `tree` onto the given version, transitioning its head to `state`.
//
// A new node is created on top of the current head node. Entering the
// error state resets the head's push counter; otherwise pushes of
// non-extra trees increment it.
void ts_stack_push(Stack *self, StackVersion version, Tree *tree, bool pending, TSStateId state) {
  StackHead *head = array_get(&self->heads, version);
  StackNode *new_node = stack_node_new(head->node, tree, pending, state, &self->node_pool);
  if (state == ERROR_STATE) {
    head->push_count = 0;
  } else if (!tree->extra) {
    head->push_count++;
  }
  // NOTE(review): this assumes stack_node_new takes its own reference on
  // head->node (as the new node's predecessor link) — confirm before
  // reordering the release below.
  stack_node_release(head->node, &self->node_pool);
  head->node = new_node;
}
|
|
|
|
|
|
2017-06-29 16:49:59 -07:00
|
|
|
inline StackIterateAction iterate_callback(void *payload, const Iterator *iterator) {
|
2017-06-29 16:43:56 -07:00
|
|
|
StackIterateSession *session = payload;
|
|
|
|
|
return session->callback(session->payload, iterator->node->state, &iterator->trees, iterator->tree_count);
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-15 21:28:00 -07:00
|
|
|
StackPopResult ts_stack_iterate(Stack *self, StackVersion version,
|
|
|
|
|
StackIterateCallback callback, void *payload) {
|
2017-06-29 16:43:56 -07:00
|
|
|
StackIterateSession session = {payload, callback};
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
return stack__iter(self, version, iterate_callback, &session, true);
|
2016-04-04 11:59:10 -07:00
|
|
|
}
|
|
|
|
|
|
2017-06-29 16:49:59 -07:00
|
|
|
inline StackIterateAction pop_count_callback(void *payload, const Iterator *iterator) {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
unsigned *goal_tree_count = payload;
|
|
|
|
|
if (iterator->tree_count == *goal_tree_count) {
|
2016-04-15 21:28:00 -07:00
|
|
|
return StackIteratePop | StackIterateStop;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
} else {
|
|
|
|
|
return StackIterateNone;
|
2016-05-09 14:31:44 -07:00
|
|
|
}
|
2016-03-07 16:03:23 -08:00
|
|
|
}
|
|
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
// Pop exactly `count` trees off the given version.
StackPopResult ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
  uint32_t goal_tree_count = count;
  return stack__iter(self, version, pop_count_callback, &goal_tree_count, true);
}
|
|
|
|
|
|
2017-06-29 16:49:59 -07:00
|
|
|
inline StackIterateAction pop_pending_callback(void *payload, const Iterator *iterator) {
|
2017-06-29 16:43:56 -07:00
|
|
|
if (iterator->tree_count >= 1) {
|
|
|
|
|
if (iterator->is_pending) {
|
2016-04-15 21:28:00 -07:00
|
|
|
return StackIteratePop | StackIterateStop;
|
2016-04-11 23:12:50 -07:00
|
|
|
} else {
|
2016-04-15 21:28:00 -07:00
|
|
|
return StackIterateStop;
|
2016-04-11 23:12:50 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
2016-04-15 21:28:00 -07:00
|
|
|
return StackIterateNone;
|
2016-04-11 23:12:50 -07:00
|
|
|
}
|
2015-05-25 20:21:13 -07:00
|
|
|
}
|
2015-06-18 15:04:03 -07:00
|
|
|
|
2016-04-04 12:25:57 -07:00
|
|
|
// Pop a single pending tree off the given version, if its topmost link is
// pending.
//
// stack__iter may emit the resulting slice under a fresh version number;
// renumber it back onto `version` so the caller's version id stays valid.
StackPopResult ts_stack_pop_pending(Stack *self, StackVersion version) {
  StackPopResult pop = stack__iter(self, version, pop_pending_callback, NULL, true);
  if (pop.slices.size > 0) {
    ts_stack_renumber_version(self, pop.slices.contents[0].version, version);
    pop.slices.contents[0].version = version;
  }
  return pop;
}
|
|
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
inline StackIterateAction pop_error_callback(void *payload, const Iterator *iterator) {
|
|
|
|
|
if (iterator->trees.size > 0) {
|
|
|
|
|
bool *found_error = payload;
|
|
|
|
|
if (!*found_error && iterator->trees.contents[0]->symbol == ts_builtin_sym_error) {
|
|
|
|
|
*found_error = true;
|
|
|
|
|
return StackIteratePop | StackIterateStop;
|
|
|
|
|
} else {
|
|
|
|
|
return StackIterateStop;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
return StackIterateNone;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If the topmost node of this version was reached via an error tree, pop
// that error tree; otherwise return an empty result without iterating.
StackPopResult ts_stack_pop_error(Stack *self, StackVersion version) {
  StackNode *node = array_get(&self->heads, version)->node;
  for (unsigned i = 0; i < node->link_count; i++) {
    if (node->links[i].tree && node->links[i].tree->symbol == ts_builtin_sym_error) {
      // Shared flag so that only one divergent path pops the error.
      bool found_error = false;
      return stack__iter(self, version, pop_error_callback, &found_error, true);
    }
  }
  return (StackPopResult){.slices = array_new()};
}
|
|
|
|
|
|
2017-06-29 16:49:59 -07:00
|
|
|
inline StackIterateAction pop_all_callback(void *payload, const Iterator *iterator) {
|
|
|
|
|
return iterator->node->link_count == 0 ? StackIteratePop : StackIterateNone;
|
2015-06-18 15:04:03 -07:00
|
|
|
}
|
2015-12-08 12:20:50 -08:00
|
|
|
|
2016-04-24 00:54:20 -07:00
|
|
|
// Pop every node of this version, all the way down to the base node.
StackPopResult ts_stack_pop_all(Stack *self, StackVersion version) {
  void *no_payload = NULL;
  return stack__iter(self, version, pop_all_callback, no_payload, true);
}
|
|
|
|
|
|
2017-09-12 12:00:00 -07:00
|
|
|
// State threaded through summarize_stack_callback while building a
// stack summary.
typedef struct {
  StackSummary *summary;  // heap-allocated list of entries recorded so far
  unsigned max_depth;     // stop iterating past this depth
} SummarizeStackSession;
|
|
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
// stack__iter callback that records one (position, depth, state) entry for
// each distinct state seen at each depth, up to the session's max_depth.
inline StackIterateAction summarize_stack_callback(void *payload, const Iterator *iterator) {
  SummarizeStackSession *session = payload;
  TSStateId state = iterator->node->state;
  unsigned depth = iterator->tree_count;
  if (depth > session->max_depth) return StackIterateStop;
  // Scan backwards through already-recorded entries: entries are appended
  // in non-decreasing depth order, so we can stop at the first shallower
  // one. Skip this node if the same (depth, state) pair is already present.
  // `i + 1 > 0` is the unsigned-safe spelling of `i >= 0`.
  for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) {
    StackSummaryEntry entry = session->summary->contents[i];
    if (entry.depth < depth) break;
    if (entry.depth == depth && entry.state == state) return StackIterateNone;
  }
  array_push(session->summary, ((StackSummaryEntry){
    .position = iterator->node->position,
    .depth = depth,
    .state = state,
  }));
  return StackIterateNone;
}
|
|
|
|
|
|
2017-09-12 12:00:00 -07:00
|
|
|
// Build a summary of this version's stack contents down to max_depth and
// attach it to the version's head. The summary is heap-allocated and
// owned by the head.
// NOTE(review): any summary already attached to this head is overwritten
// without being freed here — confirm callers never record twice per head.
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
  SummarizeStackSession session = {
    .summary = ts_malloc(sizeof(StackSummary)),
    .max_depth = max_depth
  };
  array_init(session.summary);
  // Iterate without popping (include_trees = false): summarizing must not
  // modify the stack.
  stack__iter(self, version, summarize_stack_callback, &session, false);
  self->heads.contents[version].summary = session.summary;
}
|
|
|
|
|
|
|
|
|
|
// Return the summary previously recorded for this version by
// `ts_stack_record_summary`, or NULL if none has been recorded.
StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->summary;
}
|
|
|
|
|
|
|
|
|
|
// Number of entries pushed onto this version of the stack since the
// last error was encountered (taken from the head node's depth counter).
unsigned ts_stack_depth_since_error(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->node->depth;
}
|
|
|
|
|
|
2016-04-15 21:33:31 -07:00
|
|
|
// Discard a stack version entirely: release everything its head owns,
// then remove the head from the versions array.
void ts_stack_remove_version(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  stack_head_delete(head, &self->node_pool);
  array_erase(&self->heads, version);
}
|
|
|
|
|
|
|
|
|
|
// Move version `v1` into slot `v2`, destroying whatever head previously
// occupied `v2`. Callers must pass v2 < v1 so that erasing v1 does not
// shift the slot we just wrote.
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
  assert(v2 < v1);
  assert((uint32_t)v1 < self->heads.size);
  StackHead *destination = &self->heads.contents[v2];
  stack_head_delete(destination, &self->node_pool);
  *destination = self->heads.contents[v1];
  array_erase(&self->heads, v1);
}
|
|
|
|
|
|
2017-06-29 14:58:20 -07:00
|
|
|
// Exchange the heads of two stack versions. Ownership of the nodes and
// tokens travels with the heads, so no retain/release is needed.
void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
  StackHead swap = self->heads.contents[v1];
  self->heads.contents[v1] = self->heads.contents[v2];
  self->heads.contents[v2] = swap;
}
|
|
|
|
|
|
2016-11-14 17:25:55 -08:00
|
|
|
// Duplicate a stack version, appending the copy as a new head. The copy
// shares the original's node and external token, so both are retained.
// The recorded summary is deliberately NOT shared (set to NULL) so each
// version owns at most one summary. Returns the new version's index.
StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
  assert(version < self->heads.size);
  array_push(&self->heads, self->heads.contents[version]);
  StackHead *copy = array_back(&self->heads);
  stack_node_retain(copy->node);
  if (copy->last_external_token) ts_tree_retain(copy->last_external_token);
  copy->summary = NULL;
  return self->heads.size - 1;
}
|
|
|
|
|
|
2017-06-29 14:58:20 -07:00
|
|
|
// Merge version2 into version1 if the two heads are compatible
// (see `ts_stack_can_merge`). Returns true when a merge happened.
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
  if (!ts_stack_can_merge(self, version1, version2)) return false;
  ts_stack_force_merge(self, version1, version2);
  return true;
}
|
|
|
|
|
|
2017-06-29 14:58:20 -07:00
|
|
|
// Two versions can merge only when neither is halted and their heads
// agree on parse state, source position, depth-since-error, and the
// last external scanner token's state.
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
  StackHead *left = &self->heads.contents[version1];
  StackHead *right = &self->heads.contents[version2];
  if (left->is_halted || right->is_halted) return false;
  if (left->node->state != right->node->state) return false;
  if (left->node->position.chars != right->node->position.chars) return false;
  if (left->node->depth != right->node->depth) return false;
  return ts_tree_external_token_state_eq(left->last_external_token,
                                         right->last_external_token);
}
|
|
|
|
|
|
|
|
|
|
// Unconditionally merge version2 into version1: graft all of version2's
// head links onto version1's head node, then remove version2. Callers
// are responsible for checking `ts_stack_can_merge` first.
void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion version2) {
  StackNode *target = self->heads.contents[version1].node;
  StackNode *source = self->heads.contents[version2].node;
  for (uint32_t i = 0; i < source->link_count; i++) {
    stack_node_add_link(target, source->links[i]);
  }
  ts_stack_remove_version(self, version2);
}
|
|
|
|
|
|
2016-06-02 14:04:48 -07:00
|
|
|
// Mark a stack version as halted so it is skipped by later operations.
void ts_stack_halt(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  head->is_halted = true;
}
|
2016-05-29 22:36:47 -07:00
|
|
|
|
2016-06-02 14:04:48 -07:00
|
|
|
// Report whether the given stack version has been halted.
bool ts_stack_is_halted(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->is_halted;
}
|
|
|
|
|
|
2016-04-04 11:59:10 -07:00
|
|
|
// Reset the stack to its initial state: a single version whose head is
// the base node. The base node is retained up front because deleting
// the existing heads may drop references to it.
void ts_stack_clear(Stack *self) {
  stack_node_retain(self->base_node);
  for (uint32_t i = 0; i < self->heads.size; i++) {
    stack_head_delete(&self->heads.contents[i], &self->node_pool);
  }
  array_clear(&self->heads);
  // Remaining fields (e.g. summary) are zero-initialized by the
  // designated initializer, matching the original compound literal.
  StackHead initial_head = {
    .node = self->base_node,
    .last_external_token = NULL,
    .is_halted = false,
  };
  array_push(&self->heads, initial_head);
}
|
|
|
|
|
|
2016-05-16 10:44:19 -07:00
|
|
|
// Write the entire stack structure to `f` (default: stderr) in Graphviz
// DOT format, for debugging. Allocation recording is suspended for the
// duration so the graph-drawing bookkeeping doesn't pollute leak checks.
//
// Fixes:
//  - The external-token hex dump passed a plain `char` to "%2X"; on
//    platforms with signed char, bytes >= 0x80 sign-extend and print as
//    "FFFFFFxx". Cast to unsigned char so each byte prints as two digits.
//  - "%p" requires a `void *` argument per the C standard; passing
//    StackNode* directly is undefined behavior, so node pointers are
//    now cast explicitly.
bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
  bool was_recording_allocations = ts_toggle_allocation_recording(false);
  if (!f)
    f = stderr;

  fprintf(f, "digraph stack {\n");
  fprintf(f, "rankdir=\"RL\";\n");
  fprintf(f, "edge [arrowhead=none]\n");

  Array(StackNode *) visited_nodes = array_new();

  // Emit one synthetic "head" node per live (non-halted) version, and
  // seed the iterator list with each version's head stack node.
  array_clear(&self->iterators);
  for (uint32_t i = 0; i < self->heads.size; i++) {
    if (ts_stack_is_halted(self, i)) continue;
    StackHead *head = &self->heads.contents[i];
    fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
    fprintf(
      f,
      "node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, "
      "labeltooltip=\"push_count: %u\ndepth: %u",
      i, (void *)head->node, i, head->push_count, head->node->depth
    );

    if (head->last_external_token) {
      TSExternalTokenState *state = &head->last_external_token->external_token_state;
      const char *data = ts_external_token_state_data(state);
      fprintf(f, "\nexternal_token_state:");
      // Cast each byte to unsigned char so "%2X" prints exactly two
      // hex digits even when char is signed and the byte is >= 0x80.
      for (uint32_t j = 0; j < state->length; j++)
        fprintf(f, " %2X", (unsigned char)data[j]);
    }

    fprintf(f, "\"]\n");
    array_push(&self->iterators, ((Iterator){.node = head->node }));
  }

  // Breadth-style traversal: keep expanding iterators until every
  // reachable node has been visited (a node may be reachable from
  // several versions, so visited_nodes de-duplicates).
  bool all_iterators_done = false;
  while (!all_iterators_done) {
    all_iterators_done = true;

    for (uint32_t i = 0; i < self->iterators.size; i++) {
      Iterator iterator = self->iterators.contents[i];
      StackNode *node = iterator.node;

      for (uint32_t j = 0; j < visited_nodes.size; j++) {
        if (visited_nodes.contents[j] == node) {
          node = NULL;
          break;
        }
      }

      if (!node)
        continue;
      all_iterators_done = false;

      // Node label: '?' for the error state, an invisible point for
      // pass-through nodes holding only an extra (e.g. whitespace)
      // tree, otherwise the numeric parse state.
      fprintf(f, "node_%p [", (void *)node);
      if (node->state == ERROR_STATE)
        fprintf(f, "label=\"?\"");
      else if (node->link_count == 1 && node->links[0].tree &&
               node->links[0].tree->extra)
        fprintf(f, "shape=point margin=0 label=\"\"");
      else
        fprintf(f, "label=\"%d\"", node->state);

      fprintf(
        f,
        " tooltip=\"position: %u,%u\nerror_cost: %u\"];\n",
        node->position.extent.row, node->position.extent.column, node->error_cost
      );

      for (int j = 0; j < node->link_count; j++) {
        StackLink link = node->links[j];
        fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node);
        if (link.is_pending)
          fprintf(f, "style=dashed ");
        if (link.tree && link.tree->extra)
          fprintf(f, "fontcolor=gray ");

        if (!link.tree) {
          fprintf(f, "color=red");
        } else if (link.tree->symbol == ts_builtin_sym_error) {
          fprintf(f, "label=\"ERROR\"");
        } else {
          fprintf(f, "label=\"");
          if (!link.tree->named)
            fprintf(f, "'");
          // Escape quotes and backslashes so the symbol name stays a
          // valid DOT string literal.
          const char *name = symbol_names[link.tree->symbol];
          for (const char *c = name; *c; c++) {
            if (*c == '\"' || *c == '\\')
              fprintf(f, "\\");
            fprintf(f, "%c", *c);
          }
          if (!link.tree->named)
            fprintf(f, "'");
          fprintf(f, "\" labeltooltip=\"error_cost: %u\"",
                  link.tree->error_cost);
        }

        fprintf(f, "];\n");

        // The first link reuses the current iterator slot; additional
        // links fork a new iterator (push may reallocate, so the
        // pointer is taken AFTER the push).
        Iterator *next_iterator;
        if (j == 0) {
          next_iterator = &self->iterators.contents[i];
        } else {
          array_push(&self->iterators, iterator);
          next_iterator = array_back(&self->iterators);
        }
        next_iterator->node = link.node;
      }

      array_push(&visited_nodes, node);
    }
  }

  fprintf(f, "}\n");

  array_delete(&visited_nodes);
  ts_toggle_allocation_recording(was_recording_allocations);
  return true;
}
|