2016-01-15 15:08:42 -08:00
|
|
|
#include "runtime/alloc.h"
|
2018-04-08 13:49:20 -07:00
|
|
|
#include "runtime/language.h"
|
2018-05-10 15:11:14 -07:00
|
|
|
#include "runtime/subtree.h"
|
2016-02-17 20:41:29 -08:00
|
|
|
#include "runtime/array.h"
|
2015-09-18 18:04:52 -07:00
|
|
|
#include "runtime/stack.h"
|
2015-05-28 15:06:39 -07:00
|
|
|
#include "runtime/length.h"
|
2015-05-25 20:21:13 -07:00
|
|
|
#include <assert.h>
|
2016-01-28 21:18:57 -08:00
|
|
|
#include <stdio.h>
|
2015-05-25 20:21:13 -07:00
|
|
|
|
2016-04-11 22:41:06 -07:00
|
|
|
// Maximum number of alternative predecessor links one stack node can hold;
// stack_node_add_link silently drops links beyond this limit.
#define MAX_LINK_COUNT 8

// Upper bound on released StackNodes cached in the reuse pool before
// falling back to ts_free.
#define MAX_NODE_POOL_SIZE 50

// Upper bound on concurrently-live Iterators during stack__iter; extra
// branches beyond this limit are not followed.
#define MAX_ITERATOR_COUNT 64

// Force inlining of the `inline`-marked helpers in this file on both MSVC
// and GCC/Clang-style compilers.
#ifdef _WIN32
#define inline __forceinline
#else
#define inline static inline __attribute__((always_inline))
#endif
|
2016-03-07 16:03:23 -08:00
|
|
|
|
2016-02-23 17:35:50 -08:00
|
|
|
typedef struct StackNode StackNode;

// One edge in the stack graph: connects a node to a single predecessor,
// carrying the subtree pushed along that edge (may be NULL).
typedef struct {
  StackNode *node;    // predecessor node
  Subtree *subtree;   // subtree pushed along this edge; may be NULL
  bool is_pending;
} StackLink;
|
|
|
|
|
|
|
|
|
|
// A node in the parse stack graph. Nodes are reference-counted and shared
// between stack versions. Each node records the parse state reached plus
// aggregate statistics accumulated along the path(s) leading to it.
struct StackNode {
  TSStateId state;
  Length position;                  // absolute input position at this node
  StackLink links[MAX_LINK_COUNT];  // edges back to predecessor nodes
  short unsigned int link_count;    // number of valid entries in `links`
  uint32_t ref_count;
  unsigned error_cost;              // accumulated error cost along this path
  unsigned node_count;              // max subtree-node count over paths here
  int dynamic_precedence;           // accumulated dynamic precedence
};
|
2015-05-25 20:21:13 -07:00
|
|
|
|
2015-11-20 00:01:53 -08:00
|
|
|
// Cursor used by stack__iter to walk backwards from a stack head along one
// path through the stack graph, accumulating the subtrees it encounters.
typedef struct {
  StackNode *node;        // current node in the walk
  SubtreeArray subtrees;  // subtrees collected so far (head-to-root order)
  uint32_t subtree_count; // links traversed, not counting `extra` subtrees
  bool is_pending;
} Iterator;
|
2015-11-20 00:01:53 -08:00
|
|
|
|
2017-06-29 16:43:56 -07:00
|
|
|
// Adapter that lets the public ts_stack_iterate API (a StackIterateCallback)
// be driven by the internal stack__iter machinery.
typedef struct {
  void *payload;
  StackIterateCallback callback;
} StackIterateSession;

typedef Array(StackNode *) StackNodeArray;
|
|
|
|
|
|
2018-04-02 09:47:01 -07:00
|
|
|
// Lifecycle state of one stack version (head).
typedef enum {
  StackStatusActive,
  StackStatusPaused,   // paused with a lookahead recorded in the head
  StackStatusHalted,
} StackStatus;
|
|
|
|
|
|
2016-05-10 15:24:06 -07:00
|
|
|
// One version (head) of the multi-headed stack.
typedef struct {
  StackNode *node;                // topmost node of this version (retained)
  Subtree *last_external_token;   // may be NULL; retained when set
  StackSummary *summary;          // optional; owned and freed by this head
  unsigned node_count_at_last_error;  // baseline for node-count-since-error
  TSSymbol lookahead_when_paused; // lookahead stored when status is Paused
  StackStatus status;
} StackHead;
|
|
|
|
|
|
2016-02-17 14:45:00 -08:00
|
|
|
// The multi-headed parse stack. `heads` holds the live versions; `slices`,
// `iterators`, and `node_pool` are scratch storage reused across operations.
struct Stack {
  Array(StackHead) heads;
  StackSliceArray slices;     // results of the most recent pop operation
  Array(Iterator) iterators;  // scratch space for stack__iter
  StackNodeArray node_pool;   // released nodes cached for reuse
  StackNode *base_node;       // shared root node beneath every version
  SubtreePool *subtree_pool;
};
|
|
|
|
|
|
2018-03-29 17:50:07 -07:00
|
|
|
// Bit flags returned by a StackCallback to steer stack__iter:
// Pop emits a slice for the current path; Stop abandons the path.
typedef unsigned StackAction;

enum {
  StackActionNone,
  StackActionStop = 1,
  StackActionPop = 2,
};

// Callback invoked by stack__iter at each node it visits.
typedef StackAction (*StackCallback)(void *, const Iterator *);
|
2017-06-29 16:43:56 -07:00
|
|
|
|
2016-03-07 16:03:23 -08:00
|
|
|
// Increment a node's reference count. A NULL node is tolerated as a no-op.
static void stack_node_retain(StackNode *self) {
  if (self) {
    assert(self->ref_count > 0);
    self->ref_count++;
    assert(self->ref_count != 0);  // guard against counter wrap-around
  }
}
|
|
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
// Decrement a node's reference count and, when it reaches zero, destroy the
// node: release the subtree and predecessor of every link, then recycle the
// node's memory through `pool` (or ts_free it when the pool is full). The
// first predecessor is processed via `goto recur` rather than a recursive
// call, so long predecessor chains do not grow the C stack.
static void stack_node_release(StackNode *self, StackNodeArray *pool, SubtreePool *subtree_pool) {
recur:
  assert(self->ref_count != 0);
  self->ref_count--;
  if (self->ref_count > 0) return;

  StackNode *first_predecessor = NULL;
  if (self->link_count > 0) {
    // Release links[link_count-1] .. links[1] recursively; links[0] is
    // remembered so it can be handled iteratively below.
    for (unsigned i = self->link_count - 1; i > 0; i--) {
      if (self->links[i].subtree) ts_subtree_release(subtree_pool, self->links[i].subtree);
      stack_node_release(self->links[i].node, pool, subtree_pool);
    }
    if (self->links[0].subtree) ts_subtree_release(subtree_pool, self->links[0].subtree);
    first_predecessor = self->links[0].node;
  }

  // Recycle this node's memory if there is room in the pool.
  if (pool->size < MAX_NODE_POOL_SIZE) {
    array_push(pool, self);
  } else {
    ts_free(self);
  }

  // Tail-"call" on the first predecessor.
  if (first_predecessor) {
    self = first_predecessor;
    goto recur;
  }
}
|
|
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
// Create a stack node on top of `previous_node` (NULL for the base node),
// linked via `subtree`. Nodes are reused from `pool` when available.
// Position, error cost, dynamic precedence, and node count are inherited
// from the predecessor and extended by the pushed subtree.
static StackNode *stack_node_new(StackNode *previous_node, Subtree *subtree, bool is_pending,
                                 TSStateId state, StackNodeArray *pool) {
  StackNode *node = pool->size > 0 ?
    array_pop(pool) :
    ts_malloc(sizeof(StackNode));
  // Compound literal zero-initializes every field not named here.
  *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state};

  if (previous_node) {
    node->link_count = 1;
    node->links[0] = (StackLink){
      .node = previous_node,
      .subtree = subtree,
      .is_pending = is_pending,
    };

    node->position = previous_node->position;
    node->error_cost = previous_node->error_cost;
    node->dynamic_precedence = previous_node->dynamic_precedence;
    node->node_count = previous_node->node_count;

    if (subtree) {
      node->error_cost += subtree->error_cost;
      node->position = length_add(node->position, ts_subtree_total_size(subtree));
      node->dynamic_precedence += subtree->dynamic_precedence;
      // `extra` subtrees do not count toward the node count.
      if (!subtree->extra) node->node_count += subtree->node_count;
    }
  } else {
    node->position = length_zero();
    node->error_cost = 0;
  }

  return node;
}
|
|
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
static bool stack__subtree_is_equivalent(const Subtree *left, const Subtree *right) {
|
2017-09-13 10:05:31 -07:00
|
|
|
return
|
|
|
|
|
left == right ||
|
|
|
|
|
(left &&
|
|
|
|
|
right &&
|
|
|
|
|
left->symbol == right->symbol &&
|
|
|
|
|
((left->error_cost > 0 && right->error_cost > 0) ||
|
2018-04-02 18:04:26 -07:00
|
|
|
(left->children.size == 0 && right->children.size == 0 &&
|
2017-09-13 10:05:31 -07:00
|
|
|
left->padding.bytes == right->padding.bytes &&
|
|
|
|
|
left->size.bytes == right->size.bytes &&
|
|
|
|
|
left->extra == right->extra &&
|
2018-05-11 12:57:41 -07:00
|
|
|
ts_subtree_external_scanner_state_eq(left, right))));
|
2017-06-29 14:58:20 -07:00
|
|
|
}
|
|
|
|
|
|
2016-04-10 14:12:24 -07:00
|
|
|
// Add `link` as an additional predecessor edge of `self`, deduplicating
// against existing links and merging interchangeable successor nodes.
static void stack_node_add_link(StackNode *self, StackLink link) {
  // Ignore self-loops.
  if (link.node == self) return;

  for (int i = 0; i < self->link_count; i++) {
    StackLink existing_link = self->links[i];
    if (stack__subtree_is_equivalent(existing_link.subtree, link.subtree)) {
      // Exact duplicate edge: nothing to do.
      if (existing_link.node == link.node) return;

      // The two edges lead to interchangeable nodes (same state and byte
      // position): merge by grafting the new node's links onto the
      // existing node instead of adding a parallel edge.
      if (existing_link.node->state == link.node->state &&
          existing_link.node->position.bytes == link.node->position.bytes) {
        for (int j = 0; j < link.node->link_count; j++) {
          stack_node_add_link(existing_link.node, link.node->links[j]);
        }
        return;
      }
    }
  }

  // Silently drop the link if the node is already at capacity.
  if (self->link_count == MAX_LINK_COUNT) return;

  stack_node_retain(link.node);
  if (link.subtree) ts_subtree_retain(link.subtree);
  self->links[self->link_count++] = link;

  // Keep node_count equal to the maximum over all paths into this node.
  unsigned node_count = link.node->node_count;
  if (link.subtree) node_count += link.subtree->node_count;
  if (node_count > self->node_count) self->node_count = node_count;
}
|
|
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
// Release everything a stack head owns: its last external token, its
// summary (if any), and its reference to the topmost stack node.
static void stack_head_delete(StackHead *self, StackNodeArray *pool, SubtreePool *subtree_pool) {
  if (self->node) {
    if (self->last_external_token) {
      ts_subtree_release(subtree_pool, self->last_external_token);
    }
    if (self->summary) {
      // The summary's array storage and the summary struct itself are both
      // owned by the head.
      array_delete(self->summary);
      ts_free(self->summary);
    }
    stack_node_release(self->node, pool, subtree_pool);
  }
}
|
|
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
// Append a new head pointing at `node`, inheriting the node-count baseline
// and last external token of `original_version`. Retains both the node and
// the token. Returns the index of the new version.
static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version,
                                          StackNode *node) {
  StackHead head = {
    .node = node,
    .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error,
    .last_external_token = self->heads.contents[original_version].last_external_token,
    .status = StackStatusActive,
    .lookahead_when_paused = 0,
  };
  array_push(&self->heads, head);
  stack_node_retain(node);
  if (head.last_external_token) ts_subtree_retain(head.last_external_token);
  return (StackVersion)(self->heads.size - 1);
}
|
|
|
|
|
|
2018-04-02 10:57:44 -07:00
|
|
|
// Record one pop result: a slice pairing `subtrees` with a stack version
// whose head node is `node`. If a slice for a version with the same head
// node already exists, the new slice is inserted next to it so slices stay
// grouped by version; otherwise a fresh version is created.
static void ts_stack__add_slice(Stack *self, StackVersion original_version,
                                StackNode *node, SubtreeArray *subtrees) {
  // Scan newest-to-oldest; `i + 1 > 0` is the unsigned-safe loop condition.
  for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) {
    StackVersion version = self->slices.contents[i].version;
    if (self->heads.contents[version].node == node) {
      StackSlice slice = {*subtrees, version};
      // Insert immediately after the latest slice for this version.
      array_insert(&self->slices, i + 1, slice);
      return;
    }
  }

  StackVersion version = ts_stack__add_version(self, original_version, node);
  StackSlice slice = { *subtrees, version };
  array_push(&self->slices, slice);
}
|
|
|
|
|
|
2018-03-29 17:37:54 -07:00
|
|
|
inline StackSliceArray stack__iter(Stack *self, StackVersion version,
|
2018-03-29 17:50:07 -07:00
|
|
|
StackCallback callback, void *payload,
|
2018-05-10 15:11:14 -07:00
|
|
|
int goal_subtree_count) {
|
2016-04-15 21:28:00 -07:00
|
|
|
array_clear(&self->slices);
|
2016-09-01 10:04:20 -07:00
|
|
|
array_clear(&self->iterators);
|
2016-04-15 21:28:00 -07:00
|
|
|
|
2016-08-31 17:29:14 -07:00
|
|
|
StackHead *head = array_get(&self->heads, version);
|
2016-09-01 10:04:20 -07:00
|
|
|
Iterator iterator = {
|
2016-08-31 17:29:14 -07:00
|
|
|
.node = head->node,
|
2018-05-10 15:11:14 -07:00
|
|
|
.subtrees = array_new(),
|
|
|
|
|
.subtree_count = 0,
|
2016-03-31 12:03:07 -07:00
|
|
|
.is_pending = true,
|
2015-11-20 00:01:53 -08:00
|
|
|
};
|
2018-04-09 18:09:54 -07:00
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
bool include_subtrees = false;
|
|
|
|
|
if (goal_subtree_count >= 0) {
|
|
|
|
|
include_subtrees = true;
|
|
|
|
|
array_reserve(&iterator.subtrees, goal_subtree_count);
|
2018-04-09 18:09:54 -07:00
|
|
|
}
|
|
|
|
|
|
2016-11-04 09:18:38 -07:00
|
|
|
array_push(&self->iterators, iterator);
|
2015-05-25 20:21:13 -07:00
|
|
|
|
2016-09-01 10:04:20 -07:00
|
|
|
while (self->iterators.size > 0) {
|
2016-11-14 12:15:24 -08:00
|
|
|
for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
|
2016-09-01 10:04:20 -07:00
|
|
|
Iterator *iterator = &self->iterators.contents[i];
|
|
|
|
|
StackNode *node = iterator->node;
|
2016-04-15 21:28:00 -07:00
|
|
|
|
2018-03-29 17:50:07 -07:00
|
|
|
StackAction action = callback(payload, iterator);
|
|
|
|
|
bool should_pop = action & StackActionPop;
|
|
|
|
|
bool should_stop = action & StackActionStop || node->link_count == 0;
|
2016-04-15 21:28:00 -07:00
|
|
|
|
|
|
|
|
if (should_pop) {
|
2018-05-10 15:11:14 -07:00
|
|
|
SubtreeArray subtrees = iterator->subtrees;
|
2016-06-14 14:46:49 -07:00
|
|
|
if (!should_stop)
|
2018-05-10 15:11:14 -07:00
|
|
|
ts_subtree_array_copy(subtrees, &subtrees);
|
|
|
|
|
ts_subtree_array_reverse(&subtrees);
|
2017-06-29 14:58:20 -07:00
|
|
|
ts_stack__add_slice(
|
|
|
|
|
self,
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
version,
|
2017-06-29 14:58:20 -07:00
|
|
|
node,
|
2018-05-10 15:11:14 -07:00
|
|
|
&subtrees
|
2017-06-29 14:58:20 -07:00
|
|
|
);
|
2016-03-07 16:03:23 -08:00
|
|
|
}
|
2015-11-20 00:01:53 -08:00
|
|
|
|
2016-04-15 21:28:00 -07:00
|
|
|
if (should_stop) {
|
|
|
|
|
if (!should_pop)
|
2018-05-10 15:11:14 -07:00
|
|
|
ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees);
|
2016-09-01 10:04:20 -07:00
|
|
|
array_erase(&self->iterators, i);
|
2016-04-11 23:12:50 -07:00
|
|
|
i--, size--;
|
2015-05-30 20:26:45 -07:00
|
|
|
continue;
|
2016-03-07 16:03:23 -08:00
|
|
|
}
|
|
|
|
|
|
2016-11-14 12:15:24 -08:00
|
|
|
for (uint32_t j = 1; j <= node->link_count; j++) {
|
2016-09-01 10:04:20 -07:00
|
|
|
Iterator *next_iterator;
|
2016-04-11 22:41:06 -07:00
|
|
|
StackLink link;
|
2016-04-15 21:28:00 -07:00
|
|
|
if (j == node->link_count) {
|
2016-04-11 22:41:06 -07:00
|
|
|
link = node->links[0];
|
2016-09-01 10:04:20 -07:00
|
|
|
next_iterator = &self->iterators.contents[i];
|
2016-02-23 17:35:50 -08:00
|
|
|
} else {
|
2017-02-20 14:34:10 -08:00
|
|
|
if (self->iterators.size >= MAX_ITERATOR_COUNT) continue;
|
2016-04-11 22:41:06 -07:00
|
|
|
link = node->links[j];
|
2017-06-27 11:38:11 -07:00
|
|
|
Iterator current_iterator = self->iterators.contents[i];
|
|
|
|
|
array_push(&self->iterators, current_iterator);
|
2016-09-01 10:04:20 -07:00
|
|
|
next_iterator = array_back(&self->iterators);
|
2018-05-10 15:11:14 -07:00
|
|
|
ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees);
|
2016-02-23 17:35:50 -08:00
|
|
|
}
|
2015-11-20 00:01:53 -08:00
|
|
|
|
2016-09-01 10:04:20 -07:00
|
|
|
next_iterator->node = link.node;
|
2018-05-10 15:11:14 -07:00
|
|
|
if (link.subtree) {
|
|
|
|
|
if (include_subtrees) {
|
|
|
|
|
array_push(&next_iterator->subtrees, link.subtree);
|
|
|
|
|
ts_subtree_retain(link.subtree);
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
}
|
|
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
if (!link.subtree->extra) {
|
|
|
|
|
next_iterator->subtree_count++;
|
2017-07-03 16:18:29 -07:00
|
|
|
if (!link.is_pending) {
|
2016-09-01 10:04:20 -07:00
|
|
|
next_iterator->is_pending = false;
|
2017-07-03 16:18:29 -07:00
|
|
|
}
|
2016-06-14 20:25:33 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
2018-05-10 15:11:14 -07:00
|
|
|
next_iterator->subtree_count++;
|
2016-09-01 10:04:20 -07:00
|
|
|
next_iterator->is_pending = false;
|
2016-05-26 13:20:53 -07:00
|
|
|
}
|
2015-05-30 20:26:45 -07:00
|
|
|
}
|
|
|
|
|
}
|
2015-05-25 20:21:13 -07:00
|
|
|
}
|
|
|
|
|
|
2018-03-29 17:37:54 -07:00
|
|
|
return self->slices;
|
2016-03-07 16:03:23 -08:00
|
|
|
}
|
|
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
// Allocate and initialize an empty stack whose subtrees are managed by
// `subtree_pool`. The returned stack starts with a single cleared version.
Stack *ts_stack_new(SubtreePool *subtree_pool) {
  Stack *self = ts_calloc(1, sizeof(Stack));

  // Initialize each array and pre-reserve its typical capacity.
  array_init(&self->heads);
  array_reserve(&self->heads, 4);
  array_init(&self->slices);
  array_reserve(&self->slices, 4);
  array_init(&self->iterators);
  array_reserve(&self->iterators, 4);
  array_init(&self->node_pool);
  array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE);

  self->subtree_pool = subtree_pool;
  self->base_node = stack_node_new(NULL, NULL, false, 1, &self->node_pool);
  ts_stack_clear(self);

  return self;
}
|
|
|
|
|
|
|
|
|
|
// Free the stack and everything it owns: the scratch arrays, the base node,
// every head, and any pooled nodes.
//
// Fix: removed a redundant `array_clear(&self->heads)` — the heads array is
// deleted unconditionally a few lines later, so clearing it first was dead
// work.
void ts_stack_delete(Stack *self) {
  if (self->slices.contents)
    array_delete(&self->slices);
  if (self->iterators.contents)
    array_delete(&self->iterators);
  stack_node_release(self->base_node, &self->node_pool, self->subtree_pool);
  for (uint32_t i = 0; i < self->heads.size; i++) {
    stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool);
  }
  // Nodes parked in the pool were not freed by the releases above.
  if (self->node_pool.contents) {
    for (uint32_t i = 0; i < self->node_pool.size; i++)
      ts_free(self->node_pool.contents[i]);
    array_delete(&self->node_pool);
  }
  array_delete(&self->heads);
  ts_free(self);
}
|
|
|
|
|
|
2016-11-14 12:15:24 -08:00
|
|
|
uint32_t ts_stack_version_count(const Stack *self) {
|
2016-04-15 21:33:31 -07:00
|
|
|
return self->heads.size;
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-29 17:37:54 -07:00
|
|
|
// Parse state at the top of the given version.
TSStateId ts_stack_state(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->node->state;
}
|
|
|
|
|
|
2018-03-29 17:37:54 -07:00
|
|
|
// Input position of the given version's topmost node.
Length ts_stack_position(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->node->position;
}
|
|
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
// The last external token recorded for the given version; may be NULL.
Subtree *ts_stack_last_external_token(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->last_external_token;
}
|
|
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
// Replace a head's last external token, adjusting reference counts. The new
// token is retained before the old one is released, so this is safe even
// when `token` is the token already stored.
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree *token) {
  StackHead *head = array_get(&self->heads, version);
  if (token) ts_subtree_retain(token);
  if (head->last_external_token) ts_subtree_release(self->subtree_pool, head->last_external_token);
  head->last_external_token = token;
}
|
|
|
|
|
|
2017-09-13 16:38:15 -07:00
|
|
|
// Error cost of a version's head, plus an ERROR_COST_PER_RECOVERY penalty
// when the version is paused, or its head sits in the error state with no
// subtree on its first link.
unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  unsigned result = head->node->error_cost;
  if (
    head->status == StackStatusPaused ||
    (head->node->state == ERROR_STATE && !head->node->links[0].subtree)) {
    result += ERROR_COST_PER_RECOVERY;
  }
  return result;
}
|
|
|
|
|
|
2018-04-02 10:57:44 -07:00
|
|
|
// Number of subtree nodes added to this version since its recorded error
// baseline. If the head's node count has dropped below the baseline
// (presumably after popping — NOTE(review): confirm against callers), the
// baseline is clamped down first so the result never underflows.
unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  if (head->node->node_count < head->node_count_at_last_error) {
    head->node_count_at_last_error = head->node->node_count;
  }
  return head->node->node_count - head->node_count_at_last_error;
}
|
|
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
// Push `subtree` onto the given version, advancing its head to a new node
// in state `state`. A NULL subtree resets the node-count baseline used by
// ts_stack_node_count_since_error.
void ts_stack_push(Stack *self, StackVersion version, Subtree *subtree, bool pending, TSStateId state) {
  StackHead *head = array_get(&self->heads, version);
  StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool);
  if (!subtree) head->node_count_at_last_error = new_node->node_count;
  head->node = new_node;
}
|
|
|
|
|
|
2018-03-29 17:50:07 -07:00
|
|
|
inline StackAction iterate_callback(void *payload, const Iterator *iterator) {
|
2017-06-29 16:43:56 -07:00
|
|
|
StackIterateSession *session = payload;
|
2018-03-29 17:50:07 -07:00
|
|
|
session->callback(
|
|
|
|
|
session->payload,
|
|
|
|
|
iterator->node->state,
|
2018-05-10 15:11:14 -07:00
|
|
|
iterator->subtree_count
|
2018-03-29 17:50:07 -07:00
|
|
|
);
|
|
|
|
|
return StackActionNone;
|
2017-06-29 16:43:56 -07:00
|
|
|
}
|
|
|
|
|
|
2018-03-29 17:50:07 -07:00
|
|
|
void ts_stack_iterate(Stack *self, StackVersion version,
|
|
|
|
|
StackIterateCallback callback, void *payload) {
|
2017-06-29 16:43:56 -07:00
|
|
|
StackIterateSession session = {payload, callback};
|
2018-04-09 18:09:54 -07:00
|
|
|
stack__iter(self, version, iterate_callback, &session, -1);
|
2016-04-04 11:59:10 -07:00
|
|
|
}
|
|
|
|
|
|
2018-03-29 17:50:07 -07:00
|
|
|
inline StackAction pop_count_callback(void *payload, const Iterator *iterator) {
|
2018-05-10 15:11:14 -07:00
|
|
|
unsigned *goal_subtree_count = payload;
|
|
|
|
|
if (iterator->subtree_count == *goal_subtree_count) {
|
2018-03-29 17:50:07 -07:00
|
|
|
return StackActionPop | StackActionStop;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
} else {
|
2018-03-29 17:50:07 -07:00
|
|
|
return StackActionNone;
|
2016-05-09 14:31:44 -07:00
|
|
|
}
|
2016-03-07 16:03:23 -08:00
|
|
|
}
|
|
|
|
|
|
2018-03-29 17:37:54 -07:00
|
|
|
// Pop exactly `count` subtrees from the given version, returning one slice
// per distinct path through the stack graph.
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
  StackSliceArray result = stack__iter(self, version, pop_count_callback, &count, count);
  return result;
}
|
|
|
|
|
|
2018-03-29 17:50:07 -07:00
|
|
|
inline StackAction pop_pending_callback(void *payload, const Iterator *iterator) {
|
2018-05-10 15:11:14 -07:00
|
|
|
if (iterator->subtree_count >= 1) {
|
2017-06-29 16:43:56 -07:00
|
|
|
if (iterator->is_pending) {
|
2018-03-29 17:50:07 -07:00
|
|
|
return StackActionPop | StackActionStop;
|
2016-04-11 23:12:50 -07:00
|
|
|
} else {
|
2018-03-29 17:50:07 -07:00
|
|
|
return StackActionStop;
|
2016-04-11 23:12:50 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
2018-03-29 17:50:07 -07:00
|
|
|
return StackActionNone;
|
2016-04-11 23:12:50 -07:00
|
|
|
}
|
2015-05-25 20:21:13 -07:00
|
|
|
}
|
2015-06-18 15:04:03 -07:00
|
|
|
|
2018-03-29 17:37:54 -07:00
|
|
|
// Pop the topmost pending subtree from the given version, if any. The
// resulting version is renumbered back to `version` so the caller's version
// index stays valid.
StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
  StackSliceArray result = stack__iter(self, version, pop_pending_callback, NULL, 0);
  if (result.size > 0) {
    ts_stack_renumber_version(self, result.contents[0].version, version);
    result.contents[0].version = version;
  }
  return result;
}
|
|
|
|
|
|
2018-03-29 17:50:07 -07:00
|
|
|
inline StackAction pop_error_callback(void *payload, const Iterator *iterator) {
|
2018-05-10 15:11:14 -07:00
|
|
|
if (iterator->subtrees.size > 0) {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
bool *found_error = payload;
|
2018-05-10 15:11:14 -07:00
|
|
|
if (!*found_error && iterator->subtrees.contents[0]->symbol == ts_builtin_sym_error) {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
*found_error = true;
|
2018-03-29 17:50:07 -07:00
|
|
|
return StackActionPop | StackActionStop;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
} else {
|
2018-03-29 17:50:07 -07:00
|
|
|
return StackActionStop;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
2018-03-29 17:50:07 -07:00
|
|
|
return StackActionNone;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
// If the head of the given version has a direct link whose subtree is an
// ERROR node, pop that error path and return its subtrees. Returns an
// empty array when no error link exists or nothing could be popped.
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) {
  StackNode *head_node = array_get(&self->heads, version)->node;
  for (unsigned i = 0; i < head_node->link_count; i++) {
    Subtree *link_subtree = head_node->links[i].subtree;
    if (!link_subtree || link_subtree->symbol != ts_builtin_sym_error) continue;

    bool found_error = false;
    StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1);
    if (pop.size > 0) {
      // The callback stops after the first error path, so at most one slice.
      assert(pop.size == 1);
      ts_stack_renumber_version(self, pop.contents[0].version, version);
      return pop.contents[0].subtrees;
    }
    // Only the first error link is attempted.
    break;
  }
  return (SubtreeArray){.size = 0};
}
|
|
|
|
|
|
2018-03-29 17:50:07 -07:00
|
|
|
inline StackAction pop_all_callback(void *payload, const Iterator *iterator) {
|
|
|
|
|
return iterator->node->link_count == 0 ? StackActionPop : StackActionNone;
|
2015-06-18 15:04:03 -07:00
|
|
|
}
|
2015-12-08 12:20:50 -08:00
|
|
|
|
2018-03-29 17:37:54 -07:00
|
|
|
// Pop every path of the given version all the way down to the stack base.
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) {
  return stack__iter(self, version, pop_all_callback, NULL, 0);
}
|
|
|
|
|
|
2017-09-12 12:00:00 -07:00
|
|
|
typedef struct {
|
|
|
|
|
StackSummary *summary;
|
|
|
|
|
unsigned max_depth;
|
|
|
|
|
} SummarizeStackSession;
|
|
|
|
|
|
2018-03-29 17:50:07 -07:00
|
|
|
inline StackAction summarize_stack_callback(void *payload, const Iterator *iterator) {
|
2017-09-12 12:00:00 -07:00
|
|
|
SummarizeStackSession *session = payload;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
TSStateId state = iterator->node->state;
|
2018-05-10 15:11:14 -07:00
|
|
|
unsigned depth = iterator->subtree_count;
|
2018-03-29 17:50:07 -07:00
|
|
|
if (depth > session->max_depth) return StackActionStop;
|
2017-09-12 12:00:00 -07:00
|
|
|
for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) {
|
|
|
|
|
StackSummaryEntry entry = session->summary->contents[i];
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
if (entry.depth < depth) break;
|
2018-03-29 17:50:07 -07:00
|
|
|
if (entry.depth == depth && entry.state == state) return StackActionNone;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
}
|
2017-09-13 09:56:51 -07:00
|
|
|
array_push(session->summary, ((StackSummaryEntry){
|
|
|
|
|
.position = iterator->node->position,
|
|
|
|
|
.depth = depth,
|
|
|
|
|
.state = state,
|
|
|
|
|
}));
|
2018-03-29 17:50:07 -07:00
|
|
|
return StackActionNone;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
}
|
|
|
|
|
|
2017-09-12 12:00:00 -07:00
|
|
|
// Traverse the given version down to max_depth and store the resulting
// summary on the version's head.
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
  SummarizeStackSession session = {
    .summary = ts_malloc(sizeof(StackSummary)),
    .max_depth = max_depth,
  };
  array_init(session.summary);
  stack__iter(self, version, summarize_stack_callback, &session, -1);
  self->heads.contents[version].summary = session.summary;
}
|
|
|
|
|
|
|
|
|
|
// Return the summary recorded on this version's head.
StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->summary;
}
|
|
|
|
|
|
2017-10-09 15:51:22 -07:00
|
|
|
// Return the dynamic precedence stored on this version's head node.
int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->node->dynamic_precedence;
}
|
|
|
|
|
|
2016-04-15 21:33:31 -07:00
|
|
|
// Delete the given version's head and erase it from the version list.
void ts_stack_remove_version(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  stack_head_delete(head, &self->node_pool, self->subtree_pool);
  array_erase(&self->heads, version);
}
|
|
|
|
|
|
|
|
|
|
// Move version v1's head into slot v2 (v2 must be the smaller index),
// deleting v2's old head and erasing slot v1. If v2 had a summary and v1
// did not, the summary is carried over rather than destroyed.
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
  assert(v2 < v1);
  assert((uint32_t)v1 < self->heads.size);
  StackHead *src = &self->heads.contents[v1];
  StackHead *dst = &self->heads.contents[v2];
  if (dst->summary && !src->summary) {
    src->summary = dst->summary;
    dst->summary = NULL;
  }
  stack_head_delete(dst, &self->node_pool, self->subtree_pool);
  *dst = *src;
  array_erase(&self->heads, v1);
}
|
|
|
|
|
|
2017-06-29 14:58:20 -07:00
|
|
|
// Exchange the heads of two versions in place.
void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
  StackHead swap = self->heads.contents[v1];
  self->heads.contents[v1] = self->heads.contents[v2];
  self->heads.contents[v2] = swap;
}
|
|
|
|
|
|
2016-11-14 17:25:55 -08:00
|
|
|
// Duplicate a version's head onto the end of the version list, retaining
// the shared node and external token. The copy starts with no summary.
// Returns the new version's index.
StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
  assert(version < self->heads.size);
  array_push(&self->heads, self->heads.contents[version]);
  StackHead *copy = array_back(&self->heads);
  stack_node_retain(copy->node);
  if (copy->last_external_token) {
    ts_subtree_retain(copy->last_external_token);
  }
  copy->summary = NULL;
  return self->heads.size - 1;
}
|
|
|
|
|
|
2017-06-29 14:58:20 -07:00
|
|
|
// Merge version2 into version1 when their heads are equivalent (see
// ts_stack_can_merge), transferring version2's links and removing it.
// Returns false if the versions cannot be merged.
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
  if (!ts_stack_can_merge(self, version1, version2)) return false;

  StackHead *head1 = &self->heads.contents[version1];
  StackHead *head2 = &self->heads.contents[version2];
  for (uint32_t i = 0; i < head2->node->link_count; i++) {
    stack_node_add_link(head1->node, head2->node->links[i]);
  }

  if (head1->node->state == ERROR_STATE) {
    head1->node_count_at_last_error = head1->node->node_count;
  }

  ts_stack_remove_version(self, version2);
  return true;
}
|
|
|
|
|
|
2017-06-29 14:58:20 -07:00
|
|
|
// Two versions can merge only when both are active and their heads agree
// on parse state, byte position, error cost, and external scanner state.
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
  StackHead *head1 = &self->heads.contents[version1];
  StackHead *head2 = &self->heads.contents[version2];
  if (head1->status != StackStatusActive) return false;
  if (head2->status != StackStatusActive) return false;
  if (head1->node->state != head2->node->state) return false;
  if (head1->node->position.bytes != head2->node->position.bytes) return false;
  if (head1->node->error_cost != head2->node->error_cost) return false;
  return ts_subtree_external_scanner_state_eq(
    head1->last_external_token,
    head2->last_external_token
  );
}
|
|
|
|
|
|
2016-06-02 14:04:48 -07:00
|
|
|
// Mark the given version as halted.
void ts_stack_halt(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  head->status = StackStatusHalted;
}
|
|
|
|
|
|
|
|
|
|
// Pause the given version on a lookahead symbol, recording the head's
// current node count in node_count_at_last_error.
void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) {
  StackHead *head = array_get(&self->heads, version);
  head->status = StackStatusPaused;
  head->lookahead_when_paused = lookahead;
  head->node_count_at_last_error = head->node->node_count;
}
|
|
|
|
|
|
|
|
|
|
// True when the version is neither halted nor paused.
bool ts_stack_is_active(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusActive;
}
|
2016-05-29 22:36:47 -07:00
|
|
|
|
2018-04-02 09:47:01 -07:00
|
|
|
// True when the version has been halted.
bool ts_stack_is_halted(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusHalted;
}
|
|
|
|
|
|
|
|
|
|
// True when the version is paused on a lookahead symbol.
bool ts_stack_is_paused(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusPaused;
}
|
|
|
|
|
|
|
|
|
|
// Reactivate a paused version, clearing and returning the lookahead
// symbol it was paused on. The version must currently be paused.
TSSymbol ts_stack_resume(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  assert(head->status == StackStatusPaused);
  TSSymbol lookahead = head->lookahead_when_paused;
  head->status = StackStatusActive;
  head->lookahead_when_paused = 0;
  return lookahead;
}
|
|
|
|
|
|
2016-04-04 11:59:10 -07:00
|
|
|
// Reset the stack to a single active version whose head is the base node.
void ts_stack_clear(Stack *self) {
  // Retain the base node first so it survives deletion of existing heads.
  stack_node_retain(self->base_node);
  for (uint32_t version = 0; version < self->heads.size; version++) {
    stack_head_delete(&self->heads.contents[version], &self->node_pool, self->subtree_pool);
  }
  array_clear(&self->heads);
  array_push(&self->heads, ((StackHead){
    .node = self->base_node,
    .last_external_token = NULL,
    .status = StackStatusActive,
    .lookahead_when_paused = 0,
  }));
}
|
|
|
|
|
|
2018-04-08 13:49:20 -07:00
|
|
|
bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
|
2016-05-16 10:44:19 -07:00
|
|
|
bool was_recording_allocations = ts_toggle_allocation_recording(false);
|
|
|
|
|
if (!f)
|
|
|
|
|
f = stderr;
|
|
|
|
|
|
2016-04-02 22:18:44 -07:00
|
|
|
fprintf(f, "digraph stack {\n");
|
|
|
|
|
fprintf(f, "rankdir=\"RL\";\n");
|
2016-05-01 15:24:50 -07:00
|
|
|
fprintf(f, "edge [arrowhead=none]\n");
|
2016-02-23 00:08:55 -08:00
|
|
|
|
2017-06-27 11:38:11 -07:00
|
|
|
Array(StackNode *) visited_nodes = array_new();
|
2016-02-23 17:35:50 -08:00
|
|
|
|
2016-09-01 10:04:20 -07:00
|
|
|
array_clear(&self->iterators);
|
2016-11-14 12:15:24 -08:00
|
|
|
for (uint32_t i = 0; i < self->heads.size; i++) {
|
2016-08-31 17:29:14 -07:00
|
|
|
StackHead *head = &self->heads.contents[i];
|
2018-04-02 09:47:01 -07:00
|
|
|
if (head->status == StackStatusHalted) continue;
|
|
|
|
|
|
2016-11-14 12:15:24 -08:00
|
|
|
fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
|
2018-04-02 09:47:01 -07:00
|
|
|
fprintf(f, "node_head_%u -> node_%p [", i, head->node);
|
|
|
|
|
|
|
|
|
|
if (head->status == StackStatusPaused) {
|
|
|
|
|
fprintf(f, "color=red ");
|
|
|
|
|
}
|
|
|
|
|
fprintf(f,
|
2018-04-02 11:52:34 -07:00
|
|
|
"label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u",
|
|
|
|
|
i,
|
|
|
|
|
ts_stack_node_count_since_error(self, i),
|
|
|
|
|
ts_stack_error_cost(self, i)
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
);
|
2017-01-30 21:58:27 -08:00
|
|
|
|
2017-06-27 14:30:46 -07:00
|
|
|
if (head->last_external_token) {
|
2018-05-11 12:57:41 -07:00
|
|
|
ExternalScannerState *state = &head->last_external_token->external_scanner_state;
|
|
|
|
|
const char *data = ts_external_scanner_state_data(state);
|
|
|
|
|
fprintf(f, "\nexternal_scanner_state:");
|
2017-07-17 17:12:36 -07:00
|
|
|
for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]);
|
2017-01-30 21:58:27 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fprintf(f, "\"]\n");
|
2016-11-04 09:18:38 -07:00
|
|
|
array_push(&self->iterators, ((Iterator){.node = head->node }));
|
2016-02-23 09:45:27 -08:00
|
|
|
}
|
2016-02-23 00:08:55 -08:00
|
|
|
|
2016-09-01 10:04:20 -07:00
|
|
|
bool all_iterators_done = false;
|
|
|
|
|
while (!all_iterators_done) {
|
|
|
|
|
all_iterators_done = true;
|
2016-02-23 00:08:55 -08:00
|
|
|
|
2016-11-14 12:15:24 -08:00
|
|
|
for (uint32_t i = 0; i < self->iterators.size; i++) {
|
2017-06-27 11:38:11 -07:00
|
|
|
Iterator iterator = self->iterators.contents[i];
|
|
|
|
|
StackNode *node = iterator.node;
|
2016-02-23 00:08:55 -08:00
|
|
|
|
2016-11-14 12:15:24 -08:00
|
|
|
for (uint32_t j = 0; j < visited_nodes.size; j++) {
|
2016-02-23 17:35:50 -08:00
|
|
|
if (visited_nodes.contents[j] == node) {
|
|
|
|
|
node = NULL;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-04 10:41:20 -07:00
|
|
|
if (!node) continue;
|
2016-09-01 10:04:20 -07:00
|
|
|
all_iterators_done = false;
|
2016-02-23 00:08:55 -08:00
|
|
|
|
2016-05-01 15:24:50 -07:00
|
|
|
fprintf(f, "node_%p [", node);
|
2016-10-05 14:02:49 -07:00
|
|
|
if (node->state == ERROR_STATE)
|
2016-05-01 15:24:50 -07:00
|
|
|
fprintf(f, "label=\"?\"");
|
2018-05-10 15:11:14 -07:00
|
|
|
else if (node->link_count == 1 && node->links[0].subtree &&
|
|
|
|
|
node->links[0].subtree->extra)
|
2016-05-01 15:24:50 -07:00
|
|
|
fprintf(f, "shape=point margin=0 label=\"\"");
|
2016-03-02 09:55:25 -08:00
|
|
|
else
|
2016-05-29 22:36:47 -07:00
|
|
|
fprintf(f, "label=\"%d\"", node->state);
|
2016-08-31 10:51:59 -07:00
|
|
|
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
fprintf(
|
|
|
|
|
f,
|
2018-04-06 09:35:17 -07:00
|
|
|
" tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
|
2017-10-09 15:51:22 -07:00
|
|
|
node->position.extent.row,
|
|
|
|
|
node->position.extent.column,
|
2018-04-06 09:35:17 -07:00
|
|
|
node->node_count,
|
2017-10-09 15:51:22 -07:00
|
|
|
node->error_cost,
|
|
|
|
|
node->dynamic_precedence
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
);
|
2016-02-23 00:08:55 -08:00
|
|
|
|
2016-04-11 22:41:06 -07:00
|
|
|
for (int j = 0; j < node->link_count; j++) {
|
|
|
|
|
StackLink link = node->links[j];
|
|
|
|
|
fprintf(f, "node_%p -> node_%p [", node, link.node);
|
|
|
|
|
if (link.is_pending)
|
2016-04-04 12:25:57 -07:00
|
|
|
fprintf(f, "style=dashed ");
|
2018-05-10 15:11:14 -07:00
|
|
|
if (link.subtree && link.subtree->extra)
|
2016-05-01 15:24:50 -07:00
|
|
|
fprintf(f, "fontcolor=gray ");
|
2016-02-24 17:23:58 -08:00
|
|
|
|
2018-05-10 15:11:14 -07:00
|
|
|
if (!link.subtree) {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
fprintf(f, "color=red");
|
2016-03-02 09:55:25 -08:00
|
|
|
} else {
|
2018-04-08 13:49:20 -07:00
|
|
|
fprintf(f, "label=\"");
|
2018-05-10 15:11:14 -07:00
|
|
|
if (link.subtree->visible && !link.subtree->named) fprintf(f, "'");
|
|
|
|
|
const char *name = ts_language_symbol_name(language, link.subtree->symbol);
|
2018-04-08 13:49:20 -07:00
|
|
|
for (const char *c = name; *c; c++) {
|
|
|
|
|
if (*c == '\"' || *c == '\\') fprintf(f, "\\");
|
|
|
|
|
fprintf(f, "%c", *c);
|
2016-02-24 17:23:58 -08:00
|
|
|
}
|
2018-05-10 15:11:14 -07:00
|
|
|
if (link.subtree->visible && !link.subtree->named) fprintf(f, "'");
|
2018-04-08 13:49:20 -07:00
|
|
|
fprintf(f, "\"");
|
2018-04-06 09:35:17 -07:00
|
|
|
fprintf(f, "labeltooltip=\"error_cost: %u\ndynamic_precedence: %u\"",
|
2018-05-10 15:11:14 -07:00
|
|
|
link.subtree->error_cost,
|
|
|
|
|
link.subtree->dynamic_precedence);
|
2016-02-24 17:23:58 -08:00
|
|
|
}
|
|
|
|
|
|
2016-05-26 13:20:53 -07:00
|
|
|
fprintf(f, "];\n");
|
2016-02-23 00:08:55 -08:00
|
|
|
|
2017-06-27 11:38:11 -07:00
|
|
|
Iterator *next_iterator;
|
2016-02-23 17:35:50 -08:00
|
|
|
if (j == 0) {
|
2017-06-27 11:38:11 -07:00
|
|
|
next_iterator = &self->iterators.contents[i];
|
2016-02-23 17:35:50 -08:00
|
|
|
} else {
|
2017-06-27 11:38:11 -07:00
|
|
|
array_push(&self->iterators, iterator);
|
|
|
|
|
next_iterator = array_back(&self->iterators);
|
2016-02-23 17:35:50 -08:00
|
|
|
}
|
2017-06-27 11:38:11 -07:00
|
|
|
next_iterator->node = link.node;
|
2016-02-23 00:08:55 -08:00
|
|
|
}
|
2016-02-23 17:35:50 -08:00
|
|
|
|
2016-11-05 14:39:25 -07:00
|
|
|
array_push(&visited_nodes, node);
|
2016-02-23 00:08:55 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-02 22:18:44 -07:00
|
|
|
fprintf(f, "}\n");
|
2016-02-23 00:08:55 -08:00
|
|
|
|
2016-02-23 17:35:50 -08:00
|
|
|
array_delete(&visited_nodes);
|
2016-05-16 10:44:19 -07:00
|
|
|
ts_toggle_allocation_recording(was_recording_allocations);
|
|
|
|
|
return true;
|
2016-02-23 00:08:55 -08:00
|
|
|
}
|