Start work on error recovery
- In the runtime, make parse errors part of the parse tree
- Add an error state to lexers, in which they can accept any token
This commit is contained in:
parent
4520d6e1a2
commit
e58a6d8ba7
18 changed files with 622 additions and 528 deletions
|
|
@ -12,7 +12,7 @@ extern "C" {
|
|||
|
||||
//#define TS_DEBUG_PARSE
|
||||
//#define TS_DEBUG_LEX
|
||||
|
||||
|
||||
#ifdef TS_DEBUG_LEX
|
||||
#define DEBUG_LEX(...) fprintf(stderr, __VA_ARGS__)
|
||||
#else
|
||||
|
|
@ -27,8 +27,9 @@ extern "C" {
|
|||
|
||||
static int INITIAL_STACK_SIZE = 100;
|
||||
static const char *ts_symbol_names[];
|
||||
|
||||
|
||||
typedef int ts_state;
|
||||
// Lexer state used during error recovery: ts_parser_handle_error assigns it
// to parser->lex_state before calling ts_lex again.
static const ts_state ts_lex_state_error = -1;
|
||||
|
||||
typedef struct {
|
||||
ts_state state;
|
||||
|
|
@ -37,15 +38,18 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
const char *input;
|
||||
int error_mode;
|
||||
size_t position;
|
||||
ts_tree *lookahead_node;
|
||||
ts_tree *prev_lookahead_node;
|
||||
ts_state lex_state;
|
||||
ts_stack_entry *stack;
|
||||
size_t stack_size;
|
||||
ts_parse_result result;
|
||||
ts_tree *result;
|
||||
} ts_parser;
|
||||
|
||||
static void ts_lex(ts_parser *parser);
|
||||
|
||||
static ts_parser ts_parser_make(const char *input) {
|
||||
ts_parser result = {
|
||||
.input = input,
|
||||
|
|
@ -54,13 +58,7 @@ static ts_parser ts_parser_make(const char *input) {
|
|||
.lex_state = 0,
|
||||
.stack = calloc(INITIAL_STACK_SIZE, sizeof(ts_stack_entry)),
|
||||
.stack_size = 0,
|
||||
.result = {
|
||||
.tree = NULL,
|
||||
.error = {
|
||||
.expected_inputs = NULL,
|
||||
.expected_input_count = 0
|
||||
},
|
||||
},
|
||||
.result = NULL,
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
|
@ -69,9 +67,9 @@ static char ts_parser_lookahead_char(const ts_parser *parser) {
|
|||
return parser->input[parser->position];
|
||||
}
|
||||
|
||||
static long ts_parser_lookahead_sym(const ts_parser *parser) {
|
||||
static ts_symbol ts_parser_lookahead_sym(const ts_parser *parser) {
|
||||
ts_tree *node = parser->lookahead_node;
|
||||
return node ? node->value : -1;
|
||||
return node ? node->symbol : ts_symbol_error;
|
||||
}
|
||||
|
||||
static ts_state ts_parser_parse_state(const ts_parser *parser) {
|
||||
|
|
@ -96,7 +94,7 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
|
|||
for (int i = 0; i < immediate_child_count; i++) {
|
||||
ts_tree *child = parser->stack[parser->stack_size + i].node;
|
||||
if (collapse_flags[i]) {
|
||||
total_child_count += child->child_count;
|
||||
total_child_count += ts_tree_child_count(child);
|
||||
} else {
|
||||
total_child_count++;
|
||||
}
|
||||
|
|
@ -107,8 +105,11 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
|
|||
for (int i = 0; i < immediate_child_count; i++) {
|
||||
ts_tree *child = parser->stack[parser->stack_size + i].node;
|
||||
if (collapse_flags[i]) {
|
||||
memcpy(children + n, child->children, (child->child_count * sizeof(ts_tree *)));
|
||||
n += child->child_count;
|
||||
size_t grandchild_count = ts_tree_child_count(child);
|
||||
if (grandchild_count > 0) {
|
||||
memcpy(children + n, ts_tree_children(child), (grandchild_count * sizeof(ts_tree *)));
|
||||
n += grandchild_count;
|
||||
}
|
||||
} else {
|
||||
children[n] = child;
|
||||
n++;
|
||||
|
|
@ -116,22 +117,10 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
|
|||
}
|
||||
|
||||
parser->prev_lookahead_node = parser->lookahead_node;
|
||||
parser->lookahead_node = ts_tree_make(symbol, total_child_count, children);
|
||||
parser->lookahead_node = ts_tree_make_node(symbol, total_child_count, children);
|
||||
DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser));
|
||||
}
|
||||
|
||||
static void ts_parser_set_error(ts_parser *parser, size_t count, const char **expected_inputs) {
|
||||
ts_error *error = &parser->result.error;
|
||||
error->position = parser->position;
|
||||
error->lookahead_char = ts_parser_lookahead_char(parser);
|
||||
error->expected_input_count = count;
|
||||
error->expected_inputs = expected_inputs;
|
||||
}
|
||||
|
||||
static int ts_parser_has_error(const ts_parser *parser) {
|
||||
return (parser->result.error.expected_inputs != NULL);
|
||||
}
|
||||
|
||||
static void ts_parser_advance(ts_parser *parser, ts_state lex_state) {
|
||||
DEBUG_LEX("character: '%c' \n", ts_parser_lookahead_char(parser));
|
||||
parser->position++;
|
||||
|
|
@ -140,18 +129,28 @@ static void ts_parser_advance(ts_parser *parser, ts_state lex_state) {
|
|||
|
||||
static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) {
|
||||
DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]);
|
||||
parser->lookahead_node = ts_tree_make(symbol, 0, NULL);
|
||||
parser->lookahead_node = ts_tree_make_leaf(symbol);
|
||||
}
|
||||
|
||||
static void ts_parser_accept_input(ts_parser *parser) {
|
||||
parser->result.tree = parser->stack[parser->stack_size - 1].node;
|
||||
static ts_tree * ts_parser_tree(ts_parser *parser) {
|
||||
DEBUG_PARSE("accept \n");
|
||||
return parser->stack[0].node;
|
||||
}
|
||||
|
||||
static void ts_parser_skip_whitespace(ts_parser *parser) {
|
||||
while (isspace(parser->input[parser->position]))
|
||||
while (isspace(ts_parser_lookahead_char(parser)))
|
||||
parser->position++;
|
||||
}
|
||||
|
||||
// Reacts to a parse error at the parser's current position.
//
// parser           - parser whose error_mode, lex_state and lookahead_node are updated.
// count            - number of entries in expected_symbols.
// expected_symbols - symbols that would have been acceptable here; the pointer
//                    is handed to ts_tree_make_error as-is.
//
// First error (error_mode == 0): sets error_mode and replaces the lookahead
// node with an error tree built from the current lookahead character and the
// expected symbols.
// Later errors (error_mode != 0): switches the lexer to ts_lex_state_error and
// calls ts_lex again to produce a new lookahead.
static void ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symbol *expected_symbols) {
|
||||
if (parser->error_mode) {
|
||||
// Already recovering: re-lex from the dedicated error lex state.
parser->lex_state = ts_lex_state_error;
|
||||
ts_lex(parser);
|
||||
} else {
|
||||
// First error: remember that we are recovering and put an error node in the lookahead slot.
parser->error_mode = 1;
|
||||
parser->lookahead_node = ts_tree_make_error(ts_parser_lookahead_char(parser), count, expected_symbols);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - DSL
|
||||
|
||||
|
|
@ -159,7 +158,7 @@ static void ts_parser_skip_whitespace(ts_parser *parser) {
|
|||
static void ts_lex(ts_parser *parser)
|
||||
|
||||
#define PARSE_FN() \
|
||||
static ts_parse_result ts_parse(const char *input)
|
||||
static const ts_tree * ts_parse(const char *input)
|
||||
|
||||
#define SYMBOL_NAMES \
|
||||
static const char *ts_symbol_names[] =
|
||||
|
|
@ -175,6 +174,9 @@ ts_parser p = ts_parser_make(input), *parser = &p; \
|
|||
next_state:
|
||||
|
||||
#define START_LEXER() \
|
||||
if (ts_parser_lookahead_char(parser) == '\0') { \
|
||||
ACCEPT_TOKEN(ts_aux_sym_end) \
|
||||
} \
|
||||
ts_parser_skip_whitespace(parser); \
|
||||
next_state:
|
||||
|
||||
|
|
@ -194,7 +196,6 @@ parser->lex_state
|
|||
{ \
|
||||
parser->lex_state = state_index; \
|
||||
if (LOOKAHEAD_SYM() < 0) ts_lex(parser); \
|
||||
if (ts_parser_has_error(parser)) goto done; \
|
||||
}
|
||||
|
||||
#define SHIFT(state) \
|
||||
|
|
@ -211,30 +212,33 @@ goto next_state; \
|
|||
}
|
||||
|
||||
#define ACCEPT_INPUT() \
|
||||
{ ts_parser_accept_input(parser); goto done; }
|
||||
{ goto done; }
|
||||
|
||||
#define ACCEPT_TOKEN(symbol) \
|
||||
{ ts_parser_set_lookahead_sym(parser, symbol); goto done; }
|
||||
|
||||
#define LEX_ERROR(count, inputs) \
|
||||
#define LEX_ERROR() \
|
||||
{ ts_parser_set_lookahead_sym(parser, -1); goto done; }
|
||||
|
||||
#define PARSE_ERROR(count, inputs) \
|
||||
{ \
|
||||
static const char *expected_inputs[] = inputs; \
|
||||
ts_parser_set_error(parser, count, expected_inputs); \
|
||||
goto done; \
|
||||
static const ts_symbol expected_inputs[] = inputs; \
|
||||
ts_parser_handle_error(parser, count, expected_inputs); \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define LEX_PANIC() \
|
||||
printf("Lex error: unexpected state %ud", LEX_STATE());
|
||||
printf("Lex error: unexpected state %d", LEX_STATE());
|
||||
|
||||
#define PARSE_PANIC() \
|
||||
printf("Parse error: unexpected state %ud", PARSE_STATE());
|
||||
printf("Parse error: unexpected state %d", PARSE_STATE());
|
||||
|
||||
#define EXPECT(...) __VA_ARGS__
|
||||
#define COLLAPSE(...) __VA_ARGS__
|
||||
|
||||
#define FINISH_PARSER() \
|
||||
done: \
|
||||
return parser->result;
|
||||
return ts_parser_tree(parser);
|
||||
|
||||
#define FINISH_LEXER() \
|
||||
done:
|
||||
|
|
|
|||
|
|
@ -7,36 +7,37 @@ extern "C" {
|
|||
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct {
|
||||
size_t position;
|
||||
char lookahead_char;
|
||||
size_t expected_input_count;
|
||||
const char **expected_inputs;
|
||||
} ts_error;
|
||||
|
||||
const char * ts_error_string(const ts_error *error);
|
||||
|
||||
typedef size_t ts_symbol;
|
||||
typedef int ts_symbol;
|
||||
extern const ts_symbol ts_symbol_error;
|
||||
|
||||
typedef struct ts_tree {
|
||||
ts_symbol value;
|
||||
struct ts_tree **children;
|
||||
size_t child_count;
|
||||
ts_symbol symbol;
|
||||
size_t ref_count;
|
||||
union {
|
||||
struct {
|
||||
size_t count;
|
||||
struct ts_tree **contents;
|
||||
} children;
|
||||
struct {
|
||||
char lookahead_char;
|
||||
size_t expected_input_count;
|
||||
const ts_symbol *expected_inputs;
|
||||
} error;
|
||||
} data;
|
||||
} ts_tree;
|
||||
|
||||
ts_tree * ts_tree_make(ts_symbol value, size_t child_count, ts_tree **children);
|
||||
ts_tree * ts_tree_make_leaf(ts_symbol symbol);
|
||||
ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children);
|
||||
ts_tree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const ts_symbol *expected_inputs);
|
||||
void ts_tree_retain(ts_tree *tree);
|
||||
void ts_tree_release(ts_tree *tree);
|
||||
int ts_tree_equals(const ts_tree *tree1, const ts_tree *tree2);
|
||||
char * ts_tree_string(const ts_tree *tree, const char **names);
|
||||
char * ts_tree_error_string(const ts_tree *tree, const char **names);
|
||||
size_t ts_tree_child_count(const ts_tree *tree);
|
||||
ts_tree ** ts_tree_children(const ts_tree *tree);
|
||||
|
||||
typedef struct {
|
||||
ts_error error;
|
||||
ts_tree *tree;
|
||||
} ts_parse_result;
|
||||
|
||||
typedef ts_parse_result ts_parse_fn(const char *);
|
||||
typedef const ts_tree * ts_parse_fn(const char *);
|
||||
|
||||
typedef struct {
|
||||
ts_parse_fn *parse_fn;
|
||||
|
|
@ -49,7 +50,7 @@ ts_document * ts_document_make();
|
|||
void ts_document_free(ts_document *);
|
||||
void ts_document_set_parser(ts_document *document, ts_parse_config config);
|
||||
void ts_document_set_text(ts_document *document, const char *text);
|
||||
ts_tree * ts_document_tree(const ts_document *document);
|
||||
const ts_tree * ts_document_tree(const ts_document *document);
|
||||
const char * ts_document_string(const ts_document *document);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue