Refactor parser header
Make separate lexer, stack and parser structs.
This commit is contained in:
parent
05a5f9c124
commit
0dc3a95d0c
11 changed files with 361 additions and 276 deletions
|
|
@ -24,7 +24,7 @@ SYMBOL_NAMES {
|
|||
"token2",
|
||||
};
|
||||
|
||||
static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t *count) {
|
||||
RECOVER_FN() {
|
||||
switch (state) {
|
||||
case 6:
|
||||
RECOVER(7, 1, EXPECT({ts_aux_sym_token2}));
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ SYMBOL_NAMES {
|
|||
"repeat_helper2",
|
||||
};
|
||||
|
||||
static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t *count) {
|
||||
RECOVER_FN() {
|
||||
switch (state) {
|
||||
case 3:
|
||||
RECOVER(52, 2, EXPECT({ts_sym_comma, ts_sym_right_brace}));
|
||||
|
|
|
|||
|
|
@ -5,11 +5,17 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include "tree_sitter/runtime.h"
|
||||
|
||||
/*
|
||||
* Parsing DSL Macros
|
||||
*
|
||||
* Generated parsers use these macros. They prevent the code generator
|
||||
* from having too much knowledge of the runtime types and functions.
|
||||
*/
|
||||
|
||||
//#define TS_DEBUG_PARSE
|
||||
//#define TS_DEBUG_LEX
|
||||
|
||||
|
|
@ -25,289 +31,114 @@ extern "C" {
|
|||
#define DEBUG_PARSE(...)
|
||||
#endif
|
||||
|
||||
static int INITIAL_STACK_SIZE = 100;
|
||||
static const char *ts_symbol_names[];
|
||||
|
||||
typedef int ts_state;
|
||||
static const ts_state ts_lex_state_error = -1;
|
||||
|
||||
typedef struct {
|
||||
ts_state state;
|
||||
ts_tree *node;
|
||||
} ts_stack_entry;
|
||||
|
||||
typedef struct {
|
||||
ts_input input;
|
||||
const char *chunk;
|
||||
size_t chunk_start;
|
||||
size_t chunk_size;
|
||||
size_t position_in_chunk;
|
||||
|
||||
size_t token_end_position;
|
||||
size_t token_start_position;
|
||||
|
||||
ts_tree *lookahead_node;
|
||||
ts_tree *prev_lookahead_node;
|
||||
ts_state lex_state;
|
||||
ts_stack_entry *stack;
|
||||
size_t stack_size;
|
||||
} ts_parser;
|
||||
|
||||
static void ts_lex(ts_parser *parser);
|
||||
static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t *count);
|
||||
static void ts_parser_advance(ts_parser *);
|
||||
|
||||
static ts_parser ts_parser_make(ts_input input) {
|
||||
ts_parser result = {
|
||||
.input = input,
|
||||
.chunk = NULL,
|
||||
.chunk_start = 0,
|
||||
.chunk_size = 0,
|
||||
.position_in_chunk = 0,
|
||||
|
||||
.token_start_position = 0,
|
||||
.token_end_position = 0,
|
||||
|
||||
.lookahead_node = NULL,
|
||||
.prev_lookahead_node = NULL,
|
||||
.lex_state = 0,
|
||||
.stack = calloc(INITIAL_STACK_SIZE, sizeof(ts_stack_entry)),
|
||||
.stack_size = 0,
|
||||
};
|
||||
|
||||
ts_parser_advance(&result);
|
||||
return result;
|
||||
}
|
||||
|
||||
static size_t ts_parser_position(const ts_parser *parser) {
|
||||
return parser->chunk_start + parser->position_in_chunk;
|
||||
}
|
||||
|
||||
static char ts_parser_lookahead_char(const ts_parser *parser) {
|
||||
return parser->chunk[parser->position_in_chunk];
|
||||
}
|
||||
|
||||
static ts_symbol ts_parser_lookahead_sym(const ts_parser *parser) {
|
||||
ts_tree *node = parser->lookahead_node;
|
||||
return node ? node->symbol : ts_builtin_sym_error;
|
||||
}
|
||||
|
||||
static ts_state ts_parser_parse_state(const ts_parser *parser) {
|
||||
if (parser->stack_size == 0) return 0;
|
||||
return parser->stack[parser->stack_size - 1].state;
|
||||
}
|
||||
|
||||
static void ts_parser_push(ts_parser *parser, ts_state state, ts_tree *node) {
|
||||
ts_stack_entry *entry = (parser->stack + parser->stack_size);
|
||||
entry->state = state;
|
||||
entry->node = node;
|
||||
parser->stack_size++;
|
||||
}
|
||||
|
||||
static void ts_parser_shift(ts_parser *parser, ts_state parse_state) {
|
||||
DEBUG_PARSE("shift: %d \n", parse_state);
|
||||
ts_parser_push(parser, parse_state, parser->lookahead_node);
|
||||
parser->lookahead_node = parser->prev_lookahead_node;
|
||||
parser->prev_lookahead_node = NULL;
|
||||
}
|
||||
|
||||
static void ts_parser_shrink_stack(ts_parser *parser, size_t new_size) {
|
||||
for (size_t i = new_size; i < parser->stack_size; i++)
|
||||
ts_tree_release(parser->stack[i].node);
|
||||
parser->stack_size = new_size;
|
||||
}
|
||||
|
||||
static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_child_count, const int *collapse_flags) {
|
||||
size_t new_stack_size = parser->stack_size - immediate_child_count;
|
||||
|
||||
int child_count = 0;
|
||||
for (int i = 0; i < immediate_child_count; i++) {
|
||||
ts_tree *child = parser->stack[new_stack_size + i].node;
|
||||
child_count += collapse_flags[i] ? ts_tree_child_count(child) : 1;
|
||||
}
|
||||
|
||||
int child_index = 0;
|
||||
size_t size = 0, offset = 0;
|
||||
ts_tree **children = malloc(child_count * sizeof(ts_tree *));
|
||||
for (int i = 0; i < immediate_child_count; i++) {
|
||||
ts_tree *child = parser->stack[new_stack_size + i].node;
|
||||
if (i == 0) {
|
||||
offset = child->offset;
|
||||
size = child->size;
|
||||
} else {
|
||||
size += child->offset + child->size;
|
||||
}
|
||||
|
||||
if (collapse_flags[i]) {
|
||||
size_t grandchild_count = ts_tree_child_count(child);
|
||||
memcpy(children + child_index, ts_tree_children(child), (grandchild_count * sizeof(ts_tree *)));
|
||||
child_index += grandchild_count;
|
||||
} else {
|
||||
memcpy(children + child_index, &child, sizeof(ts_tree *));
|
||||
child_index++;
|
||||
}
|
||||
}
|
||||
|
||||
parser->prev_lookahead_node = parser->lookahead_node;
|
||||
parser->lookahead_node = ts_tree_make_node(symbol, child_count, children, size, offset);
|
||||
ts_parser_shrink_stack(parser, new_stack_size);
|
||||
DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser));
|
||||
}
|
||||
|
||||
static const char empty_chunk[1] = { '\0' };
|
||||
|
||||
static void ts_parser_advance(ts_parser *parser) {
|
||||
if (parser->position_in_chunk + 1 < parser->chunk_size) {
|
||||
parser->position_in_chunk++;
|
||||
} else {
|
||||
parser->chunk_start += parser->chunk_size;
|
||||
parser->chunk = parser->input.read_fn(parser->input.data, &parser->chunk_size);
|
||||
if (parser->chunk_size == 0) {
|
||||
parser->chunk = empty_chunk;
|
||||
parser->chunk_size = 1;
|
||||
}
|
||||
parser->position_in_chunk = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void ts_parser_advance_to_state(ts_parser *parser, ts_state lex_state) {
|
||||
DEBUG_LEX("character: '%c' \n", ts_parser_lookahead_char(parser));
|
||||
ts_parser_advance(parser);
|
||||
parser->lex_state = lex_state;
|
||||
}
|
||||
|
||||
static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) {
|
||||
DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]);
|
||||
size_t position = ts_parser_position(parser);
|
||||
size_t size = position - parser->token_start_position;
|
||||
size_t offset = parser->token_start_position - parser->token_end_position;
|
||||
parser->lookahead_node = ts_tree_make_leaf(symbol, size, offset);
|
||||
parser->token_end_position = position;
|
||||
}
|
||||
|
||||
static ts_tree * ts_parser_tree(ts_parser *parser) {
|
||||
DEBUG_PARSE("accept \n");
|
||||
return parser->stack[0].node;
|
||||
}
|
||||
|
||||
static void ts_parser_skip_whitespace(ts_parser *parser) {
|
||||
while (isspace(ts_parser_lookahead_char(parser)))
|
||||
ts_parser_advance(parser);
|
||||
parser->token_start_position = ts_parser_position(parser);
|
||||
}
|
||||
|
||||
static int ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symbol *expected_symbols) {
|
||||
ts_tree *error = ts_tree_make_error(ts_parser_lookahead_char(parser), count, expected_symbols, 0, 0);
|
||||
|
||||
while (1) {
|
||||
ts_tree_release(parser->lookahead_node);
|
||||
parser->lookahead_node = NULL;
|
||||
parser->lex_state = ts_lex_state_error;
|
||||
ts_lex(parser);
|
||||
|
||||
for (long i = parser->stack_size - 1; i >= 0; i--) {
|
||||
size_t count;
|
||||
ts_state to_state;
|
||||
const ts_symbol *symbols = ts_recover(parser->stack[i].state, &to_state, &count);
|
||||
for (size_t j = 0; j < count; j++) {
|
||||
if (symbols[j] == ts_parser_lookahead_sym(parser)) {
|
||||
ts_parser_shrink_stack(parser, i + 1);
|
||||
ts_parser_push(parser, to_state, error);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!ts_parser_lookahead_char(parser)) {
|
||||
parser->stack[0].node = error;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - DSL
|
||||
#define PARSE_FN() \
|
||||
static const ts_tree * \
|
||||
ts_parse(void *data, ts_input input)
|
||||
|
||||
#define LEX_FN() \
|
||||
static void ts_lex(ts_parser *parser)
|
||||
static ts_tree * \
|
||||
ts_lex(ts_lexer *lexer, state_id lex_state)
|
||||
|
||||
#define PARSE_FN() \
|
||||
static const ts_tree * ts_parse(ts_input input)
|
||||
#define RECOVER_FN() \
|
||||
static const ts_symbol * \
|
||||
ts_recover(state_id state, state_id *to_state, size_t *count)
|
||||
|
||||
#define SYMBOL_NAMES \
|
||||
static const char *ts_symbol_names[] =
|
||||
|
||||
#define EXPORT_PARSER(name) \
|
||||
ts_parse_config name = { \
|
||||
.parse_fn = ts_parse, \
|
||||
.symbol_names = ts_symbol_names \
|
||||
};
|
||||
#define EXPORT_PARSER(constructor_name) \
|
||||
ts_parser constructor_name() { \
|
||||
ts_parser result = { \
|
||||
.parse_fn = ts_parse, \
|
||||
.symbol_names = ts_symbol_names, \
|
||||
.data = ts_lr_parser_make(), \
|
||||
.free_fn = NULL \
|
||||
}; \
|
||||
return result; \
|
||||
}
|
||||
|
||||
#define START_PARSER() \
|
||||
ts_parser p = ts_parser_make(input), *parser = &p; \
|
||||
next_state:
|
||||
|
||||
#define START_LEXER() \
|
||||
ts_parser_skip_whitespace(parser); \
|
||||
if (!ts_parser_lookahead_char(parser)) { \
|
||||
parser->lookahead_node = ts_tree_make_leaf(ts_builtin_sym_end, 0, 0); \
|
||||
return; \
|
||||
} \
|
||||
ts_lr_parser *parser = (ts_lr_parser *)data; \
|
||||
ts_lr_parser_reset(parser); \
|
||||
parser->lexer.input = input; \
|
||||
ts_lexer_advance(&parser->lexer); \
|
||||
next_state:
|
||||
|
||||
#define LOOKAHEAD_SYM() \
|
||||
ts_parser_lookahead_sym(parser)
|
||||
|
||||
#define LOOKAHEAD_CHAR() \
|
||||
ts_parser_lookahead_char(parser)
|
||||
ts_lr_parser_lookahead_sym(parser)
|
||||
|
||||
#define PARSE_STATE() \
|
||||
ts_parser_parse_state(parser)
|
||||
|
||||
#define LEX_STATE() \
|
||||
parser->lex_state
|
||||
ts_stack_top_state(&parser->stack)
|
||||
|
||||
#define SET_LEX_STATE(state_index) \
|
||||
{ \
|
||||
parser->lex_state = state_index; \
|
||||
if (!parser->lookahead_node) ts_lex(parser); \
|
||||
}
|
||||
{ if (!parser->lookahead) parser->lookahead = ts_lex(&parser->lexer, state_index); }
|
||||
|
||||
#define SHIFT(state) \
|
||||
{ ts_parser_shift(parser, state); goto next_state; }
|
||||
|
||||
#define ADVANCE(state_index) \
|
||||
{ ts_parser_advance_to_state(parser, state_index); goto next_state; }
|
||||
{ \
|
||||
DEBUG_PARSE("shift: %d \n", state); \
|
||||
ts_lr_parser_shift(parser, state); \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define REDUCE(symbol, child_count, collapse_flags) \
|
||||
{ \
|
||||
static const int flags[] = collapse_flags; \
|
||||
ts_parser_reduce(parser, symbol, child_count, flags); \
|
||||
ts_lr_parser_reduce(parser, symbol, child_count, flags); \
|
||||
DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_stack_state(stack)); \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define ACCEPT_INPUT() \
|
||||
{ goto done; }
|
||||
|
||||
#define ACCEPT_TOKEN(symbol) \
|
||||
{ ts_parser_set_lookahead_sym(parser, symbol); return; }
|
||||
|
||||
#define LEX_ERROR() \
|
||||
{ ts_parser_set_lookahead_sym(parser, ts_builtin_sym_error); return; }
|
||||
goto done;
|
||||
|
||||
#define PARSE_ERROR(count, inputs) \
|
||||
{ \
|
||||
static const ts_symbol expected_inputs[] = inputs; \
|
||||
if (ts_parser_handle_error(parser, count, expected_inputs)) \
|
||||
if (ts_lr_parser_handle_error(parser, count, expected_inputs)) \
|
||||
goto next_state; \
|
||||
else \
|
||||
goto done; \
|
||||
}
|
||||
|
||||
#define FINISH_PARSER() \
|
||||
done: \
|
||||
DEBUG_PARSE("accept \n"); \
|
||||
return ts_stack_root(&parser->stack);
|
||||
|
||||
#define START_LEXER() \
|
||||
ts_lexer_skip_whitespace(lexer); \
|
||||
if (!ts_lexer_lookahead_char(lexer)) { \
|
||||
return ts_tree_make_leaf(ts_builtin_sym_end, 0, 0); \
|
||||
} \
|
||||
next_state:
|
||||
|
||||
#define LEX_STATE() \
|
||||
lex_state
|
||||
|
||||
#define LOOKAHEAD_CHAR() \
|
||||
ts_lexer_lookahead_char(lexer)
|
||||
|
||||
#define ADVANCE(state_index) \
|
||||
{ \
|
||||
ts_lexer_advance(lexer); \
|
||||
lex_state = state_index; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define ACCEPT_TOKEN(symbol) \
|
||||
{ \
|
||||
DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]); \
|
||||
return ts_lexer_build_node(lexer, symbol); \
|
||||
}
|
||||
|
||||
#define LEX_ERROR() \
|
||||
return ts_lexer_build_node(lexer, ts_builtin_sym_error);
|
||||
|
||||
#define LEX_PANIC() \
|
||||
printf("Lex error: unexpected state %d", LEX_STATE());
|
||||
{ DEBUG_LEX("Lex error: unexpected state %d", LEX_STATE()); return NULL; }
|
||||
|
||||
#define PARSE_PANIC() \
|
||||
printf("Parse error: unexpected state %d", PARSE_STATE());
|
||||
{ DEBUG_PARSE("Parse error: unexpected state %d", PARSE_STATE()); }
|
||||
|
||||
#define RECOVER(new_state, symbol_count, values) \
|
||||
{ \
|
||||
|
|
@ -320,9 +151,169 @@ printf("Parse error: unexpected state %d", PARSE_STATE());
|
|||
#define EXPECT(...) __VA_ARGS__
|
||||
#define COLLAPSE(...) __VA_ARGS__
|
||||
|
||||
#define FINISH_PARSER() \
|
||||
done: \
|
||||
return ts_parser_tree(parser);
|
||||
|
||||
/*
|
||||
* Stack
|
||||
*/
|
||||
typedef int state_id;
|
||||
/* Parse stack: `size` entries are in use, each pairing a tree node with a parse state. */
typedef struct {
|
||||
/* Number of entries currently on the stack. */
size_t size;
|
||||
struct {
|
||||
/* Tree node held by this entry. */
ts_tree *node;
|
||||
/* Parse state recorded alongside the node. */
state_id state;
|
||||
} *entries;
|
||||
} ts_stack;
|
||||
|
||||
ts_stack ts_stack_make();
|
||||
ts_tree * ts_stack_root(ts_stack *stack);
|
||||
ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int immediate_child_count, const int *collapse_flags);
|
||||
void ts_stack_shrink(ts_stack *stack, size_t new_size);
|
||||
void ts_stack_push(ts_stack *stack, state_id state, ts_tree *node);
|
||||
state_id ts_stack_top_state(const ts_stack *stack);
|
||||
|
||||
|
||||
/*
|
||||
* Lexer
|
||||
*/
|
||||
/* Lexer state: the input source, the chunk currently being scanned, and token boundaries. */
typedef struct {
|
||||
/* Input whose read_fn supplies successive chunks. */
ts_input input;
|
||||
/* Chunk most recently returned by input.read_fn (or a one-byte NUL chunk after an empty read). */
const char *chunk;
|
||||
/* Absolute position of the first byte of `chunk`. */
size_t chunk_start;
|
||||
/* Length of `chunk`, as reported by input.read_fn. */
size_t chunk_size;
|
||||
/* Index of the lookahead character within `chunk`. */
size_t position_in_chunk;
|
||||
/* Absolute position at which the last built token ended. */
size_t token_end_position;
|
||||
/* Absolute position recorded after whitespace was last skipped. */
size_t token_start_position;
|
||||
} ts_lexer;
|
||||
|
||||
static ts_lexer ts_lexer_make() {
|
||||
ts_lexer result = {
|
||||
.chunk = NULL,
|
||||
.chunk_start = 0,
|
||||
.chunk_size = 0,
|
||||
.position_in_chunk = 0,
|
||||
.token_start_position = 0,
|
||||
.token_end_position = 0,
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
static size_t ts_lexer_position(const ts_lexer *lexer) {
|
||||
return lexer->chunk_start + lexer->position_in_chunk;
|
||||
}
|
||||
|
||||
static char ts_lexer_lookahead_char(const ts_lexer *lexer) {
|
||||
return lexer->chunk[lexer->position_in_chunk];
|
||||
}
|
||||
|
||||
static const char empty_chunk[1] = "";
|
||||
|
||||
/*
 * Move the lexer forward by one character.
 *
 * While another byte remains in the current chunk, only the in-chunk index
 * moves. Otherwise the next chunk is requested from input.read_fn; a
 * zero-length read is replaced by a one-byte chunk holding '\0' so that the
 * lookahead character reads as NUL.
 */
static void ts_lexer_advance(ts_lexer *lexer) {
  /* Fast path: the next byte is still inside the current chunk. */
  if (lexer->position_in_chunk + 1 < lexer->chunk_size) {
    lexer->position_in_chunk++;
    return;
  }

  /* Current chunk exhausted: account for it and fetch the next one. */
  lexer->chunk_start += lexer->chunk_size;
  lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size);
  if (lexer->chunk_size == 0) {
    lexer->chunk = empty_chunk;
    lexer->chunk_size = 1;
  }
  lexer->position_in_chunk = 0;
}
|
||||
|
||||
/*
 * Create a leaf node for `symbol` covering the text from the recorded token
 * start up to the lexer's current position.
 *
 * The leaf's offset is the gap between the end of the previously built token
 * and the start of this one. The current position is then recorded as the
 * new token end.
 */
static ts_tree * ts_lexer_build_node(ts_lexer *lexer, ts_symbol symbol) {
  const size_t token_end = ts_lexer_position(lexer);
  const size_t token_start = lexer->token_start_position;
  const size_t leading_gap = token_start - lexer->token_end_position;

  lexer->token_end_position = token_end;
  return ts_tree_make_leaf(symbol, token_end - token_start, leading_gap);
}
|
||||
|
||||
/*
 * Advance the lexer past any whitespace at the current position, then record
 * the resulting position as the start of the next token.
 *
 * The lookahead character is converted to unsigned char before it is handed
 * to isspace(): passing a negative plain-char value (any byte >= 0x80 on
 * platforms where char is signed) to a <ctype.h> function is undefined
 * behavior.
 */
static void ts_lexer_skip_whitespace(ts_lexer *lexer) {
  while (isspace((unsigned char)ts_lexer_lookahead_char(lexer)))
    ts_lexer_advance(lexer);
  lexer->token_start_position = ts_lexer_position(lexer);
}
|
||||
|
||||
static const state_id ts_lex_state_error = -1;
|
||||
|
||||
|
||||
/*
|
||||
* Forward declarations
|
||||
* The file including this header should define these functions
|
||||
*/
|
||||
PARSE_FN();
|
||||
LEX_FN();
|
||||
RECOVER_FN();
|
||||
|
||||
|
||||
/*
|
||||
* Parser
|
||||
*/
|
||||
/* LR parser state: a lexer, a parse stack and up to two pending lookahead nodes. */
typedef struct {
|
||||
ts_lexer lexer;
|
||||
ts_stack stack;
|
||||
/* Node whose symbol drives the next parse action; NULL means a token must be lexed. */
ts_tree *lookahead;
|
||||
/* Lookahead set aside by a reduce; a shift moves it back into `lookahead`. */
ts_tree *previous_lookahead;
|
||||
} ts_lr_parser;
|
||||
|
||||
static ts_lr_parser * ts_lr_parser_make() {
|
||||
ts_lr_parser *result = malloc(sizeof(ts_lr_parser));
|
||||
result->lexer = ts_lexer_make();
|
||||
result->stack = ts_stack_make();
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Return the parser to its initial state: fresh lexer, both lookahead slots
 * cleared, and every stack entry popped (ts_stack_shrink releases the popped
 * nodes).
 *
 * NOTE(review): the lookahead pointers are overwritten without a release
 * call here - confirm no node is still owned by them at this point.
 */
static void ts_lr_parser_reset(ts_lr_parser *parser) {
  parser->lexer = ts_lexer_make();
  parser->previous_lookahead = NULL;
  parser->lookahead = NULL;
  ts_stack_shrink(&parser->stack, 0);
}
|
||||
|
||||
static ts_symbol ts_lr_parser_lookahead_sym(const ts_lr_parser *parser) {
|
||||
ts_tree *node = parser->lookahead;
|
||||
return node ? node->symbol : ts_builtin_sym_error;
|
||||
}
|
||||
|
||||
/*
 * Shift: push the current lookahead node onto the stack under `parse_state`,
 * then promote the set-aside lookahead (if any) to be the current one.
 */
static void ts_lr_parser_shift(ts_lr_parser *parser, state_id parse_state) {
  ts_tree *shifted = parser->lookahead;

  parser->lookahead = parser->previous_lookahead;
  parser->previous_lookahead = NULL;
  ts_stack_push(&parser->stack, parse_state, shifted);
}
|
||||
|
||||
/*
 * Reduce: collapse the top `immediate_child_count` stack entries into a new
 * node for `symbol`. The new node becomes the lookahead, and the lookahead
 * that was current before the reduction is set aside in
 * `previous_lookahead`.
 */
static void ts_lr_parser_reduce(ts_lr_parser *parser, ts_symbol symbol, int immediate_child_count, const int *collapse_flags) {
  ts_tree *pending = parser->lookahead;

  parser->lookahead = ts_stack_reduce(&parser->stack, symbol, immediate_child_count, collapse_flags);
  parser->previous_lookahead = pending;
}
|
||||
|
||||
/*
 * Attempt to recover from a parse error.
 *
 * An error node is created from the current lookahead character and the
 * expected symbols. Tokens are then lexed one at a time in the error lex
 * state; after each token the stack is searched from the top down for a
 * state whose ts_recover() symbol list contains that token's symbol.
 *
 * Returns 1 when such a state is found: the stack is cut back to that entry
 * and the error node is pushed with the recovery state. Returns 0 when the
 * lookahead character is NUL with no recovery found; the error node is then
 * stored in the bottom stack entry.
 */
static int ts_lr_parser_handle_error(ts_lr_parser *parser, size_t count, const ts_symbol *expected_symbols) {
  ts_tree *error = ts_tree_make_error(ts_lexer_lookahead_char(&parser->lexer), count, expected_symbols, 0, 0);

  while (1) {
    /* Discard the offending lookahead and lex the next token. */
    ts_tree_release(parser->lookahead);
    parser->lookahead = ts_lex(&parser->lexer, ts_lex_state_error);
    ts_symbol found = ts_lr_parser_lookahead_sym(parser);

    /* Walk the stack from the top entry down to the bottom one. */
    for (size_t depth = parser->stack.size; depth-- > 0; ) {
      size_t recover_count;
      state_id to_state;
      const ts_symbol *symbols = ts_recover(parser->stack.entries[depth].state, &to_state, &recover_count);

      for (size_t j = 0; j < recover_count; j++) {
        if (symbols[j] != found)
          continue;
        ts_stack_shrink(&parser->stack, depth + 1);
        ts_stack_push(&parser->stack, to_state, error);
        return 1;
      }
    }

    /* NUL lookahead character: give up and leave the error in the bottom entry. */
    if (ts_lexer_lookahead_char(&parser->lexer) == '\0') {
      parser->stack.entries[0].node = error;
      return 0;
    }
  }
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,16 +47,21 @@ typedef struct {
|
|||
void (* release_fn)(void *data);
|
||||
} ts_input;
|
||||
|
||||
typedef struct {
|
||||
const ts_tree * (* parse_fn)(ts_input);
|
||||
const char **symbol_names;
|
||||
} ts_parse_config;
|
||||
typedef struct {
|
||||
const ts_tree * (* parse_fn)(void *data, ts_input input);
|
||||
void (* free_fn)(void *data);
|
||||
const char **symbol_names;
|
||||
void *data;
|
||||
} ts_parser;
|
||||
|
||||
const ts_tree * ts_parser_parse(ts_parser *, ts_input);
|
||||
void ts_parser_free(ts_parser *);
|
||||
|
||||
typedef struct ts_document ts_document;
|
||||
|
||||
ts_document * ts_document_make();
|
||||
void ts_document_free(ts_document *doc);
|
||||
void ts_document_set_parser(ts_document *doc, ts_parse_config parser);
|
||||
void ts_document_set_parser(ts_document *doc, ts_parser parser);
|
||||
void ts_document_set_input(ts_document *doc, ts_input input);
|
||||
void ts_document_set_input_string(ts_document *doc, const char *text);
|
||||
void ts_document_edit(ts_document *doc, size_t position, size_t deleted_bytes, size_t inserted_bytes);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "runtime_spec_helper.h"
|
||||
|
||||
extern ts_parse_config ts_parse_config_arithmetic;
|
||||
extern "C" ts_parser ts_parse_config_arithmetic();
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -9,7 +9,8 @@ describe("arithmetic", []() {
|
|||
|
||||
before_each([&]() {
|
||||
doc = ts_document_make();
|
||||
ts_document_set_parser(doc, ts_parse_config_arithmetic);
|
||||
ts_parser parser = ts_parse_config_arithmetic();
|
||||
ts_document_set_parser(doc, parser);
|
||||
});
|
||||
|
||||
after_each([&]() {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "runtime_spec_helper.h"
|
||||
|
||||
extern ts_parse_config ts_parse_config_json;
|
||||
extern "C" ts_parser ts_parse_config_json();
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -9,7 +9,7 @@ describe("json", []() {
|
|||
|
||||
before_each([&]() {
|
||||
doc = ts_document_make();
|
||||
ts_document_set_parser(doc, ts_parse_config_json);
|
||||
ts_document_set_parser(doc, ts_parse_config_json());
|
||||
});
|
||||
|
||||
after_each([&]() {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
#include "runtime_spec_helper.h"
|
||||
#include "helpers/spy_reader.h"
|
||||
|
||||
extern ts_parse_config ts_parse_config_json;
|
||||
|
||||
extern "C" ts_parser ts_parse_config_json();
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("parsing", [&]() {
|
||||
|
|
@ -11,7 +11,7 @@ describe("parsing", [&]() {
|
|||
|
||||
before_each([&]() {
|
||||
doc = ts_document_make();
|
||||
ts_document_set_parser(doc, ts_parse_config_json);
|
||||
ts_document_set_parser(doc, ts_parse_config_json());
|
||||
|
||||
reader = new SpyReader("{ \"key\": [1, 2] }", 5);
|
||||
ts_document_set_input(doc, reader->input);
|
||||
|
|
@ -31,7 +31,7 @@ describe("parsing", [&]() {
|
|||
"(value (number)) "
|
||||
"(value (number))))))"));
|
||||
});
|
||||
|
||||
|
||||
it("reads the entire input", [&]() {
|
||||
AssertThat(reader->chunks_read, Equals(vector<string>({
|
||||
"{ \"ke",
|
||||
|
|
@ -63,7 +63,7 @@ describe("parsing", [&]() {
|
|||
"(value (number))))"
|
||||
));
|
||||
});
|
||||
|
||||
|
||||
it_skip("re-reads only the changed portion of the input", [&]() {
|
||||
AssertThat(reader->chunks_read, Equals(vector<string>({
|
||||
""
|
||||
|
|
|
|||
|
|
@ -253,11 +253,9 @@ namespace tree_sitter {
|
|||
}
|
||||
cases += _default(recover_case(0, set<rules::Symbol>()));
|
||||
|
||||
string body = _switch("state", cases);
|
||||
return join({
|
||||
"static const ts_symbol * "
|
||||
"ts_recover(ts_state state, ts_state *to_state, size_t *count) {",
|
||||
indent(body),
|
||||
"RECOVER_FN() {",
|
||||
indent(_switch("state", cases)),
|
||||
"}"
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#include <string.h>
|
||||
|
||||
struct ts_document {
|
||||
ts_parse_config parse_config;
|
||||
ts_parser parser;
|
||||
const ts_tree *tree;
|
||||
ts_input input;
|
||||
size_t error_count;
|
||||
|
|
@ -16,8 +16,8 @@ void ts_document_free(ts_document *document) {
|
|||
free(document);
|
||||
}
|
||||
|
||||
void ts_document_set_parser(ts_document *document, ts_parse_config config) {
|
||||
document->parse_config = config;
|
||||
void ts_document_set_parser(ts_document *document, ts_parser parser) {
|
||||
document->parser = parser;
|
||||
}
|
||||
|
||||
const ts_tree * ts_document_tree(const ts_document *document) {
|
||||
|
|
@ -25,17 +25,17 @@ const ts_tree * ts_document_tree(const ts_document *document) {
|
|||
}
|
||||
|
||||
const char * ts_document_string(const ts_document *document) {
|
||||
return ts_tree_string(document->tree, document->parse_config.symbol_names);
|
||||
return ts_tree_string(document->tree, document->parser.symbol_names);
|
||||
}
|
||||
|
||||
void ts_document_set_input(ts_document *document, ts_input input) {
|
||||
document->input = input;
|
||||
document->tree = document->parse_config.parse_fn(input);
|
||||
document->tree = ts_parser_parse(&document->parser, input);
|
||||
}
|
||||
|
||||
void ts_document_edit(ts_document *document, size_t position, size_t bytes_removed, size_t bytes_inserted) {
|
||||
document->input.seek_fn(document->input.data, 0);
|
||||
document->tree = document->parse_config.parse_fn(document->input);
|
||||
document->tree = ts_parser_parse(&document->parser, document->input);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
|
|
|
|||
10
src/runtime/parser.c
Normal file
10
src/runtime/parser.c
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
#include "tree_sitter/runtime.h"
|
||||
|
||||
/*
 * Run the parser's parse function on `input`, passing along the parser's
 * private data pointer, and return the tree it produces.
 */
const ts_tree * ts_parser_parse(ts_parser *parser, ts_input input) {
  void *state = parser->data;
  return parser->parse_fn(state, input);
}
|
||||
|
||||
/*
 * Release the parser's private data through its free function. Does nothing
 * when no free function is set.
 */
void ts_parser_free(ts_parser *parser) {
  if (!parser->free_fn)
    return;
  parser->free_fn(parser->data);
}
|
||||
80
src/runtime/stack.c
Normal file
80
src/runtime/stack.c
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
#include "tree_sitter/runtime.h"
|
||||
#include <string.h>
|
||||
|
||||
typedef int state_id;
|
||||
static const state_id ts_lex_state_error = -1;
|
||||
|
||||
/* Parse stack: `size` entries are in use, each pairing a tree node with a parse state. */
typedef struct {
|
||||
/* Number of entries currently on the stack. */
size_t size;
|
||||
struct {
|
||||
/* Tree node held by this entry. */
ts_tree *node;
|
||||
/* Parse state recorded alongside the node. */
state_id state;
|
||||
} *entries;
|
||||
} ts_stack;
|
||||
|
||||
static int INITIAL_STACK_SIZE = 100;
|
||||
|
||||
ts_stack ts_stack_make() {
|
||||
ts_stack result = {
|
||||
.entries = calloc(INITIAL_STACK_SIZE, sizeof(*result.entries)),
|
||||
.size = 0,
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
state_id ts_stack_top_state(const ts_stack *stack) {
|
||||
if (stack->size == 0) return 0;
|
||||
return stack->entries[stack->size - 1].state;
|
||||
}
|
||||
|
||||
/*
 * Node stored in the bottom (index 0) stack entry. The stack size is not
 * consulted.
 */
ts_tree * ts_stack_root(ts_stack *stack) {
  return stack->entries->node;
}
|
||||
|
||||
/*
 * Append an entry holding `node` and `state` to the top of the stack.
 *
 * NOTE(review): there is no capacity check here; storage is allocated for
 * INITIAL_STACK_SIZE entries in ts_stack_make() - confirm callers cannot
 * exceed that depth.
 */
void ts_stack_push(ts_stack *stack, state_id state, ts_tree *node) {
  size_t top = stack->size++;

  stack->entries[top].node = node;
  stack->entries[top].state = state;
}
|
||||
|
||||
/*
 * Cut the stack down to `new_size` entries, calling ts_tree_release() on the
 * node of every entry at or above that index (lowest index first). If
 * `new_size` is not smaller than the current size, no node is released and
 * the size is still set to `new_size`.
 */
void ts_stack_shrink(ts_stack *stack, size_t new_size) {
  size_t index = new_size;

  while (index < stack->size) {
    ts_tree_release(stack->entries[index].node);
    index++;
  }
  stack->size = new_size;
}
|
||||
|
||||
/*
 * Build a node for `symbol` out of the top `immediate_child_count` stack
 * entries, pop those entries, and return the new node.
 *
 * collapse_flags[i] non-zero means entry i contributes its own children in
 * place of itself; otherwise the entry's node becomes a child directly.
 *
 * The new node's offset is the first entry's offset. Its size is the first
 * entry's size plus, for each later entry, that entry's offset and size.
 */
ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int immediate_child_count, const int *collapse_flags) {
  size_t new_stack_size = stack->size - immediate_child_count;

  /* Pass 1: count how many children the new node will have. */
  int child_count = 0;
  for (int i = 0; i < immediate_child_count; i++) {
    if (collapse_flags[i])
      child_count += ts_tree_child_count(stack->entries[new_stack_size + i].node);
    else
      child_count++;
  }

  /* Pass 2: fill the child array and accumulate the covered extent. */
  size_t size = 0, offset = 0;
  ts_tree **children = malloc(child_count * sizeof(ts_tree *));
  ts_tree **next_slot = children;
  for (int i = 0; i < immediate_child_count; i++) {
    ts_tree *child = stack->entries[new_stack_size + i].node;

    if (i == 0) {
      offset = child->offset;
      size = child->size;
    } else {
      size += child->offset + child->size;
    }

    if (collapse_flags[i]) {
      /* Splice in the grandchildren instead of the child itself. */
      size_t grandchild_count = ts_tree_child_count(child);
      memcpy(next_slot, ts_tree_children(child), grandchild_count * sizeof(ts_tree *));
      next_slot += grandchild_count;
    } else {
      *next_slot++ = child;
    }
  }

  ts_tree *reduced = ts_tree_make_node(symbol, child_count, children, size, offset);
  ts_stack_shrink(stack, new_stack_size);
  return reduced;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue