Move lexer debugging logic out of public header
This commit is contained in:
parent
5c600942df
commit
7498725d7f
3 changed files with 108 additions and 106 deletions
|
|
@ -10,14 +10,20 @@ extern "C" {
|
|||
#include <stdbool.h>
|
||||
#include "tree_sitter/runtime.h"
|
||||
|
||||
typedef struct TSTree TSTree;
|
||||
|
||||
#define ts_lex_state_error 0
|
||||
#define TS_DEBUG_BUFFER_SIZE 512
|
||||
|
||||
typedef struct TSLexer {
|
||||
TSInput input;
|
||||
typedef struct TSTree TSTree;
|
||||
typedef unsigned short TSStateId;
|
||||
|
||||
typedef struct TSLexer {
|
||||
// Public
|
||||
void (*start_fn)(struct TSLexer *, TSStateId);
|
||||
void (*start_token_fn)(struct TSLexer *);
|
||||
bool (*advance_fn)(struct TSLexer *, TSStateId);
|
||||
TSTree *(*accept_fn)(struct TSLexer *, TSSymbol, int, const char *);
|
||||
|
||||
// Private
|
||||
const char *chunk;
|
||||
size_t chunk_start;
|
||||
size_t chunk_size;
|
||||
|
|
@ -29,15 +35,11 @@ typedef struct TSLexer {
|
|||
size_t lookahead_size;
|
||||
int32_t lookahead;
|
||||
|
||||
TSTree *(*accept_fn)(struct TSLexer *, TSSymbol, int);
|
||||
bool (*advance_fn)(struct TSLexer *);
|
||||
|
||||
TSInput input;
|
||||
TSDebugger debugger;
|
||||
char debug_buffer[TS_DEBUG_BUFFER_SIZE];
|
||||
} TSLexer;
|
||||
|
||||
typedef unsigned short TSStateId;
|
||||
|
||||
typedef enum {
|
||||
TSParseActionTypeError,
|
||||
TSParseActionTypeShift,
|
||||
|
|
@ -67,44 +69,34 @@ struct TSLanguage {
|
|||
TSTree *(*lex_fn)(TSLexer *, TSStateId);
|
||||
};
|
||||
|
||||
#define DEBUG_LEX(...) \
|
||||
if (lexer->debugger.debug_fn) { \
|
||||
snprintf(lexer->debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \
|
||||
lexer->debugger.debug_fn(lexer->debugger.data, lexer->debug_buffer); \
|
||||
/*
|
||||
* Lexer Macros
|
||||
*/
|
||||
|
||||
/*
 * START_LEXER: prologue of a lex function. Expects `lexer` and `lex_state`
 * to be in scope. It notifies the lexer through start_fn, declares the local
 * `lookahead`, and defines the `next_state` label; each time control reaches
 * that label, `lookahead` is reloaded from lexer->lookahead.
 */
#define START_LEXER() \
|
||||
lexer->start_fn(lexer, lex_state); \
|
||||
int32_t lookahead; \
|
||||
next_state: \
|
||||
lookahead = lexer->lookahead;
|
||||
|
||||
/*
 * START_TOKEN: forwards to lexer->start_token_fn. Expects `lexer` to be in
 * scope. Note that the expansion already ends with a semicolon.
 */
#define START_TOKEN() lexer->start_token_fn(lexer);
|
||||
|
||||
/*
 * ADVANCE: calls lexer->advance_fn with the target state, stores that state
 * in `lex_state`, and jumps to the `next_state` label. The value returned by
 * advance_fn is ignored. `state_index` is expanded twice, so pass an
 * expression without side effects.
 */
#define ADVANCE(state_index) \
|
||||
{ \
|
||||
lexer->advance_fn(lexer, state_index); \
|
||||
lex_state = state_index; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define START_LEXER() \
|
||||
DEBUG_LEX("start state:%d", lex_state); \
|
||||
int32_t lookahead; \
|
||||
next_state: \
|
||||
lookahead = lexer->lookahead; \
|
||||
DEBUG_LEX((0 < lookahead &&lookahead < 255 ? "lookahead char:'%c'" \
|
||||
: "lookahead char:%d"), \
|
||||
lookahead);
|
||||
/*
 * ACCEPT_TOKEN: returns from the enclosing function with the result of
 * lexer->accept_fn, passing the symbol together with its entries in
 * `ts_hidden_symbol_flags` and `ts_symbol_names`. Expects `lexer` and both
 * arrays to be in scope. `symbol` is expanded three times.
 */
#define ACCEPT_TOKEN(symbol) \
|
||||
return lexer->accept_fn(lexer, symbol, ts_hidden_symbol_flags[symbol], \
|
||||
ts_symbol_names[symbol]);
|
||||
|
||||
#define START_TOKEN() \
|
||||
DEBUG_LEX("start_token chars:%lu", lexer->current_position.chars); \
|
||||
lexer->token_start_position = lexer->current_position;
|
||||
/*
 * LEX_ERROR: accepts the built-in error symbol through ACCEPT_TOKEN. Note
 * that the expansion already ends with a semicolon.
 */
#define LEX_ERROR() ACCEPT_TOKEN(ts_builtin_sym_error);
|
||||
|
||||
#define ADVANCE(state_index) \
|
||||
{ \
|
||||
DEBUG_LEX("advance state:%d", state_index); \
|
||||
lexer->advance_fn(lexer); \
|
||||
lex_state = state_index; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define ACCEPT_TOKEN(symbol) \
|
||||
{ \
|
||||
DEBUG_LEX("accept_token sym:%s", ts_symbol_names[symbol]); \
|
||||
return lexer->accept_fn(lexer, symbol, ts_hidden_symbol_flags[symbol]); \
|
||||
}
|
||||
|
||||
#define LEX_ERROR() \
|
||||
{ \
|
||||
DEBUG_LEX("error"); \
|
||||
return lexer->accept_fn(lexer, ts_builtin_sym_error, 0); \
|
||||
}
|
||||
/*
|
||||
* Parse Table Macros
|
||||
*/
|
||||
|
||||
#define SHIFT(to_state_value) \
|
||||
{ \
|
||||
|
|
|
|||
|
|
@ -5,54 +5,66 @@
|
|||
#include "runtime/debugger.h"
|
||||
#include "utf8proc.h"
|
||||
|
||||
/*
 * Formats a message into the lexer's debug buffer and passes it to the
 * debugger callback. Does nothing when no callback is installed.
 * Expects a pointer named `lexer` to be in the calling scope.
 *
 * The body is wrapped in do { } while (0) so that `DEBUG(...);` expands to
 * exactly one statement and remains valid as the body of an unbraced
 * if/else. Always follow the invocation with a semicolon.
 */
#define DEBUG(...)                                                         \
  do {                                                                     \
    if (lexer->debugger.debug_fn) {                                        \
      snprintf(lexer->debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__);    \
      lexer->debugger.debug_fn(lexer->debugger.data, lexer->debug_buffer); \
    }                                                                      \
  } while (0)
|
||||
|
||||
/*
 * Sentinel chunk. read_next_chunk installs it when the input yields zero
 * bytes, and advance compares the current chunk against it by pointer to
 * detect that the end of the input has been reached.
 */
static const char *empty_chunk = "";
|
||||
|
||||
static void ts_lexer_read_next_chunk(TSLexer *lexer) {
|
||||
/*
 * Fetches the chunk of input text that begins at the lexer's current byte
 * position. If that position is not where the previous chunk ended, the
 * input is asked to seek there first. When the input yields zero bytes, the
 * `empty_chunk` sentinel replaces whatever pointer was returned.
 */
static void read_next_chunk(TSLexer *lexer) {
  TSInput source = lexer->input;
  size_t previous_chunk_end = lexer->chunk_start + lexer->chunk_size;

  if (lexer->current_position.bytes != previous_chunk_end) {
    source.seek_fn(source.data, lexer->current_position);
  }

  lexer->chunk_start = lexer->current_position.bytes;
  lexer->chunk = source.read_fn(source.data, &lexer->chunk_size);

  if (lexer->chunk_size == 0) {
    lexer->chunk = empty_chunk;
  }
}
|
||||
|
||||
static bool advance(TSLexer *lexer) {
|
||||
|
||||
/*
|
||||
* Return false if the Lexer has already reached the end of the input.
|
||||
*/
|
||||
if (lexer->chunk == empty_chunk)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Increment the Lexer's position.
|
||||
*/
|
||||
if (lexer->lookahead_size) {
|
||||
lexer->current_position.bytes += lexer->lookahead_size;
|
||||
lexer->current_position.chars += 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Request a new chunk of text from the Input if the Lexer has reached
|
||||
* the end of the current chunk.
|
||||
*/
|
||||
if (lexer->current_position.bytes >= lexer->chunk_start + lexer->chunk_size) {
|
||||
ts_lexer_read_next_chunk(lexer);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the next unicode character from the current chunk of text.
|
||||
*/
|
||||
/*
 * Decodes the UTF-8 character at the lexer's current position within the
 * current chunk. The code point is stored in lexer->lookahead and its
 * encoded length in lexer->lookahead_size; the character is then logged.
 */
static void read_lookahead(TSLexer *lexer) {
  size_t position_in_chunk = lexer->current_position.bytes - lexer->chunk_start;

  /*
   * NOTE(review): the length passed is one more than the number of bytes
   * left in the chunk; presumably chunks are NUL-terminated so the decoder
   * may look at the terminator -- confirm against the TSInput contract.
   */
  long decoded_size = utf8proc_iterate(
    (const uint8_t *)lexer->chunk + position_in_chunk,
    lexer->chunk_size - position_in_chunk + 1, &lexer->lookahead);

  /*
   * utf8proc_iterate signals failure with a negative code. lookahead_size
   * is unsigned, so storing that code directly would produce an enormous
   * length that `advance` later adds to the byte position. Treat input that
   * cannot be decoded as a single byte so the lexer can still move forward.
   */
  lexer->lookahead_size = decoded_size < 0 ? 1 : (size_t)decoded_size;

  DEBUG((0 < lexer->lookahead && lexer->lookahead < 256) ? "lookahead char:'%c'"
                                                         : "lookahead char:%d",
        lexer->lookahead);
}
|
||||
|
||||
/*
 * Lexer `start_fn` hook: logs the lex state in which lexing begins.
 * It has no other effect.
 */
static void start(TSLexer *lexer, TSStateId lex_state) {
  DEBUG("start_lex state:%d", lex_state);
}
|
||||
|
||||
/*
 * Lexer `start_token_fn` hook: logs the current character offset and
 * records the current position as the start of the token being lexed.
 */
static void start_token(TSLexer *lexer) {
  /*
   * The cast makes the argument match `%lu` exactly, whatever integer type
   * the `chars` field is declared with.
   */
  DEBUG("start_token chars:%lu",
        (unsigned long)lexer->current_position.chars);
  lexer->token_start_position = lexer->current_position;
}
|
||||
|
||||
/*
 * Lexer `advance_fn` hook: logs the target state, moves past the current
 * lookahead character, and reads the next one.
 *
 * Returns false without moving when the current chunk is the `empty_chunk`
 * sentinel (end of input); returns true otherwise.
 */
static bool advance(TSLexer *lexer, TSStateId state) {
  DEBUG("advance state:%d", state);

  /* The sentinel chunk means there is nothing left to consume. */
  if (lexer->chunk == empty_chunk) {
    return false;
  }

  /* Step past the current lookahead character, if one has been read. */
  if (lexer->lookahead_size != 0) {
    lexer->current_position.bytes += lexer->lookahead_size;
    lexer->current_position.chars++;
  }

  /* Fetch a new chunk once the position reaches the end of this one. */
  size_t chunk_end = lexer->chunk_start + lexer->chunk_size;
  if (lexer->current_position.bytes >= chunk_end) {
    read_next_chunk(lexer);
  }

  read_lookahead(lexer);
  return true;
}
|
||||
|
||||
static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
|
||||
static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden,
|
||||
const char *symbol_name) {
|
||||
DEBUG("accept_token sym:%s", symbol_name);
|
||||
TSLength size =
|
||||
ts_length_sub(lexer->current_position, lexer->token_start_position);
|
||||
TSLength padding =
|
||||
|
|
@ -64,14 +76,15 @@ static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
|
|||
}
|
||||
|
||||
/*
|
||||
* The `advance` and `accept` methods are stored as fields on the Lexer so
|
||||
* that generated parsers can call them without needing to be linked against
|
||||
* this library.
|
||||
* The lexer's methods are stored as struct fields so that generated parsers
|
||||
* can call them without needing to be linked against this library.
|
||||
*/
|
||||
|
||||
TSLexer ts_lexer_make() {
|
||||
TSLexer result = (TSLexer) { .advance_fn = advance,
|
||||
TSLexer result = (TSLexer) { .start_fn = start,
|
||||
.start_token_fn = start_token,
|
||||
.advance_fn = advance,
|
||||
.accept_fn = accept,
|
||||
.debugger = ts_debugger_null(),
|
||||
.chunk = NULL,
|
||||
.chunk_start = 0,
|
||||
.chunk_size = 0,
|
||||
|
|
@ -79,17 +92,14 @@ TSLexer ts_lexer_make() {
|
|||
.token_start_position = ts_length_zero(),
|
||||
.token_end_position = ts_length_zero(),
|
||||
.lookahead = 0,
|
||||
.lookahead_size = 0, };
|
||||
.lookahead_size = 0,
|
||||
.debugger = ts_debugger_null() };
|
||||
return result;
|
||||
}
|
||||
|
||||
void ts_lexer_reset(TSLexer *lexer, TSLength position) {
|
||||
lexer->lookahead = 0;
|
||||
lexer->lookahead_size = 0;
|
||||
|
||||
lexer->token_end_position = position;
|
||||
lexer->current_position = position;
|
||||
ts_lexer_read_next_chunk(lexer);
|
||||
|
||||
lexer->advance_fn(lexer);
|
||||
read_next_chunk(lexer);
|
||||
read_lookahead(lexer);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
* Debugging
|
||||
*/
|
||||
|
||||
#define DEBUG_PARSE(...) \
|
||||
#define DEBUG(...) \
|
||||
if (parser->debugger.debug_fn) { \
|
||||
snprintf(parser->lexer.debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \
|
||||
parser->debugger.debug_fn(parser->debugger.data, \
|
||||
|
|
@ -50,7 +50,7 @@ static TSLength break_down_left_stack(TSParser *parser, TSInputEdit edit) {
|
|||
if (left_subtree_end.chars < edit.position && !children)
|
||||
break;
|
||||
|
||||
DEBUG_PARSE("pop_left sym:%s", SYM_NAME(node->symbol));
|
||||
DEBUG("pop_left sym:%s", SYM_NAME(node->symbol));
|
||||
parser->stack.size--;
|
||||
left_subtree_end = ts_length_sub(left_subtree_end, ts_tree_total_size(node));
|
||||
|
||||
|
|
@ -62,7 +62,7 @@ static TSLength break_down_left_stack(TSParser *parser, TSInputEdit edit) {
|
|||
TSStateId next_state =
|
||||
action.type == TSParseActionTypeShift ? action.data.to_state : state;
|
||||
|
||||
DEBUG_PARSE("push_left sym:%s", SYM_NAME(child->symbol));
|
||||
DEBUG("push_left sym:%s", SYM_NAME(child->symbol));
|
||||
ts_stack_push(&parser->stack, next_state, child);
|
||||
left_subtree_end =
|
||||
ts_length_add(left_subtree_end, ts_tree_total_size(child));
|
||||
|
|
@ -74,14 +74,14 @@ static TSLength break_down_left_stack(TSParser *parser, TSInputEdit edit) {
|
|||
if (right_subtree_start < edit.position + edit.chars_inserted)
|
||||
break;
|
||||
|
||||
DEBUG_PARSE("push_right sym:%s", SYM_NAME(child->symbol));
|
||||
DEBUG("push_right sym:%s", SYM_NAME(child->symbol));
|
||||
ts_stack_push(&parser->right_stack, 0, child);
|
||||
}
|
||||
|
||||
ts_tree_release(node);
|
||||
}
|
||||
|
||||
DEBUG_PARSE("reuse_left chars:%lu", left_subtree_end.chars);
|
||||
DEBUG("reuse_left chars:%lu", left_subtree_end.chars);
|
||||
return left_subtree_end;
|
||||
}
|
||||
|
||||
|
|
@ -111,7 +111,7 @@ static TSTree *break_down_right_stack(TSParser *parser) {
|
|||
size_t child_count;
|
||||
TSTree **children = ts_tree_children(node, &child_count);
|
||||
|
||||
DEBUG_PARSE("pop_right %s", SYM_NAME(node->symbol));
|
||||
DEBUG("pop_right %s", SYM_NAME(node->symbol));
|
||||
stack->size--;
|
||||
right_subtree_start += ts_tree_total_size(node).chars;
|
||||
|
||||
|
|
@ -120,7 +120,7 @@ static TSTree *break_down_right_stack(TSParser *parser) {
|
|||
break;
|
||||
|
||||
TSTree *child = children[i];
|
||||
DEBUG_PARSE("push_right sym:%s", SYM_NAME(child->symbol));
|
||||
DEBUG("push_right sym:%s", SYM_NAME(child->symbol));
|
||||
ts_stack_push(stack, 0, child);
|
||||
right_subtree_start -= ts_tree_total_size(child).chars;
|
||||
}
|
||||
|
|
@ -133,7 +133,7 @@ static TSTree *get_next_node(TSParser *parser, TSStateId lex_state) {
|
|||
TSTree *node;
|
||||
|
||||
if ((node = break_down_right_stack(parser))) {
|
||||
DEBUG_PARSE("reuse sym:%s", SYM_NAME(node->symbol));
|
||||
DEBUG("reuse sym:%s", SYM_NAME(node->symbol));
|
||||
|
||||
parser->lexer.lookahead = 0;
|
||||
parser->lexer.lookahead_size = 0;
|
||||
|
|
@ -141,7 +141,7 @@ static TSTree *get_next_node(TSParser *parser, TSStateId lex_state) {
|
|||
ts_length_add(parser->lexer.current_position, ts_tree_total_size(node));
|
||||
} else {
|
||||
node = parser->language->lex_fn(&parser->lexer, lex_state);
|
||||
DEBUG_PARSE("lex sym:%s", SYM_NAME(node->symbol));
|
||||
DEBUG("lex sym:%s", SYM_NAME(node->symbol));
|
||||
}
|
||||
|
||||
return node;
|
||||
|
|
@ -224,7 +224,7 @@ static int handle_error(TSParser *parser) {
|
|||
parser->language, state_after_error, parser->lookahead->symbol);
|
||||
|
||||
if (action_after_error.type != TSParseActionTypeError) {
|
||||
DEBUG_PARSE("recover state:%u", state_after_error);
|
||||
DEBUG("recover state:%u", state_after_error);
|
||||
|
||||
ts_stack_shrink(&parser->stack, entry - parser->stack.entries + 1);
|
||||
parser->lookahead->padding = ts_length_zero();
|
||||
|
|
@ -242,7 +242,7 @@ static int handle_error(TSParser *parser) {
|
|||
* current lookahead token, advance to the next token. If no characters
|
||||
* were consumed, advance the lexer to the next character.
|
||||
*/
|
||||
DEBUG_PARSE("lex_again");
|
||||
DEBUG("skip_token");
|
||||
TSLength prev_position = parser->lexer.current_position;
|
||||
if (parser->lookahead)
|
||||
ts_tree_release(parser->lookahead);
|
||||
|
|
@ -253,8 +253,8 @@ static int handle_error(TSParser *parser) {
|
|||
* just skip it. If the end of input is reached, exit.
|
||||
*/
|
||||
if (ts_length_eq(parser->lexer.current_position, prev_position))
|
||||
if (!parser->lexer.advance_fn(&parser->lexer)) {
|
||||
DEBUG_PARSE("fail_to_recover");
|
||||
if (!parser->lexer.advance_fn(&parser->lexer, 0)) {
|
||||
DEBUG("fail_to_recover");
|
||||
|
||||
resize_error(parser, error);
|
||||
ts_stack_push(&parser->stack, 0, error);
|
||||
|
|
@ -323,11 +323,11 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
|
|||
|
||||
TSLength position;
|
||||
if (edit) {
|
||||
DEBUG_PARSE("edit pos:%lu inserted:%lu deleted:%lu", edit->position,
|
||||
DEBUG("edit pos:%lu inserted:%lu deleted:%lu", edit->position,
|
||||
edit->chars_inserted, edit->chars_removed);
|
||||
position = break_down_left_stack(parser, *edit);
|
||||
} else {
|
||||
DEBUG_PARSE("new_parse");
|
||||
DEBUG("new_parse");
|
||||
ts_stack_shrink(&parser->stack, 0);
|
||||
position = ts_length_zero();
|
||||
}
|
||||
|
|
@ -346,37 +346,37 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
|
|||
switch (action.type) {
|
||||
case TSParseActionTypeShift:
|
||||
if (parser->lookahead->symbol == ts_builtin_sym_error) {
|
||||
DEBUG_PARSE("error");
|
||||
DEBUG("error");
|
||||
if (!handle_error(parser))
|
||||
return finish(parser);
|
||||
} else {
|
||||
DEBUG_PARSE("shift state:%u", action.data.to_state);
|
||||
DEBUG("shift state:%u", action.data.to_state);
|
||||
shift(parser, action.data.to_state);
|
||||
}
|
||||
break;
|
||||
|
||||
case TSParseActionTypeShiftExtra:
|
||||
DEBUG_PARSE("shift_extra");
|
||||
DEBUG("shift_extra");
|
||||
shift_extra(parser);
|
||||
break;
|
||||
|
||||
case TSParseActionTypeReduce:
|
||||
DEBUG_PARSE("reduce sym:%s count:%u", SYM_NAME(action.data.symbol),
|
||||
DEBUG("reduce sym:%s count:%u", SYM_NAME(action.data.symbol),
|
||||
action.data.child_count);
|
||||
reduce(parser, action.data.symbol, action.data.child_count);
|
||||
break;
|
||||
|
||||
case TSParseActionTypeReduceExtra:
|
||||
DEBUG_PARSE("reduce_extra sym:%s", SYM_NAME(action.data.symbol));
|
||||
DEBUG("reduce_extra sym:%s", SYM_NAME(action.data.symbol));
|
||||
reduce_extra(parser, action.data.symbol);
|
||||
break;
|
||||
|
||||
case TSParseActionTypeAccept:
|
||||
DEBUG_PARSE("accept");
|
||||
DEBUG("accept");
|
||||
return finish(parser);
|
||||
|
||||
case TSParseActionTypeError:
|
||||
DEBUG_PARSE("error");
|
||||
DEBUG("error");
|
||||
if (!handle_error(parser))
|
||||
return finish(parser);
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue