Move lexer debugging logic out of public header

This commit is contained in:
Max Brunsfeld 2014-10-17 16:20:01 -07:00
parent 5c600942df
commit 7498725d7f
3 changed files with 108 additions and 106 deletions

View file

@ -10,14 +10,20 @@ extern "C" {
#include <stdbool.h>
#include "tree_sitter/runtime.h"
typedef struct TSTree TSTree;
#define ts_lex_state_error 0
#define TS_DEBUG_BUFFER_SIZE 512
typedef struct TSLexer {
TSInput input;
typedef struct TSTree TSTree;
typedef unsigned short TSStateId;
typedef struct TSLexer {
// Public
void (*start_fn)(struct TSLexer *, TSStateId);
void (*start_token_fn)(struct TSLexer *);
bool (*advance_fn)(struct TSLexer *, TSStateId);
TSTree *(*accept_fn)(struct TSLexer *, TSSymbol, int, const char *);
// Private
const char *chunk;
size_t chunk_start;
size_t chunk_size;
@ -29,15 +35,11 @@ typedef struct TSLexer {
size_t lookahead_size;
int32_t lookahead;
TSTree *(*accept_fn)(struct TSLexer *, TSSymbol, int);
bool (*advance_fn)(struct TSLexer *);
TSInput input;
TSDebugger debugger;
char debug_buffer[TS_DEBUG_BUFFER_SIZE];
} TSLexer;
typedef unsigned short TSStateId;
typedef enum {
TSParseActionTypeError,
TSParseActionTypeShift,
@ -67,44 +69,34 @@ struct TSLanguage {
TSTree *(*lex_fn)(TSLexer *, TSStateId);
};
#define DEBUG_LEX(...) \
if (lexer->debugger.debug_fn) { \
snprintf(lexer->debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \
lexer->debugger.debug_fn(lexer->debugger.data, lexer->debug_buffer); \
/*
* Lexer Macros
*/
/*
 * Opens the body of a lex function. Calls the lexer's `start_fn` with the
 * initial lex state, declares the local `lookahead`, and defines the
 * `next_state` label, after which `lookahead` is reloaded from the lexer.
 * Expects `lexer` and `lex_state` to be in scope at the point of use.
 * The expansion ends with a complete statement, so no trailing semicolon
 * is required.
 */
#define START_LEXER() \
lexer->start_fn(lexer, lex_state); \
int32_t lookahead; \
next_state: \
lookahead = lexer->lookahead;
/*
 * Invokes the lexer's `start_token_fn` hook. Expects `lexer` to be in scope.
 * The expansion already ends with a semicolon.
 */
#define START_TOKEN() lexer->start_token_fn(lexer);
/*
 * Moves the lexer forward and switches lex state: calls `advance_fn` with the
 * target state, stores `state_index` in `lex_state`, then jumps to the
 * `next_state` label. The boolean result of `advance_fn` is ignored.
 * Expects `lexer`, `lex_state` and a `next_state` label to be in scope.
 * `state_index` is expanded twice, so it should be free of side effects.
 */
#define ADVANCE(state_index) \
{ \
lexer->advance_fn(lexer, state_index); \
lex_state = state_index; \
goto next_state; \
}
#define START_LEXER() \
DEBUG_LEX("start state:%d", lex_state); \
int32_t lookahead; \
next_state: \
lookahead = lexer->lookahead; \
DEBUG_LEX((0 < lookahead &&lookahead < 255 ? "lookahead char:'%c'" \
: "lookahead char:%d"), \
lookahead);
/*
 * Returns from the enclosing function with the result of the lexer's
 * `accept_fn` for `symbol`, passing along that symbol's entries in
 * `ts_hidden_symbol_flags` and `ts_symbol_names`.
 * Expects `lexer` and both tables to be in scope where the macro is used
 * (NOTE(review): presumably they are defined by the generated parser that
 * includes this header; confirm).
 * `symbol` is expanded three times, so it should be free of side effects.
 */
#define ACCEPT_TOKEN(symbol) \
return lexer->accept_fn(lexer, symbol, ts_hidden_symbol_flags[symbol], \
ts_symbol_names[symbol]);
#define START_TOKEN() \
DEBUG_LEX("start_token chars:%lu", lexer->current_position.chars); \
lexer->token_start_position = lexer->current_position;
/*
 * Accepts the built-in error symbol: shorthand for
 * ACCEPT_TOKEN(ts_builtin_sym_error), so it returns from the enclosing
 * function. The trailing semicolon here yields an extra empty statement
 * after the ACCEPT_TOKEN expansion.
 */
#define LEX_ERROR() ACCEPT_TOKEN(ts_builtin_sym_error);
#define ADVANCE(state_index) \
{ \
DEBUG_LEX("advance state:%d", state_index); \
lexer->advance_fn(lexer); \
lex_state = state_index; \
goto next_state; \
}
#define ACCEPT_TOKEN(symbol) \
{ \
DEBUG_LEX("accept_token sym:%s", ts_symbol_names[symbol]); \
return lexer->accept_fn(lexer, symbol, ts_hidden_symbol_flags[symbol]); \
}
#define LEX_ERROR() \
{ \
DEBUG_LEX("error"); \
return lexer->accept_fn(lexer, ts_builtin_sym_error, 0); \
}
/*
* Parse Table Macros
*/
#define SHIFT(to_state_value) \
{ \

View file

@ -5,54 +5,66 @@
#include "runtime/debugger.h"
#include "utf8proc.h"
/*
 * Formats a printf-style message into the lexer's debug buffer and passes it
 * to the debugger callback, if one is installed. When `debug_fn` is null the
 * arguments are not evaluated and nothing is written.
 *
 * Expects a variable named `lexer` (pointer to the lexer) to be in scope.
 *
 * The body is wrapped in do { ... } while (0) so that `DEBUG(...);` behaves as
 * exactly one statement. A bare `if { }` expansion followed by `;` breaks
 * when the macro is used as the unbraced body of an if/else.
 */
#define DEBUG(...)                                                          \
  do {                                                                      \
    if (lexer->debugger.debug_fn) {                                         \
      snprintf(lexer->debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__);     \
      lexer->debugger.debug_fn(lexer->debugger.data, lexer->debug_buffer);  \
    }                                                                       \
  } while (0)
static const char *empty_chunk = "";
static void ts_lexer_read_next_chunk(TSLexer *lexer) {
/*
 * Fetch the chunk of text that begins at the lexer's current position.
 *
 * If the current byte position is not exactly at the end of the chunk the
 * lexer already holds, the input is first asked to seek to the current
 * position. The chunk start is then set to the current byte position and a
 * new chunk is read. A read that reports zero bytes installs the
 * `empty_chunk` sentinel as the lexer's chunk.
 */
static void read_next_chunk(TSLexer *lexer) {
  TSInput source = lexer->input;
  size_t chunk_end = lexer->chunk_start + lexer->chunk_size;

  /* Only seek when the read would not continue from the previous chunk. */
  if (lexer->current_position.bytes != chunk_end) {
    source.seek_fn(source.data, lexer->current_position);
  }

  lexer->chunk_start = lexer->current_position.bytes;
  lexer->chunk = source.read_fn(source.data, &lexer->chunk_size);

  if (lexer->chunk_size == 0) {
    lexer->chunk = empty_chunk;
  }
}
static bool advance(TSLexer *lexer) {
/*
* Return false if the Lexer has already reached the end of the input.
*/
if (lexer->chunk == empty_chunk)
return false;
/*
* Increment the Lexer's position.
*/
if (lexer->lookahead_size) {
lexer->current_position.bytes += lexer->lookahead_size;
lexer->current_position.chars += 1;
}
/*
* Request a new chunk of text from the Input if the Lexer has reached
* the end of the current chunk.
*/
if (lexer->current_position.bytes >= lexer->chunk_start + lexer->chunk_size) {
ts_lexer_read_next_chunk(lexer);
}
/*
* Read the next unicode character from the current chunk of text.
*/
/*
 * Decode the character at the lexer's current position within the current
 * chunk, storing the decoded value in `lookahead` and the value returned by
 * utf8proc_iterate in `lookahead_size`, then emit a debug message describing
 * the lookahead.
 *
 * NOTE(review): the length handed to utf8proc_iterate is one more than the
 * number of bytes left in the chunk. Presumably this lets a terminating NUL
 * be decoded at the end of a chunk; confirm against the input contract.
 */
static void read_lookahead(TSLexer *lexer) {
  size_t offset = lexer->current_position.bytes - lexer->chunk_start;
  const uint8_t *text = (const uint8_t *)lexer->chunk + offset;
  size_t available = lexer->chunk_size - offset + 1;

  lexer->lookahead_size = utf8proc_iterate(text, available, &lexer->lookahead);

  /* Values in 1..255 are shown as a character, everything else as a number. */
  if (0 < lexer->lookahead && lexer->lookahead < 256) {
    DEBUG("lookahead char:'%c'", lexer->lookahead);
  } else {
    DEBUG("lookahead char:%d", lexer->lookahead);
  }
}
/*
 * Hook installed as the lexer's `start_fn`. It only emits a debug message
 * naming the lex state the lex function starts in; the lexer itself is not
 * modified.
 */
static void start(TSLexer *lexer, TSStateId lex_state) {
DEBUG("start_lex state:%d", lex_state);
}
/*
 * Hook installed as the lexer's `start_token_fn`. Emits a debug message with
 * the current character offset, then records the current position as the
 * start of the token being lexed.
 */
static void start_token(TSLexer *lexer) {
  /*
   * The declared type of `chars` is not visible here. The explicit cast
   * guarantees the argument matches `%lu` on every platform.
   */
  DEBUG("start_token chars:%lu",
        (unsigned long)lexer->current_position.chars);
  lexer->token_start_position = lexer->current_position;
}
/*
 * Hook installed as the lexer's `advance_fn`: step past the current lookahead
 * character and load the next one.
 *
 * `state` is only used for the debug message.
 *
 * Returns false, leaving the position untouched, when the lexer's chunk is
 * the `empty_chunk` sentinel; returns true otherwise.
 */
static bool advance(TSLexer *lexer, TSStateId state) {
  DEBUG("advance state:%d", state);

  /* The sentinel chunk means there is nothing left to consume. */
  if (lexer->chunk == empty_chunk) {
    return false;
  }

  /* Step over the current lookahead; a zero size means none was consumed. */
  if (lexer->lookahead_size != 0) {
    lexer->current_position.bytes += lexer->lookahead_size;
    lexer->current_position.chars++;
  }

  /* Pull in more text once the position reaches the end of this chunk. */
  size_t chunk_end = lexer->chunk_start + lexer->chunk_size;
  if (lexer->current_position.bytes >= chunk_end) {
    read_next_chunk(lexer);
  }

  read_lookahead(lexer);
  return true;
}
static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden,
const char *symbol_name) {
DEBUG("accept_token sym:%s", symbol_name);
TSLength size =
ts_length_sub(lexer->current_position, lexer->token_start_position);
TSLength padding =
@ -64,14 +76,15 @@ static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
}
/*
* The `advance` and `accept` methods are stored as fields on the Lexer so
* that generated parsers can call them without needing to be linked against
* this library.
* The lexer's methods are stored as struct fields so that generated parsers
* can call them without needing to be linked against this library.
*/
TSLexer ts_lexer_make() {
TSLexer result = (TSLexer) { .advance_fn = advance,
TSLexer result = (TSLexer) { .start_fn = start,
.start_token_fn = start_token,
.advance_fn = advance,
.accept_fn = accept,
.debugger = ts_debugger_null(),
.chunk = NULL,
.chunk_start = 0,
.chunk_size = 0,
@ -79,17 +92,14 @@ TSLexer ts_lexer_make() {
.token_start_position = ts_length_zero(),
.token_end_position = ts_length_zero(),
.lookahead = 0,
.lookahead_size = 0, };
.lookahead_size = 0,
.debugger = ts_debugger_null() };
return result;
}
void ts_lexer_reset(TSLexer *lexer, TSLength position) {
lexer->lookahead = 0;
lexer->lookahead_size = 0;
lexer->token_end_position = position;
lexer->current_position = position;
ts_lexer_read_next_chunk(lexer);
lexer->advance_fn(lexer);
read_next_chunk(lexer);
read_lookahead(lexer);
}

View file

@ -13,7 +13,7 @@
* Debugging
*/
#define DEBUG_PARSE(...) \
#define DEBUG(...) \
if (parser->debugger.debug_fn) { \
snprintf(parser->lexer.debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \
parser->debugger.debug_fn(parser->debugger.data, \
@ -50,7 +50,7 @@ static TSLength break_down_left_stack(TSParser *parser, TSInputEdit edit) {
if (left_subtree_end.chars < edit.position && !children)
break;
DEBUG_PARSE("pop_left sym:%s", SYM_NAME(node->symbol));
DEBUG("pop_left sym:%s", SYM_NAME(node->symbol));
parser->stack.size--;
left_subtree_end = ts_length_sub(left_subtree_end, ts_tree_total_size(node));
@ -62,7 +62,7 @@ static TSLength break_down_left_stack(TSParser *parser, TSInputEdit edit) {
TSStateId next_state =
action.type == TSParseActionTypeShift ? action.data.to_state : state;
DEBUG_PARSE("push_left sym:%s", SYM_NAME(child->symbol));
DEBUG("push_left sym:%s", SYM_NAME(child->symbol));
ts_stack_push(&parser->stack, next_state, child);
left_subtree_end =
ts_length_add(left_subtree_end, ts_tree_total_size(child));
@ -74,14 +74,14 @@ static TSLength break_down_left_stack(TSParser *parser, TSInputEdit edit) {
if (right_subtree_start < edit.position + edit.chars_inserted)
break;
DEBUG_PARSE("push_right sym:%s", SYM_NAME(child->symbol));
DEBUG("push_right sym:%s", SYM_NAME(child->symbol));
ts_stack_push(&parser->right_stack, 0, child);
}
ts_tree_release(node);
}
DEBUG_PARSE("reuse_left chars:%lu", left_subtree_end.chars);
DEBUG("reuse_left chars:%lu", left_subtree_end.chars);
return left_subtree_end;
}
@ -111,7 +111,7 @@ static TSTree *break_down_right_stack(TSParser *parser) {
size_t child_count;
TSTree **children = ts_tree_children(node, &child_count);
DEBUG_PARSE("pop_right %s", SYM_NAME(node->symbol));
DEBUG("pop_right %s", SYM_NAME(node->symbol));
stack->size--;
right_subtree_start += ts_tree_total_size(node).chars;
@ -120,7 +120,7 @@ static TSTree *break_down_right_stack(TSParser *parser) {
break;
TSTree *child = children[i];
DEBUG_PARSE("push_right sym:%s", SYM_NAME(child->symbol));
DEBUG("push_right sym:%s", SYM_NAME(child->symbol));
ts_stack_push(stack, 0, child);
right_subtree_start -= ts_tree_total_size(child).chars;
}
@ -133,7 +133,7 @@ static TSTree *get_next_node(TSParser *parser, TSStateId lex_state) {
TSTree *node;
if ((node = break_down_right_stack(parser))) {
DEBUG_PARSE("reuse sym:%s", SYM_NAME(node->symbol));
DEBUG("reuse sym:%s", SYM_NAME(node->symbol));
parser->lexer.lookahead = 0;
parser->lexer.lookahead_size = 0;
@ -141,7 +141,7 @@ static TSTree *get_next_node(TSParser *parser, TSStateId lex_state) {
ts_length_add(parser->lexer.current_position, ts_tree_total_size(node));
} else {
node = parser->language->lex_fn(&parser->lexer, lex_state);
DEBUG_PARSE("lex sym:%s", SYM_NAME(node->symbol));
DEBUG("lex sym:%s", SYM_NAME(node->symbol));
}
return node;
@ -224,7 +224,7 @@ static int handle_error(TSParser *parser) {
parser->language, state_after_error, parser->lookahead->symbol);
if (action_after_error.type != TSParseActionTypeError) {
DEBUG_PARSE("recover state:%u", state_after_error);
DEBUG("recover state:%u", state_after_error);
ts_stack_shrink(&parser->stack, entry - parser->stack.entries + 1);
parser->lookahead->padding = ts_length_zero();
@ -242,7 +242,7 @@ static int handle_error(TSParser *parser) {
* current lookahead token, advance to the next token. If no characters
* were consumed, advance the lexer to the next character.
*/
DEBUG_PARSE("lex_again");
DEBUG("skip_token");
TSLength prev_position = parser->lexer.current_position;
if (parser->lookahead)
ts_tree_release(parser->lookahead);
@ -253,8 +253,8 @@ static int handle_error(TSParser *parser) {
* just skip it. If the end of input is reached, exit.
*/
if (ts_length_eq(parser->lexer.current_position, prev_position))
if (!parser->lexer.advance_fn(&parser->lexer)) {
DEBUG_PARSE("fail_to_recover");
if (!parser->lexer.advance_fn(&parser->lexer, 0)) {
DEBUG("fail_to_recover");
resize_error(parser, error);
ts_stack_push(&parser->stack, 0, error);
@ -323,11 +323,11 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
TSLength position;
if (edit) {
DEBUG_PARSE("edit pos:%lu inserted:%lu deleted:%lu", edit->position,
DEBUG("edit pos:%lu inserted:%lu deleted:%lu", edit->position,
edit->chars_inserted, edit->chars_removed);
position = break_down_left_stack(parser, *edit);
} else {
DEBUG_PARSE("new_parse");
DEBUG("new_parse");
ts_stack_shrink(&parser->stack, 0);
position = ts_length_zero();
}
@ -346,37 +346,37 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
switch (action.type) {
case TSParseActionTypeShift:
if (parser->lookahead->symbol == ts_builtin_sym_error) {
DEBUG_PARSE("error");
DEBUG("error");
if (!handle_error(parser))
return finish(parser);
} else {
DEBUG_PARSE("shift state:%u", action.data.to_state);
DEBUG("shift state:%u", action.data.to_state);
shift(parser, action.data.to_state);
}
break;
case TSParseActionTypeShiftExtra:
DEBUG_PARSE("shift_extra");
DEBUG("shift_extra");
shift_extra(parser);
break;
case TSParseActionTypeReduce:
DEBUG_PARSE("reduce sym:%s count:%u", SYM_NAME(action.data.symbol),
DEBUG("reduce sym:%s count:%u", SYM_NAME(action.data.symbol),
action.data.child_count);
reduce(parser, action.data.symbol, action.data.child_count);
break;
case TSParseActionTypeReduceExtra:
DEBUG_PARSE("reduce_extra sym:%s", SYM_NAME(action.data.symbol));
DEBUG("reduce_extra sym:%s", SYM_NAME(action.data.symbol));
reduce_extra(parser, action.data.symbol);
break;
case TSParseActionTypeAccept:
DEBUG_PARSE("accept");
DEBUG("accept");
return finish(parser);
case TSParseActionTypeError:
DEBUG_PARSE("error");
DEBUG("error");
if (!handle_error(parser))
return finish(parser);
break;