diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index d040824d..492784ef 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -13,10 +13,10 @@ extern "C" { typedef struct TSTree TSTree; #define ts_lex_state_error 0 +#define TS_DEBUG_BUFFER_SIZE 512 typedef struct TSLexer { TSInput input; - int debug; const char *chunk; size_t chunk_start; @@ -31,6 +31,9 @@ typedef struct TSLexer { TSTree *(*accept_fn)(struct TSLexer *, TSSymbol, int); bool (*advance_fn)(struct TSLexer *); + + TSDebugger debugger; + char debug_buffer[TS_DEBUG_BUFFER_SIZE]; } TSLexer; static inline int32_t ts_lexer_lookahead_char(const TSLexer *lexer) { @@ -81,42 +84,42 @@ struct TSLanguage { TSTree *(*lex_fn)(TSLexer *, TSStateId); }; -#define DEBUG_LEX(...) \ - if (lexer->debug) { \ - fprintf(stderr, "LEX "); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, "\n"); \ +#define DEBUG_LEX(...) \ + if (lexer->debugger.debug_fn) { \ + snprintf(lexer->debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \ + lexer->debugger.debug_fn(lexer->debugger.data, lexer->debug_buffer); \ } -#define START_LEXER() \ - DEBUG_LEX("START %d", lex_state); \ - int32_t lookahead; \ - next_state: \ - lookahead = ts_lexer_lookahead_char(lexer); \ - DEBUG_LEX((0 < lookahead &&lookahead <= 255 ? "CHAR '%c'" : "CHAR %d"), \ +#define START_LEXER() \ + DEBUG_LEX("start state:%d", lex_state); \ + int32_t lookahead; \ + next_state: \ + lookahead = ts_lexer_lookahead_char(lexer); \ + DEBUG_LEX((0 < lookahead &&lookahead < 255 ? 
"lookahead char:'%c'" \ + : "lookahead char:%d"), \ lookahead); -#define START_TOKEN() \ - DEBUG_LEX("START TOKEN %lu", lexer->current_position.chars); \ +#define START_TOKEN() \ + DEBUG_LEX("start_token chars:%lu", lexer->current_position.chars); \ ts_lexer_start_token(lexer); -#define ADVANCE(state_index) \ - { \ - DEBUG_LEX("ADVANCE %d", state_index); \ - ts_lexer_advance(lexer); \ - lex_state = state_index; \ - goto next_state; \ +#define ADVANCE(state_index) \ + { \ + DEBUG_LEX("advance state:%d", state_index); \ + ts_lexer_advance(lexer); \ + lex_state = state_index; \ + goto next_state; \ } #define ACCEPT_TOKEN(symbol) \ { \ - DEBUG_LEX("TOKEN %s", ts_symbol_names[symbol]); \ + DEBUG_LEX("accept_token sym:%s", ts_symbol_names[symbol]); \ return ts_lexer_accept(lexer, symbol, ts_hidden_symbol_flags[symbol]); \ } #define LEX_ERROR() \ { \ - DEBUG_LEX("ERROR"); \ + DEBUG_LEX("error"); \ return ts_lexer_accept(lexer, ts_builtin_sym_error, 0); \ } diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 2ea3e67d..a1c00060 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -20,6 +20,12 @@ typedef struct { void (*release_fn)(void *data); } TSInput; +typedef struct { + void *data; + void (*debug_fn)(void *data, const char *); + void (*release_fn)(void *data); +} TSDebugger; + typedef struct { size_t position; size_t chars_inserted; @@ -53,7 +59,8 @@ void ts_document_set_language(TSDocument *, const TSLanguage *); void ts_document_set_input(TSDocument *, TSInput); void ts_document_set_input_string(TSDocument *, const char *); void ts_document_edit(TSDocument *, TSInputEdit); -void ts_document_set_debug(TSDocument *, int); +void ts_document_debug_parse(TSDocument *, TSDebugger); +void ts_document_debug_lex(TSDocument *, TSDebugger); TSNode *ts_document_root_node(const TSDocument *); #define ts_builtin_sym_error 0 diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index 0117f656..069ff7b4 
100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -1,4 +1,5 @@ #include "runtime/runtime_spec_helper.h" +#include "runtime/helpers/spy_debugger.h" extern "C" const TSLanguage * ts_language_json(); @@ -60,6 +61,73 @@ describe("Document", [&]() { }); }); }); + + describe("debugging", [&]() { + SpyDebugger *debugger; + + before_each([&]() { + ts_document_set_language(doc, ts_language_json()); + debugger = new SpyDebugger(); + }); + + describe("debug_lex(TSDebugger)", [&]() { + before_each([&]() { + ts_document_debug_lex(doc, debugger->debugger()); + }); + + it("calls the debugger with a message for each lex action", [&]() { + ts_document_set_input_string(doc, "[1, 2]"); + + AssertThat(debugger->messages, Contains("lookahead char:'1'")); + AssertThat(debugger->messages, Contains("accept_token sym:number")); + AssertThat(debugger->messages, Contains("advance state:1")); + }); + + describe("disabling debugging", [&]() { + before_each([&]() { + ts_document_debug_lex(doc, {}); + }); + + it("does not call the debugger any more", [&]() { + ts_document_set_input_string(doc, "[1, 2]"); + AssertThat(debugger->messages, IsEmpty()); + }); + + it("releases the old debugger", [&]() { + AssertThat(debugger->release_call_count, Equals(1)); + }); + }); + }); + + describe("debug_parse(TSDebugger)", [&]() { + before_each([&]() { + ts_document_debug_parse(doc, debugger->debugger()); + }); + + it("calls the debugger with a message for each parse action", [&]() { + ts_document_set_input_string(doc, "[1, 2]"); + + AssertThat(debugger->messages, Contains("lex sym:number")); + AssertThat(debugger->messages, Contains("shift state:1")); + AssertThat(debugger->messages, Contains("reduce sym:value count:1")); + }); + + describe("disabling debugging", [&]() { + before_each([&]() { + ts_document_debug_parse(doc, {}); + }); + + it("does not call the debugger any more", [&]() { + ts_document_set_input_string(doc, "[1, 2]"); + AssertThat(debugger->messages, IsEmpty()); 
+ }); + + it("releases the old debugger", [&]() { + AssertThat(debugger->release_call_count, Equals(1)); + }); + }); + }); + }); }); END_TEST diff --git a/spec/runtime/helpers/spy_debugger.cc b/spec/runtime/helpers/spy_debugger.cc new file mode 100644 index 00000000..d23122a2 --- /dev/null +++ b/spec/runtime/helpers/spy_debugger.cc @@ -0,0 +1,28 @@ +#include "runtime/helpers/spy_debugger.h" +#include <string> +#include <vector> + +using std::string; +using std::vector; + +static void spy_debug(void *data, const char *msg) { + SpyDebugger *debugger = static_cast<SpyDebugger *>(data); + debugger->messages.push_back(msg); +} + +static void spy_release(void *data) { + SpyDebugger *debugger = static_cast<SpyDebugger *>(data); + debugger->release_call_count++; +} + +TSDebugger SpyDebugger::debugger() { + TSDebugger result; + result.data = (void *)this; + result.debug_fn = spy_debug; + result.release_fn = spy_release; + return result; +} + +void SpyDebugger::clear() { + messages.clear(); +} diff --git a/spec/runtime/helpers/spy_debugger.h b/spec/runtime/helpers/spy_debugger.h new file mode 100644 index 00000000..1474ed80 --- /dev/null +++ b/spec/runtime/helpers/spy_debugger.h @@ -0,0 +1,18 @@ +#ifndef HELPERS_SPY_DEBUGGER_H_ +#define HELPERS_SPY_DEBUGGER_H_ + +#include <string> +#include <vector> +#include "tree_sitter/runtime.h" + +class SpyDebugger { + public: + void clear(); + TSDebugger debugger(); + + std::vector<std::string> messages; + size_t release_call_count; +}; + +#endif // HELPERS_SPY_DEBUGGER_H_ + diff --git a/src/runtime/document.c b/src/runtime/document.c index b9894fca..008b80ca 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -7,10 +7,13 @@ struct TSDocument { TSParser parser; TSInput input; TSNode *node; - int debug; }; -TSDocument *ts_document_make() { return calloc(sizeof(TSDocument), 1); } +TSDocument *ts_document_make() { + TSDocument *document = calloc(sizeof(TSDocument), 1); + document->parser = ts_parser_make(); + return document; +} void ts_document_free(TSDocument *document) { 
ts_parser_destroy(&document->parser); @@ -33,19 +36,16 @@ static void reparse(TSDocument *document, TSInputEdit *edit) { } void ts_document_set_language(TSDocument *document, const TSLanguage *language) { - ts_parser_destroy(&document->parser); - document->parser = ts_parser_make(language); - ts_document_set_debug(document, document->debug); + document->parser.language = language; reparse(document, NULL); } -void ts_document_set_debug(TSDocument *document, int debug) { - document->debug = debug; - document->parser.debug = debug; - if (debug > 1) - document->parser.lexer.debug = 1; - else - document->parser.lexer.debug = 0; +void ts_document_debug_parse(TSDocument *document, TSDebugger debugger) { + ts_parser_debug_parse(&document->parser, debugger); +} + +void ts_document_debug_lex(TSDocument *document, TSDebugger debugger) { + ts_parser_debug_lex(&document->parser, debugger); } void ts_document_set_input(TSDocument *document, TSInput input) { diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index bb7b6f1d..c58338a7 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -69,7 +69,7 @@ static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden) { TSLexer ts_lexer_make() { TSLexer result = (TSLexer) { .advance_fn = advance, .accept_fn = accept, - .debug = 0, + .debugger = {}, .chunk = NULL, .chunk_start = 0, .chunk_size = 0, diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 8d40d04a..66dd69e7 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -12,10 +12,11 @@ * Private */ -#define DEBUG_PARSE(...) \ - if (parser->debug) { \ - fprintf(stderr, "PARSE " __VA_ARGS__); \ - fprintf(stderr, "\n"); \ +#define DEBUG_PARSE(...) 
\ + if (parser->debugger.debug_fn) { \ + snprintf(parser->lexer.debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \ + parser->debugger.debug_fn(parser->debugger.data, \ + parser->lexer.debug_buffer); \ } static TSParseAction action_for(const TSLanguage *lang, TSStateId state, @@ -45,7 +46,7 @@ static size_t breakdown_right_stack(TSParser *parser, TSLength cur_position, size_t child_count; TSTree **children = ts_tree_children(node, &child_count); - DEBUG_PARSE("POP RIGHT %s", parser->language->symbol_names[node->symbol]); + DEBUG_PARSE("pop_right %s", parser->language->symbol_names[node->symbol]); stack->size--; right_subtree_start += ts_tree_total_size(node).chars; @@ -53,7 +54,7 @@ static size_t breakdown_right_stack(TSParser *parser, TSLength cur_position, TSTree *child = children[i]; if (right_subtree_start > cur_position.chars) { - DEBUG_PARSE("PUSH RIGHT %s", + DEBUG_PARSE("push_right %s", parser->language->symbol_names[child->symbol]); ts_stack_push(stack, 0, child); right_subtree_start -= ts_tree_total_size(child).chars; @@ -92,7 +93,8 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) { if (left_subtree_end.chars < edit->position && !children) break; - DEBUG_PARSE("POP LEFT %s", parser->language->symbol_names[node->symbol]); + DEBUG_PARSE("pop_left sym: %s", + parser->language->symbol_names[node->symbol]); parser->stack.size--; left_subtree_end = ts_length_sub(left_subtree_end, ts_tree_total_size(node)); @@ -104,7 +106,8 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) { TSStateId next_state = action.type == TSParseActionTypeShift ? 
action.data.to_state : state; - DEBUG_PARSE("PUSH LEFT %s", parser->language->symbol_names[child->symbol]); + DEBUG_PARSE("push_left sym: %s", + parser->language->symbol_names[child->symbol]); ts_stack_push(&parser->stack, next_state, child); left_subtree_end = ts_length_add(left_subtree_end, ts_tree_total_size(child)); @@ -116,7 +119,7 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) { if (right_subtree_start < edit->position + edit->chars_inserted) break; - DEBUG_PARSE("PUSH RIGHT %s", + DEBUG_PARSE("push_right sym: %s", parser->language->symbol_names[child->symbol]); ts_stack_push(&parser->right_stack, 0, child); } @@ -124,7 +127,7 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) { ts_tree_release(node); } - DEBUG_PARSE("RESUME LEFT %lu", left_subtree_end.chars); + DEBUG_PARSE("reuse_left chars:%lu", left_subtree_end.chars); return left_subtree_end; } @@ -141,10 +144,11 @@ static void lex(TSParser *parser, TSStateId lex_state) { parser->lookahead = node; parser->lexer.current_position = ts_length_add(parser->lexer.current_position, ts_tree_total_size(node)); - DEBUG_PARSE("REUSE %s", parser->language->symbol_names[node->symbol]); + DEBUG_PARSE("reuse_right sym:%s", + parser->language->symbol_names[node->symbol]); } else { parser->lookahead = parser->language->lex_fn(&parser->lexer, lex_state); - DEBUG_PARSE("TOKEN %s", + DEBUG_PARSE("lex sym:%s", parser->language->symbol_names[parser->lookahead->symbol]); } } @@ -156,20 +160,32 @@ static void resize_error(TSParser *parser, TSTree *error) { error->padding); } +/* + * Parse Actions + */ + static void shift(TSParser *parser, TSStateId parse_state) { + DEBUG_PARSE("shift state:%d", parse_state); + if (ts_tree_is_extra(parser->lookahead)) parse_state = ts_stack_top_state(&parser->stack); + ts_stack_push(&parser->stack, parse_state, parser->lookahead); parser->lookahead = parser->next_lookahead; parser->next_lookahead = NULL; } static void shift_extra(TSParser *parser) { + 
DEBUG_PARSE("shift_extra"); + ts_tree_set_extra(parser->lookahead); shift(parser, 0); } static void reduce(TSParser *parser, TSSymbol symbol, size_t child_count) { + DEBUG_PARSE("reduce sym:%s count:%lu", parser->language->symbol_names[symbol], + child_count); + TSStack *stack = &parser->stack; parser->next_lookahead = parser->lookahead; @@ -197,6 +213,7 @@ static void reduce(TSParser *parser, TSSymbol symbol, size_t child_count) { } static void reduce_extra(TSParser *parser, TSSymbol symbol) { + DEBUG_PARSE("reduce_extra"); reduce(parser, symbol, 1); ts_tree_set_extra(parser->lookahead); } @@ -222,7 +239,7 @@ static int handle_error(TSParser *parser) { parser->language, state_after_error, parser->lookahead->symbol); if (action_after_error.type != TSParseActionTypeError) { - DEBUG_PARSE("RECOVER %u", state_after_error); + DEBUG_PARSE("recover state:%u", state_after_error); ts_stack_shrink(&parser->stack, entry - parser->stack.entries + 1); parser->lookahead->padding = ts_length_zero(); @@ -240,7 +257,7 @@ static int handle_error(TSParser *parser) { * current lookahead token, advance to the next token. If no characters * were consumed, advance the lexer to the next character. 
*/ - DEBUG_PARSE("LEX AGAIN"); + DEBUG_PARSE("lex_again"); TSLength prev_position = parser->lexer.current_position; if (parser->lookahead) ts_tree_release(parser->lookahead); @@ -252,7 +269,7 @@ static int handle_error(TSParser *parser) { */ if (ts_length_eq(parser->lexer.current_position, prev_position)) if (!ts_lexer_advance(&parser->lexer)) { - DEBUG_PARSE("FAIL TO RECOVER"); + DEBUG_PARSE("fail_to_recover"); resize_error(parser, error); ts_stack_push(&parser->stack, 0, error); @@ -262,7 +279,9 @@ static int handle_error(TSParser *parser) { } } -static TSTree *get_root(TSParser *parser) { +static TSTree *finish(TSParser *parser) { + DEBUG_PARSE("finish"); + if (parser->stack.size == 0) ts_stack_push(&parser->stack, 0, ts_tree_make_error(ts_length_zero(), ts_length_zero(), 0)); @@ -277,20 +296,40 @@ static TSTree *get_root(TSParser *parser) { * Public */ -TSParser ts_parser_make(const TSLanguage *language) { +TSParser ts_parser_make() { return (TSParser) { .lexer = ts_lexer_make(), .stack = ts_stack_make(), .right_stack = ts_stack_make(), - .debug = 0, - .language = language, }; + .debugger = (TSDebugger) {} }; } void ts_parser_destroy(TSParser *parser) { + ts_stack_delete(&parser->stack); + ts_stack_delete(&parser->right_stack); + if (parser->lookahead) ts_tree_release(parser->lookahead); if (parser->next_lookahead) ts_tree_release(parser->next_lookahead); - ts_stack_delete(&parser->stack); + + if (parser->debugger.release_fn) + parser->debugger.release_fn(parser->debugger.data); + if (parser->lexer.debugger.release_fn) + parser->lexer.debugger.release_fn(parser->lexer.debugger.data); +} + +void ts_parser_debug_parse(TSParser *parser, TSDebugger debugger) { + TSDebugger old_debugger = parser->debugger; + if (old_debugger.release_fn) + old_debugger.release_fn(old_debugger.data); + parser->debugger = debugger; +} + +void ts_parser_debug_lex(TSParser *parser, TSDebugger debugger) { + TSDebugger old_debugger = parser->lexer.debugger; + if (old_debugger.release_fn) + 
old_debugger.release_fn(old_debugger.data); + parser->lexer.debugger = debugger; } const TSTree *ts_parser_parse(TSParser *parser, TSInput input, @@ -313,38 +352,30 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input, case TSParseActionTypeShift: if (parser->lookahead->symbol == ts_builtin_sym_error) { if (!handle_error(parser)) - return get_root(parser); + return finish(parser); } else { - DEBUG_PARSE("SHIFT %d", action.data.to_state); shift(parser, action.data.to_state); } break; case TSParseActionTypeShiftExtra: - DEBUG_PARSE("SHIFT EXTRA"); shift_extra(parser); break; case TSParseActionTypeReduce: - DEBUG_PARSE("REDUCE %s %d", - parser->language->symbol_names[action.data.symbol], - action.data.child_count); reduce(parser, action.data.symbol, action.data.child_count); break; case TSParseActionTypeReduceExtra: - DEBUG_PARSE("REDUCE EXTRA"); reduce_extra(parser, action.data.symbol); break; case TSParseActionTypeAccept: - DEBUG_PARSE("ACCEPT"); - return get_root(parser); + return finish(parser); case TSParseActionTypeError: - DEBUG_PARSE("ERROR"); if (!handle_error(parser)) - return get_root(parser); + return finish(parser); break; default: diff --git a/src/runtime/parser.h b/src/runtime/parser.h index 57135af9..4ef07bb7 100644 --- a/src/runtime/parser.h +++ b/src/runtime/parser.h @@ -12,14 +12,16 @@ typedef struct { TSStack stack; TSStack right_stack; size_t total_chars; - int debug; TSTree *lookahead; TSTree *next_lookahead; const TSLanguage *language; + TSDebugger debugger; } TSParser; -TSParser ts_parser_make(const TSLanguage *); +TSParser ts_parser_make(); void ts_parser_destroy(TSParser *); +void ts_parser_debug_parse(TSParser *, TSDebugger); +void ts_parser_debug_lex(TSParser *, TSDebugger); const TSTree *ts_parser_parse(TSParser *, TSInput, TSInputEdit *); #ifdef __cplusplus