diff --git a/include/parser.h b/include/parser.h index 9d746f0d..ec31dfe7 100644 --- a/include/parser.h +++ b/include/parser.h @@ -14,7 +14,7 @@ typedef struct TSStackEntry TSStackEntry; typedef struct TSParser { TSTree *tree; const char *input; - char *error_message; + const char *error_message; size_t position; TSTree *lookahead_node; TSState lex_state; @@ -47,7 +47,7 @@ next_state: ts_symbol_names[LOOKAHEAD_SYM()] #define START_LEXER() \ -if (LOOKAHEAD_SYM() > 0) return; \ +if (LOOKAHEAD_SYM() >= 0) return; \ if (LOOKAHEAD_CHAR() == '\0') { ACCEPT_TOKEN(ts_symbol___END__); } \ next_state: diff --git a/spec/runtime/arithmetic_spec.cpp b/spec/runtime/arithmetic_spec.cpp index 72cfc7f8..e2449965 100644 --- a/spec/runtime/arithmetic_spec.cpp +++ b/spec/runtime/arithmetic_spec.cpp @@ -6,11 +6,21 @@ extern TSParseConfig ts_parse_config_arithmetic; START_TEST describe("arithmetic", []() { - it("parses_numbers", [&]() { - TSDocument *document = TSDocumentMake(); + TSDocument *document; + + before_each([&]() { + document = TSDocumentMake(); TSDocumentSetUp(document, ts_parse_config_arithmetic); - TSDocumentSetText(document, "w"); - printf("%s", TSDocumentToString(document)); + }); + + it("parses variables", [&]() { + TSDocumentSetText(document, "x"); + AssertThat(string(TSDocumentToString(document)), Equals("(expression (term (factor (variable))))")); + }); + + it("parses products of variables", [&]() { + TSDocumentSetText(document, "x*y"); + AssertThat(string(TSDocumentToString(document)), Equals("(expression (term (factor (number)) (factor (number)))")); }); }); diff --git a/src/runtime/parser.c b/src/runtime/parser.cpp similarity index 59% rename from src/runtime/parser.c rename to src/runtime/parser.cpp index 467d6da6..aaab2a99 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.cpp @@ -1,6 +1,26 @@ #include "parser.h" #include -#include +#include + +using std::string; +using std::to_string; + +#define TS_DEBUG_PARSE +#define TS_DEBUG_LEX + +#ifdef TS_DEBUG_LEX +#define DEBUG_LEX(...) \ +fprintf(stderr, __VA_ARGS__) +#else +#define DEBUG_LEX(...) +#endif + +#ifdef TS_DEBUG_PARSE +#define DEBUG_PARSE(...) \ +fprintf(stderr, __VA_ARGS__) +#else +#define DEBUG_PARSE(...) +#endif static int INITIAL_STACK_SIZE = 100; @@ -17,13 +37,14 @@ TSParser TSParserMake(const char *input) { .position = 0, .lookahead_node = NULL, .lex_state = 0, - .stack = calloc(INITIAL_STACK_SIZE, sizeof(TSStackEntry)), + .stack = (TSStackEntry *)calloc(INITIAL_STACK_SIZE, sizeof(TSStackEntry)), .stack_size = 0, }; return result; } void TSParserShift(TSParser *parser, TSState parse_state) { + DEBUG_PARSE("shift %d \n", parse_state); TSStackEntry *entry = (parser->stack + parser->stack_size); entry->state = parse_state; entry->node = parser->lookahead_node; @@ -34,42 +55,37 @@ void TSParserShift(TSParser *parser, TSState parse_state) { void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count) { parser->stack_size -= child_count; - TSTree **children = malloc(child_count * sizeof(TSTree *)); + TSTree **children = (TSTree **)malloc(child_count * sizeof(TSTree *)); for (int i = 0; i < child_count; i++) { - size_t j = parser->stack_size + i; - children[i] = parser->stack[j].node; + children[i] = parser->stack[parser->stack_size + i].node; } parser->lookahead_node = TSTreeMake(symbol, child_count, children); + DEBUG_PARSE("reduce: %ld, state: %u \n", symbol, TSParserParseState(parser)); } void TSParserError(TSParser *parser, size_t count, const char **expected_inputs) { - char *message = malloc(100 * sizeof(char)); - char *spot = message; - sprintf(message, "Unexpected token '%ld'. Expected: ", TSParserLookaheadSym(parser)); - spot += strlen(message); - for (int i = 0; i < count; i++) { - spot += 2; - sprintf(spot, "%s", expected_inputs[i]); - spot += strlen(expected_inputs[i]); - } - parser->error_message = message; + string result = "Unexpected token " + to_string(TSParserLookaheadSym(parser)) + ". "; + result += "Expected tokens:"; + for (int i = 0; i < count; i++) + result += string(" '") + expected_inputs[i] + "'"; + char *stuff = (char *)malloc(result.size() * sizeof(char)); + strcpy(stuff, result.c_str()); + parser->error_message = stuff; } void TSParserLexError(TSParser *parser, size_t count, const char **expected_inputs) { - char *message = malloc(100 * sizeof(char)); - char *spot = message; - sprintf(message, "Unexpected character '%c'. Expected: ", parser->input[parser->position]); - spot += 30; - for (int i = 0; i < count; i++) { - spot += 2; - sprintf(spot, "%s", expected_inputs[i]); - spot += strlen(expected_inputs[i]); - } - parser->error_message = message; + string result = string("Unexpected character '") + TSParserLookaheadChar(parser) + "'. "; + result += "Expected characters:"; + for (int i = 0; i < count; i++) + result += string(" ") + expected_inputs[i] + ""; + char *stuff = (char *)malloc(result.size() * sizeof(char)); + strcpy(stuff, result.c_str()); + parser->error_message = stuff; } void TSParserAdvance(TSParser *parser, TSState lex_state) { + DEBUG_LEX("character: '%c' \n", TSParserLookaheadChar(parser)); parser->position++; parser->lex_state = lex_state; } @@ -84,6 +100,7 @@ long TSParserLookaheadSym(const TSParser *parser) { } void TSParserSetLookaheadSym(TSParser *parser, TSSymbol symbol) { + DEBUG_LEX("token: %ld \n", symbol); parser->lookahead_node = TSTreeMake(symbol, 0, NULL); } diff --git a/tree_sitter.xcodeproj/project.pbxproj b/tree_sitter.xcodeproj/project.pbxproj index 4d6a1842..42c779f2 100644 --- a/tree_sitter.xcodeproj/project.pbxproj +++ b/tree_sitter.xcodeproj/project.pbxproj @@ -55,7 +55,7 @@ 12FD40D8185FEEDF0041A84E /* rules_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492EA181E200B008E9BDA /* rules_spec.cpp */; }; 12FD40D9185FEEDF0041A84E /* pattern_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A0183570F5005F3369 /* pattern_spec.cpp */; }; 12FD40DB185FEF0D0041A84E /* arithmetic_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DA185FEF0D0041A84E /* arithmetic_spec.cpp */; }; - 12FD40DD185FF12C0041A84E /* parser.c in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DC185FF12C0041A84E /* parser.c */; }; + 12FD40DD185FF12C0041A84E /* parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DC185FF12C0041A84E /* parser.cpp */; }; 12FD40DF1860064C0041A84E /* tree.c in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DE1860064C0041A84E /* tree.c */; }; 12FD40E2186245FE0041A84E /* transitions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E0186245FE0041A84E /* transitions.cpp */; }; 12FD40E718639B910041A84E /* visitor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E618639B910041A84E /* visitor.cpp */; }; @@ -151,7 +151,7 @@ 12FD40D1185EEB5E0041A84E /* runtime_specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = runtime_specs; sourceTree = BUILT_PRODUCTS_DIR; }; 12FD40D4185FED9A0041A84E /* tree.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tree.h; sourceTree = ""; }; 12FD40DA185FEF0D0041A84E /* arithmetic_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = arithmetic_spec.cpp; sourceTree = ""; }; - 12FD40DC185FF12C0041A84E /* parser.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = parser.c; sourceTree = ""; }; + 12FD40DC185FF12C0041A84E /* parser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parser.cpp; sourceTree = ""; }; 12FD40DE1860064C0041A84E /* tree.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = tree.c; sourceTree = ""; }; 12FD40E0186245FE0041A84E /* transitions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = transitions.cpp; sourceTree = ""; }; 12FD40E1186245FE0041A84E /* transitions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transitions.h; sourceTree = ""; }; @@ -358,7 +358,7 @@ 12FD40AD185EE5440041A84E /* runtime */ = { isa = PBXGroup; children = ( - 12FD40DC185FF12C0041A84E /* parser.c */, + 12FD40DC185FF12C0041A84E /* parser.cpp */, 12FD40DE1860064C0041A84E /* tree.c */, 12EDCF8C187C6282005A7A07 /* document.c */, ); @@ -528,7 +528,7 @@ 12FD40B3185EEB5E0041A84E /* seq.cpp in Sources */, 12FD40B4185EEB5E0041A84E /* table_builder.cpp in Sources */, 12FD40B6185EEB5E0041A84E /* arithmetic.cpp in Sources */, - 12FD40DD185FF12C0041A84E /* parser.c in Sources */, + 12FD40DD185FF12C0041A84E /* parser.cpp in Sources */, 12FD40B8185EEB5E0041A84E /* item.cpp in Sources */, 12EDCF8A187B498C005A7A07 /* tree_spec.cpp in Sources */, 12EDCF8D187C6282005A7A07 /* document.c in Sources */,