diff --git a/include/parser.h b/include/parser.h index 38cee290..82bb5d37 100644 --- a/include/parser.h +++ b/include/parser.h @@ -1,17 +1,38 @@ #ifndef __tree_sitter_parser_h__ #define __tree_sitter_parser_h__ - +#ifdef __cplusplus +extern "C" { +#endif + #include "tree.h" #include "parse_config.h" #include -#ifdef __cplusplus -extern "C" { +// #define TS_DEBUG_PARSE +// #define TS_DEBUG_LEX + +#ifdef TS_DEBUG_LEX +#define DEBUG_LEX(...) fprintf(stderr, __VA_ARGS__) +#else +#define DEBUG_LEX(...) #endif + +#ifdef TS_DEBUG_PARSE +#define DEBUG_PARSE(...) fprintf(stderr, __VA_ARGS__) +#else +#define DEBUG_PARSE(...) +#endif + +static int INITIAL_STACK_SIZE = 100; typedef int TSState; -typedef struct TSStackEntry TSStackEntry; -typedef struct TSParser { + +typedef struct { + TSState state; + TSTree *node; +} TSStackEntry; + +typedef struct { const char *input; size_t position; TSTree *lookahead_node; @@ -21,29 +42,96 @@ typedef struct TSParser { TSParseResult result; } TSParser; -TSParser TSParserMake(const char *input); -void TSParserShift(TSParser *parser, TSState state); -void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count); -void TSParserLexError(TSParser *parser, size_t count, const char **expected_inputs); -void TSParserError(TSParser *parser, size_t count, const char **expected_inputs); -void TSParserAcceptInput(TSParser *parser); -void TSParserAdvance(TSParser *parser, TSState lex_state); -TSState TSParserParseState(const TSParser *parser); -TSState TSParserLexState(const TSParser *parser); -TSParseResult TSParserResult(TSParser *parser); -void TSParserSetLexState(TSParser *parser, TSState state); -char TSParserLookaheadChar(const TSParser *parser); -long TSParserLookaheadSym(const TSParser *parser); -void TSParserSetLookaheadSym(TSParser *parser, TSSymbol symbol); +static TSParser TSParserMake(const char *input) { + TSParser result = { + .input = input, + .position = 0, + .lookahead_node = NULL, + .lex_state = 0, + .stack = calloc(INITIAL_STACK_SIZE, sizeof(TSStackEntry)), + .stack_size = 0, + .result = { + .tree = NULL, + .error = { + .type = TSParseErrorTypeNone, + .expected_inputs = NULL, + .expected_input_count = 0 + }, + }, + }; + return result; +} + +static char TSParserLookaheadChar(const TSParser *parser) { + return parser->input[parser->position]; +} + +static long TSParserLookaheadSym(const TSParser *parser) { + TSTree *node = parser->lookahead_node; + return node ? node->value : -1; +} + +static TSState TSParserParseState(const TSParser *parser) { + return parser->stack[parser->stack_size - 1].state; +} + +static void TSParserShift(TSParser *parser, TSState parse_state) { + DEBUG_PARSE("shift %d \n", parse_state); + TSStackEntry *entry = (parser->stack + parser->stack_size); + entry->state = parse_state; + entry->node = parser->lookahead_node; + parser->lookahead_node = NULL; + parser->stack_size++; +} + +static void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count) { + parser->stack_size -= child_count; + + TSTree **children = malloc(child_count * sizeof(TSTree *)); + for (int i = 0; i < child_count; i++) { + children[i] = parser->stack[parser->stack_size + i].node; + } + + parser->lookahead_node = TSTreeMake(symbol, child_count, children); + DEBUG_PARSE("reduce: %ld, state: %u \n", symbol, TSParserParseState(parser)); +} + +static void TSParserError(TSParser *parser, size_t count, const char **expected_inputs) { + TSParseError *error = &parser->result.error; + error->type = TSParseErrorTypeSyntactic; + error->expected_input_count = count; + error->expected_inputs = expected_inputs; + error->lookahead_sym = TSParserLookaheadSym(parser); +} + +static void TSParserLexError(TSParser *parser, size_t count, const char **expected_inputs) { + TSParseError *error = &parser->result.error; + error->type = TSParseErrorTypeLexical; + error->expected_input_count = count; + error->expected_inputs = expected_inputs; + error->lookahead_sym = TSParserLookaheadSym(parser); +} + +static void TSParserAdvance(TSParser *parser, TSState lex_state) { + DEBUG_LEX("character: '%c' \n", TSParserLookaheadChar(parser)); + parser->position++; + parser->lex_state = lex_state; +} + +static void TSParserSetLookaheadSym(TSParser *parser, TSSymbol symbol) { + DEBUG_LEX("token: %ld \n", symbol); + parser->lookahead_node = TSTreeMake(symbol, 0, NULL); +} + +static void TSParserAcceptInput(TSParser *parser) { + parser->result.tree = parser->stack[parser->stack_size - 1].node; +} #pragma mark - DSL #define START_PARSER() \ -TSParser p = TSParserMake(input), *parser = &p; \ +TSParser p = TSParserMake(input), *parser = &p; \ next_state: - -#define LOOKAHEAD_SYM_NAME() \ -ts_symbol_names[LOOKAHEAD_SYM()] #define START_LEXER() \ if (LOOKAHEAD_SYM() >= 0) return; \ @@ -60,26 +148,26 @@ TSParserLookaheadChar(parser) TSParserParseState(parser) #define LEX_STATE() \ -TSParserLexState(parser) +parser->lex_state + +#define SET_LEX_STATE(state_index) \ +{ parser->lex_state = state_index; ts_lex(parser); } #define SHIFT(state) \ { TSParserShift(parser, state); goto next_state; } -#define SET_LEX_STATE(state_index) \ -{ TSParserSetLexState(parser, state_index); ts_lex(parser); } - #define ADVANCE(state_index) \ { TSParserAdvance(parser, state_index); goto next_state; } +#define REDUCE(symbol, child_count) \ +{ TSParserReduce(parser, symbol, child_count); goto next_state; } + #define ACCEPT_INPUT() \ { TSParserAcceptInput(parser); goto done; } #define ACCEPT_TOKEN(symbol) \ { TSParserSetLookaheadSym(parser, symbol); goto done; } -#define REDUCE(symbol, child_count) \ -{ TSParserReduce(parser, symbol, child_count); goto next_state; } - #define PARSE_ERROR(count, inputs) \ { \ static const char *expected_inputs[] = inputs; \ @@ -89,28 +177,27 @@ goto done; \ #define LEX_ERROR(count, inputs) \ { \ - static const char *expected_inputs[] = inputs; \ - TSParserLexError(parser, count, expected_inputs); \ - goto done; \ +static const char *expected_inputs[] = inputs; \ +TSParserLexError(parser, count, expected_inputs); \ +goto done; \ } + +#define LEX_PANIC() \ +printf("Lex error: unexpected state %ud", LEX_STATE()); +#define PARSE_PANIC() \ +printf("Parse error: unexpected state %ud", PARSE_STATE()); + #define EXPECT(...) __VA_ARGS__ #define FINISH_PARSER() \ done: \ -return TSParserResult(parser); +return parser->result; #define FINISH_LEXER() \ done: - -#define LEX_PANIC() \ -printf("Lex error: unexpected state %ud", LEX_STATE()); - -#define PARSE_PANIC() \ -printf("Parse error: unexpected state %ud", PARSE_STATE()); #ifdef __cplusplus } #endif - #endif diff --git a/src/runtime/parser.c b/src/runtime/parser.c deleted file mode 100644 index 814c2604..00000000 --- a/src/runtime/parser.c +++ /dev/null @@ -1,121 +0,0 @@ -#include "parser.h" -#include - -#define TS_DEBUG_PARSE -#define TS_DEBUG_LEX - -#ifdef TS_DEBUG_LEX -#define DEBUG_LEX(...) fprintf(stderr, __VA_ARGS__) -#else -#define DEBUG_LEX(...) -#endif - -#ifdef TS_DEBUG_PARSE -#define DEBUG_PARSE(...) fprintf(stderr, __VA_ARGS__) -#else -#define DEBUG_PARSE(...) -#endif - -static int INITIAL_STACK_SIZE = 100; - -struct TSStackEntry { - TSState state; - TSTree *node; -}; - -TSParser TSParserMake(const char *input) { - TSParser result = { - .input = input, - .position = 0, - .lookahead_node = NULL, - .lex_state = 0, - .stack = calloc(INITIAL_STACK_SIZE, sizeof(TSStackEntry)), - .stack_size = 0, - .result = { - .tree = NULL, - .error = { - .type = TSParseErrorTypeNone, - .expected_inputs = NULL, - .expected_input_count = 0 - }, - }, - }; - return result; -} - -void TSParserShift(TSParser *parser, TSState parse_state) { - DEBUG_PARSE("shift %d \n", parse_state); - TSStackEntry *entry = (parser->stack + parser->stack_size); - entry->state = parse_state; - entry->node = parser->lookahead_node; - parser->lookahead_node = NULL; - parser->stack_size++; -} - -void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count) { - parser->stack_size -= child_count; - - TSTree **children = malloc(child_count * sizeof(TSTree *)); - for (int i = 0; i < child_count; i++) { - children[i] = parser->stack[parser->stack_size + i].node; - } - - parser->lookahead_node = TSTreeMake(symbol, child_count, children); - DEBUG_PARSE("reduce: %ld, state: %u \n", symbol, TSParserParseState(parser)); -} - -void TSParserError(TSParser *parser, size_t count, const char **expected_inputs) { - TSParseError *error = &parser->result.error; - error->type = TSParseErrorTypeSyntactic; - error->expected_input_count = count; - error->expected_inputs = expected_inputs; - error->lookahead_sym = TSParserLookaheadSym(parser); -} - -void TSParserLexError(TSParser *parser, size_t count, const char **expected_inputs) { - TSParseError *error = &parser->result.error; - error->type = TSParseErrorTypeLexical; - error->expected_input_count = count; - error->expected_inputs = expected_inputs; - error->lookahead_sym = TSParserLookaheadSym(parser); -} - -void TSParserAdvance(TSParser *parser, TSState lex_state) { - DEBUG_LEX("character: '%c' \n", TSParserLookaheadChar(parser)); - parser->position++; - parser->lex_state = lex_state; -} - -char TSParserLookaheadChar(const TSParser *parser) { - return parser->input[parser->position]; -} - -long TSParserLookaheadSym(const TSParser *parser) { - TSTree *node = parser->lookahead_node; - return node ? node->value : -1; -} - -void TSParserSetLookaheadSym(TSParser *parser, TSSymbol symbol) { - DEBUG_LEX("token: %ld \n", symbol); - parser->lookahead_node = TSTreeMake(symbol, 0, NULL); -} - -TSState TSParserParseState(const TSParser *parser) { - return parser->stack[parser->stack_size - 1].state; -} - -TSState TSParserLexState(const TSParser *parser) { - return parser->lex_state; -} - -void TSParserSetLexState(TSParser *parser, TSState lex_state) { - parser->lex_state = lex_state; -} - -void TSParserAcceptInput(TSParser *parser) { - parser->result.tree = parser->stack[parser->stack_size - 1].node; -} - -TSParseResult TSParserResult(TSParser *parser) { - return parser->result; -} diff --git a/tree_sitter.xcodeproj/project.pbxproj b/tree_sitter.xcodeproj/project.pbxproj index 51381b44..f145eafc 100644 --- a/tree_sitter.xcodeproj/project.pbxproj +++ b/tree_sitter.xcodeproj/project.pbxproj @@ -44,7 +44,6 @@ 12FD40D8185FEEDF0041A84E /* rules_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492EA181E200B008E9BDA /* rules_spec.cpp */; }; 12FD40D9185FEEDF0041A84E /* pattern_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A0183570F5005F3369 /* pattern_spec.cpp */; }; 12FD40DB185FEF0D0041A84E /* arithmetic_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DA185FEF0D0041A84E /* arithmetic_spec.cpp */; }; - 12FD40DD185FF12C0041A84E /* parser.c in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DC185FF12C0041A84E /* parser.c */; }; 12FD40DF1860064C0041A84E /* tree.c in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DE1860064C0041A84E /* tree.c */; }; 12FD40E718639B910041A84E /* visitor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E618639B910041A84E /* visitor.cpp */; }; 12FD40E918641FB70041A84E /* rules.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E818641FB70041A84E /* rules.cpp */; }; @@ -145,7 +144,6 @@ 12FD40D1185EEB5E0041A84E /* runtime_specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = runtime_specs; sourceTree = BUILT_PRODUCTS_DIR; }; 12FD40D4185FED9A0041A84E /* tree.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tree.h; sourceTree = ""; }; 12FD40DA185FEF0D0041A84E /* arithmetic_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = arithmetic_spec.cpp; sourceTree = ""; }; - 12FD40DC185FF12C0041A84E /* parser.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = parser.c; sourceTree = ""; }; 12FD40DE1860064C0041A84E /* tree.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = tree.c; sourceTree = ""; }; 12FD40E41862B3530041A84E /* visitor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = visitor.h; sourceTree = ""; }; 12FD40E618639B910041A84E /* visitor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = visitor.cpp; sourceTree = ""; }; @@ -352,7 +350,6 @@ 12FD40AD185EE5440041A84E /* runtime */ = { isa = PBXGroup; children = ( - 12FD40DC185FF12C0041A84E /* parser.c */, 12FD40DE1860064C0041A84E /* tree.c */, 12EDCF8C187C6282005A7A07 /* document.c */, 12BC470318822A17005AC502 /* parse_config.cpp */, @@ -511,7 +508,6 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 12FD40DD185FF12C0041A84E /* parser.c in Sources */, 12EDCF8A187B498C005A7A07 /* tree_spec.cpp in Sources */, 12EDCF8D187C6282005A7A07 /* document.c in Sources */, 12FD40DF1860064C0041A84E /* tree.c in Sources */,