diff --git a/include/document.h b/include/document.h new file mode 100644 index 00000000..c1077096 --- /dev/null +++ b/include/document.h @@ -0,0 +1,21 @@ +#ifndef __tree_sitter_document_h__ +#define __tree_sitter_document_h__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef size_t TSSymbol; + +typedef struct { +} TSTree; + +TSTree TSTreeMake(); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/parser.h b/include/parser.h new file mode 100644 index 00000000..ea8e08da --- /dev/null +++ b/include/parser.h @@ -0,0 +1,104 @@ +#ifndef __tree_sitter_parser_h__ +#define __tree_sitter_parser_h__ + +#include "document.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int TSState; +typedef struct { + TSState state; + TSSymbol symbol; +} TSStackEntry; + +typedef struct { + TSTree tree; + const char *input; + size_t position; + TSSymbol lookahead_sym; + TSState lex_state; + TSStackEntry *stack; + size_t stack_size; +} TSParser; + +TSParser TSParserMake(const char *input); +void TSParserShift(TSParser *parser, TSState state); +void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count); +void TSParserError(TSParser *parser); +void TSParserAdvance(TSParser *parser, TSState lex_state); +TSState TSParserParseState(const TSParser *parser); +TSState TSParserLexState(const TSParser *parser); +void TSParserSetLexState(TSParser *parser, TSState state); +char TSParserLookaheadChar(const TSParser *parser); +TSSymbol TSParserLookaheadSym(const TSParser *parser); +void TSParserSetLookaheadSym(TSParser *parser, TSSymbol symbol); + +#pragma mark - DSL + +#define START_PARSER() \ +TSParser p = TSParserMake(input), *parser = &p; \ +next_state: \ +printf("parse state: %d, lookahead: %s\n", PARSE_STATE(), LOOKAHEAD_SYM_NAME()); + +#define LOOKAHEAD_SYM_NAME() \ +ts_symbol_names[LOOKAHEAD_SYM()] + +#define START_LEXER() \ +if (parser->lookahead_sym > 0) return; \ +next_state: \ +printf("lex state: %d, lookahead: %c\n", LEX_STATE(), LOOKAHEAD_CHAR()); \ +if (LOOKAHEAD_CHAR() == '\0') { \ + ACCEPT_TOKEN(ts_symbol___END__); \ +} \ + +#define LOOKAHEAD_SYM() \ +TSParserLookaheadSym(parser) + +#define LOOKAHEAD_CHAR() \ +TSParserLookaheadChar(parser) + +#define PARSE_STATE() \ +TSParserParseState(parser) + +#define LEX_STATE() \ +TSParserLexState(parser) + +#define SHIFT(state) \ +{ TSParserShift(parser, state); goto next_state; } + +#define SET_LEX_STATE(state_index) \ +{ TSParserSetLexState(parser, state_index); ts_lex(parser); } + +#define ADVANCE(state_index) \ +{ TSParserAdvance(parser, state_index); goto next_state; } + +#define ACCEPT_INPUT() \ +{ goto done; } + +#define ACCEPT_TOKEN(symbol) \ +{ TSParserSetLookaheadSym(parser, symbol); goto done; } + +#define REDUCE(symbol, child_count) \ +{ TSParserReduce(parser, symbol, child_count); goto next_state; } + +#define PARSE_ERROR() \ +{ TSParserError(parser); goto done; } + +#define LEX_ERROR() \ +{ TSParserError(parser); goto done; } + +#define FINISH_PARSER() \ +done: \ +return parser->tree; + +#define FINISH_LEXER() \ +done: + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/runtime.h b/include/runtime.h deleted file mode 100644 index 5ef700fe..00000000 --- a/include/runtime.h +++ /dev/null @@ -1,98 +0,0 @@ -#ifndef tree_sitter_runtime_h -#define tree_sitter_runtime_h - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -#pragma mark - Tree - -typedef struct { -} TSTree; - -TSTree TSTreeMake(); - -#pragma mark - Parser - -typedef size_t TSSymbol; -typedef int TSState; - -typedef struct { - TSTree tree; - const char *input; - size_t position; - TSState *state_stack; - TSSymbol *symbol_stack; - size_t state_count; - size_t symbol_count; -} TSParser; - -TSParser TSParserMake(const char *input); -void TSParserShift(TSParser *parser, TSState state); -void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count); -void TSParserError(TSParser *parser); -TSState TSParserParseState(const TSParser *parser); -TSState TSParserLexState(const TSParser *parser); -TSSymbol TSParserLookahead(const TSParser *parser); -void TSParserSetLexState(const TSParser *parser, TSState state); - -#pragma mark - DSL - -#define START_PARSER() \ - TSParser parser = TSParserMake(input); \ - start: \ - ts_lex(&parser); - -#define START_LEXER() \ - start: - -#define LOOKAHEAD_SYM() \ - TSParserLookahead(&parser) - -#define LOOKAHEAD_CHAR() \ - 'a' - -#define PARSE_STATE() \ - TSParserParseState(&parser) - -#define LEX_STATE() \ - TSParserLexState(parser) - -#define SHIFT(number) \ - { TSParserShift(&parser, number); break; } - -#define SET_LEX_STATE(state_index) \ - { TSParserSetLexState(&parser, state_index); } - -#define ADVANCE(state_index) \ - { break; } - -#define ACCEPT_INPUT() \ - { goto done; } - -#define ACCEPT_TOKEN(symbol) \ - { goto done; } - -#define REDUCE(symbol, child_count) \ - { TSParserReduce(&parser, symbol, child_count); break; } - -#define PARSE_ERROR() \ - TSParserError(&parser) - -#define LEX_ERROR() \ - TSParserError(parser) - -#define FINISH_PARSER() \ - done: \ - return parser.tree; - -#define FINISH_LEXER() \ - done: - -#endif - -#ifdef __cplusplus -} -#endif diff --git a/spec/fixtures/parsers/arithmetic.c b/spec/fixtures/parsers/arithmetic.c index a600c694..dced0afc 100644 --- a/spec/fixtures/parsers/arithmetic.c +++ b/spec/fixtures/parsers/arithmetic.c @@ -1,7 +1,7 @@ -#include "runtime.h" +#include "parser.h" #include -typedef enum { +enum ts_symbol { ts_symbol_expression, ts_symbol_term, ts_symbol_factor, @@ -12,7 +12,20 @@ typedef enum { ts_symbol_number, ts_symbol_variable, ts_symbol___END__ -} ts_symbol; +}; + +static const char *ts_symbol_names[] = { + "expression", + "term", + "factor", + "4", + "3", + "1", + "2", + "number", + "variable", + "__END__" +}; static void ts_lex(TSParser *parser) { START_LEXER(); diff --git a/src/compiler/code_gen/c_code.cpp b/src/compiler/code_gen/c_code.cpp index 00699422..31c52a6a 100644 --- a/src/compiler/code_gen/c_code.cpp +++ b/src/compiler/code_gen/c_code.cpp @@ -22,6 +22,21 @@ namespace tree_sitter { pos += replace.length(); } } + + string join(vector lines, string separator) { + string result; + bool started = false; + for (auto line : lines) { + if (started) result += separator; + started = true; + result += line; + } + return result; + } + + string join(vector lines) { + return join(lines, "\n"); + } string indent(string input) { string tab = " "; @@ -30,24 +45,32 @@ namespace tree_sitter { } string _switch(string condition, string body) { - return "switch (" + condition + ") {\n" + - indent(body) + "\n" - "}"; + return join({ + "switch (" + condition + ") {", + indent(body), + "}" + }); } string _case(string value, string body) { - return "case " + value + ":\n" + - indent(body) + "\n"; + return join({ + "case " + value + ":", + indent(body), "" + }); } string _default(string body) { - return "default:\n" + - indent(body); + return join({ + "default:", + indent(body) + }); } string _if(string condition, string body) { - return string("if (") + condition + ")\n" + - indent(body) + "\n"; + return join({ + "if (" + condition + ")", + indent(body), "" + }); } class CCodeGenerator { @@ -151,44 +174,56 @@ namespace tree_sitter { } string symbol_enum() { - string result = "typedef enum {\n"; + string result = "enum ts_symbol {\n"; for (string rule_name : rule_names) result += indent(symbol_id(rule_name)) + ",\n"; result += indent(symbol_id(ParseTable::END_OF_INPUT)); - return result + "\n" - "} ts_symbol;"; + return result + "\n};"; } - + + string rule_names_list() { + string result = "static const char *ts_symbol_names[] = {\n"; + for (string rule_name : rule_names) + result += indent(string("\"") + rule_name) + "\",\n"; + result += indent(string("\"") + ParseTable::END_OF_INPUT + "\""); + return result + "\n};"; + } + string includes() { - return string( - "#include \"runtime.h\"\n" - "#include "); + return join({ + "#include \"parser.h\"", + "#include " + }); } string parse_function() { - return - "TSTree ts_parse_arithmetic(const char *input) {\n" + - indent("START_PARSER();") + "\n" + - indent(switch_on_parse_state()) + "\n" + - indent("FINISH_PARSER();") + "\n" - "}"; + return join({ + "TSTree ts_parse_arithmetic(const char *input) {", + indent("START_PARSER();"), + indent(switch_on_parse_state()), + indent("FINISH_PARSER();"), + "}" + }); } string lex_function() { - return - "static void ts_lex(TSParser *parser) {\n" + - indent("START_LEXER();") + "\n" + - indent(switch_on_lex_state()) + "\n" + - indent("FINISH_LEXER();") + "\n" - "}"; + return join({ + "static void ts_lex(TSParser *parser) {", + indent("START_LEXER();"), + indent(switch_on_lex_state()), + indent("FINISH_LEXER();"), + "}" + }); } string code() { - return - includes() + "\n\n" + - symbol_enum() + "\n\n" + - lex_function() + "\n\n" + - parse_function() + "\n"; + return join({ + includes(), + symbol_enum(), + rule_names_list(), + lex_function(), + parse_function() + }, "\n\n") + "\n"; } }; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 87adda57..d3f06c8d 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -1,49 +1,63 @@ -#include "runtime.h" - - -static int INITIAL_STATE_STACK_SIZE = 100; -static int INITIAL_SYMBOL_STACK_SIZE = 100; +#include "parser.h" +#include +static int INITIAL_STACK_SIZE = 100; TSParser TSParserMake(const char *input) { - TSState *state_stack = calloc(INITIAL_STATE_STACK_SIZE, sizeof(*state_stack)); - TSSymbol *symbol_stack = calloc(INITIAL_SYMBOL_STACK_SIZE, sizeof(*symbol_stack)); TSParser result = { - .input = input, .tree = TSTreeMake(), + .input = input, .position = 0, - .state_stack = state_stack, - .symbol_stack = symbol_stack, - .state_count = 0, - .symbol_count = 0 + .lookahead_sym = 0, + .lex_state = 0, + .stack = calloc(INITIAL_STACK_SIZE, sizeof(TSStackEntry)), + .stack_size = 0, }; return result; } -void TSParserShift(TSParser *parser, TSState state) { - +void TSParserShift(TSParser *parser, TSState parse_state) { + TSStackEntry *entry = (parser->stack + parser->stack_size); + entry->state = parse_state; + entry->symbol = parser->lookahead_sym; + parser->lookahead_sym = -1; + parser->stack_size++; } void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count) { - + parser->lookahead_sym = symbol; + parser->stack_size -= child_count; } void TSParserError(TSParser *parser) { } -TSSymbol TSParserLookahead(const TSParser *parser) { - return 1; +void TSParserAdvance(TSParser *parser, TSState lex_state) { + parser->position++; + parser->lex_state = lex_state; +} + +char TSParserLookaheadChar(const TSParser *parser) { + return parser->input[parser->position]; +} + +TSSymbol TSParserLookaheadSym(const TSParser *parser) { + return parser->lookahead_sym; +} + +void TSParserSetLookaheadSym(TSParser *parser, TSSymbol symbol) { + parser->lookahead_sym = symbol; } TSState TSParserParseState(const TSParser *parser) { - return 5; + return parser->stack[parser->stack_size - 1].state; } TSState TSParserLexState(const TSParser *parser) { - return 5; + return parser->lex_state; } -void TSParserSetLexState(const TSParser *parser, TSState lex_state) { - +void TSParserSetLexState(TSParser *parser, TSState lex_state) { + parser->lex_state = lex_state; } diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 1f4a2b5b..469284f1 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -1,9 +1,7 @@ -#include "runtime.h" - +#include "document.h" TSTree TSTreeMake() { TSTree result = { - }; return result; } \ No newline at end of file diff --git a/tree_sitter.xcodeproj/project.pbxproj b/tree_sitter.xcodeproj/project.pbxproj index 19aec9cf..7a0c37d2 100644 --- a/tree_sitter.xcodeproj/project.pbxproj +++ b/tree_sitter.xcodeproj/project.pbxproj @@ -109,6 +109,7 @@ 12130621182C85D300FCF928 /* item_set.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = item_set.h; sourceTree = ""; }; 121492E9181E200B008E9BDA /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = main.cpp; path = spec/main.cpp; sourceTree = SOURCE_ROOT; }; 121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/compiler/rules/rules_spec.cpp; sourceTree = SOURCE_ROOT; }; + 121D8B3018795CC0003CF44B /* parser.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parser.h; sourceTree = ""; }; 1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prepare_grammar_spec.cpp; sourceTree = ""; }; 1225CC6518765737000D4723 /* prepare_grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prepare_grammar.cpp; sourceTree = ""; }; 1225CC6618765737000D4723 /* prepare_grammar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = prepare_grammar.h; sourceTree = ""; }; @@ -142,7 +143,7 @@ 12FD4063185E75290041A84E /* generate_parsers.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = generate_parsers.cpp; path = spec/compiler/generate_parsers.cpp; sourceTree = SOURCE_ROOT; }; 12FD4065185E7C2F0041A84E /* arithmetic.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = arithmetic.c; path = spec/fixtures/parsers/arithmetic.c; sourceTree = SOURCE_ROOT; }; 12FD40D1185EEB5E0041A84E /* runtime_specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = runtime_specs; sourceTree = BUILT_PRODUCTS_DIR; }; - 12FD40D4185FED9A0041A84E /* runtime.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = runtime.h; sourceTree = ""; }; + 12FD40D4185FED9A0041A84E /* document.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = document.h; sourceTree = ""; }; 12FD40DA185FEF0D0041A84E /* arithmetic_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = arithmetic_spec.cpp; sourceTree = ""; }; 12FD40DC185FF12C0041A84E /* parser.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = parser.c; sourceTree = ""; }; 12FD40DE1860064C0041A84E /* tree.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = tree.c; sourceTree = ""; }; @@ -390,7 +391,8 @@ 12FD40D3185FED630041A84E /* include */ = { isa = PBXGroup; children = ( - 12FD40D4185FED9A0041A84E /* runtime.h */, + 12FD40D4185FED9A0041A84E /* document.h */, + 121D8B3018795CC0003CF44B /* parser.h */, ); path = include; sourceTree = "";