diff --git a/TreeSitter.xcodeproj/project.pbxproj b/TreeSitter.xcodeproj/project.pbxproj index ba8ea3a2..f0610a17 100644 --- a/TreeSitter.xcodeproj/project.pbxproj +++ b/TreeSitter.xcodeproj/project.pbxproj @@ -29,6 +29,8 @@ 12F8BE8E183C79B2006CCF99 /* char_class.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F8BE8C183C79B2006CCF99 /* char_class.cpp */; }; 12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */; }; 12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; }; + 12FD4061185E68470041A84E /* c_code.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD405F185E68470041A84E /* c_code.cpp */; }; + 12FD4064185E75290041A84E /* code_gen_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD4063185E75290041A84E /* code_gen_spec.cpp */; }; 27A343CA69E17E0F9EBEDF1C /* pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* pattern.cpp */; }; /* End PBXBuildFile section */ @@ -158,6 +160,10 @@ 12F9A64D182DD5FD00FAF50C /* spec_helper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = spec_helper.h; path = spec/spec_helper.h; sourceTree = SOURCE_ROOT; }; 12F9A64F182DD6BC00FAF50C /* grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = grammar.cpp; sourceTree = ""; }; 12F9A650182DD6BC00FAF50C /* grammar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = grammar.h; sourceTree = ""; }; + 12FD405F185E68470041A84E /* c_code.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = c_code.cpp; path = src/code_gen/c_code.cpp; sourceTree = SOURCE_ROOT; }; + 12FD4060185E68470041A84E /* c_code.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = c_code.h; path = src/code_gen/c_code.h; sourceTree = SOURCE_ROOT; }; + 12FD4063185E75290041A84E /* code_gen_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = code_gen_spec.cpp; path = spec/code_gen_spec.cpp; sourceTree = SOURCE_ROOT; }; + 12FD4065185E7C2F0041A84E /* arithmetic.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = arithmetic.c; path = spec/test_parsers/arithmetic.c; sourceTree = SOURCE_ROOT; }; 27A340F3EEB184C040521323 /* pattern.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pattern.cpp; sourceTree = ""; }; 27A3438C4FA59A3882E8493B /* pattern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pattern.h; sourceTree = ""; }; /* End PBXFileReference section */ @@ -458,6 +464,7 @@ 12E71701181D01890051A649 /* src */ = { isa = PBXGroup; children = ( + 12FD4067185E8AF40041A84E /* code_gen */, 12F9A64F182DD6BC00FAF50C /* grammar.cpp */, 12F9A650182DD6BC00FAF50C /* grammar.h */, 12130618182C84B700FCF928 /* lr */, @@ -479,16 +486,35 @@ 12E71796181D02A80051A649 /* spec */ = { isa = PBXGroup; children = ( - 12D1369F18357066005F3369 /* rules */, - 125120A118307FCA00C9B56A /* test_grammars */, + 12FD4063185E75290041A84E /* code_gen_spec.cpp */, 1214925C181E200B008E9BDA /* externals */, 1213061C182C854F00FCF928 /* lr */, 121492E9181E200B008E9BDA /* main.cpp */, + 12D1369F18357066005F3369 /* rules */, 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */, 12F9A64D182DD5FD00FAF50C /* spec_helper.h */, + 125120A118307FCA00C9B56A /* test_grammars */, + 12FD4062185E74DF0041A84E /* test_parsers */, ); - name = spec; - path = Specs; + path = spec; + sourceTree = ""; + }; + 12FD4062185E74DF0041A84E /* test_parsers */ = { + isa = PBXGroup; + children = ( + 12FD4065185E7C2F0041A84E /* arithmetic.c */, + ); + name = test_parsers; + path = spec/test_parsers; + sourceTree = ""; + }; + 12FD4067185E8AF40041A84E /* code_gen */ = { + isa = PBXGroup; + children = ( + 12FD405F185E68470041A84E /* c_code.cpp */, + 12FD4060185E68470041A84E /* c_code.h */, + ); + path = code_gen; sourceTree = ""; }; /* End PBXGroup section */ @@ -553,7 +579,9 @@ 12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */, 1213060E182C398300FCF928 /* choice.cpp in Sources */, 12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */, + 12FD4061185E68470041A84E /* c_code.cpp in Sources */, 125120A018307DEC00C9B56A /* parse_table.cpp in Sources */, + 12FD4064185E75290041A84E /* code_gen_spec.cpp in Sources */, 1214930E181E200B008E9BDA /* main.cpp in Sources */, 12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */, 12512093182F307C00C9B56A /* parse_table_builder_spec.cpp in Sources */, diff --git a/spec/code_gen_spec.cpp b/spec/code_gen_spec.cpp new file mode 100644 index 00000000..1eb6edcc --- /dev/null +++ b/spec/code_gen_spec.cpp @@ -0,0 +1,21 @@ +#include "spec_helper.h" +#include "test_grammars/arithmetic.h" +#include "parse_table_builder.h" +#include "parse_table.h" +#include "c_code.h" +#include + +using namespace std; +using namespace tree_sitter::lr; +using namespace tree_sitter; + +Describe(code_generation) { + string test_parser_dir = src_dir() + "/spec/test_parsers"; + + It(works_for_the_arithmetic_grammar) { + Grammar grammar = test_grammars::arithmetic(); + ParseTable table = ParseTableBuilder::build_table(grammar); + std::ofstream parser_file(test_parser_dir + "/arithmetic.c"); + parser_file << code_gen::c_code(grammar, table); + } +}; \ No newline at end of file diff --git a/spec/spec_helper.cpp b/spec/spec_helper.cpp index 2020d65d..e2b2dd89 100644 --- a/spec/spec_helper.cpp +++ b/spec/spec_helper.cpp @@ -27,3 +27,7 @@ namespace tree_sitter { } } } + +string src_dir() { + return string(getenv("TREESITTER_SRC_DIR")); +} \ No newline at end of file diff --git a/spec/spec_helper.h b/spec/spec_helper.h index fce2e83a..6bf6fc9c 100644 --- a/spec/spec_helper.h +++ b/spec/spec_helper.h @@ -25,4 +25,6 @@ namespace tree_sitter { } } +string src_dir(); + #endif diff --git a/spec/test_parsers/arithmetic.c b/spec/test_parsers/arithmetic.c new file mode 100644 index 00000000..b99535b6 --- /dev/null +++ b/spec/test_parsers/arithmetic.c @@ -0,0 +1,369 @@ +#include +#include + + +typedef enum { + ts_symbol_type_expression, + ts_symbol_type_term, + ts_symbol_type_right_paren, + ts_symbol_type_number, + ts_symbol_type_factor, + ts_symbol_type_variable, + ts_symbol_type_plus, + ts_symbol_type_times, + ts_symbol_type_left_paren, + ts_symbol_type___END__ +} ts_symbol_type; + + +TSTree * ts_parse_arithmetic() { + SETUP_PARSER() +start: + switch (PARSE_STATE()) { + case 0: { + switch (LOOKAHEAD()) { + case ts_symbol_type_left_paren: { + SHIFT(9); + break; + } + case ts_symbol_type_variable: { + SHIFT(8); + break; + } + case ts_symbol_type_factor: { + SHIFT(5); + break; + } + case ts_symbol_type_number: { + SHIFT(8); + break; + } + case ts_symbol_type_term: { + SHIFT(2); + break; + } + case ts_symbol_type_expression: { + SHIFT(1); + break; + } + default: { + ERROR(); + } + } + break; + } + case 1: { + switch (LOOKAHEAD()) { + case ts_symbol_type___END__: { + ACCEPT(); + break; + } + default: { + ERROR(); + } + } + break; + } + case 2: { + switch (LOOKAHEAD()) { + case ts_symbol_type_plus: { + SHIFT(3); + break; + } + default: { + ERROR(); + } + } + break; + } + case 3: { + switch (LOOKAHEAD()) { + case ts_symbol_type_variable: { + SHIFT(8); + break; + } + case ts_symbol_type_factor: { + SHIFT(5); + break; + } + case ts_symbol_type_left_paren: { + SHIFT(9); + break; + } + case ts_symbol_type_number: { + SHIFT(8); + break; + } + case ts_symbol_type_term: { + SHIFT(4); + break; + } + default: { + ERROR(); + } + } + break; + } + case 4: { + switch (LOOKAHEAD()) { + case ts_symbol_type_expression: { + REDUCE(ts_symbol_type_expression, 3); + break; + } + case ts_symbol_type_term: { + REDUCE(ts_symbol_type_expression, 3); + break; + } + case ts_symbol_type_right_paren: { + REDUCE(ts_symbol_type_expression, 3); + break; + } + case ts_symbol_type_number: { + REDUCE(ts_symbol_type_expression, 3); + break; + } + case ts_symbol_type_factor: { + REDUCE(ts_symbol_type_expression, 3); + break; + } + case ts_symbol_type_variable: { + REDUCE(ts_symbol_type_expression, 3); + break; + } + case ts_symbol_type_times: { + REDUCE(ts_symbol_type_expression, 3); + break; + } + case ts_symbol_type_plus: { + REDUCE(ts_symbol_type_expression, 3); + break; + } + case ts_symbol_type_left_paren: { + REDUCE(ts_symbol_type_expression, 3); + break; + } + default: { + ERROR(); + } + } + break; + } + case 5: { + switch (LOOKAHEAD()) { + case ts_symbol_type_times: { + SHIFT(6); + break; + } + default: { + ERROR(); + } + } + break; + } + case 6: { + switch (LOOKAHEAD()) { + case ts_symbol_type_left_paren: { + SHIFT(9); + break; + } + case ts_symbol_type_number: { + SHIFT(8); + break; + } + case ts_symbol_type_variable: { + SHIFT(8); + break; + } + case ts_symbol_type_factor: { + SHIFT(7); + break; + } + default: { + ERROR(); + } + } + break; + } + case 7: { + switch (LOOKAHEAD()) { + case ts_symbol_type_expression: { + REDUCE(ts_symbol_type_term, 3); + break; + } + case ts_symbol_type_term: { + REDUCE(ts_symbol_type_term, 3); + break; + } + case ts_symbol_type_right_paren: { + REDUCE(ts_symbol_type_term, 3); + break; + } + case ts_symbol_type_number: { + REDUCE(ts_symbol_type_term, 3); + break; + } + case ts_symbol_type_factor: { + REDUCE(ts_symbol_type_term, 3); + break; + } + case ts_symbol_type_variable: { + REDUCE(ts_symbol_type_term, 3); + break; + } + case ts_symbol_type_times: { + REDUCE(ts_symbol_type_term, 3); + break; + } + case ts_symbol_type_plus: { + REDUCE(ts_symbol_type_term, 3); + break; + } + case ts_symbol_type_left_paren: { + REDUCE(ts_symbol_type_term, 3); + break; + } + default: { + ERROR(); + } + } + break; + } + case 8: { + switch (LOOKAHEAD()) { + case ts_symbol_type_expression: { + REDUCE(ts_symbol_type_factor, 1); + break; + } + case ts_symbol_type_term: { + REDUCE(ts_symbol_type_factor, 1); + break; + } + case ts_symbol_type_right_paren: { + REDUCE(ts_symbol_type_factor, 1); + break; + } + case ts_symbol_type_number: { + REDUCE(ts_symbol_type_factor, 1); + break; + } + case ts_symbol_type_factor: { + REDUCE(ts_symbol_type_factor, 1); + break; + } + case ts_symbol_type_variable: { + REDUCE(ts_symbol_type_factor, 1); + break; + } + case ts_symbol_type_times: { + REDUCE(ts_symbol_type_factor, 1); + break; + } + case ts_symbol_type_plus: { + REDUCE(ts_symbol_type_factor, 1); + break; + } + case ts_symbol_type_left_paren: { + REDUCE(ts_symbol_type_factor, 1); + break; + } + default: { + ERROR(); + } + } + break; + } + case 9: { + switch (LOOKAHEAD()) { + case ts_symbol_type_left_paren: { + SHIFT(9); + break; + } + case ts_symbol_type_variable: { + SHIFT(8); + break; + } + case ts_symbol_type_factor: { + SHIFT(5); + break; + } + case ts_symbol_type_number: { + SHIFT(8); + break; + } + case ts_symbol_type_term: { + SHIFT(2); + break; + } + case ts_symbol_type_expression: { + SHIFT(10); + break; + } + default: { + ERROR(); + } + } + break; + } + case 10: { + switch (LOOKAHEAD()) { + case ts_symbol_type_right_paren: { + SHIFT(11); + break; + } + default: { + ERROR(); + } + } + break; + } + case 11: { + switch (LOOKAHEAD()) { + case ts_symbol_type_expression: { + REDUCE(ts_symbol_type_factor, 3); + break; + } + case ts_symbol_type_term: { + REDUCE(ts_symbol_type_factor, 3); + break; + } + case ts_symbol_type_right_paren: { + REDUCE(ts_symbol_type_factor, 3); + break; + } + case ts_symbol_type_number: { + REDUCE(ts_symbol_type_factor, 3); + break; + } + case ts_symbol_type_factor: { + REDUCE(ts_symbol_type_factor, 3); + break; + } + case ts_symbol_type_variable: { + REDUCE(ts_symbol_type_factor, 3); + break; + } + case ts_symbol_type_times: { + REDUCE(ts_symbol_type_factor, 3); + break; + } + case ts_symbol_type_plus: { + REDUCE(ts_symbol_type_factor, 3); + break; + } + case ts_symbol_type_left_paren: { + REDUCE(ts_symbol_type_factor, 3); + break; + } + default: { + ERROR(); + } + } + break; + } + default: { + ERROR() + } + } +done: + return PARSE_TREE(); +} diff --git a/src/code_gen/c_code.cpp b/src/code_gen/c_code.cpp new file mode 100644 index 00000000..a2ac9c39 --- /dev/null +++ b/src/code_gen/c_code.cpp @@ -0,0 +1,142 @@ +#include "c_code.h" +#include +#include +#include + +using std::string; +using std::unordered_map; +using std::unordered_set; +using std::vector; +using namespace tree_sitter::lr; + +namespace tree_sitter { + namespace code_gen { + static void str_replace(string &input, const string &search, const string &replace) { + size_t pos = 0; + while (1) { + pos = input.find(search, pos); + if (pos == string::npos) break; + input.erase(pos, search.length()); + input.insert(pos, replace); + pos += replace.length(); + } + } + + string indent(std::string input) { + string tab = " "; + str_replace(input, "\n", "\n" + tab); + return tab + input; + } + + string _switch(string condition, string body) { + return "switch (" + condition + ") {\n" + + indent(body) + "\n" + + "}"; + } + + string _case(string value, string body) { + return "case " + value + ": {\n" + + indent(body) + "\n" + + indent("break;") + "\n" + "}\n"; + } + + string _default(string body) { + return "default: {\n" + + indent(body) + "\n" + "}"; + } + + class CCodeGenerator { + const Grammar grammar; + const ParseTable parse_table; + const unordered_map symbol_ids; + public: + static unordered_map get_symbol_ids(vector rule_names) { + size_t i = 0; + unordered_map result; + for (string name : rule_names) { + result[name] = i; + i++; + } + result[ParseTable::END_OF_INPUT] = i; + return result; + } + + CCodeGenerator(const Grammar &grammar, const ParseTable &parse_table) : + grammar(grammar), + parse_table(parse_table), + symbol_ids(get_symbol_ids(grammar.rule_names())) + {} + + string symbol_id(string symbol_name) { + return "ts_symbol_type_" + symbol_name; + } + + string code_for_actions(const unordered_set &actions) { + auto action = *actions.begin(); + switch (action.type) { + case ParseActionTypeAccept: + return "ACCEPT();"; + case ParseActionTypeShift: + return "SHIFT(" + std::to_string(action.state_index) + ");"; + case ParseActionTypeReduce: + return "REDUCE(" + symbol_id(action.symbol_name) + ", " + std::to_string(action.child_symbol_count) + ");"; + default: + return ""; + } + } + + string switch_on_lookahead(const ParseState &parse_state) { + string body = ""; + for (auto pair : parse_state.actions) + body += _case(symbol_id(pair.first), code_for_actions(pair.second)); + body += _default("ERROR();"); + return _switch("LOOKAHEAD()", body); + } + + string switch_on_current_state(const ParseTable &parse_table) { + string body = ""; + for (int i = 0; i < parse_table.states.size(); i++) + body += _case(std::to_string(i), switch_on_lookahead(parse_table.states[i])); + body += _default("ERROR()"); + return _switch("PARSE_STATE()", body); + } + + string symbol_enum() { + string result = "typedef enum {\n"; + for (string rule_name : grammar.rule_names()) + result += indent(symbol_id(rule_name)) + ",\n"; + result += indent(symbol_id(ParseTable::END_OF_INPUT)); + return result + "\n" + "} ts_symbol_type;\n"; + } + + string parse_function() { + return + "TSTree * ts_parse_arithmetic() {\n" + + indent("SETUP_PARSER()") + "\n" + + "start:\n" + + indent(switch_on_current_state(parse_table)) + "\n" + + "done:\n" + + indent("return PARSE_TREE();") + "\n" + "}"; + } + + string code() { + return + "#include \n" + "#include \n" + "\n\n" + + symbol_enum() + + "\n\n" + + parse_function() + + "\n"; + } + }; + + string c_code(const Grammar &grammar, const ParseTable &parse_table) { + return CCodeGenerator(grammar, parse_table).code(); + } + } +} \ No newline at end of file diff --git a/src/code_gen/c_code.h b/src/code_gen/c_code.h new file mode 100644 index 00000000..c190f291 --- /dev/null +++ b/src/code_gen/c_code.h @@ -0,0 +1,13 @@ +#ifndef __tree_sitter__code_generator__ +#define __tree_sitter__code_generator__ + +#include "grammar.h" +#include "parse_table.h" + +namespace tree_sitter { + namespace code_gen { + std::string c_code(const Grammar &grammar, const lr::ParseTable &parse_table); + } +} + +#endif