diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index e9409425..869edbd3 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -1,24 +1,25 @@ +#include #include #include #include -#include #include #include #include "compiler/generate_code/c_code.h" -#include "compiler/util/string_helpers.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/lex_table.h" +#include "compiler/parse_table.h" #include "compiler/prepared_grammar.h" +#include "compiler/rules/built_in_symbols.h" +#include "compiler/util/string_helpers.h" namespace tree_sitter { namespace generate_code { -using std::string; -using std::to_string; using std::function; using std::map; -using std::vector; using std::set; -using std::pair; +using std::string; +using std::to_string; +using std::vector; using util::escape_char; class CCodeGenerator { @@ -46,32 +47,32 @@ class CCodeGenerator { string code() { buffer = ""; - includes(); - state_and_symbol_counts(); - symbol_enum(); - symbol_names_list(); - hidden_symbols_list(); - lex_function(); - lex_states_list(); - parse_table_array(); - parser_export(); + add_includes(); + add_state_and_symbol_counts(); + add_symbol_enum(); + add_symbol_names_list(); + add_hidden_symbols_list(); + add_lex_function(); + add_lex_states_list(); + add_parse_table(); + add_parser_export(); return buffer; } private: - void includes() { + void add_includes() { add("#include \"tree_sitter/parser.h\""); line(); } - void state_and_symbol_counts() { + void add_state_and_symbol_counts() { line("#define STATE_COUNT " + to_string(parse_table.states.size())); line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size())); line(); } - void symbol_enum() { + void add_symbol_enum() { line("enum {"); indent([&]() { bool at_start = true; @@ -88,7 +89,7 @@ class CCodeGenerator { line(); } - void symbol_names_list() { + void add_symbol_names_list() { line("static const char *ts_symbol_names[] = {"); indent([&]() { for (const auto &symbol : parse_table.symbols) @@ -98,7 +99,7 @@ class CCodeGenerator { line(); } - void hidden_symbols_list() { + void add_hidden_symbols_list() { line("static const int ts_hidden_symbol_flags[SYMBOL_COUNT] = {"); indent([&]() { for (const auto &symbol : parse_table.symbols) @@ -110,17 +111,24 @@ class CCodeGenerator { line(); } - void lex_function() { + void add_lex_function() { line("static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {"); indent([&]() { line("START_LEXER();"); - switch_on_lex_state(); + _switch("lex_state", [&]() { + for (size_t i = 0; i < lex_table.states.size(); i++) + _case(lex_state_index(i), + [&]() { add_lex_state(lex_table.states[i]); }); + _case("ts_lex_state_error", + [&]() { add_lex_state(lex_table.error_state); }); + _default([&]() { line("LEX_ERROR();"); }); + }); }); line("}"); line(); } - void lex_states_list() { + void add_lex_states_list() { line("static TSStateId ts_lex_states[STATE_COUNT] = {"); indent([&]() { size_t state_id = 0; @@ -132,7 +140,7 @@ class CCodeGenerator { line(); } - void parse_table_array() { + void add_parse_table() { size_t state_id = 0; line("#pragma GCC diagnostic push"); line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""); @@ -147,7 +155,7 @@ class CCodeGenerator { indent([&]() { for (const auto &pair : state.actions) { line("[" + symbol_id(pair.first) + "] = "); - code_for_parse_action(pair.second); + add_parse_action(pair.second); add(","); } }); @@ -161,16 +169,105 @@ class CCodeGenerator { line(); } - void parser_export() { + void add_parser_export() { line("EXPORT_LANGUAGE(ts_language_" + name + ");"); line(); } - string rule_name(const rules::Symbol &symbol) { - return symbol.is_token() ? lexical_grammar.rule_name(symbol) - : syntax_grammar.rule_name(symbol); + void add_lex_state(const LexState &lex_state) { + auto expected_inputs = lex_state.expected_inputs(); + if (lex_state.is_token_start) + line("START_TOKEN();"); + for (const auto &pair : lex_state.actions) + if (!pair.first.is_empty()) + _if([&]() { add_character_set_condition(pair.first); }, + [&]() { add_lex_actions(pair.second, expected_inputs); }); + add_lex_actions(lex_state.default_action, expected_inputs); } + void add_character_set_condition(const rules::CharacterSet &rule) { + if (rule.includes_all) { + add("!("); + add_character_range_conditions(rule.excluded_ranges()); + add(")"); + } else { + add_character_range_conditions(rule.included_ranges()); + } + } + + void add_character_range_conditions(const vector &ranges) { + if (ranges.size() == 1) { + add_character_range_condition(*ranges.begin()); + } else { + bool first = true; + for (const auto &range : ranges) { + if (!first) { + add(" ||"); + line(); + add_padding(); + } + + add("("); + add_character_range_condition(range); + add(")"); + + first = false; + } + } + } + + void add_character_range_condition(const rules::CharacterRange &range) { + string lookahead("lookahead"); + if (range.min == range.max) { + add(lookahead + " == " + escape_char(range.min)); + } else { + add(escape_char(range.min) + string(" <= ") + lookahead + " && " + + lookahead + " <= " + escape_char(range.max)); + } + } + + void add_lex_actions(const LexAction &action, + const set &expected_inputs) { + switch (action.type) { + case LexActionTypeAdvance: + line("ADVANCE(" + lex_state_index(action.state_index) + ");"); + break; + case LexActionTypeAccept: + line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");"); + break; + case LexActionTypeError: + line("LEX_ERROR();"); + break; + default: {} + } + } + + void add_parse_action(const ParseAction &action) { + switch (action.type) { + case ParseActionTypeAccept: + add("ACCEPT_INPUT()"); + break; + case ParseActionTypeShift: + add("SHIFT(" + to_string(action.state_index) + ")"); + break; + case ParseActionTypeShiftExtra: + add("SHIFT_EXTRA()"); + break; + case ParseActionTypeReduce: + add("REDUCE(" + symbol_id(action.symbol) + ", " + + to_string(action.consumed_symbol_count) + ")"); + break; + case ParseActionTypeReduceExtra: + add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")"); + break; + default: {} + } + } + + // Helper functions + + string lex_state_index(size_t i) { return to_string(i + 1); } + string symbol_id(const rules::Symbol &symbol) { if (symbol.is_built_in()) { if (symbol == rules::ERROR()) @@ -188,6 +285,49 @@ class CCodeGenerator { } } + string symbol_name(const rules::Symbol &symbol) { + if (symbol.is_built_in()) { + if (symbol == rules::ERROR()) + return "error"; + else if (symbol == rules::END_OF_INPUT()) + return "end"; + else + return "DOCUMENT"; + } else { + return rule_name(symbol); + } + } + + string rule_name(const rules::Symbol &symbol) { + return symbol.is_token() ? lexical_grammar.rule_name(symbol) + : syntax_grammar.rule_name(symbol); + } + + // C-code generation functions + + void _switch(string condition, function body) { + line("switch (" + condition + ") {"); + indent(body); + line("}"); + } + + void _case(string value, function body) { + line("case " + value + ":"); + indent(body); + } + + void _default(function body) { + line("default:"); + indent(body); + } + + void _if(function condition, function body) { + line("if ("); + indent(condition); + add(")"); + indent(body); + } + string sanitize_name(string name) { auto existing = sanitized_names.find(name); if (existing != sanitized_names.end()) @@ -220,165 +360,30 @@ class CCodeGenerator { return false; } - string lex_state_index(size_t i) { return to_string(i + 1); } - - string symbol_name(const rules::Symbol &symbol) { - if (symbol.is_built_in()) { - if (symbol == rules::ERROR()) - return "error"; - else if (symbol == rules::END_OF_INPUT()) - return "end"; - else - return "DOCUMENT"; - } else if (symbol.is_token() && symbol.is_auxiliary()) { - return rule_name(symbol); - } else { - return rule_name(symbol); - } - } - - string condition_for_character_range(const rules::CharacterRange &range) { - string lookahead("lookahead"); - if (range.min == range.max) { - return lookahead + " == " + escape_char(range.min); - } else { - return escape_char(range.min) + string(" <= ") + lookahead + " && " + - lookahead + " <= " + escape_char(range.max); - } - } - - void condition_for_character_ranges(const vector &ranges) { - if (ranges.size() == 1) { - add(condition_for_character_range(*ranges.begin())); - } else { - bool first = true; - for (const auto &range : ranges) { - string part = "(" + condition_for_character_range(range) + ")"; - if (first) { - add(part); - } else { - add(" ||"); - line(part); - } - first = false; - } - } - } - - void condition_for_character_set(const rules::CharacterSet &rule) { - if (rule.includes_all) { - add("!("); - condition_for_character_ranges(rule.excluded_ranges()); - add(")"); - } else { - condition_for_character_ranges(rule.included_ranges()); - } - } - - void code_for_parse_action(const ParseAction &action) { - switch (action.type) { - case ParseActionTypeAccept: - add("ACCEPT_INPUT()"); - break; - case ParseActionTypeShift: - add("SHIFT(" + to_string(action.state_index) + ")"); - break; - case ParseActionTypeShiftExtra: - add("SHIFT_EXTRA()"); - break; - case ParseActionTypeReduce: - add("REDUCE(" + symbol_id(action.symbol) + ", " + - to_string(action.consumed_symbol_count) + ")"); - break; - case ParseActionTypeReduceExtra: - add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")"); - break; - default: {} - } - } - - void code_for_lex_actions(const LexAction &action, - const set &expected_inputs) { - switch (action.type) { - case LexActionTypeAdvance: - line("ADVANCE(" + lex_state_index(action.state_index) + ");"); - break; - case LexActionTypeAccept: - line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");"); - break; - case LexActionTypeError: - line("LEX_ERROR();"); - break; - default: {} - } - } - - void code_for_lex_state(const LexState &lex_state) { - auto expected_inputs = lex_state.expected_inputs(); - if (lex_state.is_token_start) - line("START_TOKEN();"); - for (const auto &pair : lex_state.actions) - if (!pair.first.is_empty()) - _if([&]() { condition_for_character_set(pair.first); }, - [&]() { code_for_lex_actions(pair.second, expected_inputs); }); - code_for_lex_actions(lex_state.default_action, expected_inputs); - } - - void switch_on_lex_state() { - _switch("lex_state", [&]() { - for (size_t i = 0; i < lex_table.states.size(); i++) - _case(lex_state_index(i), - [&]() { code_for_lex_state(lex_table.states[i]); }); - _case("ts_lex_state_error", - [&]() { code_for_lex_state(lex_table.error_state); }); - _default([&]() { line("LEX_ERROR();"); }); - }); - } - - void _switch(string condition, function body) { - line("switch (" + condition + ") {"); - indent(body); - line("}"); - } - - void _case(string value, function body) { - line("case " + value + ":"); - indent(body); - } - - void _default(function body) { - line("default:"); - indent(body); - } - - void _if(function condition, function body) { - line("if ("); - indent(condition); - add(")"); - indent(body); - } + // General code generation functions void line() { line(""); } void line(string input) { add("\n"); if (!input.empty()) { - string space; - for (size_t i = 0; i < indent_level; i++) - space += " "; - add(space + input); + add_padding(); + add(input); } } - void add(string input) { buffer += input; } - - void indent(function body) { indent(body, 1); } - - void indent(function body, size_t n) { - indent_level += n; - body(); - indent_level -= n; + void add_padding() { + for (size_t i = 0; i < indent_level; i++) + add(" "); } + + void indent(function body) { + indent_level++; + body(); + indent_level--; + } + + void add(string input) { buffer += input; } }; string c_code(string name, const ParseTable &parse_table, diff --git a/src/compiler/generate_code/c_code.h b/src/compiler/generate_code/c_code.h index f63ab248..c520c2fb 100644 --- a/src/compiler/generate_code/c_code.h +++ b/src/compiler/generate_code/c_code.h @@ -2,20 +2,18 @@ #define COMPILER_GENERATE_CODE_C_CODE_H_ #include -#include -#include "compiler/parse_table.h" -#include "compiler/lex_table.h" namespace tree_sitter { -class SyntaxGrammar; + +class LexTable; class LexicalGrammar; +class ParseTable; +class SyntaxGrammar; namespace generate_code { -std::string c_code(std::string name, const ParseTable &parse_table, - const LexTable &lex_table, - const SyntaxGrammar &syntax_grammar, - const LexicalGrammar &lexical_grammar); +std::string c_code(std::string, const ParseTable &, const LexTable &, + const SyntaxGrammar &, const LexicalGrammar &); } // namespace generate_code } // namespace tree_sitter