tree-sitter/src/compiler/generate_code/c_code.cc

284 lines
11 KiB
C++
Raw Normal View History

#include <map>
#include <set>
2014-03-09 22:45:33 -07:00
#include <string>
#include <utility>
#include <vector>
2014-03-09 21:37:21 -07:00
#include "compiler/generate_code/c_code.h"
#include "compiler/generate_code/helpers.h"
#include "compiler/rules/built_in_symbols.h"
2013-12-15 19:33:34 -08:00
namespace tree_sitter {
2014-02-12 23:06:26 -08:00
using std::string;
using std::to_string;
using std::map;
using std::vector;
using std::set;
using std::pair;
2014-03-09 19:49:35 -07:00
2014-01-11 15:14:17 -08:00
namespace generate_code {
2013-12-15 19:33:34 -08:00
// Renders a C `switch` statement: the header line, the body passed through
// indent(), and the closing brace, combined with join().
string _switch(string condition, string body) {
    const string header = "switch (" + condition + ") {";
    const vector<string> lines = { header, indent(body), "}" };
    return join(lines);
}
2014-03-09 19:49:35 -07:00
2013-12-15 19:33:34 -08:00
// Renders a `case value:` label followed by the body passed through indent().
// A trailing empty element is handed to join() as well (presumably to leave a
// line break after the case; join() is defined elsewhere).
string _case(string value, string body) {
    const string label = "case " + value + ":";
    const vector<string> lines = { label, indent(body), "" };
    return join(lines);
}
// Renders a `default:` label followed by the body passed through indent().
string _default(string body) {
    const vector<string> lines = { "default:", indent(body) };
    return join(lines);
}
2014-03-09 19:49:35 -07:00
2013-12-27 17:31:08 -08:00
// Renders a brace-less `if (condition)` line followed by the body passed
// through indent(). A trailing empty element is handed to join() as well.
string _if(string condition, string body) {
    const string header = "if (" + condition + ")";
    const vector<string> lines = { header, indent(body), "" };
    return join(lines);
}
2014-03-09 19:49:35 -07:00
2013-12-15 19:33:34 -08:00
class CCodeGenerator {
2014-02-18 09:07:00 -08:00
const string name;
2013-12-15 19:33:34 -08:00
const ParseTable parse_table;
2013-12-27 17:31:08 -08:00
const LexTable lex_table;
2013-12-15 19:33:34 -08:00
public:
CCodeGenerator(string name, const ParseTable &parse_table, const LexTable &lex_table) :
name(name),
2013-12-27 17:31:08 -08:00
parse_table(parse_table),
lex_table(lex_table)
2013-12-15 19:33:34 -08:00
{}
2014-03-09 19:49:35 -07:00
// Returns the C identifier used for a symbol in the generated code.
// Built-in symbols map to one of two fixed identifiers (the error symbol, or
// the end symbol for every other built-in); all other symbols get a prefix
// that depends on whether they are auxiliary, followed by the symbol's name.
string symbol_id(rules::Symbol symbol) {
    if (symbol.is_built_in())
        return (symbol == rules::ERROR) ? "ts_builtin_sym_error" : "ts_builtin_sym_end";

    if (symbol.is_auxiliary())
        return "ts_aux_sym_" + symbol.name;

    return "ts_sym_" + symbol.name;
}
2014-03-09 19:49:35 -07:00
2014-01-22 23:04:11 -08:00
// Returns the text to place between quotes in the generated C source for the
// given character, escaping characters that cannot appear there verbatim.
//
// The result is used inside a single-quoted character literal by
// condition_for_character_range(), so a single quote must be escaped as well;
// otherwise the generated code would contain the invalid token `'''`.
// Carriage return and tab are escaped so that no raw control characters end
// up inside a literal in the generated file.
string character_code(char character) {
    switch (character) {
        case '\0':
            return "\\0";
        case '"':
            return "\\\"";
        case '\'':
            return "\\'";
        case '\n':
            return "\\n";
        case '\r':
            return "\\r";
        case '\t':
            return "\\t";
        case '\\':
            return "\\\\";
        default:
            // Every other character is emitted unchanged.
            return string(1, character);
    }
}
2014-03-09 19:49:35 -07:00
// Builds a C boolean expression that tests LOOKAHEAD_CHAR() against a range.
// A range whose bounds are equal becomes a single equality test; any other
// range becomes a pair of inclusive bound checks joined with `&&`.
string condition_for_character_range(const rules::CharacterRange &range) {
    const string lookahead("LOOKAHEAD_CHAR()");

    if (range.min != range.max) {
        string condition("'");
        condition += character_code(range.min);
        condition += "' <= ";
        condition += lookahead;
        condition += " && ";
        condition += lookahead;
        condition += " <= '";
        condition += character_code(range.max);
        condition += "'";
        return condition;
    }

    return lookahead + " == '" + character_code(range.min) + "'";
}
2014-03-09 19:49:35 -07:00
// Builds a C boolean expression matching any range of the given set.
// A set with exactly one range yields that range's condition as-is; otherwise
// every range condition is parenthesized and the pieces are joined with `||`.
string condition_for_character_set(const rules::CharacterSet &set) {
    if (set.ranges.size() == 1)
        return condition_for_character_range(*set.ranges.begin());

    vector<string> alternatives;
    for (auto &range : set.ranges) {
        const string condition = condition_for_character_range(range);
        alternatives.push_back("(" + condition + ")");
    }
    return join(alternatives, " ||\n ");
}
2014-03-09 19:49:35 -07:00
// Builds the C condition for a character rule.
// most_compact_representation() returns a character set together with a flag.
// When the flag is true, the returned set is tested directly; otherwise the
// condition is the negation of a test against the rule's complement.
string condition_for_character_rule(const rules::CharacterSet &rule) {
    const pair<rules::CharacterSet, bool> representation = rule.most_compact_representation();
    if (representation.second)
        return condition_for_character_set(representation.first);
    return "!(" + condition_for_character_set(rule.complement()) + ")";
}
2014-03-09 19:49:35 -07:00
// Renders a list of booleans as a comma-separated sequence of `1` and `0`,
// e.g. {true, false} -> "1, 0". An empty list yields an empty string.
string collapse_flags(vector<bool> flags) {
    string rendered;
    for (size_t index = 0; index < flags.size(); ++index) {
        if (index > 0)
            rendered += ", ";
        if (flags[index])
            rendered += "1";
        else
            rendered += "0";
    }
    return rendered;
}
2014-03-09 19:49:35 -07:00
// Renders the macro invocation for the parse action taken on `symbol`.
// Only the first action of the set is rendered. Accept, shift and reduce
// actions map to ACCEPT_INPUT, SHIFT and REDUCE invocations respectively;
// any other action type, and an empty action set, yield an empty string.
string code_for_parse_actions(const rules::Symbol &symbol, const set<ParseAction> &actions) {
    // Dereferencing begin() of an empty set is undefined behavior, so bail
    // out with the same result that unknown action types produce.
    if (actions.empty())
        return "";

    const string sym_id = symbol_id(symbol);
    const ParseAction &action = *actions.begin();
    switch (action.type) {
        case ParseActionTypeAccept:
            return "ACCEPT_INPUT(" + sym_id + ")";
        case ParseActionTypeShift:
            return "SHIFT(" + sym_id + ", " + to_string(action.state_index) + ")";
        case ParseActionTypeReduce:
            return "REDUCE(" +
                sym_id + ", " +
                symbol_id(action.symbol) + ", " +
                to_string(action.child_flags.size()) + ", " +
                "COLLAPSE({" + collapse_flags(action.child_flags) + "}))";
        default:
            return "";
    }
}
2014-03-09 19:49:35 -07:00
// Renders a PARSE_ERROR(...) invocation: the number of expected symbols
// followed by an EXPECT({...}) list of their identifiers, comma-separated.
string parse_error_call(const set<rules::Symbol> &expected_inputs) {
    string call = "PARSE_ERROR(" + to_string(expected_inputs.size()) + ", EXPECT({";
    for (auto it = expected_inputs.begin(); it != expected_inputs.end(); ++it) {
        // Separator goes before every element except the first one.
        if (it != expected_inputs.begin())
            call += ", ";
        call += symbol_id(*it);
    }
    call += "}));";
    return call;
}
2013-12-27 17:31:08 -08:00
2014-03-09 23:51:33 -07:00
// Renders the statement for the lex action to take.
// An empty action set yields LEX_ERROR(); otherwise only the first action of
// the set is rendered: advance actions become ADVANCE(state) and accept
// actions become ACCEPT_TOKEN(symbol). Error actions, and any other action
// type, yield an empty string.
//
// `expected_inputs` is accepted for the callers' sake but is not used here.
string code_for_lex_actions(const set<LexAction> &actions,
                            const set<rules::CharacterSet> &expected_inputs) {
    if (actions.empty())
        return "LEX_ERROR();";

    const LexAction &action = *actions.begin();
    switch (action.type) {
        case LexActionTypeAdvance:
            return "ADVANCE(" + to_string(action.state_index) + ");";
        case LexActionTypeAccept:
            return "ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");";
        case LexActionTypeError:
        default:
            // The default label guarantees every path returns a value;
            // flowing off the end of this function would be undefined.
            return "";
    }
}
// Renders the body for one lex state: one `if` per entry of the state's
// action map (condition built from the entry's character rule, body from its
// actions), followed by the code for the state's default actions.
string switch_on_lookahead_char(const LexState &parse_state) {
    string result;
    const auto expected_inputs = parse_state.expected_inputs();

    // Iterate by const reference: each entry holds a character set and a set
    // of actions, so copying it on every iteration is needless work.
    for (const auto &entry : parse_state.actions)
        result += _if(condition_for_character_rule(entry.first),
                      code_for_lex_actions(entry.second, expected_inputs));

    result += code_for_lex_actions(parse_state.default_actions, expected_inputs);
    return result;
}
// Renders the `switch (LEX_STATE())` statement of the lexer: one case per
// entry of lex_table.states labelled with its index, then a case for the
// error state, then a default branch that calls LEX_PANIC().
string switch_on_lex_state() {
    string cases;
    size_t state_index = 0;
    for (const auto &state : lex_table.states) {
        cases += _case(to_string(state_index), switch_on_lookahead_char(state));
        ++state_index;
    }
    cases += _case("ts_lex_state_error", switch_on_lookahead_char(lex_table.error_state));
    cases += _default("LEX_PANIC();");
    return _switch("LEX_STATE()", cases);
}
2014-03-21 12:46:23 -07:00
// Renders the TS_SYMBOL_COUNT #define, set to the number of entries in
// parse_table.symbols.
string symbol_count() {
    const size_t count = parse_table.symbols.size();
    return "#define TS_SYMBOL_COUNT " + to_string(count);
}
2014-03-09 19:49:35 -07:00
2013-12-15 19:33:34 -08:00
// Renders an anonymous C enum listing the identifier of every symbol in the
// parse table that is not built in, one indented entry per line.
string symbol_enum() {
    string entries;
    for (auto symbol : parse_table.symbols) {
        if (symbol.is_built_in())
            continue;
        entries += indent(symbol_id(symbol)) + ",\n";
    }
    return "enum {\n" + entries + "};";
}
// Renders the SYMBOL_NAMES initializer: the quoted name of every symbol in
// the parse table that is not built in, one indented entry per line.
string rule_names_list() {
    string entries;
    for (auto symbol : parse_table.symbols) {
        if (symbol.is_built_in())
            continue;
        entries += indent(string("\"") + symbol.name) + "\",\n";
    }
    return "SYMBOL_NAMES = {\n" + entries + "};";
}
2013-12-27 17:31:08 -08:00
// Returns the #include line placed at the top of the generated parser.
string includes() {
    const string parser_header_include("#include \"tree_sitter/parser.h\"");
    return parser_header_include;
}
2014-03-09 19:49:35 -07:00
2013-12-27 17:31:08 -08:00
// Renders the generated lexer function: a LEX_FN() block containing
// START_LEXER(); and the switch over lex states, both passed through indent().
string lex_function() {
    const vector<string> lines = {
        "LEX_FN() {",
        indent("START_LEXER();"),
        indent(switch_on_lex_state()),
        "}"
    };
    return join(lines);
}
2014-03-09 19:49:35 -07:00
// Applies `f` to every element of `inputs`, in order, and collects the
// resulting strings.
//
// The callable is a deduced template parameter rather than a std::function,
// so this helper does not depend on <functional> and does not force a copy
// of each element. Existing call sites that spell out only the element type,
// e.g. map_to_string<ParseState>(states, fn), keep compiling unchanged.
template<typename T, typename F>
vector<string> map_to_string(const vector<T> &inputs, F f) {
    vector<string> result;
    result.reserve(inputs.size());
    for (const auto &item : inputs)
        result.push_back(f(item));
    return result;
}
// Renders the body of one parse state: a SET_LEX_STATE(...) statement for the
// state's lex state id, then one line per entry of the state's action map.
string parse_table_row_for_state(const ParseState &state) {
    string row = "SET_LEX_STATE(" + to_string(state.lex_state_id) + ");";
    for (auto &entry : state.actions) {
        row += "\n";
        row += code_for_parse_actions(entry.first, entry.second);
    }
    return row;
}
// Renders the PARSE_TABLE() block: START_TABLE(n) with the number of parse
// states, one STATE(i); ... END_STATE(); section per state (numbered from 0
// in table order), and END_TABLE();, separated by blank lines.
string parse_table_function() {
    size_t next_state_id = 0;
    const auto render_state = [this, &next_state_id](ParseState state) -> string {
        const string opening = "STATE(" + to_string(next_state_id++) + ");";
        return join({
            opening,
            parse_table_row_for_state(state),
            "END_STATE();"
        });
    };

    const string opening = "START_TABLE(" + to_string(parse_table.states.size()) + ")";
    const string states_code =
        join(map_to_string<ParseState>(parse_table.states, render_state), "\n\n");
    const string table_body = join({
        opening,
        states_code,
        "END_TABLE();",
    }, "\n\n");

    return join({
        "PARSE_TABLE() {",
        indent(table_body),
        "}"
    });
}
2014-03-09 19:49:35 -07:00
// Renders the EXPORT_PARSER(...) statement for a parser symbol made of the
// `ts_parser_` prefix followed by this generator's name.
string parser_export() {
    string statement("EXPORT_PARSER(ts_parser_");
    statement += name;
    statement += ");";
    return statement;
}
2014-03-09 19:49:35 -07:00
2013-12-15 19:33:34 -08:00
// Assembles the complete generated source: every section in order, separated
// by blank lines, with a single trailing newline.
string code() {
    const vector<string> sections = {
        includes(),
        symbol_count(),
        symbol_enum(),
        rule_names_list(),
        lex_function(),
        parse_table_function(),
        parser_export(),
    };
    return join(sections, "\n\n") + "\n";
}
};
2014-03-09 19:49:35 -07:00
// Entry point: generates the C source of a parser with the given name from
// the given parse and lex tables.
string c_code(string name, const ParseTable &parse_table, const LexTable &lex_table) {
    CCodeGenerator generator(name, parse_table, lex_table);
    return generator.code();
}
}
}