tree-sitter/src/compiler/generate_code/c_code.cpp

285 lines
11 KiB
C++
Raw Normal View History

2013-12-15 19:33:34 -08:00
#include "c_code.h"
#include <vector>
#include <unordered_map>
#include <unordered_set>
using std::string;
2013-12-27 17:31:08 -08:00
using std::to_string;
2013-12-15 19:33:34 -08:00
using std::unordered_map;
using std::unordered_set;
using std::vector;
namespace tree_sitter {
2014-01-11 15:14:17 -08:00
namespace generate_code {
2013-12-15 19:33:34 -08:00
// Replaces every occurrence of `search` in `input` with `replace`, in place.
//
// Scanning resumes just past each inserted replacement, so text introduced
// by `replace` is never re-matched. An empty `search` is treated as a no-op:
// it would otherwise match at every position and the loop would never end.
static void str_replace(string &input, const string &search, const string &replace) {
    if (search.empty()) return;
    size_t pos = 0;
    while ((pos = input.find(search, pos)) != string::npos) {
        input.replace(pos, search.length(), replace);
        pos += replace.length();
    }
}
2014-01-08 18:35:16 -08:00
// Concatenates `lines`, placing `separator` between each adjacent pair.
// An empty vector yields an empty string; no separator is added before the
// first element or after the last one.
string join(vector<string> lines, string separator) {
    string joined;
    for (auto it = lines.begin(); it != lines.end(); ++it) {
        if (it != lines.begin())
            joined += separator;
        joined += *it;
    }
    return joined;
}

// Concatenates `lines` with a newline between each adjacent pair.
string join(vector<string> lines) {
    const string newline = "\n";
    return join(lines, newline);
}
2013-12-15 19:33:34 -08:00
2013-12-27 17:31:08 -08:00
// Returns `input` with one indentation unit prepended to its first line and
// inserted after every newline, so each line of the text is shifted right.
string indent(string input) {
    // NOTE(review): whitespace in this file appears to have been collapsed;
    // confirm the intended width of this indentation unit.
    const string tab = " ";
    str_replace(input, "\n", "\n" + tab);
    return tab + input;
}
// Builds a C `switch` statement on `condition` whose braces enclose the
// indented `body`.
string _switch(string condition, string body) {
    return join({
        "switch (" + condition + ") {",
        indent(body),
        "}"
    });
}
// Builds a `case value:` label followed by the indented `body`. The trailing
// empty element makes the result end with a newline, so consecutive cases
// can be appended directly to one another.
string _case(string value, string body) {
    return join({
        "case " + value + ":",
        indent(body),
        ""
    });
}
// Builds a `default:` label followed by the indented `body`. Unlike `_case`,
// the result has no trailing newline.
string _default(string body) {
    return join({
        "default:",
        indent(body)
    });
}
2013-12-27 17:31:08 -08:00
// Builds a brace-less `if (condition)` followed by the indented `body`. The
// trailing empty element makes the result end with a newline, so further
// statements can be appended directly after it.
string _if(string condition, string body) {
    return join({
        "if (" + condition + ")",
        indent(body),
        ""
    });
}
2013-12-15 19:33:34 -08:00
class CCodeGenerator {
const ParseTable parse_table;
2013-12-27 17:31:08 -08:00
const LexTable lex_table;
const string name;
2013-12-15 19:33:34 -08:00
public:
CCodeGenerator(string name, const ParseTable &parse_table, const LexTable &lex_table) :
name(name),
2013-12-27 17:31:08 -08:00
parse_table(parse_table),
lex_table(lex_table)
2013-12-15 19:33:34 -08:00
{}
string symbol_id(rules::Symbol symbol) {
if (symbol.is_auxiliary)
return "ts_aux_" + symbol.name;
else
return "ts_symbol_" + symbol.name;
2013-12-15 19:33:34 -08:00
}
2014-01-22 23:04:11 -08:00
string character_code(char character) {
switch (character) {
case '\0':
return "\\0";
case '"':
return "\\\"";
2014-01-22 23:04:11 -08:00
default:
return string() + character;
}
}
2013-12-27 17:31:08 -08:00
string condition_for_char_match(const CharMatch &char_match) {
auto value = "LOOKAHEAD_CHAR()";
switch (char_match.type) {
case CharMatchTypeClass:
switch (char_match.value.character_class) {
case CharClassDigit:
return string("isdigit(") + value + ")";
case CharClassWord:
return string("isalnum(") + value + ")";
}
case CharMatchTypeSpecific:
2014-01-22 23:04:11 -08:00
return string(value) + " == '" + character_code(char_match.value.character) + "'";
2013-12-15 19:33:34 -08:00
default:
return "";
}
}
string code_for_parse_actions(const unordered_set<ParseAction> &actions, const unordered_set<rules::Symbol> &expected_inputs) {
2013-12-27 17:31:08 -08:00
auto action = actions.begin();
if (action == actions.end()) {
2014-01-08 18:35:16 -08:00
return parse_error_call(expected_inputs);
2013-12-27 17:31:08 -08:00
} else {
switch (action->type) {
case ParseActionTypeAccept:
return "ACCEPT_INPUT();";
case ParseActionTypeShift:
return "SHIFT(" + to_string(action->state_index) + ");";
case ParseActionTypeReduce:
return "REDUCE(" + symbol_id(action->symbol) + ", " + std::to_string(action->child_symbol_count) + ");";
2013-12-27 17:31:08 -08:00
default:
return "";
}
}
}
2014-01-08 18:35:16 -08:00
string parse_error_call(const unordered_set<rules::Symbol> &expected_inputs) {
2014-01-08 18:35:16 -08:00
string result = "PARSE_ERROR(" + to_string(expected_inputs.size()) + ", EXPECT({";
bool started = false;
for (auto symbol : expected_inputs) {
2014-01-08 18:35:16 -08:00
if (started) result += ", ";
started = true;
result += "\"" + symbol.name + "\"";
2014-01-08 18:35:16 -08:00
}
result += "}));";
return result;
}
string escape_string(string input) {
str_replace(input, "\"", "\\\"");
return input;
}
2014-01-08 18:35:16 -08:00
string lex_error_call(const unordered_set<CharMatch> &expected_inputs) {
string result = "LEX_ERROR(" + to_string(expected_inputs.size()) + ", EXPECT({";
bool started = false;
for (auto match : expected_inputs) {
if (started) result += ", ";
started = true;
result += "\"" + escape_string(CharMatchToString(match)) + "\"";
2014-01-08 18:35:16 -08:00
}
result += "}));";
return result;
}
2013-12-27 17:31:08 -08:00
2014-01-08 18:35:16 -08:00
string code_for_lex_actions(const unordered_set<LexAction> &actions, const unordered_set<CharMatch> &expected_inputs) {
2013-12-27 17:31:08 -08:00
auto action = actions.begin();
if (action == actions.end()) {
2014-01-08 18:35:16 -08:00
return lex_error_call(expected_inputs);
2013-12-27 17:31:08 -08:00
} else {
switch (action->type) {
case LexActionTypeAdvance:
return "ADVANCE(" + to_string(action->state_index) + ");";
case LexActionTypeAccept:
return "ACCEPT_TOKEN(" + symbol_id(action->symbol) + ");";
2013-12-27 17:31:08 -08:00
case LexActionTypeError:
return "";
}
}
}
string code_for_parse_state(const ParseState &parse_state) {
2013-12-15 19:33:34 -08:00
string body = "";
for (auto pair : parse_state.actions)
2014-01-08 18:35:16 -08:00
body += _case(symbol_id(pair.first), code_for_parse_actions(pair.second, parse_state.expected_inputs()));
2014-01-25 23:40:51 -08:00
body += _default(parse_error_call(parse_state.expected_inputs()));
return
string("SET_LEX_STATE(") + to_string(parse_state.lex_state_index) + ");\n" +
_switch("LOOKAHEAD_SYM()", body);
2013-12-15 19:33:34 -08:00
}
2013-12-27 17:31:08 -08:00
string switch_on_lookahead_char(const LexState &parse_state) {
string result = "";
2014-01-08 18:35:16 -08:00
auto expected_inputs = parse_state.expected_inputs();
2013-12-27 17:31:08 -08:00
for (auto pair : parse_state.actions)
2014-01-08 18:35:16 -08:00
result += _if(condition_for_char_match(pair.first), code_for_lex_actions(pair.second, expected_inputs));
result += code_for_lex_actions(parse_state.default_actions, expected_inputs);
2013-12-27 17:31:08 -08:00
return result;
}
string switch_on_parse_state() {
2013-12-15 19:33:34 -08:00
string body = "";
for (int i = 0; i < parse_table.states.size(); i++)
body += _case(std::to_string(i), code_for_parse_state(parse_table.states[i]));
2014-01-08 18:35:16 -08:00
body += _default("PARSE_PANIC();");
2013-12-15 19:33:34 -08:00
return _switch("PARSE_STATE()", body);
}
2013-12-27 17:31:08 -08:00
string switch_on_lex_state() {
string body = "";
for (int i = 0; i < lex_table.states.size(); i++)
body += _case(std::to_string(i), switch_on_lookahead_char(lex_table.states[i]));
2014-01-08 18:35:16 -08:00
body += _default("LEX_PANIC();");
2013-12-27 17:31:08 -08:00
return _switch("LEX_STATE()", body);
}
2013-12-15 19:33:34 -08:00
string symbol_enum() {
string result = "enum ts_symbol {\n";
for (auto symbol : parse_table.symbols)
result += indent(symbol_id(symbol)) + ",\n";
2014-01-22 23:04:11 -08:00
return result + "};";
2013-12-27 17:31:08 -08:00
}
string rule_names_list() {
string result = "static const char *ts_symbol_names[] = {\n";
for (auto symbol : parse_table.symbols)
result += indent(string("\"") + symbol.name) + "\",\n";
2014-01-22 23:04:11 -08:00
return result + "};";
}
2013-12-27 17:31:08 -08:00
string includes() {
return join({
"#include \"parser.h\"",
"#include <ctype.h>"
});
2013-12-15 19:33:34 -08:00
}
2013-12-27 17:31:08 -08:00
string lex_function() {
return join({
"static void ts_lex(TSParser *parser) {",
indent("START_LEXER();"),
indent(switch_on_lex_state()),
indent("FINISH_LEXER();"),
"}"
});
2013-12-27 17:31:08 -08:00
}
2013-12-15 19:33:34 -08:00
2014-01-08 18:35:16 -08:00
string parse_function() {
2014-01-07 21:50:32 -08:00
return join({
2014-01-08 18:35:16 -08:00
"static TSParseResult ts_parse(const char *input) {",
indent("START_PARSER();"),
indent(switch_on_parse_state()),
indent("FINISH_PARSER();"),
2014-01-07 21:50:32 -08:00
"}"
});
}
2014-01-08 18:35:16 -08:00
string parse_config_struct() {
return join({
"TSParseConfig ts_parse_config_" + name + " = {",
2014-01-08 18:35:16 -08:00
indent(".parse_fn = ts_parse,"),
indent(".symbol_names = ts_symbol_names"),
"};"
});
}
2013-12-15 19:33:34 -08:00
string code() {
return join({
includes(),
symbol_enum(),
rule_names_list(),
lex_function(),
2014-01-07 21:50:32 -08:00
parse_function(),
2014-01-08 18:35:16 -08:00
parse_config_struct(),
}, "\n\n") + "\n";
2013-12-15 19:33:34 -08:00
}
};
string c_code(string name, const ParseTable &parse_table, const LexTable &lex_table) {
return CCodeGenerator(name, parse_table, lex_table).code();
2013-12-15 19:33:34 -08:00
}
}
}