2014-02-10 18:38:01 -08:00
|
|
|
#include <functional>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "compiler/generate_code/c_code.h"
#include "compiler/generate_code/helpers.h"
#include "compiler/rules/built_in_symbols.h"
|
2013-12-15 19:33:34 -08:00
|
|
|
|
|
|
|
|
namespace tree_sitter {
|
2014-02-12 23:06:26 -08:00
|
|
|
using std::string;
|
|
|
|
|
using std::to_string;
|
|
|
|
|
using std::map;
|
|
|
|
|
using std::vector;
|
|
|
|
|
using std::set;
|
|
|
|
|
using std::pair;
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-01-11 15:14:17 -08:00
|
|
|
namespace generate_code {
|
2013-12-15 19:33:34 -08:00
|
|
|
// Renders a C `switch` statement: an opening line testing `condition`, the
// indented `body`, and a closing brace, combined with join()'s default
// separator.
string _switch(string condition, string body) {
    string opening_line = "switch (";
    opening_line += condition;
    opening_line += ") {";

    const string indented_body = indent(body);
    return join({ opening_line, indented_body, "}" });
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-12-15 19:33:34 -08:00
|
|
|
// Renders a single `case value:` label followed by the indented `body`.
// The trailing empty element makes join() emit one more separator after the
// body.
string _case(string value, string body) {
    string label = "case ";
    label += value;
    label += ":";

    const string indented_body = indent(body);
    return join({ label, indented_body, "" });
}
|
|
|
|
|
|
|
|
|
|
// Renders a `default:` label followed by the indented `body`.
string _default(string body) {
    const string label = "default:";
    const string indented_body = indent(body);
    return join({ label, indented_body });
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-12-27 17:31:08 -08:00
|
|
|
// Renders a brace-less `if (condition)` line followed by the indented `body`.
// The trailing empty element makes join() emit one more separator after the
// body.
string _if(string condition, string body) {
    string header = "if (";
    header += condition;
    header += ")";

    const string indented_body = indent(body);
    return join({ header, indented_body, "" });
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-12-15 19:33:34 -08:00
|
|
|
class CCodeGenerator {
|
2014-02-18 09:07:00 -08:00
|
|
|
const string name;
|
2013-12-15 19:33:34 -08:00
|
|
|
const ParseTable parse_table;
|
2013-12-27 17:31:08 -08:00
|
|
|
const LexTable lex_table;
|
2014-03-27 12:54:54 -07:00
|
|
|
const map<rules::Symbol, string> symbol_names;
|
2013-12-15 19:33:34 -08:00
|
|
|
public:
|
2014-03-27 12:54:54 -07:00
|
|
|
CCodeGenerator(string name,
|
|
|
|
|
const ParseTable &parse_table,
|
|
|
|
|
const LexTable &lex_table,
|
|
|
|
|
const map<rules::Symbol, string> &symbol_names) :
|
2014-01-23 13:44:49 -08:00
|
|
|
name(name),
|
2013-12-27 17:31:08 -08:00
|
|
|
parse_table(parse_table),
|
2014-03-27 12:54:54 -07:00
|
|
|
lex_table(lex_table),
|
|
|
|
|
symbol_names(symbol_names)
|
2013-12-15 19:33:34 -08:00
|
|
|
{}
|
2014-03-28 13:51:32 -07:00
|
|
|
|
2014-03-25 19:34:17 -07:00
|
|
|
string code() {
|
|
|
|
|
return join({
|
|
|
|
|
includes(),
|
2014-03-26 12:52:31 -07:00
|
|
|
state_and_symbol_counts(),
|
2014-03-25 19:34:17 -07:00
|
|
|
symbol_enum(),
|
2014-03-27 12:54:54 -07:00
|
|
|
symbol_names_list(),
|
2014-03-25 19:34:17 -07:00
|
|
|
hidden_symbols_list(),
|
|
|
|
|
lex_function(),
|
|
|
|
|
lex_states_list(),
|
|
|
|
|
parse_table_array(),
|
|
|
|
|
parser_export(),
|
|
|
|
|
}, "\n\n") + "\n";
|
|
|
|
|
}
|
2014-03-28 13:51:32 -07:00
|
|
|
|
2014-03-25 19:34:17 -07:00
|
|
|
private:
|
2014-01-27 12:40:06 -08:00
|
|
|
string symbol_id(rules::Symbol symbol) {
|
2014-02-26 19:03:43 -08:00
|
|
|
if (symbol.is_built_in()) {
|
2014-03-28 13:51:32 -07:00
|
|
|
return (symbol == rules::ERROR()) ?
|
2014-03-26 12:52:31 -07:00
|
|
|
"ts_builtin_sym_error" :
|
|
|
|
|
"ts_builtin_sym_end";
|
2014-03-09 22:24:52 -07:00
|
|
|
} else if (symbol.is_auxiliary()) {
|
2014-02-20 13:30:43 -08:00
|
|
|
return "ts_aux_sym_" + symbol.name;
|
2014-03-09 22:24:52 -07:00
|
|
|
} else {
|
2014-02-20 13:30:43 -08:00
|
|
|
return "ts_sym_" + symbol.name;
|
2014-03-09 22:24:52 -07:00
|
|
|
}
|
2013-12-15 19:33:34 -08:00
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-02-05 18:56:04 -08:00
|
|
|
string condition_for_character_range(const rules::CharacterRange &range) {
|
2014-03-25 19:51:34 -07:00
|
|
|
string lookahead("lookahead");
|
2014-02-05 18:56:04 -08:00
|
|
|
if (range.min == range.max) {
|
|
|
|
|
return lookahead + " == '" + character_code(range.min) + "'";
|
|
|
|
|
} else {
|
2014-03-09 19:49:35 -07:00
|
|
|
return string("'") + character_code(range.min) + string("' <= ") + lookahead +
|
2014-02-10 13:20:43 -08:00
|
|
|
" && " + lookahead + " <= '" + character_code(range.max) + "'";
|
2013-12-15 19:33:34 -08:00
|
|
|
}
|
|
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-02-05 18:56:04 -08:00
|
|
|
string condition_for_character_set(const rules::CharacterSet &set) {
|
|
|
|
|
vector<string> parts;
|
2014-02-10 13:20:43 -08:00
|
|
|
if (set.ranges.size() == 1) {
|
|
|
|
|
return condition_for_character_range(*set.ranges.begin());
|
|
|
|
|
} else {
|
|
|
|
|
for (auto &match : set.ranges)
|
|
|
|
|
parts.push_back("(" + condition_for_character_range(match) + ")");
|
|
|
|
|
return join(parts, " ||\n ");
|
|
|
|
|
}
|
2014-02-05 18:56:04 -08:00
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-02-03 13:05:51 -08:00
|
|
|
string condition_for_character_rule(const rules::CharacterSet &rule) {
|
2014-01-29 19:18:21 -08:00
|
|
|
vector<string> parts;
|
2014-02-05 18:56:04 -08:00
|
|
|
pair<rules::CharacterSet, bool> representation = rule.most_compact_representation();
|
|
|
|
|
if (representation.second)
|
|
|
|
|
return condition_for_character_set(representation.first);
|
|
|
|
|
else
|
|
|
|
|
return "!(" + condition_for_character_set(rule.complement()) + ")";
|
2014-01-29 19:18:21 -08:00
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-03-25 19:34:17 -07:00
|
|
|
string code_for_parse_action(const ParseAction &action) {
|
2014-03-24 19:18:06 -07:00
|
|
|
switch (action.type) {
|
2014-02-06 09:06:52 -08:00
|
|
|
case ParseActionTypeAccept:
|
2014-03-25 19:34:17 -07:00
|
|
|
return "ACCEPT_INPUT()";
|
2014-02-06 09:06:52 -08:00
|
|
|
case ParseActionTypeShift:
|
2014-03-25 19:34:17 -07:00
|
|
|
return "SHIFT(" + to_string(action.state_index) + ")";
|
2014-02-06 09:06:52 -08:00
|
|
|
case ParseActionTypeReduce:
|
2014-03-09 23:51:33 -07:00
|
|
|
return "REDUCE(" +
|
2014-03-24 19:18:06 -07:00
|
|
|
symbol_id(action.symbol) + ", " +
|
2014-03-26 21:02:53 -07:00
|
|
|
to_string(action.consumed_symbol_count) + ")";
|
2014-02-06 09:06:52 -08:00
|
|
|
default:
|
|
|
|
|
return "";
|
2014-01-08 18:35:16 -08:00
|
|
|
}
|
|
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-03-24 19:18:06 -07:00
|
|
|
string code_for_lex_actions(const LexAction &action,
|
2014-03-09 23:51:33 -07:00
|
|
|
const set<rules::CharacterSet> &expected_inputs) {
|
2014-03-24 19:18:06 -07:00
|
|
|
switch (action.type) {
|
|
|
|
|
case LexActionTypeAdvance:
|
|
|
|
|
return "ADVANCE(" + to_string(action.state_index) + ");";
|
|
|
|
|
case LexActionTypeAccept:
|
|
|
|
|
return "ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");";
|
|
|
|
|
case LexActionTypeError:
|
|
|
|
|
return "LEX_ERROR();";
|
2013-12-27 17:31:08 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
string switch_on_lookahead_char(const LexState &parse_state) {
|
|
|
|
|
string result = "";
|
2014-01-08 18:35:16 -08:00
|
|
|
auto expected_inputs = parse_state.expected_inputs();
|
2013-12-27 17:31:08 -08:00
|
|
|
for (auto pair : parse_state.actions)
|
2014-03-24 19:18:06 -07:00
|
|
|
if (!pair.first.is_empty())
|
|
|
|
|
result += _if(condition_for_character_rule(pair.first),
|
|
|
|
|
code_for_lex_actions(pair.second, expected_inputs));
|
|
|
|
|
result += code_for_lex_actions(parse_state.default_action, expected_inputs);
|
2013-12-27 17:31:08 -08:00
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
string switch_on_lex_state() {
|
|
|
|
|
string body = "";
|
2014-03-04 18:28:28 -08:00
|
|
|
for (size_t i = 0; i < lex_table.states.size(); i++)
|
2013-12-27 17:31:08 -08:00
|
|
|
body += _case(std::to_string(i), switch_on_lookahead_char(lex_table.states[i]));
|
2014-02-24 18:42:54 -08:00
|
|
|
body += _case("ts_lex_state_error", switch_on_lookahead_char(lex_table.error_state));
|
2014-01-08 18:35:16 -08:00
|
|
|
body += _default("LEX_PANIC();");
|
2014-03-25 19:51:34 -07:00
|
|
|
return _switch("lex_state", body);
|
2013-12-27 17:31:08 -08:00
|
|
|
}
|
2014-03-21 12:46:23 -07:00
|
|
|
|
2014-03-26 12:52:31 -07:00
|
|
|
string state_and_symbol_counts() {
|
|
|
|
|
return join({
|
|
|
|
|
"STATE_COUNT = " + to_string(parse_table.states.size()) + ";",
|
|
|
|
|
"SYMBOL_COUNT = " + to_string(parse_table.symbols.size()) + ";"
|
|
|
|
|
});
|
2014-03-18 08:04:17 -07:00
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-12-15 19:33:34 -08:00
|
|
|
string symbol_enum() {
|
2014-02-20 13:30:43 -08:00
|
|
|
string result = "enum {\n";
|
2014-03-25 19:34:17 -07:00
|
|
|
size_t index = 2;
|
2014-01-27 12:40:06 -08:00
|
|
|
for (auto symbol : parse_table.symbols)
|
2014-02-26 19:03:43 -08:00
|
|
|
if (!symbol.is_built_in())
|
2014-03-25 19:34:17 -07:00
|
|
|
result += indent(symbol_id(symbol)) + " = " + to_string(index++) + ",\n";
|
2014-01-22 23:04:11 -08:00
|
|
|
return result + "};";
|
2013-12-27 17:31:08 -08:00
|
|
|
}
|
2014-01-05 15:43:00 -08:00
|
|
|
|
2014-03-27 12:54:54 -07:00
|
|
|
string symbol_names_list() {
|
|
|
|
|
set<rules::Symbol> symbols(parse_table.symbols);
|
2014-03-28 13:51:32 -07:00
|
|
|
symbols.insert(rules::END_OF_INPUT());
|
|
|
|
|
symbols.insert(rules::ERROR());
|
2014-03-25 19:34:17 -07:00
|
|
|
|
2014-03-27 12:54:54 -07:00
|
|
|
string result = "SYMBOL_NAMES = {\n";
|
2014-01-27 12:40:06 -08:00
|
|
|
for (auto symbol : parse_table.symbols)
|
2014-03-27 12:54:54 -07:00
|
|
|
result += indent("[" + symbol_id(symbol) + "] = \"" + symbol_names.find(symbol)->second) + "\",\n";
|
2014-01-22 23:04:11 -08:00
|
|
|
return result + "};";
|
2014-01-05 15:43:00 -08:00
|
|
|
}
|
2014-03-28 13:51:32 -07:00
|
|
|
|
2014-03-25 19:34:17 -07:00
|
|
|
string hidden_symbols_list() {
|
2014-03-26 12:52:31 -07:00
|
|
|
string result = "HIDDEN_SYMBOLS = {";
|
2014-03-25 09:05:55 -07:00
|
|
|
for (auto &symbol : parse_table.symbols)
|
2014-03-25 19:34:17 -07:00
|
|
|
if (symbol.is_hidden())
|
|
|
|
|
result += indent("\n[" + symbol_id(symbol) + "] = 1,");
|
|
|
|
|
return result + "\n};";
|
2014-03-25 09:05:55 -07:00
|
|
|
}
|
2014-01-05 15:43:00 -08:00
|
|
|
|
2013-12-27 17:31:08 -08:00
|
|
|
string includes() {
|
2014-02-17 12:53:57 -08:00
|
|
|
return "#include \"tree_sitter/parser.h\"";
|
2013-12-15 19:33:34 -08:00
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-12-27 17:31:08 -08:00
|
|
|
string lex_function() {
|
2014-01-05 15:43:00 -08:00
|
|
|
return join({
|
2014-02-20 13:30:43 -08:00
|
|
|
"LEX_FN() {",
|
2014-01-05 15:43:00 -08:00
|
|
|
indent("START_LEXER();"),
|
|
|
|
|
indent(switch_on_lex_state()),
|
|
|
|
|
"}"
|
|
|
|
|
});
|
2013-12-27 17:31:08 -08:00
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-03-17 13:32:14 -07:00
|
|
|
template<typename T>
|
|
|
|
|
vector<string> map_to_string(const vector<T> &inputs, std::function<string(T)> f) {
|
|
|
|
|
vector<string> result;
|
|
|
|
|
for (auto &item : inputs)
|
|
|
|
|
result.push_back(f(item));
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-25 19:34:17 -07:00
|
|
|
string lex_states_list() {
|
|
|
|
|
size_t state_id = 0;
|
|
|
|
|
return join({
|
2014-03-26 12:52:31 -07:00
|
|
|
"LEX_STATES = {",
|
2014-03-25 19:34:17 -07:00
|
|
|
indent(join(map_to_string<ParseState>(parse_table.states, [&](ParseState state) {
|
|
|
|
|
return "[" + to_string(state_id++) + "] = " + to_string(state.lex_state_id) + ",";
|
|
|
|
|
}))),
|
|
|
|
|
"};"
|
|
|
|
|
});
|
2014-03-17 13:32:14 -07:00
|
|
|
}
|
|
|
|
|
|
2014-03-25 19:34:17 -07:00
|
|
|
string parse_table_array() {
|
2014-03-17 13:32:14 -07:00
|
|
|
size_t state_id = 0;
|
2014-01-07 21:50:32 -08:00
|
|
|
return join({
|
2014-03-26 12:52:31 -07:00
|
|
|
"PARSE_TABLE = {",
|
2014-03-25 19:34:17 -07:00
|
|
|
indent(join(map_to_string<ParseState>(parse_table.states, [&](ParseState state) {
|
2014-03-29 19:00:31 -07:00
|
|
|
string result = "[" + to_string(state_id++) + "] = {\n";
|
2014-03-25 19:34:17 -07:00
|
|
|
for (auto &pair : state.actions)
|
2014-03-29 19:00:31 -07:00
|
|
|
result += indent("[" + symbol_id(pair.first) + "] = " + code_for_parse_action(pair.second) + ",") + "\n";
|
|
|
|
|
return result + "},\n";
|
|
|
|
|
}), "\n")),
|
2014-03-25 19:34:17 -07:00
|
|
|
"};"
|
2014-01-07 21:50:32 -08:00
|
|
|
});
|
|
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-03-15 14:48:28 -07:00
|
|
|
string parser_export() {
|
|
|
|
|
return "EXPORT_PARSER(ts_parser_" + name + ");";
|
2014-01-08 18:35:16 -08:00
|
|
|
}
|
2013-12-15 19:33:34 -08:00
|
|
|
};
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-03-27 12:54:54 -07:00
|
|
|
// Public entry point: constructs a CCodeGenerator from the given inputs and
// returns the generated C source text.
string c_code(string name,
              const ParseTable &parse_table,
              const LexTable &lex_table,
              const map<rules::Symbol, string> &symbol_names) {
    CCodeGenerator generator(name, parse_table, lex_table, symbol_names);
    return generator.code();
}
|
|
|
|
|
}
|
|
|
|
|
}
|