tree-sitter/src/compiler/parse_grammar.cc

322 lines
9.4 KiB
C++

#include "compiler/parse_grammar.h"
#include <string>
#include <vector>
#include <unordered_set>
#include <utility>
#include "json.h"
#include "compiler/rule.h"
namespace tree_sitter {
using std::string;
using std::vector;
using std::unordered_set;
using std::pair;
using rules::Rule;
using rules::Blank;
using rules::Metadata;
using rules::Pattern;
using rules::String;
using rules::NamedSymbol;
// Outcome of parsing one JSON rule node: either a parsed `rule` (with an empty
// `error_message`) or a non-empty `error_message` describing what went wrong.
//
// The constructors are deliberately implicit so that `parse_rule` can return a
// `Rule`, a string literal, or a `std::string` directly.
struct ParseRuleResult {
  Rule rule;
  string error_message;

  // Failure results: only the message is set; `rule` is default-constructed.
  ParseRuleResult(const string &error_message) : error_message(error_message) {}
  ParseRuleResult(const char *error_message) : error_message(error_message) {}

  // Success result. The rule is taken by value and moved into place so that
  // it is not copied a second time.
  ParseRuleResult(Rule rule) : rule(std::move(rule)) {}
};
// Converts one JSON rule node into a `Rule`.
//
// `rule_json` must be a non-null JSON object whose "type" member is a string
// naming the rule kind: BLANK, CHOICE, SEQ, REPEAT, REPEAT1, TOKEN, PATTERN,
// STRING, SYMBOL, PREC, PREC_LEFT or PREC_RIGHT. Nested rules ("members" /
// "content") are parsed recursively.
//
// Returns a result holding the parsed rule, or a result whose `error_message`
// is non-empty. Errors from nested rules are prefixed with the context of the
// enclosing rule (e.g. "Invalid seq member: ...").
ParseRuleResult parse_rule(json_value *rule_json) {
  if (!rule_json) {
    return "Rule cannot be null";
  }

  if (rule_json->type != json_object) {
    return "Rule type must be an object";
  }

  json_value rule_type_json = rule_json->operator[]("type");
  if (rule_type_json.type != json_string) {
    return "Rule type must be a string";
  }

  string type = rule_type_json.u.string.ptr;

  if (type == "BLANK") {
    return Rule(Blank{});
  }

  if (type == "CHOICE") {
    json_value members_json = rule_json->operator[]("members");
    if (members_json.type != json_array) {
      return "Choice members must be an array";
    }

    vector<Rule> members;
    for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
      json_value *member_json = members_json.u.array.values[i];
      auto result = parse_rule(member_json);
      if (!result.error_message.empty()) {
        return "Invalid choice member: " + result.error_message;
      }
      members.push_back(result.rule);
    }
    return Rule::choice(members);
  }

  if (type == "SEQ") {
    json_value members_json = rule_json->operator[]("members");
    if (members_json.type != json_array) {
      return "Seq members must be an array";
    }

    vector<Rule> members;
    for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
      json_value *member_json = members_json.u.array.values[i];
      auto result = parse_rule(member_json);
      if (!result.error_message.empty()) {
        // Report the failure against the sequence, not a choice.
        return "Invalid seq member: " + result.error_message;
      }
      members.push_back(result.rule);
    }
    return Rule::seq(members);
  }

  if (type == "REPEAT") {
    // The content is validated by the recursive call, which rejects anything
    // that is not a JSON object.
    json_value content_json = rule_json->operator[]("content");
    auto result = parse_rule(&content_json);
    if (!result.error_message.empty()) {
      return "Invalid repeat content: " + result.error_message;
    }
    // REPEAT allows zero occurrences: one-or-more repetitions, or blank.
    return Rule::choice({Rule::repeat(result.rule), Blank{}});
  }

  if (type == "REPEAT1") {
    json_value content_json = rule_json->operator[]("content");
    auto result = parse_rule(&content_json);
    if (!result.error_message.empty()) {
      return "Invalid repeat content: " + result.error_message;
    }
    return Rule::repeat(result.rule);
  }

  if (type == "TOKEN") {
    json_value content_json = rule_json->operator[]("content");
    auto result = parse_rule(&content_json);
    if (!result.error_message.empty()) {
      return "Invalid token content: " + result.error_message;
    }
    return Rule(Metadata::token(result.rule));
  }

  if (type == "PATTERN") {
    json_value value_json = rule_json->operator[]("value");
    if (value_json.type == json_string) {
      return Rule(Pattern{value_json.u.string.ptr});
    } else {
      return "Pattern value must be a string";
    }
  }

  if (type == "STRING") {
    json_value value_json = rule_json->operator[]("value");
    if (value_json.type == json_string) {
      return Rule(String{value_json.u.string.ptr});
    } else {
      return "String rule value must be a string";
    }
  }

  if (type == "SYMBOL") {
    // Unlike PATTERN and STRING, the symbol is stored under "name".
    json_value value_json = rule_json->operator[]("name");
    if (value_json.type == json_string) {
      return Rule(NamedSymbol{value_json.u.string.ptr});
    } else {
      return "Symbol value must be a string";
    }
  }

  if (type == "PREC") {
    json_value precedence_json = rule_json->operator[]("value");
    if (precedence_json.type != json_integer) {
      return "Precedence value must be an integer";
    }

    json_value content_json = rule_json->operator[]("content");
    auto result = parse_rule(&content_json);
    if (!result.error_message.empty()) {
      return "Invalid precedence content: " + result.error_message;
    }
    return Rule(Metadata::prec(precedence_json.u.integer, result.rule));
  }

  if (type == "PREC_LEFT") {
    json_value precedence_json = rule_json->operator[]("value");
    if (precedence_json.type != json_integer) {
      return "Precedence value must be an integer";
    }

    json_value content_json = rule_json->operator[]("content");
    auto result = parse_rule(&content_json);
    if (!result.error_message.empty()) {
      return "Invalid precedence content: " + result.error_message;
    }
    return Rule(Metadata::prec_left(precedence_json.u.integer, result.rule));
  }

  if (type == "PREC_RIGHT") {
    json_value precedence_json = rule_json->operator[]("value");
    if (precedence_json.type != json_integer) {
      return "Precedence value must be an integer";
    }

    json_value content_json = rule_json->operator[]("content");
    auto result = parse_rule(&content_json);
    if (!result.error_message.empty()) {
      return "Invalid precedence content: " + result.error_message;
    }
    return Rule(Metadata::prec_right(precedence_json.u.integer, result.rule));
  }

  return "Unknown rule type: " + type;
}
// Parses a JSON grammar description into an `InputGrammar`.
//
// The top-level JSON value must be an object with:
//   - "name":      string, the grammar name (required)
//   - "rules":     object mapping rule names to rule nodes (required)
//   - "extras":    array of rule nodes (optional)
//   - "conflicts": array of arrays of symbol-name strings (optional)
//   - "externals": array of token-name strings (optional)
//
// Returns `{ name, grammar, "" }` on success. On the first error encountered,
// returns `{ "", InputGrammar(), message }` with a non-empty message.
ParseGrammarResult parse_grammar(const string &input) {
  // Owns the parsed JSON tree and frees it on every exit path, including
  // early returns and exceptions thrown while building the grammar.
  struct JsonTreeGuard {
    json_value *root;
    explicit JsonTreeGuard(json_value *root) : root(root) {}
    ~JsonTreeGuard() {
      if (root) {
        json_value_free(root);
      }
    }
    JsonTreeGuard(const JsonTreeGuard &) = delete;
    JsonTreeGuard &operator=(const JsonTreeGuard &) = delete;
  };

  // Builds the failure result shared by every error path.
  const auto fail = [](const string &message) -> ParseGrammarResult {
    return { "", InputGrammar(), message };
  };

  json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
  char parse_error[json_error_max];
  json_value *grammar_json =
    json_parse_ex(&settings, input.c_str(), input.size(), parse_error);
  if (!grammar_json) {
    return fail(string("Invalid JSON at ") + parse_error);
  }
  JsonTreeGuard grammar_json_guard(grammar_json);

  if (grammar_json->type != json_object) {
    return fail("Body must be an object");
  }

  json_value name_json = grammar_json->operator[]("name");
  if (name_json.type != json_string) {
    return fail("Name must be a string");
  }
  // Copied into a std::string so it stays valid after the tree is freed.
  string name = name_json.u.string.ptr;

  InputGrammar grammar;

  json_value rules_json = grammar_json->operator[]("rules");
  if (rules_json.type != json_object) {
    return fail("Rules must be an object");
  }

  for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) {
    json_object_entry entry_json = rules_json.u.object.values[i];
    auto result = parse_rule(entry_json.value);
    if (!result.error_message.empty()) {
      return fail(result.error_message);
    }

    grammar.variables.push_back(InputGrammar::Variable{
      string(entry_json.name),
      VariableTypeNamed,
      result.rule
    });
  }

  // The remaining sections are optional: a `json_none` lookup result means
  // the key is absent and the section is skipped.
  json_value extras_json = grammar_json->operator[]("extras");
  if (extras_json.type != json_none) {
    if (extras_json.type != json_array) {
      return fail("Extras must be an array");
    }

    for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) {
      json_value *extra_json = extras_json.u.array.values[i];
      auto result = parse_rule(extra_json);
      if (!result.error_message.empty()) {
        return fail("Invalid extra token: " + result.error_message);
      }
      grammar.extra_tokens.push_back(result.rule);
    }
  }

  json_value conflicts_json = grammar_json->operator[]("conflicts");
  if (conflicts_json.type != json_none) {
    if (conflicts_json.type != json_array) {
      return fail("Conflicts must be an array");
    }

    for (size_t i = 0, length = conflicts_json.u.array.length; i < length; i++) {
      json_value *conflict_json = conflicts_json.u.array.values[i];
      if (conflict_json->type != json_array) {
        return fail("Each conflict entry must be an array");
      }

      unordered_set<NamedSymbol> conflict;
      for (size_t j = 0, conflict_length = conflict_json->u.array.length;
           j < conflict_length; j++) {
        json_value *conflict_entry_json = conflict_json->u.array.values[j];
        if (conflict_entry_json->type != json_string) {
          return fail("Each conflict entry must be an array of strings");
        }

        conflict.insert(rules::NamedSymbol{
          string(conflict_entry_json->u.string.ptr)
        });
      }

      grammar.expected_conflicts.push_back(conflict);
    }
  }

  json_value external_tokens_json = grammar_json->operator[]("externals");
  if (external_tokens_json.type != json_none) {
    if (external_tokens_json.type != json_array) {
      return fail("External tokens must be an array");
    }

    for (size_t i = 0, length = external_tokens_json.u.array.length; i < length; i++) {
      json_value *token_name_json = external_tokens_json.u.array.values[i];
      if (token_name_json->type != json_string) {
        return fail("External token values must be strings");
      }

      string token_name = token_name_json->u.string.ptr;
      grammar.external_tokens.push_back({
        token_name,
        VariableTypeNamed,
        rules::NONE()
      });
    }
  }

  // The guard frees the JSON tree after the result has been constructed.
  return { name, grammar, "" };
}
} // namespace tree_sitter