#include "compiler/parse_grammar.h"
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include "json.h"
#include "compiler/rule.h"

namespace tree_sitter {

using std::string;
using std::vector;
using std::unordered_set;
using std::pair;
using rules::Rule;
using rules::Blank;
using rules::Metadata;
using rules::Pattern;
using rules::String;
using rules::NamedSymbol;

// Outcome of parsing one JSON rule description: either a rule (with an empty
// `error_message`) or a non-empty `error_message` (with a default `rule`).
// The constructors are intentionally implicit so that parsing code can simply
// `return "some error";` or `return Rule(...);`.
struct ParseRuleResult {
  Rule rule;
  string error_message;

  ParseRuleResult(const string &error_message) : error_message(error_message) {}
  ParseRuleResult(const char *error_message) : error_message(error_message) {}
  ParseRuleResult(Rule rule) : rule(rule) {}
};

ParseRuleResult parse_rule(json_value *rule_json);

namespace {

// Deleter that lets a std::unique_ptr own a tree returned by json_parse_ex,
// so the tree is released on every exit path (including exceptions).
struct JsonValueDeleter {
  void operator()(json_value *value) const { json_value_free(value); }
};

// Parses each element of the JSON array `array_json` with `parse_rule` and
// appends the resulting rules to `rules`. Returns an empty string on success,
// or the error message of the first element that failed to parse.
// The caller must already have checked that `array_json` is a json_array.
string parse_rule_list(const json_value &array_json, vector<Rule> *rules) {
  for (size_t i = 0, length = array_json.u.array.length; i < length; i++) {
    auto result = parse_rule(array_json.u.array.values[i]);
    if (!result.error_message.empty()) {
      return result.error_message;
    }
    rules->push_back(result.rule);
  }
  return string();
}

// Parses a single rule from a JSON value. The value must be an object with a
// string "type" member; the remaining members that are read depend on the type.
// Lookups of missing members yield a value whose type matches none of the
// expected ones, so absent members are reported through the same type checks.
ParseRuleResult parse_rule_object(const json_value &rule_json) {
  if (rule_json.type != json_object) {
    return "Rule type must be an object";
  }

  const json_value &rule_type_json = rule_json["type"];
  if (rule_type_json.type != json_string) {
    return "Rule type must be a string";
  }

  const string type = rule_type_json.u.string.ptr;

  if (type == "BLANK") {
    return Rule(Blank{});
  }

  if (type == "CHOICE") {
    const json_value &members_json = rule_json["members"];
    if (members_json.type != json_array) {
      return "Choice members must be an array";
    }

    vector<Rule> members;
    const string member_error = parse_rule_list(members_json, &members);
    if (!member_error.empty()) {
      return "Invalid choice member: " + member_error;
    }
    return Rule::choice(members);
  }

  if (type == "SEQ") {
    const json_value &members_json = rule_json["members"];
    if (members_json.type != json_array) {
      return "Seq members must be an array";
    }

    vector<Rule> members;
    const string member_error = parse_rule_list(members_json, &members);
    if (!member_error.empty()) {
      return "Invalid seq member: " + member_error;
    }
    return Rule::seq(members);
  }

  if (type == "REPEAT") {
    auto result = parse_rule_object(rule_json["content"]);
    if (!result.error_message.empty()) {
      return "Invalid repeat content: " + result.error_message;
    }
    // REPEAT may match nothing, so it is built as "repeat(content) or blank".
    return Rule::choice({Rule::repeat(result.rule), Blank{}});
  }

  if (type == "REPEAT1") {
    auto result = parse_rule_object(rule_json["content"]);
    if (!result.error_message.empty()) {
      return "Invalid repeat content: " + result.error_message;
    }
    return Rule::repeat(result.rule);
  }

  if (type == "TOKEN") {
    auto result = parse_rule_object(rule_json["content"]);
    if (!result.error_message.empty()) {
      return "Invalid token content: " + result.error_message;
    }
    return Rule(Metadata::token(result.rule));
  }

  if (type == "PATTERN") {
    const json_value &value_json = rule_json["value"];
    if (value_json.type != json_string) {
      return "Pattern value must be a string";
    }
    return Rule(Pattern{value_json.u.string.ptr});
  }

  if (type == "STRING") {
    const json_value &value_json = rule_json["value"];
    if (value_json.type != json_string) {
      return "String rule value must be a string";
    }
    return Rule(String{value_json.u.string.ptr});
  }

  if (type == "SYMBOL") {
    const json_value &name_json = rule_json["name"];
    if (name_json.type != json_string) {
      return "Symbol value must be a string";
    }
    return Rule(NamedSymbol{name_json.u.string.ptr});
  }

  // The three precedence wrappers share their shape ("value" + "content") and
  // their error messages; only the Metadata factory differs.
  if (type == "PREC" || type == "PREC_LEFT" || type == "PREC_RIGHT") {
    const json_value &precedence_json = rule_json["value"];
    if (precedence_json.type != json_integer) {
      return "Precedence value must be an integer";
    }

    auto result = parse_rule_object(rule_json["content"]);
    if (!result.error_message.empty()) {
      return "Invalid precedence content: " + result.error_message;
    }

    const auto precedence = precedence_json.u.integer;
    if (type == "PREC") {
      return Rule(Metadata::prec(precedence, result.rule));
    }
    if (type == "PREC_LEFT") {
      return Rule(Metadata::prec_left(precedence, result.rule));
    }
    return Rule(Metadata::prec_right(precedence, result.rule));
  }

  return "Unknown rule type: " + type;
}

// Builds the failure result of parse_grammar: empty name, empty grammar, and
// the given error message.
ParseGrammarResult grammar_error(const string &message) {
  return { "", InputGrammar(), message };
}

}  // namespace

// Converts a JSON rule description into a Rule.
//
// `rule_json` may be null, in which case an error result is returned. On
// failure the result's `error_message` is non-empty; nested failures are
// prefixed with a description of the enclosing rule.
ParseRuleResult parse_rule(json_value *rule_json) {
  if (!rule_json) {
    return "Rule cannot be null";
  }
  return parse_rule_object(*rule_json);
}

// Parses a grammar from its JSON text.
//
// Reads the required "name" (string) and "rules" (object) members and the
// optional "extras", "conflicts" and "externals" arrays. Returns
// { name, grammar, "" } on success, or { "", InputGrammar(), message } as soon
// as the first problem is found.
ParseGrammarResult parse_grammar(const string &input) {
  // json_enable_comments is passed as the parser's settings flags; presumably
  // this lets grammar files contain comments (see json-parser's documentation).
  json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
  char parse_error[json_error_max];

  const std::unique_ptr<json_value, JsonValueDeleter> grammar_json(
    json_parse_ex(&settings, input.c_str(), input.size(), parse_error)
  );
  if (!grammar_json) {
    return grammar_error(string("Invalid JSON at ") + parse_error);
  }
  if (grammar_json->type != json_object) {
    return grammar_error("Body must be an object");
  }

  const json_value &name_json = (*grammar_json)["name"];
  if (name_json.type != json_string) {
    return grammar_error("Name must be a string");
  }
  const string name = name_json.u.string.ptr;

  const json_value &rules_json = (*grammar_json)["rules"];
  if (rules_json.type != json_object) {
    return grammar_error("Rules must be an object");
  }

  InputGrammar grammar;

  // Each entry of "rules" becomes one named variable, in document order.
  for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) {
    const json_object_entry &entry_json = rules_json.u.object.values[i];
    auto result = parse_rule(entry_json.value);
    if (!result.error_message.empty()) {
      return grammar_error(result.error_message);
    }
    grammar.variables.push_back(InputGrammar::Variable{
      string(entry_json.name),
      VariableTypeNamed,
      result.rule
    });
  }

  const json_value &extras_json = (*grammar_json)["extras"];
  if (extras_json.type != json_none) {
    if (extras_json.type != json_array) {
      return grammar_error("Extras must be an array");
    }
    for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) {
      auto result = parse_rule(extras_json.u.array.values[i]);
      if (!result.error_message.empty()) {
        return grammar_error("Invalid extra token: " + result.error_message);
      }
      grammar.extra_tokens.push_back(result.rule);
    }
  }

  const json_value &conflicts_json = (*grammar_json)["conflicts"];
  if (conflicts_json.type != json_none) {
    if (conflicts_json.type != json_array) {
      return grammar_error("Conflicts must be an array");
    }
    for (size_t i = 0, length = conflicts_json.u.array.length; i < length; i++) {
      const json_value *conflict_json = conflicts_json.u.array.values[i];
      if (conflict_json->type != json_array) {
        return grammar_error("Each conflict entry must be an array");
      }

      unordered_set<rules::NamedSymbol> conflict;
      for (size_t j = 0, conflict_length = conflict_json->u.array.length;
           j < conflict_length; j++) {
        const json_value *conflict_entry_json = conflict_json->u.array.values[j];
        if (conflict_entry_json->type != json_string) {
          return grammar_error("Each conflict entry must be an array of strings");
        }
        conflict.insert(rules::NamedSymbol{
          string(conflict_entry_json->u.string.ptr)
        });
      }
      grammar.expected_conflicts.push_back(conflict);
    }
  }

  const json_value &external_tokens_json = (*grammar_json)["externals"];
  if (external_tokens_json.type != json_none) {
    if (external_tokens_json.type != json_array) {
      return grammar_error("External tokens must be an array");
    }
    for (size_t i = 0, length = external_tokens_json.u.array.length; i < length; i++) {
      const json_value *token_name_json = external_tokens_json.u.array.values[i];
      if (token_name_json->type != json_string) {
        return grammar_error("External token values must be strings");
      }
      string token_name = token_name_json->u.string.ptr;
      grammar.external_tokens.push_back({ token_name, VariableTypeNamed, rules::NONE() });
    }
  }

  return { name, std::move(grammar), "" };
}

}  // namespace tree_sitter