diff --git a/examples/grammars/arithmetic.cc b/examples/grammars/arithmetic.cc index 4921703e..e414711c 100644 --- a/examples/grammars/arithmetic.cc +++ b/examples/grammars/arithmetic.cc @@ -2,28 +2,30 @@ #include "helpers.h" namespace tree_sitter_examples { - using tree_sitter::Grammar; - using namespace tree_sitter::rules; - extern const Grammar arithmetic({ - { "expression", choice({ - sym("sum"), - sym("difference"), - sym("product"), - sym("quotient"), - sym("exponent"), - sym("group"), - sym("number"), - sym("variable") }) }, +using tree_sitter::Grammar; +using namespace tree_sitter::rules; - { "sum", infix_op("+", "expression", 1) }, - { "difference", infix_op("-", "expression", 1) }, - { "product", infix_op("*", "expression", 2) }, - { "quotient", infix_op("/", "expression", 2) }, - { "exponent", infix_op("^", "expression", 3) }, - { "group", in_parens(err(sym("expression"))) }, +extern const Grammar arithmetic({ + { "expression", choice({ + sym("sum"), + sym("difference"), + sym("product"), + sym("quotient"), + sym("exponent"), + sym("group"), + sym("number"), + sym("variable") }) }, - { "number", pattern("\\d+") }, - { "variable", pattern("\\a[\\w_]*") }, - }); -} + { "sum", infix_op("+", "expression", 1) }, + { "difference", infix_op("-", "expression", 1) }, + { "product", infix_op("*", "expression", 2) }, + { "quotient", infix_op("/", "expression", 2) }, + { "exponent", infix_op("^", "expression", 3) }, + { "group", in_parens(err(sym("expression"))) }, + + { "number", pattern("\\d+") }, + { "variable", pattern("\\a[\\w_]*") }, +}); + +} // namespace tree_sitter_examples diff --git a/examples/grammars/golang.cc b/examples/grammars/golang.cc index 17f0a111..4a868771 100644 --- a/examples/grammars/golang.cc +++ b/examples/grammars/golang.cc @@ -2,170 +2,172 @@ #include "helpers.h" namespace tree_sitter_examples { - using tree_sitter::Grammar; - using namespace tree_sitter::rules; - static rule_ptr terminated(rule_ptr rule) { - return seq({ rule, choice({ - sym("_line_break"), - str(";") }) }); - } +using tree_sitter::Grammar; +using namespace tree_sitter::rules; - extern const Grammar golang = Grammar({ - { "program", seq({ - sym("package_directive"), - repeat(sym("imports_block")), - repeat(sym("declaration")) }) }, - { "package_directive", seq({ - keyword("package"), - sym("package_name") }) }, - { "imports_block", seq({ - keyword("import"), - choice({ - in_parens(err(repeat(sym("package_import")))), - sym("package_import") }) }) }, - { "package_import", sym("string") }, - { "declaration", choice({ - sym("type_declaration"), - sym("var_declaration"), - sym("func_declaration") }) }, +static rule_ptr terminated(rule_ptr rule) { + return seq({ rule, choice({ + sym("_line_break"), + str(";") }) }); +} - // Declarations - { "type_declaration", terminated(seq({ - keyword("type"), - sym("type_name"), - sym("type_expression") })) }, - { "var_declaration", terminated(seq({ - keyword("var"), - sym("var_name"), - choice({ - seq({ - optional(sym("type_expression")), - str("="), - sym("expression") }), - sym("type_expression") }) })) }, - { "func_declaration", terminated(seq({ - keyword("func"), - sym("var_name"), - sym("_func_signature"), - sym("block_statement") })) }, - { "block_statement", in_braces(err(repeat(sym("statement")))) }, - { "type_expression", choice({ - sym("pointer_type"), - sym("slice_type"), - sym("map_type"), - sym("interface_type"), - sym("struct_type"), - sym("type_name") }) }, +extern const Grammar golang = Grammar({ + { "program", seq({ + sym("package_directive"), + repeat(sym("imports_block")), + repeat(sym("declaration")) }) }, + { "package_directive", seq({ + keyword("package"), + sym("package_name") }) }, + { "imports_block", seq({ + keyword("import"), + choice({ + in_parens(err(repeat(sym("package_import")))), + sym("package_import") }) }) }, + { "package_import", sym("string") }, + { "declaration", choice({ + sym("type_declaration"), + sym("var_declaration"), + sym("func_declaration") }) }, - // Type expressions - { "pointer_type", seq({ - keyword("*"), - sym("type_expression") }) }, - { "map_type", seq({ - keyword("map"), - in_brackets(sym("type_expression")), - sym("type_expression") }) }, - { "slice_type", seq({ - in_brackets(blank()), - sym("type_expression") }) }, - { "struct_type", seq({ - keyword("struct"), - in_braces(repeat(seq({ - sym("var_name"), - sym("type_expression") }))) }) }, - { "interface_type", seq({ - keyword("interface"), - in_braces(repeat(seq({ - sym("var_name"), - sym("_func_signature") }))) }) }, + // Declarations + { "type_declaration", terminated(seq({ + keyword("type"), + sym("type_name"), + sym("type_expression") })) }, + { "var_declaration", terminated(seq({ + keyword("var"), + sym("var_name"), + choice({ + seq({ + optional(sym("type_expression")), + str("="), + sym("expression") }), + sym("type_expression") }) })) }, + { "func_declaration", terminated(seq({ + keyword("func"), + sym("var_name"), + sym("_func_signature"), + sym("block_statement") })) }, + { "block_statement", in_braces(err(repeat(sym("statement")))) }, + { "type_expression", choice({ + sym("pointer_type"), + sym("slice_type"), + sym("map_type"), + sym("interface_type"), + sym("struct_type"), + sym("type_name") }) }, - // Statements - { "statement", choice({ - sym("expression_statement"), - sym("return_statement"), - sym("declaration_statement"), - sym("range_statement"), - sym("if_statement") }) }, - { "return_statement", terminated(seq({ - keyword("return"), - comma_sep(sym("expression")) })) }, - { "declaration_statement", choice({ - sym("var_declaration"), - terminated(seq({ - comma_sep(sym("var_name")), - str(":="), - sym("expression") })) }) }, - { "range_statement", seq({ - keyword("for"), - sym("var_name"), - optional(seq({ str(","), sym("var_name") })), + // Type expressions + { "pointer_type", seq({ + keyword("*"), + sym("type_expression") }) }, + { "map_type", seq({ + keyword("map"), + in_brackets(sym("type_expression")), + sym("type_expression") }) }, + { "slice_type", seq({ + in_brackets(blank()), + sym("type_expression") }) }, + { "struct_type", seq({ + keyword("struct"), + in_braces(repeat(seq({ + sym("var_name"), + sym("type_expression") }))) }) }, + { "interface_type", seq({ + keyword("interface"), + in_braces(repeat(seq({ + sym("var_name"), + sym("_func_signature") }))) }) }, + + // Statements + { "statement", choice({ + sym("expression_statement"), + sym("return_statement"), + sym("declaration_statement"), + sym("range_statement"), + sym("if_statement") }) }, + { "return_statement", terminated(seq({ + keyword("return"), + comma_sep(sym("expression")) })) }, + { "declaration_statement", choice({ + sym("var_declaration"), + terminated(seq({ + comma_sep(sym("var_name")), str(":="), - keyword("range"), - sym("expression"), - sym("block_statement") }) }, - { "if_statement", seq({ - keyword("if"), - sym("expression"), - sym("block_statement"), - optional(seq({ - keyword("else"), - choice({ - sym("if_statement"), - sym("block_statement") }) })) }) }, - { "expression_statement", terminated(sym("expression")) }, - - // Value expressions - { "expression", choice({ - sym("call_expression"), - sym("selector_expression"), - sym("math_op"), - sym("bool_op"), - sym("number"), - sym("string"), - sym("var_name") }) }, - { "call_expression", seq({ - sym("expression"), - in_parens(comma_sep(sym("expression"))) }) }, - { "selector_expression", seq({ - sym("expression"), - str("."), - sym("var_name") }) }, - { "math_op", choice({ - infix_op("*", "expression", 2), - infix_op("/", "expression", 2), - infix_op("+", "expression", 1), - infix_op("-", "expression", 1) }) }, - { "bool_op", choice({ - infix_op("||", "expression", 1), - infix_op("&&", "expression", 2), - infix_op("==", "expression", 3), - infix_op("<=", "expression", 3), - infix_op("<", "expression", 3), - infix_op(">=", "expression", 3), - infix_op(">", "expression", 3), - prefix_op("!", "expression", 4) }) }, - { "_func_signature", seq({ - in_parens(comma_sep(seq({ - comma_sep1(sym("var_name")), - sym("type_expression") }))), + sym("expression") })) }) }, + { "range_statement", seq({ + keyword("for"), + sym("var_name"), + optional(seq({ str(","), sym("var_name") })), + str(":="), + keyword("range"), + sym("expression"), + sym("block_statement") }) }, + { "if_statement", seq({ + keyword("if"), + sym("expression"), + sym("block_statement"), + optional(seq({ + keyword("else"), choice({ - in_parens(choice({ - comma_sep1(seq({ sym("var_name"), sym("type_name") })), - comma_sep1(sym("type_name")) })), - sym("type_name"), - blank() }) }) }, + sym("if_statement"), + sym("block_statement") }) })) }) }, + { "expression_statement", terminated(sym("expression")) }, - { "_line_break", str("\n") }, + // Value expressions + { "expression", choice({ + sym("call_expression"), + sym("selector_expression"), + sym("math_op"), + sym("bool_op"), + sym("number"), + sym("string"), + sym("var_name") }) }, + { "call_expression", seq({ + sym("expression"), + in_parens(comma_sep(sym("expression"))) }) }, + { "selector_expression", seq({ + sym("expression"), + str("."), + sym("var_name") }) }, + { "math_op", choice({ + infix_op("*", "expression", 2), + infix_op("/", "expression", 2), + infix_op("+", "expression", 1), + infix_op("-", "expression", 1) }) }, + { "bool_op", choice({ + infix_op("||", "expression", 1), + infix_op("&&", "expression", 2), + infix_op("==", "expression", 3), + infix_op("<=", "expression", 3), + infix_op("<", "expression", 3), + infix_op(">=", "expression", 3), + infix_op(">", "expression", 3), + prefix_op("!", "expression", 4) }) }, + { "_func_signature", seq({ + in_parens(comma_sep(seq({ + comma_sep1(sym("var_name")), + sym("type_expression") }))), + choice({ + in_parens(choice({ + comma_sep1(seq({ sym("var_name"), sym("type_name") })), + comma_sep1(sym("type_name")) })), + sym("type_name"), + blank() }) }) }, - { "string", delimited("\"") }, - { "package_name", sym("_identifier") }, - { "var_name", sym("_identifier") }, - { "type_name", sym("_identifier") }, - { "_identifier", pattern("\\a[\\w_]*") }, - { "number", pattern("\\d+(\\.\\d+)?") }, - { "comment", keypattern("//[^\n]*") }, - }) + { "_line_break", str("\n") }, + + { "string", delimited("\"") }, + { "package_name", sym("_identifier") }, + { "var_name", sym("_identifier") }, + { "type_name", sym("_identifier") }, + { "_identifier", pattern("\\a[\\w_]*") }, + { "number", pattern("\\d+(\\.\\d+)?") }, + { "comment", keypattern("//[^\n]*") }, +}) .ubiquitous_tokens({ "comment", "_line_break" }) .separators({ ' ', '\t', '\r' }); -} + +} // namespace tree_sitter_examples diff --git a/examples/grammars/helpers.cc b/examples/grammars/helpers.cc index 5284664a..2212fdaa 100644 --- a/examples/grammars/helpers.cc +++ b/examples/grammars/helpers.cc @@ -1,58 +1,59 @@ #include "tree_sitter/compiler.h" namespace tree_sitter_examples { - using namespace tree_sitter::rules; - rule_ptr comma_sep1(rule_ptr element) { - return seq({ element, repeat(seq({ str(","), element })) }); - } +using namespace tree_sitter::rules; - rule_ptr comma_sep(rule_ptr element) { - return choice({ comma_sep1(element), blank() }); - } - - rule_ptr optional(rule_ptr rule) { - return choice({ rule, blank() }); - } - - rule_ptr in_parens(rule_ptr rule) { - return seq({ str("("), rule, str(")") }); - } - - rule_ptr in_braces(rule_ptr rule) { - return seq({ str("{"), rule, str("}") }); - } - - rule_ptr in_brackets(rule_ptr rule) { - return seq({ str("["), rule, str("]") }); - } - - rule_ptr infix_op(std::string op, std::string rule_name, int precedence) { - return prec(precedence, seq({ - sym(rule_name), - keyword(op), - sym(rule_name) })); - } - - rule_ptr prefix_op(std::string op, std::string rule_name, int precedence) { - return prec(precedence, seq({ - keyword(op), - sym(rule_name) })); - } - - rule_ptr postfix_op(std::string op, std::string rule_name, int precedence) { - return prec(precedence, seq({ - sym(rule_name), - keyword(op) })); - } - - rule_ptr delimited(std::string delimiter) { - return token(seq({ - str(delimiter), - repeat(choice({ - pattern("[^" + delimiter + "]"), - seq({ str("\\"), str(delimiter) }) })), - str(delimiter) - })); - } +rule_ptr comma_sep1(rule_ptr element) { + return seq({ element, repeat(seq({ str(","), element })) }); } + +rule_ptr comma_sep(rule_ptr element) { + return choice({ comma_sep1(element), blank() }); +} + +rule_ptr optional(rule_ptr rule) { + return choice({ rule, blank() }); +} + +rule_ptr in_parens(rule_ptr rule) { + return seq({ str("("), rule, str(")") }); +} + +rule_ptr in_braces(rule_ptr rule) { + return seq({ str("{"), rule, str("}") }); +} + +rule_ptr in_brackets(rule_ptr rule) { + return seq({ str("["), rule, str("]") }); +} + +rule_ptr infix_op(std::string op, std::string rule_name, int precedence) { + return prec(precedence, seq({ + sym(rule_name), + keyword(op), + sym(rule_name) })); +} + +rule_ptr prefix_op(std::string op, std::string rule_name, int precedence) { + return prec(precedence, seq({ + keyword(op), + sym(rule_name) })); +} + +rule_ptr postfix_op(std::string op, std::string rule_name, int precedence) { + return prec(precedence, seq({ + sym(rule_name), + keyword(op) })); +} + +rule_ptr delimited(std::string delimiter) { + return token(seq({ + str(delimiter), + repeat(choice({ + pattern("[^" + delimiter + "]"), + seq({ str("\\"), str(delimiter) }) })), + str(delimiter) })); +} + +} // namespace tree_sitter_examples diff --git a/examples/grammars/helpers.h b/examples/grammars/helpers.h index a982f774..f224fb9a 100644 --- a/examples/grammars/helpers.h +++ b/examples/grammars/helpers.h @@ -4,18 +4,20 @@ #include "tree_sitter/compiler.h" namespace tree_sitter_examples { - using namespace tree_sitter::rules; - rule_ptr comma_sep1(rule_ptr element); - rule_ptr comma_sep(rule_ptr element); - rule_ptr optional(rule_ptr rule); - rule_ptr in_parens(rule_ptr rule); - rule_ptr in_braces(rule_ptr rule); - rule_ptr in_brackets(rule_ptr rule); - rule_ptr infix_op(std::string op, std::string rule_name, int precedence); - rule_ptr prefix_op(std::string op, std::string rule_name, int precedence); - rule_ptr postfix_op(std::string op, std::string rule_name, int precedence); - rule_ptr delimited(std::string delimiter); -} +using namespace tree_sitter::rules; -#endif // TREESITTER_EXAMPLES_HELPERS_ \ No newline at end of file +rule_ptr comma_sep1(rule_ptr element); +rule_ptr comma_sep(rule_ptr element); +rule_ptr optional(rule_ptr rule); +rule_ptr in_parens(rule_ptr rule); +rule_ptr in_braces(rule_ptr rule); +rule_ptr in_brackets(rule_ptr rule); +rule_ptr infix_op(std::string op, std::string rule_name, int precedence); +rule_ptr prefix_op(std::string op, std::string rule_name, int precedence); +rule_ptr postfix_op(std::string op, std::string rule_name, int precedence); +rule_ptr delimited(std::string delimiter); + +} // namespace tree_sitter_examples + +#endif // TREESITTER_EXAMPLES_HELPERS_ diff --git a/examples/grammars/javascript.cc b/examples/grammars/javascript.cc index 4ed7a0e8..0987c3df 100644 --- a/examples/grammars/javascript.cc +++ b/examples/grammars/javascript.cc @@ -2,217 +2,219 @@ #include "helpers.h" namespace tree_sitter_examples { - using tree_sitter::Grammar; - using namespace tree_sitter::rules; - static rule_ptr terminated(rule_ptr rule) { - return seq({ rule, choice({ - sym("_line_break"), - str(";") }) }); - } +using tree_sitter::Grammar; +using namespace tree_sitter::rules; - extern const Grammar javascript = Grammar({ - { "program", repeat(sym("statement")) }, +static rule_ptr terminated(rule_ptr rule) { + return seq({ rule, choice({ + sym("_line_break"), + str(";") }) }); +} - // Statements - { "statement", choice({ - sym("statement_block"), - sym("if_statement"), - sym("try_statement"), - sym("switch_statement"), - sym("while_statement"), - sym("for_statement"), - sym("for_in_statement"), - sym("break_statement"), - sym("var_declaration"), - sym("throw_statement"), - sym("return_statement"), - sym("delete_statement"), - sym("expression_statement") }) }, - { "statement_block", in_braces(err(repeat(sym("statement")))) }, - { "for_statement", seq({ - keyword("for"), - in_parens(err(seq({ - choice({ - sym("var_declaration"), - sym("expression_statement") }), - sym("expression_statement"), - sym("expression") }))), - sym("statement") }) }, - { "for_in_statement", seq({ - keyword("for"), - in_parens(err(seq({ - optional(keyword("var")), - sym("identifier"), - keyword("in"), - sym("expression") }))), - sym("statement") }) }, - { "throw_statement", terminated(seq({ - keyword("throw"), - sym("expression") })) }, - { "if_statement", seq({ - keyword("if"), - in_parens(err(sym("expression"))), - sym("statement"), - optional(prec(1, seq({ - keyword("else"), - sym("statement") }))) }) }, - { "while_statement", seq({ - keyword("while"), - in_parens(err(sym("expression"))), - sym("statement") }) }, - { "try_statement", seq({ - keyword("try"), - sym("statement"), - optional(sym("catch_clause")), - optional(sym("finally_clause")) }) }, - { "catch_clause", seq({ - keyword("catch"), - in_parens(err(sym("identifier"))), - sym("statement") }) }, - { "finally_clause", seq({ - keyword("finally"), - sym("statement") }) }, - { "switch_statement", seq({ - keyword("switch"), - in_parens(err(sym("expression"))), - in_braces(repeat(sym("switch_case"))) }) }, - { "switch_case", seq({ +extern const Grammar javascript = Grammar({ + { "program", repeat(sym("statement")) }, + + // Statements + { "statement", choice({ + sym("statement_block"), + sym("if_statement"), + sym("try_statement"), + sym("switch_statement"), + sym("while_statement"), + sym("for_statement"), + sym("for_in_statement"), + sym("break_statement"), + sym("var_declaration"), + sym("throw_statement"), + sym("return_statement"), + sym("delete_statement"), + sym("expression_statement") }) }, + { "statement_block", in_braces(err(repeat(sym("statement")))) }, + { "for_statement", seq({ + keyword("for"), + in_parens(err(seq({ choice({ - seq({ - keyword("case"), - sym("expression") }), - keyword("default") }), - str(":"), - repeat(sym("statement")) }) }, - { "break_statement", terminated(keyword("break")) }, - { "var_declaration", terminated(seq({ - keyword("var"), - comma_sep(err(seq({ + sym("var_declaration"), + sym("expression_statement") }), + sym("expression_statement"), + sym("expression") }))), + sym("statement") }) }, + { "for_in_statement", seq({ + keyword("for"), + in_parens(err(seq({ + optional(keyword("var")), + sym("identifier"), + keyword("in"), + sym("expression") }))), + sym("statement") }) }, + { "throw_statement", terminated(seq({ + keyword("throw"), + sym("expression") })) }, + { "if_statement", seq({ + keyword("if"), + in_parens(err(sym("expression"))), + sym("statement"), + optional(prec(1, seq({ + keyword("else"), + sym("statement") }))) }) }, + { "while_statement", seq({ + keyword("while"), + in_parens(err(sym("expression"))), + sym("statement") }) }, + { "try_statement", seq({ + keyword("try"), + sym("statement"), + optional(sym("catch_clause")), + optional(sym("finally_clause")) }) }, + { "catch_clause", seq({ + keyword("catch"), + in_parens(err(sym("identifier"))), + sym("statement") }) }, + { "finally_clause", seq({ + keyword("finally"), + sym("statement") }) }, + { "switch_statement", seq({ + keyword("switch"), + in_parens(err(sym("expression"))), + in_braces(repeat(sym("switch_case"))) }) }, + { "switch_case", seq({ + choice({ + seq({ + keyword("case"), + sym("expression") }), + keyword("default") }), + str(":"), + repeat(sym("statement")) }) }, + { "break_statement", terminated(keyword("break")) }, + { "var_declaration", terminated(seq({ + keyword("var"), + comma_sep(err(seq({ sym("identifier"), optional(seq({ str("="), sym("expression") })) }))) })) }, - { "expression_statement", terminated(err(sym("expression"))) }, - { "return_statement", terminated(seq({ - keyword("return"), - optional(sym("expression")) })) }, - { "delete_statement", terminated(seq({ - keyword("delete"), - sym("property_access") })) }, + { "expression_statement", terminated(err(sym("expression"))) }, + { "return_statement", terminated(seq({ + keyword("return"), + optional(sym("expression")) })) }, + { "delete_statement", terminated(seq({ + keyword("delete"), + sym("property_access") })) }, - // Expressions - { "expression", choice({ - sym("function_expression"), - sym("function_call"), - sym("constructor_call"), - sym("property_access"), - sym("assignment"), - sym("ternary"), - sym("math_op"), - sym("bool_op"), - sym("object"), - sym("array"), - sym("regex"), - sym("string"), - sym("number"), - sym("true"), - sym("false"), - sym("null"), + // Expressions + { "expression", choice({ + sym("function_expression"), + sym("function_call"), + sym("constructor_call"), + sym("property_access"), + sym("assignment"), + sym("ternary"), + sym("math_op"), + sym("bool_op"), + sym("object"), + sym("array"), + sym("regex"), + sym("string"), + sym("number"), + sym("true"), + sym("false"), + sym("null"), + sym("identifier"), + sym("in_expression"), + sym("instanceof_expression"), + sym("typeof_expression"), + in_parens(sym("expression")) }) }, + { "in_expression", infix_op("in", "expression", 3) }, + { "instanceof_expression", infix_op("instanceof", "expression", 3) }, + { "typeof_expression", prefix_op("typeof", "expression", 3) }, + { "math_op", choice({ + prefix_op("++", "expression", 3), + prefix_op("--", "expression", 3), + postfix_op("++", "expression", 3), + postfix_op("--", "expression", 3), + prefix_op("+", "expression", 3), + prefix_op("-", "expression", 3), + infix_op("*", "expression", 2), + infix_op("/", "expression", 2), + infix_op("&", "expression", 2), + infix_op("|", "expression", 2), + infix_op("^", "expression", 2), + infix_op("+", "expression", 1), + infix_op("-", "expression", 1) }) }, + { "bool_op", choice({ + infix_op("||", "expression", 1), + infix_op("&&", "expression", 2), + infix_op("===", "expression", 3), + infix_op("==", "expression", 3), + infix_op("!==", "expression", 3), + infix_op("!=", "expression", 3), + infix_op("<=", "expression", 3), + infix_op("<", "expression", 3), + infix_op(">=", "expression", 3), + infix_op(">", "expression", 3), + prefix_op("!", "expression", 4) }) }, + { "ternary", seq({ + sym("expression"), + str("?"), + sym("expression"), + str(":"), + sym("expression") }) }, + { "assignment", prec(-1, seq({ + choice({ sym("identifier"), - sym("in_expression"), - sym("instanceof_expression"), - sym("typeof_expression"), - in_parens(sym("expression")) }) }, - { "in_expression", infix_op("in", "expression", 3) }, - { "instanceof_expression", infix_op("instanceof", "expression", 3) }, - { "typeof_expression", prefix_op("typeof", "expression", 3) }, - { "math_op", choice({ - prefix_op("++", "expression", 3), - prefix_op("--", "expression", 3), - postfix_op("++", "expression", 3), - postfix_op("--", "expression", 3), - prefix_op("+", "expression", 3), - prefix_op("-", "expression", 3), - infix_op("*", "expression", 2), - infix_op("/", "expression", 2), - infix_op("&", "expression", 2), - infix_op("|", "expression", 2), - infix_op("^", "expression", 2), - infix_op("+", "expression", 1), - infix_op("-", "expression", 1) }) }, - { "bool_op", choice({ - infix_op("||", "expression", 1), - infix_op("&&", "expression", 2), - infix_op("===", "expression", 3), - infix_op("==", "expression", 3), - infix_op("!==", "expression", 3), - infix_op("!=", "expression", 3), - infix_op("<=", "expression", 3), - infix_op("<", "expression", 3), - infix_op(">=", "expression", 3), - infix_op(">", "expression", 3), - prefix_op("!", "expression", 4) }) }, - { "ternary", seq({ - sym("expression"), - str("?"), - sym("expression"), - str(":"), - sym("expression") }) }, - { "assignment", prec(-1, seq({ - choice({ - sym("identifier"), - sym("property_access") }), - choice({ - str("="), - str("+="), - str("-="), - str("*="), - str("/=") }), - sym("expression") })) }, - { "function_expression", seq({ - keyword("function"), - optional(sym("identifier")), - sym("formal_parameters"), - sym("statement_block") }) }, - { "function_call", seq({ - sym("expression"), - in_parens(comma_sep(err(sym("expression")))) }) }, - { "constructor_call", seq({ - keyword("new"), - sym("function_call") }) }, - { "property_access", seq({ - sym("expression"), - prec(10, choice({ - seq({ - str("."), - sym("identifier") }), - in_brackets(sym("expression")) })) }) }, - { "formal_parameters", in_parens(comma_sep(sym("identifier"))) }, - - // Literals - { "comment", token(choice({ + sym("property_access") }), + choice({ + str("="), + str("+="), + str("-="), + str("*="), + str("/=") }), + sym("expression") })) }, + { "function_expression", seq({ + keyword("function"), + optional(sym("identifier")), + sym("formal_parameters"), + sym("statement_block") }) }, + { "function_call", seq({ + sym("expression"), + in_parens(comma_sep(err(sym("expression")))) }) }, + { "constructor_call", seq({ + keyword("new"), + sym("function_call") }) }, + { "property_access", seq({ + sym("expression"), + prec(10, choice({ seq({ - str("/*"), - repeat(pattern("[^*]|(*[^/])")), - str("*/") }), - pattern("//[^\n]*") })) }, - { "object", in_braces(comma_sep(err(seq({ - choice({ sym("string"), sym("identifier") }), - str(":"), - sym("expression") })))) }, - { "array", in_brackets(comma_sep(err(sym("expression")))) }, - { "regex", token(seq({ delimited("/"), optional(str("g")) })) }, - { "string", token(choice({ - delimited("\""), - delimited("'") })) }, - { "_line_break", str("\n") }, - { "identifier", pattern("[\\a_$][\\w_$]*") }, - { "number", pattern("\\d+(\\.\\d+)?") }, - { "null", keyword("null") }, - { "true", keyword("true") }, - { "false", keyword("false") }, - }) - .ubiquitous_tokens({ "comment", "_line_break" }) - .separators({ ' ', '\t', '\r' }); -} + str("."), + sym("identifier") }), + in_brackets(sym("expression")) })) }) }, + { "formal_parameters", in_parens(comma_sep(sym("identifier"))) }, + + // Literals + { "comment", token(choice({ + seq({ + str("/*"), + repeat(pattern("[^*]|(*[^/])")), + str("*/") }), + pattern("//[^\n]*") })) }, + { "object", in_braces(comma_sep(err(seq({ + choice({ sym("string"), sym("identifier") }), + str(":"), + sym("expression") })))) }, + { "array", in_brackets(comma_sep(err(sym("expression")))) }, + { "regex", token(seq({ delimited("/"), optional(str("g")) })) }, + { "string", token(choice({ + delimited("\""), + delimited("'") })) }, + { "_line_break", str("\n") }, + { "identifier", pattern("[\\a_$][\\w_$]*") }, + { "number", pattern("\\d+(\\.\\d+)?") }, + { "null", keyword("null") }, + { "true", keyword("true") }, + { "false", keyword("false") }, +}) + .ubiquitous_tokens({ "comment", "_line_break" }) + .separators({ ' ', '\t', '\r' }); + +} // namespace tree_sitter_examples diff --git a/examples/grammars/json.cc b/examples/grammars/json.cc index 8c6f9284..81af2368 100644 --- a/examples/grammars/json.cc +++ b/examples/grammars/json.cc @@ -2,27 +2,29 @@ #include "helpers.h" namespace tree_sitter_examples { - using tree_sitter::Grammar; - using namespace tree_sitter::rules; - extern const Grammar json({ - { "value", choice({ - sym("object"), - sym("array"), - sym("string"), - sym("number"), - sym("true"), - sym("false"), - sym("null"), }) }, - { "object", in_braces(comma_sep(err(seq({ - sym("string"), - str(":"), - sym("value") })))) }, - { "array", in_brackets(comma_sep(err(sym("value")))) }, - { "string", pattern("\"([^\"]|\\\\\")*\"") }, - { "number", pattern("\\d+(\\.\\d+)?") }, - { "null", keyword("null") }, - { "true", keyword("true") }, - { "false", keyword("false") }, - }); -} +using tree_sitter::Grammar; +using namespace tree_sitter::rules; + +extern const Grammar json({ + { "value", choice({ + sym("object"), + sym("array"), + sym("string"), + sym("number"), + sym("true"), + sym("false"), + sym("null"), }) }, + { "object", in_braces(comma_sep(err(seq({ + sym("string"), + str(":"), + sym("value") })))) }, + { "array", in_brackets(comma_sep(err(sym("value")))) }, + { "string", pattern("\"([^\"]|\\\\\")*\"") }, + { "number", pattern("\\d+(\\.\\d+)?") }, + { "null", keyword("null") }, + { "true", keyword("true") }, + { "false", keyword("false") }, +}); + +} // namespace tree_sitter_examples diff --git a/examples/parsers/json.c b/examples/parsers/json.c index fb618bee..a29dd3ba 100644 --- a/examples/parsers/json.c +++ b/examples/parsers/json.c @@ -319,7 +319,6 @@ LEX_FN() { ADVANCE(27); LEX_ERROR(); case ts_lex_state_error: - START_TOKEN(); if (lookahead == '\0') ADVANCE(25); if (('\t' <= lookahead && lookahead <= '\n') || diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index 075f46d4..5a6eebf2 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -7,71 +7,71 @@ #include namespace tree_sitter { - namespace rules { - class Rule; - typedef std::shared_ptr rule_ptr; +namespace rules { +class Rule; +typedef std::shared_ptr rule_ptr; - std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule); +std::ostream &operator<<(std::ostream &stream, const rule_ptr &rule); - rule_ptr blank(); - rule_ptr choice(const std::vector &rules); - rule_ptr repeat(const rule_ptr &content); - rule_ptr seq(const std::vector &rules); - rule_ptr sym(const std::string &name); - rule_ptr pattern(const std::string &value); - rule_ptr str(const std::string &value); - rule_ptr keyword(const std::string &value); - rule_ptr keypattern(const std::string &value); - rule_ptr err(const rule_ptr &rule); - rule_ptr prec(int precedence, rule_ptr rule); - rule_ptr token(rule_ptr rule); - } +rule_ptr blank(); +rule_ptr choice(const std::vector &rules); +rule_ptr repeat(const rule_ptr &content); +rule_ptr seq(const std::vector &rules); +rule_ptr sym(const std::string &name); +rule_ptr pattern(const std::string &value); +rule_ptr str(const std::string &value); +rule_ptr keyword(const std::string &value); +rule_ptr keypattern(const std::string &value); +rule_ptr err(const rule_ptr &rule); +rule_ptr prec(int precedence, rule_ptr rule); +rule_ptr token(rule_ptr rule); +} - class Grammar { - protected: - const std::vector> rules_; - std::set ubiquitous_tokens_; - std::set separators_; +class Grammar { + protected: + const std::vector > rules_; + std::set ubiquitous_tokens_; + std::set separators_; - public: - Grammar(const std::vector> &rules); - bool operator==(const Grammar &other) const; - std::string start_rule_name() const; - const rules::rule_ptr rule(const std::string &name) const; + public: + Grammar(const std::vector > &rules); + bool operator==(const Grammar &other) const; + std::string start_rule_name() const; + const rules::rule_ptr rule(const std::string &name) const; - const std::vector> & rules() const; - const std::set & ubiquitous_tokens() const; - Grammar & ubiquitous_tokens(const std::set &ubiquitous_tokens); - const std::set & separators() const; - Grammar & separators(const std::set &separators); - }; + const std::vector > &rules() const; + const std::set &ubiquitous_tokens() const; + Grammar &ubiquitous_tokens(const std::set &ubiquitous_tokens); + const std::set &separators() const; + Grammar &separators(const std::set &separators); +}; - struct Conflict { - Conflict(std::string description); - std::string description; - bool operator==(const Conflict &other) const; - bool operator<(const Conflict &other) const; - }; +struct Conflict { + Conflict(std::string description); + std::string description; + bool operator==(const Conflict &other) const; + bool operator<(const Conflict &other) const; +}; - enum GrammarErrorType { - GrammarErrorTypeRegex, - GrammarErrorTypeUndefinedSymbol - }; +enum GrammarErrorType { + GrammarErrorTypeRegex, + GrammarErrorTypeUndefinedSymbol +}; - class GrammarError { - public: - GrammarError(GrammarErrorType type, std::string message); - bool operator==(const GrammarError &other) const; - GrammarErrorType type; - std::string message; - }; +class GrammarError { + public: + GrammarError(GrammarErrorType type, std::string message); + bool operator==(const GrammarError &other) const; + GrammarErrorType type; + std::string message; +}; - std::ostream& operator<<(std::ostream &stream, const Grammar &grammar); - std::ostream& operator<<(std::ostream &stream, const Conflict &conflict); - std::ostream& operator<<(std::ostream &stream, const GrammarError *error); +std::ostream &operator<<(std::ostream &stream, const Grammar &grammar); +std::ostream &operator<<(std::ostream &stream, const Conflict &conflict); +std::ostream &operator<<(std::ostream &stream, const GrammarError *error); - std::tuple, const GrammarError *> - compile(const Grammar &grammar, std::string name); +std::tuple, const GrammarError *> compile( + const Grammar &grammar, std::string name); } #endif // TREE_SITTER_COMPILER_H_ diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 4f7fc940..a9bac3fa 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -26,7 +26,7 @@ typedef struct { TSLexer ts_lexer_make(); int ts_lexer_advance(TSLexer *lexer); -TSTree * ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden); +TSTree *ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden); static inline size_t ts_lexer_position(const TSLexer *lexer) { return lexer->chunk_start + lexer->position_in_chunk; @@ -53,11 +53,13 @@ typedef struct { TSStack ts_stack_make(); void ts_stack_delete(TSStack *); -TSTree * ts_stack_reduce(TSStack *stack, TSSymbol symbol, size_t immediate_child_count, const int *hidden_symbol_flags, int gather_extras); +TSTree *ts_stack_reduce(TSStack *stack, TSSymbol symbol, + size_t immediate_child_count, + const int *hidden_symbol_flags, int gather_extras); void ts_stack_shrink(TSStack *stack, size_t new_size); void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node); TSStateId ts_stack_top_state(const TSStack *stack); -TSTree * ts_stack_top_node(const TSStack *stack); +TSTree *ts_stack_top_node(const TSStack *stack); size_t ts_stack_right_position(const TSStack *stack); typedef enum { @@ -86,7 +88,7 @@ typedef struct { const int *hidden_symbol_flags; const TSParseAction *parse_table; const TSStateId *lex_states; - TSTree * (* lex_fn)(TSParser *, TSStateId); + TSTree *(*lex_fn)(TSParser *, TSStateId); } TSParserConfig; struct TSParser { @@ -98,91 +100,99 @@ struct TSParser { TSParserConfig config; }; -TSParser * ts_parser_make(TSParserConfig); +TSParser *ts_parser_make(TSParserConfig); void ts_parser_free(TSParser *); TSParserConfig ts_parser_config(TSParser *); -const TSTree * ts_parser_parse(TSParser *parser, TSInput input, TSInputEdit *edit); +const TSTree *ts_parser_parse(TSParser *parser, TSInput input, + TSInputEdit *edit); void ts_parser_start(TSParser *parser, TSInput input, TSInputEdit *edit); -TSTree * ts_parser_step(TSParser *parser); +TSTree *ts_parser_step(TSParser *parser); -#define SYMBOL_NAMES \ - static const char *ts_symbol_names[] +#define SYMBOL_NAMES static const char *ts_symbol_names[] -#define HIDDEN_SYMBOLS \ - static const int ts_hidden_symbol_flags[SYMBOL_COUNT] +#define HIDDEN_SYMBOLS static const int ts_hidden_symbol_flags[SYMBOL_COUNT] -#define LEX_STATES \ - static TSStateId ts_lex_states[STATE_COUNT] +#define LEX_STATES static TSStateId ts_lex_states[STATE_COUNT] #define PARSE_TABLE \ static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] -#define LEX_FN() \ - static TSTree * ts_lex(TSParser *parser, TSStateId lex_state) +#define LEX_FN() static TSTree *ts_lex(TSParser *parser, TSStateId lex_state) -#define DEBUG_LEX(...) \ - if (parser->lexer.debug) { fprintf(stderr, "\n" __VA_ARGS__); } +#define DEBUG_LEX(...) \ + if (parser->lexer.debug) { \ + fprintf(stderr, "\n" __VA_ARGS__); \ + } -#define START_LEXER() \ - DEBUG_LEX("LEX %d", lex_state); \ - char lookahead; \ - next_state: \ +#define START_LEXER() \ + DEBUG_LEX("LEX %d", lex_state); \ + char lookahead; \ + next_state: \ lookahead = ts_lexer_lookahead_char(&parser->lexer); \ DEBUG_LEX("CHAR '%c'", lookahead); -#define START_TOKEN() \ - ts_lexer_start_token(&parser->lexer); +#define START_TOKEN() ts_lexer_start_token(&parser->lexer); -#define ADVANCE(state_index) \ - { \ - DEBUG_LEX("ADVANCE %d", state_index); \ - if (!ts_lexer_advance(&parser->lexer)) ACCEPT_TOKEN(ts_builtin_sym_end); \ - lex_state = state_index; goto next_state; \ +#define ADVANCE(state_index) \ + { \ + DEBUG_LEX("ADVANCE %d", state_index); \ + if (!ts_lexer_advance(&parser->lexer)) \ + ACCEPT_TOKEN(ts_builtin_sym_end); \ + lex_state = state_index; \ + goto next_state; \ } -#define ACCEPT_TOKEN(symbol) \ - { \ - DEBUG_LEX("TOKEN %s", ts_symbol_names[symbol]); \ - return ts_lexer_build_node(&parser->lexer, symbol, ts_hidden_symbol_flags[symbol]); \ +#define ACCEPT_TOKEN(symbol) \ + { \ + DEBUG_LEX("TOKEN %s", ts_symbol_names[symbol]); \ + return ts_lexer_build_node(&parser->lexer, symbol, \ + ts_hidden_symbol_flags[symbol]); \ } -#define LEX_ERROR() \ - { \ - DEBUG_LEX("ERROR"); \ +#define LEX_ERROR() \ + { \ + DEBUG_LEX("ERROR"); \ return ts_lexer_build_node(&parser->lexer, ts_builtin_sym_error, 0); \ } -#define LEX_PANIC() \ - { \ +#define LEX_PANIC() \ + { \ DEBUG_LEX("LEX ERROR: unexpected state %d", lex_state); \ - return NULL; \ + return NULL; \ } -#define SHIFT(to_state_value) \ - { .type = TSParseActionTypeShift, .data = { .to_state = to_state_value } } +#define SHIFT(to_state_value) \ + { \ + .type = TSParseActionTypeShift, .data = { .to_state = to_state_value } \ + } #define SHIFT_EXTRA() \ { .type = TSParseActionTypeShiftExtra } -#define REDUCE_EXTRA(symbol_val) \ - { .type = TSParseActionTypeReduceExtra, .data = { .symbol = symbol_val } } +#define REDUCE_EXTRA(symbol_val) \ + { \ + .type = TSParseActionTypeReduceExtra, .data = { .symbol = symbol_val } \ + } -#define REDUCE(symbol_val, child_count_val) \ - { .type = TSParseActionTypeReduce, .data = { .symbol = symbol_val, .child_count = child_count_val } } +#define REDUCE(symbol_val, child_count_val) \ + { \ + .type = TSParseActionTypeReduce, \ + .data = { .symbol = symbol_val, .child_count = child_count_val } \ + } #define ACCEPT_INPUT() \ { .type = TSParseActionTypeAccept } -#define EXPORT_PARSER(constructor_name) \ - TSParser * constructor_name() { \ - return ts_parser_make((TSParserConfig) { \ - .symbol_count = SYMBOL_COUNT, \ - .hidden_symbol_flags = ts_hidden_symbol_flags, \ +#define EXPORT_PARSER(constructor_name) \ + TSParser *constructor_name() { \ + return ts_parser_make((TSParserConfig) { \ + .symbol_count = SYMBOL_COUNT, \ + .hidden_symbol_flags = ts_hidden_symbol_flags, \ .parse_table = (const TSParseAction *)ts_parse_actions, \ - .lex_states = ts_lex_states, \ - .symbol_names = ts_symbol_names, \ - .lex_fn = ts_lex, \ - }); \ + .lex_states = ts_lex_states, \ + .symbol_names = ts_symbol_names, \ + .lex_fn = ts_lex, \ + }); \ } #ifdef __cplusplus diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 5b775371..c842caad 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -9,9 +9,9 @@ extern "C" { typedef struct { void *data; - const char * (* read_fn)(void *data, size_t *bytes_read); - int (* seek_fn)(void *data, size_t position); - void (* release_fn)(void *data); + const char *(*read_fn)(void *data, size_t *bytes_read); + int (*seek_fn)(void *data, size_t position); + void (*release_fn)(void *data); } TSInput; typedef struct { @@ -29,26 +29,26 @@ typedef struct TSDocument TSDocument; size_t ts_node_pos(const TSNode *); size_t ts_node_size(const TSNode *); TSSymbol ts_node_sym(const TSNode *); -TSNode * ts_node_child(TSNode *, size_t); +TSNode *ts_node_child(TSNode *, size_t); size_t ts_node_child_count(const TSNode *); -TSNode * ts_node_leaf_at_pos(TSNode *, size_t); -TSNode * ts_node_parent(TSNode *node); -TSNode * ts_node_next_sibling(TSNode *node); -TSNode * ts_node_prev_sibling(TSNode *node); -const char * ts_node_name(const TSNode *); -const char * ts_node_string(const TSNode *); +TSNode *ts_node_leaf_at_pos(TSNode *, size_t); +TSNode *ts_node_parent(TSNode *node); +TSNode *ts_node_next_sibling(TSNode *node); +TSNode *ts_node_prev_sibling(TSNode *node); +const char *ts_node_name(const TSNode *); +const char *ts_node_string(const TSNode *); void ts_node_retain(TSNode *node); void ts_node_release(TSNode *node); int ts_node_eq(const TSNode *, const TSNode *); -TSDocument * ts_document_make(); +TSDocument *ts_document_make(); void ts_document_free(TSDocument *doc); void ts_document_set_parser(TSDocument *doc, TSParser *parser); void ts_document_set_input(TSDocument *doc, TSInput input); void ts_document_set_input_string(TSDocument *doc, const char *text); void ts_document_edit(TSDocument *doc, TSInputEdit edit); -const char * ts_document_string(const TSDocument *doc); -TSNode * ts_document_root_node(const TSDocument *document); +const char *ts_document_string(const TSDocument *doc); +TSNode *ts_document_root_node(const TSDocument *document); #define ts_builtin_sym_error 0 #define ts_builtin_sym_end 1 diff --git a/script/lint.sh b/script/lint.sh index da1d6672..bc24d1bb 100755 --- a/script/lint.sh +++ b/script/lint.sh @@ -11,6 +11,6 @@ fi $CPPLINT \ --root=src \ --linelength=110 \ - --filter=-legal/copyright,-readability/namespace,-whitespace/indent,-whitespace/line_length,-readability/todo \ + --filter=-legal/copyright,-whitespace/indent,-whitespace/line_length,-readability/todo \ $(find src/compiler -type f) \ 2>&1 diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 63387cfa..f376795f 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -15,128 +15,133 @@ #include "compiler/build_tables/item_set_transitions.h" namespace tree_sitter { - using std::string; - using std::map; - using std::unordered_map; - using std::set; - using std::make_shared; - using rules::Symbol; - using rules::CharacterSet; +namespace build_tables { - namespace build_tables { - class LexTableBuilder { - const LexicalGrammar lex_grammar; - ParseTable *parse_table; - LexConflictManager conflict_manager; - unordered_map lex_state_ids; - LexTable lex_table; +using std::string; +using std::map; +using std::unordered_map; +using std::set; +using std::make_shared; +using rules::Symbol; +using rules::CharacterSet; - LexItemSet build_lex_item_set(const set &symbols) { - LexItemSet result; - for (const auto &symbol : symbols) { - if (symbol == rules::ERROR()) - continue; - else if (symbol == rules::END_OF_INPUT()) - result.insert(LexItem(symbol, after_separators(CharacterSet({ 0 }).copy()))); - else if (symbol.is_token()) - result.insert(LexItem(symbol, after_separators(lex_grammar.rule(symbol)))); - } - return result; - } +class LexTableBuilder { + const LexicalGrammar lex_grammar; + ParseTable *parse_table; + LexConflictManager conflict_manager; + unordered_map lex_state_ids; + LexTable lex_table; - LexStateId add_lex_state(const LexItemSet &item_set) { - auto pair = lex_state_ids.find(item_set); - if (pair == lex_state_ids.end()) { - LexStateId state_id = lex_table.add_state(); - lex_state_ids[item_set] = state_id; - add_accept_token_actions(item_set, state_id); - add_advance_actions(item_set, state_id); - add_token_start(item_set, state_id); - return state_id; - } else { - return pair->second; - } - } - - void add_error_lex_state() { - LexItemSet item_set = build_lex_item_set(parse_table->symbols); - add_accept_token_actions(item_set, LexTable::ERROR_STATE_ID); - add_advance_actions(item_set, LexTable::ERROR_STATE_ID); - } - - void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) { - auto transitions = char_transitions(item_set); - for (const auto &transition : transitions) { - CharacterSet rule = transition.first; - LexItemSet new_item_set = transition.second; - LexStateId new_state_id = add_lex_state(new_item_set); - auto action = LexAction::Advance(new_state_id, precedence_values_for_item_set(new_item_set)); - if (conflict_manager.resolve_lex_action(lex_table.state(state_id).default_action, action)) - lex_table.state(state_id).actions[rule] = action; - } - } - - void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) { - for (const LexItem &item : item_set) { - if (item.is_done()) { - auto current_action = lex_table.state(state_id).default_action; - auto new_action = LexAction::Accept(item.lhs, item.precedence()); - if (conflict_manager.resolve_lex_action(current_action, new_action)) - lex_table.state(state_id).default_action = new_action; - } - } - } - - void add_token_start(const LexItemSet &item_set, LexStateId state_id) { - for (const auto &item : item_set) - if (item.is_token_start()) - lex_table.state(state_id).is_token_start = true; - } - - CharacterSet separator_set() const { - set ranges; - for (char c : lex_grammar.separators) - ranges.insert(c); - return CharacterSet(ranges); - } - - rules::rule_ptr after_separators(rules::rule_ptr rule) { - return rules::Seq::Build({ - make_shared( - make_shared(separator_set().copy()), - map({ - {rules::START_TOKEN, 1}, - {rules::PRECEDENCE, -1}, - })), - rule, - }); - } - - set precedence_values_for_item_set(const LexItemSet &item_set) const { - set result; - for (const auto &item : item_set) - result.insert(item.precedence()); - return result; - } - - public: - LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) : - lex_grammar(lex_grammar), - parse_table(parse_table), - conflict_manager(LexConflictManager(lex_grammar)) {} - - LexTable build() { - for (auto &parse_state : parse_table->states) { - LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs()); - parse_state.lex_state_id = add_lex_state(item_set); - } - add_error_lex_state(); - return lex_table; - } - }; - - LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar) { - return LexTableBuilder(parse_table, lex_grammar).build(); - } + LexItemSet build_lex_item_set(const set &symbols) { + LexItemSet result; + for (const auto &symbol : symbols) { + if (symbol == rules::ERROR()) + continue; + else if (symbol == rules::END_OF_INPUT()) + result.insert( + LexItem(symbol, after_separators(CharacterSet({ 0 }).copy()))); + else if (symbol.is_token()) + result.insert( + LexItem(symbol, after_separators(lex_grammar.rule(symbol)))); } + return result; + } + + LexStateId add_lex_state(const LexItemSet &item_set) { + auto pair = lex_state_ids.find(item_set); + if (pair == lex_state_ids.end()) { + LexStateId state_id = lex_table.add_state(); + lex_state_ids[item_set] = state_id; + add_accept_token_actions(item_set, state_id); + add_advance_actions(item_set, state_id); + add_token_start(item_set, state_id); + return state_id; + } else { + return pair->second; + } + } + + void add_error_lex_state() { + LexItemSet item_set = build_lex_item_set(parse_table->symbols); + add_accept_token_actions(item_set, LexTable::ERROR_STATE_ID); + add_advance_actions(item_set, LexTable::ERROR_STATE_ID); + } + + void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) { + auto transitions = char_transitions(item_set); + for (const auto &transition : transitions) { + CharacterSet rule = transition.first; + LexItemSet new_item_set = transition.second; + LexStateId new_state_id = add_lex_state(new_item_set); + auto action = LexAction::Advance( + new_state_id, precedence_values_for_item_set(new_item_set)); + if (conflict_manager.resolve_lex_action( + lex_table.state(state_id).default_action, action)) + lex_table.state(state_id).actions[rule] = action; + } + } + + void add_accept_token_actions(const LexItemSet &item_set, + LexStateId state_id) { + for (const LexItem &item : item_set) { + if (item.is_done()) { + auto current_action = lex_table.state(state_id).default_action; + auto new_action = LexAction::Accept(item.lhs, item.precedence()); + if (conflict_manager.resolve_lex_action(current_action, new_action)) + lex_table.state(state_id).default_action = new_action; + } + } + } + + void add_token_start(const LexItemSet &item_set, LexStateId state_id) { + for (const auto &item : item_set) + if (item.is_token_start()) + lex_table.state(state_id).is_token_start = true; + } + + CharacterSet separator_set() const { + set ranges; + for (char c : lex_grammar.separators) + ranges.insert(c); + return CharacterSet(ranges); + } + + rules::rule_ptr after_separators(rules::rule_ptr rule) { + return rules::Seq::Build( + { make_shared( + make_shared(separator_set().copy()), + map( + { { rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 }, })), + rule, }); + } + + set precedence_values_for_item_set(const LexItemSet &item_set) const { + set result; + for (const auto &item : item_set) + result.insert(item.precedence()); + return result; + } + + public: + LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) + : lex_grammar(lex_grammar), + parse_table(parse_table), + conflict_manager(LexConflictManager(lex_grammar)) {} + + LexTable build() { + for (auto &parse_state : parse_table->states) { + LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs()); + parse_state.lex_state_id = add_lex_state(item_set); + } + add_error_lex_state(); + return lex_table; + } +}; + +LexTable build_lex_table(ParseTable *parse_table, + const LexicalGrammar &lex_grammar) { + return LexTableBuilder(parse_table, lex_grammar).build(); } + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/build_lex_table.h b/src/compiler/build_tables/build_lex_table.h index 8f0209d9..acf35595 100644 --- a/src/compiler/build_tables/build_lex_table.h +++ b/src/compiler/build_tables/build_lex_table.h @@ -5,12 +5,16 @@ #include "compiler/lex_table.h" namespace tree_sitter { - class LexicalGrammar; - class ParseTable; - namespace build_tables { - LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar); - } -} +class LexicalGrammar; +class ParseTable; + +namespace build_tables { + +LexTable build_lex_table(ParseTable *parse_table, + const LexicalGrammar &lex_grammar); + +} // namespace build_tables +} // namespace tree_sitter #endif // COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_ diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 89ecd877..6f1cb4de 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -14,142 +14,156 @@ #include "compiler/build_tables/first_set.h" namespace tree_sitter { - using std::pair; - using std::string; - using std::vector; - using std::set; - using std::map; - using std::unordered_map; - using std::make_shared; - using rules::Symbol; +namespace build_tables { - namespace build_tables { - class ParseTableBuilder { - const SyntaxGrammar grammar; - ParseConflictManager conflict_manager; - unordered_map parse_state_ids; - vector> item_sets_to_process; - ParseTable parse_table; +using std::pair; +using std::string; +using std::vector; +using std::set; +using std::map; +using std::unordered_map; +using std::make_shared; +using rules::Symbol; - ParseStateId add_parse_state(const ParseItemSet &item_set) { - auto pair = parse_state_ids.find(item_set); - if (pair == parse_state_ids.end()) { - ParseStateId state_id = parse_table.add_state(); - parse_state_ids[item_set] = state_id; - item_sets_to_process.push_back({ item_set, state_id }); - return state_id; - } else { - return pair->second; - } - } +class ParseTableBuilder { + const SyntaxGrammar grammar; + ParseConflictManager conflict_manager; + unordered_map parse_state_ids; + vector > item_sets_to_process; + ParseTable parse_table; - void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) { - for (const auto &pair : item_set) { - const ParseItem &item = pair.first; - const set &lookahead_symbols = pair.second; - - if (item.is_done()) { - ParseAction action = (item.lhs == rules::START()) ? - ParseAction::Accept() : - ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence()); - for (auto &lookahead_sym : lookahead_symbols) - if (should_add_action(state_id, lookahead_sym, action)) - parse_table.add_action(state_id, lookahead_sym, action); - } - } - } - - void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) { - for (const auto &transition : sym_transitions(item_set, grammar)) { - const Symbol &symbol = transition.first; - const ParseItemSet &next_item_set = transition.second; - - ParseAction new_action = ParseAction::Shift(0, precedence_values_for_item_set(next_item_set)); - if (should_add_action(state_id, symbol, new_action)) { - ParseStateId new_state_id = add_parse_state(next_item_set); - new_action.state_index = new_state_id; - parse_table.add_action(state_id, symbol, new_action); - } - } - } - - void add_shift_extra_actions(ParseStateId state_id) { - const map &actions = parse_table.states[state_id].actions; - for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) { - const auto &pair_for_symbol = actions.find(ubiquitous_symbol); - if (pair_for_symbol == actions.end()) { - parse_table.add_action(state_id, ubiquitous_symbol, ParseAction::ShiftExtra()); - } - } - } - - void add_reduce_extra_actions(ParseStateId state_id) { - const map &actions = parse_table.states[state_id].actions; - for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) { - const auto &pair_for_symbol = actions.find(ubiquitous_symbol); - - if (pair_for_symbol != actions.end() && pair_for_symbol->second.type == ParseActionTypeShift) { - size_t shift_state_id = pair_for_symbol->second.state_index; - for (const auto &pair : actions) { - const Symbol &lookahead_sym = pair.first; - ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol); - if (should_add_action(shift_state_id, lookahead_sym, reduce_extra)) - parse_table.add_action(shift_state_id, lookahead_sym, reduce_extra); - } - } - } - } - - bool should_add_action(ParseStateId state_id, const Symbol &symbol, const ParseAction &action) { - auto current_actions = parse_table.states[state_id].actions; - auto current_action = current_actions.find(symbol); - return ( - current_action == current_actions.end() || - conflict_manager.resolve_parse_action(symbol, current_action->second, action)); - } - - set precedence_values_for_item_set(const ParseItemSet &item_set) { - set result; - for (const auto &pair : item_set) { - const ParseItem &item = pair.first; - if (item.consumed_symbol_count > 0) - result.insert(item.precedence()); - } - return result; - } - - public: - ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) : - grammar(grammar), - conflict_manager(ParseConflictManager(grammar, lex_grammar)) {} - - pair> build() { - ParseItem start_item(rules::START(), make_shared(0), 0); - add_parse_state(item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar)); - - parse_table.symbols.insert(rules::ERROR()); - - while (!item_sets_to_process.empty()) { - auto pair = item_sets_to_process.back(); - ParseItemSet &item_set = pair.first; - ParseStateId &state_id = pair.second; - item_sets_to_process.pop_back(); - - add_reduce_actions(item_set, state_id); - add_shift_actions(item_set, state_id); - add_shift_extra_actions(state_id); - } - - for (ParseStateId state_id = 0; state_id < parse_table.states.size(); state_id++) - add_reduce_extra_actions(state_id); - - return { parse_table, conflict_manager.conflicts() }; - } - }; - - pair> - build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) { - return ParseTableBuilder(grammar, lex_grammar).build(); - } + ParseStateId add_parse_state(const ParseItemSet &item_set) { + auto pair = parse_state_ids.find(item_set); + if (pair == parse_state_ids.end()) { + ParseStateId state_id = parse_table.add_state(); + parse_state_ids[item_set] = state_id; + item_sets_to_process.push_back({ item_set, state_id }); + return state_id; + } else { + return pair->second; } + } + + void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) { + for (const auto &pair : item_set) { + const ParseItem &item = pair.first; + const set &lookahead_symbols = pair.second; + + if (item.is_done()) { + ParseAction action = + (item.lhs == rules::START()) + ? ParseAction::Accept() + : ParseAction::Reduce(item.lhs, item.consumed_symbol_count, + item.precedence()); + for (auto &lookahead_sym : lookahead_symbols) + if (should_add_action(state_id, lookahead_sym, action)) + parse_table.add_action(state_id, lookahead_sym, action); + } + } + } + + void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) { + for (const auto &transition : sym_transitions(item_set, grammar)) { + const Symbol &symbol = transition.first; + const ParseItemSet &next_item_set = transition.second; + + ParseAction new_action = + ParseAction::Shift(0, precedence_values_for_item_set(next_item_set)); + if (should_add_action(state_id, symbol, new_action)) { + ParseStateId new_state_id = add_parse_state(next_item_set); + new_action.state_index = new_state_id; + parse_table.add_action(state_id, symbol, new_action); + } + } + } + + void add_shift_extra_actions(ParseStateId state_id) { + const map &actions = + parse_table.states[state_id].actions; + for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) { + const auto &pair_for_symbol = actions.find(ubiquitous_symbol); + if (pair_for_symbol == actions.end()) { + parse_table.add_action(state_id, ubiquitous_symbol, + ParseAction::ShiftExtra()); + } + } + } + + void add_reduce_extra_actions(ParseStateId state_id) { + const map &actions = + parse_table.states[state_id].actions; + for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) { + const auto &pair_for_symbol = actions.find(ubiquitous_symbol); + + if (pair_for_symbol != actions.end() && + pair_for_symbol->second.type == ParseActionTypeShift) { + size_t shift_state_id = pair_for_symbol->second.state_index; + for (const auto &pair : actions) { + const Symbol &lookahead_sym = pair.first; + ParseAction reduce_extra = + ParseAction::ReduceExtra(ubiquitous_symbol); + if (should_add_action(shift_state_id, lookahead_sym, reduce_extra)) + parse_table.add_action(shift_state_id, lookahead_sym, reduce_extra); + } + } + } + } + + bool should_add_action(ParseStateId state_id, const Symbol &symbol, + const ParseAction &action) { + auto current_actions = parse_table.states[state_id].actions; + auto current_action = current_actions.find(symbol); + return (current_action == current_actions.end() || + conflict_manager.resolve_parse_action( + symbol, current_action->second, action)); + } + + set precedence_values_for_item_set(const ParseItemSet &item_set) { + set result; + for (const auto &pair : item_set) { + const ParseItem &item = pair.first; + if (item.consumed_symbol_count > 0) + result.insert(item.precedence()); + } + return result; + } + + public: + ParseTableBuilder(const SyntaxGrammar &grammar, + const LexicalGrammar &lex_grammar) + : grammar(grammar), + conflict_manager(ParseConflictManager(grammar, lex_grammar)) {} + + pair > build() { + ParseItem start_item(rules::START(), make_shared(0), 0); + add_parse_state( + item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar)); + + parse_table.symbols.insert(rules::ERROR()); + + while (!item_sets_to_process.empty()) { + auto pair = item_sets_to_process.back(); + ParseItemSet &item_set = pair.first; + ParseStateId &state_id = pair.second; + item_sets_to_process.pop_back(); + + add_reduce_actions(item_set, state_id); + add_shift_actions(item_set, state_id); + add_shift_extra_actions(state_id); + } + + for (ParseStateId state_id = 0; state_id < parse_table.states.size(); + state_id++) + add_reduce_extra_actions(state_id); + + return { parse_table, conflict_manager.conflicts() }; + } +}; + +pair > build_parse_table( + const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) { + return ParseTableBuilder(grammar, lex_grammar).build(); } + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/build_parse_table.h b/src/compiler/build_tables/build_parse_table.h index d1414235..a2dbcb17 100644 --- a/src/compiler/build_tables/build_parse_table.h +++ b/src/compiler/build_tables/build_parse_table.h @@ -7,13 +7,13 @@ #include "compiler/parse_table.h" namespace tree_sitter { - class SyntaxGrammar; - class LexicalGrammar; +class SyntaxGrammar; +class LexicalGrammar; - namespace build_tables { - std::pair> - build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar); - } +namespace build_tables { +std::pair > build_parse_table( + const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar); +} } #endif // COMPILER_BUILD_TABLES_BUILD_PARSE_TABLE_H_ diff --git a/src/compiler/build_tables/build_tables.cc b/src/compiler/build_tables/build_tables.cc index a6ac96ab..24d882e0 100644 --- a/src/compiler/build_tables/build_tables.cc +++ b/src/compiler/build_tables/build_tables.cc @@ -4,19 +4,20 @@ #include "compiler/prepared_grammar.h" namespace tree_sitter { - using std::tuple; - using std::vector; - using std::make_tuple; +namespace build_tables { - namespace build_tables { - tuple> - build_tables(const SyntaxGrammar &grammar, - const LexicalGrammar &lex_grammar) { - auto parse_table_result = build_parse_table(grammar, lex_grammar); - ParseTable parse_table = parse_table_result.first; - vector conflicts = parse_table_result.second; - LexTable lex_table = build_lex_table(&parse_table, lex_grammar); - return make_tuple(parse_table, lex_table, conflicts); - } - } +using std::tuple; +using std::vector; +using std::make_tuple; + +tuple > build_tables( + const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) { + auto parse_table_result = build_parse_table(grammar, lex_grammar); + ParseTable parse_table = parse_table_result.first; + vector conflicts = parse_table_result.second; + LexTable lex_table = build_lex_table(&parse_table, lex_grammar); + return make_tuple(parse_table, lex_table, conflicts); } + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/build_tables.h b/src/compiler/build_tables/build_tables.h index b4bbd0ed..c84aa193 100644 --- a/src/compiler/build_tables/build_tables.h +++ b/src/compiler/build_tables/build_tables.h @@ -8,14 +8,13 @@ #include "compiler/lex_table.h" namespace tree_sitter { - class SyntaxGrammar; - class LexicalGrammar; +class SyntaxGrammar; +class LexicalGrammar; - namespace build_tables { - std::tuple> - build_tables(const SyntaxGrammar &grammar, - const LexicalGrammar &lex_grammar); - } +namespace build_tables { +std::tuple > build_tables( + const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar); +} } #endif // COMPILER_BUILD_TABLES_BUILD_TABLES_H_ diff --git a/src/compiler/build_tables/first_set.cc b/src/compiler/build_tables/first_set.cc index b61e433f..b8b0d8ad 100644 --- a/src/compiler/build_tables/first_set.cc +++ b/src/compiler/build_tables/first_set.cc @@ -9,53 +9,55 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - using std::set; - using rules::Symbol; +namespace build_tables { - namespace build_tables { - class FirstSet : public rules::RuleFn> { - const SyntaxGrammar *grammar; - set visited_symbols; +using std::set; +using rules::Symbol; - public: - explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {} +class FirstSet : public rules::RuleFn > { + const SyntaxGrammar *grammar; + set visited_symbols; - set apply_to(const Symbol *rule) { - auto insertion_result = visited_symbols.insert(*rule); - if (insertion_result.second) { - return (rule->is_token()) ? - set({ *rule }) : - apply(grammar->rule(*rule)); - } else { - return set(); - } - } + public: + explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {} - set apply_to(const rules::Metadata *rule) { - return apply(rule->rule); - } - - set apply_to(const rules::Choice *rule) { - set result; - for (const auto &el : rule->elements) { - auto &&next_syms = apply(el); - result.insert(next_syms.begin(), next_syms.end()); - } - return result; - } - - set apply_to(const rules::Seq *rule) { - auto &&result = apply(rule->left); - if (rule_can_be_blank(rule->left, *grammar)) { - auto &&right_symbols = apply(rule->right); - result.insert(right_symbols.begin(), right_symbols.end()); - } - return result; - } - }; - - set first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) { - return FirstSet(&grammar).apply(rule); - } + set apply_to(const Symbol *rule) { + auto insertion_result = visited_symbols.insert(*rule); + if (insertion_result.second) { + return (rule->is_token()) ? set({ *rule }) + : apply(grammar->rule(*rule)); + } else { + return set(); } + } + + set apply_to(const rules::Metadata *rule) { + return apply(rule->rule); + } + + set apply_to(const rules::Choice *rule) { + set result; + for (const auto &el : rule->elements) { + auto &&next_syms = apply(el); + result.insert(next_syms.begin(), next_syms.end()); + } + return result; + } + + set apply_to(const rules::Seq *rule) { + auto &&result = apply(rule->left); + if (rule_can_be_blank(rule->left, *grammar)) { + auto &&right_symbols = apply(rule->right); + result.insert(right_symbols.begin(), right_symbols.end()); + } + return result; + } +}; + +set first_set(const rules::rule_ptr &rule, + const SyntaxGrammar &grammar) { + return FirstSet(&grammar).apply(rule); } + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/first_set.h b/src/compiler/build_tables/first_set.h index 733203b1..9e7dd800 100644 --- a/src/compiler/build_tables/first_set.h +++ b/src/compiler/build_tables/first_set.h @@ -6,18 +6,20 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - class SyntaxGrammar; - namespace build_tables { +class SyntaxGrammar; - /* - * Returns the set of terminal symbols that can appear at - * the beginning of a string derivable from a given rule, - * in a given grammar. - */ - std::set - first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar); - } -} +namespace build_tables { + +/* + * Returns the set of terminal symbols that can appear at + * the beginning of a string derivable from a given rule, + * in a given grammar. + */ +std::set first_set(const rules::rule_ptr &rule, + const SyntaxGrammar &grammar); + +} // namespace build_tables +} // namespace tree_sitter #endif // COMPILER_BUILD_TABLES_FIRST_SET_H_ diff --git a/src/compiler/build_tables/get_metadata.cc b/src/compiler/build_tables/get_metadata.cc index 8efb178b..c172c623 100644 --- a/src/compiler/build_tables/get_metadata.cc +++ b/src/compiler/build_tables/get_metadata.cc @@ -3,28 +3,28 @@ #include "compiler/rules/seq.h" namespace tree_sitter { - namespace build_tables { - int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) { - class GetMetadata : public rules::RuleFn { - rules::MetadataKey metadata_key; +namespace build_tables { - int apply_to(const rules::Metadata *rule) { - int result = rule->value_for(metadata_key); - return (result != 0) ? result : apply(rule->rule); - } +int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) { + class GetMetadata : public rules::RuleFn { + rules::MetadataKey metadata_key; - // TODO - - // Remove this. It is currently needed to make the rule generated - // by `LexTableBuilder::after_separators` have the right precedence. - int apply_to(const rules::Seq *rule) { - return apply(rule->left); - } - - public: - explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {} - }; - - return GetMetadata(key).apply(rule); - } + int apply_to(const rules::Metadata *rule) { + int result = rule->value_for(metadata_key); + return (result != 0) ? result : apply(rule->rule); } + + // TODO - + // Remove this. It is currently needed to make the rule generated + // by `LexTableBuilder::after_separators` have the right precedence. + int apply_to(const rules::Seq *rule) { return apply(rule->left); } + + public: + explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {} + }; + + return GetMetadata(key).apply(rule); } + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/get_metadata.h b/src/compiler/build_tables/get_metadata.h index ed15374e..6960b233 100644 --- a/src/compiler/build_tables/get_metadata.h +++ b/src/compiler/build_tables/get_metadata.h @@ -5,9 +5,9 @@ #include "compiler/rules/metadata.h" namespace tree_sitter { - namespace build_tables { - int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key); - } +namespace build_tables { +int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key); +} } #endif // COMPILER_BUILD_TABLES_GET_METADATA_H_ diff --git a/src/compiler/build_tables/item.cc b/src/compiler/build_tables/item.cc index 9c0fda72..e3313c9b 100644 --- a/src/compiler/build_tables/item.cc +++ b/src/compiler/build_tables/item.cc @@ -5,17 +5,14 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { - namespace build_tables { - Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule) : - lhs(lhs), - rule(rule) {} +namespace build_tables { - bool Item::is_done() const { - return rule_can_be_blank(rule); - } +Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule) + : lhs(lhs), rule(rule) {} - int Item::precedence() const { - return get_metadata(rule, rules::PRECEDENCE); - } - } -} +bool Item::is_done() const { return rule_can_be_blank(rule); } + +int Item::precedence() const { return get_metadata(rule, rules::PRECEDENCE); } + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/item.h b/src/compiler/build_tables/item.h index de7e501c..72b72269 100644 --- a/src/compiler/build_tables/item.h +++ b/src/compiler/build_tables/item.h @@ -5,17 +5,19 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - namespace build_tables { - class Item { - public: - Item(const rules::Symbol &lhs, rules::rule_ptr rule); - bool is_done() const; - int precedence() const; +namespace build_tables { - rules::Symbol lhs; - rules::rule_ptr rule; - }; - } -} +class Item { + public: + Item(const rules::Symbol &lhs, rules::rule_ptr rule); + bool is_done() const; + int precedence() const; + + rules::Symbol lhs; + rules::rule_ptr rule; +}; + +} // namespace build_tables +} // namespace tree_sitter #endif // COMPILER_BUILD_TABLES_ITEM_H_ diff --git a/src/compiler/build_tables/item_set_closure.cc b/src/compiler/build_tables/item_set_closure.cc index 1be3b9e5..08660b9c 100644 --- a/src/compiler/build_tables/item_set_closure.cc +++ b/src/compiler/build_tables/item_set_closure.cc @@ -10,50 +10,56 @@ #include "compiler/prepared_grammar.h" namespace tree_sitter { - using std::set; - using std::vector; - using std::pair; - using rules::Symbol; - using rules::rule_ptr; +namespace build_tables { - namespace build_tables { - const ParseItemSet item_set_closure(const ParseItem &starting_item, - const set &starting_lookahead_symbols, - const SyntaxGrammar &grammar) { - ParseItemSet result; +using std::set; +using std::vector; +using std::pair; +using rules::Symbol; +using rules::rule_ptr; - vector>> items_to_process = {{starting_item, starting_lookahead_symbols}}; - while (!items_to_process.empty()) { - ParseItem item = items_to_process.back().first; - set new_lookahead_symbols = items_to_process.back().second; - items_to_process.pop_back(); +const ParseItemSet item_set_closure( + const ParseItem &starting_item, + const set &starting_lookahead_symbols, + const SyntaxGrammar &grammar) { + ParseItemSet result; - set &lookahead_symbols = result[item]; - size_t previous_size = lookahead_symbols.size(); - lookahead_symbols.insert(new_lookahead_symbols.begin(), new_lookahead_symbols.end()); + vector>> items_to_process = { + { starting_item, starting_lookahead_symbols } + }; - if (lookahead_symbols.size() == previous_size) - continue; + while (!items_to_process.empty()) { + ParseItem item = items_to_process.back().first; + set new_lookahead_symbols = items_to_process.back().second; + items_to_process.pop_back(); - for (const auto &pair : sym_transitions(item.rule)) { - const Symbol &symbol = pair.first; - const rule_ptr &next_rule = pair.second; + set &lookahead_symbols = result[item]; + size_t previous_size = lookahead_symbols.size(); + lookahead_symbols.insert(new_lookahead_symbols.begin(), + new_lookahead_symbols.end()); - if (symbol.is_token() || symbol.is_built_in()) - continue; + if (lookahead_symbols.size() == previous_size) + continue; - set next_lookahead_symbols = first_set(next_rule, grammar); - if (rule_can_be_blank(next_rule, grammar)) - next_lookahead_symbols.insert(lookahead_symbols.begin(), lookahead_symbols.end()); + for (const auto &pair : sym_transitions(item.rule)) { + const Symbol &symbol = pair.first; + const rule_ptr &next_rule = pair.second; - items_to_process.push_back({ - ParseItem(symbol, grammar.rule(symbol), 0), - next_lookahead_symbols - }); - } - } + if (symbol.is_token() || symbol.is_built_in()) + continue; - return result; - } + set next_lookahead_symbols = first_set(next_rule, grammar); + if (rule_can_be_blank(next_rule, grammar)) + next_lookahead_symbols.insert(lookahead_symbols.begin(), + lookahead_symbols.end()); + + items_to_process.push_back({ ParseItem(symbol, grammar.rule(symbol), 0), + next_lookahead_symbols }); } + } + + return result; } + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/item_set_closure.h b/src/compiler/build_tables/item_set_closure.h index d0e75955..f7633879 100644 --- a/src/compiler/build_tables/item_set_closure.h +++ b/src/compiler/build_tables/item_set_closure.h @@ -6,13 +6,13 @@ #include "compiler/build_tables/parse_item.h" namespace tree_sitter { - class SyntaxGrammar; +class SyntaxGrammar; - namespace build_tables { - const ParseItemSet item_set_closure(const ParseItem &item, - const std::set &lookahead_symbols, - const SyntaxGrammar &grammar); - } +namespace build_tables { +const ParseItemSet item_set_closure( + const ParseItem &item, const std::set &lookahead_symbols, + const SyntaxGrammar &grammar); +} } #endif // COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_ diff --git a/src/compiler/build_tables/item_set_transitions.cc b/src/compiler/build_tables/item_set_transitions.cc index e0b80292..e90d0a3b 100644 --- a/src/compiler/build_tables/item_set_transitions.cc +++ b/src/compiler/build_tables/item_set_transitions.cc @@ -7,43 +7,49 @@ #include "compiler/prepared_grammar.h" namespace tree_sitter { - using std::map; - using std::set; - using rules::CharacterSet; - using rules::Symbol; +namespace build_tables { - namespace build_tables { - map - sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar) { - map result; - for (const auto &pair : item_set) { - const ParseItem &item = pair.first; - const set &lookahead_symbols = pair.second; - for (auto &transition : sym_transitions(item.rule)) { - ParseItem new_item(item.lhs, transition.second, item.consumed_symbol_count + 1); - merge_sym_transition(&result, { transition.first, item_set_closure(new_item, lookahead_symbols, grammar) }, - [](ParseItemSet *left, const ParseItemSet *right) { - for (auto &pair : *right) - left->operator[](pair.first).insert(pair.second.begin(), pair.second.end()); - }); - } - } - return result; - } +using std::map; +using std::set; +using rules::CharacterSet; +using rules::Symbol; - map - char_transitions(const LexItemSet &item_set) { - map result; - for (const LexItem &item : item_set) { - for (auto &transition : char_transitions(item.rule)) { - LexItem next_item(item.lhs, transition.second); - merge_char_transition(&result, { transition.first, LexItemSet({ next_item }) }, - [](LexItemSet *left, const LexItemSet *right) { - left->insert(right->begin(), right->end()); - }); - } - } - return result; - } +map sym_transitions(const ParseItemSet &item_set, + const SyntaxGrammar &grammar) { + map result; + for (const auto &pair : item_set) { + const ParseItem &item = pair.first; + const set &lookahead_symbols = pair.second; + for (auto &transition : sym_transitions(item.rule)) { + ParseItem new_item(item.lhs, transition.second, + item.consumed_symbol_count + 1); + merge_sym_transition( + &result, { transition.first, + item_set_closure(new_item, lookahead_symbols, grammar) }, + [](ParseItemSet *left, const ParseItemSet *right) { + for (auto &pair : *right) + left->operator[](pair.first) + .insert(pair.second.begin(), pair.second.end()); + }); } + } + return result; } + +map char_transitions(const LexItemSet &item_set) { + map result; + for (const LexItem &item : item_set) { + for (auto &transition : char_transitions(item.rule)) { + LexItem next_item(item.lhs, transition.second); + merge_char_transition( + &result, { transition.first, LexItemSet({ next_item }) }, + [](LexItemSet *left, const LexItemSet *right) { + left->insert(right->begin(), right->end()); + }); + } + } + return result; +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/item_set_transitions.h b/src/compiler/build_tables/item_set_transitions.h index faf2b37c..221377a0 100644 --- a/src/compiler/build_tables/item_set_transitions.h +++ b/src/compiler/build_tables/item_set_transitions.h @@ -6,19 +6,23 @@ #include "compiler/build_tables/parse_item.h" namespace tree_sitter { - class SyntaxGrammar; - namespace rules { - class CharacterSet; - class Symbol; - } - namespace build_tables { - std::map - sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar); +class SyntaxGrammar; - std::map - char_transitions(const LexItemSet &item_set); - } +namespace rules { +class CharacterSet; +class Symbol; } +namespace build_tables { + +std::map sym_transitions( + const ParseItemSet &item_set, const SyntaxGrammar &grammar); + +std::map char_transitions( + const LexItemSet &item_set); + +} // namespace build_tables +} // namespace tree_sitter + #endif // COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_ diff --git a/src/compiler/build_tables/lex_conflict_manager.cc b/src/compiler/build_tables/lex_conflict_manager.cc index 6c3d4f91..0e346dad 100644 --- a/src/compiler/build_tables/lex_conflict_manager.cc +++ b/src/compiler/build_tables/lex_conflict_manager.cc @@ -7,49 +7,49 @@ #include "compiler/prepared_grammar.h" namespace tree_sitter { - namespace build_tables { - using std::string; - using std::to_string; - using std::map; - using std::set; - using std::vector; +namespace build_tables { - LexConflictManager::LexConflictManager(const LexicalGrammar &grammar) : - grammar(grammar) {} +using std::string; +using std::to_string; +using std::map; +using std::set; +using std::vector; - bool LexConflictManager::resolve_lex_action(const LexAction &old_action, - const LexAction &new_action) { - if (new_action.type < old_action.type) - return !resolve_lex_action(new_action, old_action); +LexConflictManager::LexConflictManager(const LexicalGrammar &grammar) + : grammar(grammar) {} - switch (old_action.type) { - case LexActionTypeError: - return true; - case LexActionTypeAccept: { - int old_precedence = *old_action.precedence_values.begin(); - switch (new_action.type) { - case LexActionTypeAccept: { - int new_precedence = *new_action.precedence_values.begin(); - if (new_precedence > old_precedence) { - return true; - } else if (new_precedence < old_precedence) { - return false; - } else { - return new_action.symbol.index < old_action.symbol.index; - } - } - case LexActionTypeAdvance: { - return true; - } - default: - return false; - } +bool LexConflictManager::resolve_lex_action(const LexAction &old_action, + const LexAction &new_action) { + if (new_action.type < old_action.type) + return !resolve_lex_action(new_action, old_action); - return true; - } - default: - return false; - } + switch (old_action.type) { + case LexActionTypeError: + return true; + case LexActionTypeAccept: { + int old_precedence = *old_action.precedence_values.begin(); + switch (new_action.type) { + case LexActionTypeAccept: { + int new_precedence = *new_action.precedence_values.begin(); + if (new_precedence > old_precedence) { + return true; + } else if (new_precedence < old_precedence) { + return false; + } else { + return new_action.symbol.index < old_action.symbol.index; + } } + case LexActionTypeAdvance: { return true; } + default: + return false; + } + + return true; } + default: + return false; + } } + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/lex_conflict_manager.h b/src/compiler/build_tables/lex_conflict_manager.h index 76ee3f45..1b0bfaf3 100644 --- a/src/compiler/build_tables/lex_conflict_manager.h +++ b/src/compiler/build_tables/lex_conflict_manager.h @@ -6,16 +6,18 @@ #include "compiler/prepared_grammar.h" namespace tree_sitter { - namespace build_tables { - class LexConflictManager { - const LexicalGrammar grammar; +namespace build_tables { - public: - explicit LexConflictManager(const LexicalGrammar &grammar); - bool resolve_lex_action(const LexAction &old_action, - const LexAction &new_action); - }; - } -} +class LexConflictManager { + const LexicalGrammar grammar; + + public: + explicit LexConflictManager(const LexicalGrammar &grammar); + bool resolve_lex_action(const LexAction &old_action, + const LexAction &new_action); +}; + +} // namespace build_tables +} // namespace tree_sitter #endif // COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_ diff --git a/src/compiler/build_tables/lex_item.cc b/src/compiler/build_tables/lex_item.cc index b82dac7e..d19a0814 100644 --- a/src/compiler/build_tables/lex_item.cc +++ b/src/compiler/build_tables/lex_item.cc @@ -6,45 +6,42 @@ #include "compiler/rules/visitor.h" namespace tree_sitter { - using std::string; - using std::ostream; - using std::vector; +namespace build_tables { - namespace build_tables { - LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule) : - Item(lhs, rule) {} +using std::string; +using std::ostream; +using std::vector; - bool LexItem::operator==(const LexItem &other) const { - return (other.lhs == lhs) && other.rule->operator==(*rule); - } +LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule) + : Item(lhs, rule) {} - bool LexItem::is_token_start() const { - class IsTokenStart : public rules::RuleFn { - bool apply_to(const rules::Seq *rule) { - if (apply(rule->left)) - return true; - else if (rule_can_be_blank(rule->left)) - return apply(rule->right); - else - return false; - } - - bool apply_to(const rules::Metadata *rule) { - return rule->value_for(rules::START_TOKEN); - } - }; - - return IsTokenStart().apply(rule); - } - - ostream& operator<<(ostream &stream, const LexItem &item) { - return stream << - string("#"); - } - } +bool LexItem::operator==(const LexItem &other) const { + return (other.lhs == lhs) && other.rule->operator==(*rule); } +bool LexItem::is_token_start() const { + class IsTokenStart : public rules::RuleFn { + bool apply_to(const rules::Seq *rule) { + if (apply(rule->left)) + return true; + else if (rule_can_be_blank(rule->left)) + return apply(rule->right); + else + return false; + } + + bool apply_to(const rules::Metadata *rule) { + return rule->value_for(rules::START_TOKEN); + } + }; + + return IsTokenStart().apply(rule); +} + +ostream &operator<<(ostream &stream, const LexItem &item) { + return stream << string("#"); +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/lex_item.h b/src/compiler/build_tables/lex_item.h index 7ee51993..c2bd1e08 100644 --- a/src/compiler/build_tables/lex_item.h +++ b/src/compiler/build_tables/lex_item.h @@ -6,39 +6,42 @@ #include "compiler/build_tables/item.h" namespace tree_sitter { - namespace build_tables { - class LexItem : public Item { - public: - LexItem(const rules::Symbol &lhs, rules::rule_ptr rule); - bool operator==(const LexItem &other) const; - bool is_token_start() const; - }; +namespace build_tables { - std::ostream& operator<<(std::ostream &stream, const LexItem &item); +class LexItem : public Item { + public: + LexItem(const rules::Symbol &lhs, rules::rule_ptr rule); + bool operator==(const LexItem &other) const; + bool is_token_start() const; +}; - typedef std::unordered_set LexItemSet; - } -} +std::ostream &operator<<(std::ostream &stream, const LexItem &item); + +typedef std::unordered_set LexItemSet; + +} // namespace build_tables +} // namespace tree_sitter namespace std { - template<> - struct hash { - size_t operator()(const tree_sitter::build_tables::Item &item) const { - return - hash()(item.lhs) ^ - hash()(item.rule); - } - }; - template<> - struct hash { - size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const { - size_t result = hash()(set.size()); - for (auto item : set) - result ^= hash()(item); - return result; - } - }; -} +template <> +struct hash { + size_t operator()(const tree_sitter::build_tables::Item &item) const { + return hash()(item.lhs) ^ + hash()(item.rule); + } +}; + +template <> +struct hash { + size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const { + size_t result = hash()(set.size()); + for (auto item : set) + result ^= hash()(item); + return result; + } +}; + +} // namespace std #endif // COMPILER_BUILD_TABLES_LEX_ITEM_H_ diff --git a/src/compiler/build_tables/merge_transitions.h b/src/compiler/build_tables/merge_transitions.h index ccc0362e..59bafde1 100644 --- a/src/compiler/build_tables/merge_transitions.h +++ b/src/compiler/build_tables/merge_transitions.h @@ -7,69 +7,71 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - namespace build_tables { +namespace build_tables { - /* - * Merges a new transition into a map with symbol keys. - * If the symbol already exists in the map, the new value for that - * symbol will be computed by merging the old and new values - * using the given function. - */ - template - void merge_sym_transition(std::map *left, - const std::pair &new_pair, - std::function merge_fn) { - auto new_symbol = new_pair.first; - for (auto &existing_pair : *left) { - auto existing_symbol = existing_pair.first; - if (new_symbol < existing_symbol) break; - if (existing_symbol == new_symbol) { - merge_fn(&existing_pair.second, &new_pair.second); - return; - } - } - left->insert(new_pair); - } - - /* - * Merges two transition maps with character set keys. If the - * two maps contain values for overlapping character sets, the - * new value for the two sets' intersection will be computed by - * merging the old and new values using the given function. - */ - template - void merge_char_transition(std::map *left, - const std::pair &new_pair, - std::function merge_fn) { - rules::CharacterSet new_char_set = new_pair.first; - T new_value = new_pair.second; - - std::map pairs_to_insert; - - auto iter = left->begin(); - while (iter != left->end()) { - rules::CharacterSet char_set = iter->first; - T value = iter->second; - - rules::CharacterSet intersection = char_set.remove_set(new_char_set); - if (!intersection.is_empty()) { - new_char_set.remove_set(intersection); - if (!char_set.is_empty()) - pairs_to_insert.insert({ char_set, value }); - merge_fn(&value, &new_value); - pairs_to_insert.insert({ intersection, value }); - left->erase(iter++); - } else { - ++iter; - } - } - - left->insert(pairs_to_insert.begin(), pairs_to_insert.end()); - - if (!new_char_set.is_empty()) - left->insert({ new_char_set, new_pair.second }); - } +/* + * Merges a new transition into a map with symbol keys. + * If the symbol already exists in the map, the new value for that + * symbol will be computed by merging the old and new values + * using the given function. + */ +template +void merge_sym_transition(std::map *left, + const std::pair &new_pair, + std::function merge_fn) { + auto new_symbol = new_pair.first; + for (auto &existing_pair : *left) { + auto existing_symbol = existing_pair.first; + if (new_symbol < existing_symbol) + break; + if (existing_symbol == new_symbol) { + merge_fn(&existing_pair.second, &new_pair.second); + return; } + } + left->insert(new_pair); } +/* + * Merges two transition maps with character set keys. If the + * two maps contain values for overlapping character sets, the + * new value for the two sets' intersection will be computed by + * merging the old and new values using the given function. + */ +template +void merge_char_transition(std::map *left, + const std::pair &new_pair, + std::function merge_fn) { + rules::CharacterSet new_char_set = new_pair.first; + T new_value = new_pair.second; + + std::map pairs_to_insert; + + auto iter = left->begin(); + while (iter != left->end()) { + rules::CharacterSet char_set = iter->first; + T value = iter->second; + + rules::CharacterSet intersection = char_set.remove_set(new_char_set); + if (!intersection.is_empty()) { + new_char_set.remove_set(intersection); + if (!char_set.is_empty()) + pairs_to_insert.insert({ char_set, value }); + merge_fn(&value, &new_value); + pairs_to_insert.insert({ intersection, value }); + left->erase(iter++); + } else { + ++iter; + } + } + + left->insert(pairs_to_insert.begin(), pairs_to_insert.end()); + + if (!new_char_set.is_empty()) + left->insert({ new_char_set, new_pair.second }); +} + +} // namespace build_tables +} // namespace tree_sitter + #endif // COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_ diff --git a/src/compiler/build_tables/parse_conflict_manager.cc b/src/compiler/build_tables/parse_conflict_manager.cc index 5f0a6a2f..e867c636 100644 --- a/src/compiler/build_tables/parse_conflict_manager.cc +++ b/src/compiler/build_tables/parse_conflict_manager.cc @@ -8,127 +8,130 @@ #include "compiler/prepared_grammar.h" namespace tree_sitter { - namespace build_tables { - using std::string; - using std::to_string; - using std::map; - using std::set; - using std::vector; +namespace build_tables { - ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar, - const LexicalGrammar &lex_grammar) : - parse_grammar(parse_grammar), - lex_grammar(lex_grammar) {} +using std::string; +using std::to_string; +using std::map; +using std::set; +using std::vector; - bool ParseConflictManager::resolve_parse_action(const rules::Symbol &symbol, - const ParseAction &old_action, - const ParseAction &new_action) { - if (new_action.type < old_action.type) - return !resolve_parse_action(symbol, new_action, old_action); +ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar, + const LexicalGrammar &lex_grammar) + : parse_grammar(parse_grammar), lex_grammar(lex_grammar) {} - switch (old_action.type) { - case ParseActionTypeError: - return true; - case ParseActionTypeShift: { - int min_precedence = *old_action.precedence_values.begin(); - int max_precedence = *old_action.precedence_values.rbegin(); - switch (new_action.type) { - case ParseActionTypeReduce: { - int new_precedence = *new_action.precedence_values.rbegin(); - if (max_precedence > new_precedence) { - if (min_precedence < new_precedence) - record_conflict(symbol, old_action, new_action); - return false; - } else if (max_precedence < new_precedence) { - return true; - } else { - record_conflict(symbol, old_action, new_action); - return false; - } - } - default: - return false; - } - } - case ParseActionTypeReduce: - switch (new_action.type) { - case ParseActionTypeReduce: { - int old_precedence = *old_action.precedence_values.begin(); - int new_precedence = *new_action.precedence_values.begin(); - if (new_precedence > old_precedence) { - return true; - } else if (new_precedence < old_precedence) { - return false; - } else { - record_conflict(symbol, old_action, new_action); - return new_action.symbol.index < old_action.symbol.index; - } - } - default: - return false; - } - default: - return false; - } - } - - const vector ParseConflictManager::conflicts() const { - vector result; - result.insert(result.end(), conflicts_.begin(), conflicts_.end()); - return result; - } - - string precedence_string(const ParseAction &action) { - string precedences = "(precedence "; - bool started = false; - for (auto value : action.precedence_values) { - if (started) precedences += ", "; - started = true; - precedences += to_string(value); - } - return precedences + ")"; - } - - string message_for_action(const ParseAction &action, const SyntaxGrammar &parse_grammar) { - switch (action.type) { - case ParseActionTypeShift: - return "shift " + precedence_string(action); - case ParseActionTypeReduce: { - string name = parse_grammar.rule_name(action.symbol); - if (name == "") - return "ERROR" + to_string(action.symbol.index); - else - return "reduce " + name + " " + precedence_string(action); - } - case ParseActionTypeAccept: - return "accept"; - default: - return "error"; - } - } - - string ParseConflictManager::symbol_name(const rules::Symbol &symbol) { - if (symbol.is_built_in()) { - if (symbol == rules::ERROR()) - return "ERROR"; - else if (symbol == rules::END_OF_INPUT()) - return "END_OF_INPUT"; - else - return ""; - } - - if (symbol.is_token()) - return lex_grammar.rule_name(symbol); - else - return parse_grammar.rule_name(symbol); - } - - void ParseConflictManager::record_conflict(const rules::Symbol &symbol, - const ParseAction &left, - const ParseAction &right) { - conflicts_.insert(Conflict(symbol_name(symbol) + ": " + - message_for_action(left, parse_grammar) + " / " + - message_for_action(right, parse_grammar))); +bool ParseConflictManager::resolve_parse_action(const rules::Symbol &symbol, + const ParseAction &old_action, + const ParseAction &new_action) { + if (new_action.type < old_action.type) + return !resolve_parse_action(symbol, new_action, old_action); + + switch (old_action.type) { + case ParseActionTypeError: + return true; + case ParseActionTypeShift: { + int min_precedence = *old_action.precedence_values.begin(); + int max_precedence = *old_action.precedence_values.rbegin(); + switch (new_action.type) { + case ParseActionTypeReduce: { + int new_precedence = *new_action.precedence_values.rbegin(); + if (max_precedence > new_precedence) { + if (min_precedence < new_precedence) + record_conflict(symbol, old_action, new_action); + return false; + } else if (max_precedence < new_precedence) { + return true; + } else { + record_conflict(symbol, old_action, new_action); + return false; + } } + default: + return false; + } } + case ParseActionTypeReduce: + switch (new_action.type) { + case ParseActionTypeReduce: { + int old_precedence = *old_action.precedence_values.begin(); + int new_precedence = *new_action.precedence_values.begin(); + if (new_precedence > old_precedence) { + return true; + } else if (new_precedence < old_precedence) { + return false; + } else { + record_conflict(symbol, old_action, new_action); + return new_action.symbol.index < old_action.symbol.index; + } + } + default: + return false; + } + default: + return false; + } } + +const vector ParseConflictManager::conflicts() const { + vector result; + result.insert(result.end(), conflicts_.begin(), conflicts_.end()); + return result; +} + +string precedence_string(const ParseAction &action) { + string precedences = "(precedence "; + bool started = false; + for (auto value : action.precedence_values) { + if (started) + precedences += ", "; + started = true; + precedences += to_string(value); + } + return precedences + ")"; +} + +string message_for_action(const ParseAction &action, + const SyntaxGrammar &parse_grammar) { + switch (action.type) { + case ParseActionTypeShift: + return "shift " + precedence_string(action); + case ParseActionTypeReduce: { + string name = parse_grammar.rule_name(action.symbol); + if (name == "") + return "ERROR" + to_string(action.symbol.index); + else + return "reduce " + name + " " + precedence_string(action); + } + case ParseActionTypeAccept: + return "accept"; + default: + return "error"; + } +} + +string ParseConflictManager::symbol_name(const rules::Symbol &symbol) { + if (symbol.is_built_in()) { + if (symbol == rules::ERROR()) + return "ERROR"; + else if (symbol == rules::END_OF_INPUT()) + return "END_OF_INPUT"; + else + return ""; + } + + if (symbol.is_token()) + return lex_grammar.rule_name(symbol); + else + return parse_grammar.rule_name(symbol); +} + +void ParseConflictManager::record_conflict(const rules::Symbol &symbol, + const ParseAction &left, + const ParseAction &right) { + conflicts_.insert(Conflict(symbol_name(symbol) + ": " + + message_for_action(left, parse_grammar) + " / " + + message_for_action(right, parse_grammar))); +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/parse_conflict_manager.h b/src/compiler/build_tables/parse_conflict_manager.h index 3bdbad9b..245abb34 100644 --- a/src/compiler/build_tables/parse_conflict_manager.h +++ b/src/compiler/build_tables/parse_conflict_manager.h @@ -11,25 +11,28 @@ #include "compiler/prepared_grammar.h" namespace tree_sitter { - namespace build_tables { - class ParseConflictManager { - const SyntaxGrammar parse_grammar; - const LexicalGrammar lex_grammar; - std::set conflicts_; +namespace build_tables { - public: - ParseConflictManager(const SyntaxGrammar &parse_grammar, - const LexicalGrammar &lex_grammar); - bool resolve_parse_action(const rules::Symbol &symbol, - const ParseAction &old_action, - const ParseAction &new_action); - const std::vector conflicts() const; +class ParseConflictManager { + const SyntaxGrammar parse_grammar; + const LexicalGrammar lex_grammar; + std::set conflicts_; - private: - std::string symbol_name(const rules::Symbol &symbol); - void record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right); - }; - } -} + public: + ParseConflictManager(const SyntaxGrammar &parse_grammar, + const LexicalGrammar &lex_grammar); + bool resolve_parse_action(const rules::Symbol &symbol, + const ParseAction &old_action, + const ParseAction &new_action); + const std::vector conflicts() const; + + private: + std::string symbol_name(const rules::Symbol &symbol); + void record_conflict(const rules::Symbol &symbol, const ParseAction &left, + const ParseAction &right); +}; + +} // namespace build_tables +} // namespace tree_sitter #endif // COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_ diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index 0096f202..e436bd54 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -3,29 +3,28 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { - using std::pair; - using std::set; - using std::string; - using std::to_string; - using std::ostream; +namespace build_tables { - namespace build_tables { - ParseItem::ParseItem(const rules::Symbol &lhs, - const rules::rule_ptr rule, - size_t consumed_symbol_count) : - Item(lhs, rule), - consumed_symbol_count(consumed_symbol_count) {} +using std::pair; +using std::set; +using std::string; +using std::to_string; +using std::ostream; - bool ParseItem::operator==(const ParseItem &other) const { - return - (lhs == other.lhs) && - (consumed_symbol_count == other.consumed_symbol_count) && - (rule == other.rule || rule->operator==(*other.rule)); - } +ParseItem::ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, + size_t consumed_symbol_count) + : Item(lhs, rule), consumed_symbol_count(consumed_symbol_count) {} - ostream& operator<<(ostream &stream, const ParseItem &item) { - return stream << string("#"); - } - } +bool ParseItem::operator==(const ParseItem &other) const { + return (lhs == other.lhs) && + (consumed_symbol_count == other.consumed_symbol_count) && + (rule == other.rule || rule->operator==(*other.rule)); } +ostream &operator<<(ostream &stream, const ParseItem &item) { + return stream << string("#"); +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h index 2c638885..c9a08a3f 100644 --- a/src/compiler/build_tables/parse_item.h +++ b/src/compiler/build_tables/parse_item.h @@ -8,44 +8,48 @@ #include "compiler/build_tables/item.h" namespace tree_sitter { - namespace build_tables { - class ParseItem : public Item { - public: - ParseItem(const rules::Symbol &lhs, rules::rule_ptr rule, const size_t consumed_symbol_count); - bool operator==(const ParseItem &other) const; - size_t consumed_symbol_count; - }; +namespace build_tables { - std::ostream& operator<<(std::ostream &stream, const ParseItem &item); +class ParseItem : public Item { + public: + ParseItem(const rules::Symbol &lhs, rules::rule_ptr rule, + const size_t consumed_symbol_count); + bool operator==(const ParseItem &other) const; + size_t consumed_symbol_count; +}; - typedef std::unordered_map> ParseItemSet; - } -} +std::ostream &operator<<(std::ostream &stream, const ParseItem &item); + +typedef std::unordered_map > ParseItemSet; + +} // namespace build_tables +} // namespace tree_sitter namespace std { - template<> - struct hash { - size_t operator()(const tree_sitter::build_tables::ParseItem &item) const { - return - hash()(item.lhs) ^ - hash()(item.rule) ^ - hash()(item.consumed_symbol_count); - } - }; - template<> - struct hash { - size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const { - size_t result = hash()(set.size()); - for (auto &pair : set) { - result ^= hash()(pair.first); - result ^= hash()(pair.second.size()); - for (auto &symbol : pair.second) - result ^= hash()(symbol); - } - return result; - } - }; -} +template <> +struct hash { + size_t operator()(const tree_sitter::build_tables::ParseItem &item) const { + return hash()(item.lhs) ^ + hash()(item.rule) ^ + hash()(item.consumed_symbol_count); + } +}; + +template <> +struct hash { + size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const { + size_t result = hash()(set.size()); + for (auto &pair : set) { + result ^= hash()(pair.first); + result ^= hash()(pair.second.size()); + for (auto &symbol : pair.second) + result ^= hash()(symbol); + } + return result; + } +}; + +} // namespace std #endif // COMPILER_BUILD_TABLES_PARSE_ITEM_H_ diff --git a/src/compiler/build_tables/rule_can_be_blank.cc b/src/compiler/build_tables/rule_can_be_blank.cc index 6a7d8f83..d17dcba8 100644 --- a/src/compiler/build_tables/rule_can_be_blank.cc +++ b/src/compiler/build_tables/rule_can_be_blank.cc @@ -9,60 +9,59 @@ #include "compiler/rules/blank.h" #include "compiler/rules/metadata.h" -namespace tree_sitter { - using std::set; +namespace tree_sitter { +namespace build_tables { - namespace build_tables { - class CanBeBlank : public rules::RuleFn { - protected: - bool apply_to(const rules::Blank *) { - return true; - } +using std::set; - bool apply_to(const rules::Repeat *rule) { - return true; - } +class CanBeBlank : public rules::RuleFn { + protected: + bool apply_to(const rules::Blank *) { return true; } - bool apply_to(const rules::Choice *rule) { - for (const auto &element : rule->elements) - if (apply(element)) return true; - return false; - } + bool apply_to(const rules::Repeat *rule) { return true; } - bool apply_to(const rules::Seq *rule) { - return apply(rule->left) && apply(rule->right); - } + bool apply_to(const rules::Choice *rule) { + for (const auto &element : rule->elements) + if (apply(element)) + return true; + return false; + } - bool apply_to(const rules::Metadata *rule) { - return apply(rule->rule); - } - }; + bool apply_to(const rules::Seq *rule) { + return apply(rule->left) && apply(rule->right); + } - class CanBeBlankRecursive : public CanBeBlank { - const SyntaxGrammar *grammar; - set visited_symbols; - using CanBeBlank::visit; + bool apply_to(const rules::Metadata *rule) { return apply(rule->rule); } +}; - public: - using CanBeBlank::apply_to; - explicit CanBeBlankRecursive(const SyntaxGrammar *grammar) : grammar(grammar) {} +class CanBeBlankRecursive : public CanBeBlank { + const SyntaxGrammar *grammar; + set visited_symbols; + using CanBeBlank::visit; - bool apply_to(const rules::Symbol *rule) { - if (visited_symbols.find(*rule) == visited_symbols.end()) { - visited_symbols.insert(*rule); - return !rule->is_token() && apply(grammar->rule(*rule)); - } else { - return false; - } - } - }; + public: + using CanBeBlank::apply_to; + explicit CanBeBlankRecursive(const SyntaxGrammar *grammar) + : grammar(grammar) {} - bool rule_can_be_blank(const rules::rule_ptr &rule) { - return CanBeBlank().apply(rule); - } - - bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) { - return CanBeBlankRecursive(&grammar).apply(rule); - } + bool apply_to(const rules::Symbol *rule) { + if (visited_symbols.find(*rule) == visited_symbols.end()) { + visited_symbols.insert(*rule); + return !rule->is_token() && apply(grammar->rule(*rule)); + } else { + return false; } + } +}; + +bool rule_can_be_blank(const rules::rule_ptr &rule) { + return CanBeBlank().apply(rule); } + +bool rule_can_be_blank(const rules::rule_ptr &rule, + const SyntaxGrammar &grammar) { + return CanBeBlankRecursive(&grammar).apply(rule); +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/rule_can_be_blank.h b/src/compiler/build_tables/rule_can_be_blank.h index bf1fbe90..f7d0d7b3 100644 --- a/src/compiler/build_tables/rule_can_be_blank.h +++ b/src/compiler/build_tables/rule_can_be_blank.h @@ -4,12 +4,16 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { - class SyntaxGrammar; - namespace build_tables { - bool rule_can_be_blank(const rules::rule_ptr &rule); - bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar); - } -} +class SyntaxGrammar; + +namespace build_tables { + +bool rule_can_be_blank(const rules::rule_ptr &rule); +bool rule_can_be_blank(const rules::rule_ptr &rule, + const SyntaxGrammar &grammar); + +} // namespace build_tables +} // namespace tree_sitter #endif // COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_ diff --git a/src/compiler/build_tables/rule_transitions.cc b/src/compiler/build_tables/rule_transitions.cc index 719f1fe4..3975ec6b 100644 --- a/src/compiler/build_tables/rule_transitions.cc +++ b/src/compiler/build_tables/rule_transitions.cc @@ -13,87 +13,91 @@ #include "compiler/rules/visitor.h" namespace tree_sitter { - using std::map; - using std::make_shared; - using rules::rule_ptr; - using rules::Symbol; - using rules::CharacterSet; +namespace build_tables { - namespace build_tables { - template - void merge_transitions(map *left, const map &right); +using std::map; +using std::make_shared; +using rules::rule_ptr; +using rules::Symbol; +using rules::CharacterSet; - template<> - void merge_transitions(map *left, const map &right) { - for (auto &pair : right) - merge_char_transition(left, pair, [](rule_ptr *left, const rule_ptr *right) { - *left = rules::Choice::Build({ *left, *right }); - }); - } +template +void merge_transitions(map *left, const map &right); - template<> - void merge_transitions(map *left, const map &right) { - for (auto &pair : right) - merge_sym_transition(left, pair, [](rule_ptr *left, const rule_ptr *right) { - *left = rules::Choice::Build({ *left, *right }); - }); - } - - template - class RuleTransitions : public rules::RuleFn> { - map apply_to_atom(const rules::Rule *rule) { - auto atom = dynamic_cast(rule); - if (atom) - return map({{ *atom, make_shared() }}); - else - return map(); - } - - map apply_to(const CharacterSet *rule) { - return apply_to_atom(rule); - } - - map apply_to(const Symbol *rule) { - return apply_to_atom(rule); - } - - map apply_to(const rules::Choice *rule) { - map result; - for (const auto &el : rule->elements) - merge_transitions(&result, this->apply(el)); - return result; - } - - map apply_to(const rules::Seq *rule) { - auto result = this->apply(rule->left); - for (auto &pair : result) - pair.second = rules::Seq::Build({ pair.second, rule->right }); - if (rule_can_be_blank(rule->left)) - merge_transitions(&result, this->apply(rule->right)); - return result; - } - - map apply_to(const rules::Repeat *rule) { - auto result = this->apply(rule->content); - for (auto &pair : result) - pair.second = rules::Seq::Build({ pair.second, rule->copy() }); - return result; - } - - map apply_to(const rules::Metadata *rule) { - auto result = this->apply(rule->rule); - for (auto &pair : result) - pair.second = make_shared(pair.second, rule->value); - return result; - } - }; - - map char_transitions(const rule_ptr &rule) { - return RuleTransitions().apply(rule); - } - - map sym_transitions(const rule_ptr &rule) { - return RuleTransitions().apply(rule); - } - } +template <> +void merge_transitions(map *left, + const map &right) { + for (auto &pair : right) + merge_char_transition(left, pair, + [](rule_ptr *left, const rule_ptr *right) { + *left = rules::Choice::Build({ *left, *right }); + }); } + +template <> +void merge_transitions(map *left, + const map &right) { + for (auto &pair : right) + merge_sym_transition(left, pair, + [](rule_ptr *left, const rule_ptr *right) { + *left = rules::Choice::Build({ *left, *right }); + }); +} + +template +class RuleTransitions : public rules::RuleFn > { + map apply_to_atom(const rules::Rule *rule) { + auto atom = dynamic_cast(rule); + if (atom) + return map({ { *atom, make_shared() } }); + else + return map(); + } + + map apply_to(const CharacterSet *rule) { + return apply_to_atom(rule); + } + + map apply_to(const Symbol *rule) { return apply_to_atom(rule); } + + map apply_to(const rules::Choice *rule) { + map result; + for (const auto &el : rule->elements) + merge_transitions(&result, this->apply(el)); + return result; + } + + map apply_to(const rules::Seq *rule) { + auto result = this->apply(rule->left); + for (auto &pair : result) + pair.second = rules::Seq::Build({ pair.second, rule->right }); + if (rule_can_be_blank(rule->left)) + merge_transitions(&result, this->apply(rule->right)); + return result; + } + + map apply_to(const rules::Repeat *rule) { + auto result = this->apply(rule->content); + for (auto &pair : result) + pair.second = rules::Seq::Build({ pair.second, rule->copy() }); + return result; + } + + map apply_to(const rules::Metadata *rule) { + auto result = this->apply(rule->rule); + for (auto &pair : result) + pair.second = make_shared(pair.second, rule->value); + return result; + } +}; + +map char_transitions(const rule_ptr &rule) { + return RuleTransitions().apply(rule); +} + +map sym_transitions(const rule_ptr &rule) { + return RuleTransitions().apply(rule); +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/rule_transitions.h b/src/compiler/build_tables/rule_transitions.h index 06b19820..3fdb4b07 100644 --- a/src/compiler/build_tables/rule_transitions.h +++ b/src/compiler/build_tables/rule_transitions.h @@ -5,14 +5,16 @@ #include "compiler/rules/character_set.h" #include "compiler/rules/symbol.h" -namespace tree_sitter { - namespace build_tables { - std::map - char_transitions(const rules::rule_ptr &rule); +namespace tree_sitter { +namespace build_tables { - std::map - sym_transitions(const rules::rule_ptr &rule); - } -} +std::map char_transitions( + const rules::rule_ptr &rule); + +std::map sym_transitions( + const rules::rule_ptr &rule); + +} // namespace build_tables +} // namespace tree_sitter #endif // COMPILER_BUILD_TABLES_RULE_TRANSITIONS_H_ diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc index 0177ac32..be3b612c 100644 --- a/src/compiler/compile.cc +++ b/src/compiler/compile.cc @@ -5,29 +5,33 @@ #include "compiler/prepared_grammar.h" namespace tree_sitter { - using std::tuple; - using std::string; - using std::vector; - using std::get; - using std::make_tuple; - tuple, const GrammarError *> - compile(const Grammar &grammar, std::string name) { - auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar); - const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result); - const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result); - const GrammarError *error = get<2>(prepare_grammar_result); +using std::tuple; +using std::string; +using std::vector; +using std::get; +using std::make_tuple; - if (error) - return make_tuple("", vector(), error); +tuple, const GrammarError *> compile( + const Grammar &grammar, std::string name) { + auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar); + const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result); + const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result); + const GrammarError *error = get<2>(prepare_grammar_result); - auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar); - const ParseTable &parse_table = get<0>(table_build_result); - const LexTable &lex_table = get<1>(table_build_result); - const vector &conflicts = get<2>(table_build_result); + if (error) + return make_tuple("", vector(), error); - string code = generate_code::c_code(name, parse_table, lex_table, syntax_grammar, lexical_grammar); + auto table_build_result = + build_tables::build_tables(syntax_grammar, lexical_grammar); + const ParseTable &parse_table = get<0>(table_build_result); + const LexTable &lex_table = get<1>(table_build_result); + const vector &conflicts = get<2>(table_build_result); - return make_tuple(code, conflicts, nullptr); - } + string code = generate_code::c_code(name, parse_table, lex_table, + syntax_grammar, lexical_grammar); + + return make_tuple(code, conflicts, nullptr); } + +} // namespace tree_sitter diff --git a/src/compiler/conflict.cc b/src/compiler/conflict.cc index 7fb942c4..e28403d1 100644 --- a/src/compiler/conflict.cc +++ b/src/compiler/conflict.cc @@ -2,19 +2,21 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { - using std::string; - Conflict::Conflict(string description) : description(description) {} +using std::string; - bool Conflict::operator==(const tree_sitter::Conflict &other) const { - return other.description == description; - } +Conflict::Conflict(string description) : description(description) {} - bool Conflict::operator<(const tree_sitter::Conflict &other) const { - return other.description < description; - } - - std::ostream& operator<<(std::ostream &stream, const Conflict &conflict) { - return stream << "#"; - } +bool Conflict::operator==(const tree_sitter::Conflict &other) const { + return other.description == description; } + +bool Conflict::operator<(const tree_sitter::Conflict &other) const { + return other.description < description; +} + +std::ostream &operator<<(std::ostream &stream, const Conflict &conflict) { + return stream << "#"; +} + +} // namespace tree_sitter diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index d2dede17..cdc2f695 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -10,393 +10,378 @@ #include "compiler/prepared_grammar.h" namespace tree_sitter { - using std::string; - using std::to_string; - using std::function; - using std::map; - using std::vector; - using std::set; - using std::pair; - using util::escape_char; +namespace generate_code { +using std::string; +using std::to_string; +using std::function; +using std::map; +using std::vector; +using std::set; +using std::pair; +using util::escape_char; - namespace generate_code { - class CCodeGenerator { - string buffer; - size_t indent_level; +class CCodeGenerator { + string buffer; + size_t indent_level; - const string name; - const ParseTable parse_table; - const LexTable lex_table; - const SyntaxGrammar syntax_grammar; - const LexicalGrammar lexical_grammar; - map sanitized_names; + const string name; + const ParseTable parse_table; + const LexTable lex_table; + const SyntaxGrammar syntax_grammar; + const LexicalGrammar lexical_grammar; + map sanitized_names; - public: - CCodeGenerator(string name, - const ParseTable &parse_table, - const LexTable &lex_table, - const SyntaxGrammar &syntax_grammar, - const LexicalGrammar &lexical_grammar) : - indent_level(0), - name(name), - parse_table(parse_table), - lex_table(lex_table), - syntax_grammar(syntax_grammar), - lexical_grammar(lexical_grammar) {} + public: + CCodeGenerator(string name, const ParseTable &parse_table, + const LexTable &lex_table, const SyntaxGrammar &syntax_grammar, + const LexicalGrammar &lexical_grammar) + : indent_level(0), + name(name), + parse_table(parse_table), + lex_table(lex_table), + syntax_grammar(syntax_grammar), + lexical_grammar(lexical_grammar) {} - string code() { - buffer = ""; + string code() { + buffer = ""; - includes(); - state_and_symbol_counts(); - symbol_enum(); - symbol_names_list(); - hidden_symbols_list(); - lex_function(); - lex_states_list(); - parse_table_array(); - parser_export(); + includes(); + state_and_symbol_counts(); + symbol_enum(); + symbol_names_list(); + hidden_symbols_list(); + lex_function(); + lex_states_list(); + parse_table_array(); + parser_export(); - return buffer; - } + return buffer; + } - private: - void includes() { - add("#include \"tree_sitter/parser.h\""); - line(); - } + private: + void includes() { + add("#include \"tree_sitter/parser.h\""); + line(); + } - void state_and_symbol_counts() { - line("#define STATE_COUNT " + to_string(parse_table.states.size())); - line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size())); - line(); - } + void state_and_symbol_counts() { + line("#define STATE_COUNT " + to_string(parse_table.states.size())); + line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size())); + line(); + } - void symbol_enum() { - line("enum {"); - indent([&]() { - bool at_start = true; - for (auto symbol : parse_table.symbols) - if (!symbol.is_built_in()) { - if (at_start) - line(symbol_id(symbol) + " = ts_start_sym,"); - else - line(symbol_id(symbol) + ","); - at_start = false; - } - }); - line("};"); - line(); - } - - void symbol_names_list() { - set symbols(parse_table.symbols); - symbols.insert(rules::END_OF_INPUT()); - symbols.insert(rules::ERROR()); - - line("SYMBOL_NAMES = {"); - indent([&]() { - for (auto symbol : parse_table.symbols) - line("[" + symbol_id(symbol) + "] = \"" + symbol_name(symbol) + "\","); - }); - line("};"); - line(); - } - - void hidden_symbols_list() { - line("HIDDEN_SYMBOLS = {"); - indent([&]() { - for (auto &symbol : parse_table.symbols) - if (!symbol.is_built_in() && (symbol.is_auxiliary() || rule_name(symbol)[0] == '_')) - line("[" + symbol_id(symbol) + "] = 1,"); - }); - line("};"); - line(); - } - - void lex_function() { - line("LEX_FN() {"); - indent([&]() { - line("START_LEXER();"); - switch_on_lex_state(); - }); - line("}"); - line(); - } - - void lex_states_list() { - line("LEX_STATES = {"); - indent([&]() { - size_t state_id = 0; - for (auto &state : parse_table.states) - line("[" + to_string(state_id++) + "] = " + lex_state_index(state.lex_state_id) + ","); - }); - line("};"); - line(); - } - - void parse_table_array() { - size_t state_id = 0; - line("#pragma GCC diagnostic push"); - line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""); - line(); - line("PARSE_TABLE = {"); - - indent([&]() { - for (auto &state : parse_table.states) { - line("[" + to_string(state_id++) + "] = {"); - indent([&]() { - for (auto &pair : state.actions) { - line("[" + symbol_id(pair.first) + "] = "); - code_for_parse_action(pair.second); - add(","); - } - }); - line("},"); - } - }); - - line("};"); - line(); - line("#pragma GCC diagnostic pop"); - line(); - } - - void parser_export() { - line("EXPORT_PARSER(ts_parser_" + name + ");"); - line(); - } - - string rule_name(const rules::Symbol &symbol) { - return symbol.is_token() ? - lexical_grammar.rule_name(symbol) : - syntax_grammar.rule_name(symbol); - } - - string symbol_id(const rules::Symbol &symbol) { - if (symbol.is_built_in()) { - return (symbol == rules::ERROR()) ? - "ts_builtin_sym_error" : - "ts_builtin_sym_end"; - } else { - string name = sanitize_name(rule_name(symbol)); - if (symbol.is_auxiliary()) - return "ts_aux_sym_" + name; - else - return "ts_sym_" + name; - } - } - - string sanitize_name(string name) { - auto existing = sanitized_names.find(name); - if (existing != sanitized_names.end()) - return existing->second; - - string stripped_name; - for (char c : name) { - if (('a' <= c && c <= 'z') || - ('A' <= c && c <= 'Z') || - ('0' <= c && c <= '9') || - (c == '_')) { - stripped_name += c; - } - } - - for (size_t extra_number = 0;; extra_number++) { - string suffix = extra_number ? to_string(extra_number) : ""; - string unique_name = stripped_name + suffix; - if (unique_name == "") - continue; - if (!has_sanitized_name(unique_name)) { - sanitized_names.insert({ name, unique_name }); - return unique_name; - } - } - } - - bool has_sanitized_name(string name) { - for (auto &pair : sanitized_names) - if (pair.second == name) - return true; - return false; - } - - string lex_state_index(size_t i) { - return to_string(i + 1); - } - - string symbol_name(const rules::Symbol &symbol) { - if (symbol.is_built_in()) { - return (symbol == rules::ERROR()) ? "error" : "end"; - } else if (symbol.is_token() && symbol.is_auxiliary()) { - return rule_name(symbol); - } else { - return rule_name(symbol); - } - } - - string condition_for_character_range(const rules::CharacterRange &range) { - string lookahead("lookahead"); - if (range.min == range.max) { - return lookahead + " == '" + escape_char(range.min) + "'"; - } else { - return string("'") + escape_char(range.min) + string("' <= ") + lookahead + - " && " + lookahead + " <= '" + escape_char(range.max) + "'"; - } - } - - void condition_for_character_set(const rules::CharacterSet &set) { - if (set.ranges.size() == 1) { - add(condition_for_character_range(*set.ranges.begin())); - } else { - bool first = true; - for (auto &match : set.ranges) { - string part = "(" + condition_for_character_range(match) + ")"; - if (first) { - add(part); - } else { - add(" ||"); - line(part); - } - first = false; - } - } - } - - void condition_for_character_rule(const rules::CharacterSet &rule) { - pair representation = rule.most_compact_representation(); - if (representation.second) { - condition_for_character_set(representation.first); - } else { - add("!("); - condition_for_character_set(rule.complement()); - add(")"); - } - } - - void code_for_parse_action(const ParseAction &action) { - switch (action.type) { - case ParseActionTypeAccept: - add("ACCEPT_INPUT()"); - break; - case ParseActionTypeShift: - add("SHIFT(" + to_string(action.state_index) + ")"); - break; - case ParseActionTypeShiftExtra: - add("SHIFT_EXTRA()"); - break; - case ParseActionTypeReduce: - add("REDUCE(" + - symbol_id(action.symbol) + ", " + - to_string(action.consumed_symbol_count) + ")"); - break; - case ParseActionTypeReduceExtra: - add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")"); - break; - default: {} - } - } - - void code_for_lex_actions(const LexAction &action, - const set &expected_inputs) { - switch (action.type) { - case LexActionTypeAdvance: - line("ADVANCE(" + lex_state_index(action.state_index) + ");"); - break; - case LexActionTypeAccept: - line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");"); - break; - case LexActionTypeError: - line("LEX_ERROR();"); - break; - default: {} - } - } - - void code_for_lex_state(const LexState &lex_state) { - auto expected_inputs = lex_state.expected_inputs(); - if (lex_state.is_token_start) - line("START_TOKEN();"); - for (auto pair : lex_state.actions) - if (!pair.first.is_empty()) - _if([&]() { condition_for_character_rule(pair.first); }, - [&]() { code_for_lex_actions(pair.second, expected_inputs); }); - code_for_lex_actions(lex_state.default_action, expected_inputs); - } - - void switch_on_lex_state() { - _switch("lex_state", [&]() { - for (size_t i = 0; i < lex_table.states.size(); i++) - _case(lex_state_index(i), [&]() { - code_for_lex_state(lex_table.states[i]); - }); - _case("ts_lex_state_error", [&]() { - code_for_lex_state(lex_table.error_state); - }); - _default([&]() { - line("LEX_PANIC();"); - }); - }); - } - - void _switch(string condition, function body) { - line("switch (" + condition + ") {"); - indent(body); - line("}"); - } - - void _case(string value, function body) { - line("case " + value + ":"); - indent(body); - } - - void _default(function body) { - line("default:"); - indent(body); - } - - void _if(function condition, function body) { - line("if ("); - indent(condition); - add(")"); - indent(body); - } - - void line() { - line(""); - } - - void line(string input) { - add("\n"); - if (!input.empty()) { - string space; - for (size_t i = 0; i < indent_level; i++) - space += " "; - add(space + input); - } - } - - void add(string input) { - buffer += input; - } - - void indent(function body) { - indent(body, 1); - } - - void indent(function body, size_t n) { - indent_level += n; - body(); - indent_level -= n; - } - }; - - string c_code(string name, - const ParseTable &parse_table, - const LexTable &lex_table, - const SyntaxGrammar &syntax_grammar, - const LexicalGrammar &lexical_grammar) { - return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, lexical_grammar).code(); + void symbol_enum() { + line("enum {"); + indent([&]() { + bool at_start = true; + for (auto symbol : parse_table.symbols) + if (!symbol.is_built_in()) { + if (at_start) + line(symbol_id(symbol) + " = ts_start_sym,"); + else + line(symbol_id(symbol) + ","); + at_start = false; } + }); + line("};"); + line(); + } + + void symbol_names_list() { + set symbols(parse_table.symbols); + symbols.insert(rules::END_OF_INPUT()); + symbols.insert(rules::ERROR()); + + line("SYMBOL_NAMES = {"); + indent([&]() { + for (auto symbol : parse_table.symbols) + line("[" + symbol_id(symbol) + "] = \"" + symbol_name(symbol) + "\","); + }); + line("};"); + line(); + } + + void hidden_symbols_list() { + line("HIDDEN_SYMBOLS = {"); + indent([&]() { + for (auto &symbol : parse_table.symbols) + if (!symbol.is_built_in() && + (symbol.is_auxiliary() || rule_name(symbol)[0] == '_')) + line("[" + symbol_id(symbol) + "] = 1,"); + }); + line("};"); + line(); + } + + void lex_function() { + line("LEX_FN() {"); + indent([&]() { + line("START_LEXER();"); + switch_on_lex_state(); + }); + line("}"); + line(); + } + + void lex_states_list() { + line("LEX_STATES = {"); + indent([&]() { + size_t state_id = 0; + for (auto &state : parse_table.states) + line("[" + to_string(state_id++) + "] = " + + lex_state_index(state.lex_state_id) + ","); + }); + line("};"); + line(); + } + + void parse_table_array() { + size_t state_id = 0; + line("#pragma GCC diagnostic push"); + line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""); + line(); + line("PARSE_TABLE = {"); + + indent([&]() { + for (auto &state : parse_table.states) { + line("[" + to_string(state_id++) + "] = {"); + indent([&]() { + for (auto &pair : state.actions) { + line("[" + symbol_id(pair.first) + "] = "); + code_for_parse_action(pair.second); + add(","); + } + }); + line("},"); + } + }); + + line("};"); + line(); + line("#pragma GCC diagnostic pop"); + line(); + } + + void parser_export() { + line("EXPORT_PARSER(ts_parser_" + name + ");"); + line(); + } + + string rule_name(const rules::Symbol &symbol) { + return symbol.is_token() ? lexical_grammar.rule_name(symbol) + : syntax_grammar.rule_name(symbol); + } + + string symbol_id(const rules::Symbol &symbol) { + if (symbol.is_built_in()) { + return (symbol == rules::ERROR()) ? "ts_builtin_sym_error" + : "ts_builtin_sym_end"; + } else { + string name = sanitize_name(rule_name(symbol)); + if (symbol.is_auxiliary()) + return "ts_aux_sym_" + name; + else + return "ts_sym_" + name; } + } + + string sanitize_name(string name) { + auto existing = sanitized_names.find(name); + if (existing != sanitized_names.end()) + return existing->second; + + string stripped_name; + for (char c : name) { + if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || + ('0' <= c && c <= '9') || (c == '_')) { + stripped_name += c; + } + } + + for (size_t extra_number = 0;; extra_number++) { + string suffix = extra_number ? to_string(extra_number) : ""; + string unique_name = stripped_name + suffix; + if (unique_name == "") + continue; + if (!has_sanitized_name(unique_name)) { + sanitized_names.insert({ name, unique_name }); + return unique_name; + } + } + } + + bool has_sanitized_name(string name) { + for (auto &pair : sanitized_names) + if (pair.second == name) + return true; + return false; + } + + string lex_state_index(size_t i) { return to_string(i + 1); } + + string symbol_name(const rules::Symbol &symbol) { + if (symbol.is_built_in()) { + return (symbol == rules::ERROR()) ? "error" : "end"; + } else if (symbol.is_token() && symbol.is_auxiliary()) { + return rule_name(symbol); + } else { + return rule_name(symbol); + } + } + + string condition_for_character_range(const rules::CharacterRange &range) { + string lookahead("lookahead"); + if (range.min == range.max) { + return lookahead + " == '" + escape_char(range.min) + "'"; + } else { + return string("'") + escape_char(range.min) + string("' <= ") + + lookahead + " && " + lookahead + " <= '" + escape_char(range.max) + + "'"; + } + } + + void condition_for_character_set(const rules::CharacterSet &set) { + if (set.ranges.size() == 1) { + add(condition_for_character_range(*set.ranges.begin())); + } else { + bool first = true; + for (auto &match : set.ranges) { + string part = "(" + condition_for_character_range(match) + ")"; + if (first) { + add(part); + } else { + add(" ||"); + line(part); + } + first = false; + } + } + } + + void condition_for_character_rule(const rules::CharacterSet &rule) { + pair representation = + rule.most_compact_representation(); + if (representation.second) { + condition_for_character_set(representation.first); + } else { + add("!("); + condition_for_character_set(rule.complement()); + add(")"); + } + } + + void code_for_parse_action(const ParseAction &action) { + switch (action.type) { + case ParseActionTypeAccept: + add("ACCEPT_INPUT()"); + break; + case ParseActionTypeShift: + add("SHIFT(" + to_string(action.state_index) + ")"); + break; + case ParseActionTypeShiftExtra: + add("SHIFT_EXTRA()"); + break; + case ParseActionTypeReduce: + add("REDUCE(" + symbol_id(action.symbol) + ", " + + to_string(action.consumed_symbol_count) + ")"); + break; + case ParseActionTypeReduceExtra: + add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")"); + break; + default: {} + } + } + + void code_for_lex_actions(const LexAction &action, + const set &expected_inputs) { + switch (action.type) { + case LexActionTypeAdvance: + line("ADVANCE(" + lex_state_index(action.state_index) + ");"); + break; + case LexActionTypeAccept: + line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");"); + break; + case LexActionTypeError: + line("LEX_ERROR();"); + break; + default: {} + } + } + + void code_for_lex_state(const LexState &lex_state) { + auto expected_inputs = lex_state.expected_inputs(); + if (lex_state.is_token_start) + line("START_TOKEN();"); + for (auto pair : lex_state.actions) + if (!pair.first.is_empty()) + _if([&]() { condition_for_character_rule(pair.first); }, + [&]() { code_for_lex_actions(pair.second, expected_inputs); }); + code_for_lex_actions(lex_state.default_action, expected_inputs); + } + + void switch_on_lex_state() { + _switch("lex_state", [&]() { + for (size_t i = 0; i < lex_table.states.size(); i++) + _case(lex_state_index(i), + [&]() { code_for_lex_state(lex_table.states[i]); }); + _case("ts_lex_state_error", + [&]() { code_for_lex_state(lex_table.error_state); }); + _default([&]() { line("LEX_PANIC();"); }); + }); + } + + void _switch(string condition, function body) { + line("switch (" + condition + ") {"); + indent(body); + line("}"); + } + + void _case(string value, function body) { + line("case " + value + ":"); + indent(body); + } + + void _default(function body) { + line("default:"); + indent(body); + } + + void _if(function condition, function body) { + line("if ("); + indent(condition); + add(")"); + indent(body); + } + + void line() { line(""); } + + void line(string input) { + add("\n"); + if (!input.empty()) { + string space; + for (size_t i = 0; i < indent_level; i++) + space += " "; + add(space + input); + } + } + + void add(string input) { buffer += input; } + + void indent(function body) { indent(body, 1); } + + void indent(function body, size_t n) { + indent_level += n; + body(); + indent_level -= n; + } +}; + +string c_code(string name, const ParseTable &parse_table, + const LexTable &lex_table, const SyntaxGrammar &syntax_grammar, + const LexicalGrammar &lexical_grammar) { + return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, + lexical_grammar).code(); } + +} // namespace generate_code +} // namespace tree_sitter diff --git a/src/compiler/generate_code/c_code.h b/src/compiler/generate_code/c_code.h index 7a4cd1fa..f63ab248 100644 --- a/src/compiler/generate_code/c_code.h +++ b/src/compiler/generate_code/c_code.h @@ -7,16 +7,17 @@ #include "compiler/lex_table.h" namespace tree_sitter { - class SyntaxGrammar; - class LexicalGrammar; +class SyntaxGrammar; +class LexicalGrammar; - namespace generate_code { - std::string c_code(std::string name, - const ParseTable &parse_table, - const LexTable &lex_table, - const SyntaxGrammar &syntax_grammar, - const LexicalGrammar &lexical_grammar); - } -} +namespace generate_code { + +std::string c_code(std::string name, const ParseTable &parse_table, + const LexTable &lex_table, + const SyntaxGrammar &syntax_grammar, + const LexicalGrammar &lexical_grammar); + +} // namespace generate_code +} // namespace tree_sitter #endif // COMPILER_GENERATE_CODE_C_CODE_H_ diff --git a/src/compiler/grammar.cc b/src/compiler/grammar.cc index bf4e32a1..3057917e 100644 --- a/src/compiler/grammar.cc +++ b/src/compiler/grammar.cc @@ -2,83 +2,83 @@ #include "compiler/rules/rule.h" namespace tree_sitter { - using std::ostream; - using std::pair; - using std::set; - using std::string; - using std::vector; - using rules::rule_ptr; - Grammar::Grammar(const std::vector> &rules) : - rules_(rules), - ubiquitous_tokens_({}), - separators_({ ' ', '\r', '\t', '\n' }) {} +using std::ostream; +using std::pair; +using std::set; +using std::string; +using std::vector; +using rules::rule_ptr; - bool Grammar::operator==(const Grammar &other) const { - if (other.rules_.size() != rules_.size()) return false; +Grammar::Grammar( + const std::vector > &rules) + : rules_(rules), + ubiquitous_tokens_({}), + separators_({ ' ', '\r', '\t', '\n' }) {} - for (size_t i = 0; i < rules_.size(); i++) { - auto &pair = rules_[i]; - auto &other_pair = other.rules_[i]; - if (other_pair.first != pair.first) return false; - if (!other_pair.second->operator==(*pair.second)) return false; - } +bool Grammar::operator==(const Grammar &other) const { + if (other.rules_.size() != rules_.size()) + return false; - return true; - } + for (size_t i = 0; i < rules_.size(); i++) { + auto &pair = rules_[i]; + auto &other_pair = other.rules_[i]; + if (other_pair.first != pair.first) + return false; + if (!other_pair.second->operator==(*pair.second)) + return false; + } - string Grammar::start_rule_name() const { - return rules_.front().first; - } - - ostream& operator<<(ostream &stream, const Grammar &grammar) { - stream << string("# "); - stream << pair.second; - started = true; - } - return stream << string("}>"); - } - - GrammarError::GrammarError(GrammarErrorType type, std::string message) : - type(type), - message(message) {} - - bool GrammarError::operator==(const GrammarError &other) const { - return type == other.type && message == other.message; - } - - ostream& operator<<(ostream &stream, const GrammarError *error) { - if (error) - return stream << (string("#message + "'>"); - else - return stream << string("#"); - } - - const set & Grammar::ubiquitous_tokens() const { - return ubiquitous_tokens_; - } - - Grammar & Grammar::ubiquitous_tokens(const set &ubiquitous_tokens) { - ubiquitous_tokens_ = ubiquitous_tokens; - return *this; - } - - const set & Grammar::separators() const { - return separators_; - } - - Grammar & Grammar::separators(const set &separators) { - separators_ = separators; - return *this; - } - - const vector> & Grammar::rules() const { - return rules_; - } + return true; } + +string Grammar::start_rule_name() const { return rules_.front().first; } + +ostream &operator<<(ostream &stream, const Grammar &grammar) { + stream << string("# "); + stream << pair.second; + started = true; + } + return stream << string("}>"); +} + +GrammarError::GrammarError(GrammarErrorType type, std::string message) + : type(type), message(message) {} + +bool GrammarError::operator==(const GrammarError &other) const { + return type == other.type && message == other.message; +} + +ostream &operator<<(ostream &stream, const GrammarError *error) { + if (error) + return stream << (string("#message + "'>"); + else + return stream << string("#"); +} + +const set &Grammar::ubiquitous_tokens() const { + return ubiquitous_tokens_; +} + +Grammar &Grammar::ubiquitous_tokens(const set &ubiquitous_tokens) { + ubiquitous_tokens_ = ubiquitous_tokens; + return *this; +} + +const set &Grammar::separators() const { return separators_; } + +Grammar &Grammar::separators(const set &separators) { + separators_ = separators; + return *this; +} + +const vector > &Grammar::rules() const { return rules_; } + +} // namespace tree_sitter diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index 2fd24d08..25548cc1 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -2,75 +2,79 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - using std::string; - using std::to_string; - using std::map; - using std::set; - using rules::Symbol; - using rules::CharacterSet; - LexAction::LexAction() : - type(LexActionTypeError), - symbol(Symbol(-1)), - state_index(-1), - precedence_values({0}) {} +using std::string; +using std::to_string; +using std::map; +using std::set; +using rules::Symbol; +using rules::CharacterSet; - LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, set precedence_values) : - type(type), - symbol(symbol), - state_index(state_index), - precedence_values(precedence_values) {} +LexAction::LexAction() + : type(LexActionTypeError), + symbol(Symbol(-1)), + state_index(-1), + precedence_values({ 0 }) {} - LexAction LexAction::Error() { - return LexAction(LexActionTypeError, -1, Symbol(-1), {0}); - } +LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, + set precedence_values) + : type(type), + symbol(symbol), + state_index(state_index), + precedence_values(precedence_values) {} - LexAction LexAction::Advance(size_t state_index, set precedence_values) { - return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), precedence_values); - } - - LexAction LexAction::Accept(Symbol symbol, int precedence) { - return LexAction(LexActionTypeAccept, -1, symbol, { precedence }); - } - - bool LexAction::operator==(const LexAction &other) const { - return - (type == other.type) && - (state_index == other.state_index) && - (symbol == other.symbol); - } - - std::ostream& operator<<(std::ostream &stream, const LexAction &action) { - switch (action.type) { - case LexActionTypeError: - return stream << string("#"); - case LexActionTypeAccept: - return stream << string("#"; - case LexActionTypeAdvance: - return stream << string("#"; - default: - return stream; - } - } - - set LexState::expected_inputs() const { - set result; - for (auto &pair : actions) - result.insert(pair.first); - return result; - } - - LexStateId LexTable::add_state() { - states.push_back(LexState()); - return states.size() - 1; - } - - LexState & LexTable::state(LexStateId id) { - if (id < 0) - return error_state; - else - return states[id]; - } - - const LexStateId LexTable::ERROR_STATE_ID = -1; +LexAction LexAction::Error() { + return LexAction(LexActionTypeError, -1, Symbol(-1), { 0 }); } + +LexAction LexAction::Advance(size_t state_index, set precedence_values) { + return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), + precedence_values); +} + +LexAction LexAction::Accept(Symbol symbol, int precedence) { + return LexAction(LexActionTypeAccept, -1, symbol, { precedence }); +} + +bool LexAction::operator==(const LexAction &other) const { + return (type == other.type) && (state_index == other.state_index) && + (symbol == other.symbol); +} + +std::ostream &operator<<(std::ostream &stream, const LexAction &action) { + switch (action.type) { + case LexActionTypeError: + return stream << string("#"); + case LexActionTypeAccept: + return stream << string("#"; + case LexActionTypeAdvance: + return stream << string("#"; + default: + return stream; + } +} + +set LexState::expected_inputs() const { + set result; + for (auto &pair : actions) + result.insert(pair.first); + return result; +} + +LexStateId LexTable::add_state() { + states.push_back(LexState()); + return states.size() - 1; +} + +LexState &LexTable::state(LexStateId id) { + if (id < 0) + return error_state; + else + return states[id]; +} + +const LexStateId LexTable::ERROR_STATE_ID = -1; + +} // namespace tree_sitter diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index 953d01a3..9184d650 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -9,61 +9,69 @@ #include "compiler/rules/character_set.h" namespace tree_sitter { - typedef enum { - LexActionTypeError, - LexActionTypeAccept, - LexActionTypeAdvance - } LexActionType; - class LexAction { - LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, std::set precedence_values); - public: - LexAction(); - static LexAction Accept(rules::Symbol symbol, int precedence); - static LexAction Error(); - static LexAction Advance(size_t state_index, std::set precedence_values); - bool operator==(const LexAction &action) const; +typedef enum { + LexActionTypeError, + LexActionTypeAccept, + LexActionTypeAdvance +} LexActionType; - LexActionType type; - rules::Symbol symbol; - size_t state_index; - std::set precedence_values; - }; +class LexAction { + LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, + std::set precedence_values); - std::ostream& operator<<(std::ostream &stream, const LexAction &item); -} + public: + LexAction(); + static LexAction Accept(rules::Symbol symbol, int precedence); + static LexAction Error(); + static LexAction Advance(size_t state_index, std::set precedence_values); + bool operator==(const LexAction &action) const; + + LexActionType type; + rules::Symbol symbol; + size_t state_index; + std::set precedence_values; +}; + +std::ostream &operator<<(std::ostream &stream, const LexAction &item); + +} // namespace tree_sitter namespace std { - template<> - struct hash { - size_t operator()(const tree_sitter::LexAction &action) const { - return (hash()(action.type) ^ - hash()(action.symbol) ^ - hash()(action.state_index)); - } - }; -} + +template <> +struct hash { + size_t operator()(const tree_sitter::LexAction &action) const { + return (hash()(action.type) ^ + hash()(action.symbol) ^ + hash()(action.state_index)); + } +}; + +} // namespace std namespace tree_sitter { - class LexState { - public: - std::map actions; - LexAction default_action; - std::set expected_inputs() const; - bool is_token_start; - }; - typedef int64_t LexStateId; +class LexState { + public: + std::map actions; + LexAction default_action; + std::set expected_inputs() const; + bool is_token_start; +}; - class LexTable { - public: - static const LexStateId ERROR_STATE_ID; - LexStateId add_state(); - LexState & state(LexStateId state_id); +typedef int64_t LexStateId; - std::vector states; - LexState error_state; - }; -} +class LexTable { + public: + static const LexStateId ERROR_STATE_ID; + LexStateId add_state(); + LexState &state(LexStateId state_id); + + std::vector states; + LexState error_state; +}; + +} // namespace tree_sitter #endif // COMPILER_LEX_TABLE_H_ diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 939688b9..c3d2a122 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -2,109 +2,120 @@ #include namespace tree_sitter { - using std::string; - using std::ostream; - using std::to_string; - using std::set; - using std::vector; - using rules::Symbol; - ParseAction::ParseAction(ParseActionType type, - ParseStateId state_index, - Symbol symbol, - size_t consumed_symbol_count, - set precedence_values) : - type(type), - symbol(symbol), - state_index(state_index), - consumed_symbol_count(consumed_symbol_count), - precedence_values(precedence_values) {} +using std::string; +using std::ostream; +using std::to_string; +using std::set; +using std::vector; +using rules::Symbol; - ParseAction::ParseAction() : - type(ParseActionTypeError), - symbol(Symbol(-1)), - state_index(-1), - consumed_symbol_count(0) {} +ParseAction::ParseAction(ParseActionType type, ParseStateId state_index, + Symbol symbol, size_t consumed_symbol_count, + set precedence_values) + : type(type), + symbol(symbol), + state_index(state_index), + consumed_symbol_count(consumed_symbol_count), + precedence_values(precedence_values) {} - ParseAction ParseAction::Error() { - return ParseAction(ParseActionTypeError, -1, Symbol(-1), 0, { 0 }); - } +ParseAction::ParseAction() + : type(ParseActionTypeError), + symbol(Symbol(-1)), + state_index(-1), + consumed_symbol_count(0) {} - ParseAction ParseAction::Accept() { - return ParseAction(ParseActionTypeAccept, -1, Symbol(-1), 0, { 0 }); - } - - ParseAction ParseAction::Shift(ParseStateId state_index, set precedence_values) { - return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, precedence_values); - } - - ParseAction ParseAction::ShiftExtra() { - return ParseAction(ParseActionTypeShiftExtra, 0, Symbol(-1), 0, { 0 }); - } - - ParseAction ParseAction::ReduceExtra(Symbol symbol) { - return ParseAction(ParseActionTypeReduceExtra, 0, symbol, 0, { 0 }); - } - - ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count, int precedence) { - return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count, { precedence }); - } - - bool ParseAction::operator==(const ParseAction &other) const { - bool types_eq = type == other.type; - bool symbols_eq = symbol == other.symbol; - bool state_indices_eq = state_index == other.state_index; - bool consumed_symbol_counts_eq = consumed_symbol_count == other.consumed_symbol_count; - return types_eq && symbols_eq && state_indices_eq && consumed_symbol_counts_eq; - } - - ostream& operator<<(ostream &stream, const ParseAction &action) { - switch (action.type) { - case ParseActionTypeError: - return stream << string("#"); - case ParseActionTypeAccept: - return stream << string("#"); - case ParseActionTypeShift: - return stream << (string("#"); - case ParseActionTypeShiftExtra: - return stream << string("#"); - case ParseActionTypeReduce: - return stream << ("#"); - default: - return stream; - } - } - - ParseState::ParseState() : lex_state_id(-1) {} - - set ParseState::expected_inputs() const { - set result; - for (auto &pair : actions) - result.insert(pair.first); - return result; - } - - ostream& operator<<(ostream &stream, const ParseState &state) { - stream << string("# ") << pair.second; - started = true; - } - stream << string(">"); - return stream; - } - - ParseStateId ParseTable::add_state() { - states.push_back(ParseState()); - return states.size() - 1; - } - - void ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) { - symbols.insert(symbol); - states[id].actions[symbol] = action; - } +ParseAction ParseAction::Error() { + return ParseAction(ParseActionTypeError, -1, Symbol(-1), 0, { 0 }); } + +ParseAction ParseAction::Accept() { + return ParseAction(ParseActionTypeAccept, -1, Symbol(-1), 0, { 0 }); +} + +ParseAction ParseAction::Shift(ParseStateId state_index, + set precedence_values) { + return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, + precedence_values); +} + +ParseAction ParseAction::ShiftExtra() { + return ParseAction(ParseActionTypeShiftExtra, 0, Symbol(-1), 0, { 0 }); +} + +ParseAction ParseAction::ReduceExtra(Symbol symbol) { + return ParseAction(ParseActionTypeReduceExtra, 0, symbol, 0, { 0 }); +} + +ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count, + int precedence) { + return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count, + { precedence }); +} + +bool ParseAction::operator==(const ParseAction &other) const { + bool types_eq = type == other.type; + bool symbols_eq = symbol == other.symbol; + bool state_indices_eq = state_index == other.state_index; + bool consumed_symbol_counts_eq = + consumed_symbol_count == other.consumed_symbol_count; + return types_eq && symbols_eq && state_indices_eq && + consumed_symbol_counts_eq; +} + +ostream &operator<<(ostream &stream, const ParseAction &action) { + switch (action.type) { + case ParseActionTypeError: + return stream << string("#"); + case ParseActionTypeAccept: + return stream << string("#"); + case ParseActionTypeShift: + return stream << (string("#"); + case ParseActionTypeShiftExtra: + return stream << string("#"); + case ParseActionTypeReduce: + return stream << ("#"); + default: + return stream; + } +} + +ParseState::ParseState() : lex_state_id(-1) {} + +set ParseState::expected_inputs() const { + set result; + for (auto &pair : actions) + result.insert(pair.first); + return result; +} + +ostream &operator<<(ostream &stream, const ParseState &state) { + stream << string("# ") << pair.second; + started = true; + } + stream << string(">"); + return stream; +} + +ParseStateId ParseTable::add_state() { + states.push_back(ParseState()); + return states.size() - 1; +} + +void ParseTable::add_action(ParseStateId id, Symbol symbol, + ParseAction action) { + symbols.insert(symbol); + states[id].actions[symbol] = action; +} + +} // namespace tree_sitter diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index cab16124..d0b12bb9 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -9,75 +9,82 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - typedef uint64_t ParseStateId; - typedef enum { - ParseActionTypeError, - ParseActionTypeShift, - ParseActionTypeShiftExtra, - ParseActionTypeReduce, - ParseActionTypeReduceExtra, - ParseActionTypeAccept, - } ParseActionType; +typedef uint64_t ParseStateId; - class ParseAction { - ParseAction(ParseActionType type, - ParseStateId state_index, - rules::Symbol symbol, - size_t consumed_symbol_count, - std::set precedence_values); - public: - ParseAction(); - static ParseAction Accept(); - static ParseAction Error(); - static ParseAction Shift(ParseStateId state_index, std::set precedence_values); - static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count, int precedence); - static ParseAction ShiftExtra(); - static ParseAction ReduceExtra(rules::Symbol symbol); - bool operator==(const ParseAction &action) const; +typedef enum { + ParseActionTypeError, + ParseActionTypeShift, + ParseActionTypeShiftExtra, + ParseActionTypeReduce, + ParseActionTypeReduceExtra, + ParseActionTypeAccept, +} ParseActionType; - ParseActionType type; - rules::Symbol symbol; - ParseStateId state_index; - size_t consumed_symbol_count; - std::set precedence_values; - }; +class ParseAction { + ParseAction(ParseActionType type, ParseStateId state_index, + rules::Symbol symbol, size_t consumed_symbol_count, + std::set precedence_values); - std::ostream& operator<<(std::ostream &stream, const ParseAction &item); -} + public: + ParseAction(); + static ParseAction Accept(); + static ParseAction Error(); + static ParseAction Shift(ParseStateId state_index, + std::set precedence_values); + static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count, + int precedence); + static ParseAction ShiftExtra(); + static ParseAction ReduceExtra(rules::Symbol symbol); + bool operator==(const ParseAction &action) const; + + ParseActionType type; + rules::Symbol symbol; + ParseStateId state_index; + size_t consumed_symbol_count; + std::set precedence_values; +}; + +std::ostream &operator<<(std::ostream &stream, const ParseAction &item); + +} // namespace tree_sitter namespace std { - template<> - struct hash { - size_t operator()(const tree_sitter::ParseAction &action) const { - return ( - hash()(action.type) ^ - hash()(action.symbol) ^ - hash()(action.state_index) ^ - hash()(action.consumed_symbol_count)); - } - }; -} + +template <> +struct hash { + size_t operator()(const tree_sitter::ParseAction &action) const { + return (hash()(action.type) ^ + hash()(action.symbol) ^ + hash()(action.state_index) ^ + hash()(action.consumed_symbol_count)); + } +}; + +} // namespace std namespace tree_sitter { - class ParseState { - public: - ParseState(); - std::map actions; - std::set expected_inputs() const; - LexStateId lex_state_id; - }; - std::ostream& operator<<(std::ostream &stream, const ParseState &state); +class ParseState { + public: + ParseState(); + std::map actions; + std::set expected_inputs() const; + LexStateId lex_state_id; +}; - class ParseTable { - public: - ParseStateId add_state(); - void add_action(ParseStateId state_id, rules::Symbol symbol, ParseAction action); +std::ostream &operator<<(std::ostream &stream, const ParseState &state); - std::vector states; - std::set symbols; - }; -} +class ParseTable { + public: + ParseStateId add_state(); + void add_action(ParseStateId state_id, rules::Symbol symbol, + ParseAction action); + + std::vector states; + std::set symbols; +}; + +} // namespace tree_sitter #endif // COMPILER_PARSE_TABLE_H_ diff --git a/src/compiler/prepare_grammar/expand_repeats.cc b/src/compiler/prepare_grammar/expand_repeats.cc index f69fe230..07bf04d2 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cc +++ b/src/compiler/prepare_grammar/expand_repeats.cc @@ -11,55 +11,57 @@ #include "compiler/rules/repeat.h" namespace tree_sitter { - using std::string; - using std::vector; - using std::pair; - using std::to_string; - using std::make_shared; - using rules::rule_ptr; - using rules::Blank; - using rules::Choice; - using rules::Repeat; - using rules::Rule; - using rules::Seq; - using rules::Symbol; +namespace prepare_grammar { - namespace prepare_grammar { - class ExpandRepeats : public rules::IdentityRuleFn { - string rule_name; +using std::string; +using std::vector; +using std::pair; +using std::to_string; +using std::make_shared; +using rules::rule_ptr; +using rules::Blank; +using rules::Choice; +using rules::Repeat; +using rules::Rule; +using rules::Seq; +using rules::Symbol; - rule_ptr apply_to(const Repeat *rule) { - rule_ptr inner_rule = apply(rule->content); - size_t index = aux_rules.size(); - string helper_rule_name = rule_name + string("_repeat") + to_string(index); - rule_ptr repeat_symbol = make_shared(offset + index, rules::SymbolOptionAuxiliary); - aux_rules.push_back({ - helper_rule_name, - Choice::Build({ - Seq::Build({ inner_rule, repeat_symbol }), - make_shared() - }) - }); - return repeat_symbol; - } +class ExpandRepeats : public rules::IdentityRuleFn { + string rule_name; - public: - ExpandRepeats(string rule_name, size_t offset) : rule_name(rule_name), offset(offset) {} + rule_ptr apply_to(const Repeat *rule) { + rule_ptr inner_rule = apply(rule->content); + size_t index = aux_rules.size(); + string helper_rule_name = rule_name + string("_repeat") + to_string(index); + rule_ptr repeat_symbol = + make_shared(offset + index, rules::SymbolOptionAuxiliary); + aux_rules.push_back( + { helper_rule_name, + Choice::Build({ Seq::Build({ inner_rule, repeat_symbol }), + make_shared() }) }); + return repeat_symbol; + } - size_t offset; - vector> aux_rules; - }; + public: + ExpandRepeats(string rule_name, size_t offset) + : rule_name(rule_name), offset(offset) {} - SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) { - vector> rules, aux_rules(grammar.aux_rules); + size_t offset; + vector > aux_rules; +}; - for (auto &pair : grammar.rules) { - ExpandRepeats expander(pair.first, aux_rules.size()); - rules.push_back({ pair.first, expander.apply(pair.second) }); - aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end()); - } +SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) { + vector > rules, aux_rules(grammar.aux_rules); - return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens); - } - } + for (auto &pair : grammar.rules) { + ExpandRepeats expander(pair.first, aux_rules.size()); + rules.push_back({ pair.first, expander.apply(pair.second) }); + aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), + expander.aux_rules.end()); + } + + return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens); } + +} // namespace prepare_grammar +} // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/expand_repeats.h b/src/compiler/prepare_grammar/expand_repeats.h index 7a8b6e44..19c687f2 100644 --- a/src/compiler/prepare_grammar/expand_repeats.h +++ b/src/compiler/prepare_grammar/expand_repeats.h @@ -4,11 +4,14 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { - class SyntaxGrammar; - namespace prepare_grammar { - SyntaxGrammar expand_repeats(const SyntaxGrammar &); - } -} +class SyntaxGrammar; + +namespace prepare_grammar { + +SyntaxGrammar expand_repeats(const SyntaxGrammar &); + +} // namespace prepare_grammar +} // namespace tree_sitter #endif // COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_ diff --git a/src/compiler/prepare_grammar/expand_tokens.cc b/src/compiler/prepare_grammar/expand_tokens.cc index a8fde253..cd643aa5 100644 --- a/src/compiler/prepare_grammar/expand_tokens.cc +++ b/src/compiler/prepare_grammar/expand_tokens.cc @@ -12,60 +12,59 @@ #include "compiler/prepare_grammar/parse_regex.h" namespace tree_sitter { - using std::string; - using std::vector; - using std::pair; - using std::make_shared; - using rules::rule_ptr; - using rules::String; - using rules::Pattern; +namespace prepare_grammar { - namespace prepare_grammar { - class ExpandTokens : public rules::IdentityRuleFn { - using rules::IdentityRuleFn::apply_to; +using std::string; +using std::vector; +using std::pair; +using std::make_shared; +using rules::rule_ptr; +using rules::String; +using rules::Pattern; - rule_ptr apply_to(const String *rule) { - vector elements; - for (char val : rule->value) - elements.push_back(rules::CharacterSet({ val }).copy()); - return rules::Seq::Build(elements); - } +class ExpandTokens : public rules::IdentityRuleFn { + using rules::IdentityRuleFn::apply_to; - rule_ptr apply_to(const Pattern *rule) { - auto pair = parse_regex(rule->value); - if (!error) - error = pair.second; - return pair.first; - } + rule_ptr apply_to(const String *rule) { + vector elements; + for (char val : rule->value) + elements.push_back(rules::CharacterSet({ val }).copy()); + return rules::Seq::Build(elements); + } - public: - const GrammarError *error; - ExpandTokens() : error(nullptr) {} - }; + rule_ptr apply_to(const Pattern *rule) { + auto pair = parse_regex(rule->value); + if (!error) + error = pair.second; + return pair.first; + } - pair - expand_tokens(const LexicalGrammar &grammar) { - vector> rules, aux_rules; - ExpandTokens expander; + public: + const GrammarError *error; + ExpandTokens() : error(nullptr) {} +}; - for (auto &pair : grammar.rules) { - auto rule = expander.apply(pair.second); - if (expander.error) - return { LexicalGrammar(), expander.error }; - rules.push_back({ pair.first, rule }); - } +pair expand_tokens( + const LexicalGrammar &grammar) { + vector > rules, aux_rules; + ExpandTokens expander; - for (auto &pair : grammar.aux_rules) { - auto rule = expander.apply(pair.second); - if (expander.error) - return { LexicalGrammar(), expander.error }; - aux_rules.push_back({ pair.first, rule }); - } + for (auto &pair : grammar.rules) { + auto rule = expander.apply(pair.second); + if (expander.error) + return { LexicalGrammar(), expander.error }; + rules.push_back({ pair.first, rule }); + } - return { - LexicalGrammar(rules, aux_rules, grammar.separators), - nullptr, - }; - } - } + for (auto &pair : grammar.aux_rules) { + auto rule = expander.apply(pair.second); + if (expander.error) + return { LexicalGrammar(), expander.error }; + aux_rules.push_back({ pair.first, rule }); + } + + return { LexicalGrammar(rules, aux_rules, grammar.separators), nullptr, }; } + +} // namespace prepare_grammar +} // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/expand_tokens.h b/src/compiler/prepare_grammar/expand_tokens.h index 8deb55f4..5f666e43 100644 --- a/src/compiler/prepare_grammar/expand_tokens.h +++ b/src/compiler/prepare_grammar/expand_tokens.h @@ -5,13 +5,15 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { - class LexicalGrammar; - namespace prepare_grammar { - std::pair - expand_tokens(const LexicalGrammar &); - } -} +class LexicalGrammar; + +namespace prepare_grammar { + +std::pair expand_tokens( + const LexicalGrammar &); + +} // namespace prepare_grammar +} // namespace tree_sitter #endif // COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_ - diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index 6219d130..6b73c00a 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -14,120 +14,123 @@ #include "compiler/prepare_grammar/token_description.h" namespace tree_sitter { - using std::pair; - using std::string; - using std::map; - using std::to_string; - using std::vector; - using std::set; - using std::make_shared; - using rules::rule_ptr; - using rules::Symbol; +namespace prepare_grammar { - namespace prepare_grammar { - class IsToken : public rules::RuleFn { - bool apply_to(const rules::String *rule) { return true; } - bool apply_to(const rules::Pattern *rule) { return true; } - bool apply_to(const rules::Metadata *rule) { return rule->value_for(rules::IS_TOKEN); } - }; +using std::pair; +using std::string; +using std::map; +using std::to_string; +using std::vector; +using std::set; +using std::make_shared; +using rules::rule_ptr; +using rules::Symbol; - class SymbolInliner : public rules::IdentityRuleFn { - map replacements; - using rules::IdentityRuleFn::apply_to; +class IsToken : public rules::RuleFn { + bool apply_to(const rules::String *rule) { return true; } + bool apply_to(const rules::Pattern *rule) { return true; } + bool apply_to(const rules::Metadata *rule) { + return rule->value_for(rules::IS_TOKEN); + } +}; - int new_index_for_symbol(const Symbol &symbol) { - int result = symbol.index; - for (const auto &pair : replacements) - if (pair.first.index < symbol.index && - pair.first.is_auxiliary() == symbol.is_auxiliary()) - result--; - return result; - } +class SymbolInliner : public rules::IdentityRuleFn { + map replacements; + using rules::IdentityRuleFn::apply_to; - rule_ptr apply_to(const Symbol *rule) { - return replace_symbol(*rule).copy(); - } + int new_index_for_symbol(const Symbol &symbol) { + int result = symbol.index; + for (const auto &pair : replacements) + if (pair.first.index < symbol.index && + pair.first.is_auxiliary() == symbol.is_auxiliary()) + result--; + return result; + } - public: - Symbol replace_symbol(const Symbol &rule) { - if (rule.is_built_in()) return rule; - auto replacement_pair = replacements.find(rule); - if (replacement_pair != replacements.end()) - return replacement_pair->second; - else - return Symbol(new_index_for_symbol(rule), rule.options); - } + rule_ptr apply_to(const Symbol *rule) { return replace_symbol(*rule).copy(); } - SymbolInliner(const map &replacements) : replacements(replacements) {} - }; + public: + Symbol replace_symbol(const Symbol &rule) { + if (rule.is_built_in()) + return rule; + auto replacement_pair = replacements.find(rule); + if (replacement_pair != replacements.end()) + return replacement_pair->second; + else + return Symbol(new_index_for_symbol(rule), rule.options); + } - const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary); + SymbolInliner(const map &replacements) + : replacements(replacements) {} +}; - class TokenExtractor : public rules::IdentityRuleFn { - rule_ptr apply_to_token(const rules::Rule *input) { - auto rule = input->copy(); - for (size_t i = 0; i < tokens.size(); i++) - if (tokens[i].second->operator==(*rule)) - return make_shared(i, SymbolOptionAuxToken); - size_t index = tokens.size(); - tokens.push_back({ token_description(rule), rule }); - return make_shared(index, SymbolOptionAuxToken); - } +const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption( + rules::SymbolOptionToken | rules::SymbolOptionAuxiliary); - rule_ptr default_apply(const rules::Rule *rule) { - auto result = rule->copy(); - if (IsToken().apply(rule->copy())) { - return apply_to_token(rule); - } else { - return result; - } - } +class TokenExtractor : public rules::IdentityRuleFn { + rule_ptr apply_to_token(const rules::Rule *input) { + auto rule = input->copy(); + for (size_t i = 0; i < tokens.size(); i++) + if (tokens[i].second->operator==(*rule)) + return make_shared(i, SymbolOptionAuxToken); + size_t index = tokens.size(); + tokens.push_back({ token_description(rule), rule }); + return make_shared(index, SymbolOptionAuxToken); + } - rule_ptr apply_to(const rules::Metadata *rule) { - auto result = rule->copy(); - if (IsToken().apply(rule->copy())) { - return apply_to_token(rule); - } else { - return rules::IdentityRuleFn::apply_to(rule); - } - } - - public: - vector> tokens; - }; - - pair extract_tokens(const InternedGrammar &input_grammar) { - vector> rules, tokens, aux_rules, aux_tokens; - set ubiquitous_tokens; - - TokenExtractor extractor; - map symbol_replacements; - - for (size_t i = 0; i < input_grammar.rules.size(); i++) { - auto pair = input_grammar.rules[i]; - if (IsToken().apply(pair.second)) { - tokens.push_back(pair); - symbol_replacements.insert({ - Symbol(i), - Symbol(tokens.size() - 1, rules::SymbolOptionToken) - }); - } else { - rules.push_back({ pair.first, extractor.apply(pair.second) }); - } - } - - aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end()); - - SymbolInliner inliner(symbol_replacements); - for (auto &pair : rules) - pair.second = inliner.apply(pair.second); - for (auto &symbol : input_grammar.ubiquitous_tokens) - ubiquitous_tokens.insert(inliner.replace_symbol(symbol)); - - return { - SyntaxGrammar(rules, aux_rules, ubiquitous_tokens), - LexicalGrammar(tokens, aux_tokens, input_grammar.separators), - }; - } + rule_ptr default_apply(const rules::Rule *rule) { + auto result = rule->copy(); + if (IsToken().apply(rule->copy())) { + return apply_to_token(rule); + } else { + return result; } + } + + rule_ptr apply_to(const rules::Metadata *rule) { + auto result = rule->copy(); + if (IsToken().apply(rule->copy())) { + return apply_to_token(rule); + } else { + return rules::IdentityRuleFn::apply_to(rule); + } + } + + public: + vector > tokens; +}; + +pair extract_tokens( + const InternedGrammar &input_grammar) { + vector > rules, tokens, aux_rules, aux_tokens; + set ubiquitous_tokens; + + TokenExtractor extractor; + map symbol_replacements; + + for (size_t i = 0; i < input_grammar.rules.size(); i++) { + auto pair = input_grammar.rules[i]; + if (IsToken().apply(pair.second)) { + tokens.push_back(pair); + symbol_replacements.insert( + { Symbol(i), Symbol(tokens.size() - 1, rules::SymbolOptionToken) }); + } else { + rules.push_back({ pair.first, extractor.apply(pair.second) }); + } + } + + aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), + extractor.tokens.end()); + + SymbolInliner inliner(symbol_replacements); + for (auto &pair : rules) + pair.second = inliner.apply(pair.second); + for (auto &symbol : input_grammar.ubiquitous_tokens) + ubiquitous_tokens.insert(inliner.replace_symbol(symbol)); + + return { SyntaxGrammar(rules, aux_rules, ubiquitous_tokens), + LexicalGrammar(tokens, aux_tokens, input_grammar.separators), }; } + +} // namespace prepare_grammar +} // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/extract_tokens.h b/src/compiler/prepare_grammar/extract_tokens.h index 4f6d1ef6..fc94c0f1 100644 --- a/src/compiler/prepare_grammar/extract_tokens.h +++ b/src/compiler/prepare_grammar/extract_tokens.h @@ -5,12 +5,16 @@ #include "compiler/prepare_grammar/interned_grammar.h" namespace tree_sitter { - class SyntaxGrammar; - class LexicalGrammar; - namespace prepare_grammar { - std::pair extract_tokens(const InternedGrammar &); - } -} +class SyntaxGrammar; +class LexicalGrammar; + +namespace prepare_grammar { + +std::pair extract_tokens( + const InternedGrammar &); + +} // namespace prepare_grammar +} // namespace tree_sitter #endif // COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_ diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc index 192f330b..f7b21603 100644 --- a/src/compiler/prepare_grammar/intern_symbols.cc +++ b/src/compiler/prepare_grammar/intern_symbols.cc @@ -10,70 +10,72 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - using std::string; - using rules::rule_ptr; - using std::vector; - using std::set; - using std::pair; - using std::make_shared; +namespace prepare_grammar { - namespace prepare_grammar { - class InternSymbols : public rules::IdentityRuleFn { - using rules::IdentityRuleFn::apply_to; +using std::string; +using rules::rule_ptr; +using std::vector; +using std::set; +using std::pair; +using std::make_shared; - rule_ptr apply_to(const rules::NamedSymbol *rule) { - auto result = symbol_for_rule_name(rule->name); - if (!result.get()) missing_rule_name = rule->name; - return result; - } +class InternSymbols : public rules::IdentityRuleFn { + using rules::IdentityRuleFn::apply_to; - public: - std::shared_ptr symbol_for_rule_name(string rule_name) { - for (size_t i = 0; i < grammar.rules().size(); i++) - if (grammar.rules()[i].first == rule_name) - return make_shared(i); - return nullptr; - } + rule_ptr apply_to(const rules::NamedSymbol *rule) { + auto result = symbol_for_rule_name(rule->name); + if (!result.get()) + missing_rule_name = rule->name; + return result; + } - explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {} - const Grammar grammar; - string missing_rule_name; - }; + public: + std::shared_ptr symbol_for_rule_name(string rule_name) { + for (size_t i = 0; i < grammar.rules().size(); i++) + if (grammar.rules()[i].first == rule_name) + return make_shared(i); + return nullptr; + } - pair missing_rule_error(string rule_name) { - InternedGrammar grammar; - return { - grammar, - new GrammarError(GrammarErrorTypeUndefinedSymbol, - "Undefined rule '" + rule_name + "'") - }; - } + explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {} + const Grammar grammar; + string missing_rule_name; +}; - pair intern_symbols(const Grammar &grammar) { - InternSymbols interner(grammar); - vector> rules; - - for (auto &pair : grammar.rules()) { - auto new_rule = interner.apply(pair.second); - if (!interner.missing_rule_name.empty()) - return missing_rule_error(interner.missing_rule_name); - rules.push_back({ pair.first, new_rule }); - } - - set ubiquitous_tokens; - for (auto &name : grammar.ubiquitous_tokens()) { - auto token = interner.symbol_for_rule_name(name); - if (!token.get()) - return missing_rule_error(name); - ubiquitous_tokens.insert(*token); - } - - InternedGrammar result; - result.rules = rules; - result.ubiquitous_tokens = ubiquitous_tokens; - result.separators = grammar.separators(); - - return { result, nullptr }; - } - } +pair missing_rule_error( + string rule_name) { + InternedGrammar grammar; + return { grammar, new GrammarError(GrammarErrorTypeUndefinedSymbol, + "Undefined rule '" + rule_name + "'") }; } + +pair intern_symbols( + const Grammar &grammar) { + InternSymbols interner(grammar); + vector > rules; + + for (auto &pair : grammar.rules()) { + auto new_rule = interner.apply(pair.second); + if (!interner.missing_rule_name.empty()) + return missing_rule_error(interner.missing_rule_name); + rules.push_back({ pair.first, new_rule }); + } + + set ubiquitous_tokens; + for (auto &name : grammar.ubiquitous_tokens()) { + auto token = interner.symbol_for_rule_name(name); + if (!token.get()) + return missing_rule_error(name); + ubiquitous_tokens.insert(*token); + } + + InternedGrammar result; + result.rules = rules; + result.ubiquitous_tokens = ubiquitous_tokens; + result.separators = grammar.separators(); + + return { result, nullptr }; +} + +} // namespace prepare_grammar +} // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/intern_symbols.h b/src/compiler/prepare_grammar/intern_symbols.h index 34281c30..bc6380e7 100644 --- a/src/compiler/prepare_grammar/intern_symbols.h +++ b/src/compiler/prepare_grammar/intern_symbols.h @@ -7,11 +7,15 @@ #include "compiler/prepare_grammar/interned_grammar.h" namespace tree_sitter { - class Grammar; - namespace prepare_grammar { - std::pair intern_symbols(const Grammar &); - } -} +class Grammar; + +namespace prepare_grammar { + +std::pair intern_symbols( + const Grammar &); + +} // namespace prepare_grammar +} // namespace tree_sitter #endif // COMPILER_PREPARE_GRAMMAR_INTERN_SYMBOLS_H_ diff --git a/src/compiler/prepare_grammar/interned_grammar.h b/src/compiler/prepare_grammar/interned_grammar.h index e92e2398..5baeb808 100644 --- a/src/compiler/prepare_grammar/interned_grammar.h +++ b/src/compiler/prepare_grammar/interned_grammar.h @@ -9,14 +9,16 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - namespace prepare_grammar { - class InternedGrammar { - public: - std::vector> rules; - std::set ubiquitous_tokens; - std::set separators; - }; - } -} +namespace prepare_grammar { + +class InternedGrammar { + public: + std::vector > rules; + std::set ubiquitous_tokens; + std::set separators; +}; + +} // namespace prepare_grammar +} // namespace tree_sitter #endif // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_ diff --git a/src/compiler/prepare_grammar/parse_regex.cc b/src/compiler/prepare_grammar/parse_regex.cc index 70f3af9d..b325cc3d 100644 --- a/src/compiler/prepare_grammar/parse_regex.cc +++ b/src/compiler/prepare_grammar/parse_regex.cc @@ -10,202 +10,193 @@ #include "compiler/util/string_helpers.h" namespace tree_sitter { - using std::string; - using std::vector; - using std::pair; - using std::make_shared; - using rules::rule_ptr; - using rules::CharacterSet; - using rules::Seq; - using rules::Blank; - using rules::Choice; - using rules::Repeat; - using rules::CharacterRange; - using rules::blank; +namespace prepare_grammar { - namespace prepare_grammar { - class PatternParser { - public: - explicit PatternParser(const string &input) : - input(input), - length(input.length()), - position(0) {} +using std::string; +using std::vector; +using std::pair; +using std::make_shared; +using rules::rule_ptr; +using rules::CharacterSet; +using rules::Seq; +using rules::Blank; +using rules::Choice; +using rules::Repeat; +using rules::CharacterRange; +using rules::blank; - pair rule(bool nested) { - vector choices = {}; - do { - if (!choices.empty()) { - if (peek() == '|') - next(); - else - break; - } - auto pair = term(nested); - if (pair.second) - return { blank(), pair.second }; - choices.push_back(pair.first); - } while (has_more_input()); - auto rule = (choices.size() > 1) ? make_shared(choices) : choices.front(); - return { rule, nullptr }; - } +class PatternParser { + public: + explicit PatternParser(const string &input) + : input(input), length(input.length()), position(0) {} - private: - pair term(bool nested) { - rule_ptr result = blank(); - do { - if (peek() == '|') - break; - if (nested && peek() == ')') - break; - auto pair = factor(); - if (pair.second) - return { blank(), pair.second }; - result = Seq::Build({ result, pair.first }); - } while (has_more_input()); - return { result, nullptr }; - } + pair rule(bool nested) { + vector choices = {}; + do { + if (!choices.empty()) { + if (peek() == '|') + next(); + else + break; + } + auto pair = term(nested); + if (pair.second) + return { blank(), pair.second }; + choices.push_back(pair.first); + } while (has_more_input()); + auto rule = + (choices.size() > 1) ? make_shared(choices) : choices.front(); + return { rule, nullptr }; + } - pair factor() { - auto pair = atom(); - if (pair.second) - return { blank(), pair.second }; - rule_ptr result = pair.first; - if (has_more_input()) { - switch (peek()) { - case '*': - next(); - result = make_shared(result); - break; - case '+': - next(); - result = make_shared(result, make_shared(result)); - break; - case '?': - next(); - result = Choice::Build({ result, make_shared() }); - break; - } - } - return { result, nullptr }; - } + private: + pair term(bool nested) { + rule_ptr result = blank(); + do { + if (peek() == '|') + break; + if (nested && peek() == ')') + break; + auto pair = factor(); + if (pair.second) + return { blank(), pair.second }; + result = Seq::Build({ result, pair.first }); + } while (has_more_input()); + return { result, nullptr }; + } - pair atom() { - switch (peek()) { - case '(': { - next(); - auto pair = rule(true); - if (pair.second) - return { blank(), pair.second }; - if (peek() != ')') - return error("unmatched open paren"); - next(); - return { pair.first, nullptr }; - } - case '[': { - next(); - auto pair = char_set(); - if (pair.second) - return { blank(), pair.second }; - if (peek() != ']') - return error("unmatched open square bracket"); - next(); - return { pair.first.copy(), nullptr }; - } - case ')': { - return error("unmatched close paren"); - } - case ']': { - return error("unmatched close square bracket"); - } - case '.': { - next(); - return { CharacterSet({ '\n' }).complement().copy(), nullptr }; - } - default: { - auto pair = single_char(); - if (pair.second) - return { blank(), pair.second }; - return { pair.first.copy(), nullptr }; - } - } - } + pair factor() { + auto pair = atom(); + if (pair.second) + return { blank(), pair.second }; + rule_ptr result = pair.first; + if (has_more_input()) { + switch (peek()) { + case '*': + next(); + result = make_shared(result); + break; + case '+': + next(); + result = make_shared(result, make_shared(result)); + break; + case '?': + next(); + result = Choice::Build({ result, make_shared() }); + break; + } + } + return { result, nullptr }; + } - pair char_set() { - bool is_affirmative = true; - if (peek() == '^') { - next(); - is_affirmative = false; - } - CharacterSet result; - while (has_more_input() && (peek() != ']')) { - auto pair = single_char(); - if (pair.second) - return { CharacterSet(), pair.second }; - result.add_set(pair.first); - } - if (!is_affirmative) - result = result.complement(); - return { result, nullptr }; - } + pair atom() { + switch (peek()) { + case '(': { + next(); + auto pair = rule(true); + if (pair.second) + return { blank(), pair.second }; + if (peek() != ')') + return error("unmatched open paren"); + next(); + return { pair.first, nullptr }; + } + case '[': { + next(); + auto pair = char_set(); + if (pair.second) + return { blank(), pair.second }; + if (peek() != ']') + return error("unmatched open square bracket"); + next(); + return { pair.first.copy(), nullptr }; + } + case ')': { return error("unmatched close paren"); } + case ']': { return error("unmatched close square bracket"); } + case '.': { + next(); + return { CharacterSet({ '\n' }).complement().copy(), nullptr }; + } + default: { + auto pair = single_char(); + if (pair.second) + return { blank(), pair.second }; + return { pair.first.copy(), nullptr }; + } + } + } - pair single_char() { - CharacterSet value; - switch (peek()) { - case '\\': - next(); - value = escaped_char(peek()); - next(); - break; - default: - char first_char = peek(); - next(); - if (peek() == '-') { - next(); - value = CharacterSet({ CharacterRange(first_char, peek()) }); - next(); - } else { - value = CharacterSet({ first_char }); - } - } - return { value, nullptr }; - } + pair char_set() { + bool is_affirmative = true; + if (peek() == '^') { + next(); + is_affirmative = false; + } + CharacterSet result; + while (has_more_input() && (peek() != ']')) { + auto pair = single_char(); + if (pair.second) + return { CharacterSet(), pair.second }; + result.add_set(pair.first); + } + if (!is_affirmative) + result = result.complement(); + return { result, nullptr }; + } - CharacterSet escaped_char(char value) { - switch (value) { - case 'a': - return CharacterSet({ {'a', 'z'}, {'A', 'Z'} }); - case 'w': - return CharacterSet({ {'a', 'z'}, {'A', 'Z'}, {'0', '9'}}); - case 'd': - return CharacterSet({ {'0', '9'} }); - default: - return CharacterSet({ value }); - } - } - - void next() { - position++; - } - - char peek() { - return input[position]; - } - - bool has_more_input() { - return position < length; - } - - pair error(string msg) { - return { blank(), new GrammarError(GrammarErrorTypeRegex, msg) }; - } - - const string input; - const size_t length; - size_t position; - }; - - pair parse_regex(const std::string &input) { - return PatternParser(input).rule(false); + pair single_char() { + CharacterSet value; + switch (peek()) { + case '\\': + next(); + value = escaped_char(peek()); + next(); + break; + default: + char first_char = peek(); + next(); + if (peek() == '-') { + next(); + value = CharacterSet({ CharacterRange(first_char, peek()) }); + next(); + } else { + value = CharacterSet({ first_char }); } } + return { value, nullptr }; + } + + CharacterSet escaped_char(char value) { + switch (value) { + case 'a': + return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' } }); + case 'w': + return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } }); + case 'd': + return CharacterSet({ { '0', '9' } }); + default: + return CharacterSet({ value }); + } + } + + void next() { position++; } + + char peek() { return input[position]; } + + bool has_more_input() { return position < length; } + + pair error(string msg) { + return { blank(), new GrammarError(GrammarErrorTypeRegex, msg) }; + } + + const string input; + const size_t length; + size_t position; +}; + +pair parse_regex(const std::string &input) { + return PatternParser(input).rule(false); } + +} // namespace prepare_grammar +} // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/parse_regex.h b/src/compiler/prepare_grammar/parse_regex.h index 4741118c..2255bea1 100644 --- a/src/compiler/prepare_grammar/parse_regex.h +++ b/src/compiler/prepare_grammar/parse_regex.h @@ -6,10 +6,12 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { - namespace prepare_grammar { - std::pair - parse_regex(const std::string &); - } -} +namespace prepare_grammar { + +std::pair parse_regex( + const std::string &); + +} // namespace prepare_grammar +} // namespace tree_sitter #endif // COMPILER_PREPARE_GRAMMAR_PARSE_REGEX_H_ diff --git a/src/compiler/prepare_grammar/prepare_grammar.cc b/src/compiler/prepare_grammar/prepare_grammar.cc index 42a39560..ec117b80 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.cc +++ b/src/compiler/prepare_grammar/prepare_grammar.cc @@ -7,29 +7,31 @@ #include "compiler/prepared_grammar.h" namespace tree_sitter { - using std::tuple; - using std::make_tuple; +namespace prepare_grammar { - namespace prepare_grammar { - tuple - prepare_grammar(const Grammar &input_grammar) { - auto result = intern_symbols(input_grammar); - const InternedGrammar &grammar = result.first; - const GrammarError *error = result.second; +using std::tuple; +using std::make_tuple; - if (error) - return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); +tuple prepare_grammar( + const Grammar &input_grammar) { + auto result = intern_symbols(input_grammar); + const InternedGrammar &grammar = result.first; + const GrammarError *error = result.second; - auto grammars = extract_tokens(grammar); - const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first); - auto expand_tokens_result = expand_tokens(grammars.second); - const LexicalGrammar &lex_grammar = expand_tokens_result.first; - error = expand_tokens_result.second; + if (error) + return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); - if (error) - return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); + auto grammars = extract_tokens(grammar); + const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first); + auto expand_tokens_result = expand_tokens(grammars.second); + const LexicalGrammar &lex_grammar = expand_tokens_result.first; + error = expand_tokens_result.second; - return make_tuple(rule_grammar, lex_grammar, nullptr); - } - } + if (error) + return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); + + return make_tuple(rule_grammar, lex_grammar, nullptr); } + +} // namespace prepare_grammar +} // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/prepare_grammar.h b/src/compiler/prepare_grammar/prepare_grammar.h index 7c382be9..32f917bc 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.h +++ b/src/compiler/prepare_grammar/prepare_grammar.h @@ -5,13 +5,16 @@ #include "compiler/prepared_grammar.h" namespace tree_sitter { - class Grammar; - class GrammarError; - namespace prepare_grammar { - std::tuple - prepare_grammar(const Grammar &); - } -} +class Grammar; +class GrammarError; + +namespace prepare_grammar { + +std::tuple prepare_grammar( + const Grammar &); + +} // namespace prepare_grammar +} // namespace tree_sitter #endif // COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_ diff --git a/src/compiler/prepare_grammar/token_description.cc b/src/compiler/prepare_grammar/token_description.cc index a3f50976..97a0eb08 100644 --- a/src/compiler/prepare_grammar/token_description.cc +++ b/src/compiler/prepare_grammar/token_description.cc @@ -9,36 +9,36 @@ #include "compiler/util/string_helpers.h" namespace tree_sitter { - using std::string; +namespace prepare_grammar { - namespace prepare_grammar { - class TokenDescription : public rules::RuleFn { - string apply_to(const rules::Pattern *rule) { - return "/" + util::escape_string(rule->value) + "/"; - } +using std::string; - string apply_to(const rules::String *rule) { - return "'" + util::escape_string(rule->value) + "'"; - } +class TokenDescription : public rules::RuleFn { + string apply_to(const rules::Pattern *rule) { + return "/" + util::escape_string(rule->value) + "/"; + } - string apply_to(const rules::Metadata *rule) { - return apply(rule->rule); - } + string apply_to(const rules::String *rule) { + return "'" + util::escape_string(rule->value) + "'"; + } - string apply_to(const rules::Seq *rule) { - return "(seq " + apply(rule->left) + " " + apply(rule->right) + ")"; - } + string apply_to(const rules::Metadata *rule) { return apply(rule->rule); } - string apply_to(const rules::Choice *rule) { - string result = "(choice"; - for (auto &element : rule->elements) - result += " " + apply(element); - return result + ")"; - } - }; + string apply_to(const rules::Seq *rule) { + return "(seq " + apply(rule->left) + " " + apply(rule->right) + ")"; + } - std::string token_description(const rules::rule_ptr &rule) { - return TokenDescription().apply(rule); - } - } + string apply_to(const rules::Choice *rule) { + string result = "(choice"; + for (auto &element : rule->elements) + result += " " + apply(element); + return result + ")"; + } +}; + +std::string token_description(const rules::rule_ptr &rule) { + return TokenDescription().apply(rule); } + +} // namespace prepare_grammar +} // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/token_description.h b/src/compiler/prepare_grammar/token_description.h index 4bd69e1b..bd6a87cb 100644 --- a/src/compiler/prepare_grammar/token_description.h +++ b/src/compiler/prepare_grammar/token_description.h @@ -5,9 +5,11 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { - namespace prepare_grammar { - std::string token_description(const rules::rule_ptr &); - } -} +namespace prepare_grammar { + +std::string token_description(const rules::rule_ptr &); + +} // namespace prepare_grammar +} // namespace tree_sitter #endif // COMPILER_PREPARE_GRAMMAR_TOKEN_DESCRIPTION_H_ diff --git a/src/compiler/prepared_grammar.cc b/src/compiler/prepared_grammar.cc index b0a15949..82687924 100644 --- a/src/compiler/prepared_grammar.cc +++ b/src/compiler/prepared_grammar.cc @@ -5,54 +5,52 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - using std::string; - using std::pair; - using std::vector; - using std::set; - const rules::rule_ptr & PreparedGrammar::rule(const rules::Symbol &symbol) const { - return symbol.is_auxiliary() ? - aux_rules[symbol.index].second : - rules[symbol.index].second; - } +using std::string; +using std::pair; +using std::vector; +using std::set; - const string & PreparedGrammar::rule_name(const rules::Symbol &symbol) const { - return symbol.is_auxiliary() ? - aux_rules[symbol.index].first : - rules[symbol.index].first; - } - - PreparedGrammar::PreparedGrammar() {} - SyntaxGrammar::SyntaxGrammar() {} - LexicalGrammar::LexicalGrammar() {} - - SyntaxGrammar::SyntaxGrammar( - const vector> &rules, - const vector> &aux_rules) : - PreparedGrammar(rules, aux_rules) {} - - LexicalGrammar::LexicalGrammar( - const vector> &rules, - const vector> &aux_rules) : - PreparedGrammar(rules, aux_rules) {} - - PreparedGrammar::PreparedGrammar( - const vector> &rules, - const vector> &aux_rules) : - rules(rules), - aux_rules(aux_rules) {} - - SyntaxGrammar::SyntaxGrammar( - const vector> &rules, - const vector> &aux_rules, - const set &ubiquitous_tokens) : - PreparedGrammar(rules, aux_rules), - ubiquitous_tokens(ubiquitous_tokens) {} - - LexicalGrammar::LexicalGrammar( - const vector> &rules, - const vector> &aux_rules, - const set &separators) : - PreparedGrammar(rules, aux_rules), - separators(separators) {} +const rules::rule_ptr &PreparedGrammar::rule(const rules::Symbol &symbol) + const { + return symbol.is_auxiliary() ? aux_rules[symbol.index].second + : rules[symbol.index].second; } + +const string &PreparedGrammar::rule_name(const rules::Symbol &symbol) const { + return symbol.is_auxiliary() ? aux_rules[symbol.index].first + : rules[symbol.index].first; +} + +PreparedGrammar::PreparedGrammar() {} +SyntaxGrammar::SyntaxGrammar() {} +LexicalGrammar::LexicalGrammar() {} + +SyntaxGrammar::SyntaxGrammar( + const vector > &rules, + const vector > &aux_rules) + : PreparedGrammar(rules, aux_rules) {} + +LexicalGrammar::LexicalGrammar( + const vector > &rules, + const vector > &aux_rules) + : PreparedGrammar(rules, aux_rules) {} + +PreparedGrammar::PreparedGrammar( + const vector > &rules, + const vector > &aux_rules) + : rules(rules), aux_rules(aux_rules) {} + +SyntaxGrammar::SyntaxGrammar( + const vector > &rules, + const vector > &aux_rules, + const set &ubiquitous_tokens) + : PreparedGrammar(rules, aux_rules), ubiquitous_tokens(ubiquitous_tokens) {} + +LexicalGrammar::LexicalGrammar( + const vector > &rules, + const vector > &aux_rules, + const set &separators) + : PreparedGrammar(rules, aux_rules), separators(separators) {} + +} // namespace tree_sitter diff --git a/src/compiler/prepared_grammar.h b/src/compiler/prepared_grammar.h index 80b3ff40..be015c9d 100644 --- a/src/compiler/prepared_grammar.h +++ b/src/compiler/prepared_grammar.h @@ -9,47 +9,49 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - class PreparedGrammar { - public: - PreparedGrammar(); - PreparedGrammar( - const std::vector> &rules, - const std::vector> &aux_rules); - const std::vector> rules; - const std::vector> aux_rules; +class PreparedGrammar { + public: + PreparedGrammar(); + PreparedGrammar( + const std::vector > &rules, + const std::vector > &aux_rules); - const std::string & rule_name(const rules::Symbol &symbol) const; - const rules::rule_ptr & rule(const rules::Symbol &symbol) const; - }; + const std::vector > rules; + const std::vector > aux_rules; - class SyntaxGrammar : public PreparedGrammar { - public: - SyntaxGrammar(); - SyntaxGrammar( - const std::vector> &rules, - const std::vector> &aux_rules); - SyntaxGrammar( - const std::vector> &rules, - const std::vector> &aux_rules, - const std::set &ubiquitous_tokens); + const std::string &rule_name(const rules::Symbol &symbol) const; + const rules::rule_ptr &rule(const rules::Symbol &symbol) const; +}; - std::set ubiquitous_tokens; - }; +class SyntaxGrammar : public PreparedGrammar { + public: + SyntaxGrammar(); + SyntaxGrammar( + const std::vector > &rules, + const std::vector > &aux_rules); + SyntaxGrammar( + const std::vector > &rules, + const std::vector > &aux_rules, + const std::set &ubiquitous_tokens); - class LexicalGrammar : public PreparedGrammar { - public: - LexicalGrammar(); - LexicalGrammar( - const std::vector> &rules, - const std::vector> &aux_rules); - LexicalGrammar( - const std::vector> &rules, - const std::vector> &aux_rules, - const std::set &separators); + std::set ubiquitous_tokens; +}; - std::set separators; - }; -} +class LexicalGrammar : public PreparedGrammar { + public: + LexicalGrammar(); + LexicalGrammar( + const std::vector > &rules, + const std::vector > &aux_rules); + LexicalGrammar( + const std::vector > &rules, + const std::vector > &aux_rules, + const std::set &separators); + + std::set separators; +}; + +} // namespace tree_sitter #endif // COMPILER_PREPARED_GRAMMAR_H_ diff --git a/src/compiler/rules/blank.cc b/src/compiler/rules/blank.cc index 06ab6d63..497ba34e 100644 --- a/src/compiler/rules/blank.cc +++ b/src/compiler/rules/blank.cc @@ -2,28 +2,22 @@ #include #include "compiler/rules/visitor.h" -namespace tree_sitter { - namespace rules { - Blank::Blank() {} +namespace tree_sitter { +namespace rules { - bool Blank::operator==(const Rule &rule) const { - return dynamic_cast(&rule) != nullptr; - } +Blank::Blank() {} - size_t Blank::hash_code() const { - return 0; - } - - rule_ptr Blank::copy() const { - return std::make_shared(); - } - - std::string Blank::to_string() const { - return "#"; - } - - void Blank::accept(Visitor *visitor) const { - visitor->visit(this); - } - } +bool Blank::operator==(const Rule &rule) const { + return dynamic_cast(&rule) != nullptr; } + +size_t Blank::hash_code() const { return 0; } + +rule_ptr Blank::copy() const { return std::make_shared(); } + +std::string Blank::to_string() const { return "#"; } + +void Blank::accept(Visitor *visitor) const { visitor->visit(this); } + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/blank.h b/src/compiler/rules/blank.h index 91e9bc7b..2ddc8e2e 100644 --- a/src/compiler/rules/blank.h +++ b/src/compiler/rules/blank.h @@ -4,19 +4,21 @@ #include #include "compiler/rules/rule.h" -namespace tree_sitter { - namespace rules { - class Blank : public Rule { - public: - Blank(); +namespace tree_sitter { +namespace rules { - bool operator==(const Rule& other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - }; - } -} +class Blank : public Rule { + public: + Blank(); + + bool operator==(const Rule &other) const; + size_t hash_code() const; + rule_ptr copy() const; + std::string to_string() const; + void accept(Visitor *visitor) const; +}; + +} // namespace rules +} // namespace tree_sitter #endif // COMPILER_RULES_BLANK_H_ diff --git a/src/compiler/rules/built_in_symbols.cc b/src/compiler/rules/built_in_symbols.cc index 96d41f18..a6befc8e 100644 --- a/src/compiler/rules/built_in_symbols.cc +++ b/src/compiler/rules/built_in_symbols.cc @@ -1,9 +1,11 @@ #include "compiler/rules/built_in_symbols.h" namespace tree_sitter { - namespace rules { - Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); } - Symbol ERROR() { return Symbol(-2, SymbolOptionToken); } - Symbol START() { return Symbol(-3); } - } -} +namespace rules { + +Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); } +Symbol ERROR() { return Symbol(-2, SymbolOptionToken); } +Symbol START() { return Symbol(-3); } + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/built_in_symbols.h b/src/compiler/rules/built_in_symbols.h index bdbec4e5..ebe945b9 100644 --- a/src/compiler/rules/built_in_symbols.h +++ b/src/compiler/rules/built_in_symbols.h @@ -4,11 +4,11 @@ #include "compiler/rules/symbol.h" namespace tree_sitter { - namespace rules { - Symbol ERROR(); - Symbol START(); - Symbol END_OF_INPUT(); - } +namespace rules { +Symbol ERROR(); +Symbol START(); +Symbol END_OF_INPUT(); +} } #endif // COMPILER_RULES_BUILT_IN_SYMBOLS_H_ diff --git a/src/compiler/rules/character_range.cc b/src/compiler/rules/character_range.cc index ceded613..3927af90 100644 --- a/src/compiler/rules/character_range.cc +++ b/src/compiler/rules/character_range.cc @@ -3,50 +3,55 @@ #include namespace tree_sitter { - using std::string; +namespace rules { - namespace rules { - static const unsigned char MAX_CHAR = -1; +using std::string; - CharacterRange::CharacterRange(unsigned char value) : min(value), max(value) {} - CharacterRange::CharacterRange(unsigned char min, unsigned char max) : min(min), max(max) {} +static const unsigned char MAX_CHAR = -1; - bool CharacterRange::operator==(const CharacterRange &other) const { - return min == other.min && max == other.max; - } +CharacterRange::CharacterRange(unsigned char value) : min(value), max(value) {} +CharacterRange::CharacterRange(unsigned char min, unsigned char max) + : min(min), max(max) {} - bool CharacterRange::operator<(const CharacterRange &other) const { - if (min < other.min) return true; - if (min > other.min) return false; - if (max < other.max) return true; - return false; - } - - string escape_character(unsigned char input) { - switch (input) { - case '\0': - return ""; - case '\n': - return "\\n"; - case '\r': - return "\\r"; - case '\t': - return "\\t"; - case MAX_CHAR: - return ""; - default: - return string() + static_cast(input); - } - } - - string CharacterRange::to_string() const { - if (min == 0 && max == MAX_CHAR) - return ""; - if (min == max) - return escape_character(min); - else - return string() + escape_character(min) + "-" + escape_character(max); - } - } +bool CharacterRange::operator==(const CharacterRange &other) const { + return min == other.min && max == other.max; } +bool CharacterRange::operator<(const CharacterRange &other) const { + if (min < other.min) + return true; + if (min > other.min) + return false; + if (max < other.max) + return true; + return false; +} + +string escape_character(unsigned char input) { + switch (input) { + case '\0': + return ""; + case '\n': + return "\\n"; + case '\r': + return "\\r"; + case '\t': + return "\\t"; + case MAX_CHAR: + return ""; + default: + return string() + static_cast(input); + } +} + +string CharacterRange::to_string() const { + if (min == 0 && max == MAX_CHAR) + return ""; + if (min == max) + return escape_character(min); + else + return string() + escape_character(min) + "-" + escape_character(max); +} + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/character_range.h b/src/compiler/rules/character_range.h index b27d312e..ed0f0620 100644 --- a/src/compiler/rules/character_range.h +++ b/src/compiler/rules/character_range.h @@ -5,29 +5,34 @@ #include namespace tree_sitter { - namespace rules { - struct CharacterRange { - unsigned char min; - unsigned char max; +namespace rules { - // IMPLICIT_CONSTRUCTORS - CharacterRange(unsigned char value); - CharacterRange(unsigned char min, unsigned char max); +struct CharacterRange { + unsigned char min; + unsigned char max; - bool operator==(const CharacterRange &other) const; - bool operator<(const CharacterRange &others) const; - std::string to_string() const; - }; - } -} + // IMPLICIT_CONSTRUCTORS + CharacterRange(unsigned char value); + CharacterRange(unsigned char min, unsigned char max); + + bool operator==(const CharacterRange &other) const; + bool operator<(const CharacterRange &others) const; + std::string to_string() const; +}; + +} // namespace rules +} // namespace tree_sitter namespace std { - template<> - struct hash { - size_t operator()(const tree_sitter::rules::CharacterRange &range) const { - return (hash()(range.min) ^ hash()(range.max)); - } - }; -} + +template <> +struct hash { + size_t operator()(const tree_sitter::rules::CharacterRange &range) const { + return (hash()(range.min) ^ + hash()(range.max)); + } +}; + +} // namespace std #endif // COMPILER_RULES_CHARACTER_RANGE_H_ diff --git a/src/compiler/rules/character_set.cc b/src/compiler/rules/character_set.cc index ef0c0df4..93bbee02 100644 --- a/src/compiler/rules/character_set.cc +++ b/src/compiler/rules/character_set.cc @@ -3,140 +3,141 @@ #include #include "compiler/rules/visitor.h" +namespace tree_sitter { +namespace rules { + using std::string; using std::hash; using std::set; using std::pair; using std::initializer_list; -namespace tree_sitter { - namespace rules { - static const unsigned char MAX_CHAR = -1; +static const unsigned char MAX_CHAR = -1; - CharacterSet::CharacterSet() : ranges({}) {} - CharacterSet::CharacterSet(const set &ranges) : ranges(ranges) {} - CharacterSet::CharacterSet(const initializer_list &ranges) : ranges(ranges) {} +CharacterSet::CharacterSet() : ranges({}) {} +CharacterSet::CharacterSet(const set &ranges) + : ranges(ranges) {} +CharacterSet::CharacterSet(const initializer_list &ranges) + : ranges(ranges) {} - bool CharacterSet::operator==(const Rule &rule) const { - const CharacterSet *other = dynamic_cast(&rule); - return other && (ranges == other->ranges); - } - - bool CharacterSet::operator<(const CharacterSet &other) const { - return ranges < other.ranges; - } - - size_t CharacterSet::hash_code() const { - size_t result = std::hash()(ranges.size()); - for (auto &range : ranges) { - result ^= std::hash()(range.min); - result ^= std::hash()(range.max); - } - return result; - } - - rule_ptr CharacterSet::copy() const { - return std::make_shared(*this); - } - - string CharacterSet::to_string() const { - string result("#"; - } - - CharacterSet CharacterSet::complement() const { - CharacterSet result({ {0, MAX_CHAR} }); - result.remove_set(*this); - return result; - } - - std::pair CharacterSet::most_compact_representation() const { - auto first_range = *ranges.begin(); - if (first_range.min == 0 && first_range.max > 0) { - return { this->complement(), false }; - } else { - return { *this, true }; - } - } - - void add_range(CharacterSet *self, CharacterRange addition) { - set new_ranges; - for (auto range : self->ranges) { - bool is_adjacent = false; - if (range.min < addition.min && range.max >= addition.min - 1) { - is_adjacent = true; - addition.min = range.min; - } - if (range.max > addition.max && range.min <= addition.max + 1) { - is_adjacent = true; - addition.max = range.max; - } - if (!is_adjacent) { - new_ranges.insert(range); - } - } - new_ranges.insert(addition); - self->ranges = new_ranges; - } - - CharacterSet remove_range(CharacterSet *self, CharacterRange range_to_remove) { - CharacterSet removed_set; - set new_ranges; - for (auto range : self->ranges) { - if (range_to_remove.min <= range.min) { - if (range_to_remove.max < range.min) { - new_ranges.insert(range); - } else if (range_to_remove.max < range.max) { - new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max)); - add_range(&removed_set, CharacterRange(range.min, range_to_remove.max)); - } else { - add_range(&removed_set, range); - } - } else if (range_to_remove.min <= range.max) { - if (range_to_remove.max < range.max) { - new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1)); - new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max)); - add_range(&removed_set, range_to_remove); - } else { - new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1)); - add_range(&removed_set, CharacterRange(range_to_remove.min, range.max)); - } - } else { - new_ranges.insert(range); - } - } - self->ranges = new_ranges; - return removed_set; - } - - bool CharacterSet::is_empty() const { - return ranges.empty(); - } - - void CharacterSet::add_set(const CharacterSet &other) { - for (auto &other_range : other.ranges) { - add_range(this, other_range); - } - } - - CharacterSet CharacterSet::remove_set(const CharacterSet &other) { - CharacterSet result; - for (auto &other_range : other.ranges) { - auto removed_set = remove_range(this, other_range); - result.add_set(removed_set); - } - return result; - } - - CharacterSet CharacterSet::intersect(const CharacterSet &set) const { - CharacterSet copy = *this; - return copy.remove_set(set); - } - - void CharacterSet::accept(Visitor *visitor) const { - visitor->visit(this); - } - } +bool CharacterSet::operator==(const Rule &rule) const { + const CharacterSet *other = dynamic_cast(&rule); + return other && (ranges == other->ranges); } + +bool CharacterSet::operator<(const CharacterSet &other) const { + return ranges < other.ranges; +} + +size_t CharacterSet::hash_code() const { + size_t result = std::hash()(ranges.size()); + for (auto &range : ranges) { + result ^= std::hash()(range.min); + result ^= std::hash()(range.max); + } + return result; +} + +rule_ptr CharacterSet::copy() const { + return std::make_shared(*this); +} + +string CharacterSet::to_string() const { + string result("#"; +} + +CharacterSet CharacterSet::complement() const { + CharacterSet result({ { 0, MAX_CHAR } }); + result.remove_set(*this); + return result; +} + +std::pair CharacterSet::most_compact_representation() + const { + auto first_range = *ranges.begin(); + if (first_range.min == 0 && first_range.max > 0) { + return { this->complement(), false }; + } else { + return { *this, true }; + } +} + +void add_range(CharacterSet *self, CharacterRange addition) { + set new_ranges; + for (auto range : self->ranges) { + bool is_adjacent = false; + if (range.min < addition.min && range.max >= addition.min - 1) { + is_adjacent = true; + addition.min = range.min; + } + if (range.max > addition.max && range.min <= addition.max + 1) { + is_adjacent = true; + addition.max = range.max; + } + if (!is_adjacent) { + new_ranges.insert(range); + } + } + new_ranges.insert(addition); + self->ranges = new_ranges; +} + +CharacterSet remove_range(CharacterSet *self, CharacterRange range_to_remove) { + CharacterSet removed_set; + set new_ranges; + for (auto range : self->ranges) { + if (range_to_remove.min <= range.min) { + if (range_to_remove.max < range.min) { + new_ranges.insert(range); + } else if (range_to_remove.max < range.max) { + new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max)); + add_range(&removed_set, CharacterRange(range.min, range_to_remove.max)); + } else { + add_range(&removed_set, range); + } + } else if (range_to_remove.min <= range.max) { + if (range_to_remove.max < range.max) { + new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1)); + new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max)); + add_range(&removed_set, range_to_remove); + } else { + new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1)); + add_range(&removed_set, CharacterRange(range_to_remove.min, range.max)); + } + } else { + new_ranges.insert(range); + } + } + self->ranges = new_ranges; + return removed_set; +} + +bool CharacterSet::is_empty() const { return ranges.empty(); } + +void CharacterSet::add_set(const CharacterSet &other) { + for (auto &other_range : other.ranges) { + add_range(this, other_range); + } +} + +CharacterSet CharacterSet::remove_set(const CharacterSet &other) { + CharacterSet result; + for (auto &other_range : other.ranges) { + auto removed_set = remove_range(this, other_range); + result.add_set(removed_set); + } + return result; +} + +CharacterSet CharacterSet::intersect(const CharacterSet &set) const { + CharacterSet copy = *this; + return copy.remove_set(set); +} + +void CharacterSet::accept(Visitor *visitor) const { visitor->visit(this); } + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/character_set.h b/src/compiler/rules/character_set.h index 9bf87bfb..6c71cf5f 100644 --- a/src/compiler/rules/character_set.h +++ b/src/compiler/rules/character_set.h @@ -8,40 +8,44 @@ #include "compiler/rules/rule.h" #include "compiler/rules/character_range.h" -namespace tree_sitter { - namespace rules { - class CharacterSet : public Rule { - public: - CharacterSet(); - explicit CharacterSet(const std::set &ranges); - explicit CharacterSet(const std::initializer_list &ranges); +namespace tree_sitter { +namespace rules { - bool operator==(const Rule& other) const; - bool operator<(const CharacterSet &) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +class CharacterSet : public Rule { + public: + CharacterSet(); + explicit CharacterSet(const std::set &ranges); + explicit CharacterSet(const std::initializer_list &ranges); - void add_set(const CharacterSet &other); - CharacterSet remove_set(const CharacterSet &other); - CharacterSet complement() const; - CharacterSet intersect(const CharacterSet &) const; - std::pair most_compact_representation() const; - bool is_empty() const; + bool operator==(const Rule &other) const; + bool operator<(const CharacterSet &) const; + size_t hash_code() const; + rule_ptr copy() const; + std::string to_string() const; + void accept(Visitor *visitor) const; - std::set ranges; - }; - } -} + void add_set(const CharacterSet &other); + CharacterSet remove_set(const CharacterSet &other); + CharacterSet complement() const; + CharacterSet intersect(const CharacterSet &) const; + std::pair most_compact_representation() const; + bool is_empty() const; + + std::set ranges; +}; + +} // namespace rules +} // namespace tree_sitter namespace std { - template<> - struct hash { - size_t operator()(const tree_sitter::rules::CharacterSet &rule) const { - return rule.hash_code(); - } - }; -} + +template <> +struct hash { + size_t operator()(const tree_sitter::rules::CharacterSet &rule) const { + return rule.hash_code(); + } +}; + +} // namespace std #endif // COMPILER_RULES_CHARACTER_SET_H_ diff --git a/src/compiler/rules/choice.cc b/src/compiler/rules/choice.cc index cf3f9ee0..548dcb48 100644 --- a/src/compiler/rules/choice.cc +++ b/src/compiler/rules/choice.cc @@ -3,50 +3,51 @@ #include #include "compiler/rules/visitor.h" -namespace tree_sitter { - using std::string; - using std::make_shared; - using std::vector; - using std::set; - using std::dynamic_pointer_cast; +namespace tree_sitter { +namespace rules { - namespace rules { - Choice::Choice(const vector &elements) : elements(elements) {} +using std::string; +using std::make_shared; +using std::vector; +using std::set; +using std::dynamic_pointer_cast; - rule_ptr Choice::Build(const vector &elements) { - return make_shared(elements); - } +Choice::Choice(const vector &elements) : elements(elements) {} - bool Choice::operator==(const Rule &rule) const { - const Choice *other = dynamic_cast(&rule); - if (!other) return false; - size_t size = elements.size(); - if (size != other->elements.size()) return false; - for (size_t i = 0; i < size; i++) - if (!elements[i]->operator==(*other->elements[i])) return false; - return true; - } - - size_t Choice::hash_code() const { - size_t result = std::hash()(elements.size()); - for (const auto &element : elements) - result ^= element->hash_code(); - return result; - } - - rule_ptr Choice::copy() const { - return std::make_shared(*this); - } - - string Choice::to_string() const { - string result = "#to_string(); - return result + ">"; - } - - void Choice::accept(Visitor *visitor) const { - visitor->visit(this); - } - } +rule_ptr Choice::Build(const vector &elements) { + return make_shared(elements); } + +bool Choice::operator==(const Rule &rule) const { + const Choice *other = dynamic_cast(&rule); + if (!other) + return false; + size_t size = elements.size(); + if (size != other->elements.size()) + return false; + for (size_t i = 0; i < size; i++) + if (!elements[i]->operator==(*other->elements[i])) + return false; + return true; +} + +size_t Choice::hash_code() const { + size_t result = std::hash()(elements.size()); + for (const auto &element : elements) + result ^= element->hash_code(); + return result; +} + +rule_ptr Choice::copy() const { return std::make_shared(*this); } + +string Choice::to_string() const { + string result = "#to_string(); + return result + ">"; +} + +void Choice::accept(Visitor *visitor) const { visitor->visit(this); } + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/choice.h b/src/compiler/rules/choice.h index 3d6f62ff..5f971db1 100644 --- a/src/compiler/rules/choice.h +++ b/src/compiler/rules/choice.h @@ -6,21 +6,23 @@ #include "compiler/rules/rule.h" namespace tree_sitter { - namespace rules { - class Choice : public Rule { - public: - explicit Choice(const std::vector &elements); - static rule_ptr Build(const std::vector &rules); +namespace rules { - bool operator==(const Rule& other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +class Choice : public Rule { + public: + explicit Choice(const std::vector &elements); + static rule_ptr Build(const std::vector &rules); - const std::vector elements; - }; - } -} + bool operator==(const Rule &other) const; + size_t hash_code() const; + rule_ptr copy() const; + std::string to_string() const; + void accept(Visitor *visitor) const; + + const std::vector elements; +}; + +} // namespace rules +} // namespace tree_sitter #endif // COMPILER_RULES_CHOICE_H_ diff --git a/src/compiler/rules/metadata.cc b/src/compiler/rules/metadata.cc index b45a5ceb..8828607a 100644 --- a/src/compiler/rules/metadata.cc +++ b/src/compiler/rules/metadata.cc @@ -3,45 +3,44 @@ #include #include "compiler/rules/visitor.h" -namespace tree_sitter { - using std::hash; - using std::make_shared; - using std::map; +namespace tree_sitter { +namespace rules { - namespace rules { - Metadata::Metadata(rule_ptr rule, map values) : rule(rule), value(values) {} +using std::hash; +using std::make_shared; +using std::map; - bool Metadata::operator==(const Rule &rule) const { - auto other = dynamic_cast(&rule); - return other && other->value == value && other->rule->operator==(*this->rule); - } +Metadata::Metadata(rule_ptr rule, map values) + : rule(rule), value(values) {} - size_t Metadata::hash_code() const { - size_t result = hash()(value.size()); - for (auto &pair : value) { - result ^= hash()(pair.first); - result ^= hash()(pair.second); - } - return result; - } - - rule_ptr Metadata::copy() const { - return make_shared(rule->copy(), value); - } - - int Metadata::value_for(MetadataKey key) const { - auto pair = value.find(key); - return (pair != value.end()) ? - pair->second : - 0; - } - - std::string Metadata::to_string() const { - return "#to_string() + ">"; - } - - void Metadata::accept(Visitor *visitor) const { - visitor->visit(this); - } - } +bool Metadata::operator==(const Rule &rule) const { + auto other = dynamic_cast(&rule); + return other && other->value == value && other->rule->operator==(*this->rule); } + +size_t Metadata::hash_code() const { + size_t result = hash()(value.size()); + for (auto &pair : value) { + result ^= hash()(pair.first); + result ^= hash()(pair.second); + } + return result; +} + +rule_ptr Metadata::copy() const { + return make_shared(rule->copy(), value); +} + +int Metadata::value_for(MetadataKey key) const { + auto pair = value.find(key); + return (pair != value.end()) ? pair->second : 0; +} + +std::string Metadata::to_string() const { + return "#to_string() + ">"; +} + +void Metadata::accept(Visitor *visitor) const { visitor->visit(this); } + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/metadata.h b/src/compiler/rules/metadata.h index c8fd7d59..f46b07e5 100644 --- a/src/compiler/rules/metadata.h +++ b/src/compiler/rules/metadata.h @@ -5,30 +5,32 @@ #include #include "compiler/rules/rule.h" -namespace tree_sitter { - namespace rules { - typedef enum { - START_TOKEN, - PRECEDENCE, - IS_TOKEN, - DESCRIPTION, - } MetadataKey; +namespace tree_sitter { +namespace rules { - class Metadata : public Rule { - public: - Metadata(rule_ptr rule, std::map value); +typedef enum { + START_TOKEN, + PRECEDENCE, + IS_TOKEN, + DESCRIPTION, +} MetadataKey; - bool operator==(const Rule& other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - int value_for(MetadataKey key) const; +class Metadata : public Rule { + public: + Metadata(rule_ptr rule, std::map value); - const rule_ptr rule; - const std::map value; - }; - } -} + bool operator==(const Rule &other) const; + size_t hash_code() const; + rule_ptr copy() const; + std::string to_string() const; + void accept(Visitor *visitor) const; + int value_for(MetadataKey key) const; + + const rule_ptr rule; + const std::map value; +}; + +} // namespace rules +} // namespace tree_sitter #endif // COMPILER_RULES_METADATA_H_ diff --git a/src/compiler/rules/named_symbol.cc b/src/compiler/rules/named_symbol.cc index a18afad6..2281bbf6 100644 --- a/src/compiler/rules/named_symbol.cc +++ b/src/compiler/rules/named_symbol.cc @@ -2,32 +2,30 @@ #include #include "compiler/rules/visitor.h" -namespace tree_sitter { - using std::string; - using std::hash; +namespace tree_sitter { +namespace rules { - namespace rules { - NamedSymbol::NamedSymbol(const std::string &name) : name(name) {} +using std::string; +using std::hash; - bool NamedSymbol::operator==(const Rule &rule) const { - auto other = dynamic_cast(&rule); - return other && other->name == name; - } +NamedSymbol::NamedSymbol(const std::string &name) : name(name) {} - size_t NamedSymbol::hash_code() const { - return hash()(name); - } - - rule_ptr NamedSymbol::copy() const { - return std::make_shared(*this); - } - - string NamedSymbol::to_string() const { - return string("#"; - } - - void NamedSymbol::accept(Visitor *visitor) const { - visitor->visit(this); - } - } +bool NamedSymbol::operator==(const Rule &rule) const { + auto other = dynamic_cast(&rule); + return other && other->name == name; } + +size_t NamedSymbol::hash_code() const { return hash()(name); } + +rule_ptr NamedSymbol::copy() const { + return std::make_shared(*this); +} + +string NamedSymbol::to_string() const { + return string("#"; +} + +void NamedSymbol::accept(Visitor *visitor) const { visitor->visit(this); } + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/named_symbol.h b/src/compiler/rules/named_symbol.h index ab6bc892..08b8c508 100644 --- a/src/compiler/rules/named_symbol.h +++ b/src/compiler/rules/named_symbol.h @@ -4,21 +4,23 @@ #include #include "compiler/rules/rule.h" -namespace tree_sitter { - namespace rules { - class NamedSymbol : public Rule { - public: - explicit NamedSymbol(const std::string &name); +namespace tree_sitter { +namespace rules { - bool operator==(const Rule& other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +class NamedSymbol : public Rule { + public: + explicit NamedSymbol(const std::string &name); - std::string name; - }; - } -} + bool operator==(const Rule &other) const; + size_t hash_code() const; + rule_ptr copy() const; + std::string to_string() const; + void accept(Visitor *visitor) const; + + std::string name; +}; + +} // namespace rules +} // namespace tree_sitter #endif // COMPILER_RULES_NAMED_SYMBOL_H_ diff --git a/src/compiler/rules/pattern.cc b/src/compiler/rules/pattern.cc index f3d839c1..645fcad8 100644 --- a/src/compiler/rules/pattern.cc +++ b/src/compiler/rules/pattern.cc @@ -4,31 +4,27 @@ #include "compiler/util/string_helpers.h" namespace tree_sitter { - namespace rules { - using std::string; - using std::hash; +namespace rules { - Pattern::Pattern(const string &string) : value(string) {} +using std::string; +using std::hash; - bool Pattern::operator==(tree_sitter::rules::Rule const &other) const { - auto pattern = dynamic_cast(&other); - return pattern && (pattern->value == value); - } +Pattern::Pattern(const string &string) : value(string) {} - size_t Pattern::hash_code() const { - return hash()(value); - } - - rule_ptr Pattern::copy() const { - return std::make_shared(*this); - } - - string Pattern::to_string() const { - return string("#"; - } - - void Pattern::accept(Visitor *visitor) const { - visitor->visit(this); - } - } +bool Pattern::operator==(tree_sitter::rules::Rule const &other) const { + auto pattern = dynamic_cast(&other); + return pattern && (pattern->value == value); } + +size_t Pattern::hash_code() const { return hash()(value); } + +rule_ptr Pattern::copy() const { return std::make_shared(*this); } + +string Pattern::to_string() const { + return string("#"; +} + +void Pattern::accept(Visitor *visitor) const { visitor->visit(this); } + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/pattern.h b/src/compiler/rules/pattern.h index 962adc29..de50f851 100644 --- a/src/compiler/rules/pattern.h +++ b/src/compiler/rules/pattern.h @@ -5,21 +5,22 @@ #include "compiler/rules/rule.h" namespace tree_sitter { - namespace rules { - class Pattern : public Rule { - public: - explicit Pattern(const std::string &string); +namespace rules { - bool operator==(const Rule& other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +class Pattern : public Rule { + public: + explicit Pattern(const std::string &string); - const std::string value; - }; - } -} + bool operator==(const Rule &other) const; + size_t hash_code() const; + rule_ptr copy() const; + std::string to_string() const; + void accept(Visitor *visitor) const; + + const std::string value; +}; + +} // namespace rules +} // namespace tree_sitter #endif // COMPILER_RULES_PATTERN_H_ - diff --git a/src/compiler/rules/repeat.cc b/src/compiler/rules/repeat.cc index dd4e3b6e..bcf1807f 100644 --- a/src/compiler/rules/repeat.cc +++ b/src/compiler/rules/repeat.cc @@ -3,30 +3,26 @@ #include "compiler/rules/visitor.h" namespace tree_sitter { - using std::string; +namespace rules { - namespace rules { - Repeat::Repeat(const rule_ptr content) : content(content) {} +using std::string; - bool Repeat::operator==(const Rule &rule) const { - const Repeat *other = dynamic_cast(&rule); - return other && (*other->content == *content); - } +Repeat::Repeat(const rule_ptr content) : content(content) {} - size_t Repeat::hash_code() const { - return content->hash_code(); - } - - rule_ptr Repeat::copy() const { - return std::make_shared(*this); - } - - string Repeat::to_string() const { - return string("#to_string() + ">"; - } - - void Repeat::accept(Visitor *visitor) const { - visitor->visit(this); - } - } +bool Repeat::operator==(const Rule &rule) const { + const Repeat *other = dynamic_cast(&rule); + return other && (*other->content == *content); } + +size_t Repeat::hash_code() const { return content->hash_code(); } + +rule_ptr Repeat::copy() const { return std::make_shared(*this); } + +string Repeat::to_string() const { + return string("#to_string() + ">"; +} + +void Repeat::accept(Visitor *visitor) const { visitor->visit(this); } + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/repeat.h b/src/compiler/rules/repeat.h index e510f4ea..a0f97bd0 100644 --- a/src/compiler/rules/repeat.h +++ b/src/compiler/rules/repeat.h @@ -4,21 +4,23 @@ #include #include "compiler/rules/rule.h" -namespace tree_sitter { - namespace rules { - class Repeat : public Rule { - public: - explicit Repeat(rule_ptr content); +namespace tree_sitter { +namespace rules { - bool operator==(const Rule& other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +class Repeat : public Rule { + public: + explicit Repeat(rule_ptr content); - const rule_ptr content; - }; - } -} + bool operator==(const Rule &other) const; + size_t hash_code() const; + rule_ptr copy() const; + std::string to_string() const; + void accept(Visitor *visitor) const; + + const rule_ptr content; +}; + +} // namespace rules +} // namespace tree_sitter #endif // COMPILER_RULES_REPEAT_H_ diff --git a/src/compiler/rules/rule.cc b/src/compiler/rules/rule.cc index 9a6a7e02..1ce1aa2e 100644 --- a/src/compiler/rules/rule.cc +++ b/src/compiler/rules/rule.cc @@ -2,26 +2,28 @@ #include namespace tree_sitter { - using std::ostream; - using std::string; +namespace rules { - namespace rules { - bool Rule::operator!=(const Rule &other) const { - return !this->operator==(other); - } +using std::ostream; +using std::string; - ostream& operator<<(ostream& stream, const Rule &rule) { - return stream << rule.to_string(); - } - - ostream& operator<<(ostream& stream, const rule_ptr &rule) { - if (rule.get()) - stream << *rule; - else - stream << string("#"); - return stream; - } - - Rule::~Rule() {} - } +bool Rule::operator!=(const Rule &other) const { + return !this->operator==(other); } + +ostream &operator<<(ostream &stream, const Rule &rule) { + return stream << rule.to_string(); +} + +ostream &operator<<(ostream &stream, const rule_ptr &rule) { + if (rule.get()) + stream << *rule; + else + stream << string("#"); + return stream; +} + +Rule::~Rule() {} + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/rule.h b/src/compiler/rules/rule.h index 132aa71a..02e63d55 100644 --- a/src/compiler/rules/rule.h +++ b/src/compiler/rules/rule.h @@ -5,35 +5,39 @@ #include namespace tree_sitter { - namespace rules { - class Visitor; - class Rule; +namespace rules { - typedef std::shared_ptr rule_ptr; +class Visitor; +class Rule; - class Rule { - public: - virtual bool operator==(const Rule& other) const = 0; - bool operator!=(const Rule& other) const; - virtual size_t hash_code() const = 0; - virtual rule_ptr copy() const = 0; - virtual std::string to_string() const = 0; - virtual void accept(Visitor *visitor) const = 0; - virtual ~Rule(); - }; +typedef std::shared_ptr rule_ptr; - std::ostream& operator<<(std::ostream& stream, const Rule &rule); - std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule); - } -} +class Rule { + public: + virtual bool operator==(const Rule &other) const = 0; + bool operator!=(const Rule &other) const; + virtual size_t hash_code() const = 0; + virtual rule_ptr copy() const = 0; + virtual std::string to_string() const = 0; + virtual void accept(Visitor *visitor) const = 0; + virtual ~Rule(); +}; + +std::ostream &operator<<(std::ostream &stream, const Rule &rule); +std::ostream &operator<<(std::ostream &stream, const rule_ptr &rule); + +} // namespace rules +} // namespace tree_sitter namespace std { - template<> - struct hash { - size_t operator()(const tree_sitter::rules::rule_ptr &rule) const { - return typeid(*rule).hash_code() ^ rule->hash_code(); - } - }; -} + +template <> +struct hash { + size_t operator()(const tree_sitter::rules::rule_ptr &rule) const { + return typeid(*rule).hash_code() ^ rule->hash_code(); + } +}; + +} // namespace std #endif // COMPILER_RULES_RULE_H_ diff --git a/src/compiler/rules/rules.cc b/src/compiler/rules/rules.cc index 30a368b7..41a3e966 100644 --- a/src/compiler/rules/rules.cc +++ b/src/compiler/rules/rules.cc @@ -16,65 +16,55 @@ #include "compiler/rules/built_in_symbols.h" namespace tree_sitter { - using std::make_shared; - using std::string; - using std::set; - using std::vector; - using std::map; +namespace rules { - namespace rules { - static const int KEYWORD_PRECEDENCE = 100; +using std::make_shared; +using std::string; +using std::set; +using std::vector; +using std::map; - static rule_ptr metadata(rule_ptr rule, map values) { - return std::make_shared(rule, values); - } +static const int KEYWORD_PRECEDENCE = 100; - rule_ptr blank() { - return make_shared(); - } - - rule_ptr choice(const vector &rules) { - return Choice::Build(rules); - } - - rule_ptr repeat(const rule_ptr &content) { - return std::make_shared(content); - } - - rule_ptr seq(const vector &rules) { - return Seq::Build(rules); - } - - rule_ptr sym(const string &name) { - return make_shared(name); - } - - rule_ptr pattern(const string &value) { - return make_shared(value); - } - - rule_ptr str(const string &value) { - return make_shared(value); - } - - rule_ptr keyword(const string &value) { - return token(prec(KEYWORD_PRECEDENCE, str(value))); - } - - rule_ptr keypattern(const string &value) { - return token(prec(KEYWORD_PRECEDENCE, pattern(value))); - } - - rule_ptr err(const rule_ptr &rule) { - return choice({ rule, ERROR().copy() }); - } - - rule_ptr prec(int precedence, rule_ptr rule) { - return metadata(rule, {{ PRECEDENCE, precedence }}); - } - - rule_ptr token(rule_ptr rule) { - return metadata(rule, {{ IS_TOKEN, 1 }}); - } - } +static rule_ptr metadata(rule_ptr rule, map values) { + return std::make_shared(rule, values); } + +rule_ptr blank() { return make_shared(); } + +rule_ptr choice(const vector &rules) { return Choice::Build(rules); } + +rule_ptr repeat(const rule_ptr &content) { + return std::make_shared(content); +} + +rule_ptr seq(const vector &rules) { return Seq::Build(rules); } + +rule_ptr sym(const string &name) { return make_shared(name); } + +rule_ptr pattern(const string &value) { return make_shared(value); } + +rule_ptr str(const string &value) { return make_shared(value); } + +rule_ptr keyword(const string &value) { + return token(prec(KEYWORD_PRECEDENCE, str(value))); +} + +rule_ptr keypattern(const string &value) { + return token(prec(KEYWORD_PRECEDENCE, pattern(value))); +} + +rule_ptr err(const rule_ptr &rule) { + return choice({ rule, ERROR().copy() }); +} + +rule_ptr prec(int precedence, rule_ptr rule) { + return metadata(rule, { { PRECEDENCE, precedence } }); +} + +rule_ptr token(rule_ptr rule) { + return metadata(rule, { { IS_TOKEN, 1 } }); +} + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/seq.cc b/src/compiler/rules/seq.cc index 738b524e..cc34621a 100644 --- a/src/compiler/rules/seq.cc +++ b/src/compiler/rules/seq.cc @@ -4,39 +4,36 @@ #include "compiler/rules/blank.h" namespace tree_sitter { - using std::make_shared; - using std::string; - using std::vector; +namespace rules { - namespace rules { - Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {} +using std::make_shared; +using std::string; +using std::vector; - rule_ptr Seq::Build(const std::vector &rules) { - rule_ptr result = make_shared(); - for (auto &rule : rules) - result = (typeid(*result) != typeid(Blank)) ? make_shared(result, rule) : rule; - return result; - } +Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {} - bool Seq::operator==(const Rule &rule) const { - const Seq *other = dynamic_cast(&rule); - return other && (*other->left == *left) && (*other->right == *right); - } - - size_t Seq::hash_code() const { - return left->hash_code() ^ right->hash_code(); - } - - rule_ptr Seq::copy() const { - return std::make_shared(*this); - } - - string Seq::to_string() const { - return string("#to_string() + " " + right->to_string() + ">"; - } - - void Seq::accept(Visitor *visitor) const { - visitor->visit(this); - } - } +rule_ptr Seq::Build(const std::vector &rules) { + rule_ptr result = make_shared(); + for (auto &rule : rules) + result = (typeid(*result) != typeid(Blank)) ? make_shared(result, rule) + : rule; + return result; } + +bool Seq::operator==(const Rule &rule) const { + const Seq *other = dynamic_cast(&rule); + return other && (*other->left == *left) && (*other->right == *right); +} + +size_t Seq::hash_code() const { return left->hash_code() ^ right->hash_code(); } + +rule_ptr Seq::copy() const { return std::make_shared(*this); } + +string Seq::to_string() const { + return string("#to_string() + " " + right->to_string() + ">"; +} + +void Seq::accept(Visitor *visitor) const { visitor->visit(this); } + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/seq.h b/src/compiler/rules/seq.h index 56b0793c..fa625d06 100644 --- a/src/compiler/rules/seq.h +++ b/src/compiler/rules/seq.h @@ -5,23 +5,25 @@ #include #include "compiler/rules/rule.h" -namespace tree_sitter { - namespace rules { - class Seq : public Rule { - public: - Seq(rule_ptr left, rule_ptr right); - static rule_ptr Build(const std::vector &rules); +namespace tree_sitter { +namespace rules { - bool operator==(const Rule& other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +class Seq : public Rule { + public: + Seq(rule_ptr left, rule_ptr right); + static rule_ptr Build(const std::vector &rules); - const rule_ptr left; - const rule_ptr right; - }; - } -} + bool operator==(const Rule &other) const; + size_t hash_code() const; + rule_ptr copy() const; + std::string to_string() const; + void accept(Visitor *visitor) const; + + const rule_ptr left; + const rule_ptr right; +}; + +} // namespace rules +} // namespace tree_sitter #endif // COMPILER_RULES_SEQ_H_ diff --git a/src/compiler/rules/string.cc b/src/compiler/rules/string.cc index 6b8326dc..4576a2ac 100644 --- a/src/compiler/rules/string.cc +++ b/src/compiler/rules/string.cc @@ -2,32 +2,26 @@ #include #include "compiler/rules/visitor.h" -namespace tree_sitter { - using std::string; - using std::hash; +namespace tree_sitter { +namespace rules { - namespace rules { - String::String(string value) : value(value) {} +using std::string; +using std::hash; - bool String::operator==(const Rule &rule) const { - const String *other = dynamic_cast(&rule); - return other && (other->value == value); - } +String::String(string value) : value(value) {} - size_t String::hash_code() const { - return hash()(value); - } - - rule_ptr String::copy() const { - return std::make_shared(*this); - } - - string String::to_string() const { - return string("#"; - } - - void String::accept(Visitor *visitor) const { - visitor->visit(this); - } - } +bool String::operator==(const Rule &rule) const { + const String *other = dynamic_cast(&rule); + return other && (other->value == value); } + +size_t String::hash_code() const { return hash()(value); } + +rule_ptr String::copy() const { return std::make_shared(*this); } + +string String::to_string() const { return string("#"; } + +void String::accept(Visitor *visitor) const { visitor->visit(this); } + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/string.h b/src/compiler/rules/string.h index 2dc07ecf..6a6dc534 100644 --- a/src/compiler/rules/string.h +++ b/src/compiler/rules/string.h @@ -4,21 +4,23 @@ #include #include "compiler/rules/rule.h" -namespace tree_sitter { - namespace rules { - class String : public Rule { - public: - explicit String(std::string value); +namespace tree_sitter { +namespace rules { - bool operator==(const Rule& other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +class String : public Rule { + public: + explicit String(std::string value); - const std::string value; - }; - } -} + bool operator==(const Rule &other) const; + size_t hash_code() const; + rule_ptr copy() const; + std::string to_string() const; + void accept(Visitor *visitor) const; + + const std::string value; +}; + +} // namespace rules +} // namespace tree_sitter #endif // COMPILER_RULES_STRING_H_ diff --git a/src/compiler/rules/symbol.cc b/src/compiler/rules/symbol.cc index 3e0337ff..7a9cd462 100644 --- a/src/compiler/rules/symbol.cc +++ b/src/compiler/rules/symbol.cc @@ -3,63 +3,54 @@ #include #include "compiler/rules/visitor.h" -namespace tree_sitter { - using std::string; - using std::to_string; - using std::hash; +namespace tree_sitter { +namespace rules { - namespace rules { - Symbol::Symbol(int index) : - index(index), - options(SymbolOption(0)) {} +using std::string; +using std::to_string; +using std::hash; - Symbol::Symbol(int index, SymbolOption options) : - index(index), - options(options) {} +Symbol::Symbol(int index) : index(index), options(SymbolOption(0)) {} - bool Symbol::operator==(const Symbol &other) const { - return (other.index == index) && (other.options == options); - } +Symbol::Symbol(int index, SymbolOption options) + : index(index), options(options) {} - bool Symbol::operator==(const Rule &rule) const { - const Symbol *other = dynamic_cast(&rule); - return other && this->operator==(*other); - } - - size_t Symbol::hash_code() const { - return hash()(index) ^ hash()(options); - } - - rule_ptr Symbol::copy() const { - return std::make_shared(*this); - } - - string Symbol::to_string() const { - string name = (options & SymbolOptionAuxiliary) ? "aux_" : ""; - name += (options & SymbolOptionToken) ? "token" : "sym"; - return "#<" + name + " " + std::to_string(index) + ">"; - } - - bool Symbol::operator<(const Symbol &other) const { - if (options < other.options) return true; - if (options > other.options) return false; - return (index < other.index); - } - - bool Symbol::is_token() const { - return options & SymbolOptionToken; - } - - bool Symbol::is_built_in() const { - return index < 0; - } - - bool Symbol::is_auxiliary() const { - return options & SymbolOptionAuxiliary; - } - - void Symbol::accept(Visitor *visitor) const { - visitor->visit(this); - } - } +bool Symbol::operator==(const Symbol &other) const { + return (other.index == index) && (other.options == options); } + +bool Symbol::operator==(const Rule &rule) const { + const Symbol *other = dynamic_cast(&rule); + return other && this->operator==(*other); +} + +size_t Symbol::hash_code() const { + return hash()(index) ^ hash()(options); +} + +rule_ptr Symbol::copy() const { return std::make_shared(*this); } + +string Symbol::to_string() const { + string name = (options & SymbolOptionAuxiliary) ? "aux_" : ""; + name += (options & SymbolOptionToken) ? "token" : "sym"; + return "#<" + name + " " + std::to_string(index) + ">"; +} + +bool Symbol::operator<(const Symbol &other) const { + if (options < other.options) + return true; + if (options > other.options) + return false; + return (index < other.index); +} + +bool Symbol::is_token() const { return options & SymbolOptionToken; } + +bool Symbol::is_built_in() const { return index < 0; } + +bool Symbol::is_auxiliary() const { return options & SymbolOptionAuxiliary; } + +void Symbol::accept(Visitor *visitor) const { visitor->visit(this); } + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/symbol.h b/src/compiler/rules/symbol.h index 5a4e1135..699ab8a6 100644 --- a/src/compiler/rules/symbol.h +++ b/src/compiler/rules/symbol.h @@ -4,44 +4,48 @@ #include #include "compiler/rules/rule.h" -namespace tree_sitter { - namespace rules { - typedef enum { - SymbolOptionToken = 1 << 0, - SymbolOptionAuxiliary = 1 << 1, - } SymbolOption; +namespace tree_sitter { +namespace rules { - class Symbol : public Rule { - public: - explicit Symbol(int index); - Symbol(int index, SymbolOption options); +typedef enum { + SymbolOptionToken = 1 << 0, + SymbolOptionAuxiliary = 1 << 1, +} SymbolOption; - bool operator==(const Symbol &other) const; - bool operator==(const Rule &other) const; +class Symbol : public Rule { + public: + explicit Symbol(int index); + Symbol(int index, SymbolOption options); - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; + bool operator==(const Symbol &other) const; + bool operator==(const Rule &other) const; - bool operator<(const Symbol &other) const; - bool is_token() const; - bool is_built_in() const; - bool is_auxiliary() const; + size_t hash_code() const; + rule_ptr copy() const; + std::string to_string() const; + void accept(Visitor *visitor) const; - int index; - SymbolOption options; - }; - } -} + bool operator<(const Symbol &other) const; + bool is_token() const; + bool is_built_in() const; + bool is_auxiliary() const; + + int index; + SymbolOption options; +}; + +} // namespace rules +} // namespace tree_sitter namespace std { - template<> - struct hash { - size_t operator()(const tree_sitter::rules::Symbol &rule) const { - return rule.hash_code(); - } - }; -} + +template <> +struct hash { + size_t operator()(const tree_sitter::rules::Symbol &rule) const { + return rule.hash_code(); + } +}; + +} // std #endif // COMPILER_RULES_SYMBOL_H_ diff --git a/src/compiler/rules/visitor.cc b/src/compiler/rules/visitor.cc index 6e8d12a0..f0bdc3c3 100644 --- a/src/compiler/rules/visitor.cc +++ b/src/compiler/rules/visitor.cc @@ -11,32 +11,34 @@ #include "compiler/rules/repeat.h" namespace tree_sitter { - using std::vector; +namespace rules { - namespace rules { - Visitor::~Visitor() {} +using std::vector; - rule_ptr IdentityRuleFn::default_apply(const Rule *rule) { - return rule->copy(); - } +Visitor::~Visitor() {} - rule_ptr IdentityRuleFn::apply_to(const Choice *rule) { - vector rules; - for (const auto &el : rule->elements) - rules.push_back(apply(el)); - return Choice::Build(rules); - } - - rule_ptr IdentityRuleFn::apply_to(const Seq *rule) { - return Seq::Build({ apply(rule->left), apply(rule->right) }); - } - - rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) { - return std::make_shared(apply(rule->content)); - } - - rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) { - return std::make_shared(apply(rule->rule), rule->value); - } - } +rule_ptr IdentityRuleFn::default_apply(const Rule *rule) { + return rule->copy(); } + +rule_ptr IdentityRuleFn::apply_to(const Choice *rule) { + vector rules; + for (const auto &el : rule->elements) + rules.push_back(apply(el)); + return Choice::Build(rules); +} + +rule_ptr IdentityRuleFn::apply_to(const Seq *rule) { + return Seq::Build({ apply(rule->left), apply(rule->right) }); +} + +rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) { + return std::make_shared(apply(rule->content)); +} + +rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) { + return std::make_shared(apply(rule->rule), rule->value); +} + +} // namespace rules +} // namespace tree_sitter diff --git a/src/compiler/rules/visitor.h b/src/compiler/rules/visitor.h index 1fd3e7ca..917be1e4 100644 --- a/src/compiler/rules/visitor.h +++ b/src/compiler/rules/visitor.h @@ -4,79 +4,101 @@ #include "compiler/rules/rule.h" namespace tree_sitter { - namespace rules { - class Blank; - class NamedSymbol; - class CharacterSet; - class Choice; - class Repeat; - class Seq; - class String; - class Symbol; - class Pattern; - class Metadata; +namespace rules { - class Visitor { - public: - virtual void visit(const Blank *rule) = 0; - virtual void visit(const CharacterSet *rule) = 0; - virtual void visit(const Choice *rule) = 0; - virtual void visit(const Metadata *rule) = 0; - virtual void visit(const Pattern *rule) = 0; - virtual void visit(const Repeat *rule) = 0; - virtual void visit(const Seq *rule) = 0; - virtual void visit(const String *rule) = 0; - virtual void visit(const NamedSymbol *rule) = 0; - virtual void visit(const Symbol *rule) = 0; - virtual ~Visitor(); - }; +class Blank; +class NamedSymbol; +class CharacterSet; +class Choice; +class Repeat; +class Seq; +class String; +class Symbol; +class Pattern; +class Metadata; - template - class RuleFn : private Visitor { - public: - T apply(const rule_ptr &rule) { - value_ = T(); - rule->accept(this); - return value_; - } +class Visitor { + public: + virtual void visit(const Blank *rule) = 0; + virtual void visit(const CharacterSet *rule) = 0; + virtual void visit(const Choice *rule) = 0; + virtual void visit(const Metadata *rule) = 0; + virtual void visit(const Pattern *rule) = 0; + virtual void visit(const Repeat *rule) = 0; + virtual void visit(const Seq *rule) = 0; + virtual void visit(const String *rule) = 0; + virtual void visit(const NamedSymbol *rule) = 0; + virtual void visit(const Symbol *rule) = 0; + virtual ~Visitor(); +}; - protected: - virtual T default_apply(const Rule *rule) { return T(); } - virtual T apply_to(const Blank *rule) { return default_apply((const Rule *)rule); } - virtual T apply_to(const CharacterSet *rule) { return default_apply((const Rule *)rule); } - virtual T apply_to(const Choice *rule) { return default_apply((const Rule *)rule); } - virtual T apply_to(const Metadata *rule) { return default_apply((const Rule *)rule); } - virtual T apply_to(const Pattern *rule) { return default_apply((const Rule *)rule); } - virtual T apply_to(const Repeat *rule) { return default_apply((const Rule *)rule); } - virtual T apply_to(const Seq *rule) { return default_apply((const Rule *)rule); } - virtual T apply_to(const String *rule) { return default_apply((const Rule *)rule); } - virtual T apply_to(const NamedSymbol *rule) { return default_apply((const Rule *)rule); } - virtual T apply_to(const Symbol *rule) { return default_apply((const Rule *)rule); } +template +class RuleFn : private Visitor { + public: + T apply(const rule_ptr &rule) { + value_ = T(); + rule->accept(this); + return value_; + } - void visit(const Blank *rule) { value_ = apply_to(rule); } - void visit(const CharacterSet *rule) { value_ = apply_to(rule); } - void visit(const Choice *rule) { value_ = apply_to(rule); } - void visit(const Metadata *rule) { value_ = apply_to(rule); } - void visit(const Pattern *rule) { value_ = apply_to(rule); } - void visit(const Repeat *rule) { value_ = apply_to(rule); } - void visit(const Seq *rule) { value_ = apply_to(rule); } - void visit(const String *rule) { value_ = apply_to(rule); } - void visit(const NamedSymbol *rule) { value_ = apply_to(rule); } - void visit(const Symbol *rule) { value_ = apply_to(rule); } + protected: + virtual T default_apply(const Rule *rule) { return T(); } + virtual T apply_to(const Blank *rule) { + return default_apply((const Rule *)rule); + } + virtual T apply_to(const CharacterSet *rule) { + return default_apply((const Rule *)rule); + } + virtual T apply_to(const Choice *rule) { + return default_apply((const Rule *)rule); + } + virtual T apply_to(const Metadata *rule) { + return default_apply((const Rule *)rule); + } + virtual T apply_to(const Pattern *rule) { + return default_apply((const Rule *)rule); + } + virtual T apply_to(const Repeat *rule) { + return default_apply((const Rule *)rule); + } + virtual T apply_to(const Seq *rule) { + return default_apply((const Rule *)rule); + } + virtual T apply_to(const String *rule) { + return default_apply((const Rule *)rule); + } + virtual T apply_to(const NamedSymbol *rule) { + return default_apply((const Rule *)rule); + } + virtual T apply_to(const Symbol *rule) { + return default_apply((const Rule *)rule); + } - private: - T value_; - }; + void visit(const Blank *rule) { value_ = apply_to(rule); } + void visit(const CharacterSet *rule) { value_ = apply_to(rule); } + void visit(const Choice *rule) { value_ = apply_to(rule); } + void visit(const Metadata *rule) { value_ = apply_to(rule); } + void visit(const Pattern *rule) { value_ = apply_to(rule); } + void visit(const Repeat *rule) { value_ = apply_to(rule); } + void visit(const Seq *rule) { value_ = apply_to(rule); } + void visit(const String *rule) { value_ = apply_to(rule); } + void visit(const NamedSymbol *rule) { value_ = apply_to(rule); } + void visit(const Symbol *rule) { value_ = apply_to(rule); } - class IdentityRuleFn : public RuleFn { - protected: - virtual rule_ptr default_apply(const Rule *rule); - virtual rule_ptr apply_to(const Choice *rule); - virtual rule_ptr apply_to(const Metadata *rule); - virtual rule_ptr apply_to(const Seq *rule); - virtual rule_ptr apply_to(const Repeat *rule); - }; - } -} + private: + T value_; +}; + +class IdentityRuleFn : public RuleFn { + protected: + virtual rule_ptr default_apply(const Rule *rule); + virtual rule_ptr apply_to(const Choice *rule); + virtual rule_ptr apply_to(const Metadata *rule); + virtual rule_ptr apply_to(const Seq *rule); + virtual rule_ptr apply_to(const Repeat *rule); +}; + +} // namespace rules +} // namespace tree_sitter #endif // COMPILER_RULES_VISITOR_H_ diff --git a/src/compiler/util/string_helpers.cc b/src/compiler/util/string_helpers.cc index 49d29906..0d136832 100644 --- a/src/compiler/util/string_helpers.cc +++ b/src/compiler/util/string_helpers.cc @@ -2,47 +2,50 @@ #include namespace tree_sitter { - using std::string; - using std::vector; - using std::set; +namespace util { - namespace util { - void str_replace(string *input, const string &search, const string &replace) { - size_t pos = 0; - while (1) { - pos = input->find(search, pos); - if (pos == string::npos) break; - input->erase(pos, search.length()); - input->insert(pos, replace); - pos += replace.length(); - } - } +using std::string; +using std::vector; +using std::set; - string escape_string(string input) { - str_replace(&input, "\"", "\\\""); - str_replace(&input, "\n", "\\n"); - return input; - } - - string escape_char(char character) { - switch (character) { - case '\0': - return "\\0"; - case '"': - return "\\\""; - case '\'': - return "\\'"; - case '\n': - return "\\n"; - case '\r': - return "\\r"; - case '\t': - return "\\t"; - case '\\': - return "\\\\"; - default: - return string() + character; - } - } - } +void str_replace(string *input, const string &search, const string &replace) { + size_t pos = 0; + while (1) { + pos = input->find(search, pos); + if (pos == string::npos) + break; + input->erase(pos, search.length()); + input->insert(pos, replace); + pos += replace.length(); + } } + +string escape_string(string input) { + str_replace(&input, "\"", "\\\""); + str_replace(&input, "\n", "\\n"); + return input; +} + +string escape_char(char character) { + switch (character) { + case '\0': + return "\\0"; + case '"': + return "\\\""; + case '\'': + return "\\'"; + case '\n': + return "\\n"; + case '\r': + return "\\r"; + case '\t': + return "\\t"; + case '\\': + return "\\\\"; + default: + return string() + character; + } +} + +} // namespace util +} // namespace tree_sitter diff --git a/src/compiler/util/string_helpers.h b/src/compiler/util/string_helpers.h index 66f92222..89d18d99 100644 --- a/src/compiler/util/string_helpers.h +++ b/src/compiler/util/string_helpers.h @@ -6,11 +6,14 @@ #include namespace tree_sitter { - namespace util { - void str_replace(std::string *input, const std::string &search, const std::string &replace); - std::string escape_string(std::string input); - std::string escape_char(char character); - } -} +namespace util { + +void str_replace(std::string *input, const std::string &search, + const std::string &replace); +std::string escape_string(std::string input); +std::string escape_char(char character); + +} // namespace util +} // namespace tree_sitter #endif // COMPILER_UTIL_STRING_HELPERS_H_ diff --git a/src/runtime/document.c b/src/runtime/document.c index f9817ba3..59f24eb1 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -11,11 +11,9 @@ struct TSDocument { size_t error_count; }; -TSDocument * ts_document_make() { +TSDocument *ts_document_make() { TSDocument *document = malloc(sizeof(TSDocument)); - *document = (TSDocument) { - .input = (TSInput) {} - }; + *document = (TSDocument) { .input = (TSInput) {} }; return document; } @@ -33,12 +31,13 @@ void ts_document_set_parser(TSDocument *document, TSParser *parser) { document->parser = parser; } -const TSTree * ts_document_tree(const TSDocument *document) { +const TSTree *ts_document_tree(const TSDocument *document) { return document->tree; } -const char * ts_document_string(const TSDocument *document) { - return ts_tree_string(document->tree, ts_parser_config(document->parser).symbol_names); +const char *ts_document_string(const TSDocument *document) { + return ts_tree_string(document->tree, + ts_parser_config(document->parser).symbol_names); } void ts_document_set_input(TSDocument *document, TSInput input) { @@ -50,7 +49,8 @@ void ts_document_edit(TSDocument *document, TSInputEdit edit) { document->tree = ts_parser_parse(document->parser, document->input, &edit); } -const char * ts_document_symbol_name(const TSDocument *document, const TSTree *tree) { +const char *ts_document_symbol_name(const TSDocument *document, + const TSTree *tree) { return ts_parser_config(document->parser).symbol_names[tree->symbol]; } @@ -60,7 +60,7 @@ typedef struct { size_t length; } TSStringInput; -const char * ts_string_input_read(void *d, size_t *bytes_read) { +const char *ts_string_input_read(void *d, size_t *bytes_read) { TSStringInput *data = (TSStringInput *)d; if (data->position >= data->length) { *bytes_read = 0; @@ -83,24 +83,22 @@ TSInput ts_string_input_make(const char *string) { data->string = string; data->position = 0; data->length = strlen(string); - TSInput input = { - .data = (void *)data, - .read_fn = ts_string_input_read, - .seek_fn = ts_string_input_seek, - .release_fn = free, - }; - return input; + return (TSInput) { .data = (void *)data, + .read_fn = ts_string_input_read, + .seek_fn = ts_string_input_seek, + .release_fn = free }; } void ts_document_set_input_string(TSDocument *document, const char *text) { ts_document_set_input(document, ts_string_input_make(text)); } -TSNode * ts_document_root_node(const TSDocument *document) { - return ts_node_make_root(document->tree, document->parser->config.symbol_names); +TSNode *ts_document_root_node(const TSDocument *document) { + return ts_node_make_root(document->tree, + document->parser->config.symbol_names); } -TSNode * ts_document_get_node(const TSDocument *document, size_t pos) { +TSNode *ts_document_get_node(const TSDocument *document, size_t pos) { TSNode *root = ts_document_root_node(document); TSNode *result = ts_node_leaf_at_pos(root, pos); ts_node_release(root); diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index f4f42ac0..bb42b853 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -2,16 +2,14 @@ #include "runtime/tree.h" TSLexer ts_lexer_make() { - return (TSLexer) { - .chunk = NULL, - .debug = 0, - .chunk_start = 0, - .chunk_size = 0, - .position_in_chunk = 0, - .token_start_position = 0, - .token_end_position = 0, - .reached_end = 0 - }; + return (TSLexer) { .chunk = NULL, + .debug = 0, + .chunk_start = 0, + .chunk_size = 0, + .position_in_chunk = 0, + .token_start_position = 0, + .token_end_position = 0, + .reached_end = 0 }; } int ts_lexer_advance(TSLexer *lexer) { @@ -33,11 +31,10 @@ int ts_lexer_advance(TSLexer *lexer) { return 1; } -TSTree * ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden) { +TSTree *ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden) { size_t current_position = ts_lexer_position(lexer); size_t size = current_position - lexer->token_start_position; size_t offset = lexer->token_start_position - lexer->token_end_position; lexer->token_end_position = current_position; return ts_tree_make_leaf(symbol, size, offset, is_hidden); } - diff --git a/src/runtime/node.c b/src/runtime/node.c index fa490fae..79ae45b3 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -1,34 +1,33 @@ #include "runtime/node.h" #include "runtime/tree.h" -TSNode * ts_node_make(const TSTree *tree, TSNode *parent, size_t index, size_t start_position, const char **names) { - if (parent) ts_node_retain(parent); +TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index, + size_t start_position, const char **names) { + if (parent) + ts_node_retain(parent); TSNode *result = malloc(sizeof(TSNode)); - *result = (TSNode) { - .ref_count = 1, - .parent = parent, - .index = index, - .content = tree, - .start_position = start_position, - .names = names, - }; + *result = (TSNode) { .ref_count = 1, + .parent = parent, + .index = index, + .content = tree, + .start_position = start_position, + .names = names, }; return result; } -TSNode * ts_node_make_root(const TSTree *tree, const char **names) { +TSNode *ts_node_make_root(const TSTree *tree, const char **names) { while (ts_tree_is_wrapper(tree)) tree = tree->children[0]; return ts_node_make(tree, NULL, 0, 0, names); } -void ts_node_retain(TSNode *node) { - node->ref_count++; -} +void ts_node_retain(TSNode *node) { node->ref_count++; } void ts_node_release(TSNode *node) { node->ref_count--; if (node->ref_count == 0) { - if (node->parent) ts_node_release(node->parent); + if (node->parent) + ts_node_release(node->parent); free(node); } } @@ -37,31 +36,27 @@ size_t ts_node_pos(const TSNode *node) { return node->start_position + node->content->offset; } -size_t ts_node_size(const TSNode *node) { - return node->content->size; -} +size_t ts_node_size(const TSNode *node) { return node->content->size; } int ts_node_eq(const TSNode *left, const TSNode *right) { return ts_tree_equals(left->content, right->content); } -const char * ts_node_name(const TSNode *node) { +const char *ts_node_name(const TSNode *node) { return node->names[node->content->symbol]; } -const char * ts_node_string(const TSNode *node) { +const char *ts_node_string(const TSNode *node) { return ts_tree_string(node->content, node->names); } -TSNode * ts_node_parent(TSNode *child) { - return child->parent; -} +TSNode *ts_node_parent(TSNode *child) { return child->parent; } -TSNode * ts_node_prev_sibling(TSNode *child) { +TSNode *ts_node_prev_sibling(TSNode *child) { return ts_node_child(child->parent, child->index - 1); } -TSNode * ts_node_next_sibling(TSNode *child) { +TSNode *ts_node_next_sibling(TSNode *child) { return ts_node_child(child->parent, child->index + 1); } @@ -71,25 +66,29 @@ size_t ts_node_child_count(const TSNode *parent) { return result; } -TSNode * ts_node_child(TSNode *parent, size_t index) { +TSNode *ts_node_child(TSNode *parent, size_t index) { size_t child_count; - TSChildWithPosition *children = ts_tree_visible_children(parent->content, &child_count); + TSChildWithPosition *children = + ts_tree_visible_children(parent->content, &child_count); if (child_count <= index) return NULL; size_t position = parent->start_position + children[index].position; - return ts_node_make(children[index].tree, parent, index, position, parent->names); + return ts_node_make(children[index].tree, parent, index, position, + parent->names); } -TSNode * ts_node_leaf_at_pos(TSNode *parent, size_t position) { +TSNode *ts_node_leaf_at_pos(TSNode *parent, size_t position) { size_t child_count; - TSChildWithPosition *children = ts_tree_visible_children(parent->content, &child_count); + TSChildWithPosition *children = + ts_tree_visible_children(parent->content, &child_count); for (size_t i = 0; i < child_count; i++) { TSChildWithPosition child = children[i]; size_t child_left = child.position + child.tree->offset; if (child_left > position) break; if (child_left + child.tree->size > position) { - TSNode *node = ts_node_make(child.tree, parent, i, child.position, parent->names); + TSNode *node = + ts_node_make(child.tree, parent, i, child.position, parent->names); TSNode *result = ts_node_leaf_at_pos(node, position); ts_node_release(node); return result; diff --git a/src/runtime/node.h b/src/runtime/node.h index 825a319f..88ecc7fb 100644 --- a/src/runtime/node.h +++ b/src/runtime/node.h @@ -13,7 +13,8 @@ struct TSNode { const char **names; }; -TSNode * ts_node_make(const TSTree *tree, TSNode *parent, size_t index, size_t start_position, const char **names); -TSNode * ts_node_make_root(const TSTree *tree, const char **names); +TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index, + size_t start_position, const char **names); +TSNode *ts_node_make_root(const TSTree *tree, const char **names); #endif diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 29476792..0c4e7980 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -7,24 +7,28 @@ * Private */ -static const TSParseAction * actions_for_state(TSParserConfig config, TSStateId state) { +static const TSParseAction *actions_for_state(TSParserConfig config, + TSStateId state) { return config.parse_table + (state * config.symbol_count); } static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) { - if (!edit) return 0; + if (!edit) + return 0; TSStack *stack = &parser->stack; size_t position = 0; for (;;) { TSTree *node = ts_stack_top_node(stack); - if (!node) break; + if (!node) + break; position = ts_stack_right_position(stack); size_t child_count; TSTree **children = ts_tree_children(node, &child_count); - if (position <= edit->position && !children) break; + if (position <= edit->position && !children) + break; stack->size--; position -= ts_tree_total_size(node); @@ -32,7 +36,8 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) { for (size_t i = 0; i < child_count && position < edit->position; i++) { TSTree *child = children[i]; TSStateId state = ts_stack_top_state(stack); - TSStateId next_state = actions_for_state(parser->config, state)[child->symbol].data.to_state; + TSStateId next_state = + actions_for_state(parser->config, state)[child->symbol].data.to_state; ts_stack_push(stack, next_state, child); ts_tree_retain(child); position += ts_tree_total_size(child); @@ -44,9 +49,10 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) { return position; } -static TSSymbol * expected_symbols(TSParser *parser, size_t *count) { +static TSSymbol *expected_symbols(TSParser *parser, size_t *count) { *count = 0; - const TSParseAction *actions = actions_for_state(parser->config, ts_stack_top_state(&parser->stack)); + const TSParseAction *actions = + actions_for_state(parser->config, ts_stack_top_state(&parser->stack)); for (size_t i = 0; i < parser->config.symbol_count; i++) if (actions[i].type != TSParseActionTypeError) (*count)++; @@ -64,26 +70,27 @@ static TSSymbol * expected_symbols(TSParser *parser, size_t *count) { * Public */ -TSParser * ts_parser_make(TSParserConfig config) { +TSParser *ts_parser_make(TSParserConfig config) { TSParser *result = malloc(sizeof(*result)); - *result = (TSParser) { - .lexer = ts_lexer_make(), - .stack = ts_stack_make(), - .debug = 0, - .config = config, - }; + *result = (TSParser) { .lexer = ts_lexer_make(), + .stack = ts_stack_make(), + .debug = 0, + .config = config, }; return result; } void ts_parser_free(TSParser *parser) { - if (parser->lookahead) ts_tree_release(parser->lookahead); - if (parser->next_lookahead) ts_tree_release(parser->next_lookahead); + if (parser->lookahead) + ts_tree_release(parser->lookahead); + if (parser->next_lookahead) + ts_tree_release(parser->next_lookahead); ts_stack_delete(&parser->stack); free(parser); } void ts_parser_start(TSParser *parser, TSInput input, TSInputEdit *edit) { - if (!edit) ts_stack_shrink(&parser->stack, 0); + if (!edit) + ts_stack_shrink(&parser->stack, 0); parser->lookahead = NULL; parser->next_lookahead = NULL; @@ -110,11 +117,8 @@ void ts_parser_shift_extra(TSParser *parser) { void ts_parser_reduce(TSParser *parser, TSSymbol symbol, size_t child_count) { parser->next_lookahead = parser->lookahead; - parser->lookahead = ts_stack_reduce( - &parser->stack, - symbol, - child_count, - parser->config.hidden_symbol_flags, 1); + parser->lookahead = ts_stack_reduce(&parser->stack, symbol, child_count, + parser->config.hidden_symbol_flags, 1); } int ts_parser_reduce_extra(TSParser *parser, TSSymbol symbol) { @@ -131,12 +135,8 @@ int ts_parser_reduce_extra(TSParser *parser, TSSymbol symbol) { int ts_parser_handle_error(TSParser *parser) { size_t count = 0; const TSSymbol *inputs = expected_symbols(parser, &count); - TSTree *error = ts_tree_make_error( - ts_lexer_lookahead_char(&parser->lexer), - count, - inputs, - 0, - 0); + TSTree *error = ts_tree_make_error(ts_lexer_lookahead_char(&parser->lexer), + count, inputs, 0, 0); for (;;) { ts_tree_release(parser->lookahead); @@ -159,10 +159,13 @@ int ts_parser_handle_error(TSParser *parser) { for (size_t j = 0; j < parser->stack.size; j++) { size_t i = parser->stack.size - 1 - j; TSStateId stack_state = parser->stack.entries[i].state; - TSParseAction action_on_error = actions_for_state(parser->config, stack_state)[ts_builtin_sym_error]; + TSParseAction action_on_error = + actions_for_state(parser->config, stack_state)[ts_builtin_sym_error]; if (action_on_error.type == TSParseActionTypeShift) { TSStateId state_after_error = action_on_error.data.to_state; - if (actions_for_state(parser->config, state_after_error)[parser->lookahead->symbol].type != TSParseActionTypeError) { + if (actions_for_state(parser->config, + state_after_error)[parser->lookahead->symbol] + .type != TSParseActionTypeError) { ts_stack_shrink(&parser->stack, i + 1); ts_stack_push(&parser->stack, state_after_error, error); return 1; @@ -172,7 +175,7 @@ int ts_parser_handle_error(TSParser *parser) { } } -TSTree * ts_parser_tree_root(TSParser *parser) { +TSTree *ts_parser_tree_root(TSParser *parser) { TSStack *stack = &parser->stack; size_t node_count = 0; for (size_t i = 0; i < stack->size; i++) { @@ -182,7 +185,8 @@ TSTree * ts_parser_tree_root(TSParser *parser) { } if (node_count > 1) - return ts_stack_reduce(stack, 2, stack->size, parser->config.hidden_symbol_flags, 0); + return ts_stack_reduce(stack, 2, stack->size, + parser->config.hidden_symbol_flags, 0); else return ts_stack_top_node(stack); } @@ -190,16 +194,20 @@ TSTree * ts_parser_tree_root(TSParser *parser) { TSParseAction ts_parser_next_action(TSParser *parser) { TSStateId state = ts_stack_top_state(&parser->stack); if (!parser->lookahead) - parser->lookahead = parser->config.lex_fn(parser, parser->config.lex_states[state]); + parser->lookahead = + parser->config.lex_fn(parser, parser->config.lex_states[state]); return actions_for_state(parser->config, state)[parser->lookahead->symbol]; } -#define DEBUG_PARSE(...) \ - if (parser->debug) { fprintf(stderr, "\n" __VA_ARGS__); } +#define DEBUG_PARSE(...) \ + if (parser->debug) { \ + fprintf(stderr, "\n" __VA_ARGS__); \ + } -TSTree * ts_parser_step(TSParser *parser) { +TSTree *ts_parser_step(TSParser *parser) { TSParseAction action = ts_parser_next_action(parser); - DEBUG_PARSE("LOOKAHEAD %s", parser->config.symbol_names[parser->lookahead->symbol]); + DEBUG_PARSE("LOOKAHEAD %s", + parser->config.symbol_names[parser->lookahead->symbol]); switch (action.type) { case TSParseActionTypeShift: DEBUG_PARSE("SHIFT %d", action.data.to_state); @@ -210,7 +218,9 @@ TSTree * ts_parser_step(TSParser *parser) { ts_parser_shift_extra(parser); return NULL; case TSParseActionTypeReduce: - DEBUG_PARSE("REDUCE %s %d", parser->config.symbol_names[action.data.symbol], action.data.child_count); + DEBUG_PARSE("REDUCE %s %d", + parser->config.symbol_names[action.data.symbol], + action.data.child_count); ts_parser_reduce(parser, action.data.symbol, action.data.child_count); return NULL; case TSParseActionTypeReduceExtra: @@ -235,15 +245,15 @@ error: return NULL; } -const TSTree * ts_parser_parse(TSParser *parser, TSInput input, TSInputEdit *edit) { +const TSTree *ts_parser_parse(TSParser *parser, TSInput input, + TSInputEdit *edit) { ts_parser_start(parser, input, edit); for (;;) { const TSTree *tree = ts_parser_step(parser); - if (tree) return tree; + if (tree) + return tree; } } -TSParserConfig ts_parser_config(TSParser *parser) { - return parser->config; -} +TSParserConfig ts_parser_config(TSParser *parser) { return parser->config; } diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 6a6ab23c..d5106acb 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -8,8 +8,7 @@ static TSStateId INITIAL_STATE = 0; TSStack ts_stack_make() { TSStack result = { - .entries = calloc(INITIAL_STACK_SIZE, sizeof(*result.entries)), - .size = 0, + .entries = calloc(INITIAL_STACK_SIZE, sizeof(*result.entries)), .size = 0, }; return result; } @@ -25,7 +24,7 @@ TSStateId ts_stack_top_state(const TSStack *stack) { return stack->entries[stack->size - 1].state; } -TSTree * ts_stack_top_node(const TSStack *stack) { +TSTree *ts_stack_top_node(const TSStack *stack) { if (stack->size == 0) return NULL; return stack->entries[stack->size - 1].node; @@ -53,11 +52,8 @@ size_t ts_stack_right_position(const TSStack *stack) { return result; } -TSTree * ts_stack_reduce(TSStack *stack, - TSSymbol symbol, - size_t child_count, - const int *hidden_symbol_flags, - int dont_count_extras) { +TSTree *ts_stack_reduce(TSStack *stack, TSSymbol symbol, size_t child_count, + const int *hidden_symbol_flags, int dont_count_extras) { // First, walk down the stack to determine which symbols will be reduced. // The child node count is known ahead of time, but some children may be @@ -73,11 +69,8 @@ TSTree * ts_stack_reduce(TSStack *stack, for (size_t i = 0; i < child_count; i++) children[i] = stack->entries[start_index + i].node; - TSTree *lookahead = ts_tree_make_node( - symbol, - child_count, - children, - hidden_symbol_flags[symbol]); + TSTree *lookahead = ts_tree_make_node(symbol, child_count, children, + hidden_symbol_flags[symbol]); ts_stack_shrink(stack, stack->size - child_count); return lookahead; diff --git a/src/runtime/tree.c b/src/runtime/tree.c index ffa068db..bc462777 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -3,26 +3,27 @@ #include "tree_sitter/parser.h" #include "runtime/tree.h" -static TSTree * ts_tree_make(TSSymbol symbol, size_t size, size_t offset, int is_hidden) { +static TSTree *ts_tree_make(TSSymbol symbol, size_t size, size_t offset, + int is_hidden) { TSTree *result = malloc(sizeof(TSTree)); - *result = (TSTree) { - .ref_count = 1, - .symbol = symbol, - .size = size, - .offset = offset, - .options = is_hidden ? TSTreeOptionsHidden : 0, - }; + *result = (TSTree) { .ref_count = 1, + .symbol = symbol, + .size = size, + .offset = offset, + .options = is_hidden ? TSTreeOptionsHidden : 0, }; return result; } -TSTree * ts_tree_make_leaf(TSSymbol symbol, size_t size, size_t offset, int is_hidden) { +TSTree *ts_tree_make_leaf(TSSymbol symbol, size_t size, size_t offset, + int is_hidden) { TSTree *result = ts_tree_make(symbol, size, offset, is_hidden); result->child_count = 0; result->children = NULL; return result; } -TSTree * ts_tree_make_node(TSSymbol symbol, size_t child_count, TSTree **children, int is_hidden) { +TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count, + TSTree **children, int is_hidden) { size_t size = 0, offset = 0, visible_child_count = 0; for (size_t i = 0; i < child_count; i++) { TSTree *child = children[i]; @@ -43,34 +44,34 @@ TSTree * ts_tree_make_node(TSSymbol symbol, size_t child_count, TSTree **childre TSTreeOptions options = 0; if (is_hidden) options |= TSTreeOptionsHidden; - if (child_count == 1 && (ts_tree_is_visible(children[0]) || ts_tree_is_wrapper(children[0]))) + if (child_count == 1 && + (ts_tree_is_visible(children[0]) || ts_tree_is_wrapper(children[0]))) options |= (TSTreeOptionsWrapper | TSTreeOptionsHidden); - TSTree *result = malloc(sizeof(TSTree) + (visible_child_count * sizeof(TSChildWithPosition))); - *result = (TSTree) { - .ref_count = 1, - .symbol = symbol, - .size = size, - .offset = offset, - .options = options, - .children = children, - .child_count = child_count, - .visible_child_count = visible_child_count, - }; + TSTree *result = malloc(sizeof(TSTree) + + (visible_child_count * sizeof(TSChildWithPosition))); + *result = (TSTree) { .ref_count = 1, + .symbol = symbol, + .size = size, + .offset = offset, + .options = options, + .children = children, + .child_count = child_count, + .visible_child_count = visible_child_count, }; - TSChildWithPosition *visible_children = ts_tree_visible_children(result, NULL); + TSChildWithPosition *visible_children = + ts_tree_visible_children(result, NULL); for (size_t i = 0, visible_i = 0, child_position = 0; i < child_count; i++) { TSTree *child = children[i]; if (ts_tree_is_visible(child)) { - visible_children[visible_i] = (TSChildWithPosition) { - .tree = child, - .position = child_position - }; + visible_children[visible_i] = + (TSChildWithPosition) { .tree = child, .position = child_position }; visible_i++; } else { size_t granchild_count = 0; - TSChildWithPosition *grandchildren = ts_tree_visible_children(child, &granchild_count); + TSChildWithPosition *grandchildren = + ts_tree_visible_children(child, &granchild_count); for (size_t j = 0; j < granchild_count; j++) { visible_children[visible_i] = (TSChildWithPosition) { .tree = grandchildren[j].tree, @@ -86,7 +87,9 @@ TSTree * ts_tree_make_node(TSSymbol symbol, size_t child_count, TSTree **childre return result; } -TSTree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const TSSymbol *expected_inputs, size_t size, size_t offset) { +TSTree *ts_tree_make_error(char lookahead_char, size_t expected_input_count, + const TSSymbol *expected_inputs, size_t size, + size_t offset) { TSTree *result = ts_tree_make(ts_builtin_sym_error, size, offset, 0); result->lookahead_char = lookahead_char; result->expected_input_count = expected_input_count; @@ -94,9 +97,7 @@ TSTree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, co return result; } -void ts_tree_retain(TSTree *tree) { - tree->ref_count++; -} +void ts_tree_retain(TSTree *tree) { tree->ref_count++; } void ts_tree_release(TSTree *tree) { tree->ref_count--; @@ -115,27 +116,33 @@ size_t ts_tree_total_size(const TSTree *tree) { } int ts_tree_equals(const TSTree *node1, const TSTree *node2) { - if (node1->symbol != node2->symbol) return 0; + if (node1->symbol != node2->symbol) + return 0; if (node1->symbol == ts_builtin_sym_error) { // check error equality } else { - if (node1->child_count != node2->child_count) return 0; + if (node1->child_count != node2->child_count) + return 0; for (size_t i = 0; i < node1->child_count; i++) - if (!ts_tree_equals(node1->children[i], node2->children[i])) return 0; + if (!ts_tree_equals(node1->children[i], node2->children[i])) + return 0; } return 1; } -TSTree ** ts_tree_children(const TSTree *tree, size_t *count) { +TSTree **ts_tree_children(const TSTree *tree, size_t *count) { if (!tree || tree->symbol == ts_builtin_sym_error) { - if (count) *count = 0; + if (count) + *count = 0; return NULL; } - if (count) *count = tree->child_count; + if (count) + *count = tree->child_count; return tree->children; } -static size_t write_lookahead_to_string(char *string, size_t limit, char lookahead) { +static size_t write_lookahead_to_string(char *string, size_t limit, + char lookahead) { switch (lookahead) { case '\0': return snprintf(string, limit, ""); @@ -144,7 +151,9 @@ static size_t write_lookahead_to_string(char *string, size_t limit, char lookahe } } -static size_t tree_write_to_string(const TSTree *tree, const char **symbol_names, char *string, size_t limit, int is_root) { +static size_t tree_write_to_string(const TSTree *tree, + const char **symbol_names, char *string, + size_t limit, int is_root) { char *cursor = string; char **writer = (limit > 0) ? &cursor : &string; int visible = ts_tree_is_visible(tree); @@ -167,7 +176,8 @@ static size_t tree_write_to_string(const TSTree *tree, const char **symbol_names } for (size_t i = 0; i < tree->child_count; i++) - cursor += tree_write_to_string(tree->children[i], symbol_names, *writer, limit, is_root); + cursor += tree_write_to_string(tree->children[i], symbol_names, *writer, + limit, is_root); if (visible) cursor += snprintf(*writer, limit, ")"); @@ -175,9 +185,10 @@ static size_t tree_write_to_string(const TSTree *tree, const char **symbol_names return cursor - string; } -char * ts_tree_string(const TSTree *tree, const char **symbol_names) { +char *ts_tree_string(const TSTree *tree, const char **symbol_names) { static char SCRATCH_STRING[1]; - size_t size = tree_write_to_string(tree, symbol_names, SCRATCH_STRING, 0, 1) + 1; + size_t size = + tree_write_to_string(tree, symbol_names, SCRATCH_STRING, 0, 1) + 1; char *result = malloc(size * sizeof(char)); tree_write_to_string(tree, symbol_names, result, size, 1); return result; diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 252a82e0..43f7137f 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -61,25 +61,32 @@ static inline size_t ts_tree_visible_child_count(const TSTree *tree) { return tree->visible_child_count; } -static inline TSChildWithPosition * ts_tree_visible_children(const TSTree *tree, size_t *count) { +static inline TSChildWithPosition *ts_tree_visible_children(const TSTree *tree, + size_t *count) { if (tree->symbol == ts_builtin_sym_error || tree->visible_child_count == 0) { - if (count) *count = 0; + if (count) + *count = 0; return NULL; } else { - if (count) *count = tree->visible_child_count; + if (count) + *count = tree->visible_child_count; return (TSChildWithPosition *)(tree + 1); } } -TSTree * ts_tree_make_leaf(TSSymbol symbol, size_t size, size_t offset, int is_hidden); -TSTree * ts_tree_make_node(TSSymbol symbol, size_t child_count, TSTree **children, int is_hidden); -TSTree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const TSSymbol *expected_inputs, size_t size, size_t offset); +TSTree *ts_tree_make_leaf(TSSymbol symbol, size_t size, size_t offset, + int is_hidden); +TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count, + TSTree **children, int is_hidden); +TSTree *ts_tree_make_error(char lookahead_char, size_t expected_input_count, + const TSSymbol *expected_inputs, size_t size, + size_t offset); void ts_tree_retain(TSTree *tree); void ts_tree_release(TSTree *tree); int ts_tree_equals(const TSTree *tree1, const TSTree *tree2); -char * ts_tree_string(const TSTree *tree, const char **names); -char * ts_tree_error_string(const TSTree *tree, const char **names); -TSTree ** ts_tree_children(const TSTree *tree, size_t *count); +char *ts_tree_string(const TSTree *tree, const char **names); +char *ts_tree_error_string(const TSTree *tree, const char **names); +TSTree **ts_tree_children(const TSTree *tree, size_t *count); size_t ts_tree_total_size(const TSTree *tree); #ifdef __cplusplus