Auto-format all source code with clang-format

This commit is contained in:
Max Brunsfeld 2014-07-20 21:43:27 -07:00
parent b8d8386e63
commit 98cc2f2264
105 changed files with 4223 additions and 4052 deletions

View file

@ -2,28 +2,30 @@
#include "helpers.h"
namespace tree_sitter_examples {
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
extern const Grammar arithmetic({
{ "expression", choice({
sym("sum"),
sym("difference"),
sym("product"),
sym("quotient"),
sym("exponent"),
sym("group"),
sym("number"),
sym("variable") }) },
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
{ "sum", infix_op("+", "expression", 1) },
{ "difference", infix_op("-", "expression", 1) },
{ "product", infix_op("*", "expression", 2) },
{ "quotient", infix_op("/", "expression", 2) },
{ "exponent", infix_op("^", "expression", 3) },
{ "group", in_parens(err(sym("expression"))) },
extern const Grammar arithmetic({
{ "expression", choice({
sym("sum"),
sym("difference"),
sym("product"),
sym("quotient"),
sym("exponent"),
sym("group"),
sym("number"),
sym("variable") }) },
{ "number", pattern("\\d+") },
{ "variable", pattern("\\a[\\w_]*") },
});
}
{ "sum", infix_op("+", "expression", 1) },
{ "difference", infix_op("-", "expression", 1) },
{ "product", infix_op("*", "expression", 2) },
{ "quotient", infix_op("/", "expression", 2) },
{ "exponent", infix_op("^", "expression", 3) },
{ "group", in_parens(err(sym("expression"))) },
{ "number", pattern("\\d+") },
{ "variable", pattern("\\a[\\w_]*") },
});
} // namespace tree_sitter_examples

View file

@ -2,170 +2,172 @@
#include "helpers.h"
namespace tree_sitter_examples {
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
// Wraps `rule` so it must be followed by a terminator: either the
// "_line_break" symbol or a literal ";".
static rule_ptr terminated(rule_ptr rule) {
  rule_ptr terminator = choice({ sym("_line_break"), str(";") });
  return seq({ rule, terminator });
}
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
extern const Grammar golang = Grammar({
{ "program", seq({
sym("package_directive"),
repeat(sym("imports_block")),
repeat(sym("declaration")) }) },
{ "package_directive", seq({
keyword("package"),
sym("package_name") }) },
{ "imports_block", seq({
keyword("import"),
choice({
in_parens(err(repeat(sym("package_import")))),
sym("package_import") }) }) },
{ "package_import", sym("string") },
{ "declaration", choice({
sym("type_declaration"),
sym("var_declaration"),
sym("func_declaration") }) },
// Wraps `rule` so it must be followed by a terminator: either the
// "_line_break" symbol or a literal ";".
static rule_ptr terminated(rule_ptr rule) {
  rule_ptr terminator = choice({ sym("_line_break"), str(";") });
  return seq({ rule, terminator });
}
// Declarations
{ "type_declaration", terminated(seq({
keyword("type"),
sym("type_name"),
sym("type_expression") })) },
{ "var_declaration", terminated(seq({
keyword("var"),
sym("var_name"),
choice({
seq({
optional(sym("type_expression")),
str("="),
sym("expression") }),
sym("type_expression") }) })) },
{ "func_declaration", terminated(seq({
keyword("func"),
sym("var_name"),
sym("_func_signature"),
sym("block_statement") })) },
{ "block_statement", in_braces(err(repeat(sym("statement")))) },
{ "type_expression", choice({
sym("pointer_type"),
sym("slice_type"),
sym("map_type"),
sym("interface_type"),
sym("struct_type"),
sym("type_name") }) },
extern const Grammar golang = Grammar({
{ "program", seq({
sym("package_directive"),
repeat(sym("imports_block")),
repeat(sym("declaration")) }) },
{ "package_directive", seq({
keyword("package"),
sym("package_name") }) },
{ "imports_block", seq({
keyword("import"),
choice({
in_parens(err(repeat(sym("package_import")))),
sym("package_import") }) }) },
{ "package_import", sym("string") },
{ "declaration", choice({
sym("type_declaration"),
sym("var_declaration"),
sym("func_declaration") }) },
// Type expressions
{ "pointer_type", seq({
keyword("*"),
sym("type_expression") }) },
{ "map_type", seq({
keyword("map"),
in_brackets(sym("type_expression")),
sym("type_expression") }) },
{ "slice_type", seq({
in_brackets(blank()),
sym("type_expression") }) },
{ "struct_type", seq({
keyword("struct"),
in_braces(repeat(seq({
sym("var_name"),
sym("type_expression") }))) }) },
{ "interface_type", seq({
keyword("interface"),
in_braces(repeat(seq({
sym("var_name"),
sym("_func_signature") }))) }) },
// Declarations
{ "type_declaration", terminated(seq({
keyword("type"),
sym("type_name"),
sym("type_expression") })) },
{ "var_declaration", terminated(seq({
keyword("var"),
sym("var_name"),
choice({
seq({
optional(sym("type_expression")),
str("="),
sym("expression") }),
sym("type_expression") }) })) },
{ "func_declaration", terminated(seq({
keyword("func"),
sym("var_name"),
sym("_func_signature"),
sym("block_statement") })) },
{ "block_statement", in_braces(err(repeat(sym("statement")))) },
{ "type_expression", choice({
sym("pointer_type"),
sym("slice_type"),
sym("map_type"),
sym("interface_type"),
sym("struct_type"),
sym("type_name") }) },
// Statements
{ "statement", choice({
sym("expression_statement"),
sym("return_statement"),
sym("declaration_statement"),
sym("range_statement"),
sym("if_statement") }) },
{ "return_statement", terminated(seq({
keyword("return"),
comma_sep(sym("expression")) })) },
{ "declaration_statement", choice({
sym("var_declaration"),
terminated(seq({
comma_sep(sym("var_name")),
str(":="),
sym("expression") })) }) },
{ "range_statement", seq({
keyword("for"),
sym("var_name"),
optional(seq({ str(","), sym("var_name") })),
// Type expressions
{ "pointer_type", seq({
keyword("*"),
sym("type_expression") }) },
{ "map_type", seq({
keyword("map"),
in_brackets(sym("type_expression")),
sym("type_expression") }) },
{ "slice_type", seq({
in_brackets(blank()),
sym("type_expression") }) },
{ "struct_type", seq({
keyword("struct"),
in_braces(repeat(seq({
sym("var_name"),
sym("type_expression") }))) }) },
{ "interface_type", seq({
keyword("interface"),
in_braces(repeat(seq({
sym("var_name"),
sym("_func_signature") }))) }) },
// Statements
{ "statement", choice({
sym("expression_statement"),
sym("return_statement"),
sym("declaration_statement"),
sym("range_statement"),
sym("if_statement") }) },
{ "return_statement", terminated(seq({
keyword("return"),
comma_sep(sym("expression")) })) },
{ "declaration_statement", choice({
sym("var_declaration"),
terminated(seq({
comma_sep(sym("var_name")),
str(":="),
keyword("range"),
sym("expression"),
sym("block_statement") }) },
{ "if_statement", seq({
keyword("if"),
sym("expression"),
sym("block_statement"),
optional(seq({
keyword("else"),
choice({
sym("if_statement"),
sym("block_statement") }) })) }) },
{ "expression_statement", terminated(sym("expression")) },
// Value expressions
{ "expression", choice({
sym("call_expression"),
sym("selector_expression"),
sym("math_op"),
sym("bool_op"),
sym("number"),
sym("string"),
sym("var_name") }) },
{ "call_expression", seq({
sym("expression"),
in_parens(comma_sep(sym("expression"))) }) },
{ "selector_expression", seq({
sym("expression"),
str("."),
sym("var_name") }) },
{ "math_op", choice({
infix_op("*", "expression", 2),
infix_op("/", "expression", 2),
infix_op("+", "expression", 1),
infix_op("-", "expression", 1) }) },
{ "bool_op", choice({
infix_op("||", "expression", 1),
infix_op("&&", "expression", 2),
infix_op("==", "expression", 3),
infix_op("<=", "expression", 3),
infix_op("<", "expression", 3),
infix_op(">=", "expression", 3),
infix_op(">", "expression", 3),
prefix_op("!", "expression", 4) }) },
{ "_func_signature", seq({
in_parens(comma_sep(seq({
comma_sep1(sym("var_name")),
sym("type_expression") }))),
sym("expression") })) }) },
{ "range_statement", seq({
keyword("for"),
sym("var_name"),
optional(seq({ str(","), sym("var_name") })),
str(":="),
keyword("range"),
sym("expression"),
sym("block_statement") }) },
{ "if_statement", seq({
keyword("if"),
sym("expression"),
sym("block_statement"),
optional(seq({
keyword("else"),
choice({
in_parens(choice({
comma_sep1(seq({ sym("var_name"), sym("type_name") })),
comma_sep1(sym("type_name")) })),
sym("type_name"),
blank() }) }) },
sym("if_statement"),
sym("block_statement") }) })) }) },
{ "expression_statement", terminated(sym("expression")) },
{ "_line_break", str("\n") },
// Value expressions
{ "expression", choice({
sym("call_expression"),
sym("selector_expression"),
sym("math_op"),
sym("bool_op"),
sym("number"),
sym("string"),
sym("var_name") }) },
{ "call_expression", seq({
sym("expression"),
in_parens(comma_sep(sym("expression"))) }) },
{ "selector_expression", seq({
sym("expression"),
str("."),
sym("var_name") }) },
{ "math_op", choice({
infix_op("*", "expression", 2),
infix_op("/", "expression", 2),
infix_op("+", "expression", 1),
infix_op("-", "expression", 1) }) },
{ "bool_op", choice({
infix_op("||", "expression", 1),
infix_op("&&", "expression", 2),
infix_op("==", "expression", 3),
infix_op("<=", "expression", 3),
infix_op("<", "expression", 3),
infix_op(">=", "expression", 3),
infix_op(">", "expression", 3),
prefix_op("!", "expression", 4) }) },
{ "_func_signature", seq({
in_parens(comma_sep(seq({
comma_sep1(sym("var_name")),
sym("type_expression") }))),
choice({
in_parens(choice({
comma_sep1(seq({ sym("var_name"), sym("type_name") })),
comma_sep1(sym("type_name")) })),
sym("type_name"),
blank() }) }) },
{ "string", delimited("\"") },
{ "package_name", sym("_identifier") },
{ "var_name", sym("_identifier") },
{ "type_name", sym("_identifier") },
{ "_identifier", pattern("\\a[\\w_]*") },
{ "number", pattern("\\d+(\\.\\d+)?") },
{ "comment", keypattern("//[^\n]*") },
})
{ "_line_break", str("\n") },
{ "string", delimited("\"") },
{ "package_name", sym("_identifier") },
{ "var_name", sym("_identifier") },
{ "type_name", sym("_identifier") },
{ "_identifier", pattern("\\a[\\w_]*") },
{ "number", pattern("\\d+(\\.\\d+)?") },
{ "comment", keypattern("//[^\n]*") },
})
.ubiquitous_tokens({ "comment", "_line_break" })
.separators({ ' ', '\t', '\r' });
}
} // namespace tree_sitter_examples

View file

@ -1,58 +1,59 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter_examples {
using namespace tree_sitter::rules;
// Builds `element` followed by a repeat of ("," element).
rule_ptr comma_sep1(rule_ptr element) {
  rule_ptr comma = str(",");
  rule_ptr more = repeat(seq({ comma, element }));
  return seq({ element, more });
}
using namespace tree_sitter::rules;
// Builds a choice between comma_sep1(element) and the blank rule.
rule_ptr comma_sep(rule_ptr element) {
  rule_ptr non_empty = comma_sep1(element);
  return choice({ non_empty, blank() });
}
// Builds a choice between `rule` and the blank rule.
rule_ptr optional(rule_ptr rule) {
  rule_ptr alternatives = choice({ rule, blank() });
  return alternatives;
}
// Builds the sequence "(" rule ")".
rule_ptr in_parens(rule_ptr rule) {
  rule_ptr open = str("(");
  rule_ptr close = str(")");
  return seq({ open, rule, close });
}
// Builds the sequence "{" rule "}".
rule_ptr in_braces(rule_ptr rule) {
  rule_ptr open = str("{");
  rule_ptr close = str("}");
  return seq({ open, rule, close });
}
// Builds the sequence "[" rule "]".
rule_ptr in_brackets(rule_ptr rule) {
  rule_ptr open = str("[");
  rule_ptr close = str("]");
  return seq({ open, rule, close });
}
// Builds sym(rule_name) keyword(op) sym(rule_name), wrapped in
// prec(precedence, ...).
rule_ptr infix_op(std::string op, std::string rule_name, int precedence) {
  rule_ptr lhs = sym(rule_name);
  rule_ptr op_token = keyword(op);
  rule_ptr rhs = sym(rule_name);
  return prec(precedence, seq({ lhs, op_token, rhs }));
}
// Builds keyword(op) sym(rule_name), wrapped in prec(precedence, ...).
rule_ptr prefix_op(std::string op, std::string rule_name, int precedence) {
  rule_ptr op_token = keyword(op);
  rule_ptr operand = sym(rule_name);
  return prec(precedence, seq({ op_token, operand }));
}
// Builds sym(rule_name) keyword(op), wrapped in prec(precedence, ...).
rule_ptr postfix_op(std::string op, std::string rule_name, int precedence) {
  rule_ptr operand = sym(rule_name);
  rule_ptr op_token = keyword(op);
  return prec(precedence, seq({ operand, op_token }));
}
// Builds a single token: `delimiter`, then a repeat of either one character
// matching "[^<delimiter>]" or a backslash followed by `delimiter`, then
// `delimiter` again.
rule_ptr delimited(std::string delimiter) {
  rule_ptr open = str(delimiter);
  rule_ptr plain_char = pattern("[^" + delimiter + "]");
  rule_ptr escaped_delimiter = seq({ str("\\"), str(delimiter) });
  rule_ptr body = repeat(choice({ plain_char, escaped_delimiter }));
  rule_ptr close = str(delimiter);
  return token(seq({ open, body, close }));
}
// Builds `element` followed by a repeat of ("," element).
rule_ptr comma_sep1(rule_ptr element) {
  rule_ptr comma = str(",");
  rule_ptr more = repeat(seq({ comma, element }));
  return seq({ element, more });
}
// Builds a choice between comma_sep1(element) and the blank rule.
rule_ptr comma_sep(rule_ptr element) {
  rule_ptr non_empty = comma_sep1(element);
  return choice({ non_empty, blank() });
}
// Builds a choice between `rule` and the blank rule.
rule_ptr optional(rule_ptr rule) {
  rule_ptr alternatives = choice({ rule, blank() });
  return alternatives;
}
// Builds the sequence "(" rule ")".
rule_ptr in_parens(rule_ptr rule) {
  rule_ptr open = str("(");
  rule_ptr close = str(")");
  return seq({ open, rule, close });
}
// Builds the sequence "{" rule "}".
rule_ptr in_braces(rule_ptr rule) {
  rule_ptr open = str("{");
  rule_ptr close = str("}");
  return seq({ open, rule, close });
}
// Builds the sequence "[" rule "]".
rule_ptr in_brackets(rule_ptr rule) {
  rule_ptr open = str("[");
  rule_ptr close = str("]");
  return seq({ open, rule, close });
}
// Builds sym(rule_name) keyword(op) sym(rule_name), wrapped in
// prec(precedence, ...).
rule_ptr infix_op(std::string op, std::string rule_name, int precedence) {
  rule_ptr lhs = sym(rule_name);
  rule_ptr op_token = keyword(op);
  rule_ptr rhs = sym(rule_name);
  return prec(precedence, seq({ lhs, op_token, rhs }));
}
// Builds keyword(op) sym(rule_name), wrapped in prec(precedence, ...).
rule_ptr prefix_op(std::string op, std::string rule_name, int precedence) {
  rule_ptr op_token = keyword(op);
  rule_ptr operand = sym(rule_name);
  return prec(precedence, seq({ op_token, operand }));
}
// Builds sym(rule_name) keyword(op), wrapped in prec(precedence, ...).
rule_ptr postfix_op(std::string op, std::string rule_name, int precedence) {
  rule_ptr operand = sym(rule_name);
  rule_ptr op_token = keyword(op);
  return prec(precedence, seq({ operand, op_token }));
}
// Builds a single token: `delimiter`, then a repeat of either one character
// matching "[^<delimiter>]" or a backslash followed by `delimiter`, then
// `delimiter` again.
rule_ptr delimited(std::string delimiter) {
  rule_ptr open = str(delimiter);
  rule_ptr plain_char = pattern("[^" + delimiter + "]");
  rule_ptr escaped_delimiter = seq({ str("\\"), str(delimiter) });
  rule_ptr body = repeat(choice({ plain_char, escaped_delimiter }));
  rule_ptr close = str(delimiter);
  return token(seq({ open, body, close }));
}
} // namespace tree_sitter_examples

View file

@ -4,18 +4,20 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter_examples {
using namespace tree_sitter::rules;
// Builds `element` followed by a repeat of ("," element).
rule_ptr comma_sep1(rule_ptr element);
// Builds a choice between comma_sep1(element) and blank().
rule_ptr comma_sep(rule_ptr element);
// Builds a choice between `rule` and blank().
rule_ptr optional(rule_ptr rule);
// Builds the sequence "(" rule ")".
rule_ptr in_parens(rule_ptr rule);
// Builds the sequence "{" rule "}".
rule_ptr in_braces(rule_ptr rule);
// Builds the sequence "[" rule "]".
rule_ptr in_brackets(rule_ptr rule);
// Builds prec(precedence, sym(rule_name) keyword(op) sym(rule_name)).
rule_ptr infix_op(std::string op, std::string rule_name, int precedence);
// Builds prec(precedence, keyword(op) sym(rule_name)).
rule_ptr prefix_op(std::string op, std::string rule_name, int precedence);
// Builds prec(precedence, sym(rule_name) keyword(op)).
rule_ptr postfix_op(std::string op, std::string rule_name, int precedence);
// Builds a token: `delimiter`, a repeat of either a character matching
// "[^<delimiter>]" or a backslash followed by `delimiter`, then `delimiter`.
rule_ptr delimited(std::string delimiter);
}
using namespace tree_sitter::rules;
#endif // TREESITTER_EXAMPLES_HELPERS_
// Builds `element` followed by a repeat of ("," element).
rule_ptr comma_sep1(rule_ptr element);
// Builds a choice between comma_sep1(element) and blank().
rule_ptr comma_sep(rule_ptr element);
// Builds a choice between `rule` and blank().
rule_ptr optional(rule_ptr rule);
// Builds the sequence "(" rule ")".
rule_ptr in_parens(rule_ptr rule);
// Builds the sequence "{" rule "}".
rule_ptr in_braces(rule_ptr rule);
// Builds the sequence "[" rule "]".
rule_ptr in_brackets(rule_ptr rule);
// Builds prec(precedence, sym(rule_name) keyword(op) sym(rule_name)).
rule_ptr infix_op(std::string op, std::string rule_name, int precedence);
// Builds prec(precedence, keyword(op) sym(rule_name)).
rule_ptr prefix_op(std::string op, std::string rule_name, int precedence);
// Builds prec(precedence, sym(rule_name) keyword(op)).
rule_ptr postfix_op(std::string op, std::string rule_name, int precedence);
// Builds a token: `delimiter`, a repeat of either a character matching
// "[^<delimiter>]" or a backslash followed by `delimiter`, then `delimiter`.
rule_ptr delimited(std::string delimiter);
} // namespace tree_sitter_examples
#endif // TREESITTER_EXAMPLES_HELPERS_

View file

@ -2,217 +2,219 @@
#include "helpers.h"
namespace tree_sitter_examples {
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
// Wraps `rule` so it must be followed by a terminator: either the
// "_line_break" symbol or a literal ";".
static rule_ptr terminated(rule_ptr rule) {
  rule_ptr terminator = choice({ sym("_line_break"), str(";") });
  return seq({ rule, terminator });
}
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
extern const Grammar javascript = Grammar({
{ "program", repeat(sym("statement")) },
// Wraps `rule` so it must be followed by a terminator: either the
// "_line_break" symbol or a literal ";".
static rule_ptr terminated(rule_ptr rule) {
  rule_ptr terminator = choice({ sym("_line_break"), str(";") });
  return seq({ rule, terminator });
}
// Statements
{ "statement", choice({
sym("statement_block"),
sym("if_statement"),
sym("try_statement"),
sym("switch_statement"),
sym("while_statement"),
sym("for_statement"),
sym("for_in_statement"),
sym("break_statement"),
sym("var_declaration"),
sym("throw_statement"),
sym("return_statement"),
sym("delete_statement"),
sym("expression_statement") }) },
{ "statement_block", in_braces(err(repeat(sym("statement")))) },
{ "for_statement", seq({
keyword("for"),
in_parens(err(seq({
choice({
sym("var_declaration"),
sym("expression_statement") }),
sym("expression_statement"),
sym("expression") }))),
sym("statement") }) },
{ "for_in_statement", seq({
keyword("for"),
in_parens(err(seq({
optional(keyword("var")),
sym("identifier"),
keyword("in"),
sym("expression") }))),
sym("statement") }) },
{ "throw_statement", terminated(seq({
keyword("throw"),
sym("expression") })) },
{ "if_statement", seq({
keyword("if"),
in_parens(err(sym("expression"))),
sym("statement"),
optional(prec(1, seq({
keyword("else"),
sym("statement") }))) }) },
{ "while_statement", seq({
keyword("while"),
in_parens(err(sym("expression"))),
sym("statement") }) },
{ "try_statement", seq({
keyword("try"),
sym("statement"),
optional(sym("catch_clause")),
optional(sym("finally_clause")) }) },
{ "catch_clause", seq({
keyword("catch"),
in_parens(err(sym("identifier"))),
sym("statement") }) },
{ "finally_clause", seq({
keyword("finally"),
sym("statement") }) },
{ "switch_statement", seq({
keyword("switch"),
in_parens(err(sym("expression"))),
in_braces(repeat(sym("switch_case"))) }) },
{ "switch_case", seq({
extern const Grammar javascript = Grammar({
{ "program", repeat(sym("statement")) },
// Statements
{ "statement", choice({
sym("statement_block"),
sym("if_statement"),
sym("try_statement"),
sym("switch_statement"),
sym("while_statement"),
sym("for_statement"),
sym("for_in_statement"),
sym("break_statement"),
sym("var_declaration"),
sym("throw_statement"),
sym("return_statement"),
sym("delete_statement"),
sym("expression_statement") }) },
{ "statement_block", in_braces(err(repeat(sym("statement")))) },
{ "for_statement", seq({
keyword("for"),
in_parens(err(seq({
choice({
seq({
keyword("case"),
sym("expression") }),
keyword("default") }),
str(":"),
repeat(sym("statement")) }) },
{ "break_statement", terminated(keyword("break")) },
{ "var_declaration", terminated(seq({
keyword("var"),
comma_sep(err(seq({
sym("var_declaration"),
sym("expression_statement") }),
sym("expression_statement"),
sym("expression") }))),
sym("statement") }) },
{ "for_in_statement", seq({
keyword("for"),
in_parens(err(seq({
optional(keyword("var")),
sym("identifier"),
keyword("in"),
sym("expression") }))),
sym("statement") }) },
{ "throw_statement", terminated(seq({
keyword("throw"),
sym("expression") })) },
{ "if_statement", seq({
keyword("if"),
in_parens(err(sym("expression"))),
sym("statement"),
optional(prec(1, seq({
keyword("else"),
sym("statement") }))) }) },
{ "while_statement", seq({
keyword("while"),
in_parens(err(sym("expression"))),
sym("statement") }) },
{ "try_statement", seq({
keyword("try"),
sym("statement"),
optional(sym("catch_clause")),
optional(sym("finally_clause")) }) },
{ "catch_clause", seq({
keyword("catch"),
in_parens(err(sym("identifier"))),
sym("statement") }) },
{ "finally_clause", seq({
keyword("finally"),
sym("statement") }) },
{ "switch_statement", seq({
keyword("switch"),
in_parens(err(sym("expression"))),
in_braces(repeat(sym("switch_case"))) }) },
{ "switch_case", seq({
choice({
seq({
keyword("case"),
sym("expression") }),
keyword("default") }),
str(":"),
repeat(sym("statement")) }) },
{ "break_statement", terminated(keyword("break")) },
{ "var_declaration", terminated(seq({
keyword("var"),
comma_sep(err(seq({
sym("identifier"),
optional(seq({
str("="),
sym("expression") })) }))) })) },
{ "expression_statement", terminated(err(sym("expression"))) },
{ "return_statement", terminated(seq({
keyword("return"),
optional(sym("expression")) })) },
{ "delete_statement", terminated(seq({
keyword("delete"),
sym("property_access") })) },
{ "expression_statement", terminated(err(sym("expression"))) },
{ "return_statement", terminated(seq({
keyword("return"),
optional(sym("expression")) })) },
{ "delete_statement", terminated(seq({
keyword("delete"),
sym("property_access") })) },
// Expressions
{ "expression", choice({
sym("function_expression"),
sym("function_call"),
sym("constructor_call"),
sym("property_access"),
sym("assignment"),
sym("ternary"),
sym("math_op"),
sym("bool_op"),
sym("object"),
sym("array"),
sym("regex"),
sym("string"),
sym("number"),
sym("true"),
sym("false"),
sym("null"),
// Expressions
{ "expression", choice({
sym("function_expression"),
sym("function_call"),
sym("constructor_call"),
sym("property_access"),
sym("assignment"),
sym("ternary"),
sym("math_op"),
sym("bool_op"),
sym("object"),
sym("array"),
sym("regex"),
sym("string"),
sym("number"),
sym("true"),
sym("false"),
sym("null"),
sym("identifier"),
sym("in_expression"),
sym("instanceof_expression"),
sym("typeof_expression"),
in_parens(sym("expression")) }) },
{ "in_expression", infix_op("in", "expression", 3) },
{ "instanceof_expression", infix_op("instanceof", "expression", 3) },
{ "typeof_expression", prefix_op("typeof", "expression", 3) },
{ "math_op", choice({
prefix_op("++", "expression", 3),
prefix_op("--", "expression", 3),
postfix_op("++", "expression", 3),
postfix_op("--", "expression", 3),
prefix_op("+", "expression", 3),
prefix_op("-", "expression", 3),
infix_op("*", "expression", 2),
infix_op("/", "expression", 2),
infix_op("&", "expression", 2),
infix_op("|", "expression", 2),
infix_op("^", "expression", 2),
infix_op("+", "expression", 1),
infix_op("-", "expression", 1) }) },
{ "bool_op", choice({
infix_op("||", "expression", 1),
infix_op("&&", "expression", 2),
infix_op("===", "expression", 3),
infix_op("==", "expression", 3),
infix_op("!==", "expression", 3),
infix_op("!=", "expression", 3),
infix_op("<=", "expression", 3),
infix_op("<", "expression", 3),
infix_op(">=", "expression", 3),
infix_op(">", "expression", 3),
prefix_op("!", "expression", 4) }) },
{ "ternary", seq({
sym("expression"),
str("?"),
sym("expression"),
str(":"),
sym("expression") }) },
{ "assignment", prec(-1, seq({
choice({
sym("identifier"),
sym("in_expression"),
sym("instanceof_expression"),
sym("typeof_expression"),
in_parens(sym("expression")) }) },
{ "in_expression", infix_op("in", "expression", 3) },
{ "instanceof_expression", infix_op("instanceof", "expression", 3) },
{ "typeof_expression", prefix_op("typeof", "expression", 3) },
{ "math_op", choice({
prefix_op("++", "expression", 3),
prefix_op("--", "expression", 3),
postfix_op("++", "expression", 3),
postfix_op("--", "expression", 3),
prefix_op("+", "expression", 3),
prefix_op("-", "expression", 3),
infix_op("*", "expression", 2),
infix_op("/", "expression", 2),
infix_op("&", "expression", 2),
infix_op("|", "expression", 2),
infix_op("^", "expression", 2),
infix_op("+", "expression", 1),
infix_op("-", "expression", 1) }) },
{ "bool_op", choice({
infix_op("||", "expression", 1),
infix_op("&&", "expression", 2),
infix_op("===", "expression", 3),
infix_op("==", "expression", 3),
infix_op("!==", "expression", 3),
infix_op("!=", "expression", 3),
infix_op("<=", "expression", 3),
infix_op("<", "expression", 3),
infix_op(">=", "expression", 3),
infix_op(">", "expression", 3),
prefix_op("!", "expression", 4) }) },
{ "ternary", seq({
sym("expression"),
str("?"),
sym("expression"),
str(":"),
sym("expression") }) },
{ "assignment", prec(-1, seq({
choice({
sym("identifier"),
sym("property_access") }),
choice({
str("="),
str("+="),
str("-="),
str("*="),
str("/=") }),
sym("expression") })) },
{ "function_expression", seq({
keyword("function"),
optional(sym("identifier")),
sym("formal_parameters"),
sym("statement_block") }) },
{ "function_call", seq({
sym("expression"),
in_parens(comma_sep(err(sym("expression")))) }) },
{ "constructor_call", seq({
keyword("new"),
sym("function_call") }) },
{ "property_access", seq({
sym("expression"),
prec(10, choice({
seq({
str("."),
sym("identifier") }),
in_brackets(sym("expression")) })) }) },
{ "formal_parameters", in_parens(comma_sep(sym("identifier"))) },
// Literals
{ "comment", token(choice({
sym("property_access") }),
choice({
str("="),
str("+="),
str("-="),
str("*="),
str("/=") }),
sym("expression") })) },
{ "function_expression", seq({
keyword("function"),
optional(sym("identifier")),
sym("formal_parameters"),
sym("statement_block") }) },
{ "function_call", seq({
sym("expression"),
in_parens(comma_sep(err(sym("expression")))) }) },
{ "constructor_call", seq({
keyword("new"),
sym("function_call") }) },
{ "property_access", seq({
sym("expression"),
prec(10, choice({
seq({
str("/*"),
repeat(pattern("[^*]|(*[^/])")),
str("*/") }),
pattern("//[^\n]*") })) },
{ "object", in_braces(comma_sep(err(seq({
choice({ sym("string"), sym("identifier") }),
str(":"),
sym("expression") })))) },
{ "array", in_brackets(comma_sep(err(sym("expression")))) },
{ "regex", token(seq({ delimited("/"), optional(str("g")) })) },
{ "string", token(choice({
delimited("\""),
delimited("'") })) },
{ "_line_break", str("\n") },
{ "identifier", pattern("[\\a_$][\\w_$]*") },
{ "number", pattern("\\d+(\\.\\d+)?") },
{ "null", keyword("null") },
{ "true", keyword("true") },
{ "false", keyword("false") },
})
.ubiquitous_tokens({ "comment", "_line_break" })
.separators({ ' ', '\t', '\r' });
}
str("."),
sym("identifier") }),
in_brackets(sym("expression")) })) }) },
{ "formal_parameters", in_parens(comma_sep(sym("identifier"))) },
// Literals
{ "comment", token(choice({
seq({
str("/*"),
repeat(pattern("[^*]|(*[^/])")),
str("*/") }),
pattern("//[^\n]*") })) },
{ "object", in_braces(comma_sep(err(seq({
choice({ sym("string"), sym("identifier") }),
str(":"),
sym("expression") })))) },
{ "array", in_brackets(comma_sep(err(sym("expression")))) },
{ "regex", token(seq({ delimited("/"), optional(str("g")) })) },
{ "string", token(choice({
delimited("\""),
delimited("'") })) },
{ "_line_break", str("\n") },
{ "identifier", pattern("[\\a_$][\\w_$]*") },
{ "number", pattern("\\d+(\\.\\d+)?") },
{ "null", keyword("null") },
{ "true", keyword("true") },
{ "false", keyword("false") },
})
.ubiquitous_tokens({ "comment", "_line_break" })
.separators({ ' ', '\t', '\r' });
} // namespace tree_sitter_examples

View file

@ -2,27 +2,29 @@
#include "helpers.h"
namespace tree_sitter_examples {
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
extern const Grammar json({
{ "value", choice({
sym("object"),
sym("array"),
sym("string"),
sym("number"),
sym("true"),
sym("false"),
sym("null"), }) },
{ "object", in_braces(comma_sep(err(seq({
sym("string"),
str(":"),
sym("value") })))) },
{ "array", in_brackets(comma_sep(err(sym("value")))) },
{ "string", pattern("\"([^\"]|\\\\\")*\"") },
{ "number", pattern("\\d+(\\.\\d+)?") },
{ "null", keyword("null") },
{ "true", keyword("true") },
{ "false", keyword("false") },
});
}
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
extern const Grammar json({
{ "value", choice({
sym("object"),
sym("array"),
sym("string"),
sym("number"),
sym("true"),
sym("false"),
sym("null"), }) },
{ "object", in_braces(comma_sep(err(seq({
sym("string"),
str(":"),
sym("value") })))) },
{ "array", in_brackets(comma_sep(err(sym("value")))) },
{ "string", pattern("\"([^\"]|\\\\\")*\"") },
{ "number", pattern("\\d+(\\.\\d+)?") },
{ "null", keyword("null") },
{ "true", keyword("true") },
{ "false", keyword("false") },
});
} // namespace tree_sitter_examples

View file

@ -319,7 +319,6 @@ LEX_FN() {
ADVANCE(27);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(25);
if (('\t' <= lookahead && lookahead <= '\n') ||

View file

@ -7,71 +7,71 @@
#include <memory>
namespace tree_sitter {
namespace rules {
class Rule;
typedef std::shared_ptr<Rule> rule_ptr;
namespace rules {
class Rule;
typedef std::shared_ptr<Rule> rule_ptr;
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule);
std::ostream &operator<<(std::ostream &stream, const rule_ptr &rule);
rule_ptr blank();
rule_ptr choice(const std::vector<rule_ptr> &rules);
rule_ptr repeat(const rule_ptr &content);
rule_ptr seq(const std::vector<rule_ptr> &rules);
rule_ptr sym(const std::string &name);
rule_ptr pattern(const std::string &value);
rule_ptr str(const std::string &value);
rule_ptr keyword(const std::string &value);
rule_ptr keypattern(const std::string &value);
rule_ptr err(const rule_ptr &rule);
rule_ptr prec(int precedence, rule_ptr rule);
rule_ptr token(rule_ptr rule);
}
rule_ptr blank();
rule_ptr choice(const std::vector<rule_ptr> &rules);
rule_ptr repeat(const rule_ptr &content);
rule_ptr seq(const std::vector<rule_ptr> &rules);
rule_ptr sym(const std::string &name);
rule_ptr pattern(const std::string &value);
rule_ptr str(const std::string &value);
rule_ptr keyword(const std::string &value);
rule_ptr keypattern(const std::string &value);
rule_ptr err(const rule_ptr &rule);
rule_ptr prec(int precedence, rule_ptr rule);
rule_ptr token(rule_ptr rule);
}
class Grammar {
protected:
const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
std::set<std::string> ubiquitous_tokens_;
std::set<char> separators_;
class Grammar {
protected:
const std::vector<std::pair<std::string, rules::rule_ptr> > rules_;
std::set<std::string> ubiquitous_tokens_;
std::set<char> separators_;
public:
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules);
bool operator==(const Grammar &other) const;
std::string start_rule_name() const;
const rules::rule_ptr rule(const std::string &name) const;
public:
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr> > &rules);
bool operator==(const Grammar &other) const;
std::string start_rule_name() const;
const rules::rule_ptr rule(const std::string &name) const;
const std::vector<std::pair<std::string, rules::rule_ptr>> & rules() const;
const std::set<std::string> & ubiquitous_tokens() const;
Grammar & ubiquitous_tokens(const std::set<std::string> &ubiquitous_tokens);
const std::set<char> & separators() const;
Grammar & separators(const std::set<char> &separators);
};
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules() const;
const std::set<std::string> &ubiquitous_tokens() const;
Grammar &ubiquitous_tokens(const std::set<std::string> &ubiquitous_tokens);
const std::set<char> &separators() const;
Grammar &separators(const std::set<char> &separators);
};
// A conflict record holding a textual description. The constructor and the
// comparison operators are only declared here; they are defined elsewhere.
struct Conflict {
  std::string description;

  Conflict(std::string description);

  bool operator<(const Conflict &other) const;
  bool operator==(const Conflict &other) const;
};
// A conflict record holding a textual description. The constructor and the
// comparison operators are only declared here; they are defined elsewhere.
struct Conflict {
  std::string description;

  Conflict(std::string description);

  bool operator<(const Conflict &other) const;
  bool operator==(const Conflict &other) const;
};
// Categories stored in GrammarError::type. The values are spelled out and
// match the implicit numbering (0, 1).
enum GrammarErrorType {
  GrammarErrorTypeRegex = 0,
  GrammarErrorTypeUndefinedSymbol = 1
};
// Categories stored in GrammarError::type. The values are spelled out and
// match the implicit numbering (0, 1).
enum GrammarErrorType {
  GrammarErrorTypeRegex = 0,
  GrammarErrorTypeUndefinedSymbol = 1
};
// An error record pairing a GrammarErrorType with a message string. The
// constructor and operator== are only declared here; they are defined
// elsewhere.
class GrammarError {
 public:
  GrammarErrorType type;
  std::string message;

  GrammarError(GrammarErrorType type, std::string message);
  bool operator==(const GrammarError &other) const;
};
// An error record pairing a GrammarErrorType with a message string. The
// constructor and operator== are only declared here; they are defined
// elsewhere.
class GrammarError {
 public:
  GrammarErrorType type;
  std::string message;

  GrammarError(GrammarErrorType type, std::string message);
  bool operator==(const GrammarError &other) const;
};
std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);
std::ostream& operator<<(std::ostream &stream, const Conflict &conflict);
std::ostream& operator<<(std::ostream &stream, const GrammarError *error);
std::ostream &operator<<(std::ostream &stream, const Grammar &grammar);
std::ostream &operator<<(std::ostream &stream, const Conflict &conflict);
std::ostream &operator<<(std::ostream &stream, const GrammarError *error);
std::tuple<std::string, std::vector<Conflict>, const GrammarError *>
compile(const Grammar &grammar, std::string name);
std::tuple<std::string, std::vector<Conflict>, const GrammarError *> compile(
const Grammar &grammar, std::string name);
}
#endif // TREE_SITTER_COMPILER_H_

View file

@ -26,7 +26,7 @@ typedef struct {
TSLexer ts_lexer_make();
int ts_lexer_advance(TSLexer *lexer);
TSTree * ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden);
TSTree *ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden);
static inline size_t ts_lexer_position(const TSLexer *lexer) {
return lexer->chunk_start + lexer->position_in_chunk;
@ -53,11 +53,13 @@ typedef struct {
TSStack ts_stack_make();
void ts_stack_delete(TSStack *);
TSTree * ts_stack_reduce(TSStack *stack, TSSymbol symbol, size_t immediate_child_count, const int *hidden_symbol_flags, int gather_extras);
TSTree *ts_stack_reduce(TSStack *stack, TSSymbol symbol,
size_t immediate_child_count,
const int *hidden_symbol_flags, int gather_extras);
void ts_stack_shrink(TSStack *stack, size_t new_size);
void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node);
TSStateId ts_stack_top_state(const TSStack *stack);
TSTree * ts_stack_top_node(const TSStack *stack);
TSTree *ts_stack_top_node(const TSStack *stack);
size_t ts_stack_right_position(const TSStack *stack);
typedef enum {
@ -86,7 +88,7 @@ typedef struct {
const int *hidden_symbol_flags;
const TSParseAction *parse_table;
const TSStateId *lex_states;
TSTree * (* lex_fn)(TSParser *, TSStateId);
TSTree *(*lex_fn)(TSParser *, TSStateId);
} TSParserConfig;
struct TSParser {
@ -98,91 +100,99 @@ struct TSParser {
TSParserConfig config;
};
TSParser * ts_parser_make(TSParserConfig);
TSParser *ts_parser_make(TSParserConfig);
void ts_parser_free(TSParser *);
TSParserConfig ts_parser_config(TSParser *);
const TSTree * ts_parser_parse(TSParser *parser, TSInput input, TSInputEdit *edit);
const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
TSInputEdit *edit);
void ts_parser_start(TSParser *parser, TSInput input, TSInputEdit *edit);
TSTree * ts_parser_step(TSParser *parser);
TSTree *ts_parser_step(TSParser *parser);
#define SYMBOL_NAMES \
static const char *ts_symbol_names[]
/* Declarator for the file-local `ts_symbol_names` string array; the expansion
 * has no initializer or semicolon, so the use site supplies them. */
#define SYMBOL_NAMES static const char *ts_symbol_names[]
#define HIDDEN_SYMBOLS \
static const int ts_hidden_symbol_flags[SYMBOL_COUNT]
/* Declarator for the file-local `ts_hidden_symbol_flags` int array of
 * SYMBOL_COUNT entries (SYMBOL_COUNT is not defined in the visible part of
 * this header). ACCEPT_TOKEN indexes this array by symbol. */
#define HIDDEN_SYMBOLS static const int ts_hidden_symbol_flags[SYMBOL_COUNT]
#define LEX_STATES \
static TSStateId ts_lex_states[STATE_COUNT]
/* Declarator for the file-local `ts_lex_states` array of STATE_COUNT
 * TSStateId values. Unlike the other tables it is not declared const. */
#define LEX_STATES static TSStateId ts_lex_states[STATE_COUNT]
/* Declarator for the file-local two-dimensional `ts_parse_actions` table,
 * STATE_COUNT rows by SYMBOL_COUNT columns of TSParseAction. */
#define PARSE_TABLE \
static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT]
#define LEX_FN() \
static TSTree * ts_lex(TSParser *parser, TSStateId lex_state)
/* Signature of the file-local lex function `ts_lex`; the body follows the
 * macro at the use site. The parameter names `parser` and `lex_state` are the
 * ones the lexer macros in this header refer to. */
#define LEX_FN() static TSTree *ts_lex(TSParser *parser, TSStateId lex_state)
#define DEBUG_LEX(...) \
if (parser->lexer.debug) { fprintf(stderr, "\n" __VA_ARGS__); }
/*
 * Writes a newline followed by the printf-style message to stderr, but only
 * when `parser->lexer.debug` is non-zero. A variable named `parser` must be in
 * scope where the macro is expanded. The expansion is a bare `if` block, so a
 * semicolon written after the macro call is a separate empty statement.
 */
#define DEBUG_LEX(...) \
if (parser->lexer.debug) { \
fprintf(stderr, "\n" __VA_ARGS__); \
}
#define START_LEXER() \
DEBUG_LEX("LEX %d", lex_state); \
char lookahead; \
next_state: \
/*
 * Prologue of a lex function: logs the starting `lex_state`, declares the
 * `lookahead` character, and defines the `next_state` label. Every jump to
 * `next_state` (see ADVANCE) re-reads the lexer's current lookahead character
 * and logs it.
 */
#define START_LEXER() \
DEBUG_LEX("LEX %d", lex_state); \
char lookahead; \
next_state: \
lookahead = ts_lexer_lookahead_char(&parser->lexer); \
DEBUG_LEX("CHAR '%c'", lookahead);
#define START_TOKEN() \
ts_lexer_start_token(&parser->lexer);
/* Calls ts_lexer_start_token on the parser's lexer. The expansion already
 * ends with a semicolon. */
#define START_TOKEN() ts_lexer_start_token(&parser->lexer);
#define ADVANCE(state_index) \
{ \
DEBUG_LEX("ADVANCE %d", state_index); \
if (!ts_lexer_advance(&parser->lexer)) ACCEPT_TOKEN(ts_builtin_sym_end); \
lex_state = state_index; goto next_state; \
/*
 * Consumes one character and moves to lex state `state_index`. If
 * ts_lexer_advance returns zero, the macro returns from the lex function with
 * a ts_builtin_sym_end token (via ACCEPT_TOKEN) instead. Otherwise it stores
 * the new state in `lex_state` and jumps back to the `next_state` label
 * defined by START_LEXER.
 */
#define ADVANCE(state_index) \
{ \
DEBUG_LEX("ADVANCE %d", state_index); \
if (!ts_lexer_advance(&parser->lexer)) \
ACCEPT_TOKEN(ts_builtin_sym_end); \
lex_state = state_index; \
goto next_state; \
}
#define ACCEPT_TOKEN(symbol) \
{ \
DEBUG_LEX("TOKEN %s", ts_symbol_names[symbol]); \
return ts_lexer_build_node(&parser->lexer, symbol, ts_hidden_symbol_flags[symbol]); \
/*
 * Logs the symbol's name and returns from the enclosing function with the
 * node built by ts_lexer_build_node for `symbol`; the hidden flag is looked up
 * in `ts_hidden_symbol_flags`. `symbol` is evaluated more than once.
 */
#define ACCEPT_TOKEN(symbol) \
{ \
DEBUG_LEX("TOKEN %s", ts_symbol_names[symbol]); \
return ts_lexer_build_node(&parser->lexer, symbol, \
ts_hidden_symbol_flags[symbol]); \
}
#define LEX_ERROR() \
{ \
DEBUG_LEX("ERROR"); \
/*
 * Logs "ERROR" and returns from the enclosing function with a node built for
 * ts_builtin_sym_error, passing 0 as the is_hidden argument.
 */
#define LEX_ERROR() \
{ \
DEBUG_LEX("ERROR"); \
return ts_lexer_build_node(&parser->lexer, ts_builtin_sym_error, 0); \
}
#define LEX_PANIC() \
{ \
#define LEX_PANIC() \
{ \
DEBUG_LEX("LEX ERROR: unexpected state %d", lex_state); \
return NULL; \
return NULL; \
}
#define SHIFT(to_state_value) \
{ .type = TSParseActionTypeShift, .data = { .to_state = to_state_value } }
/*
 * Designated initializer for a shift action whose `data.to_state` is
 * `to_state_value`. Presumably used as an element of the table declared by
 * PARSE_TABLE - confirm against the generated parsers.
 */
#define SHIFT(to_state_value) \
{ \
.type = TSParseActionTypeShift, .data = { .to_state = to_state_value } \
}
/* Designated initializer for a shift-extra action; only `type` is set. */
#define SHIFT_EXTRA() \
{ .type = TSParseActionTypeShiftExtra }
#define REDUCE_EXTRA(symbol_val) \
{ .type = TSParseActionTypeReduceExtra, .data = { .symbol = symbol_val } }
/* Designated initializer for a reduce-extra action whose `data.symbol` is
 * `symbol_val`. */
#define REDUCE_EXTRA(symbol_val) \
{ \
.type = TSParseActionTypeReduceExtra, .data = { .symbol = symbol_val } \
}
#define REDUCE(symbol_val, child_count_val) \
{ .type = TSParseActionTypeReduce, .data = { .symbol = symbol_val, .child_count = child_count_val } }
/* Designated initializer for a reduce action: `data.symbol` is the symbol
 * being produced and `data.child_count` is `child_count_val`. */
#define REDUCE(symbol_val, child_count_val) \
{ \
.type = TSParseActionTypeReduce, \
.data = { .symbol = symbol_val, .child_count = child_count_val } \
}
/* Designated initializer for an accept action; only `type` is set. */
#define ACCEPT_INPUT() \
{ .type = TSParseActionTypeAccept }
#define EXPORT_PARSER(constructor_name) \
TSParser * constructor_name() { \
return ts_parser_make((TSParserConfig) { \
.symbol_count = SYMBOL_COUNT, \
.hidden_symbol_flags = ts_hidden_symbol_flags, \
#define EXPORT_PARSER(constructor_name) \
TSParser *constructor_name() { \
return ts_parser_make((TSParserConfig) { \
.symbol_count = SYMBOL_COUNT, \
.hidden_symbol_flags = ts_hidden_symbol_flags, \
.parse_table = (const TSParseAction *)ts_parse_actions, \
.lex_states = ts_lex_states, \
.symbol_names = ts_symbol_names, \
.lex_fn = ts_lex, \
}); \
.lex_states = ts_lex_states, \
.symbol_names = ts_symbol_names, \
.lex_fn = ts_lex, \
}); \
}
#ifdef __cplusplus

View file

@ -9,9 +9,9 @@ extern "C" {
typedef struct {
void *data;
const char * (* read_fn)(void *data, size_t *bytes_read);
int (* seek_fn)(void *data, size_t position);
void (* release_fn)(void *data);
const char *(*read_fn)(void *data, size_t *bytes_read);
int (*seek_fn)(void *data, size_t position);
void (*release_fn)(void *data);
} TSInput;
typedef struct {
@ -29,26 +29,26 @@ typedef struct TSDocument TSDocument;
size_t ts_node_pos(const TSNode *);
size_t ts_node_size(const TSNode *);
TSSymbol ts_node_sym(const TSNode *);
TSNode * ts_node_child(TSNode *, size_t);
TSNode *ts_node_child(TSNode *, size_t);
size_t ts_node_child_count(const TSNode *);
TSNode * ts_node_leaf_at_pos(TSNode *, size_t);
TSNode * ts_node_parent(TSNode *node);
TSNode * ts_node_next_sibling(TSNode *node);
TSNode * ts_node_prev_sibling(TSNode *node);
const char * ts_node_name(const TSNode *);
const char * ts_node_string(const TSNode *);
TSNode *ts_node_leaf_at_pos(TSNode *, size_t);
TSNode *ts_node_parent(TSNode *node);
TSNode *ts_node_next_sibling(TSNode *node);
TSNode *ts_node_prev_sibling(TSNode *node);
const char *ts_node_name(const TSNode *);
const char *ts_node_string(const TSNode *);
void ts_node_retain(TSNode *node);
void ts_node_release(TSNode *node);
int ts_node_eq(const TSNode *, const TSNode *);
TSDocument * ts_document_make();
TSDocument *ts_document_make();
void ts_document_free(TSDocument *doc);
void ts_document_set_parser(TSDocument *doc, TSParser *parser);
void ts_document_set_input(TSDocument *doc, TSInput input);
void ts_document_set_input_string(TSDocument *doc, const char *text);
void ts_document_edit(TSDocument *doc, TSInputEdit edit);
const char * ts_document_string(const TSDocument *doc);
TSNode * ts_document_root_node(const TSDocument *document);
const char *ts_document_string(const TSDocument *doc);
TSNode *ts_document_root_node(const TSDocument *document);
#define ts_builtin_sym_error 0
#define ts_builtin_sym_end 1

View file

@ -11,6 +11,6 @@ fi
$CPPLINT \
--root=src \
--linelength=110 \
--filter=-legal/copyright,-readability/namespace,-whitespace/indent,-whitespace/line_length,-readability/todo \
--filter=-legal/copyright,-whitespace/indent,-whitespace/line_length,-readability/todo \
$(find src/compiler -type f) \
2>&1

View file

@ -15,128 +15,133 @@
#include "compiler/build_tables/item_set_transitions.h"
namespace tree_sitter {
using std::string;
using std::map;
using std::unordered_map;
using std::set;
using std::make_shared;
using rules::Symbol;
using rules::CharacterSet;
namespace build_tables {
namespace build_tables {
class LexTableBuilder {
const LexicalGrammar lex_grammar;
ParseTable *parse_table;
LexConflictManager conflict_manager;
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
LexTable lex_table;
using std::string;
using std::map;
using std::unordered_map;
using std::set;
using std::make_shared;
using rules::Symbol;
using rules::CharacterSet;
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
LexItemSet result;
for (const auto &symbol : symbols) {
if (symbol == rules::ERROR())
continue;
else if (symbol == rules::END_OF_INPUT())
result.insert(LexItem(symbol, after_separators(CharacterSet({ 0 }).copy())));
else if (symbol.is_token())
result.insert(LexItem(symbol, after_separators(lex_grammar.rule(symbol))));
}
return result;
}
class LexTableBuilder {
const LexicalGrammar lex_grammar;
ParseTable *parse_table;
LexConflictManager conflict_manager;
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
LexTable lex_table;
LexStateId add_lex_state(const LexItemSet &item_set) {
auto pair = lex_state_ids.find(item_set);
if (pair == lex_state_ids.end()) {
LexStateId state_id = lex_table.add_state();
lex_state_ids[item_set] = state_id;
add_accept_token_actions(item_set, state_id);
add_advance_actions(item_set, state_id);
add_token_start(item_set, state_id);
return state_id;
} else {
return pair->second;
}
}
void add_error_lex_state() {
LexItemSet item_set = build_lex_item_set(parse_table->symbols);
add_accept_token_actions(item_set, LexTable::ERROR_STATE_ID);
add_advance_actions(item_set, LexTable::ERROR_STATE_ID);
}
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
auto transitions = char_transitions(item_set);
for (const auto &transition : transitions) {
CharacterSet rule = transition.first;
LexItemSet new_item_set = transition.second;
LexStateId new_state_id = add_lex_state(new_item_set);
auto action = LexAction::Advance(new_state_id, precedence_values_for_item_set(new_item_set));
if (conflict_manager.resolve_lex_action(lex_table.state(state_id).default_action, action))
lex_table.state(state_id).actions[rule] = action;
}
}
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
for (const LexItem &item : item_set) {
if (item.is_done()) {
auto current_action = lex_table.state(state_id).default_action;
auto new_action = LexAction::Accept(item.lhs, item.precedence());
if (conflict_manager.resolve_lex_action(current_action, new_action))
lex_table.state(state_id).default_action = new_action;
}
}
}
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
for (const auto &item : item_set)
if (item.is_token_start())
lex_table.state(state_id).is_token_start = true;
}
CharacterSet separator_set() const {
set<rules::CharacterRange> ranges;
for (char c : lex_grammar.separators)
ranges.insert(c);
return CharacterSet(ranges);
}
rules::rule_ptr after_separators(rules::rule_ptr rule) {
return rules::Seq::Build({
make_shared<rules::Metadata>(
make_shared<rules::Repeat>(separator_set().copy()),
map<rules::MetadataKey, int>({
{rules::START_TOKEN, 1},
{rules::PRECEDENCE, -1},
})),
rule,
});
}
set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
set<int> result;
for (const auto &item : item_set)
result.insert(item.precedence());
return result;
}
public:
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) :
lex_grammar(lex_grammar),
parse_table(parse_table),
conflict_manager(LexConflictManager(lex_grammar)) {}
LexTable build() {
for (auto &parse_state : parse_table->states) {
LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs());
parse_state.lex_state_id = add_lex_state(item_set);
}
add_error_lex_state();
return lex_table;
}
};
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar) {
return LexTableBuilder(parse_table, lex_grammar).build();
}
// Builds the set of lex items for the given symbols. The ERROR symbol adds
// nothing, END_OF_INPUT is matched by the character set containing only 0,
// and every other token symbol uses its rule from the lexical grammar.
// Symbols that are none of these are skipped. Each rule is wrapped by
// after_separators().
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
  LexItemSet items;
  for (const auto &sym : symbols) {
    if (sym == rules::ERROR())
      continue;

    if (sym == rules::END_OF_INPUT()) {
      items.insert(
          LexItem(sym, after_separators(CharacterSet({ 0 }).copy())));
    } else if (sym.is_token()) {
      items.insert(LexItem(sym, after_separators(lex_grammar.rule(sym))));
    }
  }
  return items;
}
// Returns the id of the lex state for `item_set`, creating and populating the
// state the first time the item set is seen.
LexStateId add_lex_state(const LexItemSet &item_set) {
  auto existing = lex_state_ids.find(item_set);
  if (existing != lex_state_ids.end())
    return existing->second;

  // Record the id before filling in the actions: add_advance_actions calls
  // back into this function and must find the entry for this item set.
  LexStateId new_id = lex_table.add_state();
  lex_state_ids[item_set] = new_id;
  add_accept_token_actions(item_set, new_id);
  add_advance_actions(item_set, new_id);
  add_token_start(item_set, new_id);
  return new_id;
}
void add_error_lex_state() {
LexItemSet item_set = build_lex_item_set(parse_table->symbols);
add_accept_token_actions(item_set, LexTable::ERROR_STATE_ID);
add_advance_actions(item_set, LexTable::ERROR_STATE_ID);
}
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
auto transitions = char_transitions(item_set);
for (const auto &transition : transitions) {
CharacterSet rule = transition.first;
LexItemSet new_item_set = transition.second;
LexStateId new_state_id = add_lex_state(new_item_set);
auto action = LexAction::Advance(
new_state_id, precedence_values_for_item_set(new_item_set));
if (conflict_manager.resolve_lex_action(
lex_table.state(state_id).default_action, action))
lex_table.state(state_id).actions[rule] = action;
}
}
// For every finished item in `item_set`, proposes an Accept action as the
// default action of `state_id`; the conflict manager decides whether it
// replaces the current default.
void add_accept_token_actions(const LexItemSet &item_set,
                              LexStateId state_id) {
  for (const LexItem &item : item_set) {
    if (!item.is_done())
      continue;

    auto existing_action = lex_table.state(state_id).default_action;
    auto accept_action = LexAction::Accept(item.lhs, item.precedence());
    if (conflict_manager.resolve_lex_action(existing_action, accept_action))
      lex_table.state(state_id).default_action = accept_action;
  }
}
// Sets the state's is_token_start flag if any item in `item_set` reports
// is_token_start(). The flag is never cleared here.
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
  for (const auto &lex_item : item_set) {
    if (!lex_item.is_token_start())
      continue;
    lex_table.state(state_id).is_token_start = true;
  }
}
// Returns a character set holding one range per separator character of the
// lexical grammar.
CharacterSet separator_set() const {
  set<rules::CharacterRange> separator_ranges;
  for (char separator : lex_grammar.separators) {
    separator_ranges.insert(separator);
  }
  return CharacterSet(separator_ranges);
}
// Returns a sequence of a repeat of the separator set followed by `rule`. The
// separator part carries START_TOKEN = 1 and PRECEDENCE = -1 metadata.
rules::rule_ptr after_separators(rules::rule_ptr rule) {
  map<rules::MetadataKey, int> separator_metadata(
      { { rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 } });
  auto leading_separators = make_shared<rules::Metadata>(
      make_shared<rules::Repeat>(separator_set().copy()), separator_metadata);
  return rules::Seq::Build({ leading_separators, rule });
}
// Returns the distinct precedence values of the items in `item_set`.
set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
  set<int> precedences;
  for (const auto &lex_item : item_set) {
    precedences.insert(lex_item.precedence());
  }
  return precedences;
}
public:
// Stores a copy of `lex_grammar` and the `parse_table` pointer (build() writes
// each parse state's lex_state_id through it), and constructs the conflict
// manager from the lexical grammar.
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
: lex_grammar(lex_grammar),
parse_table(parse_table),
conflict_manager(LexConflictManager(lex_grammar)) {}
// Creates a lex state for the expected inputs of each parse state, stores its
// id on the parse state, then fills in the error state and returns the
// finished table.
LexTable build() {
  for (auto &state : parse_table->states) {
    const LexItemSet expected_items =
        build_lex_item_set(state.expected_inputs());
    state.lex_state_id = add_lex_state(expected_items);
  }

  add_error_lex_state();
  return lex_table;
}
};
// Builds the lex table for `lex_grammar`. Each state of `*parse_table` has its
// lex_state_id assigned as a side effect.
LexTable build_lex_table(ParseTable *parse_table,
                         const LexicalGrammar &lex_grammar) {
  LexTableBuilder builder(parse_table, lex_grammar);
  return builder.build();
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -5,12 +5,16 @@
#include "compiler/lex_table.h"
namespace tree_sitter {
class LexicalGrammar;
class ParseTable;
namespace build_tables {
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar);
}
}
class LexicalGrammar;
class ParseTable;
namespace build_tables {
LexTable build_lex_table(ParseTable *parse_table,
const LexicalGrammar &lex_grammar);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_

View file

@ -14,142 +14,156 @@
#include "compiler/build_tables/first_set.h"
namespace tree_sitter {
using std::pair;
using std::string;
using std::vector;
using std::set;
using std::map;
using std::unordered_map;
using std::make_shared;
using rules::Symbol;
namespace build_tables {
namespace build_tables {
class ParseTableBuilder {
const SyntaxGrammar grammar;
ParseConflictManager conflict_manager;
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
ParseTable parse_table;
using std::pair;
using std::string;
using std::vector;
using std::set;
using std::map;
using std::unordered_map;
using std::make_shared;
using rules::Symbol;
ParseStateId add_parse_state(const ParseItemSet &item_set) {
auto pair = parse_state_ids.find(item_set);
if (pair == parse_state_ids.end()) {
ParseStateId state_id = parse_table.add_state();
parse_state_ids[item_set] = state_id;
item_sets_to_process.push_back({ item_set, state_id });
return state_id;
} else {
return pair->second;
}
}
class ParseTableBuilder {
const SyntaxGrammar grammar;
ParseConflictManager conflict_manager;
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
vector<pair<ParseItemSet, ParseStateId> > item_sets_to_process;
ParseTable parse_table;
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
const set<Symbol> &lookahead_symbols = pair.second;
if (item.is_done()) {
ParseAction action = (item.lhs == rules::START()) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
for (auto &lookahead_sym : lookahead_symbols)
if (should_add_action(state_id, lookahead_sym, action))
parse_table.add_action(state_id, lookahead_sym, action);
}
}
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &transition : sym_transitions(item_set, grammar)) {
const Symbol &symbol = transition.first;
const ParseItemSet &next_item_set = transition.second;
ParseAction new_action = ParseAction::Shift(0, precedence_values_for_item_set(next_item_set));
if (should_add_action(state_id, symbol, new_action)) {
ParseStateId new_state_id = add_parse_state(next_item_set);
new_action.state_index = new_state_id;
parse_table.add_action(state_id, symbol, new_action);
}
}
}
void add_shift_extra_actions(ParseStateId state_id) {
const map<Symbol, ParseAction> &actions = parse_table.states[state_id].actions;
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
const auto &pair_for_symbol = actions.find(ubiquitous_symbol);
if (pair_for_symbol == actions.end()) {
parse_table.add_action(state_id, ubiquitous_symbol, ParseAction::ShiftExtra());
}
}
}
void add_reduce_extra_actions(ParseStateId state_id) {
const map<Symbol, ParseAction> &actions = parse_table.states[state_id].actions;
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
const auto &pair_for_symbol = actions.find(ubiquitous_symbol);
if (pair_for_symbol != actions.end() && pair_for_symbol->second.type == ParseActionTypeShift) {
size_t shift_state_id = pair_for_symbol->second.state_index;
for (const auto &pair : actions) {
const Symbol &lookahead_sym = pair.first;
ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol);
if (should_add_action(shift_state_id, lookahead_sym, reduce_extra))
parse_table.add_action(shift_state_id, lookahead_sym, reduce_extra);
}
}
}
}
bool should_add_action(ParseStateId state_id, const Symbol &symbol, const ParseAction &action) {
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(symbol);
return (
current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(symbol, current_action->second, action));
}
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
set<int> result;
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
if (item.consumed_symbol_count > 0)
result.insert(item.precedence());
}
return result;
}
public:
ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) :
grammar(grammar),
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
pair<ParseTable, vector<Conflict>> build() {
ParseItem start_item(rules::START(), make_shared<Symbol>(0), 0);
add_parse_state(item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));
parse_table.symbols.insert(rules::ERROR());
while (!item_sets_to_process.empty()) {
auto pair = item_sets_to_process.back();
ParseItemSet &item_set = pair.first;
ParseStateId &state_id = pair.second;
item_sets_to_process.pop_back();
add_reduce_actions(item_set, state_id);
add_shift_actions(item_set, state_id);
add_shift_extra_actions(state_id);
}
for (ParseStateId state_id = 0; state_id < parse_table.states.size(); state_id++)
add_reduce_extra_actions(state_id);
return { parse_table, conflict_manager.conflicts() };
}
};
pair<ParseTable, vector<Conflict>>
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
return ParseTableBuilder(grammar, lex_grammar).build();
}
// Returns the id of the parse state for `item_set`. An unseen item set gets a
// fresh state and is queued in item_sets_to_process so build() fills in its
// actions later.
ParseStateId add_parse_state(const ParseItemSet &item_set) {
  auto existing = parse_state_ids.find(item_set);
  if (existing != parse_state_ids.end())
    return existing->second;

  ParseStateId new_id = parse_table.add_state();
  parse_state_ids[item_set] = new_id;
  item_sets_to_process.push_back({ item_set, new_id });
  return new_id;
}
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
const set<Symbol> &lookahead_symbols = pair.second;
if (item.is_done()) {
ParseAction action =
(item.lhs == rules::START())
? ParseAction::Accept()
: ParseAction::Reduce(item.lhs, item.consumed_symbol_count,
item.precedence());
for (auto &lookahead_sym : lookahead_symbols)
if (should_add_action(state_id, lookahead_sym, action))
parse_table.add_action(state_id, lookahead_sym, action);
}
}
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &transition : sym_transitions(item_set, grammar)) {
const Symbol &symbol = transition.first;
const ParseItemSet &next_item_set = transition.second;
ParseAction new_action =
ParseAction::Shift(0, precedence_values_for_item_set(next_item_set));
if (should_add_action(state_id, symbol, new_action)) {
ParseStateId new_state_id = add_parse_state(next_item_set);
new_action.state_index = new_state_id;
parse_table.add_action(state_id, symbol, new_action);
}
}
}
// Adds a ShiftExtra action to `state_id` for every ubiquitous token that has
// no action in that state yet.
void add_shift_extra_actions(ParseStateId state_id) {
  const map<Symbol, ParseAction> &actions =
      parse_table.states[state_id].actions;
  for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
    if (actions.count(ubiquitous_symbol) != 0)
      continue;
    parse_table.add_action(state_id, ubiquitous_symbol,
                           ParseAction::ShiftExtra());
  }
}
// For each ubiquitous token that `state_id` handles with a Shift, proposes a
// ReduceExtra action in the shifted-to state on every symbol that has an
// action in `state_id`. should_add_action decides whether each is stored.
void add_reduce_extra_actions(ParseStateId state_id) {
  const map<Symbol, ParseAction> &actions =
      parse_table.states[state_id].actions;
  for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
    auto entry = actions.find(ubiquitous_symbol);
    if (entry == actions.end() ||
        entry->second.type != ParseActionTypeShift)
      continue;

    size_t shift_state_id = entry->second.state_index;
    for (const auto &action_entry : actions) {
      const Symbol &lookahead_sym = action_entry.first;
      ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol);
      if (should_add_action(shift_state_id, lookahead_sym, reduce_extra))
        parse_table.add_action(shift_state_id, lookahead_sym, reduce_extra);
    }
  }
}
// Returns true when `action` may be stored for `symbol` in state `state_id`:
// either the state has no action for the symbol yet, or the conflict manager
// resolves the conflict in favour of the new action.
bool should_add_action(ParseStateId state_id, const Symbol &symbol,
                       const ParseAction &action) {
  // Bind by reference: this runs for every proposed action, and copying the
  // state's whole action map for a single lookup is wasted work.
  const auto &current_actions = parse_table.states[state_id].actions;
  auto current_action = current_actions.find(symbol);
  return (current_action == current_actions.end() ||
          conflict_manager.resolve_parse_action(
              symbol, current_action->second, action));
}
// Returns the distinct precedence values of the items in `item_set` that have
// consumed at least one symbol.
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
  set<int> precedences;
  for (const auto &entry : item_set) {
    const ParseItem &parse_item = entry.first;
    if (parse_item.consumed_symbol_count <= 0)
      continue;
    precedences.insert(parse_item.precedence());
  }
  return precedences;
}
public:
// Stores a copy of the syntax grammar and constructs the conflict manager
// from both grammars. The lexical grammar is passed only to the conflict
// manager; the builder keeps no member for it.
ParseTableBuilder(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar)
: grammar(grammar),
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
// Builds the parse table. Starting from the closure of the START item with
// END_OF_INPUT as lookahead, it processes queued item sets until none remain,
// then adds the reduce-extra actions for every state. Returns the table
// together with the conflicts collected by the conflict manager.
pair<ParseTable, vector<Conflict> > build() {
  ParseItem start_item(rules::START(), make_shared<Symbol>(0), 0);
  add_parse_state(
      item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));
  parse_table.symbols.insert(rules::ERROR());

  while (!item_sets_to_process.empty()) {
    // Copy the entry out before popping it: processing it can push new work
    // onto the same vector.
    ParseItemSet item_set = item_sets_to_process.back().first;
    ParseStateId state_id = item_sets_to_process.back().second;
    item_sets_to_process.pop_back();

    add_reduce_actions(item_set, state_id);
    add_shift_actions(item_set, state_id);
    add_shift_extra_actions(state_id);
  }

  for (ParseStateId id = 0; id < parse_table.states.size(); ++id)
    add_reduce_extra_actions(id);

  return { parse_table, conflict_manager.conflicts() };
}
};
// Builds the parse table for the two grammars and returns it together with
// the conflicts found while building it.
pair<ParseTable, vector<Conflict> > build_parse_table(
    const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
  ParseTableBuilder builder(grammar, lex_grammar);
  return builder.build();
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -7,13 +7,13 @@
#include "compiler/parse_table.h"
namespace tree_sitter {
class SyntaxGrammar;
class LexicalGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace build_tables {
std::pair<ParseTable, std::vector<Conflict>>
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
}
namespace build_tables {
// Builds the parse table for the given grammars; the second element of the
// result holds the conflicts found while building it.
std::pair<ParseTable, std::vector<Conflict> > build_parse_table(
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_BUILD_PARSE_TABLE_H_

View file

@ -4,19 +4,20 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::tuple;
using std::vector;
using std::make_tuple;
namespace build_tables {
namespace build_tables {
tuple<ParseTable, LexTable, vector<Conflict>>
build_tables(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar) {
auto parse_table_result = build_parse_table(grammar, lex_grammar);
ParseTable parse_table = parse_table_result.first;
vector<Conflict> conflicts = parse_table_result.second;
LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
return make_tuple(parse_table, lex_table, conflicts);
}
}
using std::tuple;
using std::vector;
using std::make_tuple;
// Builds the parse table and then the lex table, returning both with the
// parse conflicts. The lex table must be built before the tuple is made:
// build_lex_table writes into the parse table through the pointer it is given.
tuple<ParseTable, LexTable, vector<Conflict> > build_tables(
    const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
  auto parse_result = build_parse_table(grammar, lex_grammar);
  ParseTable parse_table = parse_result.first;
  vector<Conflict> conflicts = parse_result.second;

  LexTable lex_table = build_lex_table(&parse_table, lex_grammar);

  return make_tuple(parse_table, lex_table, conflicts);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -8,14 +8,13 @@
#include "compiler/lex_table.h"
namespace tree_sitter {
class SyntaxGrammar;
class LexicalGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace build_tables {
std::tuple<ParseTable, LexTable, std::vector<Conflict>>
build_tables(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar);
}
namespace build_tables {
// Builds both tables for the given grammars and returns them together with
// the conflicts found while building the parse table.
std::tuple<ParseTable, LexTable, std::vector<Conflict> > build_tables(
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_BUILD_TABLES_H_

View file

@ -9,53 +9,55 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
using std::set;
using rules::Symbol;
namespace build_tables {
namespace build_tables {
class FirstSet : public rules::RuleFn<set<Symbol>> {
const SyntaxGrammar *grammar;
set<Symbol> visited_symbols;
using std::set;
using rules::Symbol;
public:
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
class FirstSet : public rules::RuleFn<set<Symbol> > {
const SyntaxGrammar *grammar;
set<Symbol> visited_symbols;
set<Symbol> apply_to(const Symbol *rule) {
auto insertion_result = visited_symbols.insert(*rule);
if (insertion_result.second) {
return (rule->is_token()) ?
set<Symbol>({ *rule }) :
apply(grammar->rule(*rule));
} else {
return set<Symbol>();
}
}
public:
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
set<Symbol> apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
set<Symbol> apply_to(const rules::Choice *rule) {
set<Symbol> result;
for (const auto &el : rule->elements) {
auto &&next_syms = apply(el);
result.insert(next_syms.begin(), next_syms.end());
}
return result;
}
set<Symbol> apply_to(const rules::Seq *rule) {
auto &&result = apply(rule->left);
if (rule_can_be_blank(rule->left, *grammar)) {
auto &&right_symbols = apply(rule->right);
result.insert(right_symbols.begin(), right_symbols.end());
}
return result;
}
};
set<Symbol> first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
return FirstSet(&grammar).apply(rule);
}
// First set of a symbol: a token yields itself and a non-token yields the
// first set of its grammar rule. A symbol that was already visited yields the
// empty set, which stops recursion on recursive rules.
set<Symbol> apply_to(const Symbol *rule) {
  const bool first_visit = visited_symbols.insert(*rule).second;
  if (!first_visit)
    return set<Symbol>();

  if (rule->is_token())
    return set<Symbol>({ *rule });
  return apply(grammar->rule(*rule));
}
// Metadata does not affect the first set: use the wrapped rule's result.
set<Symbol> apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
// First set of a choice: the union of the first sets of its elements.
set<Symbol> apply_to(const rules::Choice *rule) {
  set<Symbol> first_symbols;
  for (const auto &element : rule->elements) {
    set<Symbol> element_symbols = apply(element);
    first_symbols.insert(element_symbols.begin(), element_symbols.end());
  }
  return first_symbols;
}
// First set of a sequence: the first set of the left element, plus the first
// set of the right element when the left one can be blank.
set<Symbol> apply_to(const rules::Seq *rule) {
  // Hold the result by value. Returning through an `auto &&` reference would
  // copy the set instead of letting the return elide or move it.
  set<Symbol> result = apply(rule->left);
  if (rule_can_be_blank(rule->left, *grammar)) {
    set<Symbol> right_symbols = apply(rule->right);
    result.insert(right_symbols.begin(), right_symbols.end());
  }
  return result;
}
};
// Computes the first set of `rule` in `grammar` with a fresh FirstSet visitor,
// so no visited-symbol state is shared between calls.
set<Symbol> first_set(const rules::rule_ptr &rule,
                      const SyntaxGrammar &grammar) {
  FirstSet first_set_fn(&grammar);
  return first_set_fn.apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -6,18 +6,20 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
class SyntaxGrammar;
namespace build_tables {
class SyntaxGrammar;
/*
* Returns the set of terminal symbols that can appear at
* the beginning of a string derivable from a given rule,
* in a given grammar.
*/
std::set<rules::Symbol>
first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
}
}
namespace build_tables {
/*
* Returns the set of terminal symbols that can appear at
* the beginning of a string derivable from a given rule,
* in a given grammar.
*/
std::set<rules::Symbol> first_set(const rules::rule_ptr &rule,
const SyntaxGrammar &grammar);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_FIRST_SET_H_

View file

@ -3,28 +3,28 @@
#include "compiler/rules/seq.h"
namespace tree_sitter {
namespace build_tables {
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) {
class GetMetadata : public rules::RuleFn<int> {
rules::MetadataKey metadata_key;
namespace build_tables {
int apply_to(const rules::Metadata *rule) {
int result = rule->value_for(metadata_key);
return (result != 0) ? result : apply(rule->rule);
}
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) {
class GetMetadata : public rules::RuleFn<int> {
rules::MetadataKey metadata_key;
// TODO -
// Remove this. It is currently needed to make the rule generated
// by `LexTableBuilder::after_separators` have the right precedence.
int apply_to(const rules::Seq *rule) {
return apply(rule->left);
}
public:
explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
};
return GetMetadata(key).apply(rule);
}
// Returns this node's value for the requested key when it is non-zero;
// otherwise looks the key up on the wrapped rule.
int apply_to(const rules::Metadata *rule) {
  const int own_value = rule->value_for(metadata_key);
  if (own_value != 0)
    return own_value;
  return apply(rule->rule);
}
// TODO -
// Remove this. It is currently needed to make the rule generated
// by `LexTableBuilder::after_separators` have the right precedence.
// Until then, a sequence reports the metadata of its left element only.
int apply_to(const rules::Seq *rule) { return apply(rule->left); }
public:
explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
};
return GetMetadata(key).apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -5,9 +5,9 @@
#include "compiler/rules/metadata.h"
namespace tree_sitter {
namespace build_tables {
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key);
}
namespace build_tables {
// Looks up the metadata value for `key` on `rule`.
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_GET_METADATA_H_

View file

@ -5,17 +5,14 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace build_tables {
Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule) :
lhs(lhs),
rule(rule) {}
namespace build_tables {
bool Item::is_done() const {
return rule_can_be_blank(rule);
}
// Stores the left-hand-side symbol and the rule for this item.
Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule)
: lhs(lhs), rule(rule) {}
int Item::precedence() const {
return get_metadata(rule, rules::PRECEDENCE);
}
}
}
// An item is done when its rule can be blank.
bool Item::is_done() const { return rule_can_be_blank(rule); }
// The item's precedence is the PRECEDENCE metadata value of its rule.
int Item::precedence() const { return get_metadata(rule, rules::PRECEDENCE); }
} // namespace build_tables
} // namespace tree_sitter

View file

@ -5,17 +5,19 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
class Item {
public:
Item(const rules::Symbol &lhs, rules::rule_ptr rule);
bool is_done() const;
int precedence() const;
namespace build_tables {
rules::Symbol lhs;
rules::rule_ptr rule;
};
}
}
// Pairs a left-hand-side symbol with a rule. The names LexItem and ParseItem
// used elsewhere in the table builders presumably derive from this class -
// confirm in their headers.
class Item {
public:
Item(const rules::Symbol &lhs, rules::rule_ptr rule);
// True when `rule` can be blank.
bool is_done() const;
// PRECEDENCE metadata value of `rule`.
int precedence() const;
// Symbol on the left-hand side of the item.
rules::Symbol lhs;
// Rule associated with the item.
rules::rule_ptr rule;
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_ITEM_H_

View file

@ -10,50 +10,56 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::set;
using std::vector;
using std::pair;
using rules::Symbol;
using rules::rule_ptr;
namespace build_tables {
namespace build_tables {
const ParseItemSet item_set_closure(const ParseItem &starting_item,
const set<Symbol> &starting_lookahead_symbols,
const SyntaxGrammar &grammar) {
ParseItemSet result;
using std::set;
using std::vector;
using std::pair;
using rules::Symbol;
using rules::rule_ptr;
vector<pair<ParseItem, set<Symbol>>> items_to_process = {{starting_item, starting_lookahead_symbols}};
while (!items_to_process.empty()) {
ParseItem item = items_to_process.back().first;
set<Symbol> new_lookahead_symbols = items_to_process.back().second;
items_to_process.pop_back();
const ParseItemSet item_set_closure(
const ParseItem &starting_item,
const set<Symbol> &starting_lookahead_symbols,
const SyntaxGrammar &grammar) {
ParseItemSet result;
set<Symbol> &lookahead_symbols = result[item];
size_t previous_size = lookahead_symbols.size();
lookahead_symbols.insert(new_lookahead_symbols.begin(), new_lookahead_symbols.end());
vector<pair<ParseItem, set<Symbol>>> items_to_process = {
{ starting_item, starting_lookahead_symbols }
};
if (lookahead_symbols.size() == previous_size)
continue;
while (!items_to_process.empty()) {
ParseItem item = items_to_process.back().first;
set<Symbol> new_lookahead_symbols = items_to_process.back().second;
items_to_process.pop_back();
for (const auto &pair : sym_transitions(item.rule)) {
const Symbol &symbol = pair.first;
const rule_ptr &next_rule = pair.second;
set<Symbol> &lookahead_symbols = result[item];
size_t previous_size = lookahead_symbols.size();
lookahead_symbols.insert(new_lookahead_symbols.begin(),
new_lookahead_symbols.end());
if (symbol.is_token() || symbol.is_built_in())
continue;
if (lookahead_symbols.size() == previous_size)
continue;
set<Symbol> next_lookahead_symbols = first_set(next_rule, grammar);
if (rule_can_be_blank(next_rule, grammar))
next_lookahead_symbols.insert(lookahead_symbols.begin(), lookahead_symbols.end());
for (const auto &pair : sym_transitions(item.rule)) {
const Symbol &symbol = pair.first;
const rule_ptr &next_rule = pair.second;
items_to_process.push_back({
ParseItem(symbol, grammar.rule(symbol), 0),
next_lookahead_symbols
});
}
}
if (symbol.is_token() || symbol.is_built_in())
continue;
return result;
}
set<Symbol> next_lookahead_symbols = first_set(next_rule, grammar);
if (rule_can_be_blank(next_rule, grammar))
next_lookahead_symbols.insert(lookahead_symbols.begin(),
lookahead_symbols.end());
items_to_process.push_back({ ParseItem(symbol, grammar.rule(symbol), 0),
next_lookahead_symbols });
}
}
return result;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -6,13 +6,13 @@
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class SyntaxGrammar;
class SyntaxGrammar;
namespace build_tables {
const ParseItemSet item_set_closure(const ParseItem &item,
const std::set<rules::Symbol> &lookahead_symbols,
const SyntaxGrammar &grammar);
}
namespace build_tables {
const ParseItemSet item_set_closure(
const ParseItem &item, const std::set<rules::Symbol> &lookahead_symbols,
const SyntaxGrammar &grammar);
}
}
#endif // COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_

View file

@ -7,43 +7,49 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::map;
using std::set;
using rules::CharacterSet;
using rules::Symbol;
namespace build_tables {
namespace build_tables {
// For every item in the set, follows each symbol transition of the item's
// rule, builds the item advanced past that symbol (one more consumed
// symbol), and merges the closure of the advanced item into the result
// entry for that symbol. Lookahead sets of items that land in the same
// entry are unioned.
map<Symbol, ParseItemSet>
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
  map<Symbol, ParseItemSet> transitions_by_symbol;
  for (const auto &entry : item_set) {
    const ParseItem &item = entry.first;
    const set<Symbol> &lookaheads = entry.second;
    for (auto &step : sym_transitions(item.rule)) {
      ParseItem advanced(item.lhs, step.second, item.consumed_symbol_count + 1);
      merge_sym_transition<ParseItemSet>(
          &transitions_by_symbol,
          { step.first, item_set_closure(advanced, lookaheads, grammar) },
          [](ParseItemSet *left, const ParseItemSet *right) {
            for (auto &incoming : *right) {
              set<Symbol> &merged = (*left)[incoming.first];
              merged.insert(incoming.second.begin(), incoming.second.end());
            }
          });
    }
  }
  return transitions_by_symbol;
}
using std::map;
using std::set;
using rules::CharacterSet;
using rules::Symbol;
// For every lex item in the set, follows each character transition of the
// item's rule and merges a one-element set holding the advanced item into
// the result, unioning item sets whose character sets overlap.
map<CharacterSet, LexItemSet>
char_transitions(const LexItemSet &item_set) {
  map<CharacterSet, LexItemSet> transitions_by_chars;
  for (const LexItem &item : item_set) {
    for (auto &step : char_transitions(item.rule)) {
      LexItem advanced(item.lhs, step.second);
      merge_char_transition<LexItemSet>(
          &transitions_by_chars, { step.first, LexItemSet({ advanced }) },
          [](LexItemSet *existing, const LexItemSet *incoming) {
            existing->insert(incoming->begin(), incoming->end());
          });
    }
  }
  return transitions_by_chars;
}
// For every item in the set, follows each symbol transition of the item's
// rule, builds the item advanced past that symbol (one more consumed
// symbol), and merges the closure of the advanced item into the result
// entry for that symbol. Lookahead sets of items that land in the same
// entry are unioned.
map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
                                          const SyntaxGrammar &grammar) {
  map<Symbol, ParseItemSet> transitions_by_symbol;
  for (const auto &entry : item_set) {
    const ParseItem &item = entry.first;
    const set<Symbol> &lookaheads = entry.second;
    for (auto &step : sym_transitions(item.rule)) {
      ParseItem advanced(item.lhs, step.second, item.consumed_symbol_count + 1);
      merge_sym_transition<ParseItemSet>(
          &transitions_by_symbol,
          { step.first, item_set_closure(advanced, lookaheads, grammar) },
          [](ParseItemSet *left, const ParseItemSet *right) {
            for (auto &incoming : *right) {
              set<Symbol> &merged = (*left)[incoming.first];
              merged.insert(incoming.second.begin(), incoming.second.end());
            }
          });
    }
  }
  return transitions_by_symbol;
}
// For every lex item in the set, follows each character transition of the
// item's rule and merges a one-element set holding the advanced item into
// the result, unioning item sets whose character sets overlap.
map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set) {
  map<CharacterSet, LexItemSet> transitions_by_chars;
  for (const LexItem &item : item_set) {
    for (auto &step : char_transitions(item.rule)) {
      LexItem advanced(item.lhs, step.second);
      merge_char_transition<LexItemSet>(
          &transitions_by_chars, { step.first, LexItemSet({ advanced }) },
          [](LexItemSet *existing, const LexItemSet *incoming) {
            existing->insert(incoming->begin(), incoming->end());
          });
    }
  }
  return transitions_by_chars;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -6,19 +6,23 @@
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class SyntaxGrammar;
namespace rules {
class CharacterSet;
class Symbol;
}
namespace build_tables {
std::map<rules::Symbol, ParseItemSet>
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar);
class SyntaxGrammar;
std::map<rules::CharacterSet, LexItemSet>
char_transitions(const LexItemSet &item_set);
}
namespace rules {
class CharacterSet;
class Symbol;
}
namespace build_tables {
std::map<rules::Symbol, ParseItemSet> sym_transitions(
const ParseItemSet &item_set, const SyntaxGrammar &grammar);
std::map<rules::CharacterSet, LexItemSet> char_transitions(
const LexItemSet &item_set);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_

View file

@ -7,49 +7,49 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
using std::string;
using std::to_string;
using std::map;
using std::set;
using std::vector;
namespace build_tables {
// Initializes the `grammar` member from the given lexical grammar.
LexConflictManager::LexConflictManager(const LexicalGrammar &grammar) :
grammar(grammar) {}
using std::string;
using std::to_string;
using std::map;
using std::set;
using std::vector;
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
const LexAction &new_action) {
if (new_action.type < old_action.type)
return !resolve_lex_action(new_action, old_action);
// Initializes the `grammar` member from the given lexical grammar.
LexConflictManager::LexConflictManager(const LexicalGrammar &grammar)
: grammar(grammar) {}
switch (old_action.type) {
case LexActionTypeError:
return true;
case LexActionTypeAccept: {
int old_precedence = *old_action.precedence_values.begin();
switch (new_action.type) {
case LexActionTypeAccept: {
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return true;
} else if (new_precedence < old_precedence) {
return false;
} else {
return new_action.symbol.index < old_action.symbol.index;
}
}
case LexActionTypeAdvance: {
return true;
}
default:
return false;
}
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
const LexAction &new_action) {
if (new_action.type < old_action.type)
return !resolve_lex_action(new_action, old_action);
return true;
}
default:
return false;
}
switch (old_action.type) {
case LexActionTypeError:
return true;
case LexActionTypeAccept: {
int old_precedence = *old_action.precedence_values.begin();
switch (new_action.type) {
case LexActionTypeAccept: {
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return true;
} else if (new_precedence < old_precedence) {
return false;
} else {
return new_action.symbol.index < old_action.symbol.index;
}
}
case LexActionTypeAdvance: { return true; }
default:
return false;
}
return true;
}
default:
return false;
}
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -6,16 +6,18 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
class LexConflictManager {
const LexicalGrammar grammar;
namespace build_tables {
public:
explicit LexConflictManager(const LexicalGrammar &grammar);
bool resolve_lex_action(const LexAction &old_action,
const LexAction &new_action);
};
}
}
// Decides between two competing lex actions.
class LexConflictManager {
// Held by value, so the manager owns its own copy of the grammar.
const LexicalGrammar grammar;
public:
explicit LexConflictManager(const LexicalGrammar &grammar);
// Compares an existing action with a candidate replacement.
// NOTE(review): a `true` result appears to mean the new action should
// replace the old one -- confirm against the callers.
bool resolve_lex_action(const LexAction &old_action,
const LexAction &new_action);
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_

View file

@ -6,45 +6,42 @@
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::ostream;
using std::vector;
namespace build_tables {
namespace build_tables {
// Forwards both arguments unchanged to the Item base constructor.
LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule) :
Item(lhs, rule) {}
using std::string;
using std::ostream;
using std::vector;
// Two lex items are equal when their left-hand-side symbols are equal and
// their rules compare equal by value (not by pointer identity).
bool LexItem::operator==(const LexItem &other) const {
  if (!(other.lhs == lhs))
    return false;
  return other.rule->operator==(*rule);
}
// Forwards both arguments unchanged to the Item base constructor.
LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule)
: Item(lhs, rule) {}
// Applies a local rule visitor to this item's rule to decide whether the
// item sits at the start of a token.
bool LexItem::is_token_start() const {
  class IsTokenStart : public rules::RuleFn<bool> {
    // A sequence qualifies if its left member does, or if the left member
    // can be blank and the right member qualifies.
    bool apply_to(const rules::Seq *sequence) {
      if (apply(sequence->left))
        return true;
      return rule_can_be_blank(sequence->left) && apply(sequence->right);
    }

    // A metadata node answers with its START_TOKEN value, converted to bool.
    bool apply_to(const rules::Metadata *metadata) {
      return metadata->value_for(rules::START_TOKEN);
    }
  };

  return IsTokenStart().apply(rule);
}
// Writes the item as `#<item LHS RULE>` and returns the stream.
ostream &operator<<(ostream &stream, const LexItem &item) {
  stream << string("#<item ") << item.lhs;
  stream << string(" ") << *item.rule;
  return stream << string(">");
}
}
// Two lex items are equal when their left-hand-side symbols are equal and
// their rules compare equal by value (not by pointer identity).
bool LexItem::operator==(const LexItem &other) const {
  if (!(other.lhs == lhs))
    return false;
  return other.rule->operator==(*rule);
}
// Applies a local rule visitor to this item's rule to decide whether the
// item sits at the start of a token.
bool LexItem::is_token_start() const {
  class IsTokenStart : public rules::RuleFn<bool> {
    // A sequence qualifies if its left member does, or if the left member
    // can be blank and the right member qualifies.
    bool apply_to(const rules::Seq *sequence) {
      if (apply(sequence->left))
        return true;
      return rule_can_be_blank(sequence->left) && apply(sequence->right);
    }

    // A metadata node answers with its START_TOKEN value, converted to bool.
    bool apply_to(const rules::Metadata *metadata) {
      return metadata->value_for(rules::START_TOKEN);
    }
  };

  return IsTokenStart().apply(rule);
}
// Writes the item as `#<item LHS RULE>` and returns the stream.
ostream &operator<<(ostream &stream, const LexItem &item) {
  stream << string("#<item ") << item.lhs;
  stream << string(" ") << *item.rule;
  return stream << string(">");
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -6,39 +6,42 @@
#include "compiler/build_tables/item.h"
namespace tree_sitter {
namespace build_tables {
// An Item specialised for lexing; it adds value equality and a
// token-start query on top of the base class.
class LexItem : public Item {
public:
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
// Equal when both the lhs symbols and the pointed-to rules compare equal.
bool operator==(const LexItem &other) const;
// Whether this item's rule is positioned at the start of a token.
bool is_token_start() const;
};
namespace build_tables {
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
// An Item specialised for lexing; it adds value equality and a
// token-start query on top of the base class.
class LexItem : public Item {
public:
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
// Equal when both the lhs symbols and the pointed-to rules compare equal.
bool operator==(const LexItem &other) const;
// Whether this item's rule is positioned at the start of a token.
bool is_token_start() const;
};
typedef std::unordered_set<LexItem> LexItemSet;
}
}
std::ostream &operator<<(std::ostream &stream, const LexItem &item);
typedef std::unordered_set<LexItem> LexItemSet;
} // namespace build_tables
} // namespace tree_sitter
namespace std {
// Hashes a lex item by XOR-ing the hash of its lhs symbol with the hash of
// its rule pointer.
// NOTE(review): assumes hash<rule_ptr> is consistent with the by-value rule
// comparison in LexItem::operator== -- confirm where it is specialised.
template <>
struct hash<tree_sitter::build_tables::LexItem> {
  size_t operator()(const tree_sitter::build_tables::Item &item) const {
    const size_t lhs_hash = hash<tree_sitter::rules::Symbol>()(item.lhs);
    const size_t rule_hash = hash<tree_sitter::rules::rule_ptr>()(item.rule);
    return lhs_hash ^ rule_hash;
  }
};
// Hashes a set of lex items: starts from the hash of the set's size and
// XORs in the hash of every element. XOR is commutative, so the result
// does not depend on the unordered set's iteration order.
template <>
struct hash<const tree_sitter::build_tables::LexItemSet> {
  size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const {
    size_t result = hash<size_t>()(set.size());
    // Iterate by const reference: copying each LexItem (and its shared
    // rule pointer) just to hash it is wasted work.
    for (const auto &item : set)
      result ^= hash<tree_sitter::build_tables::LexItem>()(item);
    return result;
  }
};
}
// Hashes a lex item by XOR-ing the hash of its lhs symbol with the hash of
// its rule pointer.
// NOTE(review): assumes hash<rule_ptr> is consistent with the by-value rule
// comparison in LexItem::operator== -- confirm where it is specialised.
template <>
struct hash<tree_sitter::build_tables::LexItem> {
  size_t operator()(const tree_sitter::build_tables::Item &item) const {
    const size_t lhs_hash = hash<tree_sitter::rules::Symbol>()(item.lhs);
    const size_t rule_hash = hash<tree_sitter::rules::rule_ptr>()(item.rule);
    return lhs_hash ^ rule_hash;
  }
};
// Hashes a set of lex items: starts from the hash of the set's size and
// XORs in the hash of every element. XOR is commutative, so the result
// does not depend on the unordered set's iteration order.
template <>
struct hash<const tree_sitter::build_tables::LexItemSet> {
  size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const {
    size_t result = hash<size_t>()(set.size());
    // Iterate by const reference: copying each LexItem (and its shared
    // rule pointer) just to hash it is wasted work.
    for (const auto &item : set)
      result ^= hash<tree_sitter::build_tables::LexItem>()(item);
    return result;
  }
};
} // namespace std
#endif // COMPILER_BUILD_TABLES_LEX_ITEM_H_

View file

@ -7,69 +7,71 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
namespace build_tables {
/*
 * Merges a new transition into a map with symbol keys.
 * If the symbol already exists in the map, the new value for that
 * symbol will be computed by merging the old and new values
 * using the given function; otherwise the pair is inserted as-is.
 *
 * left     - map to update in place.
 * new_pair - symbol and value to merge in.
 * merge_fn - called as merge_fn(&existing_value, &new_value) when the
 *            symbol is already present; it must fold the new value into
 *            the existing one.
 */
template <typename T>
void merge_sym_transition(std::map<rules::Symbol, T> *left,
                          const std::pair<rules::Symbol, T> &new_pair,
                          std::function<void(T *, const T *)> merge_fn) {
  // Use the map's own ordered lookup instead of scanning every entry and
  // copying each key along the way.
  auto existing = left->find(new_pair.first);
  if (existing != left->end()) {
    merge_fn(&existing->second, &new_pair.second);
    return;
  }
  left->insert(new_pair);
}
/*
 * Merges a new transition into a map with character set keys. If the
 * new character set overlaps character sets already in the map, the
 * value for each intersection will be computed by merging the old and
 * new values using the given function, and the non-overlapping parts
 * keep their original values.
 *
 * NOTE(review): this relies on CharacterSet::remove_set removing the
 * argument's characters from the receiver and returning the characters
 * that were actually removed -- confirm against CharacterSet.
 */
template<typename T>
void merge_char_transition(std::map<rules::CharacterSet, T> *left,
const std::pair<rules::CharacterSet, T> &new_pair,
std::function<void(T *, const T *)> merge_fn) {
// Working copy of the new key; shrinks as overlaps are peeled off below.
rules::CharacterSet new_char_set = new_pair.first;
T new_value = new_pair.second;
// Replacement entries are collected here and inserted after the loop, so
// the map is never inserted into while it is being walked.
std::map<rules::CharacterSet, T> pairs_to_insert;
auto iter = left->begin();
while (iter != left->end()) {
// Copies, so the mutations below do not touch the map entry itself.
rules::CharacterSet char_set = iter->first;
T value = iter->second;
rules::CharacterSet intersection = char_set.remove_set(new_char_set);
if (!intersection.is_empty()) {
new_char_set.remove_set(intersection);
// What is left of the existing key keeps the existing (unmerged) value.
if (!char_set.is_empty())
pairs_to_insert.insert({ char_set, value });
// The overlap gets the existing value with the new value merged in.
merge_fn(&value, &new_value);
pairs_to_insert.insert({ intersection, value });
// Post-increment keeps `iter` valid across the erase.
left->erase(iter++);
} else {
++iter;
}
}
left->insert(pairs_to_insert.begin(), pairs_to_insert.end());
// Whatever part of the new key overlapped nothing is added unmerged.
if (!new_char_set.is_empty())
left->insert({ new_char_set, new_pair.second });
}
/*
 * Merges a new transition into a map with symbol keys.
 * If the symbol already exists in the map, the new value for that
 * symbol will be computed by merging the old and new values
 * using the given function; otherwise the pair is inserted as-is.
 *
 * left     - map to update in place.
 * new_pair - symbol and value to merge in.
 * merge_fn - called as merge_fn(&existing_value, &new_value) when the
 *            symbol is already present; it must fold the new value into
 *            the existing one.
 */
template <typename T>
void merge_sym_transition(std::map<rules::Symbol, T> *left,
                          const std::pair<rules::Symbol, T> &new_pair,
                          std::function<void(T *, const T *)> merge_fn) {
  // Use the map's own ordered lookup instead of scanning every entry and
  // copying each key along the way.
  auto existing = left->find(new_pair.first);
  if (existing != left->end()) {
    merge_fn(&existing->second, &new_pair.second);
    return;
  }
  left->insert(new_pair);
}
/*
 * Merges a new transition into a map with character set keys. If the
 * new character set overlaps character sets already in the map, the
 * value for each intersection will be computed by merging the old and
 * new values using the given function, and the non-overlapping parts
 * keep their original values.
 *
 * NOTE(review): this relies on CharacterSet::remove_set removing the
 * argument's characters from the receiver and returning the characters
 * that were actually removed -- confirm against CharacterSet.
 */
template <typename T>
void merge_char_transition(std::map<rules::CharacterSet, T> *left,
const std::pair<rules::CharacterSet, T> &new_pair,
std::function<void(T *, const T *)> merge_fn) {
// Working copy of the new key; shrinks as overlaps are peeled off below.
rules::CharacterSet new_char_set = new_pair.first;
T new_value = new_pair.second;
// Replacement entries are collected here and inserted after the loop, so
// the map is never inserted into while it is being walked.
std::map<rules::CharacterSet, T> pairs_to_insert;
auto iter = left->begin();
while (iter != left->end()) {
// Copies, so the mutations below do not touch the map entry itself.
rules::CharacterSet char_set = iter->first;
T value = iter->second;
rules::CharacterSet intersection = char_set.remove_set(new_char_set);
if (!intersection.is_empty()) {
new_char_set.remove_set(intersection);
// What is left of the existing key keeps the existing (unmerged) value.
if (!char_set.is_empty())
pairs_to_insert.insert({ char_set, value });
// The overlap gets the existing value with the new value merged in.
merge_fn(&value, &new_value);
pairs_to_insert.insert({ intersection, value });
// Post-increment keeps `iter` valid across the erase.
left->erase(iter++);
} else {
++iter;
}
}
left->insert(pairs_to_insert.begin(), pairs_to_insert.end());
// Whatever part of the new key overlapped nothing is added unmerged.
if (!new_char_set.is_empty())
left->insert({ new_char_set, new_pair.second });
}
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_

View file

@ -8,127 +8,130 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
using std::string;
using std::to_string;
using std::map;
using std::set;
using std::vector;
namespace build_tables {
// Initializes the `parse_grammar` and `lex_grammar` members from the given
// grammars; the conflict set starts out empty.
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar) :
parse_grammar(parse_grammar),
lex_grammar(lex_grammar) {}
using std::string;
using std::to_string;
using std::map;
using std::set;
using std::vector;
bool ParseConflictManager::resolve_parse_action(const rules::Symbol &symbol,
const ParseAction &old_action,
const ParseAction &new_action) {
if (new_action.type < old_action.type)
return !resolve_parse_action(symbol, new_action, old_action);
// Initializes the `parse_grammar` and `lex_grammar` members from the given
// grammars; the conflict set starts out empty.
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar)
: parse_grammar(parse_grammar), lex_grammar(lex_grammar) {}
switch (old_action.type) {
case ParseActionTypeError:
return true;
case ParseActionTypeShift: {
int min_precedence = *old_action.precedence_values.begin();
int max_precedence = *old_action.precedence_values.rbegin();
switch (new_action.type) {
case ParseActionTypeReduce: {
int new_precedence = *new_action.precedence_values.rbegin();
if (max_precedence > new_precedence) {
if (min_precedence < new_precedence)
record_conflict(symbol, old_action, new_action);
return false;
} else if (max_precedence < new_precedence) {
return true;
} else {
record_conflict(symbol, old_action, new_action);
return false;
}
}
default:
return false;
}
}
case ParseActionTypeReduce:
switch (new_action.type) {
case ParseActionTypeReduce: {
int old_precedence = *old_action.precedence_values.begin();
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return true;
} else if (new_precedence < old_precedence) {
return false;
} else {
record_conflict(symbol, old_action, new_action);
return new_action.symbol.index < old_action.symbol.index;
}
}
default:
return false;
}
default:
return false;
}
}
// Returns a copy of every recorded conflict as a vector, in the iteration
// order of the underlying conflict container.
const vector<Conflict> ParseConflictManager::conflicts() const {
  return vector<Conflict>(conflicts_.begin(), conflicts_.end());
}
// Renders an action's precedence values as "(precedence a, b, ...)".
// With no values the result is "(precedence )".
string precedence_string(const ParseAction &action) {
  string joined;
  for (auto value : action.precedence_values) {
    // to_string never yields an empty string, so a non-empty buffer means
    // at least one value has already been written.
    if (!joined.empty())
      joined += ", ";
    joined += to_string(value);
  }
  return "(precedence " + joined + ")";
}
// Describes a parse action for use in a conflict message: shifts and
// reductions include their precedence values, and a reduction also names
// the rule being reduced as reported by the grammar.
string message_for_action(const ParseAction &action, const SyntaxGrammar &parse_grammar) {
  if (action.type == ParseActionTypeShift)
    return "shift " + precedence_string(action);

  if (action.type == ParseActionTypeReduce) {
    string name = parse_grammar.rule_name(action.symbol);
    // A reduction whose symbol has no name in the grammar is reported by
    // its index instead.
    if (name == "")
      return "ERROR" + to_string(action.symbol.index);
    return "reduce " + name + " " + precedence_string(action);
  }

  if (action.type == ParseActionTypeAccept)
    return "accept";

  return "error";
}
// Resolves a symbol to a printable name: built-in symbols get a fixed
// label (or the empty string when unrecognised), tokens are looked up in
// the lexical grammar, and everything else in the syntax grammar.
string ParseConflictManager::symbol_name(const rules::Symbol &symbol) {
  if (!symbol.is_built_in()) {
    if (symbol.is_token())
      return lex_grammar.rule_name(symbol);
    return parse_grammar.rule_name(symbol);
  }

  if (symbol == rules::ERROR())
    return "ERROR";
  if (symbol == rules::END_OF_INPUT())
    return "END_OF_INPUT";
  return "";
}
void ParseConflictManager::record_conflict(const rules::Symbol &symbol,
const ParseAction &left,
const ParseAction &right) {
conflicts_.insert(Conflict(symbol_name(symbol) + ": " +
message_for_action(left, parse_grammar) + " / " +
message_for_action(right, parse_grammar)));
// Compares an existing parse action with a candidate for the same lookahead
// symbol, recording a conflict whenever precedence does not settle the
// choice cleanly.
// NOTE(review): a `true` result appears to mean the new action should
// replace the old one -- confirm against the callers.
bool ParseConflictManager::resolve_parse_action(const rules::Symbol &symbol,
const ParseAction &old_action,
const ParseAction &new_action) {
// Normalise so that the switch below only sees old.type <= new.type: swap
// the arguments and invert the answer. Any conflict recorded by the swapped
// call lists the actions in swapped order.
if (new_action.type < old_action.type)
return !resolve_parse_action(symbol, new_action, old_action);
switch (old_action.type) {
case ParseActionTypeError:
return true;
case ParseActionTypeShift: {
// The lowest and highest precedence values of the shift; this dereferences
// begin()/rbegin(), so precedence_values must not be empty here.
int min_precedence = *old_action.precedence_values.begin();
int max_precedence = *old_action.precedence_values.rbegin();
switch (new_action.type) {
case ParseActionTypeReduce: {
int new_precedence = *new_action.precedence_values.rbegin();
if (max_precedence > new_precedence) {
// The shift wins, but if part of its precedence range lies below the
// reduction's precedence the outcome is ambiguous, so report it.
if (min_precedence < new_precedence)
record_conflict(symbol, old_action, new_action);
return false;
} else if (max_precedence < new_precedence) {
return true;
} else {
// Equal precedence: keep the shift and report the conflict.
record_conflict(symbol, old_action, new_action);
return false;
}
}
default:
return false;
}
}
case ParseActionTypeReduce:
switch (new_action.type) {
case ParseActionTypeReduce: {
int old_precedence = *old_action.precedence_values.begin();
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return true;
} else if (new_precedence < old_precedence) {
return false;
} else {
// Equal precedence: report the conflict and break the tie in favour of
// the action whose symbol has the lower index.
record_conflict(symbol, old_action, new_action);
return new_action.symbol.index < old_action.symbol.index;
}
}
default:
return false;
}
default:
return false;
}
}
// Returns a copy of every recorded conflict as a vector, in the iteration
// order of the underlying conflict container.
const vector<Conflict> ParseConflictManager::conflicts() const {
  return vector<Conflict>(conflicts_.begin(), conflicts_.end());
}
// Renders an action's precedence values as "(precedence a, b, ...)".
// With no values the result is "(precedence )".
string precedence_string(const ParseAction &action) {
  string joined;
  for (auto value : action.precedence_values) {
    // to_string never yields an empty string, so a non-empty buffer means
    // at least one value has already been written.
    if (!joined.empty())
      joined += ", ";
    joined += to_string(value);
  }
  return "(precedence " + joined + ")";
}
// Describes a parse action for use in a conflict message: shifts and
// reductions include their precedence values, and a reduction also names
// the rule being reduced as reported by the grammar.
string message_for_action(const ParseAction &action,
                          const SyntaxGrammar &parse_grammar) {
  if (action.type == ParseActionTypeShift)
    return "shift " + precedence_string(action);

  if (action.type == ParseActionTypeReduce) {
    string name = parse_grammar.rule_name(action.symbol);
    // A reduction whose symbol has no name in the grammar is reported by
    // its index instead.
    if (name == "")
      return "ERROR" + to_string(action.symbol.index);
    return "reduce " + name + " " + precedence_string(action);
  }

  if (action.type == ParseActionTypeAccept)
    return "accept";

  return "error";
}
// Resolves a symbol to a printable name: built-in symbols get a fixed
// label (or the empty string when unrecognised), tokens are looked up in
// the lexical grammar, and everything else in the syntax grammar.
string ParseConflictManager::symbol_name(const rules::Symbol &symbol) {
  if (!symbol.is_built_in()) {
    if (symbol.is_token())
      return lex_grammar.rule_name(symbol);
    return parse_grammar.rule_name(symbol);
  }

  if (symbol == rules::ERROR())
    return "ERROR";
  if (symbol == rules::END_OF_INPUT())
    return "END_OF_INPUT";
  return "";
}
// Adds a conflict described as "<symbol>: <left action> / <right action>"
// to the set of recorded conflicts.
void ParseConflictManager::record_conflict(const rules::Symbol &symbol,
                                           const ParseAction &left,
                                           const ParseAction &right) {
  const string subject = symbol_name(symbol);
  const string left_message = message_for_action(left, parse_grammar);
  const string right_message = message_for_action(right, parse_grammar);
  conflicts_.insert(
      Conflict(subject + ": " + left_message + " / " + right_message));
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -11,25 +11,28 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
class ParseConflictManager {
const SyntaxGrammar parse_grammar;
const LexicalGrammar lex_grammar;
std::set<Conflict> conflicts_;
namespace build_tables {
public:
ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar);
bool resolve_parse_action(const rules::Symbol &symbol,
const ParseAction &old_action,
const ParseAction &new_action);
const std::vector<Conflict> conflicts() const;
class ParseConflictManager {
const SyntaxGrammar parse_grammar;
const LexicalGrammar lex_grammar;
std::set<Conflict> conflicts_;
private:
std::string symbol_name(const rules::Symbol &symbol);
void record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right);
};
}
}
public:
ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar);
bool resolve_parse_action(const rules::Symbol &symbol,
const ParseAction &old_action,
const ParseAction &new_action);
const std::vector<Conflict> conflicts() const;
private:
std::string symbol_name(const rules::Symbol &symbol);
void record_conflict(const rules::Symbol &symbol, const ParseAction &left,
const ParseAction &right);
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_

View file

@ -3,29 +3,28 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
using std::pair;
using std::set;
using std::string;
using std::to_string;
using std::ostream;
namespace build_tables {
namespace build_tables {
// Forwards `lhs` and `rule` to the Item base constructor and stores the
// number of symbols already consumed.
ParseItem::ParseItem(const rules::Symbol &lhs,
const rules::rule_ptr rule,
size_t consumed_symbol_count) :
Item(lhs, rule),
consumed_symbol_count(consumed_symbol_count) {}
using std::pair;
using std::set;
using std::string;
using std::to_string;
using std::ostream;
// Two parse items are equal when their lhs symbols and consumed-symbol
// counts match and their rules are either the same object or compare
// equal by value.
bool ParseItem::operator==(const ParseItem &other) const {
  if (!(lhs == other.lhs))
    return false;
  if (!(consumed_symbol_count == other.consumed_symbol_count))
    return false;
  // Pointer identity is a cheap shortcut before the deep comparison.
  if (rule == other.rule)
    return true;
  return rule->operator==(*other.rule);
}
// Forwards `lhs` and `rule` to the Item base constructor and stores the
// number of symbols already consumed.
ParseItem::ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule,
size_t consumed_symbol_count)
: Item(lhs, rule), consumed_symbol_count(consumed_symbol_count) {}
// Writes the item as `#<item LHS RULE>` and returns the stream. The
// consumed-symbol count is not part of the output.
ostream &operator<<(ostream &stream, const ParseItem &item) {
  stream << string("#<item ") << item.lhs;
  stream << string(" ") << *item.rule;
  return stream << string(">");
}
}
// Two parse items are equal when their lhs symbols and consumed-symbol
// counts match and their rules are either the same object or compare
// equal by value.
bool ParseItem::operator==(const ParseItem &other) const {
  if (!(lhs == other.lhs))
    return false;
  if (!(consumed_symbol_count == other.consumed_symbol_count))
    return false;
  // Pointer identity is a cheap shortcut before the deep comparison.
  if (rule == other.rule)
    return true;
  return rule->operator==(*other.rule);
}
// Writes the item as `#<item LHS RULE>` and returns the stream. The
// consumed-symbol count is not part of the output.
ostream &operator<<(ostream &stream, const ParseItem &item) {
  stream << string("#<item ") << item.lhs;
  stream << string(" ") << *item.rule;
  return stream << string(">");
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -8,44 +8,48 @@
#include "compiler/build_tables/item.h"
namespace tree_sitter {
namespace build_tables {
// An Item specialised for parsing; it additionally tracks how many symbols
// have been consumed so far.
class ParseItem : public Item {
public:
ParseItem(const rules::Symbol &lhs, rules::rule_ptr rule, const size_t consumed_symbol_count);
// Equal when lhs, consumed_symbol_count and the rules all compare equal.
bool operator==(const ParseItem &other) const;
// Number of symbols consumed so far for this item.
size_t consumed_symbol_count;
};
namespace build_tables {
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
// An Item specialised for parsing; it additionally tracks how many symbols
// have been consumed so far.
class ParseItem : public Item {
public:
ParseItem(const rules::Symbol &lhs, rules::rule_ptr rule,
const size_t consumed_symbol_count);
// Equal when lhs, consumed_symbol_count and the rules all compare equal.
bool operator==(const ParseItem &other) const;
// Number of symbols consumed so far for this item.
size_t consumed_symbol_count;
};
typedef std::unordered_map<ParseItem, std::set<rules::Symbol>> ParseItemSet;
}
}
std::ostream &operator<<(std::ostream &stream, const ParseItem &item);
typedef std::unordered_map<ParseItem, std::set<rules::Symbol> > ParseItemSet;
} // namespace build_tables
} // namespace tree_sitter
namespace std {
// Hashes a parse item by XOR-ing together the hashes of its lhs symbol,
// its rule pointer and its consumed-symbol count.
template <>
struct hash<tree_sitter::build_tables::ParseItem> {
  size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
    size_t digest = hash<tree_sitter::rules::Symbol>()(item.lhs);
    digest ^= hash<tree_sitter::rules::rule_ptr>()(item.rule);
    digest ^= hash<size_t>()(item.consumed_symbol_count);
    return digest;
  }
};
// Hashes a parse item set: starts from the hash of the set's size and, for
// every entry, XORs in the item's hash, the hash of its lookahead count
// and the hash of each lookahead symbol. XOR is commutative, so the result
// does not depend on iteration order.
template <>
struct hash<const tree_sitter::build_tables::ParseItemSet> {
  size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const {
    size_t digest = hash<size_t>()(set.size());
    for (auto &entry : set) {
      const auto &lookaheads = entry.second;
      digest ^= hash<tree_sitter::build_tables::ParseItem>()(entry.first);
      digest ^= hash<size_t>()(lookaheads.size());
      for (auto &lookahead : lookaheads)
        digest ^= hash<tree_sitter::rules::Symbol>()(lookahead);
    }
    return digest;
  }
};
}
// Hashes a parse item by XOR-ing together the hashes of its lhs symbol,
// its rule pointer and its consumed-symbol count.
template <>
struct hash<tree_sitter::build_tables::ParseItem> {
  size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
    size_t digest = hash<tree_sitter::rules::Symbol>()(item.lhs);
    digest ^= hash<tree_sitter::rules::rule_ptr>()(item.rule);
    digest ^= hash<size_t>()(item.consumed_symbol_count);
    return digest;
  }
};
// Hashes a parse item set: starts from the hash of the set's size and, for
// every entry, XORs in the item's hash, the hash of its lookahead count
// and the hash of each lookahead symbol. XOR is commutative, so the result
// does not depend on iteration order.
template <>
struct hash<const tree_sitter::build_tables::ParseItemSet> {
  size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const {
    size_t digest = hash<size_t>()(set.size());
    for (auto &entry : set) {
      const auto &lookaheads = entry.second;
      digest ^= hash<tree_sitter::build_tables::ParseItem>()(entry.first);
      digest ^= hash<size_t>()(lookaheads.size());
      for (auto &lookahead : lookaheads)
        digest ^= hash<tree_sitter::rules::Symbol>()(lookahead);
    }
    return digest;
  }
};
} // namespace std
#endif // COMPILER_BUILD_TABLES_PARSE_ITEM_H_

View file

@ -9,60 +9,59 @@
#include "compiler/rules/blank.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
using std::set;
namespace tree_sitter {
namespace build_tables {
namespace build_tables {
class CanBeBlank : public rules::RuleFn<bool> {
protected:
bool apply_to(const rules::Blank *) {
return true;
}
using std::set;
bool apply_to(const rules::Repeat *rule) {
return true;
}
class CanBeBlank : public rules::RuleFn<bool> {
protected:
bool apply_to(const rules::Blank *) { return true; }
bool apply_to(const rules::Choice *rule) {
for (const auto &element : rule->elements)
if (apply(element)) return true;
return false;
}
bool apply_to(const rules::Repeat *rule) { return true; }
bool apply_to(const rules::Seq *rule) {
return apply(rule->left) && apply(rule->right);
}
bool apply_to(const rules::Choice *rule) {
for (const auto &element : rule->elements)
if (apply(element))
return true;
return false;
}
bool apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
};
bool apply_to(const rules::Seq *rule) {
return apply(rule->left) && apply(rule->right);
}
class CanBeBlankRecursive : public CanBeBlank {
const SyntaxGrammar *grammar;
set<rules::Symbol> visited_symbols;
using CanBeBlank::visit;
bool apply_to(const rules::Metadata *rule) { return apply(rule->rule); }
};
public:
using CanBeBlank::apply_to;
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar) : grammar(grammar) {}
class CanBeBlankRecursive : public CanBeBlank {
const SyntaxGrammar *grammar;
set<rules::Symbol> visited_symbols;
using CanBeBlank::visit;
bool apply_to(const rules::Symbol *rule) {
if (visited_symbols.find(*rule) == visited_symbols.end()) {
visited_symbols.insert(*rule);
return !rule->is_token() && apply(grammar->rule(*rule));
} else {
return false;
}
}
};
public:
using CanBeBlank::apply_to;
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar)
: grammar(grammar) {}
// Reports whether the rule can be blank, judged from the rule's own
// structure by a CanBeBlank visitor (no grammar is consulted).
bool rule_can_be_blank(const rules::rule_ptr &rule) {
  CanBeBlank checker;
  return checker.apply(rule);
}
// Reports whether the rule can be blank, using a CanBeBlankRecursive
// visitor that is given the grammar so it can look up the rules behind
// symbols.
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
  CanBeBlankRecursive checker(&grammar);
  return checker.apply(rule);
}
bool apply_to(const rules::Symbol *rule) {
if (visited_symbols.find(*rule) == visited_symbols.end()) {
visited_symbols.insert(*rule);
return !rule->is_token() && apply(grammar->rule(*rule));
} else {
return false;
}
}
};
// Reports whether the rule can be blank, judged from the rule's own
// structure by a CanBeBlank visitor (no grammar is consulted).
bool rule_can_be_blank(const rules::rule_ptr &rule) {
  CanBeBlank checker;
  return checker.apply(rule);
}
// Reports whether the rule can be blank, using a CanBeBlankRecursive
// visitor that is given the grammar so it can look up the rules behind
// symbols.
bool rule_can_be_blank(const rules::rule_ptr &rule,
                       const SyntaxGrammar &grammar) {
  CanBeBlankRecursive checker(&grammar);
  return checker.apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -4,12 +4,16 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class SyntaxGrammar;
namespace build_tables {
bool rule_can_be_blank(const rules::rule_ptr &rule);
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
}
}
class SyntaxGrammar;
namespace build_tables {
bool rule_can_be_blank(const rules::rule_ptr &rule);
bool rule_can_be_blank(const rules::rule_ptr &rule,
const SyntaxGrammar &grammar);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_

View file

@ -13,87 +13,91 @@
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::map;
using std::make_shared;
using rules::rule_ptr;
using rules::Symbol;
using rules::CharacterSet;
namespace build_tables {
namespace build_tables {
template<typename T>
void merge_transitions(map<T, rule_ptr> *left, const map<T, rule_ptr> &right);
using std::map;
using std::make_shared;
using rules::rule_ptr;
using rules::Symbol;
using rules::CharacterSet;
template<>
void merge_transitions(map<CharacterSet, rule_ptr> *left, const map<CharacterSet, rule_ptr> &right) {
for (auto &pair : right)
merge_char_transition<rule_ptr>(left, pair, [](rule_ptr *left, const rule_ptr *right) {
*left = rules::Choice::Build({ *left, *right });
});
}
template <typename T>
void merge_transitions(map<T, rule_ptr> *left, const map<T, rule_ptr> &right);
template<>
void merge_transitions(map<Symbol, rule_ptr> *left, const map<Symbol, rule_ptr> &right) {
for (auto &pair : right)
merge_sym_transition<rule_ptr>(left, pair, [](rule_ptr *left, const rule_ptr *right) {
*left = rules::Choice::Build({ *left, *right });
});
}
template<typename T>
class RuleTransitions : public rules::RuleFn<map<T, rule_ptr>> {
map<T, rule_ptr> apply_to_atom(const rules::Rule *rule) {
auto atom = dynamic_cast<const T *>(rule);
if (atom)
return map<T, rule_ptr>({{ *atom, make_shared<rules::Blank>() }});
else
return map<T, rule_ptr>();
}
map<T, rule_ptr> apply_to(const CharacterSet *rule) {
return apply_to_atom(rule);
}
map<T, rule_ptr> apply_to(const Symbol *rule) {
return apply_to_atom(rule);
}
map<T, rule_ptr> apply_to(const rules::Choice *rule) {
map<T, rule_ptr> result;
for (const auto &el : rule->elements)
merge_transitions<T>(&result, this->apply(el));
return result;
}
map<T, rule_ptr> apply_to(const rules::Seq *rule) {
auto result = this->apply(rule->left);
for (auto &pair : result)
pair.second = rules::Seq::Build({ pair.second, rule->right });
if (rule_can_be_blank(rule->left))
merge_transitions<T>(&result, this->apply(rule->right));
return result;
}
map<T, rule_ptr> apply_to(const rules::Repeat *rule) {
auto result = this->apply(rule->content);
for (auto &pair : result)
pair.second = rules::Seq::Build({ pair.second, rule->copy() });
return result;
}
map<T, rule_ptr> apply_to(const rules::Metadata *rule) {
auto result = this->apply(rule->rule);
for (auto &pair : result)
pair.second = make_shared<rules::Metadata>(pair.second, rule->value);
return result;
}
};
map<CharacterSet, rule_ptr> char_transitions(const rule_ptr &rule) {
return RuleTransitions<CharacterSet>().apply(rule);
}
map<Symbol, rule_ptr> sym_transitions(const rule_ptr &rule) {
return RuleTransitions<Symbol>().apply(rule);
}
}
// Merges every character-set transition of `right` into `left`.
template <>
void merge_transitions(map<CharacterSet, rule_ptr> *left,
                       const map<CharacterSet, rule_ptr> &right) {
  // Callback handed to merge_char_transition: replaces *existing with a
  // choice between the two rules.
  auto combine = [](rule_ptr *existing, const rule_ptr *incoming) {
    *existing = rules::Choice::Build({ *existing, *incoming });
  };
  for (auto &transition : right) {
    merge_char_transition<rule_ptr>(left, transition, combine);
  }
}
// Merges every symbol transition of `right` into `left`.
template <>
void merge_transitions(map<Symbol, rule_ptr> *left,
                       const map<Symbol, rule_ptr> &right) {
  // Callback handed to merge_sym_transition: replaces *existing with a
  // choice between the two rules.
  auto combine = [](rule_ptr *existing, const rule_ptr *incoming) {
    *existing = rules::Choice::Build({ *existing, *incoming });
  };
  for (auto &transition : right) {
    merge_sym_transition<rule_ptr>(left, transition, combine);
  }
}
// Rule visitor producing a map from atoms of type T (CharacterSet or Symbol
// in this file) to the rule associated with each atom.
template <typename T>
class RuleTransitions : public rules::RuleFn<map<T, rule_ptr> > {
  // An atom whose dynamic type is T maps to a Blank rule; any other atom
  // contributes no transitions.
  map<T, rule_ptr> apply_to_atom(const rules::Rule *rule) {
    map<T, rule_ptr> transitions;
    if (const T *atom = dynamic_cast<const T *>(rule))
      transitions.insert({ *atom, make_shared<rules::Blank>() });
    return transitions;
  }

  map<T, rule_ptr> apply_to(const CharacterSet *rule) {
    return apply_to_atom(rule);
  }

  map<T, rule_ptr> apply_to(const Symbol *rule) {
    return apply_to_atom(rule);
  }

  // The transitions of a choice are the merged transitions of its elements.
  map<T, rule_ptr> apply_to(const rules::Choice *rule) {
    map<T, rule_ptr> merged;
    for (const auto &alternative : rule->elements) {
      merge_transitions<T>(&merged, this->apply(alternative));
    }
    return merged;
  }

  // Each transition out of the left half is followed by the right half. When
  // the left half can be blank, the right half's transitions are merged in.
  map<T, rule_ptr> apply_to(const rules::Seq *rule) {
    auto transitions = this->apply(rule->left);
    for (auto &entry : transitions) {
      entry.second = rules::Seq::Build({ entry.second, rule->right });
    }
    if (rule_can_be_blank(rule->left)) {
      merge_transitions<T>(&transitions, this->apply(rule->right));
    }
    return transitions;
  }

  // Each transition out of the repeated content is followed by a copy of the
  // repeat rule itself.
  map<T, rule_ptr> apply_to(const rules::Repeat *rule) {
    auto transitions = this->apply(rule->content);
    for (auto &entry : transitions) {
      entry.second = rules::Seq::Build({ entry.second, rule->copy() });
    }
    return transitions;
  }

  // Each resulting rule is re-wrapped in Metadata carrying the same value.
  map<T, rule_ptr> apply_to(const rules::Metadata *rule) {
    auto transitions = this->apply(rule->rule);
    for (auto &entry : transitions) {
      entry.second = make_shared<rules::Metadata>(entry.second, rule->value);
    }
    return transitions;
  }
};
// Returns the CharacterSet-keyed transitions of the given rule.
map<CharacterSet, rule_ptr> char_transitions(const rule_ptr &rule) {
  RuleTransitions<CharacterSet> visitor{};
  return visitor.apply(rule);
}
// Returns the Symbol-keyed transitions of the given rule.
map<Symbol, rule_ptr> sym_transitions(const rule_ptr &rule) {
  RuleTransitions<Symbol> visitor{};
  return visitor.apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -5,14 +5,16 @@
#include "compiler/rules/character_set.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
std::map<rules::CharacterSet, rules::rule_ptr>
char_transitions(const rules::rule_ptr &rule);
namespace tree_sitter {
namespace build_tables {
std::map<rules::Symbol, rules::rule_ptr>
sym_transitions(const rules::rule_ptr &rule);
}
}
std::map<rules::CharacterSet, rules::rule_ptr> char_transitions(
const rules::rule_ptr &rule);
std::map<rules::Symbol, rules::rule_ptr> sym_transitions(
const rules::rule_ptr &rule);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_RULE_TRANSITIONS_H_

View file

@ -5,29 +5,33 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::tuple;
using std::string;
using std::vector;
using std::get;
using std::make_tuple;
tuple<string, vector<Conflict>, const GrammarError *>
compile(const Grammar &grammar, std::string name) {
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
const GrammarError *error = get<2>(prepare_grammar_result);
using std::tuple;
using std::string;
using std::vector;
using std::get;
using std::make_tuple;
if (error)
return make_tuple("", vector<Conflict>(), error);
tuple<string, vector<Conflict>, const GrammarError *> compile(
const Grammar &grammar, std::string name) {
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
const GrammarError *error = get<2>(prepare_grammar_result);
auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar);
const ParseTable &parse_table = get<0>(table_build_result);
const LexTable &lex_table = get<1>(table_build_result);
const vector<Conflict> &conflicts = get<2>(table_build_result);
if (error)
return make_tuple("", vector<Conflict>(), error);
string code = generate_code::c_code(name, parse_table, lex_table, syntax_grammar, lexical_grammar);
auto table_build_result =
build_tables::build_tables(syntax_grammar, lexical_grammar);
const ParseTable &parse_table = get<0>(table_build_result);
const LexTable &lex_table = get<1>(table_build_result);
const vector<Conflict> &conflicts = get<2>(table_build_result);
return make_tuple(code, conflicts, nullptr);
}
string code = generate_code::c_code(name, parse_table, lex_table,
syntax_grammar, lexical_grammar);
return make_tuple(code, conflicts, nullptr);
}
} // namespace tree_sitter

View file

@ -2,19 +2,21 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
using std::string;
Conflict::Conflict(string description) : description(description) {}
using std::string;
bool Conflict::operator==(const tree_sitter::Conflict &other) const {
return other.description == description;
}
// Creates a conflict carrying the given description text.
Conflict::Conflict(string description)
    : description(description) {
}
bool Conflict::operator<(const tree_sitter::Conflict &other) const {
return other.description < description;
}
std::ostream& operator<<(std::ostream &stream, const Conflict &conflict) {
return stream << "#<conflict " + conflict.description + ">";
}
// Two conflicts are equal when their descriptions are equal.
bool Conflict::operator==(const tree_sitter::Conflict &other) const {
  return description == other.description;
}
// Orders conflicts by description in DESCENDING order: `a < b` holds when
// a.description compares greater than b.description.
// NOTE(review): confirm the reversed direction is intended.
bool Conflict::operator<(const tree_sitter::Conflict &other) const {
  return description > other.description;
}
// Writes the conflict as "#<conflict DESCRIPTION>" in a single insertion.
std::ostream &operator<<(std::ostream &stream, const Conflict &conflict) {
  const auto text = "#<conflict " + conflict.description + ">";
  return stream << text;
}
} // namespace tree_sitter

View file

@ -10,393 +10,378 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::string;
using std::to_string;
using std::function;
using std::map;
using std::vector;
using std::set;
using std::pair;
using util::escape_char;
namespace generate_code {
using std::string;
using std::to_string;
using std::function;
using std::map;
using std::vector;
using std::set;
using std::pair;
using util::escape_char;
namespace generate_code {
class CCodeGenerator {
string buffer;
size_t indent_level;
class CCodeGenerator {
string buffer;
size_t indent_level;
const string name;
const ParseTable parse_table;
const LexTable lex_table;
const SyntaxGrammar syntax_grammar;
const LexicalGrammar lexical_grammar;
map<string, string> sanitized_names;
const string name;
const ParseTable parse_table;
const LexTable lex_table;
const SyntaxGrammar syntax_grammar;
const LexicalGrammar lexical_grammar;
map<string, string> sanitized_names;
public:
CCodeGenerator(string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar) :
indent_level(0),
name(name),
parse_table(parse_table),
lex_table(lex_table),
syntax_grammar(syntax_grammar),
lexical_grammar(lexical_grammar) {}
public:
// Copies the parser name, both tables and both grammars into the generator
// and starts with an indent level of zero.
CCodeGenerator(string name,
               const ParseTable &parse_table,
               const LexTable &lex_table,
               const SyntaxGrammar &syntax_grammar,
               const LexicalGrammar &lexical_grammar)
    : indent_level(0),
      name(name),
      parse_table(parse_table),
      lex_table(lex_table),
      syntax_grammar(syntax_grammar),
      lexical_grammar(lexical_grammar) {
}
string code() {
buffer = "";
string code() {
buffer = "";
includes();
state_and_symbol_counts();
symbol_enum();
symbol_names_list();
hidden_symbols_list();
lex_function();
lex_states_list();
parse_table_array();
parser_export();
includes();
state_and_symbol_counts();
symbol_enum();
symbol_names_list();
hidden_symbols_list();
lex_function();
lex_states_list();
parse_table_array();
parser_export();
return buffer;
}
return buffer;
}
private:
void includes() {
add("#include \"tree_sitter/parser.h\"");
line();
}
private:
// Appends the #include of the parser header, followed by a newline.
void includes() {
  const char *const directive = "#include \"tree_sitter/parser.h\"";
  add(directive);
  line();
}
void state_and_symbol_counts() {
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
line();
}
// Emits the STATE_COUNT and SYMBOL_COUNT #defines from the parse table sizes.
void state_and_symbol_counts() {
  const auto state_count = to_string(parse_table.states.size());
  const auto symbol_count = to_string(parse_table.symbols.size());
  line("#define STATE_COUNT " + state_count);
  line("#define SYMBOL_COUNT " + symbol_count);
  line();
}
void symbol_enum() {
line("enum {");
indent([&]() {
bool at_start = true;
for (auto symbol : parse_table.symbols)
if (!symbol.is_built_in()) {
if (at_start)
line(symbol_id(symbol) + " = ts_start_sym,");
else
line(symbol_id(symbol) + ",");
at_start = false;
}
});
line("};");
line();
}
void symbol_names_list() {
set<rules::Symbol> symbols(parse_table.symbols);
symbols.insert(rules::END_OF_INPUT());
symbols.insert(rules::ERROR());
line("SYMBOL_NAMES = {");
indent([&]() {
for (auto symbol : parse_table.symbols)
line("[" + symbol_id(symbol) + "] = \"" + symbol_name(symbol) + "\",");
});
line("};");
line();
}
void hidden_symbols_list() {
line("HIDDEN_SYMBOLS = {");
indent([&]() {
for (auto &symbol : parse_table.symbols)
if (!symbol.is_built_in() && (symbol.is_auxiliary() || rule_name(symbol)[0] == '_'))
line("[" + symbol_id(symbol) + "] = 1,");
});
line("};");
line();
}
void lex_function() {
line("LEX_FN() {");
indent([&]() {
line("START_LEXER();");
switch_on_lex_state();
});
line("}");
line();
}
void lex_states_list() {
line("LEX_STATES = {");
indent([&]() {
size_t state_id = 0;
for (auto &state : parse_table.states)
line("[" + to_string(state_id++) + "] = " + lex_state_index(state.lex_state_id) + ",");
});
line("};");
line();
}
void parse_table_array() {
size_t state_id = 0;
line("#pragma GCC diagnostic push");
line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
line();
line("PARSE_TABLE = {");
indent([&]() {
for (auto &state : parse_table.states) {
line("[" + to_string(state_id++) + "] = {");
indent([&]() {
for (auto &pair : state.actions) {
line("[" + symbol_id(pair.first) + "] = ");
code_for_parse_action(pair.second);
add(",");
}
});
line("},");
}
});
line("};");
line();
line("#pragma GCC diagnostic pop");
line();
}
void parser_export() {
line("EXPORT_PARSER(ts_parser_" + name + ");");
line();
}
string rule_name(const rules::Symbol &symbol) {
return symbol.is_token() ?
lexical_grammar.rule_name(symbol) :
syntax_grammar.rule_name(symbol);
}
string symbol_id(const rules::Symbol &symbol) {
if (symbol.is_built_in()) {
return (symbol == rules::ERROR()) ?
"ts_builtin_sym_error" :
"ts_builtin_sym_end";
} else {
string name = sanitize_name(rule_name(symbol));
if (symbol.is_auxiliary())
return "ts_aux_sym_" + name;
else
return "ts_sym_" + name;
}
}
string sanitize_name(string name) {
auto existing = sanitized_names.find(name);
if (existing != sanitized_names.end())
return existing->second;
string stripped_name;
for (char c : name) {
if (('a' <= c && c <= 'z') ||
('A' <= c && c <= 'Z') ||
('0' <= c && c <= '9') ||
(c == '_')) {
stripped_name += c;
}
}
for (size_t extra_number = 0;; extra_number++) {
string suffix = extra_number ? to_string(extra_number) : "";
string unique_name = stripped_name + suffix;
if (unique_name == "")
continue;
if (!has_sanitized_name(unique_name)) {
sanitized_names.insert({ name, unique_name });
return unique_name;
}
}
}
bool has_sanitized_name(string name) {
for (auto &pair : sanitized_names)
if (pair.second == name)
return true;
return false;
}
string lex_state_index(size_t i) {
return to_string(i + 1);
}
string symbol_name(const rules::Symbol &symbol) {
if (symbol.is_built_in()) {
return (symbol == rules::ERROR()) ? "error" : "end";
} else if (symbol.is_token() && symbol.is_auxiliary()) {
return rule_name(symbol);
} else {
return rule_name(symbol);
}
}
string condition_for_character_range(const rules::CharacterRange &range) {
string lookahead("lookahead");
if (range.min == range.max) {
return lookahead + " == '" + escape_char(range.min) + "'";
} else {
return string("'") + escape_char(range.min) + string("' <= ") + lookahead +
" && " + lookahead + " <= '" + escape_char(range.max) + "'";
}
}
void condition_for_character_set(const rules::CharacterSet &set) {
if (set.ranges.size() == 1) {
add(condition_for_character_range(*set.ranges.begin()));
} else {
bool first = true;
for (auto &match : set.ranges) {
string part = "(" + condition_for_character_range(match) + ")";
if (first) {
add(part);
} else {
add(" ||");
line(part);
}
first = false;
}
}
}
void condition_for_character_rule(const rules::CharacterSet &rule) {
pair<rules::CharacterSet, bool> representation = rule.most_compact_representation();
if (representation.second) {
condition_for_character_set(representation.first);
} else {
add("!(");
condition_for_character_set(rule.complement());
add(")");
}
}
void code_for_parse_action(const ParseAction &action) {
switch (action.type) {
case ParseActionTypeAccept:
add("ACCEPT_INPUT()");
break;
case ParseActionTypeShift:
add("SHIFT(" + to_string(action.state_index) + ")");
break;
case ParseActionTypeShiftExtra:
add("SHIFT_EXTRA()");
break;
case ParseActionTypeReduce:
add("REDUCE(" +
symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
break;
case ParseActionTypeReduceExtra:
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
break;
default: {}
}
}
void code_for_lex_actions(const LexAction &action,
const set<rules::CharacterSet> &expected_inputs) {
switch (action.type) {
case LexActionTypeAdvance:
line("ADVANCE(" + lex_state_index(action.state_index) + ");");
break;
case LexActionTypeAccept:
line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
break;
case LexActionTypeError:
line("LEX_ERROR();");
break;
default: {}
}
}
void code_for_lex_state(const LexState &lex_state) {
auto expected_inputs = lex_state.expected_inputs();
if (lex_state.is_token_start)
line("START_TOKEN();");
for (auto pair : lex_state.actions)
if (!pair.first.is_empty())
_if([&]() { condition_for_character_rule(pair.first); },
[&]() { code_for_lex_actions(pair.second, expected_inputs); });
code_for_lex_actions(lex_state.default_action, expected_inputs);
}
void switch_on_lex_state() {
_switch("lex_state", [&]() {
for (size_t i = 0; i < lex_table.states.size(); i++)
_case(lex_state_index(i), [&]() {
code_for_lex_state(lex_table.states[i]);
});
_case("ts_lex_state_error", [&]() {
code_for_lex_state(lex_table.error_state);
});
_default([&]() {
line("LEX_PANIC();");
});
});
}
void _switch(string condition, function<void()> body) {
line("switch (" + condition + ") {");
indent(body);
line("}");
}
void _case(string value, function<void()> body) {
line("case " + value + ":");
indent(body);
}
void _default(function<void()> body) {
line("default:");
indent(body);
}
void _if(function<void()> condition, function<void()> body) {
line("if (");
indent(condition);
add(")");
indent(body);
}
void line() {
line("");
}
void line(string input) {
add("\n");
if (!input.empty()) {
string space;
for (size_t i = 0; i < indent_level; i++)
space += " ";
add(space + input);
}
}
void add(string input) {
buffer += input;
}
void indent(function<void()> body) {
indent(body, 1);
}
void indent(function<void()> body, size_t n) {
indent_level += n;
body();
indent_level -= n;
}
};
string c_code(string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar) {
return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, lexical_grammar).code();
// Emits an anonymous enum with one enumerator per non-built-in symbol. The
// first enumerator is assigned ts_start_sym; the others follow implicitly.
void symbol_enum() {
  line("enum {");
  indent([&]() {
    bool pending_first = true;
    for (const auto &symbol : parse_table.symbols) {
      if (symbol.is_built_in())
        continue;
      auto entry = symbol_id(symbol);
      entry += pending_first ? " = ts_start_sym," : ",";
      line(entry);
      pending_first = false;
    }
  });
  line("};");
  line();
}
// Emits the SYMBOL_NAMES table: one `[id] = "name",` entry for every symbol
// in the parse table.
//
// NOTE(review): this function used to build a copy of parse_table.symbols
// extended with END_OF_INPUT and ERROR, but never read it; the loop has
// always iterated parse_table.symbols. The dead copy is removed here and the
// emitted output is unchanged. If the built-in names are meant to be listed
// unconditionally, iterate such an extended set instead.
void symbol_names_list() {
  line("SYMBOL_NAMES = {");
  indent([&]() {
    for (const auto &symbol : parse_table.symbols)
      line("[" + symbol_id(symbol) + "] = \"" + symbol_name(symbol) + "\",");
  });
  line("};");
  line();
}
// Emits the HIDDEN_SYMBOLS table. A non-built-in symbol is listed when it is
// auxiliary or when its rule name starts with an underscore.
void hidden_symbols_list() {
  line("HIDDEN_SYMBOLS = {");
  indent([&]() {
    for (auto &symbol : parse_table.symbols) {
      if (symbol.is_built_in())
        continue;
      // The rule name is only looked up for non-auxiliary symbols.
      const bool hidden = symbol.is_auxiliary() || rule_name(symbol)[0] == '_';
      if (hidden)
        line("[" + symbol_id(symbol) + "] = 1,");
    }
  });
  line("};");
  line();
}
// Emits the LEX_FN() definition: START_LEXER() followed by the switch over
// lex states.
void lex_function() {
  auto function_body = [&]() {
    line("START_LEXER();");
    switch_on_lex_state();
  };

  line("LEX_FN() {");
  indent(function_body);
  line("}");
  line();
}
// Emits the LEX_STATES table: for each parse state, in order, the index of
// the lex state it uses.
void lex_states_list() {
  line("LEX_STATES = {");
  indent([&]() {
    size_t state_id = 0;
    for (auto &state : parse_table.states) {
      const auto parse_index = to_string(state_id);
      const auto lex_index = lex_state_index(state.lex_state_id);
      line("[" + parse_index + "] = " + lex_index + ",");
      ++state_id;
    }
  });
  line("};");
  line();
}
// Emits the PARSE_TABLE initializer: one designated entry per parse state,
// each holding one designated entry per (symbol, action) pair. The table is
// wrapped in pragmas that silence -Wmissing-field-initializers.
void parse_table_array() {
  line("#pragma GCC diagnostic push");
  line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
  line();
  line("PARSE_TABLE = {");
  indent([&]() {
    size_t state_id = 0;
    for (auto &state : parse_table.states) {
      line("[" + to_string(state_id) + "] = {");
      ++state_id;
      indent([&]() {
        for (auto &entry : state.actions) {
          // The action text and the trailing comma are appended to the same
          // output line as the "[symbol] = " prefix.
          line("[" + symbol_id(entry.first) + "] = ");
          code_for_parse_action(entry.second);
          add(",");
        }
      });
      line("},");
    }
  });
  line("};");
  line();
  line("#pragma GCC diagnostic pop");
  line();
}
// Emits the EXPORT_PARSER(...) line for the function ts_parser_<name>.
void parser_export() {
  const auto exported_function = "ts_parser_" + name;
  line("EXPORT_PARSER(" + exported_function + ");");
  line();
}
// Looks up a symbol's rule name: tokens in the lexical grammar, all other
// symbols in the syntax grammar.
string rule_name(const rules::Symbol &symbol) {
  if (symbol.is_token())
    return lexical_grammar.rule_name(symbol);
  return syntax_grammar.rule_name(symbol);
}
// Returns the C identifier used for a symbol. Built-ins map to
// ts_builtin_sym_error / ts_builtin_sym_end; other symbols get their
// sanitized rule name prefixed with ts_aux_sym_ (auxiliary) or ts_sym_.
string symbol_id(const rules::Symbol &symbol) {
  if (symbol.is_built_in()) {
    if (symbol == rules::ERROR())
      return "ts_builtin_sym_error";
    return "ts_builtin_sym_end";
  }

  const string prefix = symbol.is_auxiliary() ? "ts_aux_sym_" : "ts_sym_";
  return prefix + sanitize_name(rule_name(symbol));
}
// Maps a rule name to a unique identifier made only of ASCII letters, digits
// and underscores. Results are remembered in sanitized_names, so the same
// input always yields the same output.
string sanitize_name(string name) {
  const auto cached = sanitized_names.find(name);
  if (cached != sanitized_names.end())
    return cached->second;

  // Keep only the characters [a-zA-Z0-9_].
  string base;
  for (const char c : name) {
    const bool is_lower = 'a' <= c && c <= 'z';
    const bool is_upper = 'A' <= c && c <= 'Z';
    const bool is_digit = '0' <= c && c <= '9';
    if (is_lower || is_upper || is_digit || c == '_')
      base += c;
  }

  // Try the bare name first, then the name followed by 1, 2, 3, ... until a
  // non-empty candidate is found that no other name has been mapped to.
  size_t counter = 0;
  while (true) {
    string candidate = base;
    if (counter != 0)
      candidate += to_string(counter);
    ++counter;

    if (candidate.empty() || has_sanitized_name(candidate))
      continue;

    sanitized_names.insert({ name, candidate });
    return candidate;
  }
}
// Reports whether some rule name has already been mapped to `name`.
bool has_sanitized_name(string name) {
  bool taken = false;
  for (auto it = sanitized_names.begin();
       it != sanitized_names.end() && !taken; ++it) {
    taken = (it->second == name);
  }
  return taken;
}
// Converts a zero-based lex state position into the one-based index, as
// text, used in the generated code.
string lex_state_index(size_t i) {
  const size_t one_based = i + 1;
  return to_string(one_based);
}
// Returns the display name of a symbol: "error" or "end" for the built-ins,
// otherwise the symbol's rule name. The former separate branch for auxiliary
// tokens returned the same rule name, so it has been folded away.
string symbol_name(const rules::Symbol &symbol) {
  if (symbol.is_built_in())
    return (symbol == rules::ERROR()) ? "error" : "end";
  return rule_name(symbol);
}
// Builds the C condition testing `lookahead` against a character range:
// an equality test for a single-character range, otherwise a pair of
// inclusive bound checks.
string condition_for_character_range(const rules::CharacterRange &range) {
  const string lookahead("lookahead");
  const string lower = string("'") + escape_char(range.min) + "'";
  if (range.min == range.max)
    return lookahead + " == " + lower;

  const string upper = string("'") + escape_char(range.max) + "'";
  return lower + " <= " + lookahead + " && " + lookahead + " <= " + upper;
}
// Appends the condition matching any range of the set. A single range is
// written bare; several ranges are each parenthesized and joined by " ||",
// with every range after the first placed on its own line.
void condition_for_character_set(const rules::CharacterSet &set) {
  if (set.ranges.size() == 1) {
    add(condition_for_character_range(*set.ranges.begin()));
    return;
  }

  bool need_separator = false;
  for (auto &range : set.ranges) {
    const string clause = "(" + condition_for_character_range(range) + ")";
    if (need_separator) {
      add(" ||");
      line(clause);
    } else {
      add(clause);
    }
    need_separator = true;
  }
}
// Appends the condition for a character rule. When the flag returned by
// most_compact_representation() is set, the returned set is emitted
// directly; otherwise the rule's complement is emitted inside "!( ... )".
void condition_for_character_rule(const rules::CharacterSet &rule) {
  const pair<rules::CharacterSet, bool> representation =
      rule.most_compact_representation();

  if (representation.second) {
    condition_for_character_set(representation.first);
    return;
  }

  add("!(");
  condition_for_character_set(rule.complement());
  add(")");
}
// Appends the macro invocation for a parse action. Action types not listed
// here produce no output.
void code_for_parse_action(const ParseAction &action) {
  switch (action.type) {
    case ParseActionTypeAccept:
      add("ACCEPT_INPUT()");
      return;
    case ParseActionTypeShift: {
      const auto target_state = to_string(action.state_index);
      add("SHIFT(" + target_state + ")");
      return;
    }
    case ParseActionTypeShiftExtra:
      add("SHIFT_EXTRA()");
      return;
    case ParseActionTypeReduce: {
      const auto reduced_symbol = symbol_id(action.symbol);
      const auto child_count = to_string(action.consumed_symbol_count);
      add("REDUCE(" + reduced_symbol + ", " + child_count + ")");
      return;
    }
    case ParseActionTypeReduceExtra:
      add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
      return;
    default:
      return;
  }
}
// Emits the statement for a lex action on its own line. Action types not
// listed here produce no output. `expected_inputs` is accepted but not used
// by this body.
void code_for_lex_actions(const LexAction &action,
                          const set<rules::CharacterSet> &expected_inputs) {
  switch (action.type) {
    case LexActionTypeAdvance: {
      const auto next_state = lex_state_index(action.state_index);
      line("ADVANCE(" + next_state + ");");
      return;
    }
    case LexActionTypeAccept: {
      const auto accepted = symbol_id(action.symbol);
      line("ACCEPT_TOKEN(" + accepted + ");");
      return;
    }
    case LexActionTypeError:
      line("LEX_ERROR();");
      return;
    default:
      return;
  }
}
// Emits the body of one lex state: an optional START_TOKEN(), one `if` per
// non-empty character set with its action, then the default action.
void code_for_lex_state(const LexState &lex_state) {
  auto expected_inputs = lex_state.expected_inputs();
  if (lex_state.is_token_start)
    line("START_TOKEN();");

  // Iterate by const reference: a by-value loop variable would copy every
  // (character set, action) pair on each iteration.
  for (const auto &pair : lex_state.actions)
    if (!pair.first.is_empty())
      _if([&]() { condition_for_character_rule(pair.first); },
          [&]() { code_for_lex_actions(pair.second, expected_inputs); });

  code_for_lex_actions(lex_state.default_action, expected_inputs);
}
// Emits `switch (lex_state)` with one case per lex table state, a case for
// ts_lex_state_error handled by the table's error state, and a default that
// calls LEX_PANIC().
void switch_on_lex_state() {
  _switch("lex_state", [&]() {
    const auto state_count = lex_table.states.size();
    for (size_t index = 0; index < state_count; ++index) {
      const auto &state = lex_table.states[index];
      _case(lex_state_index(index), [&]() { code_for_lex_state(state); });
    }

    _case("ts_lex_state_error",
          [&]() { code_for_lex_state(lex_table.error_state); });

    _default([&]() { line("LEX_PANIC();"); });
  });
}
// Emits "switch (<condition>) {", the body one indent level deeper, then "}".
void _switch(string condition, function<void()> body) {
  const string header = "switch (" + condition + ") {";
  line(header);
  indent(body);
  line("}");
}
// Emits "case <value>:" followed by the body one indent level deeper.
void _case(string value, function<void()> body) {
  const string label = "case " + value + ":";
  line(label);
  indent(body);
}
// Emits "default:" followed by the body one indent level deeper.
void _default(function<void()> body) {
  const char *const label = "default:";
  line(label);
  indent(body);
}
// Emits an `if`: "if (" on a new line, the condition produced one indent
// level deeper, a closing ")" appended directly after it, and then the body
// one indent level deeper. No braces are emitted around the body.
void _if(function<void()> condition, function<void()> body) {
  const char *const opening = "if (";
  const char *const closing = ")";
  line(opening);
  indent(condition);
  add(closing);
  indent(body);
}
// Starts a new, empty output line.
void line() {
  line("");
}
// Starts a new output line. A newline is always appended; non-empty input is
// then written after one " " per current indent level.
void line(string input) {
  add("\n");
  if (input.empty())
    return;

  string padding;
  for (size_t remaining = indent_level; remaining > 0; --remaining)
    padding += " ";
  add(padding + input);
}
// Appends text to the output buffer as-is (no newline, no indentation).
void add(string input) {
  buffer.append(input);
}
// Runs `body` with the indent level raised by one.
void indent(function<void()> body) {
  indent(body, 1);
}
// Runs `body` with the indent level raised by `n`, then lowers it again.
// The level is not restored if `body` exits by throwing.
void indent(function<void()> body, size_t n) {
  indent_level = indent_level + n;
  body();
  indent_level = indent_level - n;
}
};
// Generates the C source text for a parser by running a CCodeGenerator over
// the given tables and grammars.
string c_code(string name, const ParseTable &parse_table,
              const LexTable &lex_table, const SyntaxGrammar &syntax_grammar,
              const LexicalGrammar &lexical_grammar) {
  CCodeGenerator generator(name, parse_table, lex_table, syntax_grammar,
                           lexical_grammar);
  return generator.code();
}
} // namespace generate_code
} // namespace tree_sitter

View file

@ -7,16 +7,17 @@
#include "compiler/lex_table.h"
namespace tree_sitter {
class SyntaxGrammar;
class LexicalGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace generate_code {
std::string c_code(std::string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar);
}
}
namespace generate_code {
std::string c_code(std::string name, const ParseTable &parse_table,
const LexTable &lex_table,
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar);
} // namespace generate_code
} // namespace tree_sitter
#endif // COMPILER_GENERATE_CODE_C_CODE_H_

View file

@ -2,83 +2,83 @@
#include "compiler/rules/rule.h"
namespace tree_sitter {
using std::ostream;
using std::pair;
using std::set;
using std::string;
using std::vector;
using rules::rule_ptr;
Grammar::Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules) :
rules_(rules),
ubiquitous_tokens_({}),
separators_({ ' ', '\r', '\t', '\n' }) {}
using std::ostream;
using std::pair;
using std::set;
using std::string;
using std::vector;
using rules::rule_ptr;
bool Grammar::operator==(const Grammar &other) const {
if (other.rules_.size() != rules_.size()) return false;
// Builds a grammar from (name, rule) pairs, with no ubiquitous tokens and
// with space, carriage return, tab and newline as the separator characters.
Grammar::Grammar(
    const std::vector<std::pair<std::string, rules::rule_ptr> > &rules)
    : rules_(rules),
      ubiquitous_tokens_({}),
      separators_({ ' ', '\r', '\t', '\n' }) {
}
for (size_t i = 0; i < rules_.size(); i++) {
auto &pair = rules_[i];
auto &other_pair = other.rules_[i];
if (other_pair.first != pair.first) return false;
if (!other_pair.second->operator==(*pair.second)) return false;
}
bool Grammar::operator==(const Grammar &other) const {
if (other.rules_.size() != rules_.size())
return false;
return true;
}
for (size_t i = 0; i < rules_.size(); i++) {
auto &pair = rules_[i];
auto &other_pair = other.rules_[i];
if (other_pair.first != pair.first)
return false;
if (!other_pair.second->operator==(*pair.second))
return false;
}
string Grammar::start_rule_name() const {
return rules_.front().first;
}
ostream& operator<<(ostream &stream, const Grammar &grammar) {
stream << string("#<grammar");
stream << string(" rules: {");
bool started = false;
for (auto pair : grammar.rules()) {
if (started) stream << string(", ");
stream << pair.first;
stream << string(" => ");
stream << pair.second;
started = true;
}
return stream << string("}>");
}
GrammarError::GrammarError(GrammarErrorType type, std::string message) :
type(type),
message(message) {}
bool GrammarError::operator==(const GrammarError &other) const {
return type == other.type && message == other.message;
}
ostream& operator<<(ostream &stream, const GrammarError *error) {
if (error)
return stream << (string("#<grammar-error '") + error->message + "'>");
else
return stream << string("#<null>");
}
const set<string> & Grammar::ubiquitous_tokens() const {
return ubiquitous_tokens_;
}
Grammar & Grammar::ubiquitous_tokens(const set<string> &ubiquitous_tokens) {
ubiquitous_tokens_ = ubiquitous_tokens;
return *this;
}
const set<char> & Grammar::separators() const {
return separators_;
}
Grammar & Grammar::separators(const set<char> &separators) {
separators_ = separators;
return *this;
}
const vector<pair<string, rule_ptr>> & Grammar::rules() const {
return rules_;
}
return true;
}
// Returns the name of the first rule, which serves as the start rule.
// A grammar without rules yields an empty string; calling front() on an
// empty vector would be undefined behavior.
string Grammar::start_rule_name() const {
  if (rules_.empty())
    return string();
  return rules_.front().first;
}
// Writes the grammar as "#<grammar rules: {name => rule, ...}>".
ostream &operator<<(ostream &stream, const Grammar &grammar) {
  stream << string("#<grammar");
  stream << string(" rules: {");
  bool started = false;
  // Bind by const reference so each (name, rule) pair is not copied.
  for (const auto &pair : grammar.rules()) {
    if (started)
      stream << string(", ");
    stream << pair.first;
    stream << string(" => ");
    stream << pair.second;
    started = true;
  }
  return stream << string("}>");
}
// Stores the given error type and message.
GrammarError::GrammarError(GrammarErrorType type, std::string message)
    : type(type),
      message(message) {
}
// Two errors are equal when both their type and their message match.
bool GrammarError::operator==(const GrammarError &other) const {
  if (!(type == other.type))
    return false;
  return message == other.message;
}
// Writes "#<grammar-error 'MESSAGE'>" for a non-null error and "#<null>"
// for a null pointer.
ostream &operator<<(ostream &stream, const GrammarError *error) {
  if (!error)
    return stream << string("#<null>");

  const string text = string("#<grammar-error '") + error->message + "'>";
  return stream << text;
}
// Getter: returns the stored set of ubiquitous token names.
const set<string> &Grammar::ubiquitous_tokens() const {
  return this->ubiquitous_tokens_;
}
// Setter: replaces the ubiquitous token names and returns *this so that
// calls can be chained.
Grammar &Grammar::ubiquitous_tokens(const set<string> &ubiquitous_tokens) {
  this->ubiquitous_tokens_ = ubiquitous_tokens;
  return *this;
}
// Getter: returns the stored set of separator characters.
const set<char> &Grammar::separators() const {
  return this->separators_;
}
// Setter: replaces the separator characters and returns *this so that calls
// can be chained.
Grammar &Grammar::separators(const set<char> &separators) {
  this->separators_ = separators;
  return *this;
}
// Getter: returns the (name, rule) pairs in their stored order.
const vector<pair<string, rule_ptr> > &Grammar::rules() const {
  return this->rules_;
}
} // namespace tree_sitter

View file

@ -2,75 +2,79 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
using std::string;
using std::to_string;
using std::map;
using std::set;
using rules::Symbol;
using rules::CharacterSet;
LexAction::LexAction() :
type(LexActionTypeError),
symbol(Symbol(-1)),
state_index(-1),
precedence_values({0}) {}
using std::string;
using std::to_string;
using std::map;
using std::set;
using rules::Symbol;
using rules::CharacterSet;
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, set<int> precedence_values) :
type(type),
symbol(symbol),
state_index(state_index),
precedence_values(precedence_values) {}
// Default action: an error action with symbol -1, state index -1 and a
// single precedence value of 0.
LexAction::LexAction()
    : type(LexActionTypeError),
      symbol(Symbol(-1)),
      state_index(-1),
      precedence_values({ 0 }) {
}
LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, Symbol(-1), {0});
}
// Stores every field of the action exactly as given.
LexAction::LexAction(LexActionType type,
                     size_t state_index,
                     Symbol symbol,
                     set<int> precedence_values)
    : type(type),
      symbol(symbol),
      state_index(state_index),
      precedence_values(precedence_values) {
}
LexAction LexAction::Advance(size_t state_index, set<int> precedence_values) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), precedence_values);
}
LexAction LexAction::Accept(Symbol symbol, int precedence) {
return LexAction(LexActionTypeAccept, -1, symbol, { precedence });
}
// Two lex actions compare equal when their type, state index and symbol all
// match. precedence_values does not take part in the comparison.
bool LexAction::operator==(const LexAction &other) const {
  if (!(type == other.type))
    return false;
  if (!(state_index == other.state_index))
    return false;
  return (symbol == other.symbol);
}
std::ostream& operator<<(std::ostream &stream, const LexAction &action) {
switch (action.type) {
case LexActionTypeError:
return stream << string("#<error>");
case LexActionTypeAccept:
return stream << string("#<accept ") + to_string(action.symbol.index) + ">";
case LexActionTypeAdvance:
return stream << string("#<advance ") + to_string(action.state_index) + ">";
default:
return stream;
}
}
// Collects the keys of `actions`, i.e. every character set this state has an
// action for.
set<CharacterSet> LexState::expected_inputs() const {
  set<CharacterSet> inputs;
  for (auto it = actions.begin(); it != actions.end(); ++it)
    inputs.insert(it->first);
  return inputs;
}
// Appends a fresh, default-constructed state and returns its index in
// `states`.
LexStateId LexTable::add_state() {
  const LexStateId new_id = states.size();
  states.push_back(LexState());
  return new_id;
}
// Looks up a state by id. Negative ids resolve to the shared `error_state`;
// non-negative ids index directly into `states` with no upper-bound check.
LexState & LexTable::state(LexStateId id) {
  return (id < 0) ? error_state : states[id];
}
const LexStateId LexTable::ERROR_STATE_ID = -1;
// Factory for an error action: type LexActionTypeError, state index -1,
// Symbol(-1) and precedence set {0}.
LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, Symbol(-1), { 0 });
}
// Factory for an advance action targeting `state_index`; the symbol is the
// placeholder Symbol(-1) and the given precedence values are stored as-is.
LexAction LexAction::Advance(size_t state_index, set<int> precedence_values) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1),
precedence_values);
}
// Factory for an accept action for `symbol`; the state index is -1 and the
// single `precedence` becomes a one-element precedence set.
LexAction LexAction::Accept(Symbol symbol, int precedence) {
return LexAction(LexActionTypeAccept, -1, symbol, { precedence });
}
// Two lex actions compare equal when their type, state index and symbol all
// match. precedence_values does not take part in the comparison.
bool LexAction::operator==(const LexAction &other) const {
  if (!(type == other.type))
    return false;
  if (!(state_index == other.state_index))
    return false;
  return (symbol == other.symbol);
}
std::ostream &operator<<(std::ostream &stream, const LexAction &action) {
switch (action.type) {
case LexActionTypeError:
return stream << string("#<error>");
case LexActionTypeAccept:
return stream << string("#<accept ") + to_string(action.symbol.index) +
">";
case LexActionTypeAdvance:
return stream << string("#<advance ") + to_string(action.state_index) +
">";
default:
return stream;
}
}
// Collects the keys of `actions`, i.e. every character set this state has an
// action for.
set<CharacterSet> LexState::expected_inputs() const {
  set<CharacterSet> inputs;
  for (auto it = actions.begin(); it != actions.end(); ++it)
    inputs.insert(it->first);
  return inputs;
}
// Appends a fresh, default-constructed state and returns its index in
// `states`.
LexStateId LexTable::add_state() {
  const LexStateId new_id = states.size();
  states.push_back(LexState());
  return new_id;
}
// Looks up a state by id. Negative ids resolve to the shared `error_state`;
// non-negative ids index directly into `states` with no upper-bound check.
LexState &LexTable::state(LexStateId id) {
  return (id < 0) ? error_state : states[id];
}
const LexStateId LexTable::ERROR_STATE_ID = -1;
} // namespace tree_sitter

View file

@ -9,61 +9,69 @@
#include "compiler/rules/character_set.h"
namespace tree_sitter {
// Discriminator for LexAction. Each value has a matching static factory on
// LexAction (Error, Accept, Advance).
typedef enum {
LexActionTypeError,
LexActionTypeAccept,
LexActionTypeAdvance
} LexActionType;
class LexAction {
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, std::set<int> precedence_values);
public:
LexAction();
static LexAction Accept(rules::Symbol symbol, int precedence);
static LexAction Error();
static LexAction Advance(size_t state_index, std::set<int> precedence_values);
bool operator==(const LexAction &action) const;
// Discriminator for LexAction. Each value has a matching static factory on
// LexAction (Error, Accept, Advance).
typedef enum {
LexActionTypeError,
LexActionTypeAccept,
LexActionTypeAdvance
} LexActionType;
LexActionType type;
rules::Symbol symbol;
size_t state_index;
std::set<int> precedence_values;
};
class LexAction {
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol,
std::set<int> precedence_values);
std::ostream& operator<<(std::ostream &stream, const LexAction &item);
}
public:
LexAction();
static LexAction Accept(rules::Symbol symbol, int precedence);
static LexAction Error();
static LexAction Advance(size_t state_index, std::set<int> precedence_values);
bool operator==(const LexAction &action) const;
LexActionType type;
rules::Symbol symbol;
size_t state_index;
std::set<int> precedence_values;
};
std::ostream &operator<<(std::ostream &stream, const LexAction &item);
} // namespace tree_sitter
namespace std {
// std::hash support for LexAction: XORs the hashes of the action's type,
// symbol and state index.
template<>
struct hash<tree_sitter::LexAction> {
  size_t operator()(const tree_sitter::LexAction &action) const {
    const size_t type_hash = hash<int>()(action.type);
    const size_t symbol_hash = hash<tree_sitter::rules::Symbol>()(action.symbol);
    const size_t state_hash = hash<size_t>()(action.state_index);
    return (type_hash ^ symbol_hash ^ state_hash);
  }
};
}
// std::hash support for LexAction: XORs the hashes of the action's type,
// symbol and state index.
template <>
struct hash<tree_sitter::LexAction> {
  size_t operator()(const tree_sitter::LexAction &action) const {
    const size_t type_hash = hash<int>()(action.type);
    const size_t symbol_hash = hash<tree_sitter::rules::Symbol>()(action.symbol);
    const size_t state_hash = hash<size_t>()(action.state_index);
    return (type_hash ^ symbol_hash ^ state_hash);
  }
};
} // namespace std
namespace tree_sitter {
// One state of the lex table: a map from character sets to the action taken
// on them, plus a default action.
class LexState {
public:
// Action to take for each character set this state handles.
std::map<rules::CharacterSet, LexAction> actions;
// Default-constructed LexAction is an error action.
LexAction default_action;
// Returns the keys of `actions`.
std::set<rules::CharacterSet> expected_inputs() const;
// NOTE: has no in-class initializer here.
bool is_token_start;
};
typedef int64_t LexStateId;
// One state of the lex table: a map from character sets to the action taken
// on them, plus a default action.
class LexState {
public:
// Action to take for each character set this state handles.
std::map<rules::CharacterSet, LexAction> actions;
// Default-constructed LexAction is an error action.
LexAction default_action;
// Returns the keys of `actions`.
std::set<rules::CharacterSet> expected_inputs() const;
// NOTE: has no in-class initializer here.
bool is_token_start;
};
class LexTable {
public:
static const LexStateId ERROR_STATE_ID;
LexStateId add_state();
LexState & state(LexStateId state_id);
typedef int64_t LexStateId;
std::vector<LexState> states;
LexState error_state;
};
}
// Table of lex states addressed by LexStateId, with a separate state used
// for negative ids.
class LexTable {
public:
// Id reserved for the error state (defined as -1 in the implementation file).
static const LexStateId ERROR_STATE_ID;
// Appends a new state and returns its index in `states`.
LexStateId add_state();
// Returns `error_state` for negative ids, otherwise states[state_id].
LexState &state(LexStateId state_id);
std::vector<LexState> states;
LexState error_state;
};
} // namespace tree_sitter
#endif // COMPILER_LEX_TABLE_H_

View file

@ -2,109 +2,120 @@
#include <string>
namespace tree_sitter {
using std::string;
using std::ostream;
using std::to_string;
using std::set;
using std::vector;
using rules::Symbol;
// Member-wise constructor: stores each argument in the member of the same
// name. The static factories (Error, Accept, Shift, ...) are built on it.
ParseAction::ParseAction(ParseActionType type,
ParseStateId state_index,
Symbol symbol,
size_t consumed_symbol_count,
set<int> precedence_values) :
type(type),
symbol(symbol),
state_index(state_index),
consumed_symbol_count(consumed_symbol_count),
precedence_values(precedence_values) {}
using std::string;
using std::ostream;
using std::to_string;
using std::set;
using std::vector;
using rules::Symbol;
// Default constructor: an error action with Symbol(-1), state_index
// initialised from -1 and zero consumed symbols. precedence_values is not
// listed, so it is left default-constructed (unlike Error(), which passes
// { 0 }).
ParseAction::ParseAction() :
type(ParseActionTypeError),
symbol(Symbol(-1)),
state_index(-1),
consumed_symbol_count(0) {}
// Member-wise constructor: stores each argument in the member of the same
// name. The static factories (Error, Accept, Shift, ...) are built on it.
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
Symbol symbol, size_t consumed_symbol_count,
set<int> precedence_values)
: type(type),
symbol(symbol),
state_index(state_index),
consumed_symbol_count(consumed_symbol_count),
precedence_values(precedence_values) {}
// Factory for an error action: state index -1, Symbol(-1), no consumed
// symbols, precedence set {0}.
ParseAction ParseAction::Error() {
return ParseAction(ParseActionTypeError, -1, Symbol(-1), 0, { 0 });
}
// Default constructor: an error action with Symbol(-1), state_index
// initialised from -1 and zero consumed symbols. precedence_values is not
// listed, so it is left default-constructed (unlike Error(), which passes
// { 0 }).
ParseAction::ParseAction()
: type(ParseActionTypeError),
symbol(Symbol(-1)),
state_index(-1),
consumed_symbol_count(0) {}
// Factory for an accept action: state index -1, Symbol(-1), precedence {0}.
ParseAction ParseAction::Accept() {
return ParseAction(ParseActionTypeAccept, -1, Symbol(-1), 0, { 0 });
}
// Factory for a shift to `state_index`, carrying the given precedence values.
ParseAction ParseAction::Shift(ParseStateId state_index, set<int> precedence_values) {
return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, precedence_values);
}
// Factory for a shift-extra action: state index 0, Symbol(-1), precedence {0}.
ParseAction ParseAction::ShiftExtra() {
return ParseAction(ParseActionTypeShiftExtra, 0, Symbol(-1), 0, { 0 });
}
// Factory for a reduce-extra action on `symbol`, with zero consumed symbols.
ParseAction ParseAction::ReduceExtra(Symbol symbol) {
return ParseAction(ParseActionTypeReduceExtra, 0, symbol, 0, { 0 });
}
// Factory for a reduction to `symbol` consuming `consumed_symbol_count`
// symbols; the single `precedence` becomes a one-element precedence set.
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count, int precedence) {
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count, { precedence });
}
// Parse actions compare equal when type, symbol, state index and consumed
// symbol count all match. precedence_values is not compared.
bool ParseAction::operator==(const ParseAction &other) const {
  return (type == other.type) && (symbol == other.symbol) &&
         (state_index == other.state_index) &&
         (consumed_symbol_count == other.consumed_symbol_count);
}
// Streams a short tag describing a parse action and returns the stream:
//   error        -> "#<error>"
//   accept       -> "#<accept>"
//   shift        -> "#<shift N>"            (N = target state index)
//   shift extra  -> "#<shift_extra>"
//   reduce extra -> "#<reduce_extra symN>"  (N = symbol index)
//   reduce       -> "#<reduce symN M>"      (M = consumed symbol count)
// Any other type value writes nothing.
ostream& operator<<(ostream &stream, const ParseAction &action) {
  switch (action.type) {
    case ParseActionTypeError:
      return stream << string("#<error>");
    case ParseActionTypeAccept:
      return stream << string("#<accept>");
    case ParseActionTypeShift:
      return stream << (string("#<shift ") + to_string(action.state_index) + ">");
    case ParseActionTypeShiftExtra:
      // Closing '>' added: this tag was the only one left unterminated.
      return stream << string("#<shift_extra>");
    case ParseActionTypeReduceExtra:
      return stream << ("#<reduce_extra sym" + to_string(action.symbol.index) + ">");
    case ParseActionTypeReduce:
      return stream << ("#<reduce sym" + to_string(action.symbol.index) + " " +
                        to_string(action.consumed_symbol_count) + ">");
    default:
      return stream;
  }
}
// A new parse state starts with lex state id -1 and no actions.
ParseState::ParseState() : lex_state_id(-1) {}
// Collects the keys of `actions`, i.e. every symbol this state has an action
// for.
set<Symbol> ParseState::expected_inputs() const {
  set<Symbol> inputs;
  for (auto it = actions.begin(); it != actions.end(); ++it)
    inputs.insert(it->first);
  return inputs;
}
// Streams a parse state as "#<parse_state sym => action, sym => action>",
// listing the entries of `state.actions` in map order. Returns the stream.
ostream& operator<<(ostream &stream, const ParseState &state) {
  stream << string("#<parse_state ");
  bool started = false;
  // Bind by const reference: copying each entry would also copy the
  // ParseAction's precedence set on every iteration.
  for (const auto &entry : state.actions) {
    if (started) stream << string(", ");
    stream << entry.first << string(" => ") << entry.second;
    started = true;
  }
  stream << string(">");
  return stream;
}
// Appends a fresh, default-constructed state and returns its index in
// `states`.
ParseStateId ParseTable::add_state() {
  const ParseStateId new_id = states.size();
  states.push_back(ParseState());
  return new_id;
}
// Records `action` for `symbol` in state `id`, overwriting any action already
// stored for that symbol, and adds the symbol to the table-wide `symbols` set.
// `id` is used to index `states` directly, with no bounds check.
void ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) {
symbols.insert(symbol);
states[id].actions[symbol] = action;
}
// Factory for an error action: state index -1, Symbol(-1), no consumed
// symbols, precedence set {0}.
ParseAction ParseAction::Error() {
return ParseAction(ParseActionTypeError, -1, Symbol(-1), 0, { 0 });
}
// Factory for an accept action: state index -1, Symbol(-1), precedence {0}.
ParseAction ParseAction::Accept() {
return ParseAction(ParseActionTypeAccept, -1, Symbol(-1), 0, { 0 });
}
// Factory for a shift to `state_index`, carrying the given precedence values.
ParseAction ParseAction::Shift(ParseStateId state_index,
set<int> precedence_values) {
return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0,
precedence_values);
}
// Factory for a shift-extra action: state index 0, Symbol(-1), precedence {0}.
ParseAction ParseAction::ShiftExtra() {
return ParseAction(ParseActionTypeShiftExtra, 0, Symbol(-1), 0, { 0 });
}
// Factory for a reduce-extra action on `symbol`, with zero consumed symbols.
ParseAction ParseAction::ReduceExtra(Symbol symbol) {
return ParseAction(ParseActionTypeReduceExtra, 0, symbol, 0, { 0 });
}
// Factory for a reduction to `symbol` consuming `consumed_symbol_count`
// symbols; the single `precedence` becomes a one-element precedence set.
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
int precedence) {
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count,
{ precedence });
}
// Parse actions compare equal when type, symbol, state index and consumed
// symbol count all match. precedence_values is not compared.
bool ParseAction::operator==(const ParseAction &other) const {
  return (type == other.type) && (symbol == other.symbol) &&
         (state_index == other.state_index) &&
         (consumed_symbol_count == other.consumed_symbol_count);
}
// Streams a short tag describing a parse action and returns the stream:
//   error        -> "#<error>"
//   accept       -> "#<accept>"
//   shift        -> "#<shift N>"            (N = target state index)
//   shift extra  -> "#<shift_extra>"
//   reduce extra -> "#<reduce_extra symN>"  (N = symbol index)
//   reduce       -> "#<reduce symN M>"      (M = consumed symbol count)
// Any other type value writes nothing.
ostream &operator<<(ostream &stream, const ParseAction &action) {
  switch (action.type) {
    case ParseActionTypeError:
      return stream << string("#<error>");
    case ParseActionTypeAccept:
      return stream << string("#<accept>");
    case ParseActionTypeShift:
      return stream << (string("#<shift ") + to_string(action.state_index) +
                        ">");
    case ParseActionTypeShiftExtra:
      // Closing '>' added: this tag was the only one left unterminated.
      return stream << string("#<shift_extra>");
    case ParseActionTypeReduceExtra:
      return stream << ("#<reduce_extra sym" + to_string(action.symbol.index) +
                        ">");
    case ParseActionTypeReduce:
      return stream << ("#<reduce sym" + to_string(action.symbol.index) + " " +
                        to_string(action.consumed_symbol_count) + ">");
    default:
      return stream;
  }
}
// A new parse state starts with lex state id -1 and no actions.
ParseState::ParseState() : lex_state_id(-1) {}
// Collects the keys of `actions`, i.e. every symbol this state has an action
// for.
set<Symbol> ParseState::expected_inputs() const {
  set<Symbol> inputs;
  for (auto it = actions.begin(); it != actions.end(); ++it)
    inputs.insert(it->first);
  return inputs;
}
// Streams a parse state as "#<parse_state sym => action, sym => action>",
// listing the entries of `state.actions` in map order. Returns the stream.
ostream &operator<<(ostream &stream, const ParseState &state) {
  stream << string("#<parse_state ");
  bool started = false;
  // Bind by const reference: copying each entry would also copy the
  // ParseAction's precedence set on every iteration.
  for (const auto &entry : state.actions) {
    if (started)
      stream << string(", ");
    stream << entry.first << string(" => ") << entry.second;
    started = true;
  }
  stream << string(">");
  return stream;
}
// Appends a fresh, default-constructed state and returns its index in
// `states`.
ParseStateId ParseTable::add_state() {
  const ParseStateId new_id = states.size();
  states.push_back(ParseState());
  return new_id;
}
// Records `action` for `symbol` in state `id`, overwriting any action already
// stored for that symbol, and adds the symbol to the table-wide `symbols` set.
// `id` is used to index `states` directly, with no bounds check.
void ParseTable::add_action(ParseStateId id, Symbol symbol,
ParseAction action) {
symbols.insert(symbol);
states[id].actions[symbol] = action;
}
} // namespace tree_sitter

View file

@ -9,75 +9,82 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
typedef uint64_t ParseStateId;
// Discriminator for ParseAction. Each value has a matching static factory on
// ParseAction (Error, Shift, ShiftExtra, Reduce, ReduceExtra, Accept).
typedef enum {
ParseActionTypeError,
ParseActionTypeShift,
ParseActionTypeShiftExtra,
ParseActionTypeReduce,
ParseActionTypeReduceExtra,
ParseActionTypeAccept,
} ParseActionType;
typedef uint64_t ParseStateId;
class ParseAction {
ParseAction(ParseActionType type,
ParseStateId state_index,
rules::Symbol symbol,
size_t consumed_symbol_count,
std::set<int> precedence_values);
public:
ParseAction();
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(ParseStateId state_index, std::set<int> precedence_values);
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count, int precedence);
static ParseAction ShiftExtra();
static ParseAction ReduceExtra(rules::Symbol symbol);
bool operator==(const ParseAction &action) const;
// Discriminator for ParseAction. Each value has a matching static factory on
// ParseAction (Error, Shift, ShiftExtra, Reduce, ReduceExtra, Accept).
typedef enum {
ParseActionTypeError,
ParseActionTypeShift,
ParseActionTypeShiftExtra,
ParseActionTypeReduce,
ParseActionTypeReduceExtra,
ParseActionTypeAccept,
} ParseActionType;
ParseActionType type;
rules::Symbol symbol;
ParseStateId state_index;
size_t consumed_symbol_count;
std::set<int> precedence_values;
};
class ParseAction {
ParseAction(ParseActionType type, ParseStateId state_index,
rules::Symbol symbol, size_t consumed_symbol_count,
std::set<int> precedence_values);
std::ostream& operator<<(std::ostream &stream, const ParseAction &item);
}
public:
ParseAction();
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(ParseStateId state_index,
std::set<int> precedence_values);
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count,
int precedence);
static ParseAction ShiftExtra();
static ParseAction ReduceExtra(rules::Symbol symbol);
bool operator==(const ParseAction &action) const;
ParseActionType type;
rules::Symbol symbol;
ParseStateId state_index;
size_t consumed_symbol_count;
std::set<int> precedence_values;
};
std::ostream &operator<<(std::ostream &stream, const ParseAction &item);
} // namespace tree_sitter
namespace std {
// std::hash support for ParseAction: XORs the hashes of the action's type,
// symbol, state index and consumed symbol count.
template<>
struct hash<tree_sitter::ParseAction> {
  size_t operator()(const tree_sitter::ParseAction &action) const {
    const size_t type_hash = hash<int>()(action.type);
    const size_t symbol_hash = hash<tree_sitter::rules::Symbol>()(action.symbol);
    const size_t state_hash = hash<size_t>()(action.state_index);
    const size_t count_hash = hash<size_t>()(action.consumed_symbol_count);
    return (type_hash ^ symbol_hash ^ state_hash ^ count_hash);
  }
};
}
// std::hash support for ParseAction: XORs the hashes of the action's type,
// symbol, state index and consumed symbol count.
template <>
struct hash<tree_sitter::ParseAction> {
  size_t operator()(const tree_sitter::ParseAction &action) const {
    const size_t type_hash = hash<int>()(action.type);
    const size_t symbol_hash = hash<tree_sitter::rules::Symbol>()(action.symbol);
    const size_t state_hash = hash<size_t>()(action.state_index);
    const size_t count_hash = hash<size_t>()(action.consumed_symbol_count);
    return (type_hash ^ symbol_hash ^ state_hash ^ count_hash);
  }
};
} // namespace std
namespace tree_sitter {
// One state of the parse table: a map from symbols to the action taken on
// them, plus the id of an associated lex state.
class ParseState {
public:
// Initialises lex_state_id to -1.
ParseState();
// Action to take for each symbol this state handles.
std::map<rules::Symbol, ParseAction> actions;
// Returns the keys of `actions`.
std::set<rules::Symbol> expected_inputs() const;
LexStateId lex_state_id;
};
std::ostream& operator<<(std::ostream &stream, const ParseState &state);
// One state of the parse table: a map from symbols to the action taken on
// them, plus the id of an associated lex state.
class ParseState {
public:
// Initialises lex_state_id to -1.
ParseState();
// Action to take for each symbol this state handles.
std::map<rules::Symbol, ParseAction> actions;
// Returns the keys of `actions`.
std::set<rules::Symbol> expected_inputs() const;
LexStateId lex_state_id;
};
class ParseTable {
public:
ParseStateId add_state();
void add_action(ParseStateId state_id, rules::Symbol symbol, ParseAction action);
std::ostream &operator<<(std::ostream &stream, const ParseState &state);
std::vector<ParseState> states;
std::set<rules::Symbol> symbols;
};
}
// Table of parse states addressed by ParseStateId, plus the set of every
// symbol that has had an action registered.
class ParseTable {
public:
// Appends a new state and returns its index in `states`.
ParseStateId add_state();
// Stores `action` for `symbol` in the given state and records the symbol in
// `symbols`.
void add_action(ParseStateId state_id, rules::Symbol symbol,
ParseAction action);
std::vector<ParseState> states;
std::set<rules::Symbol> symbols;
};
} // namespace tree_sitter
#endif // COMPILER_PARSE_TABLE_H_

View file

@ -11,55 +11,57 @@
#include "compiler/rules/repeat.h"
namespace tree_sitter {
using std::string;
using std::vector;
using std::pair;
using std::to_string;
using std::make_shared;
using rules::rule_ptr;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::Rule;
using rules::Seq;
using rules::Symbol;
namespace prepare_grammar {
namespace prepare_grammar {
class ExpandRepeats : public rules::IdentityRuleFn {
string rule_name;
using std::string;
using std::vector;
using std::pair;
using std::to_string;
using std::make_shared;
using rules::rule_ptr;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::Rule;
using rules::Seq;
using rules::Symbol;
// Replaces a Repeat rule with a reference to a new auxiliary rule of the form
//   helper := (content helper) | blank
// and returns the auxiliary symbol that stands for it.
rule_ptr apply_to(const Repeat *rule) {
// Transform the content first; `index` is read only afterwards, so any
// auxiliary rules appended while processing the content get lower indices.
rule_ptr inner_rule = apply(rule->content);
size_t index = aux_rules.size();
string helper_rule_name = rule_name + string("_repeat") + to_string(index);
// `offset` shifts the local index by the constructor-supplied base.
rule_ptr repeat_symbol = make_shared<Symbol>(offset + index, rules::SymbolOptionAuxiliary);
aux_rules.push_back({
helper_rule_name,
Choice::Build({
Seq::Build({ inner_rule, repeat_symbol }),
make_shared<Blank>()
})
});
return repeat_symbol;
}
class ExpandRepeats : public rules::IdentityRuleFn {
string rule_name;
public:
ExpandRepeats(string rule_name, size_t offset) : rule_name(rule_name), offset(offset) {}
// Replaces a Repeat rule with a reference to a new auxiliary rule of the form
//   helper := (content helper) | blank
// and returns the auxiliary symbol that stands for it.
rule_ptr apply_to(const Repeat *rule) {
// Transform the content first; `index` is read only afterwards, so any
// auxiliary rules appended while processing the content get lower indices.
rule_ptr inner_rule = apply(rule->content);
size_t index = aux_rules.size();
string helper_rule_name = rule_name + string("_repeat") + to_string(index);
// `offset` shifts the local index by the constructor-supplied base.
rule_ptr repeat_symbol =
make_shared<Symbol>(offset + index, rules::SymbolOptionAuxiliary);
aux_rules.push_back(
{ helper_rule_name,
Choice::Build({ Seq::Build({ inner_rule, repeat_symbol }),
make_shared<Blank>() }) });
return repeat_symbol;
}
size_t offset;
vector<pair<string, rules::rule_ptr>> aux_rules;
};
public:
ExpandRepeats(string rule_name, size_t offset)
: rule_name(rule_name), offset(offset) {}
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
size_t offset;
vector<pair<string, rules::rule_ptr> > aux_rules;
};
for (auto &pair : grammar.rules) {
ExpandRepeats expander(pair.first, aux_rules.size());
rules.push_back({ pair.first, expander.apply(pair.second) });
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
}
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
vector<pair<string, rules::rule_ptr> > rules, aux_rules(grammar.aux_rules);
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
}
}
for (auto &pair : grammar.rules) {
ExpandRepeats expander(pair.first, aux_rules.size());
rules.push_back({ pair.first, expander.apply(pair.second) });
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(),
expander.aux_rules.end());
}
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -4,11 +4,14 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class SyntaxGrammar;
namespace prepare_grammar {
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
}
}
class SyntaxGrammar;
namespace prepare_grammar {
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_

View file

@ -12,60 +12,59 @@
#include "compiler/prepare_grammar/parse_regex.h"
namespace tree_sitter {
using std::string;
using std::vector;
using std::pair;
using std::make_shared;
using rules::rule_ptr;
using rules::String;
using rules::Pattern;
namespace prepare_grammar {
namespace prepare_grammar {
class ExpandTokens : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
using std::string;
using std::vector;
using std::pair;
using std::make_shared;
using rules::rule_ptr;
using rules::String;
using rules::Pattern;
// Expands a String rule into a sequence of single-character CharacterSet
// rules, one per char of the string's value, in order.
rule_ptr apply_to(const String *rule) {
vector<rule_ptr> elements;
for (char val : rule->value)
elements.push_back(rules::CharacterSet({ val }).copy());
return rules::Seq::Build(elements);
}
class ExpandTokens : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
// Expands a Pattern rule by parsing its value with parse_regex, which yields
// a (rule, error) pair. Only the first error seen is kept: `error` is
// assigned only while it is still null.
rule_ptr apply_to(const Pattern *rule) {
auto pair = parse_regex(rule->value);
if (!error)
error = pair.second;
return pair.first;
}
// Expands a String rule into a sequence of single-character CharacterSet
// rules, one per char of the string's value, in order.
rule_ptr apply_to(const String *rule) {
vector<rule_ptr> elements;
for (char val : rule->value)
elements.push_back(rules::CharacterSet({ val }).copy());
return rules::Seq::Build(elements);
}
public:
const GrammarError *error;
ExpandTokens() : error(nullptr) {}
};
// Expands a Pattern rule by parsing its value with parse_regex, which yields
// a (rule, error) pair. Only the first error seen is kept: `error` is
// assigned only while it is still null.
rule_ptr apply_to(const Pattern *rule) {
auto pair = parse_regex(rule->value);
if (!error)
error = pair.second;
return pair.first;
}
pair<LexicalGrammar, const GrammarError *>
expand_tokens(const LexicalGrammar &grammar) {
vector<pair<string, rule_ptr>> rules, aux_rules;
ExpandTokens expander;
public:
const GrammarError *error;
ExpandTokens() : error(nullptr) {}
};
for (auto &pair : grammar.rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { LexicalGrammar(), expander.error };
rules.push_back({ pair.first, rule });
}
pair<LexicalGrammar, const GrammarError *> expand_tokens(
const LexicalGrammar &grammar) {
vector<pair<string, rule_ptr> > rules, aux_rules;
ExpandTokens expander;
for (auto &pair : grammar.aux_rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { LexicalGrammar(), expander.error };
aux_rules.push_back({ pair.first, rule });
}
for (auto &pair : grammar.rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { LexicalGrammar(), expander.error };
rules.push_back({ pair.first, rule });
}
return {
LexicalGrammar(rules, aux_rules, grammar.separators),
nullptr,
};
}
}
for (auto &pair : grammar.aux_rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { LexicalGrammar(), expander.error };
aux_rules.push_back({ pair.first, rule });
}
return { LexicalGrammar(rules, aux_rules, grammar.separators), nullptr, };
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -5,13 +5,15 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class LexicalGrammar;
namespace prepare_grammar {
std::pair<LexicalGrammar, const GrammarError *>
expand_tokens(const LexicalGrammar &);
}
}
class LexicalGrammar;
namespace prepare_grammar {
std::pair<LexicalGrammar, const GrammarError *> expand_tokens(
const LexicalGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_

View file

@ -14,120 +14,123 @@
#include "compiler/prepare_grammar/token_description.h"
namespace tree_sitter {
using std::pair;
using std::string;
using std::map;
using std::to_string;
using std::vector;
using std::set;
using std::make_shared;
using rules::rule_ptr;
using rules::Symbol;
namespace prepare_grammar {
namespace prepare_grammar {
// Rule visitor answering "is this rule a token?". String and Pattern rules
// always are; a Metadata rule is one when its IS_TOKEN value is non-zero.
// Other rule kinds fall through to whatever RuleFn<bool> does by default
// (not shown here).
class IsToken : public rules::RuleFn<bool> {
bool apply_to(const rules::String *rule) { return true; }
bool apply_to(const rules::Pattern *rule) { return true; }
bool apply_to(const rules::Metadata *rule) { return rule->value_for(rules::IS_TOKEN); }
};
using std::pair;
using std::string;
using std::map;
using std::to_string;
using std::vector;
using std::set;
using std::make_shared;
using rules::rule_ptr;
using rules::Symbol;
class SymbolInliner : public rules::IdentityRuleFn {
map<Symbol, Symbol> replacements;
using rules::IdentityRuleFn::apply_to;
// Rule visitor answering "is this rule a token?". String and Pattern rules
// always are; a Metadata rule is one when its IS_TOKEN value is non-zero.
// Other rule kinds fall through to whatever RuleFn<bool> does by default
// (not shown here).
class IsToken : public rules::RuleFn<bool> {
bool apply_to(const rules::String *rule) { return true; }
bool apply_to(const rules::Pattern *rule) { return true; }
bool apply_to(const rules::Metadata *rule) {
return rule->value_for(rules::IS_TOKEN);
}
};
int new_index_for_symbol(const Symbol &symbol) {
int result = symbol.index;
for (const auto &pair : replacements)
if (pair.first.index < symbol.index &&
pair.first.is_auxiliary() == symbol.is_auxiliary())
result--;
return result;
}
class SymbolInliner : public rules::IdentityRuleFn {
map<Symbol, Symbol> replacements;
using rules::IdentityRuleFn::apply_to;
// Rewrites a Symbol rule via replace_symbol and returns a copy of the result.
rule_ptr apply_to(const Symbol *rule) {
return replace_symbol(*rule).copy();
}
int new_index_for_symbol(const Symbol &symbol) {
int result = symbol.index;
for (const auto &pair : replacements)
if (pair.first.index < symbol.index &&
pair.first.is_auxiliary() == symbol.is_auxiliary())
result--;
return result;
}
public:
// Maps a symbol from the input grammar to its post-extraction form: built-in
// symbols pass through unchanged, symbols listed in `replacements` become
// their replacement, and all others keep their options with a re-numbered
// index.
Symbol replace_symbol(const Symbol &rule) {
  if (rule.is_built_in()) {
    return rule;
  }

  auto found = replacements.find(rule);
  if (found == replacements.end()) {
    return Symbol(new_index_for_symbol(rule), rule.options);
  }
  return found->second;
}
// Rewrites a Symbol rule via replace_symbol and returns a copy of the result.
rule_ptr apply_to(const Symbol *rule) { return replace_symbol(*rule).copy(); }
SymbolInliner(const map<Symbol, Symbol> &replacements) : replacements(replacements) {}
};
public:
// Maps a symbol from the input grammar to its post-extraction form: built-in
// symbols pass through unchanged, symbols listed in `replacements` become
// their replacement, and all others keep their options with a re-numbered
// index.
Symbol replace_symbol(const Symbol &rule) {
  if (rule.is_built_in()) {
    return rule;
  }

  auto found = replacements.find(rule);
  if (found == replacements.end()) {
    return Symbol(new_index_for_symbol(rule), rule.options);
  }
  return found->second;
}
const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary);
SymbolInliner(const map<Symbol, Symbol> &replacements)
: replacements(replacements) {}
};
class TokenExtractor : public rules::IdentityRuleFn {
// Returns an auxiliary-token symbol standing for `input`. If an equal rule
// is already in `tokens`, its index is reused; otherwise the rule is appended
// (named by token_description) and the new index is used.
rule_ptr apply_to_token(const rules::Rule *input) {
auto rule = input->copy();
// Linear scan for a token whose rule compares equal to this one.
for (size_t i = 0; i < tokens.size(); i++)
if (tokens[i].second->operator==(*rule))
return make_shared<Symbol>(i, SymbolOptionAuxToken);
size_t index = tokens.size();
tokens.push_back({ token_description(rule), rule });
return make_shared<Symbol>(index, SymbolOptionAuxToken);
}
const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(
rules::SymbolOptionToken | rules::SymbolOptionAuxiliary);
// Fallback for rule kinds without a dedicated overload: token rules are
// swapped for an auxiliary-token symbol, everything else is returned as a
// plain copy.
rule_ptr default_apply(const rules::Rule *rule) {
  auto copied = rule->copy();
  const bool is_token = IsToken().apply(rule->copy());
  if (is_token)
    return apply_to_token(rule);
  return copied;
}
class TokenExtractor : public rules::IdentityRuleFn {
// Returns an auxiliary-token symbol standing for `input`. If an equal rule
// is already in `tokens`, its index is reused; otherwise the rule is appended
// (named by token_description) and the new index is used.
rule_ptr apply_to_token(const rules::Rule *input) {
auto rule = input->copy();
// Linear scan for a token whose rule compares equal to this one.
for (size_t i = 0; i < tokens.size(); i++)
if (tokens[i].second->operator==(*rule))
return make_shared<Symbol>(i, SymbolOptionAuxToken);
size_t index = tokens.size();
tokens.push_back({ token_description(rule), rule });
return make_shared<Symbol>(index, SymbolOptionAuxToken);
}
// Handles Metadata rules: when the rule is marked as a token it is swapped
// for an auxiliary-token symbol; otherwise it is processed by the
// IdentityRuleFn implementation.
rule_ptr apply_to(const rules::Metadata *rule) {
  // The former `result` local was never read, so its extra copy() is dropped.
  if (IsToken().apply(rule->copy()))
    return apply_to_token(rule);
  return rules::IdentityRuleFn::apply_to(rule);
}
public:
vector<pair<string, rule_ptr>> tokens;
};
// Splits an interned grammar into a syntax grammar and a lexical grammar.
//
// Top-level rules that are themselves tokens move to the lexical grammar's
// `tokens`. Every other rule is run through TokenExtractor, which replaces
// embedded token rules with auxiliary-token symbols (collected as
// `aux_tokens`). Remaining symbol references, including the ubiquitous
// tokens, are then rewritten to account for the rules that moved.
pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &input_grammar) {
  vector<pair<string, rule_ptr>> rules, tokens, aux_rules, aux_tokens;
  set<Symbol> ubiquitous_tokens;
  TokenExtractor extractor;
  map<Symbol, Symbol> symbol_replacements;

  for (size_t i = 0; i < input_grammar.rules.size(); i++) {
    // Bind by reference: the entry is only read here, so copying the name
    // string and rule pointer on every iteration is unnecessary.
    const auto &entry = input_grammar.rules[i];
    if (IsToken().apply(entry.second)) {
      tokens.push_back(entry);
      // Remember that rule i now lives at the end of `tokens`.
      symbol_replacements.insert({
        Symbol(i),
        Symbol(tokens.size() - 1, rules::SymbolOptionToken)
      });
    } else {
      rules.push_back({ entry.first, extractor.apply(entry.second) });
    }
  }

  aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());

  // Second pass: re-point symbols at their new homes and indices.
  SymbolInliner inliner(symbol_replacements);
  for (auto &rule_entry : rules)
    rule_entry.second = inliner.apply(rule_entry.second);
  for (auto &symbol : input_grammar.ubiquitous_tokens)
    ubiquitous_tokens.insert(inliner.replace_symbol(symbol));

  return {
    SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
    LexicalGrammar(tokens, aux_tokens, input_grammar.separators),
  };
}
// Fallback for rule kinds without a dedicated overload: token rules are
// swapped for an auxiliary-token symbol, everything else is returned as a
// plain copy.
rule_ptr default_apply(const rules::Rule *rule) {
  auto copied = rule->copy();
  const bool is_token = IsToken().apply(rule->copy());
  if (is_token)
    return apply_to_token(rule);
  return copied;
}
// Handles Metadata rules: when the rule is marked as a token it is swapped
// for an auxiliary-token symbol; otherwise it is processed by the
// IdentityRuleFn implementation.
rule_ptr apply_to(const rules::Metadata *rule) {
  // The former `result` local was never read, so its extra copy() is dropped.
  if (IsToken().apply(rule->copy()))
    return apply_to_token(rule);
  return rules::IdentityRuleFn::apply_to(rule);
}
public:
vector<pair<string, rule_ptr> > tokens;
};
// Splits an interned grammar into a syntax grammar and a lexical grammar.
//
// Top-level rules that are themselves tokens move to the lexical grammar's
// `tokens`. Every other rule is run through TokenExtractor, which replaces
// embedded token rules with auxiliary-token symbols (collected as
// `aux_tokens`). Remaining symbol references, including the ubiquitous
// tokens, are then rewritten to account for the rules that moved.
pair<SyntaxGrammar, LexicalGrammar> extract_tokens(
    const InternedGrammar &input_grammar) {
  vector<pair<string, rule_ptr> > rules, tokens, aux_rules, aux_tokens;
  set<Symbol> ubiquitous_tokens;
  TokenExtractor extractor;
  map<Symbol, Symbol> symbol_replacements;

  for (size_t i = 0; i < input_grammar.rules.size(); i++) {
    // Bind by reference: the entry is only read here, so copying the name
    // string and rule pointer on every iteration is unnecessary.
    const auto &entry = input_grammar.rules[i];
    if (IsToken().apply(entry.second)) {
      tokens.push_back(entry);
      // Remember that rule i now lives at the end of `tokens`.
      symbol_replacements.insert(
          { Symbol(i), Symbol(tokens.size() - 1, rules::SymbolOptionToken) });
    } else {
      rules.push_back({ entry.first, extractor.apply(entry.second) });
    }
  }

  aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(),
                    extractor.tokens.end());

  // Second pass: re-point symbols at their new homes and indices.
  SymbolInliner inliner(symbol_replacements);
  for (auto &rule_entry : rules)
    rule_entry.second = inliner.apply(rule_entry.second);
  for (auto &symbol : input_grammar.ubiquitous_tokens)
    ubiquitous_tokens.insert(inliner.replace_symbol(symbol));

  return { SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
           LexicalGrammar(tokens, aux_tokens, input_grammar.separators), };
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -5,12 +5,16 @@
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
class SyntaxGrammar;
class LexicalGrammar;
namespace prepare_grammar {
std::pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &);
}
}
class SyntaxGrammar;
class LexicalGrammar;
namespace prepare_grammar {
std::pair<SyntaxGrammar, LexicalGrammar> extract_tokens(
const InternedGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_

View file

@ -10,70 +10,72 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
using std::string;
using rules::rule_ptr;
using std::vector;
using std::set;
using std::pair;
using std::make_shared;
namespace prepare_grammar {
namespace prepare_grammar {
class InternSymbols : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
using std::string;
using rules::rule_ptr;
using std::vector;
using std::set;
using std::pair;
using std::make_shared;
// Replaces a named symbol with the indexed Symbol for the rule of that name.
// When no such rule exists, the name is stored in `missing_rule_name` and the
// returned pointer is null.
rule_ptr apply_to(const rules::NamedSymbol *rule) {
auto result = symbol_for_rule_name(rule->name);
if (!result.get()) missing_rule_name = rule->name;
return result;
}
class InternSymbols : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
public:
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
for (size_t i = 0; i < grammar.rules().size(); i++)
if (grammar.rules()[i].first == rule_name)
return make_shared<rules::Symbol>(i);
return nullptr;
}
// Replaces a named symbol with the indexed Symbol for the rule of that name.
// When no such rule exists, the name is stored in `missing_rule_name` and the
// returned pointer is null.
rule_ptr apply_to(const rules::NamedSymbol *rule) {
auto result = symbol_for_rule_name(rule->name);
if (!result.get())
missing_rule_name = rule->name;
return result;
}
explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {}
const Grammar grammar;
string missing_rule_name;
};
public:
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
for (size_t i = 0; i < grammar.rules().size(); i++)
if (grammar.rules()[i].first == rule_name)
return make_shared<rules::Symbol>(i);
return nullptr;
}
// Builds the failure result for an undefined rule reference: an empty
// InternedGrammar paired with a heap-allocated GrammarError whose message is
// "Undefined rule '<name>'". Nothing in this function frees the error.
pair<InternedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
  const string message = "Undefined rule '" + rule_name + "'";
  const GrammarError *error =
      new GrammarError(GrammarErrorTypeUndefinedSymbol, message);
  InternedGrammar empty_grammar;
  return { empty_grammar, error };
}
explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {}
const Grammar grammar;
string missing_rule_name;
};
// Converts a Grammar whose rules refer to each other by name into an
// InternedGrammar whose rules use indexed symbols. Returns
// (grammar, nullptr) on success, or the result of missing_rule_error for the
// first name that does not match any rule.
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
InternSymbols interner(grammar);
vector<pair<string, rule_ptr>> rules;
for (auto &pair : grammar.rules()) {
auto new_rule = interner.apply(pair.second);
// The interner reports an unknown name through missing_rule_name.
if (!interner.missing_rule_name.empty())
return missing_rule_error(interner.missing_rule_name);
rules.push_back({ pair.first, new_rule });
}
// Ubiquitous tokens are given by name and must also resolve to rules.
set<rules::Symbol> ubiquitous_tokens;
for (auto &name : grammar.ubiquitous_tokens()) {
auto token = interner.symbol_for_rule_name(name);
if (!token.get())
return missing_rule_error(name);
ubiquitous_tokens.insert(*token);
}
InternedGrammar result;
result.rules = rules;
result.ubiquitous_tokens = ubiquitous_tokens;
result.separators = grammar.separators();
return { result, nullptr };
}
}
// Builds the failure result for an undefined rule reference: an empty
// InternedGrammar paired with a heap-allocated GrammarError whose message is
// "Undefined rule '<name>'". Nothing in this function frees the error.
pair<InternedGrammar, const GrammarError *> missing_rule_error(
    string rule_name) {
  const string message = "Undefined rule '" + rule_name + "'";
  const GrammarError *error =
      new GrammarError(GrammarErrorTypeUndefinedSymbol, message);
  InternedGrammar empty_grammar;
  return { empty_grammar, error };
}
// Converts a Grammar whose rules refer to each other by name into an
// InternedGrammar whose rules use indexed symbols. Returns
// (grammar, nullptr) on success, or the result of missing_rule_error for the
// first name that does not match any rule.
pair<InternedGrammar, const GrammarError *> intern_symbols(
const Grammar &grammar) {
InternSymbols interner(grammar);
vector<pair<string, rule_ptr> > rules;
for (auto &pair : grammar.rules()) {
auto new_rule = interner.apply(pair.second);
// The interner reports an unknown name through missing_rule_name.
if (!interner.missing_rule_name.empty())
return missing_rule_error(interner.missing_rule_name);
rules.push_back({ pair.first, new_rule });
}
// Ubiquitous tokens are given by name and must also resolve to rules.
set<rules::Symbol> ubiquitous_tokens;
for (auto &name : grammar.ubiquitous_tokens()) {
auto token = interner.symbol_for_rule_name(name);
if (!token.get())
return missing_rule_error(name);
ubiquitous_tokens.insert(*token);
}
InternedGrammar result;
result.rules = rules;
result.ubiquitous_tokens = ubiquitous_tokens;
result.separators = grammar.separators();
return { result, nullptr };
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -7,11 +7,15 @@
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
class Grammar;
namespace prepare_grammar {
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
}
}
class Grammar;
namespace prepare_grammar {
std::pair<InternedGrammar, const GrammarError *> intern_symbols(
const Grammar &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_INTERN_SYMBOLS_H_

View file

@ -9,14 +9,16 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace prepare_grammar {
// Plain data holder for a grammar; intern_symbols() fills in all three fields.
class InternedGrammar {
public:
// (rule name, rule) pairs.
std::vector<std::pair<std::string, rules::rule_ptr>> rules;
// Symbols of the grammar's ubiquitous tokens.
std::set<rules::Symbol> ubiquitous_tokens;
// Separator characters.
std::set<char> separators;
};
}
}
namespace prepare_grammar {
// Plain data holder for a grammar; intern_symbols() fills in all three fields.
class InternedGrammar {
public:
// (rule name, rule) pairs.
std::vector<std::pair<std::string, rules::rule_ptr> > rules;
// Symbols of the grammar's ubiquitous tokens.
std::set<rules::Symbol> ubiquitous_tokens;
// Separator characters.
std::set<char> separators;
};
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_

View file

@ -10,202 +10,193 @@
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
using std::string;
using std::vector;
using std::pair;
using std::make_shared;
using rules::rule_ptr;
using rules::CharacterSet;
using rules::Seq;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::CharacterRange;
using rules::blank;
namespace prepare_grammar {
namespace prepare_grammar {
class PatternParser {
public:
explicit PatternParser(const string &input) :
input(input),
length(input.length()),
position(0) {}
using std::string;
using std::vector;
using std::pair;
using std::make_shared;
using rules::rule_ptr;
using rules::CharacterSet;
using rules::Seq;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::CharacterRange;
using rules::blank;
// Parses one or more terms separated by '|'. A single term is returned as-is;
// several are wrapped in a Choice. `nested` is forwarded to term(). On failure
// returns a blank rule together with the error from the failing term.
pair<rule_ptr, const GrammarError *> rule(bool nested) {
  vector<rule_ptr> alternatives;
  for (;;) {
    auto parsed = term(nested);
    if (parsed.second)
      return { blank(), parsed.second };
    alternatives.push_back(parsed.first);

    // Continue only while input remains and the next character is '|'.
    if (!has_more_input() || peek() != '|')
      break;
    next();
  }

  if (alternatives.size() == 1)
    return { alternatives.front(), nullptr };
  rule_ptr combined = make_shared<Choice>(alternatives);
  return { combined, nullptr };
}
class PatternParser {
public:
explicit PatternParser(const string &input)
: input(input), length(input.length()), position(0) {}
private:
// Parses a run of factors and chains them with Seq::Build, starting from a
// blank rule. Stops at '|', at ')' when `nested` is true, or when the input
// is exhausted. On failure returns a blank rule plus the factor's error.
pair<rule_ptr, const GrammarError *> term(bool nested) {
  rule_ptr sequence = blank();
  for (;;) {
    const char upcoming = peek();
    if (upcoming == '|' || (nested && upcoming == ')'))
      break;

    auto parsed = factor();
    if (parsed.second)
      return { blank(), parsed.second };
    sequence = Seq::Build({ sequence, parsed.first });

    if (!has_more_input())
      break;
  }
  return { sequence, nullptr };
}
// Parses one or more terms separated by '|'. A single term is returned as-is;
// several are wrapped in a Choice. `nested` is forwarded to term(). On failure
// returns a blank rule together with the error from the failing term.
pair<rule_ptr, const GrammarError *> rule(bool nested) {
  vector<rule_ptr> alternatives;
  for (;;) {
    auto parsed = term(nested);
    if (parsed.second)
      return { blank(), parsed.second };
    alternatives.push_back(parsed.first);

    // Continue only while input remains and the next character is '|'.
    if (!has_more_input() || peek() != '|')
      break;
    next();
  }

  if (alternatives.size() == 1)
    return { alternatives.front(), nullptr };
  rule_ptr combined = make_shared<Choice>(alternatives);
  return { combined, nullptr };
}
// Parses an atom followed by an optional quantifier:
//   '*' wraps the atom in a Repeat,
//   '+' yields the atom followed by a Repeat of it,
//   '?' yields a choice between the atom and a Blank.
// On failure returns a blank rule plus the atom's error.
pair<rule_ptr, const GrammarError *> factor() {
  auto parsed = atom();
  if (parsed.second)
    return { blank(), parsed.second };

  rule_ptr result = parsed.first;
  if (!has_more_input())
    return { result, nullptr };

  const char quantifier = peek();
  if (quantifier == '*') {
    next();
    result = make_shared<Repeat>(result);
  } else if (quantifier == '+') {
    next();
    result = make_shared<Seq>(result, make_shared<Repeat>(result));
  } else if (quantifier == '?') {
    next();
    result = Choice::Build({ result, make_shared<Blank>() });
  }
  return { result, nullptr };
}
private:
// Parses a run of factors and chains them with Seq::Build, starting from a
// blank rule. Stops at '|', at ')' when `nested` is true, or when the input
// is exhausted. On failure returns a blank rule plus the factor's error.
pair<rule_ptr, const GrammarError *> term(bool nested) {
  rule_ptr sequence = blank();
  for (;;) {
    const char upcoming = peek();
    if (upcoming == '|' || (nested && upcoming == ')'))
      break;

    auto parsed = factor();
    if (parsed.second)
      return { blank(), parsed.second };
    sequence = Seq::Build({ sequence, parsed.first });

    if (!has_more_input())
      break;
  }
  return { sequence, nullptr };
}
// Parses the smallest unit of a pattern: a parenthesized sub-rule, a bracketed
// character set, '.', or a single (possibly escaped or ranged) character.
// A stray ')' or ']' and an unclosed '(' or '[' are reported as regex errors.
pair<rule_ptr, const GrammarError *> atom() {
  const char lead = peek();

  if (lead == '(') {
    next();
    auto group = rule(true);
    if (group.second)
      return { blank(), group.second };
    if (peek() != ')')
      return error("unmatched open paren");
    next();
    return { group.first, nullptr };
  }

  if (lead == '[') {
    next();
    auto chars = char_set();
    if (chars.second)
      return { blank(), chars.second };
    if (peek() != ']')
      return error("unmatched open square bracket");
    next();
    return { chars.first.copy(), nullptr };
  }

  if (lead == ')')
    return error("unmatched close paren");
  if (lead == ']')
    return error("unmatched close square bracket");

  if (lead == '.') {
    // '.' is the complement of the set containing only '\n'.
    next();
    return { CharacterSet({ '\n' }).complement().copy(), nullptr };
  }

  auto literal = single_char();
  if (literal.second)
    return { blank(), literal.second };
  return { literal.first.copy(), nullptr };
}
// Parses an atom followed by an optional quantifier:
//   '*' wraps the atom in a Repeat,
//   '+' yields the atom followed by a Repeat of it,
//   '?' yields a choice between the atom and a Blank.
// On failure returns a blank rule plus the atom's error.
pair<rule_ptr, const GrammarError *> factor() {
  auto parsed = atom();
  if (parsed.second)
    return { blank(), parsed.second };

  rule_ptr result = parsed.first;
  if (!has_more_input())
    return { result, nullptr };

  const char quantifier = peek();
  if (quantifier == '*') {
    next();
    result = make_shared<Repeat>(result);
  } else if (quantifier == '+') {
    next();
    result = make_shared<Seq>(result, make_shared<Repeat>(result));
  } else if (quantifier == '?') {
    next();
    result = Choice::Build({ result, make_shared<Blank>() });
  }
  return { result, nullptr };
}
// Parses the inside of a bracket expression, up to but not including ']'.
// A leading '^' negates the set: the result is the complement of the union of
// the listed characters and ranges.
pair<CharacterSet, const GrammarError *> char_set() {
  const bool negated = (peek() == '^');
  if (negated)
    next();

  CharacterSet members;
  while (has_more_input() && peek() != ']') {
    auto element = single_char();
    if (element.second)
      return { CharacterSet(), element.second };
    members.add_set(element.first);
  }

  if (negated)
    members = members.complement();
  return { members, nullptr };
}
// Parses the smallest unit of a pattern: a parenthesized sub-rule, a bracketed
// character set, '.', or a single (possibly escaped or ranged) character.
// A stray ')' or ']' and an unclosed '(' or '[' are reported as regex errors.
pair<rule_ptr, const GrammarError *> atom() {
  const char lead = peek();

  if (lead == '(') {
    next();
    auto group = rule(true);
    if (group.second)
      return { blank(), group.second };
    if (peek() != ')')
      return error("unmatched open paren");
    next();
    return { group.first, nullptr };
  }

  if (lead == '[') {
    next();
    auto chars = char_set();
    if (chars.second)
      return { blank(), chars.second };
    if (peek() != ']')
      return error("unmatched open square bracket");
    next();
    return { chars.first.copy(), nullptr };
  }

  if (lead == ')')
    return error("unmatched close paren");
  if (lead == ']')
    return error("unmatched close square bracket");

  if (lead == '.') {
    // '.' is the complement of the set containing only '\n'.
    next();
    return { CharacterSet({ '\n' }).complement().copy(), nullptr };
  }

  auto literal = single_char();
  if (literal.second)
    return { blank(), literal.second };
  return { literal.first.copy(), nullptr };
}
// Parses one element of a pattern into a character set:
//   "\x"  -> the set chosen by escaped_char('x'),
//   "a-b" -> the range from 'a' to 'b',
//   "a"   -> the single character 'a'.
// The error slot of the returned pair is always null.
pair<CharacterSet, const GrammarError *> single_char() {
  if (peek() == '\\') {
    next();
    CharacterSet escaped = escaped_char(peek());
    next();
    return { escaped, nullptr };
  }

  const char first_char = peek();
  next();
  if (peek() != '-')
    return { CharacterSet({ first_char }), nullptr };

  // A '-' follows: the next character is taken as the end of the range.
  next();
  CharacterSet range = CharacterSet({ CharacterRange(first_char, peek()) });
  next();
  return { range, nullptr };
}
// Parses the inside of a bracket expression, up to but not including ']'.
// A leading '^' negates the set: the result is the complement of the union of
// the listed characters and ranges.
pair<CharacterSet, const GrammarError *> char_set() {
  const bool negated = (peek() == '^');
  if (negated)
    next();

  CharacterSet members;
  while (has_more_input() && peek() != ']') {
    auto element = single_char();
    if (element.second)
      return { CharacterSet(), element.second };
    members.add_set(element.first);
  }

  if (negated)
    members = members.complement();
  return { members, nullptr };
}
// Maps the character following a backslash to a character set:
//   'a' -> letters, 'w' -> letters and digits, 'd' -> digits;
// any other character stands for itself.
CharacterSet escaped_char(char value) {
  if (value == 'a')
    return CharacterSet({ {'a', 'z'}, {'A', 'Z'} });
  if (value == 'w')
    return CharacterSet({ {'a', 'z'}, {'A', 'Z'}, {'0', '9'}});
  if (value == 'd')
    return CharacterSet({ {'0', '9'} });
  return CharacterSet({ value });
}
void next() {
position++;
}
char peek() {
return input[position];
}
bool has_more_input() {
return position < length;
}
// Builds a failed parse result: a blank rule paired with a newly allocated
// regex-type GrammarError carrying `msg`.
pair<rule_ptr, const GrammarError *> error(string msg) {
  const GrammarError *failure = new GrammarError(GrammarErrorTypeRegex, msg);
  return { blank(), failure };
}
const string input;
const size_t length;
size_t position;
};
pair<rule_ptr, const GrammarError *> parse_regex(const std::string &input) {
return PatternParser(input).rule(false);
// Parses one element of a pattern into a character set:
//   "\x"  -> the set chosen by escaped_char('x'),
//   "a-b" -> the range from 'a' to 'b',
//   "a"   -> the single character 'a'.
// The error slot of the returned pair is always null.
pair<CharacterSet, const GrammarError *> single_char() {
  if (peek() == '\\') {
    next();
    CharacterSet escaped = escaped_char(peek());
    next();
    return { escaped, nullptr };
  }

  const char first_char = peek();
  next();
  if (peek() != '-')
    return { CharacterSet({ first_char }), nullptr };

  // A '-' follows: the next character is taken as the end of the range.
  next();
  CharacterSet range = CharacterSet({ CharacterRange(first_char, peek()) });
  next();
  return { range, nullptr };
}
// Maps the character following a backslash to a character set:
//   'a' -> letters, 'w' -> letters and digits, 'd' -> digits;
// any other character stands for itself.
CharacterSet escaped_char(char value) {
  if (value == 'a')
    return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' } });
  if (value == 'w')
    return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } });
  if (value == 'd')
    return CharacterSet({ { '0', '9' } });
  return CharacterSet({ value });
}
void next() { position++; }
char peek() { return input[position]; }
bool has_more_input() { return position < length; }
// Builds a failed parse result: a blank rule paired with a newly allocated
// regex-type GrammarError carrying `msg`.
pair<rule_ptr, const GrammarError *> error(string msg) {
  const GrammarError *failure = new GrammarError(GrammarErrorTypeRegex, msg);
  return { blank(), failure };
}
const string input;
const size_t length;
size_t position;
};
// Parses `input` as a pattern and returns the resulting rule together with an
// error pointer, which is null on success.
pair<rule_ptr, const GrammarError *> parse_regex(const std::string &input) {
  PatternParser parser(input);
  return parser.rule(false);
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -6,10 +6,12 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace prepare_grammar {
std::pair<rules::rule_ptr, const GrammarError *>
parse_regex(const std::string &);
}
}
namespace prepare_grammar {
std::pair<rules::rule_ptr, const GrammarError *> parse_regex(
const std::string &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_PARSE_REGEX_H_

View file

@ -7,29 +7,31 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::tuple;
using std::make_tuple;
namespace prepare_grammar {
namespace prepare_grammar {
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
prepare_grammar(const Grammar &input_grammar) {
auto result = intern_symbols(input_grammar);
const InternedGrammar &grammar = result.first;
const GrammarError *error = result.second;
using std::tuple;
using std::make_tuple;
if (error)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
const Grammar &input_grammar) {
auto result = intern_symbols(input_grammar);
const InternedGrammar &grammar = result.first;
const GrammarError *error = result.second;
auto grammars = extract_tokens(grammar);
const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first);
auto expand_tokens_result = expand_tokens(grammars.second);
const LexicalGrammar &lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;
if (error)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
if (error)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
auto grammars = extract_tokens(grammar);
const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first);
auto expand_tokens_result = expand_tokens(grammars.second);
const LexicalGrammar &lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;
return make_tuple(rule_grammar, lex_grammar, nullptr);
}
}
if (error)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
return make_tuple(rule_grammar, lex_grammar, nullptr);
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -5,13 +5,16 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
class Grammar;
class GrammarError;
namespace prepare_grammar {
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
prepare_grammar(const Grammar &);
}
}
class Grammar;
class GrammarError;
namespace prepare_grammar {
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
const Grammar &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_

View file

@ -9,36 +9,36 @@
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
using std::string;
namespace prepare_grammar {
namespace prepare_grammar {
class TokenDescription : public rules::RuleFn<string> {
// Describes a pattern rule as its escaped source text between slashes.
string apply_to(const rules::Pattern *rule) {
  string description = "/";
  description += util::escape_string(rule->value);
  description += "/";
  return description;
}
using std::string;
// Describes a string rule as its escaped text between single quotes.
string apply_to(const rules::String *rule) {
  string description = "'";
  description += util::escape_string(rule->value);
  description += "'";
  return description;
}
class TokenDescription : public rules::RuleFn<string> {
// Describes a pattern rule as its escaped source text between slashes.
string apply_to(const rules::Pattern *rule) {
  string description = "/";
  description += util::escape_string(rule->value);
  description += "/";
  return description;
}
string apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
// Describes a string rule as its escaped text between single quotes.
string apply_to(const rules::String *rule) {
  string description = "'";
  description += util::escape_string(rule->value);
  description += "'";
  return description;
}
// Describes a sequence as "(seq <left> <right>)", describing both halves
// recursively.
string apply_to(const rules::Seq *rule) {
  const string left_text = apply(rule->left);
  const string right_text = apply(rule->right);
  return "(seq " + left_text + " " + right_text + ")";
}
string apply_to(const rules::Metadata *rule) { return apply(rule->rule); }
// Describes a choice as "(choice <e1> <e2> ...)", describing each element
// recursively in order.
string apply_to(const rules::Choice *rule) {
  string description("(choice");
  for (const auto &alternative : rule->elements) {
    description += " ";
    description += apply(alternative);
  }
  description += ")";
  return description;
}
};
// Describes a sequence as "(seq <left> <right>)", describing both halves
// recursively.
string apply_to(const rules::Seq *rule) {
  const string left_text = apply(rule->left);
  const string right_text = apply(rule->right);
  return "(seq " + left_text + " " + right_text + ")";
}
// Returns the description that TokenDescription produces for `rule`.
std::string token_description(const rules::rule_ptr &rule) {
  TokenDescription describer;
  return describer.apply(rule);
}
}
// Describes a choice as "(choice <e1> <e2> ...)", describing each element
// recursively in order.
string apply_to(const rules::Choice *rule) {
  string description("(choice");
  for (const auto &alternative : rule->elements) {
    description += " ";
    description += apply(alternative);
  }
  description += ")";
  return description;
}
};
// Returns the description that TokenDescription produces for `rule`.
std::string token_description(const rules::rule_ptr &rule) {
  TokenDescription describer;
  return describer.apply(rule);
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -5,9 +5,11 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace prepare_grammar {
std::string token_description(const rules::rule_ptr &);
}
}
namespace prepare_grammar {
std::string token_description(const rules::rule_ptr &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_TOKEN_DESCRIPTION_H_

View file

@ -5,54 +5,52 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
using std::string;
using std::pair;
using std::vector;
using std::set;
// Returns the rule stored for `symbol`, reading from `aux_rules` when the
// symbol is auxiliary and from `rules` otherwise. `symbol.index` is used as
// the position in that list and is not range-checked.
const rules::rule_ptr & PreparedGrammar::rule(const rules::Symbol &symbol) const {
  const auto &table = symbol.is_auxiliary() ? aux_rules : rules;
  return table[symbol.index].second;
}
using std::string;
using std::pair;
using std::vector;
using std::set;
// Returns the name stored for `symbol`, reading from `aux_rules` when the
// symbol is auxiliary and from `rules` otherwise. `symbol.index` is used as
// the position in that list and is not range-checked.
const string & PreparedGrammar::rule_name(const rules::Symbol &symbol) const {
  const auto &table = symbol.is_auxiliary() ? aux_rules : rules;
  return table[symbol.index].first;
}
// Default constructors: no members are initialized explicitly.
PreparedGrammar::PreparedGrammar() {}
SyntaxGrammar::SyntaxGrammar() {}
LexicalGrammar::LexicalGrammar() {}
// Builds a syntax grammar from rule lists only; ubiquitous_tokens is left
// default-initialized.
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
PreparedGrammar(rules, aux_rules) {}
// Builds a lexical grammar from rule lists only; separators is left
// default-initialized.
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
PreparedGrammar(rules, aux_rules) {}
// Copies the given (name, rule) lists into `rules` and `aux_rules`.
PreparedGrammar::PreparedGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
rules(rules),
aux_rules(aux_rules) {}
// Builds a syntax grammar from rule lists plus its ubiquitous token symbols.
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules,
const set<rules::Symbol> &ubiquitous_tokens) :
PreparedGrammar(rules, aux_rules),
ubiquitous_tokens(ubiquitous_tokens) {}
// Builds a lexical grammar from rule lists plus its separator characters.
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules,
const set<char> &separators) :
PreparedGrammar(rules, aux_rules),
separators(separators) {}
// Returns the rule stored for `symbol`, reading from `aux_rules` when the
// symbol is auxiliary and from `rules` otherwise. `symbol.index` is used as
// the position in that list and is not range-checked.
const rules::rule_ptr &PreparedGrammar::rule(const rules::Symbol &symbol)
    const {
  const auto &table = symbol.is_auxiliary() ? aux_rules : rules;
  return table[symbol.index].second;
}
// Returns the name stored for `symbol`, reading from `aux_rules` when the
// symbol is auxiliary and from `rules` otherwise. `symbol.index` is used as
// the position in that list and is not range-checked.
const string &PreparedGrammar::rule_name(const rules::Symbol &symbol) const {
  const auto &table = symbol.is_auxiliary() ? aux_rules : rules;
  return table[symbol.index].first;
}
// Default constructors: no members are initialized explicitly.
PreparedGrammar::PreparedGrammar() {}
SyntaxGrammar::SyntaxGrammar() {}
LexicalGrammar::LexicalGrammar() {}
// Builds a syntax grammar from rule lists only; ubiquitous_tokens is left
// default-initialized.
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules)
: PreparedGrammar(rules, aux_rules) {}
// Builds a lexical grammar from rule lists only; separators is left
// default-initialized.
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules)
: PreparedGrammar(rules, aux_rules) {}
// Copies the given (name, rule) lists into `rules` and `aux_rules`.
PreparedGrammar::PreparedGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules)
: rules(rules), aux_rules(aux_rules) {}
// Builds a syntax grammar from rule lists plus its ubiquitous token symbols.
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules,
const set<rules::Symbol> &ubiquitous_tokens)
: PreparedGrammar(rules, aux_rules), ubiquitous_tokens(ubiquitous_tokens) {}
// Builds a lexical grammar from rule lists plus its separator characters.
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules,
const set<char> &separators)
: PreparedGrammar(rules, aux_rules), separators(separators) {}
} // namespace tree_sitter

View file

@ -9,47 +9,49 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
class PreparedGrammar {
public:
PreparedGrammar();
PreparedGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
class PreparedGrammar {
public:
PreparedGrammar();
PreparedGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules);
const std::string & rule_name(const rules::Symbol &symbol) const;
const rules::rule_ptr & rule(const rules::Symbol &symbol) const;
};
const std::vector<std::pair<std::string, rules::rule_ptr> > rules;
const std::vector<std::pair<std::string, rules::rule_ptr> > aux_rules;
class SyntaxGrammar : public PreparedGrammar {
public:
SyntaxGrammar();
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
const std::set<rules::Symbol> &ubiquitous_tokens);
const std::string &rule_name(const rules::Symbol &symbol) const;
const rules::rule_ptr &rule(const rules::Symbol &symbol) const;
};
std::set<rules::Symbol> ubiquitous_tokens;
};
class SyntaxGrammar : public PreparedGrammar {
public:
SyntaxGrammar();
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules);
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules,
const std::set<rules::Symbol> &ubiquitous_tokens);
class LexicalGrammar : public PreparedGrammar {
public:
LexicalGrammar();
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
const std::set<char> &separators);
std::set<rules::Symbol> ubiquitous_tokens;
};
std::set<char> separators;
};
}
// A PreparedGrammar that additionally carries a set of separator characters.
class LexicalGrammar : public PreparedGrammar {
public:
LexicalGrammar();
// Rule lists only.
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules);
// Rule lists plus the separator characters to store.
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules,
const std::set<char> &separators);
// Separator characters of this grammar.
std::set<char> separators;
};
} // namespace tree_sitter
#endif // COMPILER_PREPARED_GRAMMAR_H_

View file

@ -2,28 +2,22 @@
#include <string>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
namespace rules {
Blank::Blank() {}
namespace tree_sitter {
namespace rules {
// A Blank equals any rule whose dynamic type is Blank (or derives from it).
bool Blank::operator==(const Rule &rule) const {
  const Blank *as_blank = dynamic_cast<const Blank *>(&rule);
  return as_blank != nullptr;
}
Blank::Blank() {}
// Every Blank hashes to the same constant.
size_t Blank::hash_code() const {
return 0;
}
// A copy is a freshly allocated Blank.
rule_ptr Blank::copy() const {
return std::make_shared<Blank>();
}
// Fixed textual representation of a Blank.
std::string Blank::to_string() const {
return "#<blank>";
}
// Passes this object to the visitor's visit().
void Blank::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// A Blank equals any rule whose dynamic type is Blank (or derives from it).
bool Blank::operator==(const Rule &rule) const {
  const Blank *as_blank = dynamic_cast<const Blank *>(&rule);
  return as_blank != nullptr;
}
// Every Blank hashes to the same constant.
size_t Blank::hash_code() const { return 0; }
// A copy is a freshly allocated Blank.
rule_ptr Blank::copy() const { return std::make_shared<Blank>(); }
// Fixed textual representation of a Blank.
std::string Blank::to_string() const { return "#<blank>"; }
// Passes this object to the visitor's visit().
void Blank::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -4,19 +4,21 @@
#include <string>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Blank : public Rule {
public:
Blank();
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
};
}
}
// Rule subclass with no data members.
class Blank : public Rule {
public:
Blank();
// Equality against any Rule.
bool operator==(const Rule &other) const;
size_t hash_code() const;
// Returns a rule_ptr to a copy of this rule.
rule_ptr copy() const;
std::string to_string() const;
// Visitor entry point.
void accept(Visitor *visitor) const;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_BLANK_H_

View file

@ -1,9 +1,11 @@
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {
namespace rules {
// Factories for the built-in symbols. Each is constructed with a distinct
// negative first argument (presumably the symbol index -- TODO confirm against
// Symbol's constructor).
// END_OF_INPUT: constructed with -1 and SymbolOptionToken.
Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); }
// ERROR: constructed with -2 and SymbolOptionToken.
Symbol ERROR() { return Symbol(-2, SymbolOptionToken); }
// START: constructed with -3 and no options argument.
Symbol START() { return Symbol(-3); }
}
}
namespace rules {
// Factories for the built-in symbols. Each is constructed with a distinct
// negative first argument (presumably the symbol index -- TODO confirm against
// Symbol's constructor).
// END_OF_INPUT: constructed with -1 and SymbolOptionToken.
Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); }
// ERROR: constructed with -2 and SymbolOptionToken.
Symbol ERROR() { return Symbol(-2, SymbolOptionToken); }
// START: constructed with -3 and no options argument.
Symbol START() { return Symbol(-3); }
} // namespace rules
} // namespace tree_sitter

View file

@ -4,11 +4,11 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace rules {
Symbol ERROR();
Symbol START();
Symbol END_OF_INPUT();
}
namespace rules {
Symbol ERROR();
Symbol START();
Symbol END_OF_INPUT();
}
}
#endif // COMPILER_RULES_BUILT_IN_SYMBOLS_H_

View file

@ -3,50 +3,55 @@
#include <string>
namespace tree_sitter {
using std::string;
namespace rules {
namespace rules {
static const unsigned char MAX_CHAR = -1;
using std::string;
CharacterRange::CharacterRange(unsigned char value) : min(value), max(value) {}
CharacterRange::CharacterRange(unsigned char min, unsigned char max) : min(min), max(max) {}
static const unsigned char MAX_CHAR = -1;
// Two ranges are equal when both endpoints match.
bool CharacterRange::operator==(const CharacterRange &other) const {
  if (min != other.min)
    return false;
  return max == other.max;
}
CharacterRange::CharacterRange(unsigned char value) : min(value), max(value) {}
CharacterRange::CharacterRange(unsigned char min, unsigned char max)
: min(min), max(max) {}
// Orders ranges by lower endpoint first, then by upper endpoint.
bool CharacterRange::operator<(const CharacterRange &other) const {
  if (min != other.min)
    return min < other.min;
  return max < other.max;
}
// Returns a printable form of `input`: a symbolic name for NUL and MAX_CHAR,
// a backslash escape for newline, carriage return and tab, and the character
// itself otherwise.
string escape_character(unsigned char input) {
  if (input == '\0')
    return "<EOF>";
  if (input == '\n')
    return "\\n";
  if (input == '\r')
    return "\\r";
  if (input == '\t')
    return "\\t";
  if (input == MAX_CHAR)
    return "<MAX>";
  return string(1, static_cast<char>(input));
}
// Renders the range as "<ANY>" when it spans 0..MAX_CHAR, as a single escaped
// character when both endpoints coincide, and as "lo-hi" otherwise.
string CharacterRange::to_string() const {
  if (min == 0 && max == MAX_CHAR)
    return "<ANY>";
  const string low = escape_character(min);
  if (min == max)
    return low;
  return low + "-" + escape_character(max);
}
}
// Two ranges are equal when both endpoints match.
bool CharacterRange::operator==(const CharacterRange &other) const {
  if (min != other.min)
    return false;
  return max == other.max;
}
// Orders ranges by lower endpoint first, then by upper endpoint.
bool CharacterRange::operator<(const CharacterRange &other) const {
  if (min != other.min)
    return min < other.min;
  return max < other.max;
}
// Returns a printable form of `input`: a symbolic name for NUL and MAX_CHAR,
// a backslash escape for newline, carriage return and tab, and the character
// itself otherwise.
string escape_character(unsigned char input) {
  if (input == '\0')
    return "<EOF>";
  if (input == '\n')
    return "\\n";
  if (input == '\r')
    return "\\r";
  if (input == '\t')
    return "\\t";
  if (input == MAX_CHAR)
    return "<MAX>";
  return string(1, static_cast<char>(input));
}
// Renders the range as "<ANY>" when it spans 0..MAX_CHAR, as a single escaped
// character when both endpoints coincide, and as "lo-hi" otherwise.
string CharacterRange::to_string() const {
  if (min == 0 && max == MAX_CHAR)
    return "<ANY>";
  const string low = escape_character(min);
  if (min == max)
    return low;
  return low + "-" + escape_character(max);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -5,29 +5,34 @@
#include <string>
namespace tree_sitter {
namespace rules {
struct CharacterRange {
unsigned char min;
unsigned char max;
namespace rules {
// IMPLICIT_CONSTRUCTORS
CharacterRange(unsigned char value);
CharacterRange(unsigned char min, unsigned char max);
struct CharacterRange {
unsigned char min;
unsigned char max;
bool operator==(const CharacterRange &other) const;
bool operator<(const CharacterRange &others) const;
std::string to_string() const;
};
}
}
// IMPLICIT_CONSTRUCTORS
CharacterRange(unsigned char value);
CharacterRange(unsigned char min, unsigned char max);
bool operator==(const CharacterRange &other) const;
bool operator<(const CharacterRange &others) const;
std::string to_string() const;
};
} // namespace rules
} // namespace tree_sitter
namespace std {
// Hash for CharacterRange.
//
// The two endpoints are packed into one 16-bit key (min in the high byte, max
// in the low byte) before hashing, so distinct ranges get distinct keys.
// XOR-ing the two endpoint hashes is symmetric and cancels out whenever
// min == max, which makes all single-character ranges collide.
template<>
struct hash<tree_sitter::rules::CharacterRange> {
  size_t operator()(const tree_sitter::rules::CharacterRange &range) const {
    const unsigned int key =
        (static_cast<unsigned int>(range.min) << 8) | range.max;
    return hash<unsigned int>()(key);
  }
};
}
// Hash for CharacterRange.
//
// The two endpoints are packed into one 16-bit key (min in the high byte, max
// in the low byte) before hashing, so distinct ranges get distinct keys.
// XOR-ing the two endpoint hashes is symmetric and cancels out whenever
// min == max, which makes all single-character ranges collide.
template <>
struct hash<tree_sitter::rules::CharacterRange> {
  size_t operator()(const tree_sitter::rules::CharacterRange &range) const {
    const unsigned int key =
        (static_cast<unsigned int>(range.min) << 8) | range.max;
    return hash<unsigned int>()(key);
  }
};
} // namespace std
#endif // COMPILER_RULES_CHARACTER_RANGE_H_

View file

@ -3,140 +3,141 @@
#include <utility>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
namespace rules {
using std::string;
using std::hash;
using std::set;
using std::pair;
using std::initializer_list;
namespace tree_sitter {
namespace rules {
static const unsigned char MAX_CHAR = -1;
static const unsigned char MAX_CHAR = -1;
CharacterSet::CharacterSet() : ranges({}) {}
CharacterSet::CharacterSet(const set<CharacterRange> &ranges) : ranges(ranges) {}
CharacterSet::CharacterSet(const initializer_list<CharacterRange> &ranges) : ranges(ranges) {}
CharacterSet::CharacterSet() : ranges({}) {}
CharacterSet::CharacterSet(const set<CharacterRange> &ranges)
: ranges(ranges) {}
CharacterSet::CharacterSet(const initializer_list<CharacterRange> &ranges)
: ranges(ranges) {}
// Equal only to another CharacterSet holding exactly the same ranges.
bool CharacterSet::operator==(const Rule &rule) const {
  const auto *that = dynamic_cast<const CharacterSet *>(&rule);
  if (that == nullptr)
    return false;
  return ranges == that->ranges;
}
bool CharacterSet::operator<(const CharacterSet &other) const {
return ranges < other.ranges;
}
size_t CharacterSet::hash_code() const {
size_t result = std::hash<size_t>()(ranges.size());
for (auto &range : ranges) {
result ^= std::hash<unsigned char>()(range.min);
result ^= std::hash<unsigned char>()(range.max);
}
return result;
}
rule_ptr CharacterSet::copy() const {
return std::make_shared<CharacterSet>(*this);
}
// Renders the set as "#<char { r1 r2 ... }>", one entry per range in order.
string CharacterSet::to_string() const {
  string text = "#<char {";
  for (const CharacterRange &range : ranges) {
    text += " ";
    text += range.to_string();
  }
  text += " }>";
  return text;
}
// Returns every character in 0..MAX_CHAR that is not in this set, computed by
// removing this set from the full range.
CharacterSet CharacterSet::complement() const {
  CharacterSet everything_else({ {0, MAX_CHAR} });
  const CharacterSet &excluded = *this;
  everything_else.remove_set(excluded);
  return everything_else;
}
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation() const {
auto first_range = *ranges.begin();
if (first_range.min == 0 && first_range.max > 0) {
return { this->complement(), false };
} else {
return { *this, true };
}
}
// Adds `addition` to `self`, merging it with every existing range that
// overlaps it or sits directly next to it, so the stored ranges stay disjoint.
//
// An existing range lying entirely inside `addition` is absorbed as well.
// Testing only "extends below" and "extends above" would keep such a range
// alongside the addition (e.g. {a} together with {a-z}), leaving overlaps.
void add_range(CharacterSet *self, CharacterRange addition) {
  set<CharacterRange> new_ranges;
  for (const CharacterRange &range : self->ranges) {
    // Endpoints are promoted to int here, so `min - 1` and `max + 1` cannot
    // wrap around at 0 or 255.
    const bool touches = range.max >= addition.min - 1 &&
                         range.min <= addition.max + 1;
    if (!touches) {
      new_ranges.insert(range);
      continue;
    }

    // Grow the addition to cover the range it absorbs.
    if (range.min < addition.min)
      addition.min = range.min;
    if (range.max > addition.max)
      addition.max = range.max;
  }
  new_ranges.insert(addition);
  self->ranges = new_ranges;
}
// Removes `range_to_remove` from `self` and returns the set of characters that
// were actually removed, i.e. the part of `range_to_remove` that overlapped
// ranges of `self`.
CharacterSet remove_range(CharacterSet *self, CharacterRange range_to_remove) {
CharacterSet removed_set;
set<CharacterRange> new_ranges;
for (auto range : self->ranges) {
if (range_to_remove.min <= range.min) {
// The removal starts at or before this range.
if (range_to_remove.max < range.min) {
// ...and ends before it: no overlap, keep the range whole.
new_ranges.insert(range);
} else if (range_to_remove.max < range.max) {
// ...and ends inside it: keep the tail, record the removed head.
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
add_range(&removed_set, CharacterRange(range.min, range_to_remove.max));
} else {
// ...and covers it entirely: nothing is kept.
add_range(&removed_set, range);
}
} else if (range_to_remove.min <= range.max) {
// The removal starts strictly inside this range.
if (range_to_remove.max < range.max) {
// ...and ends inside it: split the range around the removed middle.
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
add_range(&removed_set, range_to_remove);
} else {
// ...and reaches its end or beyond: keep the head, record the removed tail.
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
add_range(&removed_set, CharacterRange(range_to_remove.min, range.max));
}
} else {
// The removal starts after this range: no overlap.
new_ranges.insert(range);
}
}
self->ranges = new_ranges;
return removed_set;
}
bool CharacterSet::is_empty() const {
return ranges.empty();
}
// Adds every range of `other` to this set, one add_range() call per range.
void CharacterSet::add_set(const CharacterSet &other) {
  auto it = other.ranges.begin();
  const auto end = other.ranges.end();
  for (; it != end; ++it)
    add_range(this, *it);
}
// Removes every range of `other` from this set and returns the union of the
// characters that were actually removed.
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
  CharacterSet removed;
  for (const CharacterRange &range : other.ranges)
    removed.add_set(remove_range(this, range));
  return removed;
}
// Returns the characters common to this set and `set`: removing `set` from a
// copy of this set yields exactly the overlapping part as the removed result.
CharacterSet CharacterSet::intersect(const CharacterSet &set) const {
  CharacterSet scratch(*this);
  CharacterSet shared = scratch.remove_set(set);
  return shared;
}
void CharacterSet::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Equal only to another CharacterSet holding exactly the same ranges.
bool CharacterSet::operator==(const Rule &rule) const {
  const auto *that = dynamic_cast<const CharacterSet *>(&rule);
  if (that == nullptr)
    return false;
  return ranges == that->ranges;
}
bool CharacterSet::operator<(const CharacterSet &other) const {
return ranges < other.ranges;
}
size_t CharacterSet::hash_code() const {
size_t result = std::hash<size_t>()(ranges.size());
for (auto &range : ranges) {
result ^= std::hash<unsigned char>()(range.min);
result ^= std::hash<unsigned char>()(range.max);
}
return result;
}
rule_ptr CharacterSet::copy() const {
return std::make_shared<CharacterSet>(*this);
}
// Renders the set as "#<char { r1 r2 ... }>", one entry per range in order.
string CharacterSet::to_string() const {
  string text = "#<char {";
  for (const CharacterRange &range : ranges) {
    text += " ";
    text += range.to_string();
  }
  text += " }>";
  return text;
}
// Returns every character in 0..MAX_CHAR that is not in this set, computed by
// removing this set from the full range.
CharacterSet CharacterSet::complement() const {
  CharacterSet everything_else({ { 0, MAX_CHAR } });
  const CharacterSet &excluded = *this;
  everything_else.remove_set(excluded);
  return everything_else;
}
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation()
const {
auto first_range = *ranges.begin();
if (first_range.min == 0 && first_range.max > 0) {
return { this->complement(), false };
} else {
return { *this, true };
}
}
// Adds `addition` to `self`, merging it with every existing range that
// overlaps it or sits directly next to it, so the stored ranges stay disjoint.
//
// An existing range lying entirely inside `addition` is absorbed as well.
// Testing only "extends below" and "extends above" would keep such a range
// alongside the addition (e.g. {a} together with {a-z}), leaving overlaps.
void add_range(CharacterSet *self, CharacterRange addition) {
  set<CharacterRange> new_ranges;
  for (const CharacterRange &range : self->ranges) {
    // Endpoints are promoted to int here, so `min - 1` and `max + 1` cannot
    // wrap around at 0 or 255.
    const bool touches = range.max >= addition.min - 1 &&
                         range.min <= addition.max + 1;
    if (!touches) {
      new_ranges.insert(range);
      continue;
    }

    // Grow the addition to cover the range it absorbs.
    if (range.min < addition.min)
      addition.min = range.min;
    if (range.max > addition.max)
      addition.max = range.max;
  }
  new_ranges.insert(addition);
  self->ranges = new_ranges;
}
// Removes `range_to_remove` from self->ranges.
//
// Each existing range is compared against the removal range; the parts that
// survive are collected in `new_ranges` (which replaces self->ranges at the
// end) and the parts that are cut out are accumulated in `removed_set`.
//
// Returns the set of characters that were actually removed, i.e. the overlap
// between the original contents of `self` and `range_to_remove`.
CharacterSet remove_range(CharacterSet *self, CharacterRange range_to_remove) {
CharacterSet removed_set;
set<CharacterRange> new_ranges;
for (auto range : self->ranges) {
// Removal starts at or before the start of this range.
if (range_to_remove.min <= range.min) {
if (range_to_remove.max < range.min) {
// Removal ends before this range begins: no overlap, keep the range.
new_ranges.insert(range);
} else if (range_to_remove.max < range.max) {
// Removal covers the head of this range: keep the tail, record the head.
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
add_range(&removed_set, CharacterRange(range.min, range_to_remove.max));
} else {
// Removal covers this range completely: nothing is kept.
add_range(&removed_set, range);
}
// Removal starts strictly inside this range.
} else if (range_to_remove.min <= range.max) {
if (range_to_remove.max < range.max) {
// Removal also ends inside this range: keep the pieces on both sides and
// record the whole removal range.
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
add_range(&removed_set, range_to_remove);
} else {
// Removal covers the tail of this range: keep the head, record the tail.
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
add_range(&removed_set, CharacterRange(range_to_remove.min, range.max));
}
} else {
// Removal starts after this range ends: no overlap, keep the range.
new_ranges.insert(range);
}
}
self->ranges = new_ranges;
return removed_set;
}
// True when the set holds no ranges at all.
bool CharacterSet::is_empty() const {
  return ranges.begin() == ranges.end();
}
void CharacterSet::add_set(const CharacterSet &other) {
for (auto &other_range : other.ranges) {
add_range(this, other_range);
}
}
// Removes every range of `other` from this set and returns the union of the
// pieces that were actually removed.
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
  CharacterSet removed;
  for (auto &victim : other.ranges)
    removed.add_set(remove_range(this, victim));
  return removed;
}
// Computes the intersection without modifying this set: remove `set` from a
// scratch copy and return what remove_set reports as removed.
CharacterSet CharacterSet::intersect(const CharacterSet &set) const {
  CharacterSet scratch = *this;
  CharacterSet shared = scratch.remove_set(set);
  return shared;
}
// Visitor hook: passes this CharacterSet to visitor->visit().
void CharacterSet::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -8,40 +8,44 @@
#include "compiler/rules/rule.h"
#include "compiler/rules/character_range.h"
namespace tree_sitter {
namespace rules {
class CharacterSet : public Rule {
public:
CharacterSet();
explicit CharacterSet(const std::set<CharacterRange> &ranges);
explicit CharacterSet(const std::initializer_list<CharacterRange> &ranges);
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
bool operator<(const CharacterSet &) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class CharacterSet : public Rule {
public:
CharacterSet();
explicit CharacterSet(const std::set<CharacterRange> &ranges);
explicit CharacterSet(const std::initializer_list<CharacterRange> &ranges);
void add_set(const CharacterSet &other);
CharacterSet remove_set(const CharacterSet &other);
CharacterSet complement() const;
CharacterSet intersect(const CharacterSet &) const;
std::pair<CharacterSet, bool> most_compact_representation() const;
bool is_empty() const;
bool operator==(const Rule &other) const;
bool operator<(const CharacterSet &) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
std::set<CharacterRange> ranges;
};
}
}
void add_set(const CharacterSet &other);
CharacterSet remove_set(const CharacterSet &other);
CharacterSet complement() const;
CharacterSet intersect(const CharacterSet &) const;
std::pair<CharacterSet, bool> most_compact_representation() const;
bool is_empty() const;
std::set<CharacterRange> ranges;
};
} // namespace rules
} // namespace tree_sitter
namespace std {
template<>
struct hash<tree_sitter::rules::CharacterSet> {
size_t operator()(const tree_sitter::rules::CharacterSet &rule) const {
return rule.hash_code();
}
};
}
// std::hash specialization for CharacterSet.
template <>
struct hash<tree_sitter::rules::CharacterSet> {
// Forwards to the rule's own hash_code().
size_t operator()(const tree_sitter::rules::CharacterSet &rule) const {
return rule.hash_code();
}
};
} // namespace std
#endif // COMPILER_RULES_CHARACTER_SET_H_

View file

@ -3,50 +3,51 @@
#include <set>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::make_shared;
using std::vector;
using std::set;
using std::dynamic_pointer_cast;
namespace tree_sitter {
namespace rules {
namespace rules {
Choice::Choice(const vector<rule_ptr> &elements) : elements(elements) {}
using std::string;
using std::make_shared;
using std::vector;
using std::set;
using std::dynamic_pointer_cast;
rule_ptr Choice::Build(const vector<rule_ptr> &elements) {
return make_shared<Choice>(elements);
}
Choice::Choice(const vector<rule_ptr> &elements) : elements(elements) {}
bool Choice::operator==(const Rule &rule) const {
const Choice *other = dynamic_cast<const Choice *>(&rule);
if (!other) return false;
size_t size = elements.size();
if (size != other->elements.size()) return false;
for (size_t i = 0; i < size; i++)
if (!elements[i]->operator==(*other->elements[i])) return false;
return true;
}
size_t Choice::hash_code() const {
size_t result = std::hash<size_t>()(elements.size());
for (const auto &element : elements)
result ^= element->hash_code();
return result;
}
rule_ptr Choice::copy() const {
return std::make_shared<Choice>(*this);
}
string Choice::to_string() const {
string result = "#<choice";
for (const auto &element : elements)
result += " " + element->to_string();
return result + ">";
}
void Choice::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
rule_ptr Choice::Build(const vector<rule_ptr> &elements) {
return make_shared<Choice>(elements);
}
// Equal when `rule` is a Choice with the same number of elements and every
// element compares equal to the element at the same position.
bool Choice::operator==(const Rule &rule) const {
  auto that = dynamic_cast<const Choice *>(&rule);
  if (!that)
    return false;

  const size_t count = elements.size();
  if (that->elements.size() != count)
    return false;

  size_t position = 0;
  while (position < count) {
    if (!elements[position]->operator==(*that->elements[position]))
      return false;
    ++position;
  }
  return true;
}
// Hash of the element count XOR-ed with the hash_code() of every element.
size_t Choice::hash_code() const {
  size_t digest = std::hash<size_t>()(elements.size());
  for (auto it = elements.begin(); it != elements.end(); ++it)
    digest ^= (*it)->hash_code();
  return digest;
}
rule_ptr Choice::copy() const { return std::make_shared<Choice>(*this); }
// Renders the choice as "#<choice e1 e2 ...>", each element preceded by a
// single space.
string Choice::to_string() const {
  string text = "#<choice";
  for (const auto &member : elements) {
    text += " ";
    text += member->to_string();
  }
  text += ">";
  return text;
}
void Choice::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -6,21 +6,23 @@
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Choice : public Rule {
public:
explicit Choice(const std::vector<rule_ptr> &elements);
static rule_ptr Build(const std::vector<rule_ptr> &rules);
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class Choice : public Rule {
public:
explicit Choice(const std::vector<rule_ptr> &elements);
static rule_ptr Build(const std::vector<rule_ptr> &rules);
const std::vector<rule_ptr> elements;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const std::vector<rule_ptr> elements;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_CHOICE_H_

View file

@ -3,45 +3,44 @@
#include <map>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::hash;
using std::make_shared;
using std::map;
namespace tree_sitter {
namespace rules {
namespace rules {
Metadata::Metadata(rule_ptr rule, map<MetadataKey, int> values) : rule(rule), value(values) {}
using std::hash;
using std::make_shared;
using std::map;
bool Metadata::operator==(const Rule &rule) const {
auto other = dynamic_cast<const Metadata *>(&rule);
return other && other->value == value && other->rule->operator==(*this->rule);
}
Metadata::Metadata(rule_ptr rule, map<MetadataKey, int> values)
: rule(rule), value(values) {}
size_t Metadata::hash_code() const {
size_t result = hash<size_t>()(value.size());
for (auto &pair : value) {
result ^= hash<int>()(pair.first);
result ^= hash<int>()(pair.second);
}
return result;
}
rule_ptr Metadata::copy() const {
return make_shared<Metadata>(rule->copy(), value);
}
int Metadata::value_for(MetadataKey key) const {
auto pair = value.find(key);
return (pair != value.end()) ?
pair->second :
0;
}
std::string Metadata::to_string() const {
return "#<metadata " + rule->to_string() + ">";
}
void Metadata::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Equal when `rule` is a Metadata with the same key/value map wrapping an
// equal inner rule. Checks run in that order and stop at the first mismatch.
bool Metadata::operator==(const Rule &rule) const {
  const Metadata *that = dynamic_cast<const Metadata *>(&rule);
  if (!that)
    return false;
  if (!(that->value == value))
    return false;
  return that->rule->operator==(*this->rule);
}
// Hash of the map size XOR-ed with the int hashes of every key and value.
// The wrapped rule does not take part in the hash.
size_t Metadata::hash_code() const {
  size_t digest = hash<size_t>()(value.size());
  for (auto entry = value.begin(); entry != value.end(); ++entry)
    digest ^= hash<int>()(entry->first) ^ hash<int>()(entry->second);
  return digest;
}
rule_ptr Metadata::copy() const {
return make_shared<Metadata>(rule->copy(), value);
}
// Looks up `key` in the metadata map; returns the stored value, or 0 when the
// key is absent.
int Metadata::value_for(MetadataKey key) const {
  auto found = value.find(key);
  if (found == value.end())
    return 0;
  return found->second;
}
std::string Metadata::to_string() const {
return "#<metadata " + rule->to_string() + ">";
}
void Metadata::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -5,30 +5,32 @@
#include <map>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
typedef enum {
START_TOKEN,
PRECEDENCE,
IS_TOKEN,
DESCRIPTION,
} MetadataKey;
namespace tree_sitter {
namespace rules {
class Metadata : public Rule {
public:
Metadata(rule_ptr rule, std::map<MetadataKey, int> value);
typedef enum {
START_TOKEN,
PRECEDENCE,
IS_TOKEN,
DESCRIPTION,
} MetadataKey;
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
int value_for(MetadataKey key) const;
class Metadata : public Rule {
public:
Metadata(rule_ptr rule, std::map<MetadataKey, int> value);
const rule_ptr rule;
const std::map<MetadataKey, int> value;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
int value_for(MetadataKey key) const;
const rule_ptr rule;
const std::map<MetadataKey, int> value;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_METADATA_H_

View file

@ -2,32 +2,30 @@
#include <string>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::hash;
namespace tree_sitter {
namespace rules {
namespace rules {
NamedSymbol::NamedSymbol(const std::string &name) : name(name) {}
using std::string;
using std::hash;
bool NamedSymbol::operator==(const Rule &rule) const {
auto other = dynamic_cast<const NamedSymbol *>(&rule);
return other && other->name == name;
}
NamedSymbol::NamedSymbol(const std::string &name) : name(name) {}
size_t NamedSymbol::hash_code() const {
return hash<string>()(name);
}
rule_ptr NamedSymbol::copy() const {
return std::make_shared<NamedSymbol>(*this);
}
string NamedSymbol::to_string() const {
return string("#<sym '") + name + "'>";
}
void NamedSymbol::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Equal when `rule` is a NamedSymbol carrying the same name.
bool NamedSymbol::operator==(const Rule &rule) const {
  const NamedSymbol *that = dynamic_cast<const NamedSymbol *>(&rule);
  if (!that)
    return false;
  return that->name == name;
}
size_t NamedSymbol::hash_code() const { return hash<string>()(name); }
rule_ptr NamedSymbol::copy() const {
return std::make_shared<NamedSymbol>(*this);
}
string NamedSymbol::to_string() const {
return string("#<sym '") + name + "'>";
}
void NamedSymbol::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -4,21 +4,23 @@
#include <string>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class NamedSymbol : public Rule {
public:
explicit NamedSymbol(const std::string &name);
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class NamedSymbol : public Rule {
public:
explicit NamedSymbol(const std::string &name);
std::string name;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
std::string name;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_NAMED_SYMBOL_H_

View file

@ -4,31 +4,27 @@
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
namespace rules {
using std::string;
using std::hash;
namespace rules {
Pattern::Pattern(const string &string) : value(string) {}
using std::string;
using std::hash;
bool Pattern::operator==(tree_sitter::rules::Rule const &other) const {
auto pattern = dynamic_cast<const Pattern *>(&other);
return pattern && (pattern->value == value);
}
Pattern::Pattern(const string &string) : value(string) {}
size_t Pattern::hash_code() const {
return hash<string>()(value);
}
rule_ptr Pattern::copy() const {
return std::make_shared<Pattern>(*this);
}
string Pattern::to_string() const {
return string("#<pattern '") + util::escape_string(value) + "'>";
}
void Pattern::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Equal when `other` is a Pattern with an identical pattern string.
bool Pattern::operator==(tree_sitter::rules::Rule const &other) const {
  const Pattern *that = dynamic_cast<const Pattern *>(&other);
  if (!that)
    return false;
  return that->value == value;
}
size_t Pattern::hash_code() const { return hash<string>()(value); }
rule_ptr Pattern::copy() const { return std::make_shared<Pattern>(*this); }
string Pattern::to_string() const {
return string("#<pattern '") + util::escape_string(value) + "'>";
}
void Pattern::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -5,21 +5,22 @@
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Pattern : public Rule {
public:
explicit Pattern(const std::string &string);
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class Pattern : public Rule {
public:
explicit Pattern(const std::string &string);
const std::string value;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const std::string value;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_PATTERN_H_

View file

@ -3,30 +3,26 @@
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
namespace rules {
namespace rules {
Repeat::Repeat(const rule_ptr content) : content(content) {}
using std::string;
bool Repeat::operator==(const Rule &rule) const {
const Repeat *other = dynamic_cast<const Repeat *>(&rule);
return other && (*other->content == *content);
}
Repeat::Repeat(const rule_ptr content) : content(content) {}
size_t Repeat::hash_code() const {
return content->hash_code();
}
rule_ptr Repeat::copy() const {
return std::make_shared<Repeat>(*this);
}
string Repeat::to_string() const {
return string("#<repeat ") + content->to_string() + ">";
}
void Repeat::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Equal when `rule` is a Repeat whose content rule equals this one's content.
bool Repeat::operator==(const Rule &rule) const {
  auto that = dynamic_cast<const Repeat *>(&rule);
  if (!that)
    return false;
  return *that->content == *content;
}
size_t Repeat::hash_code() const { return content->hash_code(); }
rule_ptr Repeat::copy() const { return std::make_shared<Repeat>(*this); }
string Repeat::to_string() const {
return string("#<repeat ") + content->to_string() + ">";
}
void Repeat::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -4,21 +4,23 @@
#include <string>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Repeat : public Rule {
public:
explicit Repeat(rule_ptr content);
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class Repeat : public Rule {
public:
explicit Repeat(rule_ptr content);
const rule_ptr content;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const rule_ptr content;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_REPEAT_H_

View file

@ -2,26 +2,28 @@
#include <set>
namespace tree_sitter {
using std::ostream;
using std::string;
namespace rules {
namespace rules {
bool Rule::operator!=(const Rule &other) const {
return !this->operator==(other);
}
using std::ostream;
using std::string;
ostream& operator<<(ostream& stream, const Rule &rule) {
return stream << rule.to_string();
}
ostream& operator<<(ostream& stream, const rule_ptr &rule) {
if (rule.get())
stream << *rule;
else
stream << string("#<null-rule>");
return stream;
}
Rule::~Rule() {}
}
// Inequality is defined as the negation of the virtual operator==.
bool Rule::operator!=(const Rule &other) const {
  const bool same = this->operator==(other);
  return !same;
}
// Streams a rule by writing its to_string() representation.
ostream &operator<<(ostream &stream, const Rule &rule) {
  stream << rule.to_string();
  return stream;
}
// Streams the rule a rule_ptr points to, or the placeholder "#<null-rule>"
// when the pointer is null.
ostream &operator<<(ostream &stream, const rule_ptr &rule) {
  if (!rule)
    return stream << string("#<null-rule>");
  return stream << *rule;
}
Rule::~Rule() {}
} // namespace rules
} // namespace tree_sitter

View file

@ -5,35 +5,39 @@
#include <memory>
namespace tree_sitter {
namespace rules {
class Visitor;
class Rule;
namespace rules {
typedef std::shared_ptr<Rule> rule_ptr;
class Visitor;
class Rule;
class Rule {
public:
virtual bool operator==(const Rule& other) const = 0;
bool operator!=(const Rule& other) const;
virtual size_t hash_code() const = 0;
virtual rule_ptr copy() const = 0;
virtual std::string to_string() const = 0;
virtual void accept(Visitor *visitor) const = 0;
virtual ~Rule();
};
typedef std::shared_ptr<Rule> rule_ptr;
std::ostream& operator<<(std::ostream& stream, const Rule &rule);
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule);
}
}
class Rule {
public:
virtual bool operator==(const Rule &other) const = 0;
bool operator!=(const Rule &other) const;
virtual size_t hash_code() const = 0;
virtual rule_ptr copy() const = 0;
virtual std::string to_string() const = 0;
virtual void accept(Visitor *visitor) const = 0;
virtual ~Rule();
};
std::ostream &operator<<(std::ostream &stream, const Rule &rule);
std::ostream &operator<<(std::ostream &stream, const rule_ptr &rule);
} // namespace rules
} // namespace tree_sitter
namespace std {
template<>
struct hash<tree_sitter::rules::rule_ptr> {
size_t operator()(const tree_sitter::rules::rule_ptr &rule) const {
return typeid(*rule).hash_code() ^ rule->hash_code();
}
};
}
// std::hash specialization for rule_ptr. The hash is derived from the pointed-to
// rule (its dynamic type combined with its own hash_code()), not from the
// pointer value.
template <>
struct hash<tree_sitter::rules::rule_ptr> {
  size_t operator()(const tree_sitter::rules::rule_ptr &rule) const {
    // A null rule_ptr has no rule to inspect; dereferencing it for typeid or
    // hash_code() would be invalid, so all null pointers hash to 0.
    if (!rule)
      return 0;

    // Bind the reference first so typeid is applied to a plain lvalue rather
    // than to an expression that calls shared_ptr::operator*.
    const tree_sitter::rules::Rule &target = *rule;
    return typeid(target).hash_code() ^ target.hash_code();
  }
};
} // namespace std
#endif // COMPILER_RULES_RULE_H_

View file

@ -16,65 +16,55 @@
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {
using std::make_shared;
using std::string;
using std::set;
using std::vector;
using std::map;
namespace rules {
namespace rules {
static const int KEYWORD_PRECEDENCE = 100;
using std::make_shared;
using std::string;
using std::set;
using std::vector;
using std::map;
static rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
return std::make_shared<Metadata>(rule, values);
}
static const int KEYWORD_PRECEDENCE = 100;
rule_ptr blank() {
return make_shared<Blank>();
}
rule_ptr choice(const vector<rule_ptr> &rules) {
return Choice::Build(rules);
}
rule_ptr repeat(const rule_ptr &content) {
return std::make_shared<Repeat>(content);
}
rule_ptr seq(const vector<rule_ptr> &rules) {
return Seq::Build(rules);
}
rule_ptr sym(const string &name) {
return make_shared<NamedSymbol>(name);
}
rule_ptr pattern(const string &value) {
return make_shared<Pattern>(value);
}
rule_ptr str(const string &value) {
return make_shared<String>(value);
}
rule_ptr keyword(const string &value) {
return token(prec(KEYWORD_PRECEDENCE, str(value)));
}
rule_ptr keypattern(const string &value) {
return token(prec(KEYWORD_PRECEDENCE, pattern(value)));
}
rule_ptr err(const rule_ptr &rule) {
return choice({ rule, ERROR().copy() });
}
rule_ptr prec(int precedence, rule_ptr rule) {
return metadata(rule, {{ PRECEDENCE, precedence }});
}
rule_ptr token(rule_ptr rule) {
return metadata(rule, {{ IS_TOKEN, 1 }});
}
}
static rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
return std::make_shared<Metadata>(rule, values);
}
rule_ptr blank() { return make_shared<Blank>(); }
rule_ptr choice(const vector<rule_ptr> &rules) { return Choice::Build(rules); }
rule_ptr repeat(const rule_ptr &content) {
return std::make_shared<Repeat>(content);
}
rule_ptr seq(const vector<rule_ptr> &rules) { return Seq::Build(rules); }
rule_ptr sym(const string &name) { return make_shared<NamedSymbol>(name); }
rule_ptr pattern(const string &value) { return make_shared<Pattern>(value); }
rule_ptr str(const string &value) { return make_shared<String>(value); }
// Builds a token for the literal string `value`, wrapped with
// KEYWORD_PRECEDENCE.
rule_ptr keyword(const string &value) {
  rule_ptr literal = str(value);
  rule_ptr prioritized = prec(KEYWORD_PRECEDENCE, literal);
  return token(prioritized);
}
// Builds a token for the pattern `value`, wrapped with KEYWORD_PRECEDENCE.
rule_ptr keypattern(const string &value) {
  rule_ptr matcher = pattern(value);
  rule_ptr prioritized = prec(KEYWORD_PRECEDENCE, matcher);
  return token(prioritized);
}
// Wraps `rule` in a choice between the rule itself and a copy of the built-in
// ERROR() symbol.
rule_ptr err(const rule_ptr &rule) {
  rule_ptr alternatives = choice({ rule, ERROR().copy() });
  return alternatives;
}
rule_ptr prec(int precedence, rule_ptr rule) {
return metadata(rule, { { PRECEDENCE, precedence } });
}
rule_ptr token(rule_ptr rule) {
return metadata(rule, { { IS_TOKEN, 1 } });
}
} // namespace rules
} // namespace tree_sitter

View file

@ -4,39 +4,36 @@
#include "compiler/rules/blank.h"
namespace tree_sitter {
using std::make_shared;
using std::string;
using std::vector;
namespace rules {
namespace rules {
Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {}
using std::make_shared;
using std::string;
using std::vector;
rule_ptr Seq::Build(const std::vector<rule_ptr> &rules) {
rule_ptr result = make_shared<Blank>();
for (auto &rule : rules)
result = (typeid(*result) != typeid(Blank)) ? make_shared<Seq>(result, rule) : rule;
return result;
}
Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {}
bool Seq::operator==(const Rule &rule) const {
const Seq *other = dynamic_cast<const Seq *>(&rule);
return other && (*other->left == *left) && (*other->right == *right);
}
size_t Seq::hash_code() const {
return left->hash_code() ^ right->hash_code();
}
rule_ptr Seq::copy() const {
return std::make_shared<Seq>(*this);
}
string Seq::to_string() const {
return string("#<seq ") + left->to_string() + " " + right->to_string() + ">";
}
void Seq::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Folds `rules` into a left-nested chain of Seq nodes.
//
// The accumulator starts as a Blank. Whenever the accumulator is a Blank, the
// next rule replaces it outright; otherwise the accumulator and the next rule
// are combined into a new Seq. An empty input therefore yields a Blank and a
// single-element input yields that element itself.
rule_ptr Seq::Build(const std::vector<rule_ptr> &rules) {
  rule_ptr chain = make_shared<Blank>();
  for (auto &next : rules) {
    if (typeid(*chain) != typeid(Blank))
      chain = make_shared<Seq>(chain, next);
    else
      chain = next;
  }
  return chain;
}
// Equal when `rule` is a Seq whose left and right children both compare equal
// to this sequence's children (left is checked first).
bool Seq::operator==(const Rule &rule) const {
  auto that = dynamic_cast<const Seq *>(&rule);
  if (!that)
    return false;
  if (!(*that->left == *left))
    return false;
  return *that->right == *right;
}
size_t Seq::hash_code() const { return left->hash_code() ^ right->hash_code(); }
rule_ptr Seq::copy() const { return std::make_shared<Seq>(*this); }
// Renders the sequence as "#<seq LEFT RIGHT>".
string Seq::to_string() const {
  string text("#<seq ");
  text += left->to_string();
  text += " ";
  text += right->to_string();
  text += ">";
  return text;
}
void Seq::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -5,23 +5,25 @@
#include <vector>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Seq : public Rule {
public:
Seq(rule_ptr left, rule_ptr right);
static rule_ptr Build(const std::vector<rule_ptr> &rules);
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class Seq : public Rule {
public:
Seq(rule_ptr left, rule_ptr right);
static rule_ptr Build(const std::vector<rule_ptr> &rules);
const rule_ptr left;
const rule_ptr right;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const rule_ptr left;
const rule_ptr right;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_SEQ_H_

View file

@ -2,32 +2,26 @@
#include <string>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::hash;
namespace tree_sitter {
namespace rules {
namespace rules {
String::String(string value) : value(value) {}
using std::string;
using std::hash;
bool String::operator==(const Rule &rule) const {
const String *other = dynamic_cast<const String *>(&rule);
return other && (other->value == value);
}
String::String(string value) : value(value) {}
size_t String::hash_code() const {
return hash<string>()(value);
}
rule_ptr String::copy() const {
return std::make_shared<String>(*this);
}
string String::to_string() const {
return string("#<string '") + value + "'>";
}
void String::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Equal when `rule` is a String holding the same text.
bool String::operator==(const Rule &rule) const {
  auto that = dynamic_cast<const String *>(&rule);
  if (!that)
    return false;
  return that->value == value;
}
size_t String::hash_code() const { return hash<string>()(value); }
rule_ptr String::copy() const { return std::make_shared<String>(*this); }
string String::to_string() const { return string("#<string '") + value + "'>"; }
void String::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -4,21 +4,23 @@
#include <string>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class String : public Rule {
public:
explicit String(std::string value);
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class String : public Rule {
public:
explicit String(std::string value);
const std::string value;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const std::string value;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_STRING_H_

View file

@ -3,63 +3,54 @@
#include <map>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::to_string;
using std::hash;
namespace tree_sitter {
namespace rules {
namespace rules {
Symbol::Symbol(int index) :
index(index),
options(SymbolOption(0)) {}
using std::string;
using std::to_string;
using std::hash;
Symbol::Symbol(int index, SymbolOption options) :
index(index),
options(options) {}
Symbol::Symbol(int index) : index(index), options(SymbolOption(0)) {}
bool Symbol::operator==(const Symbol &other) const {
return (other.index == index) && (other.options == options);
}
Symbol::Symbol(int index, SymbolOption options)
: index(index), options(options) {}
bool Symbol::operator==(const Rule &rule) const {
const Symbol *other = dynamic_cast<const Symbol *>(&rule);
return other && this->operator==(*other);
}
size_t Symbol::hash_code() const {
return hash<int>()(index) ^ hash<int16_t>()(options);
}
rule_ptr Symbol::copy() const {
return std::make_shared<Symbol>(*this);
}
string Symbol::to_string() const {
string name = (options & SymbolOptionAuxiliary) ? "aux_" : "";
name += (options & SymbolOptionToken) ? "token" : "sym";
return "#<" + name + " " + std::to_string(index) + ">";
}
bool Symbol::operator<(const Symbol &other) const {
if (options < other.options) return true;
if (options > other.options) return false;
return (index < other.index);
}
bool Symbol::is_token() const {
return options & SymbolOptionToken;
}
bool Symbol::is_built_in() const {
return index < 0;
}
bool Symbol::is_auxiliary() const {
return options & SymbolOptionAuxiliary;
}
void Symbol::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Two symbols are equal when both their index and their option flags match.
bool Symbol::operator==(const Symbol &other) const {
  if (other.index != index)
    return false;
  return other.options == options;
}
// Rule-level equality: `rule` must be a Symbol, in which case the comparison
// is delegated to the Symbol-to-Symbol operator==.
bool Symbol::operator==(const Rule &rule) const {
  const Symbol *that = dynamic_cast<const Symbol *>(&rule);
  if (!that)
    return false;
  return this->operator==(*that);
}
size_t Symbol::hash_code() const {
return hash<int>()(index) ^ hash<int16_t>()(options);
}
rule_ptr Symbol::copy() const { return std::make_shared<Symbol>(*this); }
// Renders the symbol as "#<KIND INDEX>", where KIND is "token" or "sym",
// prefixed with "aux_" when the auxiliary flag is set.
string Symbol::to_string() const {
  string label;
  if (options & SymbolOptionAuxiliary)
    label = "aux_";
  if (options & SymbolOptionToken)
    label += "token";
  else
    label += "sym";
  return "#<" + label + " " + std::to_string(index) + ">";
}
// Orders symbols by option flags first, then by index when the flags match.
bool Symbol::operator<(const Symbol &other) const {
  if (options != other.options)
    return options < other.options;
  return index < other.index;
}
bool Symbol::is_token() const { return options & SymbolOptionToken; }
bool Symbol::is_built_in() const { return index < 0; }
bool Symbol::is_auxiliary() const { return options & SymbolOptionAuxiliary; }
void Symbol::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -4,44 +4,48 @@
#include <string>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
typedef enum {
SymbolOptionToken = 1 << 0,
SymbolOptionAuxiliary = 1 << 1,
} SymbolOption;
namespace tree_sitter {
namespace rules {
class Symbol : public Rule {
public:
explicit Symbol(int index);
Symbol(int index, SymbolOption options);
typedef enum {
SymbolOptionToken = 1 << 0,
SymbolOptionAuxiliary = 1 << 1,
} SymbolOption;
bool operator==(const Symbol &other) const;
bool operator==(const Rule &other) const;
class Symbol : public Rule {
public:
explicit Symbol(int index);
Symbol(int index, SymbolOption options);
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
bool operator==(const Symbol &other) const;
bool operator==(const Rule &other) const;
bool operator<(const Symbol &other) const;
bool is_token() const;
bool is_built_in() const;
bool is_auxiliary() const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
int index;
SymbolOption options;
};
}
}
bool operator<(const Symbol &other) const;
bool is_token() const;
bool is_built_in() const;
bool is_auxiliary() const;
int index;
SymbolOption options;
};
} // namespace rules
} // namespace tree_sitter
namespace std {
template<>
struct hash<tree_sitter::rules::Symbol> {
size_t operator()(const tree_sitter::rules::Symbol &rule) const {
return rule.hash_code();
}
};
}
template <>
struct hash<tree_sitter::rules::Symbol> {
size_t operator()(const tree_sitter::rules::Symbol &rule) const {
return rule.hash_code();
}
};
} // namespace std
#endif // COMPILER_RULES_SYMBOL_H_

View file

@ -11,32 +11,34 @@
#include "compiler/rules/repeat.h"
namespace tree_sitter {
using std::vector;
namespace rules {
namespace rules {
Visitor::~Visitor() {}
using std::vector;
// Fallback transformation: the result is simply a copy of the rule.
rule_ptr IdentityRuleFn::default_apply(const Rule *rule) {
  rule_ptr duplicate = rule->copy();
  return duplicate;
}
// Out-of-line definition of Visitor's virtual destructor; it has no work to do.
Visitor::~Visitor() = default;
// Rebuilds a Choice from the transformed version of each of its elements,
// preserving element order.
rule_ptr IdentityRuleFn::apply_to(const Choice *rule) {
  vector<rule_ptr> transformed;
  for (const auto &element : rule->elements) {
    transformed.push_back(apply(element));
  }
  return Choice::Build(transformed);
}
// Rebuilds a Seq from its transformed left and right halves (left first).
rule_ptr IdentityRuleFn::apply_to(const Seq *rule) {
  rule_ptr new_left = apply(rule->left);
  rule_ptr new_right = apply(rule->right);
  return Seq::Build({ new_left, new_right });
}
// Wraps the transformed content of the Repeat in a fresh Repeat rule.
rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) {
  rule_ptr new_content = apply(rule->content);
  return std::make_shared<Repeat>(new_content);
}
// Wraps the transformed inner rule in a fresh Metadata carrying the same value.
rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) {
  rule_ptr new_inner = apply(rule->rule);
  return std::make_shared<Metadata>(new_inner, rule->value);
}
}
// Fallback transformation: the result is simply a copy of the rule.
rule_ptr IdentityRuleFn::default_apply(const Rule *rule) {
  rule_ptr duplicate = rule->copy();
  return duplicate;
}
// Rebuilds a Choice from the transformed version of each of its elements,
// preserving element order.
rule_ptr IdentityRuleFn::apply_to(const Choice *rule) {
  vector<rule_ptr> transformed;
  for (const auto &element : rule->elements) {
    transformed.push_back(apply(element));
  }
  return Choice::Build(transformed);
}
// Rebuilds a Seq from its transformed left and right halves (left first).
rule_ptr IdentityRuleFn::apply_to(const Seq *rule) {
  rule_ptr new_left = apply(rule->left);
  rule_ptr new_right = apply(rule->right);
  return Seq::Build({ new_left, new_right });
}
// Wraps the transformed content of the Repeat in a fresh Repeat rule.
rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) {
  rule_ptr new_content = apply(rule->content);
  return std::make_shared<Repeat>(new_content);
}
// Wraps the transformed inner rule in a fresh Metadata carrying the same value.
rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) {
  rule_ptr new_inner = apply(rule->rule);
  return std::make_shared<Metadata>(new_inner, rule->value);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -4,79 +4,101 @@
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Blank;
class NamedSymbol;
class CharacterSet;
class Choice;
class Repeat;
class Seq;
class String;
class Symbol;
class Pattern;
class Metadata;
namespace rules {
// Abstract visitor over the concrete rule classes: one pure-virtual visit()
// overload per rule type. A rule's accept() calls visitor->visit(this), which
// selects the overload matching that rule's own type.
class Visitor {
public:
virtual void visit(const Blank *rule) = 0;
virtual void visit(const CharacterSet *rule) = 0;
virtual void visit(const Choice *rule) = 0;
virtual void visit(const Metadata *rule) = 0;
virtual void visit(const Pattern *rule) = 0;
virtual void visit(const Repeat *rule) = 0;
virtual void visit(const Seq *rule) = 0;
virtual void visit(const String *rule) = 0;
virtual void visit(const NamedSymbol *rule) = 0;
virtual void visit(const Symbol *rule) = 0;
// Virtual so that derived visitors can be destroyed through a Visitor pointer;
// defined out of line.
virtual ~Visitor();
};
class Blank;
class NamedSymbol;
class CharacterSet;
class Choice;
class Repeat;
class Seq;
class String;
class Symbol;
class Pattern;
class Metadata;
template<typename T>
class RuleFn : private Visitor {
public:
// Runs this function object on `rule` and returns the computed value.
// Statement order matters: value_ is first reset to T(), then
// rule->accept(this) dispatches to one of the visit() overloads, which stores
// the apply_to() result in value_, and only then is value_ returned.
T apply(const rule_ptr &rule) {
value_ = T();
rule->accept(this);
return value_;
}
// Abstract visitor over the concrete rule classes: one pure-virtual visit()
// overload per rule type. A rule's accept() calls visitor->visit(this), which
// selects the overload matching that rule's own type.
class Visitor {
public:
virtual void visit(const Blank *rule) = 0;
virtual void visit(const CharacterSet *rule) = 0;
virtual void visit(const Choice *rule) = 0;
virtual void visit(const Metadata *rule) = 0;
virtual void visit(const Pattern *rule) = 0;
virtual void visit(const Repeat *rule) = 0;
virtual void visit(const Seq *rule) = 0;
virtual void visit(const String *rule) = 0;
virtual void visit(const NamedSymbol *rule) = 0;
virtual void visit(const Symbol *rule) = 0;
// Virtual so that derived visitors can be destroyed through a Visitor pointer;
// defined out of line.
virtual ~Visitor();
};
protected:
// Overridable hooks, one per rule type. default_apply() returns a
// value-initialized T; every apply_to() overload forwards to default_apply(),
// so a subclass only overrides the rule types it cares about (or
// default_apply() itself to change the fallback).
// NOTE(review): the rule classes appear to be only forward-declared in this
// header. If so, the C-style cast to `const Rule *` cannot be a static_cast and
// behaves as a reinterpret_cast, which assumes Rule sits at offset 0 of each
// rule object -- confirm before changing the cast style.
virtual T default_apply(const Rule *rule) { return T(); }
virtual T apply_to(const Blank *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const CharacterSet *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Choice *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Metadata *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Pattern *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Repeat *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Seq *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const String *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const NamedSymbol *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Symbol *rule) { return default_apply((const Rule *)rule); }
template <typename T>
class RuleFn : private Visitor {
public:
// Runs this function object on `rule` and returns the computed value.
// Statement order matters: value_ is first reset to T(), then
// rule->accept(this) dispatches to one of the visit() overloads, which stores
// the apply_to() result in value_, and only then is value_ returned.
T apply(const rule_ptr &rule) {
value_ = T();
rule->accept(this);
return value_;
}
void visit(const Blank *rule) { value_ = apply_to(rule); }
void visit(const CharacterSet *rule) { value_ = apply_to(rule); }
void visit(const Choice *rule) { value_ = apply_to(rule); }
void visit(const Metadata *rule) { value_ = apply_to(rule); }
void visit(const Pattern *rule) { value_ = apply_to(rule); }
void visit(const Repeat *rule) { value_ = apply_to(rule); }
void visit(const Seq *rule) { value_ = apply_to(rule); }
void visit(const String *rule) { value_ = apply_to(rule); }
void visit(const NamedSymbol *rule) { value_ = apply_to(rule); }
void visit(const Symbol *rule) { value_ = apply_to(rule); }
protected:
// Overridable hooks, one per rule type. default_apply() returns a
// value-initialized T; every apply_to() overload forwards to default_apply(),
// so a subclass only overrides the rule types it cares about (or
// default_apply() itself to change the fallback).
// NOTE(review): the rule classes appear to be only forward-declared in this
// header. If so, the C-style cast to `const Rule *` cannot be a static_cast and
// behaves as a reinterpret_cast, which assumes Rule sits at offset 0 of each
// rule object -- confirm before changing the cast style.
virtual T default_apply(const Rule *rule) { return T(); }
virtual T apply_to(const Blank *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const CharacterSet *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Choice *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Metadata *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Pattern *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Repeat *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Seq *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const String *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const NamedSymbol *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Symbol *rule) {
return default_apply((const Rule *)rule);
}
private:
T value_;
};
void visit(const Blank *rule) { value_ = apply_to(rule); }
void visit(const CharacterSet *rule) { value_ = apply_to(rule); }
void visit(const Choice *rule) { value_ = apply_to(rule); }
void visit(const Metadata *rule) { value_ = apply_to(rule); }
void visit(const Pattern *rule) { value_ = apply_to(rule); }
void visit(const Repeat *rule) { value_ = apply_to(rule); }
void visit(const Seq *rule) { value_ = apply_to(rule); }
void visit(const String *rule) { value_ = apply_to(rule); }
void visit(const NamedSymbol *rule) { value_ = apply_to(rule); }
void visit(const Symbol *rule) { value_ = apply_to(rule); }
// Rule-to-rule function that reproduces its input. Choice, Metadata, Seq and
// Repeat are rebuilt from their transformed children; every other rule type
// goes through default_apply(), which returns a copy of the rule.
// The members override RuleFn<rule_ptr> hooks, so they are marked `override`
// to have the compiler verify the signatures.
class IdentityRuleFn : public RuleFn<rule_ptr> {
 protected:
  rule_ptr default_apply(const Rule *rule) override;
  rule_ptr apply_to(const Choice *rule) override;
  rule_ptr apply_to(const Metadata *rule) override;
  rule_ptr apply_to(const Seq *rule) override;
  rule_ptr apply_to(const Repeat *rule) override;
};
}
}
private:
T value_;
};
// Rule-to-rule function that reproduces its input. Choice, Metadata, Seq and
// Repeat are rebuilt from their transformed children; every other rule type
// goes through default_apply(), which returns a copy of the rule.
// The members override RuleFn<rule_ptr> hooks, so they are marked `override`
// to have the compiler verify the signatures.
class IdentityRuleFn : public RuleFn<rule_ptr> {
 protected:
  rule_ptr default_apply(const Rule *rule) override;
  rule_ptr apply_to(const Choice *rule) override;
  rule_ptr apply_to(const Metadata *rule) override;
  rule_ptr apply_to(const Seq *rule) override;
  rule_ptr apply_to(const Repeat *rule) override;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_VISITOR_H_

View file

@ -2,47 +2,50 @@
#include <vector>
namespace tree_sitter {
using std::string;
using std::vector;
using std::set;
namespace util {
namespace util {
// Replaces every occurrence of `search` in `*input` with `replace`, in place.
// Scanning resumes just past each inserted replacement, so text introduced by
// a replacement is never matched again. An empty `search` leaves `*input`
// unchanged.
void str_replace(string *input, const string &search, const string &replace) {
  // An empty pattern matches at every position, so the loop below would never
  // terminate; treat it as "nothing to replace".
  if (search.empty())
    return;
  size_t pos = 0;
  while ((pos = input->find(search, pos)) != string::npos) {
    input->replace(pos, search.length(), replace);
    pos += replace.length();
  }
}
using std::string;
using std::vector;
using std::set;
// Returns a copy of `input` with double quotes and newlines replaced by their
// backslash escape sequences (in that order).
string escape_string(string input) {
  static const char *const kEscapes[][2] = {
    { "\"", "\\\"" },
    { "\n", "\\n" },
  };
  for (const auto &escape : kEscapes) {
    str_replace(&input, escape[0], escape[1]);
  }
  return input;
}
// Returns the escaped spelling of `character`: NUL, double quote, single
// quote, newline, carriage return, tab and backslash map to their backslash
// escape sequences; any other character is returned as a one-character string.
string escape_char(char character) {
  if (character == '\0')
    return "\\0";
  if (character == '"')
    return "\\\"";
  if (character == '\'')
    return "\\'";
  if (character == '\n')
    return "\\n";
  if (character == '\r')
    return "\\r";
  if (character == '\t')
    return "\\t";
  if (character == '\\')
    return "\\\\";
  return string(1, character);
}
}
// Replaces every occurrence of `search` in `*input` with `replace`, in place.
// Scanning resumes just past each inserted replacement, so text introduced by
// a replacement is never matched again. An empty `search` leaves `*input`
// unchanged.
void str_replace(string *input, const string &search, const string &replace) {
  // An empty pattern matches at every position, so the loop below would never
  // terminate; treat it as "nothing to replace".
  if (search.empty())
    return;
  size_t pos = 0;
  while ((pos = input->find(search, pos)) != string::npos) {
    input->replace(pos, search.length(), replace);
    pos += replace.length();
  }
}
// Returns a copy of `input` with double quotes and newlines replaced by their
// backslash escape sequences (quotes first, then newlines).
string escape_string(string input) {
  string *const text = &input;
  str_replace(text, "\"", "\\\"");
  str_replace(text, "\n", "\\n");
  return input;
}
// Returns the escaped spelling of `character`. Characters listed in the table
// map to their backslash escape sequence; anything else is returned as a
// one-character string.
string escape_char(char character) {
  static const struct {
    char raw;
    const char *escaped;
  } kEscapes[] = {
    { '\0', "\\0" },
    { '"', "\\\"" },
    { '\'', "\\'" },
    { '\n', "\\n" },
    { '\r', "\\r" },
    { '\t', "\\t" },
    { '\\', "\\\\" },
  };
  for (const auto &entry : kEscapes) {
    if (entry.raw == character)
      return entry.escaped;
  }
  return string(1, character);
}
} // namespace util
} // namespace tree_sitter

View file

@ -6,11 +6,14 @@
#include <set>
namespace tree_sitter {
namespace util {
void str_replace(std::string *input, const std::string &search, const std::string &replace);
std::string escape_string(std::string input);
std::string escape_char(char character);
}
}
namespace util {
void str_replace(std::string *input, const std::string &search,
const std::string &replace);
std::string escape_string(std::string input);
std::string escape_char(char character);
} // namespace util
} // namespace tree_sitter
#endif // COMPILER_UTIL_STRING_HELPERS_H_

View file

@ -11,11 +11,9 @@ struct TSDocument {
size_t error_count;
};
TSDocument * ts_document_make() {
TSDocument *ts_document_make() {
TSDocument *document = malloc(sizeof(TSDocument));
*document = (TSDocument) {
.input = (TSInput) {}
};
*document = (TSDocument) { .input = (TSInput) {} };
return document;
}
@ -33,12 +31,13 @@ void ts_document_set_parser(TSDocument *document, TSParser *parser) {
document->parser = parser;
}
const TSTree * ts_document_tree(const TSDocument *document) {
const TSTree *ts_document_tree(const TSDocument *document) {
return document->tree;
}
const char * ts_document_string(const TSDocument *document) {
return ts_tree_string(document->tree, ts_parser_config(document->parser).symbol_names);
const char *ts_document_string(const TSDocument *document) {
return ts_tree_string(document->tree,
ts_parser_config(document->parser).symbol_names);
}
void ts_document_set_input(TSDocument *document, TSInput input) {
@ -50,7 +49,8 @@ void ts_document_edit(TSDocument *document, TSInputEdit edit) {
document->tree = ts_parser_parse(document->parser, document->input, &edit);
}
const char * ts_document_symbol_name(const TSDocument *document, const TSTree *tree) {
const char *ts_document_symbol_name(const TSDocument *document,
const TSTree *tree) {
return ts_parser_config(document->parser).symbol_names[tree->symbol];
}
@ -60,7 +60,7 @@ typedef struct {
size_t length;
} TSStringInput;
const char * ts_string_input_read(void *d, size_t *bytes_read) {
const char *ts_string_input_read(void *d, size_t *bytes_read) {
TSStringInput *data = (TSStringInput *)d;
if (data->position >= data->length) {
*bytes_read = 0;
@ -83,24 +83,22 @@ TSInput ts_string_input_make(const char *string) {
data->string = string;
data->position = 0;
data->length = strlen(string);
TSInput input = {
.data = (void *)data,
.read_fn = ts_string_input_read,
.seek_fn = ts_string_input_seek,
.release_fn = free,
};
return input;
return (TSInput) { .data = (void *)data,
.read_fn = ts_string_input_read,
.seek_fn = ts_string_input_seek,
.release_fn = free };
}
/* Wraps `text` in a string-backed TSInput and installs it on the document. */
void ts_document_set_input_string(TSDocument *document, const char *text) {
  TSInput string_input = ts_string_input_make(text);
  ts_document_set_input(document, string_input);
}
TSNode * ts_document_root_node(const TSDocument *document) {
return ts_node_make_root(document->tree, document->parser->config.symbol_names);
TSNode *ts_document_root_node(const TSDocument *document) {
return ts_node_make_root(document->tree,
document->parser->config.symbol_names);
}
TSNode * ts_document_get_node(const TSDocument *document, size_t pos) {
TSNode *ts_document_get_node(const TSDocument *document, size_t pos) {
TSNode *root = ts_document_root_node(document);
TSNode *result = ts_node_leaf_at_pos(root, pos);
ts_node_release(root);

View file

@ -2,16 +2,14 @@
#include "runtime/tree.h"
TSLexer ts_lexer_make() {
return (TSLexer) {
.chunk = NULL,
.debug = 0,
.chunk_start = 0,
.chunk_size = 0,
.position_in_chunk = 0,
.token_start_position = 0,
.token_end_position = 0,
.reached_end = 0
};
return (TSLexer) { .chunk = NULL,
.debug = 0,
.chunk_start = 0,
.chunk_size = 0,
.position_in_chunk = 0,
.token_start_position = 0,
.token_end_position = 0,
.reached_end = 0 };
}
int ts_lexer_advance(TSLexer *lexer) {
@ -33,11 +31,10 @@ int ts_lexer_advance(TSLexer *lexer) {
return 1;
}
TSTree * ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
TSTree *ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
size_t current_position = ts_lexer_position(lexer);
size_t size = current_position - lexer->token_start_position;
size_t offset = lexer->token_start_position - lexer->token_end_position;
lexer->token_end_position = current_position;
return ts_tree_make_leaf(symbol, size, offset, is_hidden);
}

View file

@ -1,34 +1,33 @@
#include "runtime/node.h"
#include "runtime/tree.h"
TSNode * ts_node_make(const TSTree *tree, TSNode *parent, size_t index, size_t start_position, const char **names) {
if (parent) ts_node_retain(parent);
TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index,
size_t start_position, const char **names) {
if (parent)
ts_node_retain(parent);
TSNode *result = malloc(sizeof(TSNode));
*result = (TSNode) {
.ref_count = 1,
.parent = parent,
.index = index,
.content = tree,
.start_position = start_position,
.names = names,
};
*result = (TSNode) { .ref_count = 1,
.parent = parent,
.index = index,
.content = tree,
.start_position = start_position,
.names = names, };
return result;
}
TSNode * ts_node_make_root(const TSTree *tree, const char **names) {
TSNode *ts_node_make_root(const TSTree *tree, const char **names) {
while (ts_tree_is_wrapper(tree))
tree = tree->children[0];
return ts_node_make(tree, NULL, 0, 0, names);
}
/* Takes one additional reference on the node. */
void ts_node_retain(TSNode *node) {
  node->ref_count += 1;
}
/* Takes one additional reference on the node. */
void ts_node_retain(TSNode *node) {
  node->ref_count += 1;
}
void ts_node_release(TSNode *node) {
node->ref_count--;
if (node->ref_count == 0) {
if (node->parent) ts_node_release(node->parent);
if (node->parent)
ts_node_release(node->parent);
free(node);
}
}
@ -37,31 +36,27 @@ size_t ts_node_pos(const TSNode *node) {
return node->start_position + node->content->offset;
}
size_t ts_node_size(const TSNode *node) {
return node->content->size;
}
size_t ts_node_size(const TSNode *node) { return node->content->size; }
int ts_node_eq(const TSNode *left, const TSNode *right) {
return ts_tree_equals(left->content, right->content);
}
const char * ts_node_name(const TSNode *node) {
const char *ts_node_name(const TSNode *node) {
return node->names[node->content->symbol];
}
const char * ts_node_string(const TSNode *node) {
const char *ts_node_string(const TSNode *node) {
return ts_tree_string(node->content, node->names);
}
/* Returns the node's stored parent pointer as-is; no reference is retained here. */
TSNode *ts_node_parent(TSNode *child) {
  TSNode *parent = child->parent;
  return parent;
}
/* Returns the node's stored parent pointer as-is; no reference is retained here. */
TSNode *ts_node_parent(TSNode *child) {
  TSNode *parent = child->parent;
  return parent;
}
TSNode * ts_node_prev_sibling(TSNode *child) {
TSNode *ts_node_prev_sibling(TSNode *child) {
return ts_node_child(child->parent, child->index - 1);
}
TSNode * ts_node_next_sibling(TSNode *child) {
TSNode *ts_node_next_sibling(TSNode *child) {
return ts_node_child(child->parent, child->index + 1);
}
@ -71,25 +66,29 @@ size_t ts_node_child_count(const TSNode *parent) {
return result;
}
TSNode * ts_node_child(TSNode *parent, size_t index) {
TSNode *ts_node_child(TSNode *parent, size_t index) {
size_t child_count;
TSChildWithPosition *children = ts_tree_visible_children(parent->content, &child_count);
TSChildWithPosition *children =
ts_tree_visible_children(parent->content, &child_count);
if (child_count <= index)
return NULL;
size_t position = parent->start_position + children[index].position;
return ts_node_make(children[index].tree, parent, index, position, parent->names);
return ts_node_make(children[index].tree, parent, index, position,
parent->names);
}
TSNode * ts_node_leaf_at_pos(TSNode *parent, size_t position) {
TSNode *ts_node_leaf_at_pos(TSNode *parent, size_t position) {
size_t child_count;
TSChildWithPosition *children = ts_tree_visible_children(parent->content, &child_count);
TSChildWithPosition *children =
ts_tree_visible_children(parent->content, &child_count);
for (size_t i = 0; i < child_count; i++) {
TSChildWithPosition child = children[i];
size_t child_left = child.position + child.tree->offset;
if (child_left > position)
break;
if (child_left + child.tree->size > position) {
TSNode *node = ts_node_make(child.tree, parent, i, child.position, parent->names);
TSNode *node =
ts_node_make(child.tree, parent, i, child.position, parent->names);
TSNode *result = ts_node_leaf_at_pos(node, position);
ts_node_release(node);
return result;

Some files were not shown because too many files have changed in this diff. Show more