Auto-format all source code with clang-format
This commit is contained in:
parent
b8d8386e63
commit
98cc2f2264
105 changed files with 4223 additions and 4052 deletions
|
|
@ -2,28 +2,30 @@
|
|||
#include "helpers.h"
|
||||
|
||||
namespace tree_sitter_examples {
|
||||
using tree_sitter::Grammar;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
extern const Grammar arithmetic({
|
||||
{ "expression", choice({
|
||||
sym("sum"),
|
||||
sym("difference"),
|
||||
sym("product"),
|
||||
sym("quotient"),
|
||||
sym("exponent"),
|
||||
sym("group"),
|
||||
sym("number"),
|
||||
sym("variable") }) },
|
||||
using tree_sitter::Grammar;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
{ "sum", infix_op("+", "expression", 1) },
|
||||
{ "difference", infix_op("-", "expression", 1) },
|
||||
{ "product", infix_op("*", "expression", 2) },
|
||||
{ "quotient", infix_op("/", "expression", 2) },
|
||||
{ "exponent", infix_op("^", "expression", 3) },
|
||||
{ "group", in_parens(err(sym("expression"))) },
|
||||
extern const Grammar arithmetic({
|
||||
{ "expression", choice({
|
||||
sym("sum"),
|
||||
sym("difference"),
|
||||
sym("product"),
|
||||
sym("quotient"),
|
||||
sym("exponent"),
|
||||
sym("group"),
|
||||
sym("number"),
|
||||
sym("variable") }) },
|
||||
|
||||
{ "number", pattern("\\d+") },
|
||||
{ "variable", pattern("\\a[\\w_]*") },
|
||||
});
|
||||
}
|
||||
{ "sum", infix_op("+", "expression", 1) },
|
||||
{ "difference", infix_op("-", "expression", 1) },
|
||||
{ "product", infix_op("*", "expression", 2) },
|
||||
{ "quotient", infix_op("/", "expression", 2) },
|
||||
{ "exponent", infix_op("^", "expression", 3) },
|
||||
{ "group", in_parens(err(sym("expression"))) },
|
||||
|
||||
{ "number", pattern("\\d+") },
|
||||
{ "variable", pattern("\\a[\\w_]*") },
|
||||
});
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
|
|
@ -2,170 +2,172 @@
|
|||
#include "helpers.h"
|
||||
|
||||
namespace tree_sitter_examples {
|
||||
using tree_sitter::Grammar;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
// Builds a rule that matches `rule` followed by a statement terminator:
// either the "_line_break" symbol or a literal ";".
static rule_ptr terminated(rule_ptr rule) {
  const rule_ptr terminator = choice({ sym("_line_break"), str(";") });
  return seq({ rule, terminator });
}
|
||||
using tree_sitter::Grammar;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
extern const Grammar golang = Grammar({
|
||||
{ "program", seq({
|
||||
sym("package_directive"),
|
||||
repeat(sym("imports_block")),
|
||||
repeat(sym("declaration")) }) },
|
||||
{ "package_directive", seq({
|
||||
keyword("package"),
|
||||
sym("package_name") }) },
|
||||
{ "imports_block", seq({
|
||||
keyword("import"),
|
||||
choice({
|
||||
in_parens(err(repeat(sym("package_import")))),
|
||||
sym("package_import") }) }) },
|
||||
{ "package_import", sym("string") },
|
||||
{ "declaration", choice({
|
||||
sym("type_declaration"),
|
||||
sym("var_declaration"),
|
||||
sym("func_declaration") }) },
|
||||
// Builds a rule that matches `rule` followed by a statement terminator:
// either the "_line_break" symbol or a literal ";".
static rule_ptr terminated(rule_ptr rule) {
  const rule_ptr terminator = choice({ sym("_line_break"), str(";") });
  return seq({ rule, terminator });
}
|
||||
|
||||
// Declarations
|
||||
{ "type_declaration", terminated(seq({
|
||||
keyword("type"),
|
||||
sym("type_name"),
|
||||
sym("type_expression") })) },
|
||||
{ "var_declaration", terminated(seq({
|
||||
keyword("var"),
|
||||
sym("var_name"),
|
||||
choice({
|
||||
seq({
|
||||
optional(sym("type_expression")),
|
||||
str("="),
|
||||
sym("expression") }),
|
||||
sym("type_expression") }) })) },
|
||||
{ "func_declaration", terminated(seq({
|
||||
keyword("func"),
|
||||
sym("var_name"),
|
||||
sym("_func_signature"),
|
||||
sym("block_statement") })) },
|
||||
{ "block_statement", in_braces(err(repeat(sym("statement")))) },
|
||||
{ "type_expression", choice({
|
||||
sym("pointer_type"),
|
||||
sym("slice_type"),
|
||||
sym("map_type"),
|
||||
sym("interface_type"),
|
||||
sym("struct_type"),
|
||||
sym("type_name") }) },
|
||||
extern const Grammar golang = Grammar({
|
||||
{ "program", seq({
|
||||
sym("package_directive"),
|
||||
repeat(sym("imports_block")),
|
||||
repeat(sym("declaration")) }) },
|
||||
{ "package_directive", seq({
|
||||
keyword("package"),
|
||||
sym("package_name") }) },
|
||||
{ "imports_block", seq({
|
||||
keyword("import"),
|
||||
choice({
|
||||
in_parens(err(repeat(sym("package_import")))),
|
||||
sym("package_import") }) }) },
|
||||
{ "package_import", sym("string") },
|
||||
{ "declaration", choice({
|
||||
sym("type_declaration"),
|
||||
sym("var_declaration"),
|
||||
sym("func_declaration") }) },
|
||||
|
||||
// Type expressions
|
||||
{ "pointer_type", seq({
|
||||
keyword("*"),
|
||||
sym("type_expression") }) },
|
||||
{ "map_type", seq({
|
||||
keyword("map"),
|
||||
in_brackets(sym("type_expression")),
|
||||
sym("type_expression") }) },
|
||||
{ "slice_type", seq({
|
||||
in_brackets(blank()),
|
||||
sym("type_expression") }) },
|
||||
{ "struct_type", seq({
|
||||
keyword("struct"),
|
||||
in_braces(repeat(seq({
|
||||
sym("var_name"),
|
||||
sym("type_expression") }))) }) },
|
||||
{ "interface_type", seq({
|
||||
keyword("interface"),
|
||||
in_braces(repeat(seq({
|
||||
sym("var_name"),
|
||||
sym("_func_signature") }))) }) },
|
||||
// Declarations
|
||||
{ "type_declaration", terminated(seq({
|
||||
keyword("type"),
|
||||
sym("type_name"),
|
||||
sym("type_expression") })) },
|
||||
{ "var_declaration", terminated(seq({
|
||||
keyword("var"),
|
||||
sym("var_name"),
|
||||
choice({
|
||||
seq({
|
||||
optional(sym("type_expression")),
|
||||
str("="),
|
||||
sym("expression") }),
|
||||
sym("type_expression") }) })) },
|
||||
{ "func_declaration", terminated(seq({
|
||||
keyword("func"),
|
||||
sym("var_name"),
|
||||
sym("_func_signature"),
|
||||
sym("block_statement") })) },
|
||||
{ "block_statement", in_braces(err(repeat(sym("statement")))) },
|
||||
{ "type_expression", choice({
|
||||
sym("pointer_type"),
|
||||
sym("slice_type"),
|
||||
sym("map_type"),
|
||||
sym("interface_type"),
|
||||
sym("struct_type"),
|
||||
sym("type_name") }) },
|
||||
|
||||
// Statements
|
||||
{ "statement", choice({
|
||||
sym("expression_statement"),
|
||||
sym("return_statement"),
|
||||
sym("declaration_statement"),
|
||||
sym("range_statement"),
|
||||
sym("if_statement") }) },
|
||||
{ "return_statement", terminated(seq({
|
||||
keyword("return"),
|
||||
comma_sep(sym("expression")) })) },
|
||||
{ "declaration_statement", choice({
|
||||
sym("var_declaration"),
|
||||
terminated(seq({
|
||||
comma_sep(sym("var_name")),
|
||||
str(":="),
|
||||
sym("expression") })) }) },
|
||||
{ "range_statement", seq({
|
||||
keyword("for"),
|
||||
sym("var_name"),
|
||||
optional(seq({ str(","), sym("var_name") })),
|
||||
// Type expressions
|
||||
{ "pointer_type", seq({
|
||||
keyword("*"),
|
||||
sym("type_expression") }) },
|
||||
{ "map_type", seq({
|
||||
keyword("map"),
|
||||
in_brackets(sym("type_expression")),
|
||||
sym("type_expression") }) },
|
||||
{ "slice_type", seq({
|
||||
in_brackets(blank()),
|
||||
sym("type_expression") }) },
|
||||
{ "struct_type", seq({
|
||||
keyword("struct"),
|
||||
in_braces(repeat(seq({
|
||||
sym("var_name"),
|
||||
sym("type_expression") }))) }) },
|
||||
{ "interface_type", seq({
|
||||
keyword("interface"),
|
||||
in_braces(repeat(seq({
|
||||
sym("var_name"),
|
||||
sym("_func_signature") }))) }) },
|
||||
|
||||
// Statements
|
||||
{ "statement", choice({
|
||||
sym("expression_statement"),
|
||||
sym("return_statement"),
|
||||
sym("declaration_statement"),
|
||||
sym("range_statement"),
|
||||
sym("if_statement") }) },
|
||||
{ "return_statement", terminated(seq({
|
||||
keyword("return"),
|
||||
comma_sep(sym("expression")) })) },
|
||||
{ "declaration_statement", choice({
|
||||
sym("var_declaration"),
|
||||
terminated(seq({
|
||||
comma_sep(sym("var_name")),
|
||||
str(":="),
|
||||
keyword("range"),
|
||||
sym("expression"),
|
||||
sym("block_statement") }) },
|
||||
{ "if_statement", seq({
|
||||
keyword("if"),
|
||||
sym("expression"),
|
||||
sym("block_statement"),
|
||||
optional(seq({
|
||||
keyword("else"),
|
||||
choice({
|
||||
sym("if_statement"),
|
||||
sym("block_statement") }) })) }) },
|
||||
{ "expression_statement", terminated(sym("expression")) },
|
||||
|
||||
// Value expressions
|
||||
{ "expression", choice({
|
||||
sym("call_expression"),
|
||||
sym("selector_expression"),
|
||||
sym("math_op"),
|
||||
sym("bool_op"),
|
||||
sym("number"),
|
||||
sym("string"),
|
||||
sym("var_name") }) },
|
||||
{ "call_expression", seq({
|
||||
sym("expression"),
|
||||
in_parens(comma_sep(sym("expression"))) }) },
|
||||
{ "selector_expression", seq({
|
||||
sym("expression"),
|
||||
str("."),
|
||||
sym("var_name") }) },
|
||||
{ "math_op", choice({
|
||||
infix_op("*", "expression", 2),
|
||||
infix_op("/", "expression", 2),
|
||||
infix_op("+", "expression", 1),
|
||||
infix_op("-", "expression", 1) }) },
|
||||
{ "bool_op", choice({
|
||||
infix_op("||", "expression", 1),
|
||||
infix_op("&&", "expression", 2),
|
||||
infix_op("==", "expression", 3),
|
||||
infix_op("<=", "expression", 3),
|
||||
infix_op("<", "expression", 3),
|
||||
infix_op(">=", "expression", 3),
|
||||
infix_op(">", "expression", 3),
|
||||
prefix_op("!", "expression", 4) }) },
|
||||
{ "_func_signature", seq({
|
||||
in_parens(comma_sep(seq({
|
||||
comma_sep1(sym("var_name")),
|
||||
sym("type_expression") }))),
|
||||
sym("expression") })) }) },
|
||||
{ "range_statement", seq({
|
||||
keyword("for"),
|
||||
sym("var_name"),
|
||||
optional(seq({ str(","), sym("var_name") })),
|
||||
str(":="),
|
||||
keyword("range"),
|
||||
sym("expression"),
|
||||
sym("block_statement") }) },
|
||||
{ "if_statement", seq({
|
||||
keyword("if"),
|
||||
sym("expression"),
|
||||
sym("block_statement"),
|
||||
optional(seq({
|
||||
keyword("else"),
|
||||
choice({
|
||||
in_parens(choice({
|
||||
comma_sep1(seq({ sym("var_name"), sym("type_name") })),
|
||||
comma_sep1(sym("type_name")) })),
|
||||
sym("type_name"),
|
||||
blank() }) }) },
|
||||
sym("if_statement"),
|
||||
sym("block_statement") }) })) }) },
|
||||
{ "expression_statement", terminated(sym("expression")) },
|
||||
|
||||
{ "_line_break", str("\n") },
|
||||
// Value expressions
|
||||
{ "expression", choice({
|
||||
sym("call_expression"),
|
||||
sym("selector_expression"),
|
||||
sym("math_op"),
|
||||
sym("bool_op"),
|
||||
sym("number"),
|
||||
sym("string"),
|
||||
sym("var_name") }) },
|
||||
{ "call_expression", seq({
|
||||
sym("expression"),
|
||||
in_parens(comma_sep(sym("expression"))) }) },
|
||||
{ "selector_expression", seq({
|
||||
sym("expression"),
|
||||
str("."),
|
||||
sym("var_name") }) },
|
||||
{ "math_op", choice({
|
||||
infix_op("*", "expression", 2),
|
||||
infix_op("/", "expression", 2),
|
||||
infix_op("+", "expression", 1),
|
||||
infix_op("-", "expression", 1) }) },
|
||||
{ "bool_op", choice({
|
||||
infix_op("||", "expression", 1),
|
||||
infix_op("&&", "expression", 2),
|
||||
infix_op("==", "expression", 3),
|
||||
infix_op("<=", "expression", 3),
|
||||
infix_op("<", "expression", 3),
|
||||
infix_op(">=", "expression", 3),
|
||||
infix_op(">", "expression", 3),
|
||||
prefix_op("!", "expression", 4) }) },
|
||||
{ "_func_signature", seq({
|
||||
in_parens(comma_sep(seq({
|
||||
comma_sep1(sym("var_name")),
|
||||
sym("type_expression") }))),
|
||||
choice({
|
||||
in_parens(choice({
|
||||
comma_sep1(seq({ sym("var_name"), sym("type_name") })),
|
||||
comma_sep1(sym("type_name")) })),
|
||||
sym("type_name"),
|
||||
blank() }) }) },
|
||||
|
||||
{ "string", delimited("\"") },
|
||||
{ "package_name", sym("_identifier") },
|
||||
{ "var_name", sym("_identifier") },
|
||||
{ "type_name", sym("_identifier") },
|
||||
{ "_identifier", pattern("\\a[\\w_]*") },
|
||||
{ "number", pattern("\\d+(\\.\\d+)?") },
|
||||
{ "comment", keypattern("//[^\n]*") },
|
||||
})
|
||||
{ "_line_break", str("\n") },
|
||||
|
||||
{ "string", delimited("\"") },
|
||||
{ "package_name", sym("_identifier") },
|
||||
{ "var_name", sym("_identifier") },
|
||||
{ "type_name", sym("_identifier") },
|
||||
{ "_identifier", pattern("\\a[\\w_]*") },
|
||||
{ "number", pattern("\\d+(\\.\\d+)?") },
|
||||
{ "comment", keypattern("//[^\n]*") },
|
||||
})
|
||||
.ubiquitous_tokens({ "comment", "_line_break" })
|
||||
.separators({ ' ', '\t', '\r' });
|
||||
}
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
|
|
@ -1,58 +1,59 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter_examples {
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
// Matches one or more occurrences of `element`, separated by "," tokens.
rule_ptr comma_sep1(rule_ptr element) {
  const rule_ptr comma = str(",");
  const rule_ptr more_elements = repeat(seq({ comma, element }));
  return seq({ element, more_elements });
}
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
// Matches zero or more occurrences of `element` separated by ",":
// either a non-empty comma_sep1 list or the blank rule.
rule_ptr comma_sep(rule_ptr element) {
  const rule_ptr non_empty_list = comma_sep1(element);
  return choice({ non_empty_list, blank() });
}
|
||||
|
||||
// Makes `rule` optional by offering the blank rule as an alternative.
rule_ptr optional(rule_ptr rule) {
  const rule_ptr alternatives = choice({ rule, blank() });
  return alternatives;
}
|
||||
|
||||
// Surrounds `rule` with literal "(" and ")" tokens.
rule_ptr in_parens(rule_ptr rule) {
  const rule_ptr open = str("(");
  const rule_ptr close = str(")");
  return seq({ open, rule, close });
}
|
||||
|
||||
// Surrounds `rule` with literal "{" and "}" tokens.
rule_ptr in_braces(rule_ptr rule) {
  const rule_ptr open = str("{");
  const rule_ptr close = str("}");
  return seq({ open, rule, close });
}
|
||||
|
||||
// Surrounds `rule` with literal "[" and "]" tokens.
rule_ptr in_brackets(rule_ptr rule) {
  const rule_ptr open = str("[");
  const rule_ptr close = str("]");
  return seq({ open, rule, close });
}
|
||||
|
||||
// Builds a binary-operator rule: <rule_name> <op> <rule_name>, wrapped in
// prec() with the given precedence. `op` is turned into a keyword rule and
// each operand is a separate symbol reference to `rule_name`.
rule_ptr infix_op(std::string op, std::string rule_name, int precedence) {
  const rule_ptr left_operand = sym(rule_name);
  const rule_ptr op_token = keyword(op);
  const rule_ptr right_operand = sym(rule_name);
  return prec(precedence, seq({ left_operand, op_token, right_operand }));
}
|
||||
|
||||
// Builds a unary prefix-operator rule: <op> <rule_name>, wrapped in prec()
// with the given precedence.
rule_ptr prefix_op(std::string op, std::string rule_name, int precedence) {
  const rule_ptr op_token = keyword(op);
  const rule_ptr operand = sym(rule_name);
  return prec(precedence, seq({ op_token, operand }));
}
|
||||
|
||||
// Builds a unary postfix-operator rule: <rule_name> <op>, wrapped in prec()
// with the given precedence.
rule_ptr postfix_op(std::string op, std::string rule_name, int precedence) {
  const rule_ptr operand = sym(rule_name);
  const rule_ptr op_token = keyword(op);
  return prec(precedence, seq({ operand, op_token }));
}
|
||||
|
||||
// Builds a single token that starts and ends with `delimiter` and contains
// any run of either non-delimiter characters or a backslash followed by the
// delimiter.
// NOTE(review): `delimiter` is concatenated into the character class
// unescaped — presumably callers only pass strings with no special meaning
// inside "[^...]"; confirm against the pattern syntax.
rule_ptr delimited(std::string delimiter) {
  const rule_ptr opening = str(delimiter);
  const rule_ptr plain_char = pattern("[^" + delimiter + "]");
  const rule_ptr escaped_delimiter = seq({ str("\\"), str(delimiter) });
  const rule_ptr contents = repeat(choice({ plain_char, escaped_delimiter }));
  const rule_ptr closing = str(delimiter);
  return token(seq({ opening, contents, closing }));
}
|
||||
// Matches one or more occurrences of `element`, separated by "," tokens.
rule_ptr comma_sep1(rule_ptr element) {
  const rule_ptr comma = str(",");
  const rule_ptr more_elements = repeat(seq({ comma, element }));
  return seq({ element, more_elements });
}
|
||||
|
||||
// Matches zero or more occurrences of `element` separated by ",":
// either a non-empty comma_sep1 list or the blank rule.
rule_ptr comma_sep(rule_ptr element) {
  const rule_ptr non_empty_list = comma_sep1(element);
  return choice({ non_empty_list, blank() });
}
|
||||
|
||||
// Makes `rule` optional by offering the blank rule as an alternative.
rule_ptr optional(rule_ptr rule) {
  const rule_ptr alternatives = choice({ rule, blank() });
  return alternatives;
}
|
||||
|
||||
// Surrounds `rule` with literal "(" and ")" tokens.
rule_ptr in_parens(rule_ptr rule) {
  const rule_ptr open = str("(");
  const rule_ptr close = str(")");
  return seq({ open, rule, close });
}
|
||||
|
||||
// Surrounds `rule` with literal "{" and "}" tokens.
rule_ptr in_braces(rule_ptr rule) {
  const rule_ptr open = str("{");
  const rule_ptr close = str("}");
  return seq({ open, rule, close });
}
|
||||
|
||||
// Surrounds `rule` with literal "[" and "]" tokens.
rule_ptr in_brackets(rule_ptr rule) {
  const rule_ptr open = str("[");
  const rule_ptr close = str("]");
  return seq({ open, rule, close });
}
|
||||
|
||||
// Builds a binary-operator rule: <rule_name> <op> <rule_name>, wrapped in
// prec() with the given precedence. `op` is turned into a keyword rule and
// each operand is a separate symbol reference to `rule_name`.
rule_ptr infix_op(std::string op, std::string rule_name, int precedence) {
  const rule_ptr left_operand = sym(rule_name);
  const rule_ptr op_token = keyword(op);
  const rule_ptr right_operand = sym(rule_name);
  return prec(precedence, seq({ left_operand, op_token, right_operand }));
}
|
||||
|
||||
// Builds a unary prefix-operator rule: <op> <rule_name>, wrapped in prec()
// with the given precedence.
rule_ptr prefix_op(std::string op, std::string rule_name, int precedence) {
  const rule_ptr op_token = keyword(op);
  const rule_ptr operand = sym(rule_name);
  return prec(precedence, seq({ op_token, operand }));
}
|
||||
|
||||
// Builds a unary postfix-operator rule: <rule_name> <op>, wrapped in prec()
// with the given precedence.
rule_ptr postfix_op(std::string op, std::string rule_name, int precedence) {
  const rule_ptr operand = sym(rule_name);
  const rule_ptr op_token = keyword(op);
  return prec(precedence, seq({ operand, op_token }));
}
|
||||
|
||||
// Builds a single token that starts and ends with `delimiter` and contains
// any run of either non-delimiter characters or a backslash followed by the
// delimiter.
// NOTE(review): `delimiter` is concatenated into the character class
// unescaped — presumably callers only pass strings with no special meaning
// inside "[^...]"; confirm against the pattern syntax.
rule_ptr delimited(std::string delimiter) {
  const rule_ptr opening = str(delimiter);
  const rule_ptr plain_char = pattern("[^" + delimiter + "]");
  const rule_ptr escaped_delimiter = seq({ str("\\"), str(delimiter) });
  const rule_ptr contents = repeat(choice({ plain_char, escaped_delimiter }));
  const rule_ptr closing = str(delimiter);
  return token(seq({ opening, contents, closing }));
}
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
|
|
@ -4,18 +4,20 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter_examples {
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
// Rule-building helpers shared by the example grammars. Each takes and
// returns rule_ptr values.

// One or more `element`s separated by ",".
rule_ptr comma_sep1(rule_ptr element);
|
||||
// Zero or more `element`s separated by ",".
rule_ptr comma_sep(rule_ptr element);
|
||||
// Either `rule` or the blank rule.
rule_ptr optional(rule_ptr rule);
|
||||
// `rule` wrapped in "(" ... ")".
rule_ptr in_parens(rule_ptr rule);
|
||||
// `rule` wrapped in "{" ... "}".
rule_ptr in_braces(rule_ptr rule);
|
||||
// `rule` wrapped in "[" ... "]".
rule_ptr in_brackets(rule_ptr rule);
|
||||
// <rule_name> <op> <rule_name> at the given precedence.
rule_ptr infix_op(std::string op, std::string rule_name, int precedence);
|
||||
// <op> <rule_name> at the given precedence.
rule_ptr prefix_op(std::string op, std::string rule_name, int precedence);
|
||||
// <rule_name> <op> at the given precedence.
rule_ptr postfix_op(std::string op, std::string rule_name, int precedence);
|
||||
// A token that begins and ends with `delimiter`.
rule_ptr delimited(std::string delimiter);
|
||||
}
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
#endif // TREESITTER_EXAMPLES_HELPERS_
|
||||
// Rule-building helpers shared by the example grammars. Each takes and
// returns rule_ptr values.

// One or more `element`s separated by ",".
rule_ptr comma_sep1(rule_ptr element);
|
||||
// Zero or more `element`s separated by ",".
rule_ptr comma_sep(rule_ptr element);
|
||||
// Either `rule` or the blank rule.
rule_ptr optional(rule_ptr rule);
|
||||
// `rule` wrapped in "(" ... ")".
rule_ptr in_parens(rule_ptr rule);
|
||||
// `rule` wrapped in "{" ... "}".
rule_ptr in_braces(rule_ptr rule);
|
||||
// `rule` wrapped in "[" ... "]".
rule_ptr in_brackets(rule_ptr rule);
|
||||
// <rule_name> <op> <rule_name> at the given precedence.
rule_ptr infix_op(std::string op, std::string rule_name, int precedence);
|
||||
// <op> <rule_name> at the given precedence.
rule_ptr prefix_op(std::string op, std::string rule_name, int precedence);
|
||||
// <rule_name> <op> at the given precedence.
rule_ptr postfix_op(std::string op, std::string rule_name, int precedence);
|
||||
// A token that begins and ends with `delimiter`.
rule_ptr delimited(std::string delimiter);
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
||||
#endif // TREESITTER_EXAMPLES_HELPERS_
|
||||
|
|
|
|||
|
|
@ -2,217 +2,219 @@
|
|||
#include "helpers.h"
|
||||
|
||||
namespace tree_sitter_examples {
|
||||
using tree_sitter::Grammar;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
// Builds a rule that matches `rule` followed by a statement terminator:
// either the "_line_break" symbol or a literal ";".
static rule_ptr terminated(rule_ptr rule) {
  const rule_ptr terminator = choice({ sym("_line_break"), str(";") });
  return seq({ rule, terminator });
}
|
||||
using tree_sitter::Grammar;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
extern const Grammar javascript = Grammar({
|
||||
{ "program", repeat(sym("statement")) },
|
||||
// Builds a rule that matches `rule` followed by a statement terminator:
// either the "_line_break" symbol or a literal ";".
static rule_ptr terminated(rule_ptr rule) {
  const rule_ptr terminator = choice({ sym("_line_break"), str(";") });
  return seq({ rule, terminator });
}
|
||||
|
||||
// Statements
|
||||
{ "statement", choice({
|
||||
sym("statement_block"),
|
||||
sym("if_statement"),
|
||||
sym("try_statement"),
|
||||
sym("switch_statement"),
|
||||
sym("while_statement"),
|
||||
sym("for_statement"),
|
||||
sym("for_in_statement"),
|
||||
sym("break_statement"),
|
||||
sym("var_declaration"),
|
||||
sym("throw_statement"),
|
||||
sym("return_statement"),
|
||||
sym("delete_statement"),
|
||||
sym("expression_statement") }) },
|
||||
{ "statement_block", in_braces(err(repeat(sym("statement")))) },
|
||||
{ "for_statement", seq({
|
||||
keyword("for"),
|
||||
in_parens(err(seq({
|
||||
choice({
|
||||
sym("var_declaration"),
|
||||
sym("expression_statement") }),
|
||||
sym("expression_statement"),
|
||||
sym("expression") }))),
|
||||
sym("statement") }) },
|
||||
{ "for_in_statement", seq({
|
||||
keyword("for"),
|
||||
in_parens(err(seq({
|
||||
optional(keyword("var")),
|
||||
sym("identifier"),
|
||||
keyword("in"),
|
||||
sym("expression") }))),
|
||||
sym("statement") }) },
|
||||
{ "throw_statement", terminated(seq({
|
||||
keyword("throw"),
|
||||
sym("expression") })) },
|
||||
{ "if_statement", seq({
|
||||
keyword("if"),
|
||||
in_parens(err(sym("expression"))),
|
||||
sym("statement"),
|
||||
optional(prec(1, seq({
|
||||
keyword("else"),
|
||||
sym("statement") }))) }) },
|
||||
{ "while_statement", seq({
|
||||
keyword("while"),
|
||||
in_parens(err(sym("expression"))),
|
||||
sym("statement") }) },
|
||||
{ "try_statement", seq({
|
||||
keyword("try"),
|
||||
sym("statement"),
|
||||
optional(sym("catch_clause")),
|
||||
optional(sym("finally_clause")) }) },
|
||||
{ "catch_clause", seq({
|
||||
keyword("catch"),
|
||||
in_parens(err(sym("identifier"))),
|
||||
sym("statement") }) },
|
||||
{ "finally_clause", seq({
|
||||
keyword("finally"),
|
||||
sym("statement") }) },
|
||||
{ "switch_statement", seq({
|
||||
keyword("switch"),
|
||||
in_parens(err(sym("expression"))),
|
||||
in_braces(repeat(sym("switch_case"))) }) },
|
||||
{ "switch_case", seq({
|
||||
extern const Grammar javascript = Grammar({
|
||||
{ "program", repeat(sym("statement")) },
|
||||
|
||||
// Statements
|
||||
{ "statement", choice({
|
||||
sym("statement_block"),
|
||||
sym("if_statement"),
|
||||
sym("try_statement"),
|
||||
sym("switch_statement"),
|
||||
sym("while_statement"),
|
||||
sym("for_statement"),
|
||||
sym("for_in_statement"),
|
||||
sym("break_statement"),
|
||||
sym("var_declaration"),
|
||||
sym("throw_statement"),
|
||||
sym("return_statement"),
|
||||
sym("delete_statement"),
|
||||
sym("expression_statement") }) },
|
||||
{ "statement_block", in_braces(err(repeat(sym("statement")))) },
|
||||
{ "for_statement", seq({
|
||||
keyword("for"),
|
||||
in_parens(err(seq({
|
||||
choice({
|
||||
seq({
|
||||
keyword("case"),
|
||||
sym("expression") }),
|
||||
keyword("default") }),
|
||||
str(":"),
|
||||
repeat(sym("statement")) }) },
|
||||
{ "break_statement", terminated(keyword("break")) },
|
||||
{ "var_declaration", terminated(seq({
|
||||
keyword("var"),
|
||||
comma_sep(err(seq({
|
||||
sym("var_declaration"),
|
||||
sym("expression_statement") }),
|
||||
sym("expression_statement"),
|
||||
sym("expression") }))),
|
||||
sym("statement") }) },
|
||||
{ "for_in_statement", seq({
|
||||
keyword("for"),
|
||||
in_parens(err(seq({
|
||||
optional(keyword("var")),
|
||||
sym("identifier"),
|
||||
keyword("in"),
|
||||
sym("expression") }))),
|
||||
sym("statement") }) },
|
||||
{ "throw_statement", terminated(seq({
|
||||
keyword("throw"),
|
||||
sym("expression") })) },
|
||||
{ "if_statement", seq({
|
||||
keyword("if"),
|
||||
in_parens(err(sym("expression"))),
|
||||
sym("statement"),
|
||||
optional(prec(1, seq({
|
||||
keyword("else"),
|
||||
sym("statement") }))) }) },
|
||||
{ "while_statement", seq({
|
||||
keyword("while"),
|
||||
in_parens(err(sym("expression"))),
|
||||
sym("statement") }) },
|
||||
{ "try_statement", seq({
|
||||
keyword("try"),
|
||||
sym("statement"),
|
||||
optional(sym("catch_clause")),
|
||||
optional(sym("finally_clause")) }) },
|
||||
{ "catch_clause", seq({
|
||||
keyword("catch"),
|
||||
in_parens(err(sym("identifier"))),
|
||||
sym("statement") }) },
|
||||
{ "finally_clause", seq({
|
||||
keyword("finally"),
|
||||
sym("statement") }) },
|
||||
{ "switch_statement", seq({
|
||||
keyword("switch"),
|
||||
in_parens(err(sym("expression"))),
|
||||
in_braces(repeat(sym("switch_case"))) }) },
|
||||
{ "switch_case", seq({
|
||||
choice({
|
||||
seq({
|
||||
keyword("case"),
|
||||
sym("expression") }),
|
||||
keyword("default") }),
|
||||
str(":"),
|
||||
repeat(sym("statement")) }) },
|
||||
{ "break_statement", terminated(keyword("break")) },
|
||||
{ "var_declaration", terminated(seq({
|
||||
keyword("var"),
|
||||
comma_sep(err(seq({
|
||||
sym("identifier"),
|
||||
optional(seq({
|
||||
str("="),
|
||||
sym("expression") })) }))) })) },
|
||||
{ "expression_statement", terminated(err(sym("expression"))) },
|
||||
{ "return_statement", terminated(seq({
|
||||
keyword("return"),
|
||||
optional(sym("expression")) })) },
|
||||
{ "delete_statement", terminated(seq({
|
||||
keyword("delete"),
|
||||
sym("property_access") })) },
|
||||
{ "expression_statement", terminated(err(sym("expression"))) },
|
||||
{ "return_statement", terminated(seq({
|
||||
keyword("return"),
|
||||
optional(sym("expression")) })) },
|
||||
{ "delete_statement", terminated(seq({
|
||||
keyword("delete"),
|
||||
sym("property_access") })) },
|
||||
|
||||
// Expressions
|
||||
{ "expression", choice({
|
||||
sym("function_expression"),
|
||||
sym("function_call"),
|
||||
sym("constructor_call"),
|
||||
sym("property_access"),
|
||||
sym("assignment"),
|
||||
sym("ternary"),
|
||||
sym("math_op"),
|
||||
sym("bool_op"),
|
||||
sym("object"),
|
||||
sym("array"),
|
||||
sym("regex"),
|
||||
sym("string"),
|
||||
sym("number"),
|
||||
sym("true"),
|
||||
sym("false"),
|
||||
sym("null"),
|
||||
// Expressions
|
||||
{ "expression", choice({
|
||||
sym("function_expression"),
|
||||
sym("function_call"),
|
||||
sym("constructor_call"),
|
||||
sym("property_access"),
|
||||
sym("assignment"),
|
||||
sym("ternary"),
|
||||
sym("math_op"),
|
||||
sym("bool_op"),
|
||||
sym("object"),
|
||||
sym("array"),
|
||||
sym("regex"),
|
||||
sym("string"),
|
||||
sym("number"),
|
||||
sym("true"),
|
||||
sym("false"),
|
||||
sym("null"),
|
||||
sym("identifier"),
|
||||
sym("in_expression"),
|
||||
sym("instanceof_expression"),
|
||||
sym("typeof_expression"),
|
||||
in_parens(sym("expression")) }) },
|
||||
{ "in_expression", infix_op("in", "expression", 3) },
|
||||
{ "instanceof_expression", infix_op("instanceof", "expression", 3) },
|
||||
{ "typeof_expression", prefix_op("typeof", "expression", 3) },
|
||||
{ "math_op", choice({
|
||||
prefix_op("++", "expression", 3),
|
||||
prefix_op("--", "expression", 3),
|
||||
postfix_op("++", "expression", 3),
|
||||
postfix_op("--", "expression", 3),
|
||||
prefix_op("+", "expression", 3),
|
||||
prefix_op("-", "expression", 3),
|
||||
infix_op("*", "expression", 2),
|
||||
infix_op("/", "expression", 2),
|
||||
infix_op("&", "expression", 2),
|
||||
infix_op("|", "expression", 2),
|
||||
infix_op("^", "expression", 2),
|
||||
infix_op("+", "expression", 1),
|
||||
infix_op("-", "expression", 1) }) },
|
||||
{ "bool_op", choice({
|
||||
infix_op("||", "expression", 1),
|
||||
infix_op("&&", "expression", 2),
|
||||
infix_op("===", "expression", 3),
|
||||
infix_op("==", "expression", 3),
|
||||
infix_op("!==", "expression", 3),
|
||||
infix_op("!=", "expression", 3),
|
||||
infix_op("<=", "expression", 3),
|
||||
infix_op("<", "expression", 3),
|
||||
infix_op(">=", "expression", 3),
|
||||
infix_op(">", "expression", 3),
|
||||
prefix_op("!", "expression", 4) }) },
|
||||
{ "ternary", seq({
|
||||
sym("expression"),
|
||||
str("?"),
|
||||
sym("expression"),
|
||||
str(":"),
|
||||
sym("expression") }) },
|
||||
{ "assignment", prec(-1, seq({
|
||||
choice({
|
||||
sym("identifier"),
|
||||
sym("in_expression"),
|
||||
sym("instanceof_expression"),
|
||||
sym("typeof_expression"),
|
||||
in_parens(sym("expression")) }) },
|
||||
{ "in_expression", infix_op("in", "expression", 3) },
|
||||
{ "instanceof_expression", infix_op("instanceof", "expression", 3) },
|
||||
{ "typeof_expression", prefix_op("typeof", "expression", 3) },
|
||||
{ "math_op", choice({
|
||||
prefix_op("++", "expression", 3),
|
||||
prefix_op("--", "expression", 3),
|
||||
postfix_op("++", "expression", 3),
|
||||
postfix_op("--", "expression", 3),
|
||||
prefix_op("+", "expression", 3),
|
||||
prefix_op("-", "expression", 3),
|
||||
infix_op("*", "expression", 2),
|
||||
infix_op("/", "expression", 2),
|
||||
infix_op("&", "expression", 2),
|
||||
infix_op("|", "expression", 2),
|
||||
infix_op("^", "expression", 2),
|
||||
infix_op("+", "expression", 1),
|
||||
infix_op("-", "expression", 1) }) },
|
||||
{ "bool_op", choice({
|
||||
infix_op("||", "expression", 1),
|
||||
infix_op("&&", "expression", 2),
|
||||
infix_op("===", "expression", 3),
|
||||
infix_op("==", "expression", 3),
|
||||
infix_op("!==", "expression", 3),
|
||||
infix_op("!=", "expression", 3),
|
||||
infix_op("<=", "expression", 3),
|
||||
infix_op("<", "expression", 3),
|
||||
infix_op(">=", "expression", 3),
|
||||
infix_op(">", "expression", 3),
|
||||
prefix_op("!", "expression", 4) }) },
|
||||
{ "ternary", seq({
|
||||
sym("expression"),
|
||||
str("?"),
|
||||
sym("expression"),
|
||||
str(":"),
|
||||
sym("expression") }) },
|
||||
{ "assignment", prec(-1, seq({
|
||||
choice({
|
||||
sym("identifier"),
|
||||
sym("property_access") }),
|
||||
choice({
|
||||
str("="),
|
||||
str("+="),
|
||||
str("-="),
|
||||
str("*="),
|
||||
str("/=") }),
|
||||
sym("expression") })) },
|
||||
{ "function_expression", seq({
|
||||
keyword("function"),
|
||||
optional(sym("identifier")),
|
||||
sym("formal_parameters"),
|
||||
sym("statement_block") }) },
|
||||
{ "function_call", seq({
|
||||
sym("expression"),
|
||||
in_parens(comma_sep(err(sym("expression")))) }) },
|
||||
{ "constructor_call", seq({
|
||||
keyword("new"),
|
||||
sym("function_call") }) },
|
||||
{ "property_access", seq({
|
||||
sym("expression"),
|
||||
prec(10, choice({
|
||||
seq({
|
||||
str("."),
|
||||
sym("identifier") }),
|
||||
in_brackets(sym("expression")) })) }) },
|
||||
{ "formal_parameters", in_parens(comma_sep(sym("identifier"))) },
|
||||
|
||||
// Literals
|
||||
{ "comment", token(choice({
|
||||
sym("property_access") }),
|
||||
choice({
|
||||
str("="),
|
||||
str("+="),
|
||||
str("-="),
|
||||
str("*="),
|
||||
str("/=") }),
|
||||
sym("expression") })) },
|
||||
{ "function_expression", seq({
|
||||
keyword("function"),
|
||||
optional(sym("identifier")),
|
||||
sym("formal_parameters"),
|
||||
sym("statement_block") }) },
|
||||
{ "function_call", seq({
|
||||
sym("expression"),
|
||||
in_parens(comma_sep(err(sym("expression")))) }) },
|
||||
{ "constructor_call", seq({
|
||||
keyword("new"),
|
||||
sym("function_call") }) },
|
||||
{ "property_access", seq({
|
||||
sym("expression"),
|
||||
prec(10, choice({
|
||||
seq({
|
||||
str("/*"),
|
||||
repeat(pattern("[^*]|(*[^/])")),
|
||||
str("*/") }),
|
||||
pattern("//[^\n]*") })) },
|
||||
{ "object", in_braces(comma_sep(err(seq({
|
||||
choice({ sym("string"), sym("identifier") }),
|
||||
str(":"),
|
||||
sym("expression") })))) },
|
||||
{ "array", in_brackets(comma_sep(err(sym("expression")))) },
|
||||
{ "regex", token(seq({ delimited("/"), optional(str("g")) })) },
|
||||
{ "string", token(choice({
|
||||
delimited("\""),
|
||||
delimited("'") })) },
|
||||
{ "_line_break", str("\n") },
|
||||
{ "identifier", pattern("[\\a_$][\\w_$]*") },
|
||||
{ "number", pattern("\\d+(\\.\\d+)?") },
|
||||
{ "null", keyword("null") },
|
||||
{ "true", keyword("true") },
|
||||
{ "false", keyword("false") },
|
||||
})
|
||||
.ubiquitous_tokens({ "comment", "_line_break" })
|
||||
.separators({ ' ', '\t', '\r' });
|
||||
}
|
||||
str("."),
|
||||
sym("identifier") }),
|
||||
in_brackets(sym("expression")) })) }) },
|
||||
{ "formal_parameters", in_parens(comma_sep(sym("identifier"))) },
|
||||
|
||||
// Literals
|
||||
{ "comment", token(choice({
|
||||
seq({
|
||||
str("/*"),
|
||||
repeat(pattern("[^*]|(*[^/])")),
|
||||
str("*/") }),
|
||||
pattern("//[^\n]*") })) },
|
||||
{ "object", in_braces(comma_sep(err(seq({
|
||||
choice({ sym("string"), sym("identifier") }),
|
||||
str(":"),
|
||||
sym("expression") })))) },
|
||||
{ "array", in_brackets(comma_sep(err(sym("expression")))) },
|
||||
{ "regex", token(seq({ delimited("/"), optional(str("g")) })) },
|
||||
{ "string", token(choice({
|
||||
delimited("\""),
|
||||
delimited("'") })) },
|
||||
{ "_line_break", str("\n") },
|
||||
{ "identifier", pattern("[\\a_$][\\w_$]*") },
|
||||
{ "number", pattern("\\d+(\\.\\d+)?") },
|
||||
{ "null", keyword("null") },
|
||||
{ "true", keyword("true") },
|
||||
{ "false", keyword("false") },
|
||||
})
|
||||
.ubiquitous_tokens({ "comment", "_line_break" })
|
||||
.separators({ ' ', '\t', '\r' });
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
|
|
@ -2,27 +2,29 @@
|
|||
#include "helpers.h"
|
||||
|
||||
namespace tree_sitter_examples {
|
||||
using tree_sitter::Grammar;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
// Example grammar for JSON, given as a list of { rule name, rule } entries.
//   value  - one of object, array, string, number, true, false, null
//   object - "{" ... "}" holding comma-separated `string ":" value` pairs
//   array  - "[" ... "]" holding comma-separated values
//   string - a double quote, any run of non-quote characters or
//            backslash-quote pairs, then a closing double quote
//   number - one or more digits with an optional ".digits" fraction
//   null / true / false - the corresponding keywords
// Each object pair and array element is wrapped in err(); presumably this
// marks an error-recovery point — confirm against the rules API.
extern const Grammar json({
|
||||
{ "value", choice({
|
||||
sym("object"),
|
||||
sym("array"),
|
||||
sym("string"),
|
||||
sym("number"),
|
||||
sym("true"),
|
||||
sym("false"),
|
||||
sym("null"), }) },
|
||||
{ "object", in_braces(comma_sep(err(seq({
|
||||
sym("string"),
|
||||
str(":"),
|
||||
sym("value") })))) },
|
||||
{ "array", in_brackets(comma_sep(err(sym("value")))) },
|
||||
{ "string", pattern("\"([^\"]|\\\\\")*\"") },
|
||||
{ "number", pattern("\\d+(\\.\\d+)?") },
|
||||
{ "null", keyword("null") },
|
||||
{ "true", keyword("true") },
|
||||
{ "false", keyword("false") },
|
||||
});
|
||||
}
|
||||
using tree_sitter::Grammar;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
// Example grammar for JSON. The first entry, "value", lists the seven
// alternatives a JSON value can take; the remaining entries define each of
// those alternatives. in_braces / in_brackets / comma_sep come from
// helpers.h (not shown here) -- presumably they wrap a rule in the named
// delimiters and build a comma-separated list; confirm against helpers.h.
extern const Grammar json({
|
||||
{ "value", choice({
|
||||
sym("object"),
|
||||
|
||||
sym("array"),
|
||||
sym("string"),
|
||||
sym("number"),
|
||||
sym("true"),
|
||||
sym("false"),
|
||||
sym("null"), }) },
|
||||
// An object member is the sequence: string, ":", value. Each member is
// wrapped in err(...).
{ "object", in_braces(comma_sep(err(seq({
|
||||
sym("string"),
|
||||
str(":"),
|
||||
sym("value") })))) },
|
||||
{ "array", in_brackets(comma_sep(err(sym("value")))) },
|
||||
// Regex after C++ unescaping: "([^"]|\\")*"
{ "string", pattern("\"([^\"]|\\\\\")*\"") },
|
||||
// One or more digits with an optional ".digits" fractional part.
{ "number", pattern("\\d+(\\.\\d+)?") },
|
||||
{ "null", keyword("null") },
|
||||
{ "true", keyword("true") },
|
||||
{ "false", keyword("false") },
|
||||
});
|
||||
|
||||
} // namespace tree_sitter_examples
|
||||
|
|
|
|||
|
|
@ -319,7 +319,6 @@ LEX_FN() {
|
|||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(25);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
|
|
|
|||
|
|
@ -7,71 +7,71 @@
|
|||
#include <memory>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Rule;
|
||||
typedef std::shared_ptr<Rule> rule_ptr;
|
||||
namespace rules {
|
||||
class Rule;
|
||||
typedef std::shared_ptr<Rule> rule_ptr;
|
||||
|
||||
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule);
|
||||
std::ostream &operator<<(std::ostream &stream, const rule_ptr &rule);
|
||||
|
||||
rule_ptr blank();
|
||||
rule_ptr choice(const std::vector<rule_ptr> &rules);
|
||||
rule_ptr repeat(const rule_ptr &content);
|
||||
rule_ptr seq(const std::vector<rule_ptr> &rules);
|
||||
rule_ptr sym(const std::string &name);
|
||||
rule_ptr pattern(const std::string &value);
|
||||
rule_ptr str(const std::string &value);
|
||||
rule_ptr keyword(const std::string &value);
|
||||
rule_ptr keypattern(const std::string &value);
|
||||
rule_ptr err(const rule_ptr &rule);
|
||||
rule_ptr prec(int precedence, rule_ptr rule);
|
||||
rule_ptr token(rule_ptr rule);
|
||||
}
|
||||
rule_ptr blank();
|
||||
rule_ptr choice(const std::vector<rule_ptr> &rules);
|
||||
rule_ptr repeat(const rule_ptr &content);
|
||||
rule_ptr seq(const std::vector<rule_ptr> &rules);
|
||||
rule_ptr sym(const std::string &name);
|
||||
rule_ptr pattern(const std::string &value);
|
||||
rule_ptr str(const std::string &value);
|
||||
rule_ptr keyword(const std::string &value);
|
||||
rule_ptr keypattern(const std::string &value);
|
||||
rule_ptr err(const rule_ptr &rule);
|
||||
rule_ptr prec(int precedence, rule_ptr rule);
|
||||
rule_ptr token(rule_ptr rule);
|
||||
}
|
||||
|
||||
class Grammar {
|
||||
protected:
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
|
||||
std::set<std::string> ubiquitous_tokens_;
|
||||
std::set<char> separators_;
|
||||
class Grammar {
|
||||
protected:
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > rules_;
|
||||
std::set<std::string> ubiquitous_tokens_;
|
||||
std::set<char> separators_;
|
||||
|
||||
public:
|
||||
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules);
|
||||
bool operator==(const Grammar &other) const;
|
||||
std::string start_rule_name() const;
|
||||
const rules::rule_ptr rule(const std::string &name) const;
|
||||
public:
|
||||
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr> > &rules);
|
||||
bool operator==(const Grammar &other) const;
|
||||
std::string start_rule_name() const;
|
||||
const rules::rule_ptr rule(const std::string &name) const;
|
||||
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> & rules() const;
|
||||
const std::set<std::string> & ubiquitous_tokens() const;
|
||||
Grammar & ubiquitous_tokens(const std::set<std::string> &ubiquitous_tokens);
|
||||
const std::set<char> & separators() const;
|
||||
Grammar & separators(const std::set<char> &separators);
|
||||
};
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules() const;
|
||||
const std::set<std::string> &ubiquitous_tokens() const;
|
||||
Grammar &ubiquitous_tokens(const std::set<std::string> &ubiquitous_tokens);
|
||||
const std::set<char> &separators() const;
|
||||
Grammar &separators(const std::set<char> &separators);
|
||||
};
|
||||
|
||||
struct Conflict {
|
||||
Conflict(std::string description);
|
||||
std::string description;
|
||||
bool operator==(const Conflict &other) const;
|
||||
bool operator<(const Conflict &other) const;
|
||||
};
|
||||
// A conflict reported by compile(), carried as a textual description.
// Equality and ordering operators are declared here and defined elsewhere.
struct Conflict {
|
||||
Conflict(std::string description);
|
||||
std::string description;
|
||||
bool operator==(const Conflict &other) const;
|
||||
bool operator<(const Conflict &other) const;
|
||||
};
|
||||
|
||||
enum GrammarErrorType {
|
||||
GrammarErrorTypeRegex,
|
||||
GrammarErrorTypeUndefinedSymbol
|
||||
};
|
||||
// Category tag stored in GrammarError::type.
// NOTE(review): judging by the names, Regex is for a bad pattern and
// UndefinedSymbol for a reference to a rule that does not exist -- confirm
// against the code that constructs GrammarError.
enum GrammarErrorType {
|
||||
GrammarErrorTypeRegex,
|
||||
GrammarErrorTypeUndefinedSymbol
|
||||
};
|
||||
|
||||
class GrammarError {
|
||||
public:
|
||||
GrammarError(GrammarErrorType type, std::string message);
|
||||
bool operator==(const GrammarError &other) const;
|
||||
GrammarErrorType type;
|
||||
std::string message;
|
||||
};
|
||||
// An error found in a grammar: a category (GrammarErrorType) plus a
// free-form message. compile() hands one back by const pointer.
class GrammarError {
|
||||
public:
|
||||
GrammarError(GrammarErrorType type, std::string message);
|
||||
bool operator==(const GrammarError &other) const;
|
||||
GrammarErrorType type;
|
||||
std::string message;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);
|
||||
std::ostream& operator<<(std::ostream &stream, const Conflict &conflict);
|
||||
std::ostream& operator<<(std::ostream &stream, const GrammarError *error);
|
||||
std::ostream &operator<<(std::ostream &stream, const Grammar &grammar);
|
||||
std::ostream &operator<<(std::ostream &stream, const Conflict &conflict);
|
||||
std::ostream &operator<<(std::ostream &stream, const GrammarError *error);
|
||||
|
||||
std::tuple<std::string, std::vector<Conflict>, const GrammarError *>
|
||||
compile(const Grammar &grammar, std::string name);
|
||||
// Compiles `grammar` under the given `name`. Returns a tuple of
// (string, conflicts, error pointer).
// NOTE(review): the string is presumably the generated parser source and the
// pointer presumably null on success -- confirm against the definition.
std::tuple<std::string, std::vector<Conflict>, const GrammarError *> compile(
|
||||
const Grammar &grammar, std::string name);
|
||||
}
|
||||
|
||||
#endif // TREE_SITTER_COMPILER_H_
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ typedef struct {
|
|||
|
||||
TSLexer ts_lexer_make();
|
||||
int ts_lexer_advance(TSLexer *lexer);
|
||||
TSTree * ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden);
|
||||
TSTree *ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden);
|
||||
|
||||
static inline size_t ts_lexer_position(const TSLexer *lexer) {
|
||||
return lexer->chunk_start + lexer->position_in_chunk;
|
||||
|
|
@ -53,11 +53,13 @@ typedef struct {
|
|||
|
||||
TSStack ts_stack_make();
|
||||
void ts_stack_delete(TSStack *);
|
||||
TSTree * ts_stack_reduce(TSStack *stack, TSSymbol symbol, size_t immediate_child_count, const int *hidden_symbol_flags, int gather_extras);
|
||||
TSTree *ts_stack_reduce(TSStack *stack, TSSymbol symbol,
|
||||
size_t immediate_child_count,
|
||||
const int *hidden_symbol_flags, int gather_extras);
|
||||
void ts_stack_shrink(TSStack *stack, size_t new_size);
|
||||
void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node);
|
||||
TSStateId ts_stack_top_state(const TSStack *stack);
|
||||
TSTree * ts_stack_top_node(const TSStack *stack);
|
||||
TSTree *ts_stack_top_node(const TSStack *stack);
|
||||
size_t ts_stack_right_position(const TSStack *stack);
|
||||
|
||||
typedef enum {
|
||||
|
|
@ -86,7 +88,7 @@ typedef struct {
|
|||
const int *hidden_symbol_flags;
|
||||
const TSParseAction *parse_table;
|
||||
const TSStateId *lex_states;
|
||||
TSTree * (* lex_fn)(TSParser *, TSStateId);
|
||||
TSTree *(*lex_fn)(TSParser *, TSStateId);
|
||||
} TSParserConfig;
|
||||
|
||||
struct TSParser {
|
||||
|
|
@ -98,91 +100,99 @@ struct TSParser {
|
|||
TSParserConfig config;
|
||||
};
|
||||
|
||||
TSParser * ts_parser_make(TSParserConfig);
|
||||
TSParser *ts_parser_make(TSParserConfig);
|
||||
void ts_parser_free(TSParser *);
|
||||
TSParserConfig ts_parser_config(TSParser *);
|
||||
const TSTree * ts_parser_parse(TSParser *parser, TSInput input, TSInputEdit *edit);
|
||||
const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
|
||||
TSInputEdit *edit);
|
||||
void ts_parser_start(TSParser *parser, TSInput input, TSInputEdit *edit);
|
||||
TSTree * ts_parser_step(TSParser *parser);
|
||||
TSTree *ts_parser_step(TSParser *parser);
|
||||
|
||||
#define SYMBOL_NAMES \
|
||||
static const char *ts_symbol_names[]
|
||||
#define SYMBOL_NAMES static const char *ts_symbol_names[]
|
||||
|
||||
#define HIDDEN_SYMBOLS \
|
||||
static const int ts_hidden_symbol_flags[SYMBOL_COUNT]
|
||||
#define HIDDEN_SYMBOLS static const int ts_hidden_symbol_flags[SYMBOL_COUNT]
|
||||
|
||||
#define LEX_STATES \
|
||||
static TSStateId ts_lex_states[STATE_COUNT]
|
||||
#define LEX_STATES static TSStateId ts_lex_states[STATE_COUNT]
|
||||
|
||||
#define PARSE_TABLE \
|
||||
static const TSParseAction ts_parse_actions[STATE_COUNT][SYMBOL_COUNT]
|
||||
|
||||
#define LEX_FN() \
|
||||
static TSTree * ts_lex(TSParser *parser, TSStateId lex_state)
|
||||
#define LEX_FN() static TSTree *ts_lex(TSParser *parser, TSStateId lex_state)
|
||||
|
||||
#define DEBUG_LEX(...) \
|
||||
if (parser->lexer.debug) { fprintf(stderr, "\n" __VA_ARGS__); }
|
||||
// Prints a printf-style trace line to stderr (prefixed with a newline) when
// parser->lexer.debug is non-zero. Expects a variable named `parser` in
// scope at the expansion site.
//
// The body is wrapped in do { ... } while (0) so that `DEBUG_LEX(...);`
// expands to exactly one statement. With a bare `if (...) { ... }` body the
// trailing semicolon ends the statement early, so an un-braced
//   if (x) DEBUG_LEX("..."); else ...
// fails to compile.
#define DEBUG_LEX(...)                   \
  do {                                   \
    if (parser->lexer.debug) {           \
      fprintf(stderr, "\n" __VA_ARGS__); \
    }                                    \
  } while (0)
|
||||
|
||||
#define START_LEXER() \
|
||||
DEBUG_LEX("LEX %d", lex_state); \
|
||||
char lookahead; \
|
||||
next_state: \
|
||||
// Prologue for a lex function body. Expects `parser` and `lex_state` in
// scope (LEX_FN() declares both as parameters). It logs the starting state,
// declares the local `lookahead`, and defines the `next_state` label that
// ADVANCE() jumps back to. Each pass through the label reloads `lookahead`
// from the lexer and logs it.
#define START_LEXER() \
|
||||
DEBUG_LEX("LEX %d", lex_state); \
|
||||
char lookahead; \
|
||||
next_state: \
|
||||
lookahead = ts_lexer_lookahead_char(&parser->lexer); \
|
||||
DEBUG_LEX("CHAR '%c'", lookahead);
|
||||
|
||||
#define START_TOKEN() \
|
||||
ts_lexer_start_token(&parser->lexer);
|
||||
// Calls ts_lexer_start_token on the parser's lexer. The expansion already
// ends in a semicolon, so `START_TOKEN();` leaves an extra empty statement.
#define START_TOKEN() ts_lexer_start_token(&parser->lexer);
|
||||
|
||||
#define ADVANCE(state_index) \
|
||||
{ \
|
||||
DEBUG_LEX("ADVANCE %d", state_index); \
|
||||
if (!ts_lexer_advance(&parser->lexer)) ACCEPT_TOKEN(ts_builtin_sym_end); \
|
||||
lex_state = state_index; goto next_state; \
|
||||
// Moves the lexer forward and continues lexing in `state_index`. If
// ts_lexer_advance returns 0, the macro returns the end token
// (ts_builtin_sym_end) through ACCEPT_TOKEN instead. Otherwise it stores the
// new state in `lex_state` and jumps to the `next_state` label defined by
// START_LEXER().
#define ADVANCE(state_index) \
|
||||
{ \
|
||||
DEBUG_LEX("ADVANCE %d", state_index); \
|
||||
if (!ts_lexer_advance(&parser->lexer)) \
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end); \
|
||||
lex_state = state_index; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define ACCEPT_TOKEN(symbol) \
|
||||
{ \
|
||||
DEBUG_LEX("TOKEN %s", ts_symbol_names[symbol]); \
|
||||
return ts_lexer_build_node(&parser->lexer, symbol, ts_hidden_symbol_flags[symbol]); \
|
||||
// Logs the symbol's name and returns, from the enclosing lex function, the
// node built by ts_lexer_build_node for `symbol`. The symbol's entry in
// ts_hidden_symbol_flags is passed as the is_hidden argument.
#define ACCEPT_TOKEN(symbol) \
|
||||
{ \
|
||||
DEBUG_LEX("TOKEN %s", ts_symbol_names[symbol]); \
|
||||
return ts_lexer_build_node(&parser->lexer, symbol, \
|
||||
ts_hidden_symbol_flags[symbol]); \
|
||||
}
|
||||
|
||||
#define LEX_ERROR() \
|
||||
{ \
|
||||
DEBUG_LEX("ERROR"); \
|
||||
// Logs "ERROR" and returns, from the enclosing lex function, a node for
// ts_builtin_sym_error with is_hidden set to 0.
#define LEX_ERROR() \
|
||||
{ \
|
||||
DEBUG_LEX("ERROR"); \
|
||||
return ts_lexer_build_node(&parser->lexer, ts_builtin_sym_error, 0); \
|
||||
}
|
||||
|
||||
#define LEX_PANIC() \
|
||||
{ \
|
||||
#define LEX_PANIC() \
|
||||
{ \
|
||||
DEBUG_LEX("LEX ERROR: unexpected state %d", lex_state); \
|
||||
return NULL; \
|
||||
return NULL; \
|
||||
}
|
||||
|
||||
#define SHIFT(to_state_value) \
|
||||
{ .type = TSParseActionTypeShift, .data = { .to_state = to_state_value } }
|
||||
// Designated-initializer for a shift action that moves to `to_state_value`.
// Intended as an element of the PARSE_TABLE array (type TSParseAction).
#define SHIFT(to_state_value) \
|
||||
{ \
|
||||
.type = TSParseActionTypeShift, .data = { .to_state = to_state_value } \
|
||||
}
|
||||
|
||||
#define SHIFT_EXTRA() \
|
||||
{ .type = TSParseActionTypeShiftExtra }
|
||||
|
||||
#define REDUCE_EXTRA(symbol_val) \
|
||||
{ .type = TSParseActionTypeReduceExtra, .data = { .symbol = symbol_val } }
|
||||
// Designated-initializer for a reduce-extra action on `symbol_val`.
// Intended as an element of the PARSE_TABLE array (type TSParseAction).
#define REDUCE_EXTRA(symbol_val) \
|
||||
{ \
|
||||
.type = TSParseActionTypeReduceExtra, .data = { .symbol = symbol_val } \
|
||||
}
|
||||
|
||||
#define REDUCE(symbol_val, child_count_val) \
|
||||
{ .type = TSParseActionTypeReduce, .data = { .symbol = symbol_val, .child_count = child_count_val } }
|
||||
// Designated-initializer for a reduce action that produces `symbol_val` from
// `child_count_val` children. Intended as an element of the PARSE_TABLE
// array (type TSParseAction).
#define REDUCE(symbol_val, child_count_val) \
|
||||
{ \
|
||||
.type = TSParseActionTypeReduce, \
|
||||
.data = { .symbol = symbol_val, .child_count = child_count_val } \
|
||||
}
|
||||
|
||||
#define ACCEPT_INPUT() \
|
||||
{ .type = TSParseActionTypeAccept }
|
||||
|
||||
#define EXPORT_PARSER(constructor_name) \
|
||||
TSParser * constructor_name() { \
|
||||
return ts_parser_make((TSParserConfig) { \
|
||||
.symbol_count = SYMBOL_COUNT, \
|
||||
.hidden_symbol_flags = ts_hidden_symbol_flags, \
|
||||
#define EXPORT_PARSER(constructor_name) \
|
||||
TSParser *constructor_name() { \
|
||||
return ts_parser_make((TSParserConfig) { \
|
||||
.symbol_count = SYMBOL_COUNT, \
|
||||
.hidden_symbol_flags = ts_hidden_symbol_flags, \
|
||||
.parse_table = (const TSParseAction *)ts_parse_actions, \
|
||||
.lex_states = ts_lex_states, \
|
||||
.symbol_names = ts_symbol_names, \
|
||||
.lex_fn = ts_lex, \
|
||||
}); \
|
||||
.lex_states = ts_lex_states, \
|
||||
.symbol_names = ts_symbol_names, \
|
||||
.lex_fn = ts_lex, \
|
||||
}); \
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@ extern "C" {
|
|||
|
||||
typedef struct {
|
||||
void *data;
|
||||
const char * (* read_fn)(void *data, size_t *bytes_read);
|
||||
int (* seek_fn)(void *data, size_t position);
|
||||
void (* release_fn)(void *data);
|
||||
const char *(*read_fn)(void *data, size_t *bytes_read);
|
||||
int (*seek_fn)(void *data, size_t position);
|
||||
void (*release_fn)(void *data);
|
||||
} TSInput;
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -29,26 +29,26 @@ typedef struct TSDocument TSDocument;
|
|||
size_t ts_node_pos(const TSNode *);
|
||||
size_t ts_node_size(const TSNode *);
|
||||
TSSymbol ts_node_sym(const TSNode *);
|
||||
TSNode * ts_node_child(TSNode *, size_t);
|
||||
TSNode *ts_node_child(TSNode *, size_t);
|
||||
size_t ts_node_child_count(const TSNode *);
|
||||
TSNode * ts_node_leaf_at_pos(TSNode *, size_t);
|
||||
TSNode * ts_node_parent(TSNode *node);
|
||||
TSNode * ts_node_next_sibling(TSNode *node);
|
||||
TSNode * ts_node_prev_sibling(TSNode *node);
|
||||
const char * ts_node_name(const TSNode *);
|
||||
const char * ts_node_string(const TSNode *);
|
||||
TSNode *ts_node_leaf_at_pos(TSNode *, size_t);
|
||||
TSNode *ts_node_parent(TSNode *node);
|
||||
TSNode *ts_node_next_sibling(TSNode *node);
|
||||
TSNode *ts_node_prev_sibling(TSNode *node);
|
||||
const char *ts_node_name(const TSNode *);
|
||||
const char *ts_node_string(const TSNode *);
|
||||
void ts_node_retain(TSNode *node);
|
||||
void ts_node_release(TSNode *node);
|
||||
int ts_node_eq(const TSNode *, const TSNode *);
|
||||
|
||||
TSDocument * ts_document_make();
|
||||
TSDocument *ts_document_make();
|
||||
void ts_document_free(TSDocument *doc);
|
||||
void ts_document_set_parser(TSDocument *doc, TSParser *parser);
|
||||
void ts_document_set_input(TSDocument *doc, TSInput input);
|
||||
void ts_document_set_input_string(TSDocument *doc, const char *text);
|
||||
void ts_document_edit(TSDocument *doc, TSInputEdit edit);
|
||||
const char * ts_document_string(const TSDocument *doc);
|
||||
TSNode * ts_document_root_node(const TSDocument *document);
|
||||
const char *ts_document_string(const TSDocument *doc);
|
||||
TSNode *ts_document_root_node(const TSDocument *document);
|
||||
|
||||
#define ts_builtin_sym_error 0
|
||||
#define ts_builtin_sym_end 1
|
||||
|
|
|
|||
|
|
@ -11,6 +11,6 @@ fi
|
|||
$CPPLINT \
|
||||
--root=src \
|
||||
--linelength=110 \
|
||||
--filter=-legal/copyright,-readability/namespace,-whitespace/indent,-whitespace/line_length,-readability/todo \
|
||||
--filter=-legal/copyright,-whitespace/indent,-whitespace/line_length,-readability/todo \
|
||||
$(find src/compiler -type f) \
|
||||
2>&1
|
||||
|
|
|
|||
|
|
@ -15,128 +15,133 @@
|
|||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::map;
|
||||
using std::unordered_map;
|
||||
using std::set;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::CharacterSet;
|
||||
namespace build_tables {
|
||||
|
||||
namespace build_tables {
|
||||
class LexTableBuilder {
|
||||
const LexicalGrammar lex_grammar;
|
||||
ParseTable *parse_table;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
|
||||
LexTable lex_table;
|
||||
using std::string;
|
||||
using std::map;
|
||||
using std::unordered_map;
|
||||
using std::set;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::CharacterSet;
|
||||
|
||||
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
|
||||
LexItemSet result;
|
||||
for (const auto &symbol : symbols) {
|
||||
if (symbol == rules::ERROR())
|
||||
continue;
|
||||
else if (symbol == rules::END_OF_INPUT())
|
||||
result.insert(LexItem(symbol, after_separators(CharacterSet({ 0 }).copy())));
|
||||
else if (symbol.is_token())
|
||||
result.insert(LexItem(symbol, after_separators(lex_grammar.rule(symbol))));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
class LexTableBuilder {
|
||||
const LexicalGrammar lex_grammar;
|
||||
ParseTable *parse_table;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
|
||||
LexTable lex_table;
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
auto pair = lex_state_ids.find(item_set);
|
||||
if (pair == lex_state_ids.end()) {
|
||||
LexStateId state_id = lex_table.add_state();
|
||||
lex_state_ids[item_set] = state_id;
|
||||
add_accept_token_actions(item_set, state_id);
|
||||
add_advance_actions(item_set, state_id);
|
||||
add_token_start(item_set, state_id);
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
}
|
||||
}
|
||||
|
||||
void add_error_lex_state() {
|
||||
LexItemSet item_set = build_lex_item_set(parse_table->symbols);
|
||||
add_accept_token_actions(item_set, LexTable::ERROR_STATE_ID);
|
||||
add_advance_actions(item_set, LexTable::ERROR_STATE_ID);
|
||||
}
|
||||
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
auto transitions = char_transitions(item_set);
|
||||
for (const auto &transition : transitions) {
|
||||
CharacterSet rule = transition.first;
|
||||
LexItemSet new_item_set = transition.second;
|
||||
LexStateId new_state_id = add_lex_state(new_item_set);
|
||||
auto action = LexAction::Advance(new_state_id, precedence_values_for_item_set(new_item_set));
|
||||
if (conflict_manager.resolve_lex_action(lex_table.state(state_id).default_action, action))
|
||||
lex_table.state(state_id).actions[rule] = action;
|
||||
}
|
||||
}
|
||||
|
||||
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const LexItem &item : item_set) {
|
||||
if (item.is_done()) {
|
||||
auto current_action = lex_table.state(state_id).default_action;
|
||||
auto new_action = LexAction::Accept(item.lhs, item.precedence());
|
||||
if (conflict_manager.resolve_lex_action(current_action, new_action))
|
||||
lex_table.state(state_id).default_action = new_action;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const auto &item : item_set)
|
||||
if (item.is_token_start())
|
||||
lex_table.state(state_id).is_token_start = true;
|
||||
}
|
||||
|
||||
CharacterSet separator_set() const {
|
||||
set<rules::CharacterRange> ranges;
|
||||
for (char c : lex_grammar.separators)
|
||||
ranges.insert(c);
|
||||
return CharacterSet(ranges);
|
||||
}
|
||||
|
||||
rules::rule_ptr after_separators(rules::rule_ptr rule) {
|
||||
return rules::Seq::Build({
|
||||
make_shared<rules::Metadata>(
|
||||
make_shared<rules::Repeat>(separator_set().copy()),
|
||||
map<rules::MetadataKey, int>({
|
||||
{rules::START_TOKEN, 1},
|
||||
{rules::PRECEDENCE, -1},
|
||||
})),
|
||||
rule,
|
||||
});
|
||||
}
|
||||
|
||||
set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
|
||||
set<int> result;
|
||||
for (const auto &item : item_set)
|
||||
result.insert(item.precedence());
|
||||
return result;
|
||||
}
|
||||
|
||||
public:
|
||||
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) :
|
||||
lex_grammar(lex_grammar),
|
||||
parse_table(parse_table),
|
||||
conflict_manager(LexConflictManager(lex_grammar)) {}
|
||||
|
||||
LexTable build() {
|
||||
for (auto &parse_state : parse_table->states) {
|
||||
LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs());
|
||||
parse_state.lex_state_id = add_lex_state(item_set);
|
||||
}
|
||||
add_error_lex_state();
|
||||
return lex_table;
|
||||
}
|
||||
};
|
||||
|
||||
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar) {
|
||||
return LexTableBuilder(parse_table, lex_grammar).build();
|
||||
}
|
||||
// Builds the lex item set for a set of symbols. Each item pairs a symbol
// with its rule prefixed by the separator rule (see after_separators):
//   - the error symbol contributes nothing;
//   - end-of-input uses a character set containing only character 0;
//   - any other symbol contributes only if it is a token, in which case its
//     rule comes from the lexical grammar.
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
  LexItemSet items;
  for (const auto &candidate : symbols) {
    if (candidate == rules::ERROR())
      continue;

    if (candidate == rules::END_OF_INPUT()) {
      items.insert(
          LexItem(candidate, after_separators(CharacterSet({ 0 }).copy())));
      continue;
    }

    if (candidate.is_token()) {
      items.insert(
          LexItem(candidate, after_separators(lex_grammar.rule(candidate))));
    }
  }
  return items;
}
|
||||
|
||||
// Returns the lex state id for `item_set`, creating and populating a new
// state the first time the set is seen.
LexStateId add_lex_state(const LexItemSet &item_set) {
  const auto known = lex_state_ids.find(item_set);
  if (known != lex_state_ids.end())
    return known->second;

  // Register the id before filling in the state: add_advance_actions calls
  // back into add_lex_state, and a set that reaches itself must find this
  // entry rather than allocate again.
  LexStateId fresh_id = lex_table.add_state();
  lex_state_ids[item_set] = fresh_id;
  add_accept_token_actions(item_set, fresh_id);
  add_advance_actions(item_set, fresh_id);
  add_token_start(item_set, fresh_id);
  return fresh_id;
}
|
||||
|
||||
void add_error_lex_state() {
|
||||
LexItemSet item_set = build_lex_item_set(parse_table->symbols);
|
||||
add_accept_token_actions(item_set, LexTable::ERROR_STATE_ID);
|
||||
add_advance_actions(item_set, LexTable::ERROR_STATE_ID);
|
||||
}
|
||||
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
auto transitions = char_transitions(item_set);
|
||||
for (const auto &transition : transitions) {
|
||||
CharacterSet rule = transition.first;
|
||||
LexItemSet new_item_set = transition.second;
|
||||
LexStateId new_state_id = add_lex_state(new_item_set);
|
||||
auto action = LexAction::Advance(
|
||||
new_state_id, precedence_values_for_item_set(new_item_set));
|
||||
if (conflict_manager.resolve_lex_action(
|
||||
lex_table.state(state_id).default_action, action))
|
||||
lex_table.state(state_id).actions[rule] = action;
|
||||
}
|
||||
}
|
||||
|
||||
void add_accept_token_actions(const LexItemSet &item_set,
|
||||
LexStateId state_id) {
|
||||
for (const LexItem &item : item_set) {
|
||||
if (item.is_done()) {
|
||||
auto current_action = lex_table.state(state_id).default_action;
|
||||
auto new_action = LexAction::Accept(item.lhs, item.precedence());
|
||||
if (conflict_manager.resolve_lex_action(current_action, new_action))
|
||||
lex_table.state(state_id).default_action = new_action;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const auto &item : item_set)
|
||||
if (item.is_token_start())
|
||||
lex_table.state(state_id).is_token_start = true;
|
||||
}
|
||||
|
||||
// Builds a CharacterSet holding every separator character of the lexical
// grammar, one range per character.
CharacterSet separator_set() const {
  set<rules::CharacterRange> separator_ranges;
  for (char separator : lex_grammar.separators) {
    separator_ranges.insert(separator);
  }
  return CharacterSet(separator_ranges);
}
|
||||
|
||||
// Wraps `rule` in a sequence whose first element is a repeat of the
// separator set, so the rule matches after any run of separators. The
// separator part carries the metadata START_TOKEN = 1 and PRECEDENCE = -1.
rules::rule_ptr after_separators(rules::rule_ptr rule) {
  auto separator_run = make_shared<rules::Repeat>(separator_set().copy());
  auto tagged_separators = make_shared<rules::Metadata>(
      separator_run,
      map<rules::MetadataKey, int>(
          { { rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 }, }));
  return rules::Seq::Build({ tagged_separators, rule, });
}
|
||||
|
||||
// Collects the distinct precedence values of all items in `item_set`.
set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
  set<int> precedences;
  for (const auto &entry : item_set) {
    precedences.insert(entry.precedence());
  }
  return precedences;
}
|
||||
|
||||
public:
|
||||
// Keeps a copy of the lexical grammar and a pointer to the parse table, and
// builds the conflict manager from the same lexical grammar.
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
    : lex_grammar(lex_grammar),
      parse_table(parse_table),
      conflict_manager(lex_grammar) {}
|
||||
|
||||
// Assigns a lex state to every parse state, based on the inputs that parse
// state expects, then fills in the error lex state and returns the table.
LexTable build() {
  for (auto &state : parse_table->states) {
    state.lex_state_id =
        add_lex_state(build_lex_item_set(state.expected_inputs()));
  }
  add_error_lex_state();
  return lex_table;
}
|
||||
};
|
||||
|
||||
// Builds the lex table for `parse_table` using `lex_grammar`. The builder
// writes each parse state's lex_state_id as it goes.
LexTable build_lex_table(ParseTable *parse_table,
                         const LexicalGrammar &lex_grammar) {
  LexTableBuilder builder(parse_table, lex_grammar);
  return builder.build();
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,12 +5,16 @@
|
|||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class LexicalGrammar;
|
||||
class ParseTable;
|
||||
|
||||
namespace build_tables {
|
||||
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
class LexicalGrammar;
|
||||
class ParseTable;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
// Builds and returns the lex table for the given parse table and lexical
// grammar. `parse_table` is taken by non-const pointer.
// NOTE(review): presumably so the implementation can record each parse
// state's lex state id -- confirm in build_lex_table.cc.
LexTable build_lex_table(ParseTable *parse_table,
|
||||
const LexicalGrammar &lex_grammar);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
|
|
|
|||
|
|
@ -14,142 +14,156 @@
|
|||
#include "compiler/build_tables/first_set.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::map;
|
||||
using std::unordered_map;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
namespace build_tables {
|
||||
|
||||
namespace build_tables {
|
||||
class ParseTableBuilder {
|
||||
const SyntaxGrammar grammar;
|
||||
ParseConflictManager conflict_manager;
|
||||
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
|
||||
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
|
||||
ParseTable parse_table;
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::map;
|
||||
using std::unordered_map;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
|
||||
ParseStateId add_parse_state(const ParseItemSet &item_set) {
|
||||
auto pair = parse_state_ids.find(item_set);
|
||||
if (pair == parse_state_ids.end()) {
|
||||
ParseStateId state_id = parse_table.add_state();
|
||||
parse_state_ids[item_set] = state_id;
|
||||
item_sets_to_process.push_back({ item_set, state_id });
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
}
|
||||
}
|
||||
class ParseTableBuilder {
|
||||
const SyntaxGrammar grammar;
|
||||
ParseConflictManager conflict_manager;
|
||||
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
|
||||
vector<pair<ParseItemSet, ParseStateId> > item_sets_to_process;
|
||||
ParseTable parse_table;
|
||||
|
||||
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
const set<Symbol> &lookahead_symbols = pair.second;
|
||||
|
||||
if (item.is_done()) {
|
||||
ParseAction action = (item.lhs == rules::START()) ?
|
||||
ParseAction::Accept() :
|
||||
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
|
||||
for (auto &lookahead_sym : lookahead_symbols)
|
||||
if (should_add_action(state_id, lookahead_sym, action))
|
||||
parse_table.add_action(state_id, lookahead_sym, action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const auto &transition : sym_transitions(item_set, grammar)) {
|
||||
const Symbol &symbol = transition.first;
|
||||
const ParseItemSet &next_item_set = transition.second;
|
||||
|
||||
ParseAction new_action = ParseAction::Shift(0, precedence_values_for_item_set(next_item_set));
|
||||
if (should_add_action(state_id, symbol, new_action)) {
|
||||
ParseStateId new_state_id = add_parse_state(next_item_set);
|
||||
new_action.state_index = new_state_id;
|
||||
parse_table.add_action(state_id, symbol, new_action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void add_shift_extra_actions(ParseStateId state_id) {
|
||||
const map<Symbol, ParseAction> &actions = parse_table.states[state_id].actions;
|
||||
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
|
||||
const auto &pair_for_symbol = actions.find(ubiquitous_symbol);
|
||||
if (pair_for_symbol == actions.end()) {
|
||||
parse_table.add_action(state_id, ubiquitous_symbol, ParseAction::ShiftExtra());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void add_reduce_extra_actions(ParseStateId state_id) {
|
||||
const map<Symbol, ParseAction> &actions = parse_table.states[state_id].actions;
|
||||
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
|
||||
const auto &pair_for_symbol = actions.find(ubiquitous_symbol);
|
||||
|
||||
if (pair_for_symbol != actions.end() && pair_for_symbol->second.type == ParseActionTypeShift) {
|
||||
size_t shift_state_id = pair_for_symbol->second.state_index;
|
||||
for (const auto &pair : actions) {
|
||||
const Symbol &lookahead_sym = pair.first;
|
||||
ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol);
|
||||
if (should_add_action(shift_state_id, lookahead_sym, reduce_extra))
|
||||
parse_table.add_action(shift_state_id, lookahead_sym, reduce_extra);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool should_add_action(ParseStateId state_id, const Symbol &symbol, const ParseAction &action) {
|
||||
auto current_actions = parse_table.states[state_id].actions;
|
||||
auto current_action = current_actions.find(symbol);
|
||||
return (
|
||||
current_action == current_actions.end() ||
|
||||
conflict_manager.resolve_parse_action(symbol, current_action->second, action));
|
||||
}
|
||||
|
||||
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
|
||||
set<int> result;
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
if (item.consumed_symbol_count > 0)
|
||||
result.insert(item.precedence());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public:
|
||||
ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) :
|
||||
grammar(grammar),
|
||||
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
|
||||
|
||||
pair<ParseTable, vector<Conflict>> build() {
|
||||
ParseItem start_item(rules::START(), make_shared<Symbol>(0), 0);
|
||||
add_parse_state(item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));
|
||||
|
||||
parse_table.symbols.insert(rules::ERROR());
|
||||
|
||||
while (!item_sets_to_process.empty()) {
|
||||
auto pair = item_sets_to_process.back();
|
||||
ParseItemSet &item_set = pair.first;
|
||||
ParseStateId &state_id = pair.second;
|
||||
item_sets_to_process.pop_back();
|
||||
|
||||
add_reduce_actions(item_set, state_id);
|
||||
add_shift_actions(item_set, state_id);
|
||||
add_shift_extra_actions(state_id);
|
||||
}
|
||||
|
||||
for (ParseStateId state_id = 0; state_id < parse_table.states.size(); state_id++)
|
||||
add_reduce_extra_actions(state_id);
|
||||
|
||||
return { parse_table, conflict_manager.conflicts() };
|
||||
}
|
||||
};
|
||||
|
||||
pair<ParseTable, vector<Conflict>>
|
||||
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
|
||||
return ParseTableBuilder(grammar, lex_grammar).build();
|
||||
}
|
||||
// Returns the parse state id for `item_set`. A set seen for the first time
// gets a new state and is queued in item_sets_to_process so its actions are
// filled in later.
ParseStateId add_parse_state(const ParseItemSet &item_set) {
  const auto known = parse_state_ids.find(item_set);
  if (known != parse_state_ids.end())
    return known->second;

  ParseStateId fresh_id = parse_table.add_state();
  parse_state_ids[item_set] = fresh_id;
  item_sets_to_process.push_back({ item_set, fresh_id });
  return fresh_id;
}
|
||||
|
||||
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
const set<Symbol> &lookahead_symbols = pair.second;
|
||||
|
||||
if (item.is_done()) {
|
||||
ParseAction action =
|
||||
(item.lhs == rules::START())
|
||||
? ParseAction::Accept()
|
||||
: ParseAction::Reduce(item.lhs, item.consumed_symbol_count,
|
||||
item.precedence());
|
||||
for (auto &lookahead_sym : lookahead_symbols)
|
||||
if (should_add_action(state_id, lookahead_sym, action))
|
||||
parse_table.add_action(state_id, lookahead_sym, action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const auto &transition : sym_transitions(item_set, grammar)) {
|
||||
const Symbol &symbol = transition.first;
|
||||
const ParseItemSet &next_item_set = transition.second;
|
||||
|
||||
ParseAction new_action =
|
||||
ParseAction::Shift(0, precedence_values_for_item_set(next_item_set));
|
||||
if (should_add_action(state_id, symbol, new_action)) {
|
||||
ParseStateId new_state_id = add_parse_state(next_item_set);
|
||||
new_action.state_index = new_state_id;
|
||||
parse_table.add_action(state_id, symbol, new_action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Adds a ShiftExtra action to state `state_id` for every ubiquitous token
// of the grammar that has no action recorded in that state yet.
void add_shift_extra_actions(ParseStateId state_id) {
  const map<Symbol, ParseAction> &existing =
      parse_table.states[state_id].actions;
  for (const Symbol &extra_token : grammar.ubiquitous_tokens) {
    if (existing.count(extra_token) != 0)
      continue;
    parse_table.add_action(state_id, extra_token, ParseAction::ShiftExtra());
  }
}
|
||||
|
||||
// For every ubiquitous token that state `state_id` handles with a Shift
// action, visits the state that the shift leads to and offers it a
// ReduceExtra action under each symbol that has an action in `state_id`.
// Each offer is stored only if should_add_action() approves it.
void add_reduce_extra_actions(ParseStateId state_id) {
  const map<Symbol, ParseAction> &actions =
      parse_table.states[state_id].actions;
  for (const Symbol &extra_token : grammar.ubiquitous_tokens) {
    const auto found = actions.find(extra_token);
    if (found == actions.end())
      continue;
    if (found->second.type != ParseActionTypeShift)
      continue;

    size_t shift_state_id = found->second.state_index;
    for (const auto &entry : actions) {
      const Symbol &lookahead = entry.first;
      ParseAction reduce_extra = ParseAction::ReduceExtra(extra_token);
      if (should_add_action(shift_state_id, lookahead, reduce_extra))
        parse_table.add_action(shift_state_id, lookahead, reduce_extra);
    }
  }
}
|
||||
|
||||
// Returns true when `action` should be stored under `symbol` in state
// `state_id`: either no action is recorded for that symbol yet, or the
// conflict manager's resolve_parse_action() returns true for the
// (existing, new) pair.
bool should_add_action(ParseStateId state_id, const Symbol &symbol,
                       const ParseAction &action) {
  // Bind by reference so the state's action map is not copied on every call.
  const auto &current_actions = parse_table.states[state_id].actions;
  auto current_action = current_actions.find(symbol);
  return (current_action == current_actions.end() ||
          conflict_manager.resolve_parse_action(
              symbol, current_action->second, action));
}
|
||||
|
||||
// Gathers the distinct precedence() values of those items in `item_set`
// whose consumed_symbol_count is greater than zero.
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
  set<int> precedences;
  for (const auto &entry : item_set) {
    const ParseItem &parse_item = entry.first;
    if (parse_item.consumed_symbol_count <= 0)
      continue;
    precedences.insert(parse_item.precedence());
  }
  return precedences;
}
|
||||
|
||||
public:
|
||||
ParseTableBuilder(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lex_grammar)
|
||||
: grammar(grammar),
|
||||
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
|
||||
|
||||
// Builds the parse table. Seeds the work list with the closure of the start
// item (lookahead: END_OF_INPUT), then drains the work list, adding reduce,
// shift and shift-extra actions for each pending state. Reduce-extra actions
// are added in a second pass over all states. Returns the table together
// with the conflicts reported by the conflict manager.
pair<ParseTable, vector<Conflict> > build() {
  ParseItem start_item(rules::START(), make_shared<Symbol>(0), 0);
  add_parse_state(
      item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));

  parse_table.symbols.insert(rules::ERROR());

  while (!item_sets_to_process.empty()) {
    // Copy the entry out before popping it; processing may push new entries.
    auto pending = item_sets_to_process.back();
    item_sets_to_process.pop_back();

    ParseItemSet &item_set = pending.first;
    ParseStateId &state_id = pending.second;
    add_reduce_actions(item_set, state_id);
    add_shift_actions(item_set, state_id);
    add_shift_extra_actions(state_id);
  }

  ParseStateId state_id = 0;
  while (state_id < parse_table.states.size()) {
    add_reduce_extra_actions(state_id);
    state_id++;
  }

  return { parse_table, conflict_manager.conflicts() };
}
|
||||
};
|
||||
|
||||
// Entry point: constructs a ParseTableBuilder for the two grammars and
// returns the result of its build().
pair<ParseTable, vector<Conflict> > build_parse_table(
    const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
  ParseTableBuilder builder(grammar, lex_grammar);
  return builder.build();
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -7,13 +7,13 @@
|
|||
#include "compiler/parse_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
std::pair<ParseTable, std::vector<Conflict>>
|
||||
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
|
||||
}
|
||||
namespace build_tables {
|
||||
std::pair<ParseTable, std::vector<Conflict> > build_parse_table(
|
||||
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_BUILD_PARSE_TABLE_H_
|
||||
|
|
|
|||
|
|
@ -4,19 +4,20 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::tuple;
|
||||
using std::vector;
|
||||
using std::make_tuple;
|
||||
namespace build_tables {
|
||||
|
||||
namespace build_tables {
|
||||
tuple<ParseTable, LexTable, vector<Conflict>>
|
||||
build_tables(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lex_grammar) {
|
||||
auto parse_table_result = build_parse_table(grammar, lex_grammar);
|
||||
ParseTable parse_table = parse_table_result.first;
|
||||
vector<Conflict> conflicts = parse_table_result.second;
|
||||
LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
|
||||
return make_tuple(parse_table, lex_table, conflicts);
|
||||
}
|
||||
}
|
||||
using std::tuple;
|
||||
using std::vector;
|
||||
using std::make_tuple;
|
||||
|
||||
// Builds the parse table, then the lex table (build_lex_table receives a
// pointer to the parse table), and returns both together with the conflicts
// reported while building the parse table.
tuple<ParseTable, LexTable, vector<Conflict> > build_tables(
    const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
  auto parse_table_result = build_parse_table(grammar, lex_grammar);
  // Refer to the members of the result pair directly instead of copying the
  // table and the conflict list into separate locals; make_tuple below makes
  // the one copy that is returned.
  ParseTable &parse_table = parse_table_result.first;
  const vector<Conflict> &conflicts = parse_table_result.second;
  LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
  return make_tuple(parse_table, lex_table, conflicts);
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@
|
|||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
std::tuple<ParseTable, LexTable, std::vector<Conflict>>
|
||||
build_tables(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lex_grammar);
|
||||
}
|
||||
namespace build_tables {
|
||||
std::tuple<ParseTable, LexTable, std::vector<Conflict> > build_tables(
|
||||
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_BUILD_TABLES_H_
|
||||
|
|
|
|||
|
|
@ -9,53 +9,55 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::set;
|
||||
using rules::Symbol;
|
||||
namespace build_tables {
|
||||
|
||||
namespace build_tables {
|
||||
class FirstSet : public rules::RuleFn<set<Symbol>> {
|
||||
const SyntaxGrammar *grammar;
|
||||
set<Symbol> visited_symbols;
|
||||
using std::set;
|
||||
using rules::Symbol;
|
||||
|
||||
public:
|
||||
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
|
||||
class FirstSet : public rules::RuleFn<set<Symbol> > {
|
||||
const SyntaxGrammar *grammar;
|
||||
set<Symbol> visited_symbols;
|
||||
|
||||
set<Symbol> apply_to(const Symbol *rule) {
|
||||
auto insertion_result = visited_symbols.insert(*rule);
|
||||
if (insertion_result.second) {
|
||||
return (rule->is_token()) ?
|
||||
set<Symbol>({ *rule }) :
|
||||
apply(grammar->rule(*rule));
|
||||
} else {
|
||||
return set<Symbol>();
|
||||
}
|
||||
}
|
||||
public:
|
||||
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
|
||||
|
||||
set<Symbol> apply_to(const rules::Metadata *rule) {
|
||||
return apply(rule->rule);
|
||||
}
|
||||
|
||||
set<Symbol> apply_to(const rules::Choice *rule) {
|
||||
set<Symbol> result;
|
||||
for (const auto &el : rule->elements) {
|
||||
auto &&next_syms = apply(el);
|
||||
result.insert(next_syms.begin(), next_syms.end());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
set<Symbol> apply_to(const rules::Seq *rule) {
|
||||
auto &&result = apply(rule->left);
|
||||
if (rule_can_be_blank(rule->left, *grammar)) {
|
||||
auto &&right_symbols = apply(rule->right);
|
||||
result.insert(right_symbols.begin(), right_symbols.end());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
set<Symbol> first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
|
||||
return FirstSet(&grammar).apply(rule);
|
||||
}
|
||||
// Symbol case. A symbol that was already visited contributes nothing (this
// stops the visitor from re-expanding it). Otherwise a token contributes
// itself, and any other symbol contributes the result for its grammar rule.
set<Symbol> apply_to(const Symbol *rule) {
  const bool first_visit = visited_symbols.insert(*rule).second;
  if (!first_visit)
    return set<Symbol>();
  if (rule->is_token())
    return set<Symbol>({ *rule });
  return apply(grammar->rule(*rule));
}
|
||||
|
||||
// Metadata case: the result is that of the wrapped rule.
set<Symbol> apply_to(const rules::Metadata *rule) {
  const auto &wrapped = rule->rule;
  return apply(wrapped);
}
|
||||
|
||||
// Choice case: the union of the results for every alternative.
set<Symbol> apply_to(const rules::Choice *rule) {
  set<Symbol> combined;
  for (const auto &alternative : rule->elements) {
    auto &&alternative_syms = apply(alternative);
    for (const Symbol &sym : alternative_syms)
      combined.insert(sym);
  }
  return combined;
}
|
||||
|
||||
// Sequence case: the result for the left operand, plus the result for the
// right operand when rule_can_be_blank() reports that the left operand can
// be blank.
set<Symbol> apply_to(const rules::Seq *rule) {
  // A plain local (rather than `auto &&`) lets the return statement move or
  // elide the set instead of copying it out of a reference.
  set<Symbol> result = apply(rule->left);
  if (rule_can_be_blank(rule->left, *grammar)) {
    const set<Symbol> right_symbols = apply(rule->right);
    result.insert(right_symbols.begin(), right_symbols.end());
  }
  return result;
}
|
||||
};
|
||||
|
||||
// Runs a fresh FirstSet visitor for `grammar` over `rule` and returns the
// symbols it collects.
set<Symbol> first_set(const rules::rule_ptr &rule,
                      const SyntaxGrammar &grammar) {
  FirstSet visitor(&grammar);
  return visitor.apply(rule);
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -6,18 +6,20 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
class SyntaxGrammar;
|
||||
|
||||
/*
|
||||
* Returns the set of terminal symbols that can appear at
|
||||
* the beginning of a string derivable from a given rule,
|
||||
* in a given grammar.
|
||||
*/
|
||||
std::set<rules::Symbol>
|
||||
first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
|
||||
}
|
||||
}
|
||||
namespace build_tables {
|
||||
|
||||
/*
|
||||
* Returns the set of terminal symbols that can appear at
|
||||
* the beginning of a string derivable from a given rule,
|
||||
* in a given grammar.
|
||||
*/
|
||||
std::set<rules::Symbol> first_set(const rules::rule_ptr &rule,
|
||||
const SyntaxGrammar &grammar);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_FIRST_SET_H_
|
||||
|
|
|
|||
|
|
@ -3,28 +3,28 @@
|
|||
#include "compiler/rules/seq.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) {
|
||||
class GetMetadata : public rules::RuleFn<int> {
|
||||
rules::MetadataKey metadata_key;
|
||||
namespace build_tables {
|
||||
|
||||
int apply_to(const rules::Metadata *rule) {
|
||||
int result = rule->value_for(metadata_key);
|
||||
return (result != 0) ? result : apply(rule->rule);
|
||||
}
|
||||
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) {
|
||||
class GetMetadata : public rules::RuleFn<int> {
|
||||
rules::MetadataKey metadata_key;
|
||||
|
||||
// TODO -
|
||||
// Remove this. It is currently needed to make the rule generated
|
||||
// by `LexTableBuilder::after_separators` have the right precedence.
|
||||
int apply_to(const rules::Seq *rule) {
|
||||
return apply(rule->left);
|
||||
}
|
||||
|
||||
public:
|
||||
explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
|
||||
};
|
||||
|
||||
return GetMetadata(key).apply(rule);
|
||||
}
|
||||
int apply_to(const rules::Metadata *rule) {
|
||||
int result = rule->value_for(metadata_key);
|
||||
return (result != 0) ? result : apply(rule->rule);
|
||||
}
|
||||
|
||||
// TODO -
|
||||
// Remove this. It is currently needed to make the rule generated
|
||||
// by `LexTableBuilder::after_separators` have the right precedence.
|
||||
int apply_to(const rules::Seq *rule) { return apply(rule->left); }
|
||||
|
||||
public:
|
||||
explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
|
||||
};
|
||||
|
||||
return GetMetadata(key).apply(rule);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,9 +5,9 @@
|
|||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key);
|
||||
}
|
||||
namespace build_tables {
|
||||
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_GET_METADATA_H_
|
||||
|
|
|
|||
|
|
@ -5,17 +5,14 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule) :
|
||||
lhs(lhs),
|
||||
rule(rule) {}
|
||||
namespace build_tables {
|
||||
|
||||
bool Item::is_done() const {
|
||||
return rule_can_be_blank(rule);
|
||||
}
|
||||
Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule)
|
||||
: lhs(lhs), rule(rule) {}
|
||||
|
||||
int Item::precedence() const {
|
||||
return get_metadata(rule, rules::PRECEDENCE);
|
||||
}
|
||||
}
|
||||
}
|
||||
bool Item::is_done() const { return rule_can_be_blank(rule); }
|
||||
|
||||
int Item::precedence() const { return get_metadata(rule, rules::PRECEDENCE); }
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,17 +5,19 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class Item {
|
||||
public:
|
||||
Item(const rules::Symbol &lhs, rules::rule_ptr rule);
|
||||
bool is_done() const;
|
||||
int precedence() const;
|
||||
namespace build_tables {
|
||||
|
||||
rules::Symbol lhs;
|
||||
rules::rule_ptr rule;
|
||||
};
|
||||
}
|
||||
}
|
||||
class Item {
|
||||
public:
|
||||
Item(const rules::Symbol &lhs, rules::rule_ptr rule);
|
||||
bool is_done() const;
|
||||
int precedence() const;
|
||||
|
||||
rules::Symbol lhs;
|
||||
rules::rule_ptr rule;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_ITEM_H_
|
||||
|
|
|
|||
|
|
@ -10,50 +10,56 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using rules::Symbol;
|
||||
using rules::rule_ptr;
|
||||
namespace build_tables {
|
||||
|
||||
namespace build_tables {
|
||||
const ParseItemSet item_set_closure(const ParseItem &starting_item,
|
||||
const set<Symbol> &starting_lookahead_symbols,
|
||||
const SyntaxGrammar &grammar) {
|
||||
ParseItemSet result;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using rules::Symbol;
|
||||
using rules::rule_ptr;
|
||||
|
||||
vector<pair<ParseItem, set<Symbol>>> items_to_process = {{starting_item, starting_lookahead_symbols}};
|
||||
while (!items_to_process.empty()) {
|
||||
ParseItem item = items_to_process.back().first;
|
||||
set<Symbol> new_lookahead_symbols = items_to_process.back().second;
|
||||
items_to_process.pop_back();
|
||||
const ParseItemSet item_set_closure(
|
||||
const ParseItem &starting_item,
|
||||
const set<Symbol> &starting_lookahead_symbols,
|
||||
const SyntaxGrammar &grammar) {
|
||||
ParseItemSet result;
|
||||
|
||||
set<Symbol> &lookahead_symbols = result[item];
|
||||
size_t previous_size = lookahead_symbols.size();
|
||||
lookahead_symbols.insert(new_lookahead_symbols.begin(), new_lookahead_symbols.end());
|
||||
vector<pair<ParseItem, set<Symbol>>> items_to_process = {
|
||||
{ starting_item, starting_lookahead_symbols }
|
||||
};
|
||||
|
||||
if (lookahead_symbols.size() == previous_size)
|
||||
continue;
|
||||
while (!items_to_process.empty()) {
|
||||
ParseItem item = items_to_process.back().first;
|
||||
set<Symbol> new_lookahead_symbols = items_to_process.back().second;
|
||||
items_to_process.pop_back();
|
||||
|
||||
for (const auto &pair : sym_transitions(item.rule)) {
|
||||
const Symbol &symbol = pair.first;
|
||||
const rule_ptr &next_rule = pair.second;
|
||||
set<Symbol> &lookahead_symbols = result[item];
|
||||
size_t previous_size = lookahead_symbols.size();
|
||||
lookahead_symbols.insert(new_lookahead_symbols.begin(),
|
||||
new_lookahead_symbols.end());
|
||||
|
||||
if (symbol.is_token() || symbol.is_built_in())
|
||||
continue;
|
||||
if (lookahead_symbols.size() == previous_size)
|
||||
continue;
|
||||
|
||||
set<Symbol> next_lookahead_symbols = first_set(next_rule, grammar);
|
||||
if (rule_can_be_blank(next_rule, grammar))
|
||||
next_lookahead_symbols.insert(lookahead_symbols.begin(), lookahead_symbols.end());
|
||||
for (const auto &pair : sym_transitions(item.rule)) {
|
||||
const Symbol &symbol = pair.first;
|
||||
const rule_ptr &next_rule = pair.second;
|
||||
|
||||
items_to_process.push_back({
|
||||
ParseItem(symbol, grammar.rule(symbol), 0),
|
||||
next_lookahead_symbols
|
||||
});
|
||||
}
|
||||
}
|
||||
if (symbol.is_token() || symbol.is_built_in())
|
||||
continue;
|
||||
|
||||
return result;
|
||||
}
|
||||
set<Symbol> next_lookahead_symbols = first_set(next_rule, grammar);
|
||||
if (rule_can_be_blank(next_rule, grammar))
|
||||
next_lookahead_symbols.insert(lookahead_symbols.begin(),
|
||||
lookahead_symbols.end());
|
||||
|
||||
items_to_process.push_back({ ParseItem(symbol, grammar.rule(symbol), 0),
|
||||
next_lookahead_symbols });
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -6,13 +6,13 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class SyntaxGrammar;
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
const ParseItemSet item_set_closure(const ParseItem &item,
|
||||
const std::set<rules::Symbol> &lookahead_symbols,
|
||||
const SyntaxGrammar &grammar);
|
||||
}
|
||||
namespace build_tables {
|
||||
const ParseItemSet item_set_closure(
|
||||
const ParseItem &item, const std::set<rules::Symbol> &lookahead_symbols,
|
||||
const SyntaxGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
|
||||
|
|
|
|||
|
|
@ -7,43 +7,49 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::map;
|
||||
using std::set;
|
||||
using rules::CharacterSet;
|
||||
using rules::Symbol;
|
||||
namespace build_tables {
|
||||
|
||||
namespace build_tables {
|
||||
map<Symbol, ParseItemSet>
|
||||
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
|
||||
map<Symbol, ParseItemSet> result;
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
const set<Symbol> &lookahead_symbols = pair.second;
|
||||
for (auto &transition : sym_transitions(item.rule)) {
|
||||
ParseItem new_item(item.lhs, transition.second, item.consumed_symbol_count + 1);
|
||||
merge_sym_transition<ParseItemSet>(&result, { transition.first, item_set_closure(new_item, lookahead_symbols, grammar) },
|
||||
[](ParseItemSet *left, const ParseItemSet *right) {
|
||||
for (auto &pair : *right)
|
||||
left->operator[](pair.first).insert(pair.second.begin(), pair.second.end());
|
||||
});
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
using std::map;
|
||||
using std::set;
|
||||
using rules::CharacterSet;
|
||||
using rules::Symbol;
|
||||
|
||||
map<CharacterSet, LexItemSet>
|
||||
char_transitions(const LexItemSet &item_set) {
|
||||
map<CharacterSet, LexItemSet> result;
|
||||
for (const LexItem &item : item_set) {
|
||||
for (auto &transition : char_transitions(item.rule)) {
|
||||
LexItem next_item(item.lhs, transition.second);
|
||||
merge_char_transition<LexItemSet>(&result, { transition.first, LexItemSet({ next_item }) },
|
||||
[](LexItemSet *left, const LexItemSet *right) {
|
||||
left->insert(right->begin(), right->end());
|
||||
});
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
// Computes, for a parse item set, the item set reached on each symbol.
// Every transition of every item's rule yields a new item (one more consumed
// symbol) whose closure is merged into the entry for the transition symbol.
map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
                                          const SyntaxGrammar &grammar) {
  // Merges `incoming` into `target` by unioning lookahead sets per item.
  auto merge_item_sets = [](ParseItemSet *target,
                            const ParseItemSet *incoming) {
    for (auto &entry : *incoming)
      (*target)[entry.first].insert(entry.second.begin(), entry.second.end());
  };

  map<Symbol, ParseItemSet> result;
  for (const auto &entry : item_set) {
    const ParseItem &item = entry.first;
    const set<Symbol> &lookahead_symbols = entry.second;
    for (auto &transition : sym_transitions(item.rule)) {
      ParseItem advanced(item.lhs, transition.second,
                         item.consumed_symbol_count + 1);
      merge_sym_transition<ParseItemSet>(
          &result,
          { transition.first,
            item_set_closure(advanced, lookahead_symbols, grammar) },
          merge_item_sets);
    }
  }
  return result;
}
|
||||
|
||||
// Computes, for a lex item set, the item set reached on each character set.
// Every transition of every item's rule yields a one-item set that is merged
// into the result with merge_char_transition().
map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set) {
  // Merges `incoming` into `target` by inserting all of its items.
  auto merge_item_sets = [](LexItemSet *target, const LexItemSet *incoming) {
    target->insert(incoming->begin(), incoming->end());
  };

  map<CharacterSet, LexItemSet> result;
  for (const LexItem &item : item_set) {
    for (auto &transition : char_transitions(item.rule)) {
      LexItem advanced(item.lhs, transition.second);
      merge_char_transition<LexItemSet>(
          &result, { transition.first, LexItemSet({ advanced }) },
          merge_item_sets);
    }
  }
  return result;
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -6,19 +6,23 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class SyntaxGrammar;
|
||||
namespace rules {
|
||||
class CharacterSet;
|
||||
class Symbol;
|
||||
}
|
||||
|
||||
namespace build_tables {
|
||||
std::map<rules::Symbol, ParseItemSet>
|
||||
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar);
|
||||
class SyntaxGrammar;
|
||||
|
||||
std::map<rules::CharacterSet, LexItemSet>
|
||||
char_transitions(const LexItemSet &item_set);
|
||||
}
|
||||
namespace rules {
|
||||
class CharacterSet;
|
||||
class Symbol;
|
||||
}
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
std::map<rules::Symbol, ParseItemSet> sym_transitions(
|
||||
const ParseItemSet &item_set, const SyntaxGrammar &grammar);
|
||||
|
||||
std::map<rules::CharacterSet, LexItemSet> char_transitions(
|
||||
const LexItemSet &item_set);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_
|
||||
|
|
|
|||
|
|
@ -7,49 +7,49 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
namespace build_tables {
|
||||
|
||||
LexConflictManager::LexConflictManager(const LexicalGrammar &grammar) :
|
||||
grammar(grammar) {}
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
|
||||
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
|
||||
const LexAction &new_action) {
|
||||
if (new_action.type < old_action.type)
|
||||
return !resolve_lex_action(new_action, old_action);
|
||||
LexConflictManager::LexConflictManager(const LexicalGrammar &grammar)
|
||||
: grammar(grammar) {}
|
||||
|
||||
switch (old_action.type) {
|
||||
case LexActionTypeError:
|
||||
return true;
|
||||
case LexActionTypeAccept: {
|
||||
int old_precedence = *old_action.precedence_values.begin();
|
||||
switch (new_action.type) {
|
||||
case LexActionTypeAccept: {
|
||||
int new_precedence = *new_action.precedence_values.begin();
|
||||
if (new_precedence > old_precedence) {
|
||||
return true;
|
||||
} else if (new_precedence < old_precedence) {
|
||||
return false;
|
||||
} else {
|
||||
return new_action.symbol.index < old_action.symbol.index;
|
||||
}
|
||||
}
|
||||
case LexActionTypeAdvance: {
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
|
||||
const LexAction &new_action) {
|
||||
if (new_action.type < old_action.type)
|
||||
return !resolve_lex_action(new_action, old_action);
|
||||
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
switch (old_action.type) {
|
||||
case LexActionTypeError:
|
||||
return true;
|
||||
case LexActionTypeAccept: {
|
||||
int old_precedence = *old_action.precedence_values.begin();
|
||||
switch (new_action.type) {
|
||||
case LexActionTypeAccept: {
|
||||
int new_precedence = *new_action.precedence_values.begin();
|
||||
if (new_precedence > old_precedence) {
|
||||
return true;
|
||||
} else if (new_precedence < old_precedence) {
|
||||
return false;
|
||||
} else {
|
||||
return new_action.symbol.index < old_action.symbol.index;
|
||||
}
|
||||
}
|
||||
case LexActionTypeAdvance: { return true; }
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -6,16 +6,18 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class LexConflictManager {
|
||||
const LexicalGrammar grammar;
|
||||
namespace build_tables {
|
||||
|
||||
public:
|
||||
explicit LexConflictManager(const LexicalGrammar &grammar);
|
||||
bool resolve_lex_action(const LexAction &old_action,
|
||||
const LexAction &new_action);
|
||||
};
|
||||
}
|
||||
}
|
||||
class LexConflictManager {
|
||||
const LexicalGrammar grammar;
|
||||
|
||||
public:
|
||||
explicit LexConflictManager(const LexicalGrammar &grammar);
|
||||
bool resolve_lex_action(const LexAction &old_action,
|
||||
const LexAction &new_action);
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
|
|
|
|||
|
|
@ -6,45 +6,42 @@
|
|||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::ostream;
|
||||
using std::vector;
|
||||
namespace build_tables {
|
||||
|
||||
namespace build_tables {
|
||||
LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule) :
|
||||
Item(lhs, rule) {}
|
||||
using std::string;
|
||||
using std::ostream;
|
||||
using std::vector;
|
||||
|
||||
bool LexItem::operator==(const LexItem &other) const {
|
||||
return (other.lhs == lhs) && other.rule->operator==(*rule);
|
||||
}
|
||||
LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule)
|
||||
: Item(lhs, rule) {}
|
||||
|
||||
bool LexItem::is_token_start() const {
|
||||
class IsTokenStart : public rules::RuleFn<bool> {
|
||||
bool apply_to(const rules::Seq *rule) {
|
||||
if (apply(rule->left))
|
||||
return true;
|
||||
else if (rule_can_be_blank(rule->left))
|
||||
return apply(rule->right);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool apply_to(const rules::Metadata *rule) {
|
||||
return rule->value_for(rules::START_TOKEN);
|
||||
}
|
||||
};
|
||||
|
||||
return IsTokenStart().apply(rule);
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const LexItem &item) {
|
||||
return stream <<
|
||||
string("#<item ") <<
|
||||
item.lhs <<
|
||||
string(" ") <<
|
||||
*item.rule <<
|
||||
string(">");
|
||||
}
|
||||
}
|
||||
// Two lex items are equal when their left-hand sides are equal and their
// rules compare equal through the rule's own operator==.
bool LexItem::operator==(const LexItem &other) const {
  if (!(other.lhs == lhs))
    return false;
  return other.rule->operator==(*rule);
}
|
||||
|
||||
bool LexItem::is_token_start() const {
|
||||
class IsTokenStart : public rules::RuleFn<bool> {
|
||||
bool apply_to(const rules::Seq *rule) {
|
||||
if (apply(rule->left))
|
||||
return true;
|
||||
else if (rule_can_be_blank(rule->left))
|
||||
return apply(rule->right);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool apply_to(const rules::Metadata *rule) {
|
||||
return rule->value_for(rules::START_TOKEN);
|
||||
}
|
||||
};
|
||||
|
||||
return IsTokenStart().apply(rule);
|
||||
}
|
||||
|
||||
// Writes the item as "#<item LHS RULE>" and returns the stream.
ostream &operator<<(ostream &stream, const LexItem &item) {
  stream << string("#<item ");
  stream << item.lhs;
  stream << string(" ");
  stream << *item.rule;
  return stream << string(">");
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -6,39 +6,42 @@
|
|||
#include "compiler/build_tables/item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class LexItem : public Item {
|
||||
public:
|
||||
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
|
||||
bool operator==(const LexItem &other) const;
|
||||
bool is_token_start() const;
|
||||
};
|
||||
namespace build_tables {
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
|
||||
class LexItem : public Item {
|
||||
public:
|
||||
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
|
||||
bool operator==(const LexItem &other) const;
|
||||
bool is_token_start() const;
|
||||
};
|
||||
|
||||
typedef std::unordered_set<LexItem> LexItemSet;
|
||||
}
|
||||
}
|
||||
std::ostream &operator<<(std::ostream &stream, const LexItem &item);
|
||||
|
||||
typedef std::unordered_set<LexItem> LexItemSet;
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::build_tables::LexItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::Item &item) const {
|
||||
return
|
||||
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rules::rule_ptr>()(item.rule);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct hash<const tree_sitter::build_tables::LexItemSet> {
|
||||
size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const {
|
||||
size_t result = hash<size_t>()(set.size());
|
||||
for (auto item : set)
|
||||
result ^= hash<tree_sitter::build_tables::LexItem>()(item);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
}
|
||||
template <>
|
||||
struct hash<tree_sitter::build_tables::LexItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::Item &item) const {
|
||||
return hash<tree_sitter::rules::Symbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rules::rule_ptr>()(item.rule);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<const tree_sitter::build_tables::LexItemSet> {
|
||||
size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const {
|
||||
size_t result = hash<size_t>()(set.size());
|
||||
for (auto item : set)
|
||||
result ^= hash<tree_sitter::build_tables::LexItem>()(item);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_LEX_ITEM_H_
|
||||
|
|
|
|||
|
|
@ -7,69 +7,71 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
namespace build_tables {
|
||||
|
||||
/*
|
||||
* Merges a new transition into a map with symbol keys.
|
||||
* If the symbol already exists in the map, the new value for that
|
||||
* symbol will be computed by merging the old and new values
|
||||
* using the given function.
|
||||
*/
|
||||
template<typename T>
|
||||
void merge_sym_transition(std::map<rules::Symbol, T> *left,
|
||||
const std::pair<rules::Symbol, T> &new_pair,
|
||||
std::function<void(T *, const T *)> merge_fn) {
|
||||
auto new_symbol = new_pair.first;
|
||||
for (auto &existing_pair : *left) {
|
||||
auto existing_symbol = existing_pair.first;
|
||||
if (new_symbol < existing_symbol) break;
|
||||
if (existing_symbol == new_symbol) {
|
||||
merge_fn(&existing_pair.second, &new_pair.second);
|
||||
return;
|
||||
}
|
||||
}
|
||||
left->insert(new_pair);
|
||||
}
|
||||
|
||||
/*
|
||||
* Merges two transition maps with character set keys. If the
|
||||
* two maps contain values for overlapping character sets, the
|
||||
* new value for the two sets' intersection will be computed by
|
||||
* merging the old and new values using the given function.
|
||||
*/
|
||||
template<typename T>
|
||||
void merge_char_transition(std::map<rules::CharacterSet, T> *left,
|
||||
const std::pair<rules::CharacterSet, T> &new_pair,
|
||||
std::function<void(T *, const T *)> merge_fn) {
|
||||
rules::CharacterSet new_char_set = new_pair.first;
|
||||
T new_value = new_pair.second;
|
||||
|
||||
std::map<rules::CharacterSet, T> pairs_to_insert;
|
||||
|
||||
auto iter = left->begin();
|
||||
while (iter != left->end()) {
|
||||
rules::CharacterSet char_set = iter->first;
|
||||
T value = iter->second;
|
||||
|
||||
rules::CharacterSet intersection = char_set.remove_set(new_char_set);
|
||||
if (!intersection.is_empty()) {
|
||||
new_char_set.remove_set(intersection);
|
||||
if (!char_set.is_empty())
|
||||
pairs_to_insert.insert({ char_set, value });
|
||||
merge_fn(&value, &new_value);
|
||||
pairs_to_insert.insert({ intersection, value });
|
||||
left->erase(iter++);
|
||||
} else {
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
|
||||
left->insert(pairs_to_insert.begin(), pairs_to_insert.end());
|
||||
|
||||
if (!new_char_set.is_empty())
|
||||
left->insert({ new_char_set, new_pair.second });
|
||||
}
|
||||
/*
|
||||
* Merges a new transition into a map with symbol keys.
|
||||
* If the symbol already exists in the map, the new value for that
|
||||
* symbol will be computed by merging the old and new values
|
||||
* using the given function.
|
||||
*/
|
||||
template <typename T>
|
||||
void merge_sym_transition(std::map<rules::Symbol, T> *left,
|
||||
const std::pair<rules::Symbol, T> &new_pair,
|
||||
std::function<void(T *, const T *)> merge_fn) {
|
||||
auto new_symbol = new_pair.first;
|
||||
for (auto &existing_pair : *left) {
|
||||
auto existing_symbol = existing_pair.first;
|
||||
if (new_symbol < existing_symbol)
|
||||
break;
|
||||
if (existing_symbol == new_symbol) {
|
||||
merge_fn(&existing_pair.second, &new_pair.second);
|
||||
return;
|
||||
}
|
||||
}
|
||||
left->insert(new_pair);
|
||||
}
|
||||
|
||||
/*
|
||||
* Merges two transition maps with character set keys. If the
|
||||
* two maps contain values for overlapping character sets, the
|
||||
* new value for the two sets' intersection will be computed by
|
||||
* merging the old and new values using the given function.
|
||||
*/
|
||||
template <typename T>
|
||||
void merge_char_transition(std::map<rules::CharacterSet, T> *left,
|
||||
const std::pair<rules::CharacterSet, T> &new_pair,
|
||||
std::function<void(T *, const T *)> merge_fn) {
|
||||
rules::CharacterSet new_char_set = new_pair.first;
|
||||
T new_value = new_pair.second;
|
||||
|
||||
std::map<rules::CharacterSet, T> pairs_to_insert;
|
||||
|
||||
auto iter = left->begin();
|
||||
while (iter != left->end()) {
|
||||
rules::CharacterSet char_set = iter->first;
|
||||
T value = iter->second;
|
||||
|
||||
rules::CharacterSet intersection = char_set.remove_set(new_char_set);
|
||||
if (!intersection.is_empty()) {
|
||||
new_char_set.remove_set(intersection);
|
||||
if (!char_set.is_empty())
|
||||
pairs_to_insert.insert({ char_set, value });
|
||||
merge_fn(&value, &new_value);
|
||||
pairs_to_insert.insert({ intersection, value });
|
||||
left->erase(iter++);
|
||||
} else {
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
|
||||
left->insert(pairs_to_insert.begin(), pairs_to_insert.end());
|
||||
|
||||
if (!new_char_set.is_empty())
|
||||
left->insert({ new_char_set, new_pair.second });
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_
|
||||
|
|
|
|||
|
|
@ -8,127 +8,130 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
namespace build_tables {
|
||||
|
||||
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar,
|
||||
const LexicalGrammar &lex_grammar) :
|
||||
parse_grammar(parse_grammar),
|
||||
lex_grammar(lex_grammar) {}
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
|
||||
bool ParseConflictManager::resolve_parse_action(const rules::Symbol &symbol,
|
||||
const ParseAction &old_action,
|
||||
const ParseAction &new_action) {
|
||||
if (new_action.type < old_action.type)
|
||||
return !resolve_parse_action(symbol, new_action, old_action);
|
||||
// Builds a conflict manager for the given grammars. Both grammars are stored
// in same-named members; the class declaration holds them by value, so they
// are copied here.
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar,
|
||||
const LexicalGrammar &lex_grammar)
|
||||
: parse_grammar(parse_grammar), lex_grammar(lex_grammar) {}
|
||||
|
||||
switch (old_action.type) {
|
||||
case ParseActionTypeError:
|
||||
return true;
|
||||
case ParseActionTypeShift: {
|
||||
int min_precedence = *old_action.precedence_values.begin();
|
||||
int max_precedence = *old_action.precedence_values.rbegin();
|
||||
switch (new_action.type) {
|
||||
case ParseActionTypeReduce: {
|
||||
int new_precedence = *new_action.precedence_values.rbegin();
|
||||
if (max_precedence > new_precedence) {
|
||||
if (min_precedence < new_precedence)
|
||||
record_conflict(symbol, old_action, new_action);
|
||||
return false;
|
||||
} else if (max_precedence < new_precedence) {
|
||||
return true;
|
||||
} else {
|
||||
record_conflict(symbol, old_action, new_action);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
case ParseActionTypeReduce:
|
||||
switch (new_action.type) {
|
||||
case ParseActionTypeReduce: {
|
||||
int old_precedence = *old_action.precedence_values.begin();
|
||||
int new_precedence = *new_action.precedence_values.begin();
|
||||
if (new_precedence > old_precedence) {
|
||||
return true;
|
||||
} else if (new_precedence < old_precedence) {
|
||||
return false;
|
||||
} else {
|
||||
record_conflict(symbol, old_action, new_action);
|
||||
return new_action.symbol.index < old_action.symbol.index;
|
||||
}
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const vector<Conflict> ParseConflictManager::conflicts() const {
|
||||
vector<Conflict> result;
|
||||
result.insert(result.end(), conflicts_.begin(), conflicts_.end());
|
||||
return result;
|
||||
}
|
||||
|
||||
string precedence_string(const ParseAction &action) {
|
||||
string precedences = "(precedence ";
|
||||
bool started = false;
|
||||
for (auto value : action.precedence_values) {
|
||||
if (started) precedences += ", ";
|
||||
started = true;
|
||||
precedences += to_string(value);
|
||||
}
|
||||
return precedences + ")";
|
||||
}
|
||||
|
||||
string message_for_action(const ParseAction &action, const SyntaxGrammar &parse_grammar) {
|
||||
switch (action.type) {
|
||||
case ParseActionTypeShift:
|
||||
return "shift " + precedence_string(action);
|
||||
case ParseActionTypeReduce: {
|
||||
string name = parse_grammar.rule_name(action.symbol);
|
||||
if (name == "")
|
||||
return "ERROR" + to_string(action.symbol.index);
|
||||
else
|
||||
return "reduce " + name + " " + precedence_string(action);
|
||||
}
|
||||
case ParseActionTypeAccept:
|
||||
return "accept";
|
||||
default:
|
||||
return "error";
|
||||
}
|
||||
}
|
||||
|
||||
string ParseConflictManager::symbol_name(const rules::Symbol &symbol) {
|
||||
if (symbol.is_built_in()) {
|
||||
if (symbol == rules::ERROR())
|
||||
return "ERROR";
|
||||
else if (symbol == rules::END_OF_INPUT())
|
||||
return "END_OF_INPUT";
|
||||
else
|
||||
return "";
|
||||
}
|
||||
|
||||
if (symbol.is_token())
|
||||
return lex_grammar.rule_name(symbol);
|
||||
else
|
||||
return parse_grammar.rule_name(symbol);
|
||||
}
|
||||
|
||||
void ParseConflictManager::record_conflict(const rules::Symbol &symbol,
|
||||
const ParseAction &left,
|
||||
const ParseAction &right) {
|
||||
conflicts_.insert(Conflict(symbol_name(symbol) + ": " +
|
||||
message_for_action(left, parse_grammar) + " / " +
|
||||
message_for_action(right, parse_grammar)));
|
||||
// Decides between an action already in the table (`old_action`) and a
// candidate (`new_action`) for the lookahead `symbol`. Returns true in the
// cases where the candidate wins (for example when the existing action is an
// error, or when a reduction has strictly higher precedence), false
// otherwise. Ties and overlapping precedence ranges are recorded as conflicts.
bool ParseConflictManager::resolve_parse_action(const rules::Symbol &symbol,
                                                const ParseAction &old_action,
                                                const ParseAction &new_action) {
  // Only the ordering "old type <= new type" is handled below; the opposite
  // ordering is answered by swapping the roles and negating the result.
  if (new_action.type < old_action.type)
    return !resolve_parse_action(symbol, new_action, old_action);

  if (old_action.type == ParseActionTypeError)
    return true;

  if (old_action.type == ParseActionTypeShift) {
    const int lowest_shift = *old_action.precedence_values.begin();
    const int highest_shift = *old_action.precedence_values.rbegin();
    if (new_action.type != ParseActionTypeReduce)
      return false;

    const int reduce_precedence = *new_action.precedence_values.rbegin();
    if (highest_shift < reduce_precedence)
      return true;

    // The shift is kept. It is a conflict unless every shift precedence is
    // at or above the reduction and the highest one is strictly above it.
    if (highest_shift == reduce_precedence || lowest_shift < reduce_precedence)
      record_conflict(symbol, old_action, new_action);
    return false;
  }

  if (old_action.type == ParseActionTypeReduce) {
    if (new_action.type != ParseActionTypeReduce)
      return false;

    const int existing_precedence = *old_action.precedence_values.begin();
    const int candidate_precedence = *new_action.precedence_values.begin();
    if (candidate_precedence > existing_precedence)
      return true;
    if (candidate_precedence < existing_precedence)
      return false;

    // Equal precedence: report it, then prefer the lower symbol index.
    record_conflict(symbol, old_action, new_action);
    return new_action.symbol.index < old_action.symbol.index;
  }

  return false;
}
|
||||
|
||||
// Returns the recorded conflicts as a vector, in the iteration order of the
// underlying `conflicts_` container.
const vector<Conflict> ParseConflictManager::conflicts() const {
  return vector<Conflict>(conflicts_.begin(), conflicts_.end());
}
|
||||
|
||||
// Formats an action's precedence values as "(precedence v1, v2, ...)".
// With no values the result is "(precedence )".
string precedence_string(const ParseAction &action) {
  string text = "(precedence ";
  const char *separator = "";
  for (auto precedence : action.precedence_values) {
    text += separator;
    text += to_string(precedence);
    separator = ", ";
  }
  text += ")";
  return text;
}
|
||||
|
||||
// Describes a parse action for conflict messages:
//   shift  -> "shift (precedence ...)"
//   reduce -> "reduce NAME (precedence ...)", or "ERROR<index>" when the
//             grammar has no name for the reduced symbol
//   accept -> "accept"
//   other  -> "error"
string message_for_action(const ParseAction &action,
                          const SyntaxGrammar &parse_grammar) {
  if (action.type == ParseActionTypeShift)
    return "shift " + precedence_string(action);

  if (action.type == ParseActionTypeAccept)
    return "accept";

  if (action.type != ParseActionTypeReduce)
    return "error";

  const string rule_name = parse_grammar.rule_name(action.symbol);
  if (rule_name.empty())
    return "ERROR" + to_string(action.symbol.index);
  return "reduce " + rule_name + " " + precedence_string(action);
}
|
||||
|
||||
// Returns the display name of `symbol`. Built-in symbols map to "ERROR",
// "END_OF_INPUT" or the empty string; all other symbols are looked up in the
// lexical grammar (tokens) or the syntax grammar (non-tokens).
string ParseConflictManager::symbol_name(const rules::Symbol &symbol) {
  if (!symbol.is_built_in()) {
    if (symbol.is_token())
      return lex_grammar.rule_name(symbol);
    return parse_grammar.rule_name(symbol);
  }

  if (symbol == rules::ERROR())
    return "ERROR";
  if (symbol == rules::END_OF_INPUT())
    return "END_OF_INPUT";
  return "";
}
|
||||
|
||||
// Records a conflict described as "SYMBOL: LEFT / RIGHT", where LEFT and
// RIGHT are the textual descriptions of the two competing actions.
void ParseConflictManager::record_conflict(const rules::Symbol &symbol,
                                           const ParseAction &left,
                                           const ParseAction &right) {
  const string left_text = message_for_action(left, parse_grammar);
  const string right_text = message_for_action(right, parse_grammar);
  const string description =
      symbol_name(symbol) + ": " + left_text + " / " + right_text;
  conflicts_.insert(Conflict(description));
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -11,25 +11,28 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class ParseConflictManager {
|
||||
const SyntaxGrammar parse_grammar;
|
||||
const LexicalGrammar lex_grammar;
|
||||
std::set<Conflict> conflicts_;
|
||||
namespace build_tables {
|
||||
|
||||
public:
|
||||
ParseConflictManager(const SyntaxGrammar &parse_grammar,
|
||||
const LexicalGrammar &lex_grammar);
|
||||
bool resolve_parse_action(const rules::Symbol &symbol,
|
||||
const ParseAction &old_action,
|
||||
const ParseAction &new_action);
|
||||
const std::vector<Conflict> conflicts() const;
|
||||
class ParseConflictManager {
|
||||
const SyntaxGrammar parse_grammar;
|
||||
const LexicalGrammar lex_grammar;
|
||||
std::set<Conflict> conflicts_;
|
||||
|
||||
private:
|
||||
std::string symbol_name(const rules::Symbol &symbol);
|
||||
void record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right);
|
||||
};
|
||||
}
|
||||
}
|
||||
public:
|
||||
ParseConflictManager(const SyntaxGrammar &parse_grammar,
|
||||
const LexicalGrammar &lex_grammar);
|
||||
bool resolve_parse_action(const rules::Symbol &symbol,
|
||||
const ParseAction &old_action,
|
||||
const ParseAction &new_action);
|
||||
const std::vector<Conflict> conflicts() const;
|
||||
|
||||
private:
|
||||
std::string symbol_name(const rules::Symbol &symbol);
|
||||
void record_conflict(const rules::Symbol &symbol, const ParseAction &left,
|
||||
const ParseAction &right);
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_
|
||||
|
|
|
|||
|
|
@ -3,29 +3,28 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::ostream;
|
||||
namespace build_tables {
|
||||
|
||||
namespace build_tables {
|
||||
ParseItem::ParseItem(const rules::Symbol &lhs,
|
||||
const rules::rule_ptr rule,
|
||||
size_t consumed_symbol_count) :
|
||||
Item(lhs, rule),
|
||||
consumed_symbol_count(consumed_symbol_count) {}
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::ostream;
|
||||
|
||||
bool ParseItem::operator==(const ParseItem &other) const {
|
||||
return
|
||||
(lhs == other.lhs) &&
|
||||
(consumed_symbol_count == other.consumed_symbol_count) &&
|
||||
(rule == other.rule || rule->operator==(*other.rule));
|
||||
}
|
||||
// Builds a parse item: `lhs` and `rule` are forwarded to the Item base
// class, and `consumed_symbol_count` is stored in the member of the same
// name.
ParseItem::ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule,
|
||||
size_t consumed_symbol_count)
|
||||
: Item(lhs, rule), consumed_symbol_count(consumed_symbol_count) {}
|
||||
|
||||
ostream& operator<<(ostream &stream, const ParseItem &item) {
|
||||
return stream << string("#<item ") << item.lhs << string(" ") << *item.rule << string(">");
|
||||
}
|
||||
}
|
||||
// Two parse items are equal when their left-hand sides match, they have
// consumed the same number of symbols, and their rules are either the same
// pointer or compare equal by value.
bool ParseItem::operator==(const ParseItem &other) const {
  if (!(lhs == other.lhs))
    return false;
  if (!(consumed_symbol_count == other.consumed_symbol_count))
    return false;
  if (rule == other.rule)
    return true;
  return rule->operator==(*other.rule);
}
|
||||
|
||||
// Streams a debug description of a parse item in the form
// "#<item LHS RULE>" and returns the stream so calls can be chained.
ostream &operator<<(ostream &stream, const ParseItem &item) {
  stream << string("#<item ") << item.lhs;
  stream << string(" ") << *item.rule;
  stream << string(">");
  return stream;
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -8,44 +8,48 @@
|
|||
#include "compiler/build_tables/item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class ParseItem : public Item {
|
||||
public:
|
||||
ParseItem(const rules::Symbol &lhs, rules::rule_ptr rule, const size_t consumed_symbol_count);
|
||||
bool operator==(const ParseItem &other) const;
|
||||
size_t consumed_symbol_count;
|
||||
};
|
||||
namespace build_tables {
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
|
||||
// An Item specialised for the parser. In addition to the base class's
// left-hand side and rule it carries a count of consumed symbols, which takes
// part in both equality and hashing (see std::hash<ParseItem>).
class ParseItem : public Item {
|
||||
public:
|
||||
// Builds an item for `rule` with left-hand side `lhs` and the given count.
ParseItem(const rules::Symbol &lhs, rules::rule_ptr rule,
|
||||
const size_t consumed_symbol_count);
|
||||
// Equality over lhs, consumed_symbol_count and rule.
bool operator==(const ParseItem &other) const;
|
||||
// NOTE(review): presumably the number of symbols already matched for this
// item -- confirm against the table-building code.
size_t consumed_symbol_count;
|
||||
};
|
||||
|
||||
typedef std::unordered_map<ParseItem, std::set<rules::Symbol>> ParseItemSet;
|
||||
}
|
||||
}
|
||||
std::ostream &operator<<(std::ostream &stream, const ParseItem &item);
|
||||
|
||||
typedef std::unordered_map<ParseItem, std::set<rules::Symbol> > ParseItemSet;
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::build_tables::ParseItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
|
||||
return
|
||||
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rules::rule_ptr>()(item.rule) ^
|
||||
hash<size_t>()(item.consumed_symbol_count);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct hash<const tree_sitter::build_tables::ParseItemSet> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const {
|
||||
size_t result = hash<size_t>()(set.size());
|
||||
for (auto &pair : set) {
|
||||
result ^= hash<tree_sitter::build_tables::ParseItem>()(pair.first);
|
||||
result ^= hash<size_t>()(pair.second.size());
|
||||
for (auto &symbol : pair.second)
|
||||
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
}
|
||||
template <>
|
||||
struct hash<tree_sitter::build_tables::ParseItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
|
||||
return hash<tree_sitter::rules::Symbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rules::rule_ptr>()(item.rule) ^
|
||||
hash<size_t>()(item.consumed_symbol_count);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<const tree_sitter::build_tables::ParseItemSet> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const {
|
||||
size_t result = hash<size_t>()(set.size());
|
||||
for (auto &pair : set) {
|
||||
result ^= hash<tree_sitter::build_tables::ParseItem>()(pair.first);
|
||||
result ^= hash<size_t>()(pair.second.size());
|
||||
for (auto &symbol : pair.second)
|
||||
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_PARSE_ITEM_H_
|
||||
|
|
|
|||
|
|
@ -9,60 +9,59 @@
|
|||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::set;
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
namespace build_tables {
|
||||
class CanBeBlank : public rules::RuleFn<bool> {
|
||||
protected:
|
||||
bool apply_to(const rules::Blank *) {
|
||||
return true;
|
||||
}
|
||||
using std::set;
|
||||
|
||||
bool apply_to(const rules::Repeat *rule) {
|
||||
return true;
|
||||
}
|
||||
class CanBeBlank : public rules::RuleFn<bool> {
|
||||
protected:
|
||||
bool apply_to(const rules::Blank *) { return true; }
|
||||
|
||||
bool apply_to(const rules::Choice *rule) {
|
||||
for (const auto &element : rule->elements)
|
||||
if (apply(element)) return true;
|
||||
return false;
|
||||
}
|
||||
bool apply_to(const rules::Repeat *rule) { return true; }
|
||||
|
||||
bool apply_to(const rules::Seq *rule) {
|
||||
return apply(rule->left) && apply(rule->right);
|
||||
}
|
||||
bool apply_to(const rules::Choice *rule) {
|
||||
for (const auto &element : rule->elements)
|
||||
if (apply(element))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool apply_to(const rules::Metadata *rule) {
|
||||
return apply(rule->rule);
|
||||
}
|
||||
};
|
||||
bool apply_to(const rules::Seq *rule) {
|
||||
return apply(rule->left) && apply(rule->right);
|
||||
}
|
||||
|
||||
class CanBeBlankRecursive : public CanBeBlank {
|
||||
const SyntaxGrammar *grammar;
|
||||
set<rules::Symbol> visited_symbols;
|
||||
using CanBeBlank::visit;
|
||||
bool apply_to(const rules::Metadata *rule) { return apply(rule->rule); }
|
||||
};
|
||||
|
||||
public:
|
||||
using CanBeBlank::apply_to;
|
||||
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar) : grammar(grammar) {}
|
||||
class CanBeBlankRecursive : public CanBeBlank {
|
||||
const SyntaxGrammar *grammar;
|
||||
set<rules::Symbol> visited_symbols;
|
||||
using CanBeBlank::visit;
|
||||
|
||||
bool apply_to(const rules::Symbol *rule) {
|
||||
if (visited_symbols.find(*rule) == visited_symbols.end()) {
|
||||
visited_symbols.insert(*rule);
|
||||
return !rule->is_token() && apply(grammar->rule(*rule));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
public:
|
||||
using CanBeBlank::apply_to;
|
||||
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar)
|
||||
: grammar(grammar) {}
|
||||
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule) {
|
||||
return CanBeBlank().apply(rule);
|
||||
}
|
||||
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
|
||||
return CanBeBlankRecursive(&grammar).apply(rule);
|
||||
}
|
||||
bool apply_to(const rules::Symbol *rule) {
|
||||
if (visited_symbols.find(*rule) == visited_symbols.end()) {
|
||||
visited_symbols.insert(*rule);
|
||||
return !rule->is_token() && apply(grammar->rule(*rule));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Reports whether `rule` can match the empty string, judged from the rule's
// own structure with the CanBeBlank visitor.
bool rule_can_be_blank(const rules::rule_ptr &rule) {
  CanBeBlank checker;
  return checker.apply(rule);
}
|
||||
|
||||
// Reports whether `rule` can match the empty string, additionally following
// non-token symbols into their definitions in `grammar` (using the
// CanBeBlankRecursive visitor).
bool rule_can_be_blank(const rules::rule_ptr &rule,
                       const SyntaxGrammar &grammar) {
  CanBeBlankRecursive checker(&grammar);
  return checker.apply(rule);
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule);
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
|
||||
}
|
||||
}
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule);
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule,
|
||||
const SyntaxGrammar &grammar);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_
|
||||
|
|
|
|||
|
|
@ -13,87 +13,91 @@
|
|||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::map;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::Symbol;
|
||||
using rules::CharacterSet;
|
||||
namespace build_tables {
|
||||
|
||||
namespace build_tables {
|
||||
template<typename T>
|
||||
void merge_transitions(map<T, rule_ptr> *left, const map<T, rule_ptr> &right);
|
||||
using std::map;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::Symbol;
|
||||
using rules::CharacterSet;
|
||||
|
||||
template<>
|
||||
void merge_transitions(map<CharacterSet, rule_ptr> *left, const map<CharacterSet, rule_ptr> &right) {
|
||||
for (auto &pair : right)
|
||||
merge_char_transition<rule_ptr>(left, pair, [](rule_ptr *left, const rule_ptr *right) {
|
||||
*left = rules::Choice::Build({ *left, *right });
|
||||
});
|
||||
}
|
||||
template <typename T>
|
||||
void merge_transitions(map<T, rule_ptr> *left, const map<T, rule_ptr> &right);
|
||||
|
||||
template<>
|
||||
void merge_transitions(map<Symbol, rule_ptr> *left, const map<Symbol, rule_ptr> &right) {
|
||||
for (auto &pair : right)
|
||||
merge_sym_transition<rule_ptr>(left, pair, [](rule_ptr *left, const rule_ptr *right) {
|
||||
*left = rules::Choice::Build({ *left, *right });
|
||||
});
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
class RuleTransitions : public rules::RuleFn<map<T, rule_ptr>> {
|
||||
map<T, rule_ptr> apply_to_atom(const rules::Rule *rule) {
|
||||
auto atom = dynamic_cast<const T *>(rule);
|
||||
if (atom)
|
||||
return map<T, rule_ptr>({{ *atom, make_shared<rules::Blank>() }});
|
||||
else
|
||||
return map<T, rule_ptr>();
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const CharacterSet *rule) {
|
||||
return apply_to_atom(rule);
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const Symbol *rule) {
|
||||
return apply_to_atom(rule);
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const rules::Choice *rule) {
|
||||
map<T, rule_ptr> result;
|
||||
for (const auto &el : rule->elements)
|
||||
merge_transitions<T>(&result, this->apply(el));
|
||||
return result;
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const rules::Seq *rule) {
|
||||
auto result = this->apply(rule->left);
|
||||
for (auto &pair : result)
|
||||
pair.second = rules::Seq::Build({ pair.second, rule->right });
|
||||
if (rule_can_be_blank(rule->left))
|
||||
merge_transitions<T>(&result, this->apply(rule->right));
|
||||
return result;
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const rules::Repeat *rule) {
|
||||
auto result = this->apply(rule->content);
|
||||
for (auto &pair : result)
|
||||
pair.second = rules::Seq::Build({ pair.second, rule->copy() });
|
||||
return result;
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const rules::Metadata *rule) {
|
||||
auto result = this->apply(rule->rule);
|
||||
for (auto &pair : result)
|
||||
pair.second = make_shared<rules::Metadata>(pair.second, rule->value);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
map<CharacterSet, rule_ptr> char_transitions(const rule_ptr &rule) {
|
||||
return RuleTransitions<CharacterSet>().apply(rule);
|
||||
}
|
||||
|
||||
map<Symbol, rule_ptr> sym_transitions(const rule_ptr &rule) {
|
||||
return RuleTransitions<Symbol>().apply(rule);
|
||||
}
|
||||
}
|
||||
template <>
|
||||
void merge_transitions(map<CharacterSet, rule_ptr> *left,
|
||||
const map<CharacterSet, rule_ptr> &right) {
|
||||
for (auto &pair : right)
|
||||
merge_char_transition<rule_ptr>(left, pair,
|
||||
[](rule_ptr *left, const rule_ptr *right) {
|
||||
*left = rules::Choice::Build({ *left, *right });
|
||||
});
|
||||
}
|
||||
|
||||
template <>
|
||||
void merge_transitions(map<Symbol, rule_ptr> *left,
|
||||
const map<Symbol, rule_ptr> &right) {
|
||||
for (auto &pair : right)
|
||||
merge_sym_transition<rule_ptr>(left, pair,
|
||||
[](rule_ptr *left, const rule_ptr *right) {
|
||||
*left = rules::Choice::Build({ *left, *right });
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class RuleTransitions : public rules::RuleFn<map<T, rule_ptr> > {
|
||||
map<T, rule_ptr> apply_to_atom(const rules::Rule *rule) {
|
||||
auto atom = dynamic_cast<const T *>(rule);
|
||||
if (atom)
|
||||
return map<T, rule_ptr>({ { *atom, make_shared<rules::Blank>() } });
|
||||
else
|
||||
return map<T, rule_ptr>();
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const CharacterSet *rule) {
|
||||
return apply_to_atom(rule);
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const Symbol *rule) { return apply_to_atom(rule); }
|
||||
|
||||
map<T, rule_ptr> apply_to(const rules::Choice *rule) {
|
||||
map<T, rule_ptr> result;
|
||||
for (const auto &el : rule->elements)
|
||||
merge_transitions<T>(&result, this->apply(el));
|
||||
return result;
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const rules::Seq *rule) {
|
||||
auto result = this->apply(rule->left);
|
||||
for (auto &pair : result)
|
||||
pair.second = rules::Seq::Build({ pair.second, rule->right });
|
||||
if (rule_can_be_blank(rule->left))
|
||||
merge_transitions<T>(&result, this->apply(rule->right));
|
||||
return result;
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const rules::Repeat *rule) {
|
||||
auto result = this->apply(rule->content);
|
||||
for (auto &pair : result)
|
||||
pair.second = rules::Seq::Build({ pair.second, rule->copy() });
|
||||
return result;
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const rules::Metadata *rule) {
|
||||
auto result = this->apply(rule->rule);
|
||||
for (auto &pair : result)
|
||||
pair.second = make_shared<rules::Metadata>(pair.second, rule->value);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
// Computes the character-set transitions of `rule`: each leading character
// set mapped to the rule that remains after it.
map<CharacterSet, rule_ptr> char_transitions(const rule_ptr &rule) {
  RuleTransitions<CharacterSet> visitor;
  return visitor.apply(rule);
}
|
||||
|
||||
// Computes the symbol transitions of `rule`: each leading symbol mapped to
// the rule that remains after it.
map<Symbol, rule_ptr> sym_transitions(const rule_ptr &rule) {
  RuleTransitions<Symbol> visitor;
  return visitor.apply(rule);
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,14 +5,16 @@
|
|||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
std::map<rules::CharacterSet, rules::rule_ptr>
|
||||
char_transitions(const rules::rule_ptr &rule);
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
std::map<rules::Symbol, rules::rule_ptr>
|
||||
sym_transitions(const rules::rule_ptr &rule);
|
||||
}
|
||||
}
|
||||
std::map<rules::CharacterSet, rules::rule_ptr> char_transitions(
|
||||
const rules::rule_ptr &rule);
|
||||
|
||||
std::map<rules::Symbol, rules::rule_ptr> sym_transitions(
|
||||
const rules::rule_ptr &rule);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_RULE_TRANSITIONS_H_
|
||||
|
|
|
|||
|
|
@ -5,29 +5,33 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::tuple;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::get;
|
||||
using std::make_tuple;
|
||||
|
||||
tuple<string, vector<Conflict>, const GrammarError *>
|
||||
compile(const Grammar &grammar, std::string name) {
|
||||
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
|
||||
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
|
||||
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
|
||||
const GrammarError *error = get<2>(prepare_grammar_result);
|
||||
using std::tuple;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::get;
|
||||
using std::make_tuple;
|
||||
|
||||
if (error)
|
||||
return make_tuple("", vector<Conflict>(), error);
|
||||
tuple<string, vector<Conflict>, const GrammarError *> compile(
|
||||
const Grammar &grammar, std::string name) {
|
||||
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
|
||||
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
|
||||
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
|
||||
const GrammarError *error = get<2>(prepare_grammar_result);
|
||||
|
||||
auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar);
|
||||
const ParseTable &parse_table = get<0>(table_build_result);
|
||||
const LexTable &lex_table = get<1>(table_build_result);
|
||||
const vector<Conflict> &conflicts = get<2>(table_build_result);
|
||||
if (error)
|
||||
return make_tuple("", vector<Conflict>(), error);
|
||||
|
||||
string code = generate_code::c_code(name, parse_table, lex_table, syntax_grammar, lexical_grammar);
|
||||
auto table_build_result =
|
||||
build_tables::build_tables(syntax_grammar, lexical_grammar);
|
||||
const ParseTable &parse_table = get<0>(table_build_result);
|
||||
const LexTable &lex_table = get<1>(table_build_result);
|
||||
const vector<Conflict> &conflicts = get<2>(table_build_result);
|
||||
|
||||
return make_tuple(code, conflicts, nullptr);
|
||||
}
|
||||
string code = generate_code::c_code(name, parse_table, lex_table,
|
||||
syntax_grammar, lexical_grammar);
|
||||
|
||||
return make_tuple(code, conflicts, nullptr);
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -2,19 +2,21 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
|
||||
Conflict::Conflict(string description) : description(description) {}
|
||||
using std::string;
|
||||
|
||||
bool Conflict::operator==(const tree_sitter::Conflict &other) const {
|
||||
return other.description == description;
|
||||
}
|
||||
// Builds a conflict that stores the given human-readable description.
Conflict::Conflict(string description)
    : description(description) {
}
|
||||
|
||||
bool Conflict::operator<(const tree_sitter::Conflict &other) const {
|
||||
return other.description < description;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const Conflict &conflict) {
|
||||
return stream << "#<conflict " + conflict.description + ">";
|
||||
}
|
||||
// Two conflicts are equal when their descriptions are equal.
bool Conflict::operator==(const tree_sitter::Conflict &other) const {
  const bool same_description = (description == other.description);
  return same_description;
}
|
||||
|
||||
// Orders conflicts by their descriptions.
// NOTE(review): `a < b` holds when b's description sorts before a's, i.e.
// the ordering is descending by description. Confirm this is intended;
// it determines iteration order in ordered containers of Conflict.
bool Conflict::operator<(const tree_sitter::Conflict &other) const {
  return description > other.description;
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const Conflict &conflict) {
|
||||
return stream << "#<conflict " + conflict.description + ">";
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -10,393 +10,378 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::function;
|
||||
using std::map;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::pair;
|
||||
using util::escape_char;
|
||||
namespace generate_code {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::function;
|
||||
using std::map;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::pair;
|
||||
using util::escape_char;
|
||||
|
||||
namespace generate_code {
|
||||
class CCodeGenerator {
|
||||
string buffer;
|
||||
size_t indent_level;
|
||||
class CCodeGenerator {
|
||||
string buffer;
|
||||
size_t indent_level;
|
||||
|
||||
const string name;
|
||||
const ParseTable parse_table;
|
||||
const LexTable lex_table;
|
||||
const SyntaxGrammar syntax_grammar;
|
||||
const LexicalGrammar lexical_grammar;
|
||||
map<string, string> sanitized_names;
|
||||
const string name;
|
||||
const ParseTable parse_table;
|
||||
const LexTable lex_table;
|
||||
const SyntaxGrammar syntax_grammar;
|
||||
const LexicalGrammar lexical_grammar;
|
||||
map<string, string> sanitized_names;
|
||||
|
||||
public:
|
||||
CCodeGenerator(string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const SyntaxGrammar &syntax_grammar,
|
||||
const LexicalGrammar &lexical_grammar) :
|
||||
indent_level(0),
|
||||
name(name),
|
||||
parse_table(parse_table),
|
||||
lex_table(lex_table),
|
||||
syntax_grammar(syntax_grammar),
|
||||
lexical_grammar(lexical_grammar) {}
|
||||
public:
|
||||
// Stores copies of the parser name, both tables and both grammars, and
// starts with an indentation level of zero. The output buffer is filled
// later by code().
CCodeGenerator(
    string name,
    const ParseTable &parse_table,
    const LexTable &lex_table,
    const SyntaxGrammar &syntax_grammar,
    const LexicalGrammar &lexical_grammar)
    : indent_level(0),
      name(name),
      parse_table(parse_table),
      lex_table(lex_table),
      syntax_grammar(syntax_grammar),
      lexical_grammar(lexical_grammar) {
}
|
||||
|
||||
string code() {
|
||||
buffer = "";
|
||||
string code() {
|
||||
buffer = "";
|
||||
|
||||
includes();
|
||||
state_and_symbol_counts();
|
||||
symbol_enum();
|
||||
symbol_names_list();
|
||||
hidden_symbols_list();
|
||||
lex_function();
|
||||
lex_states_list();
|
||||
parse_table_array();
|
||||
parser_export();
|
||||
includes();
|
||||
state_and_symbol_counts();
|
||||
symbol_enum();
|
||||
symbol_names_list();
|
||||
hidden_symbols_list();
|
||||
lex_function();
|
||||
lex_states_list();
|
||||
parse_table_array();
|
||||
parser_export();
|
||||
|
||||
return buffer;
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
private:
|
||||
void includes() {
|
||||
add("#include \"tree_sitter/parser.h\"");
|
||||
line();
|
||||
}
|
||||
private:
|
||||
void includes() {
|
||||
add("#include \"tree_sitter/parser.h\"");
|
||||
line();
|
||||
}
|
||||
|
||||
void state_and_symbol_counts() {
|
||||
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
|
||||
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
|
||||
line();
|
||||
}
|
||||
void state_and_symbol_counts() {
|
||||
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
|
||||
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
|
||||
line();
|
||||
}
|
||||
|
||||
void symbol_enum() {
|
||||
line("enum {");
|
||||
indent([&]() {
|
||||
bool at_start = true;
|
||||
for (auto symbol : parse_table.symbols)
|
||||
if (!symbol.is_built_in()) {
|
||||
if (at_start)
|
||||
line(symbol_id(symbol) + " = ts_start_sym,");
|
||||
else
|
||||
line(symbol_id(symbol) + ",");
|
||||
at_start = false;
|
||||
}
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
void symbol_names_list() {
|
||||
set<rules::Symbol> symbols(parse_table.symbols);
|
||||
symbols.insert(rules::END_OF_INPUT());
|
||||
symbols.insert(rules::ERROR());
|
||||
|
||||
line("SYMBOL_NAMES = {");
|
||||
indent([&]() {
|
||||
for (auto symbol : parse_table.symbols)
|
||||
line("[" + symbol_id(symbol) + "] = \"" + symbol_name(symbol) + "\",");
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
void hidden_symbols_list() {
|
||||
line("HIDDEN_SYMBOLS = {");
|
||||
indent([&]() {
|
||||
for (auto &symbol : parse_table.symbols)
|
||||
if (!symbol.is_built_in() && (symbol.is_auxiliary() || rule_name(symbol)[0] == '_'))
|
||||
line("[" + symbol_id(symbol) + "] = 1,");
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
void lex_function() {
|
||||
line("LEX_FN() {");
|
||||
indent([&]() {
|
||||
line("START_LEXER();");
|
||||
switch_on_lex_state();
|
||||
});
|
||||
line("}");
|
||||
line();
|
||||
}
|
||||
|
||||
void lex_states_list() {
|
||||
line("LEX_STATES = {");
|
||||
indent([&]() {
|
||||
size_t state_id = 0;
|
||||
for (auto &state : parse_table.states)
|
||||
line("[" + to_string(state_id++) + "] = " + lex_state_index(state.lex_state_id) + ",");
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
void parse_table_array() {
|
||||
size_t state_id = 0;
|
||||
line("#pragma GCC diagnostic push");
|
||||
line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
|
||||
line();
|
||||
line("PARSE_TABLE = {");
|
||||
|
||||
indent([&]() {
|
||||
for (auto &state : parse_table.states) {
|
||||
line("[" + to_string(state_id++) + "] = {");
|
||||
indent([&]() {
|
||||
for (auto &pair : state.actions) {
|
||||
line("[" + symbol_id(pair.first) + "] = ");
|
||||
code_for_parse_action(pair.second);
|
||||
add(",");
|
||||
}
|
||||
});
|
||||
line("},");
|
||||
}
|
||||
});
|
||||
|
||||
line("};");
|
||||
line();
|
||||
line("#pragma GCC diagnostic pop");
|
||||
line();
|
||||
}
|
||||
|
||||
void parser_export() {
|
||||
line("EXPORT_PARSER(ts_parser_" + name + ");");
|
||||
line();
|
||||
}
|
||||
|
||||
string rule_name(const rules::Symbol &symbol) {
|
||||
return symbol.is_token() ?
|
||||
lexical_grammar.rule_name(symbol) :
|
||||
syntax_grammar.rule_name(symbol);
|
||||
}
|
||||
|
||||
string symbol_id(const rules::Symbol &symbol) {
|
||||
if (symbol.is_built_in()) {
|
||||
return (symbol == rules::ERROR()) ?
|
||||
"ts_builtin_sym_error" :
|
||||
"ts_builtin_sym_end";
|
||||
} else {
|
||||
string name = sanitize_name(rule_name(symbol));
|
||||
if (symbol.is_auxiliary())
|
||||
return "ts_aux_sym_" + name;
|
||||
else
|
||||
return "ts_sym_" + name;
|
||||
}
|
||||
}
|
||||
|
||||
string sanitize_name(string name) {
|
||||
auto existing = sanitized_names.find(name);
|
||||
if (existing != sanitized_names.end())
|
||||
return existing->second;
|
||||
|
||||
string stripped_name;
|
||||
for (char c : name) {
|
||||
if (('a' <= c && c <= 'z') ||
|
||||
('A' <= c && c <= 'Z') ||
|
||||
('0' <= c && c <= '9') ||
|
||||
(c == '_')) {
|
||||
stripped_name += c;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t extra_number = 0;; extra_number++) {
|
||||
string suffix = extra_number ? to_string(extra_number) : "";
|
||||
string unique_name = stripped_name + suffix;
|
||||
if (unique_name == "")
|
||||
continue;
|
||||
if (!has_sanitized_name(unique_name)) {
|
||||
sanitized_names.insert({ name, unique_name });
|
||||
return unique_name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool has_sanitized_name(string name) {
|
||||
for (auto &pair : sanitized_names)
|
||||
if (pair.second == name)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
string lex_state_index(size_t i) {
|
||||
return to_string(i + 1);
|
||||
}
|
||||
|
||||
string symbol_name(const rules::Symbol &symbol) {
|
||||
if (symbol.is_built_in()) {
|
||||
return (symbol == rules::ERROR()) ? "error" : "end";
|
||||
} else if (symbol.is_token() && symbol.is_auxiliary()) {
|
||||
return rule_name(symbol);
|
||||
} else {
|
||||
return rule_name(symbol);
|
||||
}
|
||||
}
|
||||
|
||||
string condition_for_character_range(const rules::CharacterRange &range) {
|
||||
string lookahead("lookahead");
|
||||
if (range.min == range.max) {
|
||||
return lookahead + " == '" + escape_char(range.min) + "'";
|
||||
} else {
|
||||
return string("'") + escape_char(range.min) + string("' <= ") + lookahead +
|
||||
" && " + lookahead + " <= '" + escape_char(range.max) + "'";
|
||||
}
|
||||
}
|
||||
|
||||
void condition_for_character_set(const rules::CharacterSet &set) {
|
||||
if (set.ranges.size() == 1) {
|
||||
add(condition_for_character_range(*set.ranges.begin()));
|
||||
} else {
|
||||
bool first = true;
|
||||
for (auto &match : set.ranges) {
|
||||
string part = "(" + condition_for_character_range(match) + ")";
|
||||
if (first) {
|
||||
add(part);
|
||||
} else {
|
||||
add(" ||");
|
||||
line(part);
|
||||
}
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void condition_for_character_rule(const rules::CharacterSet &rule) {
|
||||
pair<rules::CharacterSet, bool> representation = rule.most_compact_representation();
|
||||
if (representation.second) {
|
||||
condition_for_character_set(representation.first);
|
||||
} else {
|
||||
add("!(");
|
||||
condition_for_character_set(rule.complement());
|
||||
add(")");
|
||||
}
|
||||
}
|
||||
|
||||
void code_for_parse_action(const ParseAction &action) {
|
||||
switch (action.type) {
|
||||
case ParseActionTypeAccept:
|
||||
add("ACCEPT_INPUT()");
|
||||
break;
|
||||
case ParseActionTypeShift:
|
||||
add("SHIFT(" + to_string(action.state_index) + ")");
|
||||
break;
|
||||
case ParseActionTypeShiftExtra:
|
||||
add("SHIFT_EXTRA()");
|
||||
break;
|
||||
case ParseActionTypeReduce:
|
||||
add("REDUCE(" +
|
||||
symbol_id(action.symbol) + ", " +
|
||||
to_string(action.consumed_symbol_count) + ")");
|
||||
break;
|
||||
case ParseActionTypeReduceExtra:
|
||||
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
|
||||
break;
|
||||
default: {}
|
||||
}
|
||||
}
|
||||
|
||||
void code_for_lex_actions(const LexAction &action,
|
||||
const set<rules::CharacterSet> &expected_inputs) {
|
||||
switch (action.type) {
|
||||
case LexActionTypeAdvance:
|
||||
line("ADVANCE(" + lex_state_index(action.state_index) + ");");
|
||||
break;
|
||||
case LexActionTypeAccept:
|
||||
line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
|
||||
break;
|
||||
case LexActionTypeError:
|
||||
line("LEX_ERROR();");
|
||||
break;
|
||||
default: {}
|
||||
}
|
||||
}
|
||||
|
||||
void code_for_lex_state(const LexState &lex_state) {
|
||||
auto expected_inputs = lex_state.expected_inputs();
|
||||
if (lex_state.is_token_start)
|
||||
line("START_TOKEN();");
|
||||
for (auto pair : lex_state.actions)
|
||||
if (!pair.first.is_empty())
|
||||
_if([&]() { condition_for_character_rule(pair.first); },
|
||||
[&]() { code_for_lex_actions(pair.second, expected_inputs); });
|
||||
code_for_lex_actions(lex_state.default_action, expected_inputs);
|
||||
}
|
||||
|
||||
void switch_on_lex_state() {
|
||||
_switch("lex_state", [&]() {
|
||||
for (size_t i = 0; i < lex_table.states.size(); i++)
|
||||
_case(lex_state_index(i), [&]() {
|
||||
code_for_lex_state(lex_table.states[i]);
|
||||
});
|
||||
_case("ts_lex_state_error", [&]() {
|
||||
code_for_lex_state(lex_table.error_state);
|
||||
});
|
||||
_default([&]() {
|
||||
line("LEX_PANIC();");
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void _switch(string condition, function<void()> body) {
|
||||
line("switch (" + condition + ") {");
|
||||
indent(body);
|
||||
line("}");
|
||||
}
|
||||
|
||||
void _case(string value, function<void()> body) {
|
||||
line("case " + value + ":");
|
||||
indent(body);
|
||||
}
|
||||
|
||||
void _default(function<void()> body) {
|
||||
line("default:");
|
||||
indent(body);
|
||||
}
|
||||
|
||||
void _if(function<void()> condition, function<void()> body) {
|
||||
line("if (");
|
||||
indent(condition);
|
||||
add(")");
|
||||
indent(body);
|
||||
}
|
||||
|
||||
void line() {
|
||||
line("");
|
||||
}
|
||||
|
||||
void line(string input) {
|
||||
add("\n");
|
||||
if (!input.empty()) {
|
||||
string space;
|
||||
for (size_t i = 0; i < indent_level; i++)
|
||||
space += " ";
|
||||
add(space + input);
|
||||
}
|
||||
}
|
||||
|
||||
void add(string input) {
|
||||
buffer += input;
|
||||
}
|
||||
|
||||
void indent(function<void()> body) {
|
||||
indent(body, 1);
|
||||
}
|
||||
|
||||
void indent(function<void()> body, size_t n) {
|
||||
indent_level += n;
|
||||
body();
|
||||
indent_level -= n;
|
||||
}
|
||||
};
|
||||
|
||||
string c_code(string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const SyntaxGrammar &syntax_grammar,
|
||||
const LexicalGrammar &lexical_grammar) {
|
||||
return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, lexical_grammar).code();
|
||||
void symbol_enum() {
|
||||
line("enum {");
|
||||
indent([&]() {
|
||||
bool at_start = true;
|
||||
for (auto symbol : parse_table.symbols)
|
||||
if (!symbol.is_built_in()) {
|
||||
if (at_start)
|
||||
line(symbol_id(symbol) + " = ts_start_sym,");
|
||||
else
|
||||
line(symbol_id(symbol) + ",");
|
||||
at_start = false;
|
||||
}
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
void symbol_names_list() {
|
||||
set<rules::Symbol> symbols(parse_table.symbols);
|
||||
symbols.insert(rules::END_OF_INPUT());
|
||||
symbols.insert(rules::ERROR());
|
||||
|
||||
line("SYMBOL_NAMES = {");
|
||||
indent([&]() {
|
||||
for (auto symbol : parse_table.symbols)
|
||||
line("[" + symbol_id(symbol) + "] = \"" + symbol_name(symbol) + "\",");
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
void hidden_symbols_list() {
|
||||
line("HIDDEN_SYMBOLS = {");
|
||||
indent([&]() {
|
||||
for (auto &symbol : parse_table.symbols)
|
||||
if (!symbol.is_built_in() &&
|
||||
(symbol.is_auxiliary() || rule_name(symbol)[0] == '_'))
|
||||
line("[" + symbol_id(symbol) + "] = 1,");
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
// Emits the lexer function: the LEX_FN() header, a START_LEXER() statement,
// the switch over lex states, and the closing brace.
void lex_function() {
  line("LEX_FN() {");
  const auto emit_body = [&]() {
    line("START_LEXER();");
    switch_on_lex_state();
  };
  indent(emit_body);
  line("}");
  line();
}
|
||||
|
||||
void lex_states_list() {
|
||||
line("LEX_STATES = {");
|
||||
indent([&]() {
|
||||
size_t state_id = 0;
|
||||
for (auto &state : parse_table.states)
|
||||
line("[" + to_string(state_id++) + "] = " +
|
||||
lex_state_index(state.lex_state_id) + ",");
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
void parse_table_array() {
|
||||
size_t state_id = 0;
|
||||
line("#pragma GCC diagnostic push");
|
||||
line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
|
||||
line();
|
||||
line("PARSE_TABLE = {");
|
||||
|
||||
indent([&]() {
|
||||
for (auto &state : parse_table.states) {
|
||||
line("[" + to_string(state_id++) + "] = {");
|
||||
indent([&]() {
|
||||
for (auto &pair : state.actions) {
|
||||
line("[" + symbol_id(pair.first) + "] = ");
|
||||
code_for_parse_action(pair.second);
|
||||
add(",");
|
||||
}
|
||||
});
|
||||
line("},");
|
||||
}
|
||||
});
|
||||
|
||||
line("};");
|
||||
line();
|
||||
line("#pragma GCC diagnostic pop");
|
||||
line();
|
||||
}
|
||||
|
||||
void parser_export() {
|
||||
line("EXPORT_PARSER(ts_parser_" + name + ");");
|
||||
line();
|
||||
}
|
||||
|
||||
// Looks up the symbol's rule name: in the lexical grammar for tokens, in the
// syntax grammar otherwise.
string rule_name(const rules::Symbol &symbol) {
  if (symbol.is_token())
    return lexical_grammar.rule_name(symbol);
  return syntax_grammar.rule_name(symbol);
}
|
||||
|
||||
// Returns the C identifier used for a symbol in the generated code.
// Built-in symbols map to the fixed ts_builtin_sym_* names; all others get
// "ts_aux_sym_" or "ts_sym_" followed by the sanitized rule name.
string symbol_id(const rules::Symbol &symbol) {
  if (symbol.is_built_in()) {
    if (symbol == rules::ERROR())
      return "ts_builtin_sym_error";
    return "ts_builtin_sym_end";
  }

  // Sanitize first (this may record a new entry), then choose the prefix.
  const string sanitized = sanitize_name(rule_name(symbol));
  const char *prefix = symbol.is_auxiliary() ? "ts_aux_sym_" : "ts_sym_";
  return prefix + sanitized;
}
|
||||
|
||||
// Maps a rule name to a unique identifier-safe string and caches the result.
//
// Characters other than ASCII letters, digits and '_' are dropped. If the
// stripped text is empty or already assigned to another name, increasing
// numeric suffixes (1, 2, ...) are tried until an unused, non-empty
// candidate is found.
string sanitize_name(string name) {
  const auto cached = sanitized_names.find(name);
  if (cached != sanitized_names.end())
    return cached->second;

  string identifier;
  for (const char c : name) {
    const bool is_lower = 'a' <= c && c <= 'z';
    const bool is_upper = 'A' <= c && c <= 'Z';
    const bool is_digit = '0' <= c && c <= '9';
    if (is_lower || is_upper || is_digit || c == '_')
      identifier += c;
  }

  size_t counter = 0;
  while (true) {
    string candidate = identifier;
    if (counter != 0)
      candidate += to_string(counter);
    ++counter;

    if (candidate.empty() || has_sanitized_name(candidate))
      continue;

    sanitized_names.insert({ name, candidate });
    return candidate;
  }
}
|
||||
|
||||
// Reports whether `name` is already in use as the sanitized form of some
// rule name (a linear scan over the cached values).
bool has_sanitized_name(string name) {
  auto entry = sanitized_names.begin();
  while (entry != sanitized_names.end() && entry->second != name)
    ++entry;
  return entry != sanitized_names.end();
}
|
||||
|
||||
// Returns the decimal text of `i + 1`: lex state numbers in the generated
// code are the table index shifted up by one.
string lex_state_index(size_t i) {
  const size_t shifted = i + 1;
  return to_string(shifted);
}
|
||||
|
||||
// Returns the display name stored for a symbol in SYMBOL_NAMES:
// "error" / "end" for the built-in symbols, the rule name for everything
// else.
//
// The previous version had a separate branch for auxiliary tokens that
// returned exactly the same value as the fallback branch; the duplicate
// branch has been folded away without changing any result.
string symbol_name(const rules::Symbol &symbol) {
  if (symbol.is_built_in())
    return (symbol == rules::ERROR()) ? "error" : "end";
  return rule_name(symbol);
}
|
||||
|
||||
// Builds the C expression that tests `lookahead` against one character
// range: an equality test when the range holds a single character, otherwise
// a pair of inclusive bound checks joined by &&.
string condition_for_character_range(const rules::CharacterRange &range) {
  const string lookahead("lookahead");
  const string low = string("'") + escape_char(range.min) + "'";

  if (range.min == range.max)
    return lookahead + " == " + low;

  const string high = string("'") + escape_char(range.max) + "'";
  return low + " <= " + lookahead + " && " + lookahead + " <= " + high;
}
|
||||
|
||||
// Appends the condition for a whole character set. A single range is written
// bare; several ranges are each parenthesized and joined with "||", with
// every range after the first placed on its own output line.
void condition_for_character_set(const rules::CharacterSet &set) {
  if (set.ranges.size() == 1) {
    add(condition_for_character_range(*set.ranges.begin()));
    return;
  }

  size_t emitted = 0;
  for (auto &range : set.ranges) {
    const string clause = "(" + condition_for_character_range(range) + ")";
    if (emitted == 0) {
      add(clause);
    } else {
      add(" ||");
      line(clause);
    }
    ++emitted;
  }
}
|
||||
|
||||
// Appends the condition for a character rule using its most compact
// representation. When the flag returned with that representation is false,
// the condition is written as the negation "!(...)" of the rule's complement.
void condition_for_character_rule(const rules::CharacterSet &rule) {
  const pair<rules::CharacterSet, bool> compact =
      rule.most_compact_representation();

  if (!compact.second) {
    add("!(");
    condition_for_character_set(rule.complement());
    add(")");
    return;
  }

  condition_for_character_set(compact.first);
}
|
||||
|
||||
void code_for_parse_action(const ParseAction &action) {
|
||||
switch (action.type) {
|
||||
case ParseActionTypeAccept:
|
||||
add("ACCEPT_INPUT()");
|
||||
break;
|
||||
case ParseActionTypeShift:
|
||||
add("SHIFT(" + to_string(action.state_index) + ")");
|
||||
break;
|
||||
case ParseActionTypeShiftExtra:
|
||||
add("SHIFT_EXTRA()");
|
||||
break;
|
||||
case ParseActionTypeReduce:
|
||||
add("REDUCE(" + symbol_id(action.symbol) + ", " +
|
||||
to_string(action.consumed_symbol_count) + ")");
|
||||
break;
|
||||
case ParseActionTypeReduceExtra:
|
||||
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
|
||||
break;
|
||||
default: {}
|
||||
}
|
||||
}
|
||||
|
||||
void code_for_lex_actions(const LexAction &action,
|
||||
const set<rules::CharacterSet> &expected_inputs) {
|
||||
switch (action.type) {
|
||||
case LexActionTypeAdvance:
|
||||
line("ADVANCE(" + lex_state_index(action.state_index) + ");");
|
||||
break;
|
||||
case LexActionTypeAccept:
|
||||
line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
|
||||
break;
|
||||
case LexActionTypeError:
|
||||
line("LEX_ERROR();");
|
||||
break;
|
||||
default: {}
|
||||
}
|
||||
}
|
||||
|
||||
void code_for_lex_state(const LexState &lex_state) {
|
||||
auto expected_inputs = lex_state.expected_inputs();
|
||||
if (lex_state.is_token_start)
|
||||
line("START_TOKEN();");
|
||||
for (auto pair : lex_state.actions)
|
||||
if (!pair.first.is_empty())
|
||||
_if([&]() { condition_for_character_rule(pair.first); },
|
||||
[&]() { code_for_lex_actions(pair.second, expected_inputs); });
|
||||
code_for_lex_actions(lex_state.default_action, expected_inputs);
|
||||
}
|
||||
|
||||
void switch_on_lex_state() {
|
||||
_switch("lex_state", [&]() {
|
||||
for (size_t i = 0; i < lex_table.states.size(); i++)
|
||||
_case(lex_state_index(i),
|
||||
[&]() { code_for_lex_state(lex_table.states[i]); });
|
||||
_case("ts_lex_state_error",
|
||||
[&]() { code_for_lex_state(lex_table.error_state); });
|
||||
_default([&]() { line("LEX_PANIC();"); });
|
||||
});
|
||||
}
|
||||
|
||||
// Emits "switch (CONDITION) {", runs `body` one indentation level deeper,
// then emits the closing brace.
void _switch(string condition, function<void()> body) {
  const string header = "switch (" + condition + ") {";
  line(header);
  indent(body, 1);
  line("}");
}
|
||||
|
||||
// Emits a "case VALUE:" label and runs `body` one indentation level deeper.
void _case(string value, function<void()> body) {
  const string label = "case " + value + ":";
  line(label);
  indent(body, 1);
}
|
||||
|
||||
// Emits a "default:" label and runs `body` one indentation level deeper.
void _default(function<void()> body) {
  const string label = "default:";
  line(label);
  indent(body, 1);
}
|
||||
|
||||
// Emits an if statement: "if (" on a new line, the text produced by
// `condition`, a closing ")", and then whatever `body` emits. Both callbacks
// run one indentation level deeper.
void _if(function<void()> condition, function<void()> body) {
  line("if (");
  indent(condition, 1);
  add(")");
  indent(body, 1);
}
|
||||
|
||||
void line() { line(""); }
|
||||
|
||||
// Starts a new output line and, when `input` is non-empty, writes it
// preceded by one indent unit per current indentation level. An empty
// `input` yields just the newline, with no trailing indentation.
void line(string input) {
  add("\n");
  if (input.empty())
    return;

  string indentation;
  for (size_t depth = indent_level; depth > 0; --depth)
    indentation += " ";
  add(indentation + input);
}
|
||||
|
||||
// Appends `input` verbatim to the output buffer.
void add(string input) {
  buffer.append(input);
}
|
||||
|
||||
// Runs `body` with the indentation level raised by one.
void indent(function<void()> body) {
  const size_t single_level = 1;
  indent(body, single_level);
}
|
||||
|
||||
// Runs `body` with the indentation level raised by `n`, then lowers it by
// `n` again.
void indent(function<void()> body, size_t n) {
  const size_t outer_level = indent_level;
  indent_level = outer_level + n;
  body();
  indent_level -= n;
}
|
||||
};
|
||||
|
||||
// Generates the C source text of a parser called `name` from the given
// tables and grammars, by running a one-off CCodeGenerator.
string c_code(string name, const ParseTable &parse_table,
              const LexTable &lex_table, const SyntaxGrammar &syntax_grammar,
              const LexicalGrammar &lexical_grammar) {
  CCodeGenerator generator(name, parse_table, lex_table, syntax_grammar,
                           lexical_grammar);
  return generator.code();
}
|
||||
|
||||
} // namespace generate_code
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -7,16 +7,17 @@
|
|||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace generate_code {
|
||||
std::string c_code(std::string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const SyntaxGrammar &syntax_grammar,
|
||||
const LexicalGrammar &lexical_grammar);
|
||||
}
|
||||
}
|
||||
namespace generate_code {
|
||||
|
||||
std::string c_code(std::string name, const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const SyntaxGrammar &syntax_grammar,
|
||||
const LexicalGrammar &lexical_grammar);
|
||||
|
||||
} // namespace generate_code
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_GENERATE_CODE_C_CODE_H_
|
||||
|
|
|
|||
|
|
@ -2,83 +2,83 @@
|
|||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::ostream;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using rules::rule_ptr;
|
||||
|
||||
Grammar::Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules) :
|
||||
rules_(rules),
|
||||
ubiquitous_tokens_({}),
|
||||
separators_({ ' ', '\r', '\t', '\n' }) {}
|
||||
using std::ostream;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using rules::rule_ptr;
|
||||
|
||||
bool Grammar::operator==(const Grammar &other) const {
|
||||
if (other.rules_.size() != rules_.size()) return false;
|
||||
// Builds a grammar from an ordered list of (rule name, rule) pairs. The
// grammar starts with no ubiquitous tokens and with space, carriage return,
// tab and newline as its separator characters.
Grammar::Grammar(const vector<pair<string, rule_ptr> > &rules)
    : rules_(rules),
      ubiquitous_tokens_({}),
      separators_({ ' ', '\r', '\t', '\n' }) {
}
|
||||
|
||||
for (size_t i = 0; i < rules_.size(); i++) {
|
||||
auto &pair = rules_[i];
|
||||
auto &other_pair = other.rules_[i];
|
||||
if (other_pair.first != pair.first) return false;
|
||||
if (!other_pair.second->operator==(*pair.second)) return false;
|
||||
}
|
||||
bool Grammar::operator==(const Grammar &other) const {
|
||||
if (other.rules_.size() != rules_.size())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
for (size_t i = 0; i < rules_.size(); i++) {
|
||||
auto &pair = rules_[i];
|
||||
auto &other_pair = other.rules_[i];
|
||||
if (other_pair.first != pair.first)
|
||||
return false;
|
||||
if (!other_pair.second->operator==(*pair.second))
|
||||
return false;
|
||||
}
|
||||
|
||||
string Grammar::start_rule_name() const {
|
||||
return rules_.front().first;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const Grammar &grammar) {
|
||||
stream << string("#<grammar");
|
||||
stream << string(" rules: {");
|
||||
bool started = false;
|
||||
for (auto pair : grammar.rules()) {
|
||||
if (started) stream << string(", ");
|
||||
stream << pair.first;
|
||||
stream << string(" => ");
|
||||
stream << pair.second;
|
||||
started = true;
|
||||
}
|
||||
return stream << string("}>");
|
||||
}
|
||||
|
||||
GrammarError::GrammarError(GrammarErrorType type, std::string message) :
|
||||
type(type),
|
||||
message(message) {}
|
||||
|
||||
bool GrammarError::operator==(const GrammarError &other) const {
|
||||
return type == other.type && message == other.message;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const GrammarError *error) {
|
||||
if (error)
|
||||
return stream << (string("#<grammar-error '") + error->message + "'>");
|
||||
else
|
||||
return stream << string("#<null>");
|
||||
}
|
||||
|
||||
const set<string> & Grammar::ubiquitous_tokens() const {
|
||||
return ubiquitous_tokens_;
|
||||
}
|
||||
|
||||
Grammar & Grammar::ubiquitous_tokens(const set<string> &ubiquitous_tokens) {
|
||||
ubiquitous_tokens_ = ubiquitous_tokens;
|
||||
return *this;
|
||||
}
|
||||
|
||||
const set<char> & Grammar::separators() const {
|
||||
return separators_;
|
||||
}
|
||||
|
||||
Grammar & Grammar::separators(const set<char> &separators) {
|
||||
separators_ = separators;
|
||||
return *this;
|
||||
}
|
||||
|
||||
const vector<pair<string, rule_ptr>> & Grammar::rules() const {
|
||||
return rules_;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the name of the grammar's first rule, which serves as its start
// rule. A grammar with no rules yields an empty string; calling front() on
// an empty vector would be undefined behaviour.
string Grammar::start_rule_name() const {
  if (rules_.empty())
    return string();
  return rules_.front().first;
}
|
||||
|
||||
// Writes a grammar as "#<grammar rules: {NAME => RULE, ...}>" and returns
// the stream.
//
// Rules are visited by const reference; the previous by-value loop variable
// copied each name string and rule pointer on every iteration.
ostream &operator<<(ostream &stream, const Grammar &grammar) {
  stream << string("#<grammar");
  stream << string(" rules: {");

  bool needs_separator = false;
  for (const auto &entry : grammar.rules()) {
    if (needs_separator)
      stream << string(", ");
    stream << entry.first;
    stream << string(" => ");
    stream << entry.second;
    needs_separator = true;
  }

  return stream << string("}>");
}
|
||||
|
||||
// Builds an error of the given type with the given message text.
GrammarError::GrammarError(GrammarErrorType type, std::string message)
    : type(type),
      message(message) {
}
|
||||
|
||||
// Two errors are equal when both their types and their messages match.
bool GrammarError::operator==(const GrammarError &other) const {
  if (!(type == other.type))
    return false;
  return message == other.message;
}
|
||||
|
||||
// Writes an error pointer as "#<grammar-error 'MESSAGE'>", or as "#<null>"
// when the pointer is null, and returns the stream.
ostream &operator<<(ostream &stream, const GrammarError *error) {
  if (!error)
    return stream << string("#<null>");
  return stream << (string("#<grammar-error '") + error->message + "'>");
}
|
||||
|
||||
// Read-only access to the stored set of ubiquitous token names.
const set<string> &Grammar::ubiquitous_tokens() const {
  const set<string> &stored = ubiquitous_tokens_;
  return stored;
}
|
||||
|
||||
// Replaces the set of ubiquitous token names and returns this grammar so
// that setter calls can be chained.
Grammar &Grammar::ubiquitous_tokens(const set<string> &ubiquitous_tokens) {
  this->ubiquitous_tokens_ = ubiquitous_tokens;
  Grammar &self = *this;
  return self;
}
|
||||
|
||||
// Read-only access to the stored set of separator characters.
const set<char> &Grammar::separators() const {
  return separators_;
}
|
||||
|
||||
// Replaces the set of separator characters and returns this grammar so that
// setter calls can be chained.
Grammar &Grammar::separators(const set<char> &separators) {
  this->separators_ = separators;
  Grammar &self = *this;
  return self;
}
|
||||
|
||||
// Read-only access to the ordered list of (rule name, rule) pairs.
const vector<pair<string, rule_ptr> > &Grammar::rules() const {
  return rules_;
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -2,75 +2,79 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::set;
|
||||
using rules::Symbol;
|
||||
using rules::CharacterSet;
|
||||
|
||||
LexAction::LexAction() :
|
||||
type(LexActionTypeError),
|
||||
symbol(Symbol(-1)),
|
||||
state_index(-1),
|
||||
precedence_values({0}) {}
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::set;
|
||||
using rules::Symbol;
|
||||
using rules::CharacterSet;
|
||||
|
||||
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, set<int> precedence_values) :
|
||||
type(type),
|
||||
symbol(symbol),
|
||||
state_index(state_index),
|
||||
precedence_values(precedence_values) {}
|
||||
// Default-constructs an error action: symbol and state index are built from
// -1, and the precedence set holds the single value 0.
LexAction::LexAction()
    : type(LexActionTypeError),
      symbol(Symbol(-1)),
      state_index(-1),  // converted to size_t, the member's declared type
      precedence_values({ 0 }) {}
|
||||
|
||||
LexAction LexAction::Error() {
|
||||
return LexAction(LexActionTypeError, -1, Symbol(-1), {0});
|
||||
}
|
||||
// Member-wise constructor; the static factory functions of LexAction
// (Error, Advance, Accept) build their results through it.
LexAction::LexAction(LexActionType type,
                     size_t state_index,
                     Symbol symbol,
                     set<int> precedence_values)
    : type(type),
      symbol(symbol),
      state_index(state_index),
      precedence_values(precedence_values) {}
|
||||
|
||||
LexAction LexAction::Advance(size_t state_index, set<int> precedence_values) {
|
||||
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), precedence_values);
|
||||
}
|
||||
|
||||
LexAction LexAction::Accept(Symbol symbol, int precedence) {
|
||||
return LexAction(LexActionTypeAccept, -1, symbol, { precedence });
|
||||
}
|
||||
|
||||
bool LexAction::operator==(const LexAction &other) const {
|
||||
return
|
||||
(type == other.type) &&
|
||||
(state_index == other.state_index) &&
|
||||
(symbol == other.symbol);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const LexAction &action) {
|
||||
switch (action.type) {
|
||||
case LexActionTypeError:
|
||||
return stream << string("#<error>");
|
||||
case LexActionTypeAccept:
|
||||
return stream << string("#<accept ") + to_string(action.symbol.index) + ">";
|
||||
case LexActionTypeAdvance:
|
||||
return stream << string("#<advance ") + to_string(action.state_index) + ">";
|
||||
default:
|
||||
return stream;
|
||||
}
|
||||
}
|
||||
|
||||
set<CharacterSet> LexState::expected_inputs() const {
|
||||
set<CharacterSet> result;
|
||||
for (auto &pair : actions)
|
||||
result.insert(pair.first);
|
||||
return result;
|
||||
}
|
||||
|
||||
LexStateId LexTable::add_state() {
|
||||
states.push_back(LexState());
|
||||
return states.size() - 1;
|
||||
}
|
||||
|
||||
LexState & LexTable::state(LexStateId id) {
|
||||
if (id < 0)
|
||||
return error_state;
|
||||
else
|
||||
return states[id];
|
||||
}
|
||||
|
||||
const LexStateId LexTable::ERROR_STATE_ID = -1;
|
||||
// Factory for an error action: state index and symbol are built from -1 and
// the precedence set is { 0 }.
LexAction LexAction::Error() {
  LexAction error_action(LexActionTypeError, -1, Symbol(-1), { 0 });
  return error_action;
}
|
||||
|
||||
// Factory for an advance action targeting `state_index`. The symbol is the
// placeholder Symbol(-1); the given precedence values are stored as-is.
LexAction LexAction::Advance(size_t state_index, set<int> precedence_values) {
  LexAction advance_action(LexActionTypeAdvance, state_index, Symbol(-1),
                           precedence_values);
  return advance_action;
}
|
||||
|
||||
// Factory for an accept action for `symbol`. The state index is built from
// -1 and the precedence set contains only `precedence`.
LexAction LexAction::Accept(Symbol symbol, int precedence) {
  LexAction accept_action(LexActionTypeAccept, -1, symbol, { precedence });
  return accept_action;
}
|
||||
|
||||
// Equality over type, state index and symbol.
// Note: precedence_values does not take part in the comparison.
bool LexAction::operator==(const LexAction &other) const {
  if (type != other.type)
    return false;
  if (state_index != other.state_index)
    return false;
  return symbol == other.symbol;
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const LexAction &action) {
|
||||
switch (action.type) {
|
||||
case LexActionTypeError:
|
||||
return stream << string("#<error>");
|
||||
case LexActionTypeAccept:
|
||||
return stream << string("#<accept ") + to_string(action.symbol.index) +
|
||||
">";
|
||||
case LexActionTypeAdvance:
|
||||
return stream << string("#<advance ") + to_string(action.state_index) +
|
||||
">";
|
||||
default:
|
||||
return stream;
|
||||
}
|
||||
}
|
||||
|
||||
// Collects the keys of the `actions` map, i.e. every character set for which
// this state has an action.
set<CharacterSet> LexState::expected_inputs() const {
  set<CharacterSet> inputs;
  for (auto entry = actions.begin(); entry != actions.end(); ++entry) {
    inputs.insert(entry->first);
  }
  return inputs;
}
|
||||
|
||||
// Appends a default-constructed state and returns its index in `states`.
LexStateId LexTable::add_state() {
  LexState fresh_state = LexState();
  states.push_back(fresh_state);
  return states.size() - 1;
}
|
||||
|
||||
// Looks up a state by id. Any negative id yields the dedicated error state;
// non-negative ids index into `states` (no bounds check is performed).
LexState &LexTable::state(LexStateId id) {
  return (id < 0) ? error_state : states[id];
}
|
||||
|
||||
const LexStateId LexTable::ERROR_STATE_ID = -1;
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -9,61 +9,69 @@
|
|||
#include "compiler/rules/character_set.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
typedef enum {
|
||||
LexActionTypeError,
|
||||
LexActionTypeAccept,
|
||||
LexActionTypeAdvance
|
||||
} LexActionType;
|
||||
|
||||
class LexAction {
|
||||
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, std::set<int> precedence_values);
|
||||
public:
|
||||
LexAction();
|
||||
static LexAction Accept(rules::Symbol symbol, int precedence);
|
||||
static LexAction Error();
|
||||
static LexAction Advance(size_t state_index, std::set<int> precedence_values);
|
||||
bool operator==(const LexAction &action) const;
|
||||
// Discriminator stored in LexAction::type.
typedef enum {
  LexActionTypeError,    // used by LexAction::Error() and the default ctor
  LexActionTypeAccept,   // used by LexAction::Accept()
  LexActionTypeAdvance   // used by LexAction::Advance()
} LexActionType;
|
||||
|
||||
LexActionType type;
|
||||
rules::Symbol symbol;
|
||||
size_t state_index;
|
||||
std::set<int> precedence_values;
|
||||
};
|
||||
class LexAction {
|
||||
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol,
|
||||
std::set<int> precedence_values);
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const LexAction &item);
|
||||
}
|
||||
public:
|
||||
LexAction();
|
||||
static LexAction Accept(rules::Symbol symbol, int precedence);
|
||||
static LexAction Error();
|
||||
static LexAction Advance(size_t state_index, std::set<int> precedence_values);
|
||||
bool operator==(const LexAction &action) const;
|
||||
|
||||
LexActionType type;
|
||||
rules::Symbol symbol;
|
||||
size_t state_index;
|
||||
std::set<int> precedence_values;
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const LexAction &item);
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::LexAction> {
|
||||
size_t operator()(const tree_sitter::LexAction &action) const {
|
||||
return (hash<int>()(action.type) ^
|
||||
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
|
||||
hash<size_t>()(action.state_index));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::LexAction> {
|
||||
size_t operator()(const tree_sitter::LexAction &action) const {
|
||||
return (hash<int>()(action.type) ^
|
||||
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
|
||||
hash<size_t>()(action.state_index));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace tree_sitter {
|
||||
class LexState {
|
||||
public:
|
||||
std::map<rules::CharacterSet, LexAction> actions;
|
||||
LexAction default_action;
|
||||
std::set<rules::CharacterSet> expected_inputs() const;
|
||||
bool is_token_start;
|
||||
};
|
||||
|
||||
typedef int64_t LexStateId;
|
||||
class LexState {
|
||||
public:
|
||||
std::map<rules::CharacterSet, LexAction> actions;
|
||||
LexAction default_action;
|
||||
std::set<rules::CharacterSet> expected_inputs() const;
|
||||
bool is_token_start;
|
||||
};
|
||||
|
||||
class LexTable {
|
||||
public:
|
||||
static const LexStateId ERROR_STATE_ID;
|
||||
LexStateId add_state();
|
||||
LexState & state(LexStateId state_id);
|
||||
typedef int64_t LexStateId;
|
||||
|
||||
std::vector<LexState> states;
|
||||
LexState error_state;
|
||||
};
|
||||
}
|
||||
class LexTable {
|
||||
public:
|
||||
static const LexStateId ERROR_STATE_ID;
|
||||
LexStateId add_state();
|
||||
LexState &state(LexStateId state_id);
|
||||
|
||||
std::vector<LexState> states;
|
||||
LexState error_state;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_LEX_TABLE_H_
|
||||
|
|
|
|||
|
|
@ -2,109 +2,120 @@
|
|||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::ostream;
|
||||
using std::to_string;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using rules::Symbol;
|
||||
|
||||
ParseAction::ParseAction(ParseActionType type,
|
||||
ParseStateId state_index,
|
||||
Symbol symbol,
|
||||
size_t consumed_symbol_count,
|
||||
set<int> precedence_values) :
|
||||
type(type),
|
||||
symbol(symbol),
|
||||
state_index(state_index),
|
||||
consumed_symbol_count(consumed_symbol_count),
|
||||
precedence_values(precedence_values) {}
|
||||
using std::string;
|
||||
using std::ostream;
|
||||
using std::to_string;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using rules::Symbol;
|
||||
|
||||
ParseAction::ParseAction() :
|
||||
type(ParseActionTypeError),
|
||||
symbol(Symbol(-1)),
|
||||
state_index(-1),
|
||||
consumed_symbol_count(0) {}
|
||||
// Member-wise constructor; the static factory functions of ParseAction build
// their results through it.
ParseAction::ParseAction(ParseActionType type,
                         ParseStateId state_index,
                         Symbol symbol,
                         size_t consumed_symbol_count,
                         set<int> precedence_values)
    : type(type),
      symbol(symbol),
      state_index(state_index),
      consumed_symbol_count(consumed_symbol_count),
      precedence_values(precedence_values) {}
|
||||
|
||||
ParseAction ParseAction::Error() {
|
||||
return ParseAction(ParseActionTypeError, -1, Symbol(-1), 0, { 0 });
|
||||
}
|
||||
// Default-constructs an error action: symbol and state index are built from
// -1 and nothing is consumed. precedence_values is left default-constructed
// (empty).
ParseAction::ParseAction()
    : type(ParseActionTypeError),
      symbol(Symbol(-1)),
      state_index(-1),  // converted to ParseStateId, the member's type
      consumed_symbol_count(0) {}
|
||||
|
||||
ParseAction ParseAction::Accept() {
|
||||
return ParseAction(ParseActionTypeAccept, -1, Symbol(-1), 0, { 0 });
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Shift(ParseStateId state_index, set<int> precedence_values) {
|
||||
return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, precedence_values);
|
||||
}
|
||||
|
||||
ParseAction ParseAction::ShiftExtra() {
|
||||
return ParseAction(ParseActionTypeShiftExtra, 0, Symbol(-1), 0, { 0 });
|
||||
}
|
||||
|
||||
ParseAction ParseAction::ReduceExtra(Symbol symbol) {
|
||||
return ParseAction(ParseActionTypeReduceExtra, 0, symbol, 0, { 0 });
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count, int precedence) {
|
||||
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count, { precedence });
|
||||
}
|
||||
|
||||
bool ParseAction::operator==(const ParseAction &other) const {
|
||||
bool types_eq = type == other.type;
|
||||
bool symbols_eq = symbol == other.symbol;
|
||||
bool state_indices_eq = state_index == other.state_index;
|
||||
bool consumed_symbol_counts_eq = consumed_symbol_count == other.consumed_symbol_count;
|
||||
return types_eq && symbols_eq && state_indices_eq && consumed_symbol_counts_eq;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const ParseAction &action) {
|
||||
switch (action.type) {
|
||||
case ParseActionTypeError:
|
||||
return stream << string("#<error>");
|
||||
case ParseActionTypeAccept:
|
||||
return stream << string("#<accept>");
|
||||
case ParseActionTypeShift:
|
||||
return stream << (string("#<shift ") + to_string(action.state_index) + ">");
|
||||
case ParseActionTypeShiftExtra:
|
||||
return stream << string("#<shift_extra");
|
||||
case ParseActionTypeReduceExtra:
|
||||
return stream << ("#<reduce_extra sym" + to_string(action.symbol.index) + ">");
|
||||
case ParseActionTypeReduce:
|
||||
return stream << ("#<reduce sym" + to_string(action.symbol.index) + " " + to_string(action.consumed_symbol_count) + ">");
|
||||
default:
|
||||
return stream;
|
||||
}
|
||||
}
|
||||
|
||||
ParseState::ParseState() : lex_state_id(-1) {}
|
||||
|
||||
set<Symbol> ParseState::expected_inputs() const {
|
||||
set<Symbol> result;
|
||||
for (auto &pair : actions)
|
||||
result.insert(pair.first);
|
||||
return result;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const ParseState &state) {
|
||||
stream << string("#<parse_state ");
|
||||
bool started = false;
|
||||
for (auto pair : state.actions) {
|
||||
if (started) stream << string(", ");
|
||||
stream << pair.first << string(" => ") << pair.second;
|
||||
started = true;
|
||||
}
|
||||
stream << string(">");
|
||||
return stream;
|
||||
}
|
||||
|
||||
ParseStateId ParseTable::add_state() {
|
||||
states.push_back(ParseState());
|
||||
return states.size() - 1;
|
||||
}
|
||||
|
||||
void ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) {
|
||||
symbols.insert(symbol);
|
||||
states[id].actions[symbol] = action;
|
||||
}
|
||||
// Factory for an error action (state index and symbol built from -1,
// nothing consumed, precedence set { 0 }).
ParseAction ParseAction::Error() {
  ParseAction error_action(ParseActionTypeError, -1, Symbol(-1), 0, { 0 });
  return error_action;
}
|
||||
|
||||
// Factory for an accept action (state index and symbol built from -1,
// nothing consumed, precedence set { 0 }).
ParseAction ParseAction::Accept() {
  ParseAction accept_action(ParseActionTypeAccept, -1, Symbol(-1), 0, { 0 });
  return accept_action;
}
|
||||
|
||||
// Factory for a shift action into `state_index`, carrying the given
// precedence values. The symbol is the placeholder Symbol(-1).
ParseAction ParseAction::Shift(ParseStateId state_index,
                               set<int> precedence_values) {
  ParseAction shift_action(ParseActionTypeShift, state_index, Symbol(-1), 0,
                           precedence_values);
  return shift_action;
}
|
||||
|
||||
// Factory for a shift-extra action (state index 0, placeholder symbol,
// nothing consumed, precedence set { 0 }).
ParseAction ParseAction::ShiftExtra() {
  ParseAction shift_extra(ParseActionTypeShiftExtra, 0, Symbol(-1), 0, { 0 });
  return shift_extra;
}
|
||||
|
||||
// Factory for a reduce-extra action on `symbol` (state index 0, nothing
// consumed, precedence set { 0 }).
ParseAction ParseAction::ReduceExtra(Symbol symbol) {
  ParseAction reduce_extra(ParseActionTypeReduceExtra, 0, symbol, 0, { 0 });
  return reduce_extra;
}
|
||||
|
||||
// Factory for a reduce action producing `symbol` from
// `consumed_symbol_count` symbols; the precedence set holds `precedence`.
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
                                int precedence) {
  ParseAction reduce_action(ParseActionTypeReduce, 0, symbol,
                            consumed_symbol_count, { precedence });
  return reduce_action;
}
|
||||
|
||||
// Equality over type, symbol, state index and consumed symbol count.
// Note: precedence_values does not take part in the comparison.
bool ParseAction::operator==(const ParseAction &other) const {
  return type == other.type &&
         symbol == other.symbol &&
         state_index == other.state_index &&
         consumed_symbol_count == other.consumed_symbol_count;
}
|
||||
|
||||
// Writes a short textual form of a parse action to `stream`, e.g.
// "#<error>", "#<accept>", "#<shift N>", "#<shift_extra>",
// "#<reduce_extra symN>" or "#<reduce symN M>" (N = symbol index, M =
// consumed symbol count). An unrecognised action type writes nothing.
ostream &operator<<(ostream &stream, const ParseAction &action) {
  switch (action.type) {
    case ParseActionTypeError:
      return stream << string("#<error>");
    case ParseActionTypeAccept:
      return stream << string("#<accept>");
    case ParseActionTypeShift:
      return stream << (string("#<shift ") + to_string(action.state_index) +
                        ">");
    case ParseActionTypeShiftExtra:
      // Closing '>' added so this case matches every other "#<...>" form.
      return stream << string("#<shift_extra>");
    case ParseActionTypeReduceExtra:
      return stream << ("#<reduce_extra sym" + to_string(action.symbol.index) +
                        ">");
    case ParseActionTypeReduce:
      return stream << ("#<reduce sym" + to_string(action.symbol.index) + " " +
                        to_string(action.consumed_symbol_count) + ">");
    default:
      return stream;
  }
}
|
||||
|
||||
// A new parse state starts with lex_state_id set to -1.
ParseState::ParseState()
    : lex_state_id(-1) {}
|
||||
|
||||
// Collects the keys of the `actions` map, i.e. every symbol for which this
// state has an action.
set<Symbol> ParseState::expected_inputs() const {
  set<Symbol> inputs;
  for (auto entry = actions.begin(); entry != actions.end(); ++entry) {
    inputs.insert(entry->first);
  }
  return inputs;
}
|
||||
|
||||
// Writes a parse state as "#<parse_state sym => action, sym => action>",
// listing the entries of `state.actions` in map order.
ostream &operator<<(ostream &stream, const ParseState &state) {
  stream << string("#<parse_state ");
  bool started = false;
  // Bind by const reference: each entry holds a ParseAction (which owns a
  // std::set), so iterating by value would copy it on every step.
  for (const auto &pair : state.actions) {
    if (started)
      stream << string(", ");
    stream << pair.first << string(" => ") << pair.second;
    started = true;
  }
  stream << string(">");
  return stream;
}
|
||||
|
||||
// Appends a default-constructed state and returns its index in `states`.
ParseStateId ParseTable::add_state() {
  ParseState fresh_state = ParseState();
  states.push_back(fresh_state);
  return states.size() - 1;
}
|
||||
|
||||
// Records `symbol` in the table-wide symbol set and stores `action` under
// that symbol in state `id`, replacing any action already stored there.
// `id` is used to index `states` directly (no bounds check is performed).
void ParseTable::add_action(ParseStateId id, Symbol symbol,
                            ParseAction action) {
  symbols.insert(symbol);
  ParseState &target_state = states[id];
  target_state.actions[symbol] = action;
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -9,75 +9,82 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
typedef uint64_t ParseStateId;
|
||||
|
||||
typedef enum {
|
||||
ParseActionTypeError,
|
||||
ParseActionTypeShift,
|
||||
ParseActionTypeShiftExtra,
|
||||
ParseActionTypeReduce,
|
||||
ParseActionTypeReduceExtra,
|
||||
ParseActionTypeAccept,
|
||||
} ParseActionType;
|
||||
typedef uint64_t ParseStateId;
|
||||
|
||||
class ParseAction {
|
||||
ParseAction(ParseActionType type,
|
||||
ParseStateId state_index,
|
||||
rules::Symbol symbol,
|
||||
size_t consumed_symbol_count,
|
||||
std::set<int> precedence_values);
|
||||
public:
|
||||
ParseAction();
|
||||
static ParseAction Accept();
|
||||
static ParseAction Error();
|
||||
static ParseAction Shift(ParseStateId state_index, std::set<int> precedence_values);
|
||||
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count, int precedence);
|
||||
static ParseAction ShiftExtra();
|
||||
static ParseAction ReduceExtra(rules::Symbol symbol);
|
||||
bool operator==(const ParseAction &action) const;
|
||||
// Discriminator stored in ParseAction::type; each value has a matching
// static factory function on ParseAction.
typedef enum {
  ParseActionTypeError,        // ParseAction::Error() and the default ctor
  ParseActionTypeShift,        // ParseAction::Shift()
  ParseActionTypeShiftExtra,   // ParseAction::ShiftExtra()
  ParseActionTypeReduce,       // ParseAction::Reduce()
  ParseActionTypeReduceExtra,  // ParseAction::ReduceExtra()
  ParseActionTypeAccept,       // ParseAction::Accept()
} ParseActionType;
|
||||
|
||||
ParseActionType type;
|
||||
rules::Symbol symbol;
|
||||
ParseStateId state_index;
|
||||
size_t consumed_symbol_count;
|
||||
std::set<int> precedence_values;
|
||||
};
|
||||
class ParseAction {
|
||||
ParseAction(ParseActionType type, ParseStateId state_index,
|
||||
rules::Symbol symbol, size_t consumed_symbol_count,
|
||||
std::set<int> precedence_values);
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const ParseAction &item);
|
||||
}
|
||||
public:
|
||||
ParseAction();
|
||||
static ParseAction Accept();
|
||||
static ParseAction Error();
|
||||
static ParseAction Shift(ParseStateId state_index,
|
||||
std::set<int> precedence_values);
|
||||
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count,
|
||||
int precedence);
|
||||
static ParseAction ShiftExtra();
|
||||
static ParseAction ReduceExtra(rules::Symbol symbol);
|
||||
bool operator==(const ParseAction &action) const;
|
||||
|
||||
ParseActionType type;
|
||||
rules::Symbol symbol;
|
||||
ParseStateId state_index;
|
||||
size_t consumed_symbol_count;
|
||||
std::set<int> precedence_values;
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const ParseAction &item);
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::ParseAction> {
|
||||
size_t operator()(const tree_sitter::ParseAction &action) const {
|
||||
return (
|
||||
hash<int>()(action.type) ^
|
||||
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
|
||||
hash<size_t>()(action.state_index) ^
|
||||
hash<size_t>()(action.consumed_symbol_count));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::ParseAction> {
|
||||
size_t operator()(const tree_sitter::ParseAction &action) const {
|
||||
return (hash<int>()(action.type) ^
|
||||
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
|
||||
hash<size_t>()(action.state_index) ^
|
||||
hash<size_t>()(action.consumed_symbol_count));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace tree_sitter {
|
||||
class ParseState {
|
||||
public:
|
||||
ParseState();
|
||||
std::map<rules::Symbol, ParseAction> actions;
|
||||
std::set<rules::Symbol> expected_inputs() const;
|
||||
LexStateId lex_state_id;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const ParseState &state);
|
||||
class ParseState {
|
||||
public:
|
||||
ParseState();
|
||||
std::map<rules::Symbol, ParseAction> actions;
|
||||
std::set<rules::Symbol> expected_inputs() const;
|
||||
LexStateId lex_state_id;
|
||||
};
|
||||
|
||||
class ParseTable {
|
||||
public:
|
||||
ParseStateId add_state();
|
||||
void add_action(ParseStateId state_id, rules::Symbol symbol, ParseAction action);
|
||||
std::ostream &operator<<(std::ostream &stream, const ParseState &state);
|
||||
|
||||
std::vector<ParseState> states;
|
||||
std::set<rules::Symbol> symbols;
|
||||
};
|
||||
}
|
||||
class ParseTable {
|
||||
public:
|
||||
ParseStateId add_state();
|
||||
void add_action(ParseStateId state_id, rules::Symbol symbol,
|
||||
ParseAction action);
|
||||
|
||||
std::vector<ParseState> states;
|
||||
std::set<rules::Symbol> symbols;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PARSE_TABLE_H_
|
||||
|
|
|
|||
|
|
@ -11,55 +11,57 @@
|
|||
#include "compiler/rules/repeat.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::to_string;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::Repeat;
|
||||
using rules::Rule;
|
||||
using rules::Seq;
|
||||
using rules::Symbol;
|
||||
namespace prepare_grammar {
|
||||
|
||||
namespace prepare_grammar {
|
||||
class ExpandRepeats : public rules::IdentityRuleFn {
|
||||
string rule_name;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::to_string;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::Repeat;
|
||||
using rules::Rule;
|
||||
using rules::Seq;
|
||||
using rules::Symbol;
|
||||
|
||||
rule_ptr apply_to(const Repeat *rule) {
|
||||
rule_ptr inner_rule = apply(rule->content);
|
||||
size_t index = aux_rules.size();
|
||||
string helper_rule_name = rule_name + string("_repeat") + to_string(index);
|
||||
rule_ptr repeat_symbol = make_shared<Symbol>(offset + index, rules::SymbolOptionAuxiliary);
|
||||
aux_rules.push_back({
|
||||
helper_rule_name,
|
||||
Choice::Build({
|
||||
Seq::Build({ inner_rule, repeat_symbol }),
|
||||
make_shared<Blank>()
|
||||
})
|
||||
});
|
||||
return repeat_symbol;
|
||||
}
|
||||
class ExpandRepeats : public rules::IdentityRuleFn {
|
||||
string rule_name;
|
||||
|
||||
public:
|
||||
ExpandRepeats(string rule_name, size_t offset) : rule_name(rule_name), offset(offset) {}
|
||||
// Replaces a Repeat rule with a reference to a new auxiliary rule.
// The auxiliary rule is named "<rule_name>_repeat<index>" and is defined as
//   choice(seq(<transformed content>, <auxiliary symbol>), blank)
// It is appended to `aux_rules`; the returned symbol has index
// offset + (position of the new rule within aux_rules).
rule_ptr apply_to(const Repeat *rule) {
  rule_ptr content = apply(rule->content);

  size_t position = aux_rules.size();
  string aux_name = rule_name + string("_repeat") + to_string(position);
  rule_ptr self_reference =
      make_shared<Symbol>(offset + position, rules::SymbolOptionAuxiliary);

  auto one_more = Seq::Build({ content, self_reference });
  auto expansion = Choice::Build({ one_more, make_shared<Blank>() });
  aux_rules.push_back({ aux_name, expansion });

  return self_reference;
}
|
||||
|
||||
size_t offset;
|
||||
vector<pair<string, rules::rule_ptr>> aux_rules;
|
||||
};
|
||||
public:
|
||||
ExpandRepeats(string rule_name, size_t offset)
|
||||
: rule_name(rule_name), offset(offset) {}
|
||||
|
||||
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
|
||||
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
|
||||
size_t offset;
|
||||
vector<pair<string, rules::rule_ptr> > aux_rules;
|
||||
};
|
||||
|
||||
for (auto &pair : grammar.rules) {
|
||||
ExpandRepeats expander(pair.first, aux_rules.size());
|
||||
rules.push_back({ pair.first, expander.apply(pair.second) });
|
||||
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
|
||||
}
|
||||
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
|
||||
vector<pair<string, rules::rule_ptr> > rules, aux_rules(grammar.aux_rules);
|
||||
|
||||
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
|
||||
}
|
||||
}
|
||||
for (auto &pair : grammar.rules) {
|
||||
ExpandRepeats expander(pair.first, aux_rules.size());
|
||||
rules.push_back({ pair.first, expander.apply(pair.second) });
|
||||
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(),
|
||||
expander.aux_rules.end());
|
||||
}
|
||||
|
||||
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,11 +4,14 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
|
||||
}
|
||||
}
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_
|
||||
|
|
|
|||
|
|
@ -12,60 +12,59 @@
|
|||
#include "compiler/prepare_grammar/parse_regex.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::String;
|
||||
using rules::Pattern;
|
||||
namespace prepare_grammar {
|
||||
|
||||
namespace prepare_grammar {
|
||||
class ExpandTokens : public rules::IdentityRuleFn {
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::String;
|
||||
using rules::Pattern;
|
||||
|
||||
rule_ptr apply_to(const String *rule) {
|
||||
vector<rule_ptr> elements;
|
||||
for (char val : rule->value)
|
||||
elements.push_back(rules::CharacterSet({ val }).copy());
|
||||
return rules::Seq::Build(elements);
|
||||
}
|
||||
class ExpandTokens : public rules::IdentityRuleFn {
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
rule_ptr apply_to(const Pattern *rule) {
|
||||
auto pair = parse_regex(rule->value);
|
||||
if (!error)
|
||||
error = pair.second;
|
||||
return pair.first;
|
||||
}
|
||||
// Expands a String rule into a sequence with one single-character
// CharacterSet per character of the string's value, in order.
rule_ptr apply_to(const String *rule) {
  vector<rule_ptr> char_rules;
  for (auto it = rule->value.begin(); it != rule->value.end(); ++it) {
    char current = *it;
    char_rules.push_back(rules::CharacterSet({ current }).copy());
  }
  return rules::Seq::Build(char_rules);
}
|
||||
|
||||
public:
|
||||
const GrammarError *error;
|
||||
ExpandTokens() : error(nullptr) {}
|
||||
};
|
||||
// Expands a Pattern rule by parsing its value with parse_regex.
// Only the first reported error is kept: `error` is overwritten only while
// it is still null. The parsed rule is returned either way.
rule_ptr apply_to(const Pattern *rule) {
  auto parsed = parse_regex(rule->value);
  if (error == nullptr) {
    error = parsed.second;
  }
  return parsed.first;
}
|
||||
|
||||
pair<LexicalGrammar, const GrammarError *>
|
||||
expand_tokens(const LexicalGrammar &grammar) {
|
||||
vector<pair<string, rule_ptr>> rules, aux_rules;
|
||||
ExpandTokens expander;
|
||||
public:
|
||||
const GrammarError *error;
|
||||
ExpandTokens() : error(nullptr) {}
|
||||
};
|
||||
|
||||
for (auto &pair : grammar.rules) {
|
||||
auto rule = expander.apply(pair.second);
|
||||
if (expander.error)
|
||||
return { LexicalGrammar(), expander.error };
|
||||
rules.push_back({ pair.first, rule });
|
||||
}
|
||||
pair<LexicalGrammar, const GrammarError *> expand_tokens(
|
||||
const LexicalGrammar &grammar) {
|
||||
vector<pair<string, rule_ptr> > rules, aux_rules;
|
||||
ExpandTokens expander;
|
||||
|
||||
for (auto &pair : grammar.aux_rules) {
|
||||
auto rule = expander.apply(pair.second);
|
||||
if (expander.error)
|
||||
return { LexicalGrammar(), expander.error };
|
||||
aux_rules.push_back({ pair.first, rule });
|
||||
}
|
||||
for (auto &pair : grammar.rules) {
|
||||
auto rule = expander.apply(pair.second);
|
||||
if (expander.error)
|
||||
return { LexicalGrammar(), expander.error };
|
||||
rules.push_back({ pair.first, rule });
|
||||
}
|
||||
|
||||
return {
|
||||
LexicalGrammar(rules, aux_rules, grammar.separators),
|
||||
nullptr,
|
||||
};
|
||||
}
|
||||
}
|
||||
for (auto &pair : grammar.aux_rules) {
|
||||
auto rule = expander.apply(pair.second);
|
||||
if (expander.error)
|
||||
return { LexicalGrammar(), expander.error };
|
||||
aux_rules.push_back({ pair.first, rule });
|
||||
}
|
||||
|
||||
return { LexicalGrammar(rules, aux_rules, grammar.separators), nullptr, };
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,13 +5,15 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<LexicalGrammar, const GrammarError *>
|
||||
expand_tokens(const LexicalGrammar &);
|
||||
}
|
||||
}
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<LexicalGrammar, const GrammarError *> expand_tokens(
|
||||
const LexicalGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_
|
||||
|
||||
|
|
|
|||
|
|
@ -14,120 +14,123 @@
|
|||
#include "compiler/prepare_grammar/token_description.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::map;
|
||||
using std::to_string;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::Symbol;
|
||||
namespace prepare_grammar {
|
||||
|
||||
namespace prepare_grammar {
|
||||
class IsToken : public rules::RuleFn<bool> {
|
||||
bool apply_to(const rules::String *rule) { return true; }
|
||||
bool apply_to(const rules::Pattern *rule) { return true; }
|
||||
bool apply_to(const rules::Metadata *rule) { return rule->value_for(rules::IS_TOKEN); }
|
||||
};
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::map;
|
||||
using std::to_string;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::Symbol;
|
||||
|
||||
class SymbolInliner : public rules::IdentityRuleFn {
|
||||
map<Symbol, Symbol> replacements;
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
// Rule predicate: String and Pattern rules always count as tokens; a
// Metadata rule counts as a token according to its IS_TOKEN value.
class IsToken : public rules::RuleFn<bool> {
  bool apply_to(const rules::String *) {
    return true;
  }

  bool apply_to(const rules::Pattern *) {
    return true;
  }

  bool apply_to(const rules::Metadata *rule) {
    return rule->value_for(rules::IS_TOKEN);
  }
};
|
||||
|
||||
int new_index_for_symbol(const Symbol &symbol) {
|
||||
int result = symbol.index;
|
||||
for (const auto &pair : replacements)
|
||||
if (pair.first.index < symbol.index &&
|
||||
pair.first.is_auxiliary() == symbol.is_auxiliary())
|
||||
result--;
|
||||
return result;
|
||||
}
|
||||
class SymbolInliner : public rules::IdentityRuleFn {
|
||||
map<Symbol, Symbol> replacements;
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
rule_ptr apply_to(const Symbol *rule) {
|
||||
return replace_symbol(*rule).copy();
|
||||
}
|
||||
int new_index_for_symbol(const Symbol &symbol) {
|
||||
int result = symbol.index;
|
||||
for (const auto &pair : replacements)
|
||||
if (pair.first.index < symbol.index &&
|
||||
pair.first.is_auxiliary() == symbol.is_auxiliary())
|
||||
result--;
|
||||
return result;
|
||||
}
|
||||
|
||||
public:
|
||||
Symbol replace_symbol(const Symbol &rule) {
|
||||
if (rule.is_built_in()) return rule;
|
||||
auto replacement_pair = replacements.find(rule);
|
||||
if (replacement_pair != replacements.end())
|
||||
return replacement_pair->second;
|
||||
else
|
||||
return Symbol(new_index_for_symbol(rule), rule.options);
|
||||
}
|
||||
// Rewrites a symbol reference through replace_symbol and returns a copy of
// the resulting symbol as a rule.
rule_ptr apply_to(const Symbol *rule) {
  return replace_symbol(*rule).copy();
}
|
||||
|
||||
SymbolInliner(const map<Symbol, Symbol> &replacements) : replacements(replacements) {}
|
||||
};
|
||||
public:
|
||||
// Maps a symbol to its post-inlining form:
//  - built-in symbols are returned unchanged;
//  - symbols listed in `replacements` map to their replacement;
//  - all others keep their options but get a re-computed index.
Symbol replace_symbol(const Symbol &rule) {
  if (rule.is_built_in()) {
    return rule;
  }

  auto found = replacements.find(rule);
  if (found == replacements.end()) {
    return Symbol(new_index_for_symbol(rule), rule.options);
  }
  return found->second;
}
|
||||
|
||||
const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary);
|
||||
SymbolInliner(const map<Symbol, Symbol> &replacements)
|
||||
: replacements(replacements) {}
|
||||
};
|
||||
|
||||
class TokenExtractor : public rules::IdentityRuleFn {
|
||||
rule_ptr apply_to_token(const rules::Rule *input) {
|
||||
auto rule = input->copy();
|
||||
for (size_t i = 0; i < tokens.size(); i++)
|
||||
if (tokens[i].second->operator==(*rule))
|
||||
return make_shared<Symbol>(i, SymbolOptionAuxToken);
|
||||
size_t index = tokens.size();
|
||||
tokens.push_back({ token_description(rule), rule });
|
||||
return make_shared<Symbol>(index, SymbolOptionAuxToken);
|
||||
}
|
||||
const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(
|
||||
rules::SymbolOptionToken | rules::SymbolOptionAuxiliary);
|
||||
|
||||
rule_ptr default_apply(const rules::Rule *rule) {
|
||||
auto result = rule->copy();
|
||||
if (IsToken().apply(rule->copy())) {
|
||||
return apply_to_token(rule);
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
class TokenExtractor : public rules::IdentityRuleFn {
|
||||
// Returns an auxiliary-token Symbol for `input`. If an equal rule is already
// stored in `tokens`, the Symbol carries that entry's position; otherwise a
// copy of the rule is appended (paired with its token_description) and the
// Symbol carries the new entry's position.
rule_ptr apply_to_token(const rules::Rule *input) {
  auto candidate = input->copy();

  size_t index = 0;
  for (const auto &existing : tokens) {
    if (existing.second->operator==(*candidate)) {
      return make_shared<Symbol>(index, SymbolOptionAuxToken);
    }
    ++index;
  }

  // Not found: `index` now equals tokens.size(), the slot the new entry takes.
  tokens.push_back({ token_description(candidate), candidate });
  return make_shared<Symbol>(index, SymbolOptionAuxToken);
}
|
||||
|
||||
rule_ptr apply_to(const rules::Metadata *rule) {
|
||||
auto result = rule->copy();
|
||||
if (IsToken().apply(rule->copy())) {
|
||||
return apply_to_token(rule);
|
||||
} else {
|
||||
return rules::IdentityRuleFn::apply_to(rule);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
vector<pair<string, rule_ptr>> tokens;
|
||||
};
|
||||
|
||||
pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &input_grammar) {
|
||||
vector<pair<string, rule_ptr>> rules, tokens, aux_rules, aux_tokens;
|
||||
set<Symbol> ubiquitous_tokens;
|
||||
|
||||
TokenExtractor extractor;
|
||||
map<Symbol, Symbol> symbol_replacements;
|
||||
|
||||
for (size_t i = 0; i < input_grammar.rules.size(); i++) {
|
||||
auto pair = input_grammar.rules[i];
|
||||
if (IsToken().apply(pair.second)) {
|
||||
tokens.push_back(pair);
|
||||
symbol_replacements.insert({
|
||||
Symbol(i),
|
||||
Symbol(tokens.size() - 1, rules::SymbolOptionToken)
|
||||
});
|
||||
} else {
|
||||
rules.push_back({ pair.first, extractor.apply(pair.second) });
|
||||
}
|
||||
}
|
||||
|
||||
aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());
|
||||
|
||||
SymbolInliner inliner(symbol_replacements);
|
||||
for (auto &pair : rules)
|
||||
pair.second = inliner.apply(pair.second);
|
||||
for (auto &symbol : input_grammar.ubiquitous_tokens)
|
||||
ubiquitous_tokens.insert(inliner.replace_symbol(symbol));
|
||||
|
||||
return {
|
||||
SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
|
||||
LexicalGrammar(tokens, aux_tokens, input_grammar.separators),
|
||||
};
|
||||
}
|
||||
// Fallback for rule kinds without a dedicated overload: if IsToken accepts the
// rule it is handed to apply_to_token(); otherwise a copy of the rule is
// returned unchanged.
rule_ptr default_apply(const rules::Rule *rule) {
  // Copy once and reuse it for the IsToken check; the original made a second
  // copy() just to feed the predicate.
  auto result = rule->copy();
  if (IsToken().apply(result)) {
    return apply_to_token(rule);
  }
  return result;
}
|
||||
|
||||
// Metadata rules that IsToken accepts are handed to apply_to_token(); all
// other Metadata rules are delegated to IdentityRuleFn::apply_to.
rule_ptr apply_to(const rules::Metadata *rule) {
  // The original also stored an extra rule->copy() in a local that was never
  // read; only the copy needed by the IsToken check is kept.
  if (IsToken().apply(rule->copy())) {
    return apply_to_token(rule);
  }
  return rules::IdentityRuleFn::apply_to(rule);
}
|
||||
|
||||
public:
|
||||
vector<pair<string, rule_ptr> > tokens;
|
||||
};
|
||||
|
||||
// Splits an interned grammar into a syntax grammar and a lexical grammar.
//
// Rules that IsToken accepts as a whole are appended to the lexical `tokens`
// list and recorded in `symbol_replacements` (old Symbol(i) -> token Symbol at
// the new position). Every other rule is run through TokenExtractor and kept
// as a syntax rule; the tokens the extractor collected become the lexical
// grammar's auxiliary tokens. Finally SymbolInliner rewrites the syntax rules
// and the ubiquitous tokens using the recorded replacements.
pair<SyntaxGrammar, LexicalGrammar> extract_tokens(
    const InternedGrammar &input_grammar) {
  vector<pair<string, rule_ptr> > rules, tokens, aux_rules, aux_tokens;
  set<Symbol> ubiquitous_tokens;

  TokenExtractor extractor;
  map<Symbol, Symbol> symbol_replacements;

  for (size_t i = 0; i < input_grammar.rules.size(); i++) {
    // Bind by reference: the entry is only read (and copied by push_back when
    // needed), so a per-iteration copy of the name/rule pair is unnecessary.
    const auto &entry = input_grammar.rules[i];
    if (IsToken().apply(entry.second)) {
      tokens.push_back(entry);
      symbol_replacements.insert(
          { Symbol(i), Symbol(tokens.size() - 1, rules::SymbolOptionToken) });
    } else {
      rules.push_back({ entry.first, extractor.apply(entry.second) });
    }
  }

  aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(),
                    extractor.tokens.end());

  SymbolInliner inliner(symbol_replacements);
  for (auto &entry : rules)
    entry.second = inliner.apply(entry.second);
  for (auto &symbol : input_grammar.ubiquitous_tokens)
    ubiquitous_tokens.insert(inliner.replace_symbol(symbol));

  return { SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
           LexicalGrammar(tokens, aux_tokens, input_grammar.separators), };
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,12 +5,16 @@
|
|||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &);
|
||||
}
|
||||
}
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<SyntaxGrammar, LexicalGrammar> extract_tokens(
|
||||
const InternedGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_
|
||||
|
|
|
|||
|
|
@ -10,70 +10,72 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using rules::rule_ptr;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::pair;
|
||||
using std::make_shared;
|
||||
namespace prepare_grammar {
|
||||
|
||||
namespace prepare_grammar {
|
||||
class InternSymbols : public rules::IdentityRuleFn {
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
using std::string;
|
||||
using rules::rule_ptr;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::pair;
|
||||
using std::make_shared;
|
||||
|
||||
rule_ptr apply_to(const rules::NamedSymbol *rule) {
|
||||
auto result = symbol_for_rule_name(rule->name);
|
||||
if (!result.get()) missing_rule_name = rule->name;
|
||||
return result;
|
||||
}
|
||||
class InternSymbols : public rules::IdentityRuleFn {
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
public:
|
||||
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
|
||||
for (size_t i = 0; i < grammar.rules().size(); i++)
|
||||
if (grammar.rules()[i].first == rule_name)
|
||||
return make_shared<rules::Symbol>(i);
|
||||
return nullptr;
|
||||
}
|
||||
// Replaces a named symbol with the Symbol found by symbol_for_rule_name().
// When no rule has that name, the name is stored in `missing_rule_name` and
// the (null) lookup result is returned as-is.
rule_ptr apply_to(const rules::NamedSymbol *rule) {
  auto interned = symbol_for_rule_name(rule->name);
  if (interned == nullptr) {
    missing_rule_name = rule->name;
  }
  return interned;
}
|
||||
|
||||
explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {}
|
||||
const Grammar grammar;
|
||||
string missing_rule_name;
|
||||
};
|
||||
public:
|
||||
// Looks up `rule_name` among the grammar's rules and returns a Symbol holding
// the position of the first rule with that name, or nullptr when none matches.
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
  // Fetch the rule list once instead of calling grammar.rules() twice per
  // iteration. Binding to a const reference is valid whether rules() returns
  // a reference or a value.
  const auto &all_rules = grammar.rules();
  for (size_t i = 0; i < all_rules.size(); i++) {
    if (all_rules[i].first == rule_name) {
      return make_shared<rules::Symbol>(i);
    }
  }
  return nullptr;
}
|
||||
|
||||
pair<InternedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
|
||||
InternedGrammar grammar;
|
||||
return {
|
||||
grammar,
|
||||
new GrammarError(GrammarErrorTypeUndefinedSymbol,
|
||||
"Undefined rule '" + rule_name + "'")
|
||||
};
|
||||
}
|
||||
explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {}
|
||||
const Grammar grammar;
|
||||
string missing_rule_name;
|
||||
};
|
||||
|
||||
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
|
||||
InternSymbols interner(grammar);
|
||||
vector<pair<string, rule_ptr>> rules;
|
||||
|
||||
for (auto &pair : grammar.rules()) {
|
||||
auto new_rule = interner.apply(pair.second);
|
||||
if (!interner.missing_rule_name.empty())
|
||||
return missing_rule_error(interner.missing_rule_name);
|
||||
rules.push_back({ pair.first, new_rule });
|
||||
}
|
||||
|
||||
set<rules::Symbol> ubiquitous_tokens;
|
||||
for (auto &name : grammar.ubiquitous_tokens()) {
|
||||
auto token = interner.symbol_for_rule_name(name);
|
||||
if (!token.get())
|
||||
return missing_rule_error(name);
|
||||
ubiquitous_tokens.insert(*token);
|
||||
}
|
||||
|
||||
InternedGrammar result;
|
||||
result.rules = rules;
|
||||
result.ubiquitous_tokens = ubiquitous_tokens;
|
||||
result.separators = grammar.separators();
|
||||
|
||||
return { result, nullptr };
|
||||
}
|
||||
}
|
||||
// Builds the failure result for an undefined rule: a default-constructed
// InternedGrammar paired with a heap-allocated GrammarError of type
// GrammarErrorTypeUndefinedSymbol naming the rule. The error is created with
// `new`; nothing in this function releases it.
pair<InternedGrammar, const GrammarError *> missing_rule_error(
    string rule_name) {
  InternedGrammar empty_grammar;
  const string message = "Undefined rule '" + rule_name + "'";
  const GrammarError *failure =
      new GrammarError(GrammarErrorTypeUndefinedSymbol, message);
  return { empty_grammar, failure };
}
|
||||
|
||||
// Converts a Grammar into an InternedGrammar by running every rule through
// InternSymbols and resolving each ubiquitous token name to a Symbol.
// Returns { grammar, nullptr } on success. As soon as a rule name cannot be
// resolved, returns the result of missing_rule_error() for that name instead.
pair<InternedGrammar, const GrammarError *> intern_symbols(
    const Grammar &grammar) {
  InternSymbols interner(grammar);

  vector<pair<string, rule_ptr> > interned_rules;
  for (auto &entry : grammar.rules()) {
    auto interned = interner.apply(entry.second);
    // The interner reports an unknown name through missing_rule_name.
    if (!interner.missing_rule_name.empty()) {
      return missing_rule_error(interner.missing_rule_name);
    }
    interned_rules.push_back({ entry.first, interned });
  }

  set<rules::Symbol> ubiquitous;
  for (auto &name : grammar.ubiquitous_tokens()) {
    auto token = interner.symbol_for_rule_name(name);
    if (token == nullptr) {
      return missing_rule_error(name);
    }
    ubiquitous.insert(*token);
  }

  InternedGrammar result;
  result.rules = interned_rules;
  result.ubiquitous_tokens = ubiquitous;
  result.separators = grammar.separators();

  return { result, nullptr };
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -7,11 +7,15 @@
|
|||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
|
||||
}
|
||||
}
|
||||
class Grammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<InternedGrammar, const GrammarError *> intern_symbols(
|
||||
const Grammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_INTERN_SYMBOLS_H_
|
||||
|
|
|
|||
|
|
@ -9,14 +9,16 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
class InternedGrammar {
|
||||
public:
|
||||
std::vector<std::pair<std::string, rules::rule_ptr>> rules;
|
||||
std::set<rules::Symbol> ubiquitous_tokens;
|
||||
std::set<char> separators;
|
||||
};
|
||||
}
|
||||
}
|
||||
namespace prepare_grammar {
|
||||
|
||||
class InternedGrammar {
|
||||
public:
|
||||
std::vector<std::pair<std::string, rules::rule_ptr> > rules;
|
||||
std::set<rules::Symbol> ubiquitous_tokens;
|
||||
std::set<char> separators;
|
||||
};
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
|
||||
|
|
|
|||
|
|
@ -10,202 +10,193 @@
|
|||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::CharacterSet;
|
||||
using rules::Seq;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::Repeat;
|
||||
using rules::CharacterRange;
|
||||
using rules::blank;
|
||||
namespace prepare_grammar {
|
||||
|
||||
namespace prepare_grammar {
|
||||
class PatternParser {
|
||||
public:
|
||||
explicit PatternParser(const string &input) :
|
||||
input(input),
|
||||
length(input.length()),
|
||||
position(0) {}
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::CharacterSet;
|
||||
using rules::Seq;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::Repeat;
|
||||
using rules::CharacterRange;
|
||||
using rules::blank;
|
||||
|
||||
pair<rule_ptr, const GrammarError *> rule(bool nested) {
|
||||
vector<rule_ptr> choices = {};
|
||||
do {
|
||||
if (!choices.empty()) {
|
||||
if (peek() == '|')
|
||||
next();
|
||||
else
|
||||
break;
|
||||
}
|
||||
auto pair = term(nested);
|
||||
if (pair.second)
|
||||
return { blank(), pair.second };
|
||||
choices.push_back(pair.first);
|
||||
} while (has_more_input());
|
||||
auto rule = (choices.size() > 1) ? make_shared<Choice>(choices) : choices.front();
|
||||
return { rule, nullptr };
|
||||
}
|
||||
class PatternParser {
|
||||
public:
|
||||
explicit PatternParser(const string &input)
|
||||
: input(input), length(input.length()), position(0) {}
|
||||
|
||||
private:
|
||||
pair<rule_ptr, const GrammarError *> term(bool nested) {
|
||||
rule_ptr result = blank();
|
||||
do {
|
||||
if (peek() == '|')
|
||||
break;
|
||||
if (nested && peek() == ')')
|
||||
break;
|
||||
auto pair = factor();
|
||||
if (pair.second)
|
||||
return { blank(), pair.second };
|
||||
result = Seq::Build({ result, pair.first });
|
||||
} while (has_more_input());
|
||||
return { result, nullptr };
|
||||
}
|
||||
// Parses one or more terms separated by '|'. A single term is returned
// directly; several terms are wrapped in a Choice. The first error reported
// by term() aborts parsing and is returned together with a blank rule.
pair<rule_ptr, const GrammarError *> rule(bool nested) {
  vector<rule_ptr> alternatives;

  for (;;) {
    // After the first term, continue only across a '|' separator.
    if (!alternatives.empty()) {
      if (peek() != '|') {
        break;
      }
      next();
    }

    auto parsed = term(nested);
    if (parsed.second) {
      return { blank(), parsed.second };
    }
    alternatives.push_back(parsed.first);

    if (!has_more_input()) {
      break;
    }
  }

  rule_ptr result;
  if (alternatives.size() > 1) {
    result = make_shared<Choice>(alternatives);
  } else {
    result = alternatives.front();
  }
  return { result, nullptr };
}
|
||||
|
||||
pair<rule_ptr, const GrammarError *> factor() {
|
||||
auto pair = atom();
|
||||
if (pair.second)
|
||||
return { blank(), pair.second };
|
||||
rule_ptr result = pair.first;
|
||||
if (has_more_input()) {
|
||||
switch (peek()) {
|
||||
case '*':
|
||||
next();
|
||||
result = make_shared<Repeat>(result);
|
||||
break;
|
||||
case '+':
|
||||
next();
|
||||
result = make_shared<Seq>(result, make_shared<Repeat>(result));
|
||||
break;
|
||||
case '?':
|
||||
next();
|
||||
result = Choice::Build({ result, make_shared<Blank>() });
|
||||
break;
|
||||
}
|
||||
}
|
||||
return { result, nullptr };
|
||||
}
|
||||
private:
|
||||
// Parses a run of factors and chains them with Seq::Build, starting from a
// blank rule. Stops before '|', before ')' when `nested` is set, or when the
// input is exhausted. An error from factor() is returned with a blank rule.
pair<rule_ptr, const GrammarError *> term(bool nested) {
  rule_ptr sequence = blank();

  for (;;) {
    const char upcoming = peek();
    if (upcoming == '|' || (nested && upcoming == ')')) {
      break;
    }

    auto parsed = factor();
    if (parsed.second) {
      return { blank(), parsed.second };
    }
    sequence = Seq::Build({ sequence, parsed.first });

    if (!has_more_input()) {
      break;
    }
  }

  return { sequence, nullptr };
}
|
||||
|
||||
pair<rule_ptr, const GrammarError *> atom() {
|
||||
switch (peek()) {
|
||||
case '(': {
|
||||
next();
|
||||
auto pair = rule(true);
|
||||
if (pair.second)
|
||||
return { blank(), pair.second };
|
||||
if (peek() != ')')
|
||||
return error("unmatched open paren");
|
||||
next();
|
||||
return { pair.first, nullptr };
|
||||
}
|
||||
case '[': {
|
||||
next();
|
||||
auto pair = char_set();
|
||||
if (pair.second)
|
||||
return { blank(), pair.second };
|
||||
if (peek() != ']')
|
||||
return error("unmatched open square bracket");
|
||||
next();
|
||||
return { pair.first.copy(), nullptr };
|
||||
}
|
||||
case ')': {
|
||||
return error("unmatched close paren");
|
||||
}
|
||||
case ']': {
|
||||
return error("unmatched close square bracket");
|
||||
}
|
||||
case '.': {
|
||||
next();
|
||||
return { CharacterSet({ '\n' }).complement().copy(), nullptr };
|
||||
}
|
||||
default: {
|
||||
auto pair = single_char();
|
||||
if (pair.second)
|
||||
return { blank(), pair.second };
|
||||
return { pair.first.copy(), nullptr };
|
||||
}
|
||||
}
|
||||
}
|
||||
// Parses an atom followed by an optional quantifier:
//   '*' -> Repeat(atom)
//   '+' -> Seq(atom, Repeat(atom))
//   '?' -> Choice of atom or Blank
// An error from atom() is returned with a blank rule.
pair<rule_ptr, const GrammarError *> factor() {
  auto parsed = atom();
  if (parsed.second) {
    return { blank(), parsed.second };
  }

  rule_ptr result = parsed.first;
  if (!has_more_input()) {
    return { result, nullptr };
  }

  const char quantifier = peek();
  if (quantifier == '*') {
    next();
    result = make_shared<Repeat>(result);
  } else if (quantifier == '+') {
    next();
    result = make_shared<Seq>(result, make_shared<Repeat>(result));
  } else if (quantifier == '?') {
    next();
    result = Choice::Build({ result, make_shared<Blank>() });
  }
  return { result, nullptr };
}
|
||||
|
||||
pair<CharacterSet, const GrammarError *> char_set() {
|
||||
bool is_affirmative = true;
|
||||
if (peek() == '^') {
|
||||
next();
|
||||
is_affirmative = false;
|
||||
}
|
||||
CharacterSet result;
|
||||
while (has_more_input() && (peek() != ']')) {
|
||||
auto pair = single_char();
|
||||
if (pair.second)
|
||||
return { CharacterSet(), pair.second };
|
||||
result.add_set(pair.first);
|
||||
}
|
||||
if (!is_affirmative)
|
||||
result = result.complement();
|
||||
return { result, nullptr };
|
||||
}
|
||||
// Parses the smallest unit of a pattern, chosen by the current character:
//   '('  parenthesised sub-rule (must be closed by ')')
//   '['  character set (must be closed by ']')
//   ')' / ']'  reported as unmatched closers
//   '.'  any character except '\n'
//   anything else: a single (possibly escaped or ranged) character
pair<rule_ptr, const GrammarError *> atom() {
  const char current = peek();

  if (current == '(') {
    next();
    auto group = rule(true);
    if (group.second) {
      return { blank(), group.second };
    }
    if (peek() != ')') {
      return error("unmatched open paren");
    }
    next();
    return { group.first, nullptr };
  }

  if (current == '[') {
    next();
    auto set_result = char_set();
    if (set_result.second) {
      return { blank(), set_result.second };
    }
    if (peek() != ']') {
      return error("unmatched open square bracket");
    }
    next();
    return { set_result.first.copy(), nullptr };
  }

  if (current == ')') {
    return error("unmatched close paren");
  }

  if (current == ']') {
    return error("unmatched close square bracket");
  }

  if (current == '.') {
    next();
    return { CharacterSet({ '\n' }).complement().copy(), nullptr };
  }

  auto character = single_char();
  if (character.second) {
    return { blank(), character.second };
  }
  return { character.first.copy(), nullptr };
}
|
||||
|
||||
pair<CharacterSet, const GrammarError *> single_char() {
|
||||
CharacterSet value;
|
||||
switch (peek()) {
|
||||
case '\\':
|
||||
next();
|
||||
value = escaped_char(peek());
|
||||
next();
|
||||
break;
|
||||
default:
|
||||
char first_char = peek();
|
||||
next();
|
||||
if (peek() == '-') {
|
||||
next();
|
||||
value = CharacterSet({ CharacterRange(first_char, peek()) });
|
||||
next();
|
||||
} else {
|
||||
value = CharacterSet({ first_char });
|
||||
}
|
||||
}
|
||||
return { value, nullptr };
|
||||
}
|
||||
// Parses the inside of a bracket expression (the caller has already consumed
// '['). A leading '^' negates the set. Characters are accumulated until ']'
// or end of input; the closing ']' itself is not consumed here.
pair<CharacterSet, const GrammarError *> char_set() {
  const bool negated = (peek() == '^');
  if (negated) {
    next();
  }

  CharacterSet members;
  while (has_more_input() && peek() != ']') {
    auto parsed = single_char();
    if (parsed.second) {
      return { CharacterSet(), parsed.second };
    }
    members.add_set(parsed.first);
  }

  if (negated) {
    members = members.complement();
  }
  return { members, nullptr };
}
|
||||
|
||||
CharacterSet escaped_char(char value) {
|
||||
switch (value) {
|
||||
case 'a':
|
||||
return CharacterSet({ {'a', 'z'}, {'A', 'Z'} });
|
||||
case 'w':
|
||||
return CharacterSet({ {'a', 'z'}, {'A', 'Z'}, {'0', '9'}});
|
||||
case 'd':
|
||||
return CharacterSet({ {'0', '9'} });
|
||||
default:
|
||||
return CharacterSet({ value });
|
||||
}
|
||||
}
|
||||
|
||||
void next() {
|
||||
position++;
|
||||
}
|
||||
|
||||
char peek() {
|
||||
return input[position];
|
||||
}
|
||||
|
||||
bool has_more_input() {
|
||||
return position < length;
|
||||
}
|
||||
|
||||
pair<rule_ptr, const GrammarError *> error(string msg) {
|
||||
return { blank(), new GrammarError(GrammarErrorTypeRegex, msg) };
|
||||
}
|
||||
|
||||
const string input;
|
||||
const size_t length;
|
||||
size_t position;
|
||||
};
|
||||
|
||||
pair<rule_ptr, const GrammarError *> parse_regex(const std::string &input) {
|
||||
return PatternParser(input).rule(false);
|
||||
// Parses one character-level item into a CharacterSet:
//   - a backslash followed by a character, expanded via escaped_char();
//   - a character followed by '-' and another character, taken as a range;
//   - otherwise the single character itself.
// Always reports success (the error slot is nullptr).
pair<CharacterSet, const GrammarError *> single_char() {
  CharacterSet value;

  if (peek() == '\\') {
    next();
    value = escaped_char(peek());
    next();
  } else {
    char first_char = peek();
    next();
    if (peek() == '-') {
      next();
      value = CharacterSet({ CharacterRange(first_char, peek()) });
      next();
    } else {
      value = CharacterSet({ first_char });
    }
  }

  return { value, nullptr };
}
|
||||
|
||||
// Expands the character that follows a backslash:
//   'a' -> a-z, A-Z
//   'w' -> a-z, A-Z, 0-9
//   'd' -> 0-9
// Any other character stands for itself.
CharacterSet escaped_char(char value) {
  if (value == 'a') {
    return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' } });
  }
  if (value == 'w') {
    return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } });
  }
  if (value == 'd') {
    return CharacterSet({ { '0', '9' } });
  }
  return CharacterSet({ value });
}
|
||||
|
||||
// Advances the read position by one; performs no bounds check.
void next() { position++; }
|
||||
|
||||
char peek() { return input[position]; }
|
||||
|
||||
// True while the read position is still before `length`.
bool has_more_input() { return position < length; }
|
||||
|
||||
// Builds a failure result: a blank rule paired with a heap-allocated
// GrammarError of type GrammarErrorTypeRegex carrying `msg`. The error is
// created with `new`; nothing in this function releases it.
pair<rule_ptr, const GrammarError *> error(string msg) {
  rule_ptr placeholder = blank();
  const GrammarError *failure = new GrammarError(GrammarErrorTypeRegex, msg);
  return { placeholder, failure };
}
|
||||
|
||||
const string input;
|
||||
const size_t length;
|
||||
size_t position;
|
||||
};
|
||||
|
||||
// Parses `input` as a pattern and returns the resulting rule together with an
// error pointer (nullptr on success), as produced by PatternParser::rule for
// a non-nested context.
pair<rule_ptr, const GrammarError *> parse_regex(const std::string &input) {
  PatternParser parser(input);
  return parser.rule(false);
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -6,10 +6,12 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
std::pair<rules::rule_ptr, const GrammarError *>
|
||||
parse_regex(const std::string &);
|
||||
}
|
||||
}
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<rules::rule_ptr, const GrammarError *> parse_regex(
|
||||
const std::string &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_PARSE_REGEX_H_
|
||||
|
|
|
|||
|
|
@ -7,29 +7,31 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::tuple;
|
||||
using std::make_tuple;
|
||||
namespace prepare_grammar {
|
||||
|
||||
namespace prepare_grammar {
|
||||
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
|
||||
prepare_grammar(const Grammar &input_grammar) {
|
||||
auto result = intern_symbols(input_grammar);
|
||||
const InternedGrammar &grammar = result.first;
|
||||
const GrammarError *error = result.second;
|
||||
using std::tuple;
|
||||
using std::make_tuple;
|
||||
|
||||
if (error)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
||||
const Grammar &input_grammar) {
|
||||
auto result = intern_symbols(input_grammar);
|
||||
const InternedGrammar &grammar = result.first;
|
||||
const GrammarError *error = result.second;
|
||||
|
||||
auto grammars = extract_tokens(grammar);
|
||||
const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first);
|
||||
auto expand_tokens_result = expand_tokens(grammars.second);
|
||||
const LexicalGrammar &lex_grammar = expand_tokens_result.first;
|
||||
error = expand_tokens_result.second;
|
||||
if (error)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
if (error)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
auto grammars = extract_tokens(grammar);
|
||||
const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first);
|
||||
auto expand_tokens_result = expand_tokens(grammars.second);
|
||||
const LexicalGrammar &lex_grammar = expand_tokens_result.first;
|
||||
error = expand_tokens_result.second;
|
||||
|
||||
return make_tuple(rule_grammar, lex_grammar, nullptr);
|
||||
}
|
||||
}
|
||||
if (error)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
return make_tuple(rule_grammar, lex_grammar, nullptr);
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,13 +5,16 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class GrammarError;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
|
||||
prepare_grammar(const Grammar &);
|
||||
}
|
||||
}
|
||||
class Grammar;
|
||||
class GrammarError;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
||||
const Grammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
|
||||
|
|
|
|||
|
|
@ -9,36 +9,36 @@
|
|||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
namespace prepare_grammar {
|
||||
|
||||
namespace prepare_grammar {
|
||||
class TokenDescription : public rules::RuleFn<string> {
|
||||
string apply_to(const rules::Pattern *rule) {
|
||||
return "/" + util::escape_string(rule->value) + "/";
|
||||
}
|
||||
using std::string;
|
||||
|
||||
string apply_to(const rules::String *rule) {
|
||||
return "'" + util::escape_string(rule->value) + "'";
|
||||
}
|
||||
class TokenDescription : public rules::RuleFn<string> {
|
||||
// Describes a pattern rule as its escaped value wrapped in slashes.
string apply_to(const rules::Pattern *rule) {
  string description = "/";
  description += util::escape_string(rule->value);
  description += "/";
  return description;
}
|
||||
|
||||
string apply_to(const rules::Metadata *rule) {
|
||||
return apply(rule->rule);
|
||||
}
|
||||
// Describes a string rule as its escaped value wrapped in single quotes.
string apply_to(const rules::String *rule) {
  string description = "'";
  description += util::escape_string(rule->value);
  description += "'";
  return description;
}
|
||||
|
||||
string apply_to(const rules::Seq *rule) {
|
||||
return "(seq " + apply(rule->left) + " " + apply(rule->right) + ")";
|
||||
}
|
||||
// A metadata rule is described by the description of the rule it holds.
string apply_to(const rules::Metadata *rule) { return apply(rule->rule); }
|
||||
|
||||
string apply_to(const rules::Choice *rule) {
|
||||
string result = "(choice";
|
||||
for (auto &element : rule->elements)
|
||||
result += " " + apply(element);
|
||||
return result + ")";
|
||||
}
|
||||
};
|
||||
// Describes a sequence as "(seq <left> <right>)".
string apply_to(const rules::Seq *rule) {
  const string left_description = apply(rule->left);
  const string right_description = apply(rule->right);
  return "(seq " + left_description + " " + right_description + ")";
}
|
||||
|
||||
std::string token_description(const rules::rule_ptr &rule) {
|
||||
return TokenDescription().apply(rule);
|
||||
}
|
||||
}
|
||||
// Describes a choice as "(choice <e1> <e2> ...)", one entry per element in
// order.
string apply_to(const rules::Choice *rule) {
  string description("(choice");
  for (const auto &element : rule->elements) {
    description += " ";
    description += apply(element);
  }
  description += ")";
  return description;
}
|
||||
};
|
||||
|
||||
// Returns the textual description that TokenDescription produces for `rule`.
std::string token_description(const rules::rule_ptr &rule) {
  std::string description = TokenDescription().apply(rule);
  return description;
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,9 +5,11 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
std::string token_description(const rules::rule_ptr &);
|
||||
}
|
||||
}
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::string token_description(const rules::rule_ptr &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_TOKEN_DESCRIPTION_H_
|
||||
|
|
|
|||
|
|
@ -5,54 +5,52 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
|
||||
const rules::rule_ptr & PreparedGrammar::rule(const rules::Symbol &symbol) const {
|
||||
return symbol.is_auxiliary() ?
|
||||
aux_rules[symbol.index].second :
|
||||
rules[symbol.index].second;
|
||||
}
|
||||
using std::string;
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
|
||||
const string & PreparedGrammar::rule_name(const rules::Symbol &symbol) const {
|
||||
return symbol.is_auxiliary() ?
|
||||
aux_rules[symbol.index].first :
|
||||
rules[symbol.index].first;
|
||||
}
|
||||
|
||||
PreparedGrammar::PreparedGrammar() {}
|
||||
SyntaxGrammar::SyntaxGrammar() {}
|
||||
LexicalGrammar::LexicalGrammar() {}
|
||||
|
||||
SyntaxGrammar::SyntaxGrammar(
|
||||
const vector<pair<string, rules::rule_ptr>> &rules,
|
||||
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
|
||||
PreparedGrammar(rules, aux_rules) {}
|
||||
|
||||
LexicalGrammar::LexicalGrammar(
|
||||
const vector<pair<string, rules::rule_ptr>> &rules,
|
||||
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
|
||||
PreparedGrammar(rules, aux_rules) {}
|
||||
|
||||
PreparedGrammar::PreparedGrammar(
|
||||
const vector<pair<string, rules::rule_ptr>> &rules,
|
||||
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
|
||||
rules(rules),
|
||||
aux_rules(aux_rules) {}
|
||||
|
||||
SyntaxGrammar::SyntaxGrammar(
|
||||
const vector<pair<string, rules::rule_ptr>> &rules,
|
||||
const vector<pair<string, rules::rule_ptr>> &aux_rules,
|
||||
const set<rules::Symbol> &ubiquitous_tokens) :
|
||||
PreparedGrammar(rules, aux_rules),
|
||||
ubiquitous_tokens(ubiquitous_tokens) {}
|
||||
|
||||
LexicalGrammar::LexicalGrammar(
|
||||
const vector<pair<string, rules::rule_ptr>> &rules,
|
||||
const vector<pair<string, rules::rule_ptr>> &aux_rules,
|
||||
const set<char> &separators) :
|
||||
PreparedGrammar(rules, aux_rules),
|
||||
separators(separators) {}
|
||||
// Returns the rule stored at symbol.index, looked up in aux_rules when the
// symbol is auxiliary and in rules otherwise. The index is not range-checked.
const rules::rule_ptr &PreparedGrammar::rule(const rules::Symbol &symbol)
    const {
  const auto &table = symbol.is_auxiliary() ? aux_rules : rules;
  return table[symbol.index].second;
}
|
||||
|
||||
// Returns the name stored at symbol.index, looked up in aux_rules when the
// symbol is auxiliary and in rules otherwise. The index is not range-checked.
const string &PreparedGrammar::rule_name(const rules::Symbol &symbol) const {
  const auto &table = symbol.is_auxiliary() ? aux_rules : rules;
  return table[symbol.index].first;
}
|
||||
|
||||
PreparedGrammar::PreparedGrammar() {}
|
||||
SyntaxGrammar::SyntaxGrammar() {}
|
||||
LexicalGrammar::LexicalGrammar() {}
|
||||
|
||||
SyntaxGrammar::SyntaxGrammar(
|
||||
const vector<pair<string, rules::rule_ptr> > &rules,
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules)
|
||||
: PreparedGrammar(rules, aux_rules) {}
|
||||
|
||||
LexicalGrammar::LexicalGrammar(
|
||||
const vector<pair<string, rules::rule_ptr> > &rules,
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules)
|
||||
: PreparedGrammar(rules, aux_rules) {}
|
||||
|
||||
PreparedGrammar::PreparedGrammar(
|
||||
const vector<pair<string, rules::rule_ptr> > &rules,
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules)
|
||||
: rules(rules), aux_rules(aux_rules) {}
|
||||
|
||||
SyntaxGrammar::SyntaxGrammar(
|
||||
const vector<pair<string, rules::rule_ptr> > &rules,
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules,
|
||||
const set<rules::Symbol> &ubiquitous_tokens)
|
||||
: PreparedGrammar(rules, aux_rules), ubiquitous_tokens(ubiquitous_tokens) {}
|
||||
|
||||
LexicalGrammar::LexicalGrammar(
|
||||
const vector<pair<string, rules::rule_ptr> > &rules,
|
||||
const vector<pair<string, rules::rule_ptr> > &aux_rules,
|
||||
const set<char> &separators)
|
||||
: PreparedGrammar(rules, aux_rules), separators(separators) {}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -9,47 +9,49 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar {
|
||||
public:
|
||||
PreparedGrammar();
|
||||
PreparedGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
|
||||
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
|
||||
class PreparedGrammar {
|
||||
public:
|
||||
PreparedGrammar();
|
||||
PreparedGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules);
|
||||
|
||||
const std::string & rule_name(const rules::Symbol &symbol) const;
|
||||
const rules::rule_ptr & rule(const rules::Symbol &symbol) const;
|
||||
};
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > rules;
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > aux_rules;
|
||||
|
||||
class SyntaxGrammar : public PreparedGrammar {
|
||||
public:
|
||||
SyntaxGrammar();
|
||||
SyntaxGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
|
||||
SyntaxGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
|
||||
const std::set<rules::Symbol> &ubiquitous_tokens);
|
||||
const std::string &rule_name(const rules::Symbol &symbol) const;
|
||||
const rules::rule_ptr &rule(const rules::Symbol &symbol) const;
|
||||
};
|
||||
|
||||
std::set<rules::Symbol> ubiquitous_tokens;
|
||||
};
|
||||
class SyntaxGrammar : public PreparedGrammar {
|
||||
public:
|
||||
SyntaxGrammar();
|
||||
SyntaxGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules);
|
||||
SyntaxGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules,
|
||||
const std::set<rules::Symbol> &ubiquitous_tokens);
|
||||
|
||||
class LexicalGrammar : public PreparedGrammar {
|
||||
public:
|
||||
LexicalGrammar();
|
||||
LexicalGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
|
||||
LexicalGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
|
||||
const std::set<char> &separators);
|
||||
std::set<rules::Symbol> ubiquitous_tokens;
|
||||
};
|
||||
|
||||
std::set<char> separators;
|
||||
};
|
||||
}
|
||||
class LexicalGrammar : public PreparedGrammar {
|
||||
public:
|
||||
LexicalGrammar();
|
||||
LexicalGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules);
|
||||
LexicalGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules,
|
||||
const std::set<char> &separators);
|
||||
|
||||
std::set<char> separators;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARED_GRAMMAR_H_
|
||||
|
|
|
|||
|
|
@ -2,28 +2,22 @@
|
|||
#include <string>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Blank::Blank() {}
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
bool Blank::operator==(const Rule &rule) const {
|
||||
return dynamic_cast<const Blank *>(&rule) != nullptr;
|
||||
}
|
||||
Blank::Blank() {}
|
||||
|
||||
size_t Blank::hash_code() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
rule_ptr Blank::copy() const {
|
||||
return std::make_shared<Blank>();
|
||||
}
|
||||
|
||||
std::string Blank::to_string() const {
|
||||
return "#<blank>";
|
||||
}
|
||||
|
||||
void Blank::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
bool Blank::operator==(const Rule &rule) const {
|
||||
return dynamic_cast<const Blank *>(&rule) != nullptr;
|
||||
}
|
||||
|
||||
size_t Blank::hash_code() const { return 0; }
|
||||
|
||||
rule_ptr Blank::copy() const { return std::make_shared<Blank>(); }
|
||||
|
||||
std::string Blank::to_string() const { return "#<blank>"; }
|
||||
|
||||
void Blank::accept(Visitor *visitor) const { visitor->visit(this); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,19 +4,21 @@
|
|||
#include <string>
|
||||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Blank : public Rule {
|
||||
public:
|
||||
Blank();
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
};
|
||||
}
|
||||
}
|
||||
class Blank : public Rule {
|
||||
public:
|
||||
Blank();
|
||||
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_BLANK_H_
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); }
|
||||
Symbol ERROR() { return Symbol(-2, SymbolOptionToken); }
|
||||
Symbol START() { return Symbol(-3); }
|
||||
}
|
||||
}
|
||||
namespace rules {
|
||||
|
||||
Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); }
|
||||
Symbol ERROR() { return Symbol(-2, SymbolOptionToken); }
|
||||
Symbol START() { return Symbol(-3); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,11 +4,11 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Symbol ERROR();
|
||||
Symbol START();
|
||||
Symbol END_OF_INPUT();
|
||||
}
|
||||
namespace rules {
|
||||
Symbol ERROR();
|
||||
Symbol START();
|
||||
Symbol END_OF_INPUT();
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_RULES_BUILT_IN_SYMBOLS_H_
|
||||
|
|
|
|||
|
|
@ -3,50 +3,55 @@
|
|||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
namespace rules {
|
||||
|
||||
namespace rules {
|
||||
static const unsigned char MAX_CHAR = -1;
|
||||
using std::string;
|
||||
|
||||
CharacterRange::CharacterRange(unsigned char value) : min(value), max(value) {}
|
||||
CharacterRange::CharacterRange(unsigned char min, unsigned char max) : min(min), max(max) {}
|
||||
static const unsigned char MAX_CHAR = -1;
|
||||
|
||||
bool CharacterRange::operator==(const CharacterRange &other) const {
|
||||
return min == other.min && max == other.max;
|
||||
}
|
||||
CharacterRange::CharacterRange(unsigned char value) : min(value), max(value) {}
|
||||
CharacterRange::CharacterRange(unsigned char min, unsigned char max)
|
||||
: min(min), max(max) {}
|
||||
|
||||
bool CharacterRange::operator<(const CharacterRange &other) const {
|
||||
if (min < other.min) return true;
|
||||
if (min > other.min) return false;
|
||||
if (max < other.max) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
string escape_character(unsigned char input) {
|
||||
switch (input) {
|
||||
case '\0':
|
||||
return "<EOF>";
|
||||
case '\n':
|
||||
return "\\n";
|
||||
case '\r':
|
||||
return "\\r";
|
||||
case '\t':
|
||||
return "\\t";
|
||||
case MAX_CHAR:
|
||||
return "<MAX>";
|
||||
default:
|
||||
return string() + static_cast<char>(input);
|
||||
}
|
||||
}
|
||||
|
||||
string CharacterRange::to_string() const {
|
||||
if (min == 0 && max == MAX_CHAR)
|
||||
return "<ANY>";
|
||||
if (min == max)
|
||||
return escape_character(min);
|
||||
else
|
||||
return string() + escape_character(min) + "-" + escape_character(max);
|
||||
}
|
||||
}
|
||||
bool CharacterRange::operator==(const CharacterRange &other) const {
|
||||
return min == other.min && max == other.max;
|
||||
}
|
||||
|
||||
bool CharacterRange::operator<(const CharacterRange &other) const {
|
||||
if (min < other.min)
|
||||
return true;
|
||||
if (min > other.min)
|
||||
return false;
|
||||
if (max < other.max)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
string escape_character(unsigned char input) {
|
||||
switch (input) {
|
||||
case '\0':
|
||||
return "<EOF>";
|
||||
case '\n':
|
||||
return "\\n";
|
||||
case '\r':
|
||||
return "\\r";
|
||||
case '\t':
|
||||
return "\\t";
|
||||
case MAX_CHAR:
|
||||
return "<MAX>";
|
||||
default:
|
||||
return string() + static_cast<char>(input);
|
||||
}
|
||||
}
|
||||
|
||||
string CharacterRange::to_string() const {
|
||||
if (min == 0 && max == MAX_CHAR)
|
||||
return "<ANY>";
|
||||
if (min == max)
|
||||
return escape_character(min);
|
||||
else
|
||||
return string() + escape_character(min) + "-" + escape_character(max);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,29 +5,34 @@
|
|||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
struct CharacterRange {
|
||||
unsigned char min;
|
||||
unsigned char max;
|
||||
namespace rules {
|
||||
|
||||
// IMPLICIT_CONSTRUCTORS
|
||||
CharacterRange(unsigned char value);
|
||||
CharacterRange(unsigned char min, unsigned char max);
|
||||
struct CharacterRange {
|
||||
unsigned char min;
|
||||
unsigned char max;
|
||||
|
||||
bool operator==(const CharacterRange &other) const;
|
||||
bool operator<(const CharacterRange &others) const;
|
||||
std::string to_string() const;
|
||||
};
|
||||
}
|
||||
}
|
||||
// IMPLICIT_CONSTRUCTORS
|
||||
CharacterRange(unsigned char value);
|
||||
CharacterRange(unsigned char min, unsigned char max);
|
||||
|
||||
bool operator==(const CharacterRange &other) const;
|
||||
bool operator<(const CharacterRange &others) const;
|
||||
std::string to_string() const;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::rules::CharacterRange> {
|
||||
size_t operator()(const tree_sitter::rules::CharacterRange &range) const {
|
||||
return (hash<unsigned char>()(range.min) ^ hash<unsigned char>()(range.max));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::rules::CharacterRange> {
|
||||
size_t operator()(const tree_sitter::rules::CharacterRange &range) const {
|
||||
return (hash<unsigned char>()(range.min) ^
|
||||
hash<unsigned char>()(range.max));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
#endif // COMPILER_RULES_CHARACTER_RANGE_H_
|
||||
|
|
|
|||
|
|
@ -3,140 +3,141 @@
|
|||
#include <utility>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
using std::set;
|
||||
using std::pair;
|
||||
using std::initializer_list;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
static const unsigned char MAX_CHAR = -1;
|
||||
static const unsigned char MAX_CHAR = -1;
|
||||
|
||||
CharacterSet::CharacterSet() : ranges({}) {}
|
||||
CharacterSet::CharacterSet(const set<CharacterRange> &ranges) : ranges(ranges) {}
|
||||
CharacterSet::CharacterSet(const initializer_list<CharacterRange> &ranges) : ranges(ranges) {}
|
||||
CharacterSet::CharacterSet() : ranges({}) {}
|
||||
CharacterSet::CharacterSet(const set<CharacterRange> &ranges)
|
||||
: ranges(ranges) {}
|
||||
CharacterSet::CharacterSet(const initializer_list<CharacterRange> &ranges)
|
||||
: ranges(ranges) {}
|
||||
|
||||
bool CharacterSet::operator==(const Rule &rule) const {
|
||||
const CharacterSet *other = dynamic_cast<const CharacterSet *>(&rule);
|
||||
return other && (ranges == other->ranges);
|
||||
}
|
||||
|
||||
bool CharacterSet::operator<(const CharacterSet &other) const {
|
||||
return ranges < other.ranges;
|
||||
}
|
||||
|
||||
size_t CharacterSet::hash_code() const {
|
||||
size_t result = std::hash<size_t>()(ranges.size());
|
||||
for (auto &range : ranges) {
|
||||
result ^= std::hash<unsigned char>()(range.min);
|
||||
result ^= std::hash<unsigned char>()(range.max);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
rule_ptr CharacterSet::copy() const {
|
||||
return std::make_shared<CharacterSet>(*this);
|
||||
}
|
||||
|
||||
string CharacterSet::to_string() const {
|
||||
string result("#<char {");
|
||||
for (auto &range : ranges)
|
||||
result += " " + range.to_string();
|
||||
return result + " }>";
|
||||
}
|
||||
|
||||
CharacterSet CharacterSet::complement() const {
|
||||
CharacterSet result({ {0, MAX_CHAR} });
|
||||
result.remove_set(*this);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation() const {
|
||||
auto first_range = *ranges.begin();
|
||||
if (first_range.min == 0 && first_range.max > 0) {
|
||||
return { this->complement(), false };
|
||||
} else {
|
||||
return { *this, true };
|
||||
}
|
||||
}
|
||||
|
||||
void add_range(CharacterSet *self, CharacterRange addition) {
|
||||
set<CharacterRange> new_ranges;
|
||||
for (auto range : self->ranges) {
|
||||
bool is_adjacent = false;
|
||||
if (range.min < addition.min && range.max >= addition.min - 1) {
|
||||
is_adjacent = true;
|
||||
addition.min = range.min;
|
||||
}
|
||||
if (range.max > addition.max && range.min <= addition.max + 1) {
|
||||
is_adjacent = true;
|
||||
addition.max = range.max;
|
||||
}
|
||||
if (!is_adjacent) {
|
||||
new_ranges.insert(range);
|
||||
}
|
||||
}
|
||||
new_ranges.insert(addition);
|
||||
self->ranges = new_ranges;
|
||||
}
|
||||
|
||||
CharacterSet remove_range(CharacterSet *self, CharacterRange range_to_remove) {
|
||||
CharacterSet removed_set;
|
||||
set<CharacterRange> new_ranges;
|
||||
for (auto range : self->ranges) {
|
||||
if (range_to_remove.min <= range.min) {
|
||||
if (range_to_remove.max < range.min) {
|
||||
new_ranges.insert(range);
|
||||
} else if (range_to_remove.max < range.max) {
|
||||
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
|
||||
add_range(&removed_set, CharacterRange(range.min, range_to_remove.max));
|
||||
} else {
|
||||
add_range(&removed_set, range);
|
||||
}
|
||||
} else if (range_to_remove.min <= range.max) {
|
||||
if (range_to_remove.max < range.max) {
|
||||
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
|
||||
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
|
||||
add_range(&removed_set, range_to_remove);
|
||||
} else {
|
||||
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
|
||||
add_range(&removed_set, CharacterRange(range_to_remove.min, range.max));
|
||||
}
|
||||
} else {
|
||||
new_ranges.insert(range);
|
||||
}
|
||||
}
|
||||
self->ranges = new_ranges;
|
||||
return removed_set;
|
||||
}
|
||||
|
||||
bool CharacterSet::is_empty() const {
|
||||
return ranges.empty();
|
||||
}
|
||||
|
||||
void CharacterSet::add_set(const CharacterSet &other) {
|
||||
for (auto &other_range : other.ranges) {
|
||||
add_range(this, other_range);
|
||||
}
|
||||
}
|
||||
|
||||
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
|
||||
CharacterSet result;
|
||||
for (auto &other_range : other.ranges) {
|
||||
auto removed_set = remove_range(this, other_range);
|
||||
result.add_set(removed_set);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
CharacterSet CharacterSet::intersect(const CharacterSet &set) const {
|
||||
CharacterSet copy = *this;
|
||||
return copy.remove_set(set);
|
||||
}
|
||||
|
||||
void CharacterSet::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
bool CharacterSet::operator==(const Rule &rule) const {
|
||||
const CharacterSet *other = dynamic_cast<const CharacterSet *>(&rule);
|
||||
return other && (ranges == other->ranges);
|
||||
}
|
||||
|
||||
bool CharacterSet::operator<(const CharacterSet &other) const {
|
||||
return ranges < other.ranges;
|
||||
}
|
||||
|
||||
size_t CharacterSet::hash_code() const {
|
||||
size_t result = std::hash<size_t>()(ranges.size());
|
||||
for (auto &range : ranges) {
|
||||
result ^= std::hash<unsigned char>()(range.min);
|
||||
result ^= std::hash<unsigned char>()(range.max);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
rule_ptr CharacterSet::copy() const {
|
||||
return std::make_shared<CharacterSet>(*this);
|
||||
}
|
||||
|
||||
string CharacterSet::to_string() const {
|
||||
string result("#<char {");
|
||||
for (auto &range : ranges)
|
||||
result += " " + range.to_string();
|
||||
return result + " }>";
|
||||
}
|
||||
|
||||
CharacterSet CharacterSet::complement() const {
|
||||
CharacterSet result({ { 0, MAX_CHAR } });
|
||||
result.remove_set(*this);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation()
|
||||
const {
|
||||
auto first_range = *ranges.begin();
|
||||
if (first_range.min == 0 && first_range.max > 0) {
|
||||
return { this->complement(), false };
|
||||
} else {
|
||||
return { *this, true };
|
||||
}
|
||||
}
|
||||
|
||||
void add_range(CharacterSet *self, CharacterRange addition) {
|
||||
set<CharacterRange> new_ranges;
|
||||
for (auto range : self->ranges) {
|
||||
bool is_adjacent = false;
|
||||
if (range.min < addition.min && range.max >= addition.min - 1) {
|
||||
is_adjacent = true;
|
||||
addition.min = range.min;
|
||||
}
|
||||
if (range.max > addition.max && range.min <= addition.max + 1) {
|
||||
is_adjacent = true;
|
||||
addition.max = range.max;
|
||||
}
|
||||
if (!is_adjacent) {
|
||||
new_ranges.insert(range);
|
||||
}
|
||||
}
|
||||
new_ranges.insert(addition);
|
||||
self->ranges = new_ranges;
|
||||
}
|
||||
|
||||
CharacterSet remove_range(CharacterSet *self, CharacterRange range_to_remove) {
|
||||
CharacterSet removed_set;
|
||||
set<CharacterRange> new_ranges;
|
||||
for (auto range : self->ranges) {
|
||||
if (range_to_remove.min <= range.min) {
|
||||
if (range_to_remove.max < range.min) {
|
||||
new_ranges.insert(range);
|
||||
} else if (range_to_remove.max < range.max) {
|
||||
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
|
||||
add_range(&removed_set, CharacterRange(range.min, range_to_remove.max));
|
||||
} else {
|
||||
add_range(&removed_set, range);
|
||||
}
|
||||
} else if (range_to_remove.min <= range.max) {
|
||||
if (range_to_remove.max < range.max) {
|
||||
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
|
||||
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
|
||||
add_range(&removed_set, range_to_remove);
|
||||
} else {
|
||||
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
|
||||
add_range(&removed_set, CharacterRange(range_to_remove.min, range.max));
|
||||
}
|
||||
} else {
|
||||
new_ranges.insert(range);
|
||||
}
|
||||
}
|
||||
self->ranges = new_ranges;
|
||||
return removed_set;
|
||||
}
|
||||
|
||||
bool CharacterSet::is_empty() const { return ranges.empty(); }
|
||||
|
||||
void CharacterSet::add_set(const CharacterSet &other) {
|
||||
for (auto &other_range : other.ranges) {
|
||||
add_range(this, other_range);
|
||||
}
|
||||
}
|
||||
|
||||
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
|
||||
CharacterSet result;
|
||||
for (auto &other_range : other.ranges) {
|
||||
auto removed_set = remove_range(this, other_range);
|
||||
result.add_set(removed_set);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
CharacterSet CharacterSet::intersect(const CharacterSet &set) const {
|
||||
CharacterSet copy = *this;
|
||||
return copy.remove_set(set);
|
||||
}
|
||||
|
||||
void CharacterSet::accept(Visitor *visitor) const { visitor->visit(this); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -8,40 +8,44 @@
|
|||
#include "compiler/rules/rule.h"
|
||||
#include "compiler/rules/character_range.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class CharacterSet : public Rule {
|
||||
public:
|
||||
CharacterSet();
|
||||
explicit CharacterSet(const std::set<CharacterRange> &ranges);
|
||||
explicit CharacterSet(const std::initializer_list<CharacterRange> &ranges);
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
bool operator<(const CharacterSet &) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
class CharacterSet : public Rule {
|
||||
public:
|
||||
CharacterSet();
|
||||
explicit CharacterSet(const std::set<CharacterRange> &ranges);
|
||||
explicit CharacterSet(const std::initializer_list<CharacterRange> &ranges);
|
||||
|
||||
void add_set(const CharacterSet &other);
|
||||
CharacterSet remove_set(const CharacterSet &other);
|
||||
CharacterSet complement() const;
|
||||
CharacterSet intersect(const CharacterSet &) const;
|
||||
std::pair<CharacterSet, bool> most_compact_representation() const;
|
||||
bool is_empty() const;
|
||||
bool operator==(const Rule &other) const;
|
||||
bool operator<(const CharacterSet &) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
std::set<CharacterRange> ranges;
|
||||
};
|
||||
}
|
||||
}
|
||||
void add_set(const CharacterSet &other);
|
||||
CharacterSet remove_set(const CharacterSet &other);
|
||||
CharacterSet complement() const;
|
||||
CharacterSet intersect(const CharacterSet &) const;
|
||||
std::pair<CharacterSet, bool> most_compact_representation() const;
|
||||
bool is_empty() const;
|
||||
|
||||
std::set<CharacterRange> ranges;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::rules::CharacterSet> {
|
||||
size_t operator()(const tree_sitter::rules::CharacterSet &rule) const {
|
||||
return rule.hash_code();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::rules::CharacterSet> {
|
||||
size_t operator()(const tree_sitter::rules::CharacterSet &rule) const {
|
||||
return rule.hash_code();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
#endif // COMPILER_RULES_CHARACTER_SET_H_
|
||||
|
|
|
|||
|
|
@ -3,50 +3,51 @@
|
|||
#include <set>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::make_shared;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::dynamic_pointer_cast;
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
namespace rules {
|
||||
Choice::Choice(const vector<rule_ptr> &elements) : elements(elements) {}
|
||||
using std::string;
|
||||
using std::make_shared;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::dynamic_pointer_cast;
|
||||
|
||||
rule_ptr Choice::Build(const vector<rule_ptr> &elements) {
|
||||
return make_shared<Choice>(elements);
|
||||
}
|
||||
Choice::Choice(const vector<rule_ptr> &elements) : elements(elements) {}
|
||||
|
||||
bool Choice::operator==(const Rule &rule) const {
|
||||
const Choice *other = dynamic_cast<const Choice *>(&rule);
|
||||
if (!other) return false;
|
||||
size_t size = elements.size();
|
||||
if (size != other->elements.size()) return false;
|
||||
for (size_t i = 0; i < size; i++)
|
||||
if (!elements[i]->operator==(*other->elements[i])) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t Choice::hash_code() const {
|
||||
size_t result = std::hash<size_t>()(elements.size());
|
||||
for (const auto &element : elements)
|
||||
result ^= element->hash_code();
|
||||
return result;
|
||||
}
|
||||
|
||||
rule_ptr Choice::copy() const {
|
||||
return std::make_shared<Choice>(*this);
|
||||
}
|
||||
|
||||
string Choice::to_string() const {
|
||||
string result = "#<choice";
|
||||
for (const auto &element : elements)
|
||||
result += " " + element->to_string();
|
||||
return result + ">";
|
||||
}
|
||||
|
||||
void Choice::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
rule_ptr Choice::Build(const vector<rule_ptr> &elements) {
|
||||
return make_shared<Choice>(elements);
|
||||
}
|
||||
|
||||
bool Choice::operator==(const Rule &rule) const {
|
||||
const Choice *other = dynamic_cast<const Choice *>(&rule);
|
||||
if (!other)
|
||||
return false;
|
||||
size_t size = elements.size();
|
||||
if (size != other->elements.size())
|
||||
return false;
|
||||
for (size_t i = 0; i < size; i++)
|
||||
if (!elements[i]->operator==(*other->elements[i]))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t Choice::hash_code() const {
|
||||
size_t result = std::hash<size_t>()(elements.size());
|
||||
for (const auto &element : elements)
|
||||
result ^= element->hash_code();
|
||||
return result;
|
||||
}
|
||||
|
||||
rule_ptr Choice::copy() const { return std::make_shared<Choice>(*this); }
|
||||
|
||||
string Choice::to_string() const {
|
||||
string result = "#<choice";
|
||||
for (const auto &element : elements)
|
||||
result += " " + element->to_string();
|
||||
return result + ">";
|
||||
}
|
||||
|
||||
void Choice::accept(Visitor *visitor) const { visitor->visit(this); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -6,21 +6,23 @@
|
|||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Choice : public Rule {
|
||||
public:
|
||||
explicit Choice(const std::vector<rule_ptr> &elements);
|
||||
static rule_ptr Build(const std::vector<rule_ptr> &rules);
|
||||
namespace rules {
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
class Choice : public Rule {
|
||||
public:
|
||||
explicit Choice(const std::vector<rule_ptr> &elements);
|
||||
static rule_ptr Build(const std::vector<rule_ptr> &rules);
|
||||
|
||||
const std::vector<rule_ptr> elements;
|
||||
};
|
||||
}
|
||||
}
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
const std::vector<rule_ptr> elements;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_CHOICE_H_
|
||||
|
|
|
|||
|
|
@ -3,45 +3,44 @@
|
|||
#include <map>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::hash;
|
||||
using std::make_shared;
|
||||
using std::map;
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
namespace rules {
|
||||
Metadata::Metadata(rule_ptr rule, map<MetadataKey, int> values) : rule(rule), value(values) {}
|
||||
using std::hash;
|
||||
using std::make_shared;
|
||||
using std::map;
|
||||
|
||||
bool Metadata::operator==(const Rule &rule) const {
|
||||
auto other = dynamic_cast<const Metadata *>(&rule);
|
||||
return other && other->value == value && other->rule->operator==(*this->rule);
|
||||
}
|
||||
Metadata::Metadata(rule_ptr rule, map<MetadataKey, int> values)
|
||||
: rule(rule), value(values) {}
|
||||
|
||||
size_t Metadata::hash_code() const {
|
||||
size_t result = hash<size_t>()(value.size());
|
||||
for (auto &pair : value) {
|
||||
result ^= hash<int>()(pair.first);
|
||||
result ^= hash<int>()(pair.second);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
rule_ptr Metadata::copy() const {
|
||||
return make_shared<Metadata>(rule->copy(), value);
|
||||
}
|
||||
|
||||
int Metadata::value_for(MetadataKey key) const {
|
||||
auto pair = value.find(key);
|
||||
return (pair != value.end()) ?
|
||||
pair->second :
|
||||
0;
|
||||
}
|
||||
|
||||
std::string Metadata::to_string() const {
|
||||
return "#<metadata " + rule->to_string() + ">";
|
||||
}
|
||||
|
||||
void Metadata::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
bool Metadata::operator==(const Rule &rule) const {
|
||||
auto other = dynamic_cast<const Metadata *>(&rule);
|
||||
return other && other->value == value && other->rule->operator==(*this->rule);
|
||||
}
|
||||
|
||||
size_t Metadata::hash_code() const {
|
||||
size_t result = hash<size_t>()(value.size());
|
||||
for (auto &pair : value) {
|
||||
result ^= hash<int>()(pair.first);
|
||||
result ^= hash<int>()(pair.second);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
rule_ptr Metadata::copy() const {
|
||||
return make_shared<Metadata>(rule->copy(), value);
|
||||
}
|
||||
|
||||
int Metadata::value_for(MetadataKey key) const {
|
||||
auto pair = value.find(key);
|
||||
return (pair != value.end()) ? pair->second : 0;
|
||||
}
|
||||
|
||||
std::string Metadata::to_string() const {
|
||||
return "#<metadata " + rule->to_string() + ">";
|
||||
}
|
||||
|
||||
void Metadata::accept(Visitor *visitor) const { visitor->visit(this); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,30 +5,32 @@
|
|||
#include <map>
|
||||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
typedef enum {
|
||||
START_TOKEN,
|
||||
PRECEDENCE,
|
||||
IS_TOKEN,
|
||||
DESCRIPTION,
|
||||
} MetadataKey;
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class Metadata : public Rule {
|
||||
public:
|
||||
Metadata(rule_ptr rule, std::map<MetadataKey, int> value);
|
||||
typedef enum {
|
||||
START_TOKEN,
|
||||
PRECEDENCE,
|
||||
IS_TOKEN,
|
||||
DESCRIPTION,
|
||||
} MetadataKey;
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
int value_for(MetadataKey key) const;
|
||||
class Metadata : public Rule {
|
||||
public:
|
||||
Metadata(rule_ptr rule, std::map<MetadataKey, int> value);
|
||||
|
||||
const rule_ptr rule;
|
||||
const std::map<MetadataKey, int> value;
|
||||
};
|
||||
}
|
||||
}
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
int value_for(MetadataKey key) const;
|
||||
|
||||
const rule_ptr rule;
|
||||
const std::map<MetadataKey, int> value;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_METADATA_H_
|
||||
|
|
|
|||
|
|
@ -2,32 +2,30 @@
|
|||
#include <string>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::hash;
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
namespace rules {
|
||||
NamedSymbol::NamedSymbol(const std::string &name) : name(name) {}
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
||||
bool NamedSymbol::operator==(const Rule &rule) const {
|
||||
auto other = dynamic_cast<const NamedSymbol *>(&rule);
|
||||
return other && other->name == name;
|
||||
}
|
||||
NamedSymbol::NamedSymbol(const std::string &name) : name(name) {}
|
||||
|
||||
size_t NamedSymbol::hash_code() const {
|
||||
return hash<string>()(name);
|
||||
}
|
||||
|
||||
rule_ptr NamedSymbol::copy() const {
|
||||
return std::make_shared<NamedSymbol>(*this);
|
||||
}
|
||||
|
||||
string NamedSymbol::to_string() const {
|
||||
return string("#<sym '") + name + "'>";
|
||||
}
|
||||
|
||||
void NamedSymbol::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
bool NamedSymbol::operator==(const Rule &rule) const {
|
||||
auto other = dynamic_cast<const NamedSymbol *>(&rule);
|
||||
return other && other->name == name;
|
||||
}
|
||||
|
||||
size_t NamedSymbol::hash_code() const { return hash<string>()(name); }
|
||||
|
||||
rule_ptr NamedSymbol::copy() const {
|
||||
return std::make_shared<NamedSymbol>(*this);
|
||||
}
|
||||
|
||||
string NamedSymbol::to_string() const {
|
||||
return string("#<sym '") + name + "'>";
|
||||
}
|
||||
|
||||
void NamedSymbol::accept(Visitor *visitor) const { visitor->visit(this); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,21 +4,23 @@
|
|||
#include <string>
|
||||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class NamedSymbol : public Rule {
|
||||
public:
|
||||
explicit NamedSymbol(const std::string &name);
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
class NamedSymbol : public Rule {
|
||||
public:
|
||||
explicit NamedSymbol(const std::string &name);
|
||||
|
||||
std::string name;
|
||||
};
|
||||
}
|
||||
}
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
std::string name;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_NAMED_SYMBOL_H_
|
||||
|
|
|
|||
|
|
@ -4,31 +4,27 @@
|
|||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
using std::string;
|
||||
using std::hash;
|
||||
namespace rules {
|
||||
|
||||
Pattern::Pattern(const string &string) : value(string) {}
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
||||
bool Pattern::operator==(tree_sitter::rules::Rule const &other) const {
|
||||
auto pattern = dynamic_cast<const Pattern *>(&other);
|
||||
return pattern && (pattern->value == value);
|
||||
}
|
||||
Pattern::Pattern(const string &string) : value(string) {}
|
||||
|
||||
size_t Pattern::hash_code() const {
|
||||
return hash<string>()(value);
|
||||
}
|
||||
|
||||
rule_ptr Pattern::copy() const {
|
||||
return std::make_shared<Pattern>(*this);
|
||||
}
|
||||
|
||||
string Pattern::to_string() const {
|
||||
return string("#<pattern '") + util::escape_string(value) + "'>";
|
||||
}
|
||||
|
||||
void Pattern::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
bool Pattern::operator==(tree_sitter::rules::Rule const &other) const {
|
||||
auto pattern = dynamic_cast<const Pattern *>(&other);
|
||||
return pattern && (pattern->value == value);
|
||||
}
|
||||
|
||||
size_t Pattern::hash_code() const { return hash<string>()(value); }
|
||||
|
||||
rule_ptr Pattern::copy() const { return std::make_shared<Pattern>(*this); }
|
||||
|
||||
string Pattern::to_string() const {
|
||||
return string("#<pattern '") + util::escape_string(value) + "'>";
|
||||
}
|
||||
|
||||
void Pattern::accept(Visitor *visitor) const { visitor->visit(this); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,21 +5,22 @@
|
|||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Pattern : public Rule {
|
||||
public:
|
||||
explicit Pattern(const std::string &string);
|
||||
namespace rules {
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
class Pattern : public Rule {
|
||||
public:
|
||||
explicit Pattern(const std::string &string);
|
||||
|
||||
const std::string value;
|
||||
};
|
||||
}
|
||||
}
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
const std::string value;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_PATTERN_H_
|
||||
|
||||
|
|
|
|||
|
|
@ -3,30 +3,26 @@
|
|||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
namespace rules {
|
||||
|
||||
namespace rules {
|
||||
Repeat::Repeat(const rule_ptr content) : content(content) {}
|
||||
using std::string;
|
||||
|
||||
bool Repeat::operator==(const Rule &rule) const {
|
||||
const Repeat *other = dynamic_cast<const Repeat *>(&rule);
|
||||
return other && (*other->content == *content);
|
||||
}
|
||||
Repeat::Repeat(const rule_ptr content) : content(content) {}
|
||||
|
||||
size_t Repeat::hash_code() const {
|
||||
return content->hash_code();
|
||||
}
|
||||
|
||||
rule_ptr Repeat::copy() const {
|
||||
return std::make_shared<Repeat>(*this);
|
||||
}
|
||||
|
||||
string Repeat::to_string() const {
|
||||
return string("#<repeat ") + content->to_string() + ">";
|
||||
}
|
||||
|
||||
void Repeat::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
bool Repeat::operator==(const Rule &rule) const {
|
||||
const Repeat *other = dynamic_cast<const Repeat *>(&rule);
|
||||
return other && (*other->content == *content);
|
||||
}
|
||||
|
||||
size_t Repeat::hash_code() const { return content->hash_code(); }
|
||||
|
||||
rule_ptr Repeat::copy() const { return std::make_shared<Repeat>(*this); }
|
||||
|
||||
string Repeat::to_string() const {
|
||||
return string("#<repeat ") + content->to_string() + ">";
|
||||
}
|
||||
|
||||
void Repeat::accept(Visitor *visitor) const { visitor->visit(this); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,21 +4,23 @@
|
|||
#include <string>
|
||||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Repeat : public Rule {
|
||||
public:
|
||||
explicit Repeat(rule_ptr content);
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
class Repeat : public Rule {
|
||||
public:
|
||||
explicit Repeat(rule_ptr content);
|
||||
|
||||
const rule_ptr content;
|
||||
};
|
||||
}
|
||||
}
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
const rule_ptr content;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_REPEAT_H_
|
||||
|
|
|
|||
|
|
@ -2,26 +2,28 @@
|
|||
#include <set>
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::ostream;
|
||||
using std::string;
|
||||
namespace rules {
|
||||
|
||||
namespace rules {
|
||||
bool Rule::operator!=(const Rule &other) const {
|
||||
return !this->operator==(other);
|
||||
}
|
||||
using std::ostream;
|
||||
using std::string;
|
||||
|
||||
ostream& operator<<(ostream& stream, const Rule &rule) {
|
||||
return stream << rule.to_string();
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream& stream, const rule_ptr &rule) {
|
||||
if (rule.get())
|
||||
stream << *rule;
|
||||
else
|
||||
stream << string("#<null-rule>");
|
||||
return stream;
|
||||
}
|
||||
|
||||
Rule::~Rule() {}
|
||||
}
|
||||
bool Rule::operator!=(const Rule &other) const {
|
||||
return !this->operator==(other);
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const Rule &rule) {
|
||||
return stream << rule.to_string();
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const rule_ptr &rule) {
|
||||
if (rule.get())
|
||||
stream << *rule;
|
||||
else
|
||||
stream << string("#<null-rule>");
|
||||
return stream;
|
||||
}
|
||||
|
||||
Rule::~Rule() {}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,35 +5,39 @@
|
|||
#include <memory>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Visitor;
|
||||
class Rule;
|
||||
namespace rules {
|
||||
|
||||
typedef std::shared_ptr<Rule> rule_ptr;
|
||||
class Visitor;
|
||||
class Rule;
|
||||
|
||||
class Rule {
|
||||
public:
|
||||
virtual bool operator==(const Rule& other) const = 0;
|
||||
bool operator!=(const Rule& other) const;
|
||||
virtual size_t hash_code() const = 0;
|
||||
virtual rule_ptr copy() const = 0;
|
||||
virtual std::string to_string() const = 0;
|
||||
virtual void accept(Visitor *visitor) const = 0;
|
||||
virtual ~Rule();
|
||||
};
|
||||
typedef std::shared_ptr<Rule> rule_ptr;
|
||||
|
||||
std::ostream& operator<<(std::ostream& stream, const Rule &rule);
|
||||
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule);
|
||||
}
|
||||
}
|
||||
class Rule {
|
||||
public:
|
||||
virtual bool operator==(const Rule &other) const = 0;
|
||||
bool operator!=(const Rule &other) const;
|
||||
virtual size_t hash_code() const = 0;
|
||||
virtual rule_ptr copy() const = 0;
|
||||
virtual std::string to_string() const = 0;
|
||||
virtual void accept(Visitor *visitor) const = 0;
|
||||
virtual ~Rule();
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const Rule &rule);
|
||||
std::ostream &operator<<(std::ostream &stream, const rule_ptr &rule);
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::rules::rule_ptr> {
|
||||
size_t operator()(const tree_sitter::rules::rule_ptr &rule) const {
|
||||
return typeid(*rule).hash_code() ^ rule->hash_code();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::rules::rule_ptr> {
|
||||
size_t operator()(const tree_sitter::rules::rule_ptr &rule) const {
|
||||
return typeid(*rule).hash_code() ^ rule->hash_code();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
#endif // COMPILER_RULES_RULE_H_
|
||||
|
|
|
|||
|
|
@ -16,65 +16,55 @@
|
|||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::make_shared;
|
||||
using std::string;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using std::map;
|
||||
namespace rules {
|
||||
|
||||
namespace rules {
|
||||
static const int KEYWORD_PRECEDENCE = 100;
|
||||
using std::make_shared;
|
||||
using std::string;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using std::map;
|
||||
|
||||
static rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
|
||||
return std::make_shared<Metadata>(rule, values);
|
||||
}
|
||||
static const int KEYWORD_PRECEDENCE = 100;
|
||||
|
||||
rule_ptr blank() {
|
||||
return make_shared<Blank>();
|
||||
}
|
||||
|
||||
rule_ptr choice(const vector<rule_ptr> &rules) {
|
||||
return Choice::Build(rules);
|
||||
}
|
||||
|
||||
rule_ptr repeat(const rule_ptr &content) {
|
||||
return std::make_shared<Repeat>(content);
|
||||
}
|
||||
|
||||
rule_ptr seq(const vector<rule_ptr> &rules) {
|
||||
return Seq::Build(rules);
|
||||
}
|
||||
|
||||
rule_ptr sym(const string &name) {
|
||||
return make_shared<NamedSymbol>(name);
|
||||
}
|
||||
|
||||
rule_ptr pattern(const string &value) {
|
||||
return make_shared<Pattern>(value);
|
||||
}
|
||||
|
||||
rule_ptr str(const string &value) {
|
||||
return make_shared<String>(value);
|
||||
}
|
||||
|
||||
rule_ptr keyword(const string &value) {
|
||||
return token(prec(KEYWORD_PRECEDENCE, str(value)));
|
||||
}
|
||||
|
||||
rule_ptr keypattern(const string &value) {
|
||||
return token(prec(KEYWORD_PRECEDENCE, pattern(value)));
|
||||
}
|
||||
|
||||
rule_ptr err(const rule_ptr &rule) {
|
||||
return choice({ rule, ERROR().copy() });
|
||||
}
|
||||
|
||||
rule_ptr prec(int precedence, rule_ptr rule) {
|
||||
return metadata(rule, {{ PRECEDENCE, precedence }});
|
||||
}
|
||||
|
||||
rule_ptr token(rule_ptr rule) {
|
||||
return metadata(rule, {{ IS_TOKEN, 1 }});
|
||||
}
|
||||
}
|
||||
static rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
|
||||
return std::make_shared<Metadata>(rule, values);
|
||||
}
|
||||
|
||||
rule_ptr blank() { return make_shared<Blank>(); }
|
||||
|
||||
rule_ptr choice(const vector<rule_ptr> &rules) { return Choice::Build(rules); }
|
||||
|
||||
rule_ptr repeat(const rule_ptr &content) {
|
||||
return std::make_shared<Repeat>(content);
|
||||
}
|
||||
|
||||
rule_ptr seq(const vector<rule_ptr> &rules) { return Seq::Build(rules); }
|
||||
|
||||
rule_ptr sym(const string &name) { return make_shared<NamedSymbol>(name); }
|
||||
|
||||
rule_ptr pattern(const string &value) { return make_shared<Pattern>(value); }
|
||||
|
||||
rule_ptr str(const string &value) { return make_shared<String>(value); }
|
||||
|
||||
rule_ptr keyword(const string &value) {
|
||||
return token(prec(KEYWORD_PRECEDENCE, str(value)));
|
||||
}
|
||||
|
||||
rule_ptr keypattern(const string &value) {
|
||||
return token(prec(KEYWORD_PRECEDENCE, pattern(value)));
|
||||
}
|
||||
|
||||
rule_ptr err(const rule_ptr &rule) {
|
||||
return choice({ rule, ERROR().copy() });
|
||||
}
|
||||
|
||||
rule_ptr prec(int precedence, rule_ptr rule) {
|
||||
return metadata(rule, { { PRECEDENCE, precedence } });
|
||||
}
|
||||
|
||||
rule_ptr token(rule_ptr rule) {
|
||||
return metadata(rule, { { IS_TOKEN, 1 } });
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,39 +4,36 @@
|
|||
#include "compiler/rules/blank.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::make_shared;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
namespace rules {
|
||||
|
||||
namespace rules {
|
||||
Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {}
|
||||
using std::make_shared;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
rule_ptr Seq::Build(const std::vector<rule_ptr> &rules) {
|
||||
rule_ptr result = make_shared<Blank>();
|
||||
for (auto &rule : rules)
|
||||
result = (typeid(*result) != typeid(Blank)) ? make_shared<Seq>(result, rule) : rule;
|
||||
return result;
|
||||
}
|
||||
Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {}
|
||||
|
||||
bool Seq::operator==(const Rule &rule) const {
|
||||
const Seq *other = dynamic_cast<const Seq *>(&rule);
|
||||
return other && (*other->left == *left) && (*other->right == *right);
|
||||
}
|
||||
|
||||
size_t Seq::hash_code() const {
|
||||
return left->hash_code() ^ right->hash_code();
|
||||
}
|
||||
|
||||
rule_ptr Seq::copy() const {
|
||||
return std::make_shared<Seq>(*this);
|
||||
}
|
||||
|
||||
string Seq::to_string() const {
|
||||
return string("#<seq ") + left->to_string() + " " + right->to_string() + ">";
|
||||
}
|
||||
|
||||
void Seq::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
rule_ptr Seq::Build(const std::vector<rule_ptr> &rules) {
|
||||
rule_ptr result = make_shared<Blank>();
|
||||
for (auto &rule : rules)
|
||||
result = (typeid(*result) != typeid(Blank)) ? make_shared<Seq>(result, rule)
|
||||
: rule;
|
||||
return result;
|
||||
}
|
||||
|
||||
bool Seq::operator==(const Rule &rule) const {
|
||||
const Seq *other = dynamic_cast<const Seq *>(&rule);
|
||||
return other && (*other->left == *left) && (*other->right == *right);
|
||||
}
|
||||
|
||||
size_t Seq::hash_code() const { return left->hash_code() ^ right->hash_code(); }
|
||||
|
||||
rule_ptr Seq::copy() const { return std::make_shared<Seq>(*this); }
|
||||
|
||||
string Seq::to_string() const {
|
||||
return string("#<seq ") + left->to_string() + " " + right->to_string() + ">";
|
||||
}
|
||||
|
||||
void Seq::accept(Visitor *visitor) const { visitor->visit(this); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -5,23 +5,25 @@
|
|||
#include <vector>
|
||||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Seq : public Rule {
|
||||
public:
|
||||
Seq(rule_ptr left, rule_ptr right);
|
||||
static rule_ptr Build(const std::vector<rule_ptr> &rules);
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
class Seq : public Rule {
|
||||
public:
|
||||
Seq(rule_ptr left, rule_ptr right);
|
||||
static rule_ptr Build(const std::vector<rule_ptr> &rules);
|
||||
|
||||
const rule_ptr left;
|
||||
const rule_ptr right;
|
||||
};
|
||||
}
|
||||
}
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
const rule_ptr left;
|
||||
const rule_ptr right;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_SEQ_H_
|
||||
|
|
|
|||
|
|
@ -2,32 +2,26 @@
|
|||
#include <string>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::hash;
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
namespace rules {
|
||||
String::String(string value) : value(value) {}
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
||||
bool String::operator==(const Rule &rule) const {
|
||||
const String *other = dynamic_cast<const String *>(&rule);
|
||||
return other && (other->value == value);
|
||||
}
|
||||
String::String(string value) : value(value) {}
|
||||
|
||||
size_t String::hash_code() const {
|
||||
return hash<string>()(value);
|
||||
}
|
||||
|
||||
rule_ptr String::copy() const {
|
||||
return std::make_shared<String>(*this);
|
||||
}
|
||||
|
||||
string String::to_string() const {
|
||||
return string("#<string '") + value + "'>";
|
||||
}
|
||||
|
||||
void String::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
bool String::operator==(const Rule &rule) const {
|
||||
const String *other = dynamic_cast<const String *>(&rule);
|
||||
return other && (other->value == value);
|
||||
}
|
||||
|
||||
size_t String::hash_code() const { return hash<string>()(value); }
|
||||
|
||||
rule_ptr String::copy() const { return std::make_shared<String>(*this); }
|
||||
|
||||
string String::to_string() const { return string("#<string '") + value + "'>"; }
|
||||
|
||||
void String::accept(Visitor *visitor) const { visitor->visit(this); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,21 +4,23 @@
|
|||
#include <string>
|
||||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class String : public Rule {
|
||||
public:
|
||||
explicit String(std::string value);
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
class String : public Rule {
|
||||
public:
|
||||
explicit String(std::string value);
|
||||
|
||||
const std::string value;
|
||||
};
|
||||
}
|
||||
}
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
const std::string value;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_STRING_H_
|
||||
|
|
|
|||
|
|
@ -3,63 +3,54 @@
|
|||
#include <map>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::hash;
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
namespace rules {
|
||||
Symbol::Symbol(int index) :
|
||||
index(index),
|
||||
options(SymbolOption(0)) {}
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::hash;
|
||||
|
||||
Symbol::Symbol(int index, SymbolOption options) :
|
||||
index(index),
|
||||
options(options) {}
|
||||
Symbol::Symbol(int index) : index(index), options(SymbolOption(0)) {}
|
||||
|
||||
bool Symbol::operator==(const Symbol &other) const {
|
||||
return (other.index == index) && (other.options == options);
|
||||
}
|
||||
Symbol::Symbol(int index, SymbolOption options)
|
||||
: index(index), options(options) {}
|
||||
|
||||
bool Symbol::operator==(const Rule &rule) const {
|
||||
const Symbol *other = dynamic_cast<const Symbol *>(&rule);
|
||||
return other && this->operator==(*other);
|
||||
}
|
||||
|
||||
size_t Symbol::hash_code() const {
|
||||
return hash<int>()(index) ^ hash<int16_t>()(options);
|
||||
}
|
||||
|
||||
rule_ptr Symbol::copy() const {
|
||||
return std::make_shared<Symbol>(*this);
|
||||
}
|
||||
|
||||
string Symbol::to_string() const {
|
||||
string name = (options & SymbolOptionAuxiliary) ? "aux_" : "";
|
||||
name += (options & SymbolOptionToken) ? "token" : "sym";
|
||||
return "#<" + name + " " + std::to_string(index) + ">";
|
||||
}
|
||||
|
||||
bool Symbol::operator<(const Symbol &other) const {
|
||||
if (options < other.options) return true;
|
||||
if (options > other.options) return false;
|
||||
return (index < other.index);
|
||||
}
|
||||
|
||||
bool Symbol::is_token() const {
|
||||
return options & SymbolOptionToken;
|
||||
}
|
||||
|
||||
bool Symbol::is_built_in() const {
|
||||
return index < 0;
|
||||
}
|
||||
|
||||
bool Symbol::is_auxiliary() const {
|
||||
return options & SymbolOptionAuxiliary;
|
||||
}
|
||||
|
||||
void Symbol::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
bool Symbol::operator==(const Symbol &other) const {
|
||||
return (other.index == index) && (other.options == options);
|
||||
}
|
||||
|
||||
bool Symbol::operator==(const Rule &rule) const {
|
||||
const Symbol *other = dynamic_cast<const Symbol *>(&rule);
|
||||
return other && this->operator==(*other);
|
||||
}
|
||||
|
||||
size_t Symbol::hash_code() const {
|
||||
return hash<int>()(index) ^ hash<int16_t>()(options);
|
||||
}
|
||||
|
||||
rule_ptr Symbol::copy() const { return std::make_shared<Symbol>(*this); }
|
||||
|
||||
string Symbol::to_string() const {
|
||||
string name = (options & SymbolOptionAuxiliary) ? "aux_" : "";
|
||||
name += (options & SymbolOptionToken) ? "token" : "sym";
|
||||
return "#<" + name + " " + std::to_string(index) + ">";
|
||||
}
|
||||
|
||||
bool Symbol::operator<(const Symbol &other) const {
|
||||
if (options < other.options)
|
||||
return true;
|
||||
if (options > other.options)
|
||||
return false;
|
||||
return (index < other.index);
|
||||
}
|
||||
|
||||
bool Symbol::is_token() const { return options & SymbolOptionToken; }
|
||||
|
||||
bool Symbol::is_built_in() const { return index < 0; }
|
||||
|
||||
bool Symbol::is_auxiliary() const { return options & SymbolOptionAuxiliary; }
|
||||
|
||||
void Symbol::accept(Visitor *visitor) const { visitor->visit(this); }
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,44 +4,48 @@
|
|||
#include <string>
|
||||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
typedef enum {
|
||||
SymbolOptionToken = 1 << 0,
|
||||
SymbolOptionAuxiliary = 1 << 1,
|
||||
} SymbolOption;
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class Symbol : public Rule {
|
||||
public:
|
||||
explicit Symbol(int index);
|
||||
Symbol(int index, SymbolOption options);
|
||||
typedef enum {
|
||||
SymbolOptionToken = 1 << 0,
|
||||
SymbolOptionAuxiliary = 1 << 1,
|
||||
} SymbolOption;
|
||||
|
||||
bool operator==(const Symbol &other) const;
|
||||
bool operator==(const Rule &other) const;
|
||||
class Symbol : public Rule {
|
||||
public:
|
||||
explicit Symbol(int index);
|
||||
Symbol(int index, SymbolOption options);
|
||||
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
bool operator==(const Symbol &other) const;
|
||||
bool operator==(const Rule &other) const;
|
||||
|
||||
bool operator<(const Symbol &other) const;
|
||||
bool is_token() const;
|
||||
bool is_built_in() const;
|
||||
bool is_auxiliary() const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
int index;
|
||||
SymbolOption options;
|
||||
};
|
||||
}
|
||||
}
|
||||
bool operator<(const Symbol &other) const;
|
||||
bool is_token() const;
|
||||
bool is_built_in() const;
|
||||
bool is_auxiliary() const;
|
||||
|
||||
int index;
|
||||
SymbolOption options;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::rules::Symbol> {
|
||||
size_t operator()(const tree_sitter::rules::Symbol &rule) const {
|
||||
return rule.hash_code();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::rules::Symbol> {
|
||||
size_t operator()(const tree_sitter::rules::Symbol &rule) const {
|
||||
return rule.hash_code();
|
||||
}
|
||||
};
|
||||
|
||||
} // std
|
||||
|
||||
#endif // COMPILER_RULES_SYMBOL_H_
|
||||
|
|
|
|||
|
|
@ -11,32 +11,34 @@
|
|||
#include "compiler/rules/repeat.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::vector;
|
||||
namespace rules {
|
||||
|
||||
namespace rules {
|
||||
Visitor::~Visitor() {}
|
||||
using std::vector;
|
||||
|
||||
rule_ptr IdentityRuleFn::default_apply(const Rule *rule) {
|
||||
return rule->copy();
|
||||
}
|
||||
Visitor::~Visitor() {}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Choice *rule) {
|
||||
vector<rule_ptr> rules;
|
||||
for (const auto &el : rule->elements)
|
||||
rules.push_back(apply(el));
|
||||
return Choice::Build(rules);
|
||||
}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Seq *rule) {
|
||||
return Seq::Build({ apply(rule->left), apply(rule->right) });
|
||||
}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) {
|
||||
return std::make_shared<Repeat>(apply(rule->content));
|
||||
}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) {
|
||||
return std::make_shared<Metadata>(apply(rule->rule), rule->value);
|
||||
}
|
||||
}
|
||||
rule_ptr IdentityRuleFn::default_apply(const Rule *rule) {
|
||||
return rule->copy();
|
||||
}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Choice *rule) {
|
||||
vector<rule_ptr> rules;
|
||||
for (const auto &el : rule->elements)
|
||||
rules.push_back(apply(el));
|
||||
return Choice::Build(rules);
|
||||
}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Seq *rule) {
|
||||
return Seq::Build({ apply(rule->left), apply(rule->right) });
|
||||
}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) {
|
||||
return std::make_shared<Repeat>(apply(rule->content));
|
||||
}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) {
|
||||
return std::make_shared<Metadata>(apply(rule->rule), rule->value);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,79 +4,101 @@
|
|||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Blank;
|
||||
class NamedSymbol;
|
||||
class CharacterSet;
|
||||
class Choice;
|
||||
class Repeat;
|
||||
class Seq;
|
||||
class String;
|
||||
class Symbol;
|
||||
class Pattern;
|
||||
class Metadata;
|
||||
namespace rules {
|
||||
|
||||
class Visitor {
|
||||
public:
|
||||
virtual void visit(const Blank *rule) = 0;
|
||||
virtual void visit(const CharacterSet *rule) = 0;
|
||||
virtual void visit(const Choice *rule) = 0;
|
||||
virtual void visit(const Metadata *rule) = 0;
|
||||
virtual void visit(const Pattern *rule) = 0;
|
||||
virtual void visit(const Repeat *rule) = 0;
|
||||
virtual void visit(const Seq *rule) = 0;
|
||||
virtual void visit(const String *rule) = 0;
|
||||
virtual void visit(const NamedSymbol *rule) = 0;
|
||||
virtual void visit(const Symbol *rule) = 0;
|
||||
virtual ~Visitor();
|
||||
};
|
||||
class Blank;
|
||||
class NamedSymbol;
|
||||
class CharacterSet;
|
||||
class Choice;
|
||||
class Repeat;
|
||||
class Seq;
|
||||
class String;
|
||||
class Symbol;
|
||||
class Pattern;
|
||||
class Metadata;
|
||||
|
||||
template<typename T>
|
||||
class RuleFn : private Visitor {
|
||||
public:
|
||||
T apply(const rule_ptr &rule) {
|
||||
value_ = T();
|
||||
rule->accept(this);
|
||||
return value_;
|
||||
}
|
||||
class Visitor {
|
||||
public:
|
||||
virtual void visit(const Blank *rule) = 0;
|
||||
virtual void visit(const CharacterSet *rule) = 0;
|
||||
virtual void visit(const Choice *rule) = 0;
|
||||
virtual void visit(const Metadata *rule) = 0;
|
||||
virtual void visit(const Pattern *rule) = 0;
|
||||
virtual void visit(const Repeat *rule) = 0;
|
||||
virtual void visit(const Seq *rule) = 0;
|
||||
virtual void visit(const String *rule) = 0;
|
||||
virtual void visit(const NamedSymbol *rule) = 0;
|
||||
virtual void visit(const Symbol *rule) = 0;
|
||||
virtual ~Visitor();
|
||||
};
|
||||
|
||||
protected:
|
||||
virtual T default_apply(const Rule *rule) { return T(); }
|
||||
virtual T apply_to(const Blank *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const CharacterSet *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const Choice *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const Metadata *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const Pattern *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const Repeat *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const Seq *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const String *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const NamedSymbol *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const Symbol *rule) { return default_apply((const Rule *)rule); }
|
||||
template <typename T>
|
||||
class RuleFn : private Visitor {
|
||||
public:
|
||||
T apply(const rule_ptr &rule) {
|
||||
value_ = T();
|
||||
rule->accept(this);
|
||||
return value_;
|
||||
}
|
||||
|
||||
void visit(const Blank *rule) { value_ = apply_to(rule); }
|
||||
void visit(const CharacterSet *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Choice *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Metadata *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Pattern *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Repeat *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Seq *rule) { value_ = apply_to(rule); }
|
||||
void visit(const String *rule) { value_ = apply_to(rule); }
|
||||
void visit(const NamedSymbol *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Symbol *rule) { value_ = apply_to(rule); }
|
||||
protected:
|
||||
virtual T default_apply(const Rule *rule) { return T(); }
|
||||
virtual T apply_to(const Blank *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual T apply_to(const CharacterSet *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual T apply_to(const Choice *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual T apply_to(const Metadata *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual T apply_to(const Pattern *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual T apply_to(const Repeat *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual T apply_to(const Seq *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual T apply_to(const String *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual T apply_to(const NamedSymbol *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual T apply_to(const Symbol *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
private:
|
||||
T value_;
|
||||
};
|
||||
void visit(const Blank *rule) { value_ = apply_to(rule); }
|
||||
void visit(const CharacterSet *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Choice *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Metadata *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Pattern *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Repeat *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Seq *rule) { value_ = apply_to(rule); }
|
||||
void visit(const String *rule) { value_ = apply_to(rule); }
|
||||
void visit(const NamedSymbol *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Symbol *rule) { value_ = apply_to(rule); }
|
||||
|
||||
class IdentityRuleFn : public RuleFn<rule_ptr> {
|
||||
protected:
|
||||
virtual rule_ptr default_apply(const Rule *rule);
|
||||
virtual rule_ptr apply_to(const Choice *rule);
|
||||
virtual rule_ptr apply_to(const Metadata *rule);
|
||||
virtual rule_ptr apply_to(const Seq *rule);
|
||||
virtual rule_ptr apply_to(const Repeat *rule);
|
||||
};
|
||||
}
|
||||
}
|
||||
private:
|
||||
T value_;
|
||||
};
|
||||
|
||||
// A RuleFn whose result type is rule_ptr. Only declarations are visible here:
// the class supplies its own default_apply plus apply_to overloads for
// Choice, Metadata, Seq and Repeat; the definitions live out of line.
// NOTE(review): the name suggests each rule maps to an equivalent rule so
// that subclasses only override the cases they rewrite; confirm against the
// definitions.
class IdentityRuleFn : public RuleFn<rule_ptr> {
|
||||
protected:
|
||||
virtual rule_ptr default_apply(const Rule *rule);
|
||||
virtual rule_ptr apply_to(const Choice *rule);
|
||||
virtual rule_ptr apply_to(const Metadata *rule);
|
||||
virtual rule_ptr apply_to(const Seq *rule);
|
||||
virtual rule_ptr apply_to(const Repeat *rule);
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_VISITOR_H_
|
||||
|
|
|
|||
|
|
@ -2,47 +2,50 @@
|
|||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
namespace util {
|
||||
|
||||
namespace util {
|
||||
// Replaces every non-overlapping occurrence of `search` inside `*input` with
// `replace`, modifying the string in place.
//
// Scanning resumes immediately after each inserted replacement, so text that
// `replace` introduces is never matched again.
//
// An empty `search` is a no-op: `find` would match it at every position and
// the loop would never finish.
void str_replace(string *input, const string &search, const string &replace) {
  if (search.empty())
    return;

  size_t pos = 0;
  while ((pos = input->find(search, pos)) != string::npos) {
    // Swap the matched span for the replacement in a single operation.
    input->replace(pos, search.length(), replace);
    pos += replace.length();
  }
}
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
|
||||
// Returns a copy of `input` in which every double quote is rewritten as a
// backslash followed by a double quote, and every newline character as the
// two characters backslash + 'n'. Quotes are rewritten first, then newlines.
// No other character (backslash included) is touched.
string escape_string(string input) {
  static const char *const substitutions[][2] = {
    { "\"", "\\\"" },
    { "\n", "\\n" },
  };
  const size_t count = sizeof(substitutions) / sizeof(substitutions[0]);
  for (size_t i = 0; i < count; i++)
    str_replace(&input, substitutions[i][0], substitutions[i][1]);
  return input;
}
|
||||
|
||||
// Returns the backslash-escaped spelling of `character`.
//
// NUL, double quote, single quote, newline, carriage return, tab and
// backslash each map to a two-character sequence starting with a backslash.
// Any other character comes back unchanged as a one-character string.
string escape_char(char character) {
  if (character == '\0')
    return "\\0";
  if (character == '"')
    return "\\\"";
  if (character == '\'')
    return "\\'";
  if (character == '\n')
    return "\\n";
  if (character == '\r')
    return "\\r";
  if (character == '\t')
    return "\\t";
  if (character == '\\')
    return "\\\\";
  return string(1, character);
}
|
||||
}
|
||||
// Rewrites `*input` in place so that each non-overlapping occurrence of
// `search` becomes `replace`.
//
// After a substitution the scan continues just past the inserted text, which
// keeps replacement text from being matched again.
//
// Passing an empty `search` leaves `*input` untouched. Without this guard
// `find` reports a match at every position and the loop cannot terminate.
void str_replace(string *input, const string &search, const string &replace) {
  if (search.empty())
    return;

  size_t pos = 0;
  while ((pos = input->find(search, pos)) != string::npos) {
    // One call removes the match and inserts the replacement.
    input->replace(pos, search.length(), replace);
    pos += replace.length();
  }
}
|
||||
|
||||
// Produces an escaped copy of `input`: each double quote becomes backslash +
// double quote, and each newline character becomes backslash + 'n'. The quote
// pass runs before the newline pass. Other characters, including backslash,
// are left exactly as they were.
string escape_string(string input) {
  static const char *const substitutions[][2] = {
    { "\"", "\\\"" },
    { "\n", "\\n" },
  };
  const size_t count = sizeof(substitutions) / sizeof(substitutions[0]);
  for (size_t i = 0; i < count; i++)
    str_replace(&input, substitutions[i][0], substitutions[i][1]);
  return input;
}
|
||||
|
||||
// Returns the backslash-escaped spelling of `character`.
//
// The table below lists every character that gets a two-character escape
// (NUL, both quote kinds, newline, carriage return, tab, backslash). Any
// character not in the table is returned unchanged as a one-character string.
string escape_char(char character) {
  static const struct {
    char raw;
    const char *escaped;
  } escapes[] = {
    { '\0', "\\0" },
    { '"', "\\\"" },
    { '\'', "\\'" },
    { '\n', "\\n" },
    { '\r', "\\r" },
    { '\t', "\\t" },
    { '\\', "\\\\" },
  };

  for (size_t i = 0; i < sizeof(escapes) / sizeof(escapes[0]); i++) {
    if (escapes[i].raw == character)
      return escapes[i].escaped;
  }
  return string(1, character);
}
|
||||
|
||||
} // namespace util
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -6,11 +6,14 @@
|
|||
#include <set>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace util {
|
||||
// Replaces each occurrence of `search` in `*input` with `replace`, in place.
void str_replace(std::string *input, const std::string &search, const std::string &replace);
|
||||
// Returns `input` with double quotes and newline characters backslash-escaped.
std::string escape_string(std::string input);
|
||||
// Returns the backslash-escaped spelling of `character`, or the character
// itself as a one-character string when it has no escape.
std::string escape_char(char character);
|
||||
}
|
||||
}
|
||||
namespace util {
|
||||
|
||||
// Replaces each occurrence of `search` in `*input` with `replace`, in place.
void str_replace(std::string *input, const std::string &search,
|
||||
const std::string &replace);
|
||||
// Returns `input` with double quotes and newline characters backslash-escaped.
std::string escape_string(std::string input);
|
||||
// Returns the backslash-escaped spelling of `character`, or the character
// itself as a one-character string when it has no escape.
std::string escape_char(char character);
|
||||
|
||||
} // namespace util
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_UTIL_STRING_HELPERS_H_
|
||||
|
|
|
|||
|
|
@ -11,11 +11,9 @@ struct TSDocument {
|
|||
size_t error_count;
|
||||
};
|
||||
|
||||
TSDocument * ts_document_make() {
|
||||
TSDocument *ts_document_make() {
|
||||
TSDocument *document = malloc(sizeof(TSDocument));
|
||||
*document = (TSDocument) {
|
||||
.input = (TSInput) {}
|
||||
};
|
||||
*document = (TSDocument) { .input = (TSInput) {} };
|
||||
return document;
|
||||
}
|
||||
|
||||
|
|
@ -33,12 +31,13 @@ void ts_document_set_parser(TSDocument *document, TSParser *parser) {
|
|||
document->parser = parser;
|
||||
}
|
||||
|
||||
const TSTree * ts_document_tree(const TSDocument *document) {
|
||||
const TSTree *ts_document_tree(const TSDocument *document) {
|
||||
return document->tree;
|
||||
}
|
||||
|
||||
const char * ts_document_string(const TSDocument *document) {
|
||||
return ts_tree_string(document->tree, ts_parser_config(document->parser).symbol_names);
|
||||
const char *ts_document_string(const TSDocument *document) {
|
||||
return ts_tree_string(document->tree,
|
||||
ts_parser_config(document->parser).symbol_names);
|
||||
}
|
||||
|
||||
void ts_document_set_input(TSDocument *document, TSInput input) {
|
||||
|
|
@ -50,7 +49,8 @@ void ts_document_edit(TSDocument *document, TSInputEdit edit) {
|
|||
document->tree = ts_parser_parse(document->parser, document->input, &edit);
|
||||
}
|
||||
|
||||
const char * ts_document_symbol_name(const TSDocument *document, const TSTree *tree) {
|
||||
const char *ts_document_symbol_name(const TSDocument *document,
|
||||
const TSTree *tree) {
|
||||
return ts_parser_config(document->parser).symbol_names[tree->symbol];
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +60,7 @@ typedef struct {
|
|||
size_t length;
|
||||
} TSStringInput;
|
||||
|
||||
const char * ts_string_input_read(void *d, size_t *bytes_read) {
|
||||
const char *ts_string_input_read(void *d, size_t *bytes_read) {
|
||||
TSStringInput *data = (TSStringInput *)d;
|
||||
if (data->position >= data->length) {
|
||||
*bytes_read = 0;
|
||||
|
|
@ -83,24 +83,22 @@ TSInput ts_string_input_make(const char *string) {
|
|||
data->string = string;
|
||||
data->position = 0;
|
||||
data->length = strlen(string);
|
||||
TSInput input = {
|
||||
.data = (void *)data,
|
||||
.read_fn = ts_string_input_read,
|
||||
.seek_fn = ts_string_input_seek,
|
||||
.release_fn = free,
|
||||
};
|
||||
return input;
|
||||
return (TSInput) { .data = (void *)data,
|
||||
.read_fn = ts_string_input_read,
|
||||
.seek_fn = ts_string_input_seek,
|
||||
.release_fn = free };
|
||||
}
|
||||
|
||||
/*
 * Convenience wrapper: builds a TSInput from `text` with
 * ts_string_input_make and hands it to ts_document_set_input.
 * NOTE(review): ts_string_input_make appears to keep the `text` pointer
 * rather than copy the characters; confirm whether callers must keep `text`
 * alive for as long as the document uses this input.
 */
void ts_document_set_input_string(TSDocument *document, const char *text) {
|
||||
ts_document_set_input(document, ts_string_input_make(text));
|
||||
}
|
||||
|
||||
TSNode * ts_document_root_node(const TSDocument *document) {
|
||||
return ts_node_make_root(document->tree, document->parser->config.symbol_names);
|
||||
TSNode *ts_document_root_node(const TSDocument *document) {
|
||||
return ts_node_make_root(document->tree,
|
||||
document->parser->config.symbol_names);
|
||||
}
|
||||
|
||||
TSNode * ts_document_get_node(const TSDocument *document, size_t pos) {
|
||||
TSNode *ts_document_get_node(const TSDocument *document, size_t pos) {
|
||||
TSNode *root = ts_document_root_node(document);
|
||||
TSNode *result = ts_node_leaf_at_pos(root, pos);
|
||||
ts_node_release(root);
|
||||
|
|
|
|||
|
|
@ -2,16 +2,14 @@
|
|||
#include "runtime/tree.h"
|
||||
|
||||
TSLexer ts_lexer_make() {
|
||||
return (TSLexer) {
|
||||
.chunk = NULL,
|
||||
.debug = 0,
|
||||
.chunk_start = 0,
|
||||
.chunk_size = 0,
|
||||
.position_in_chunk = 0,
|
||||
.token_start_position = 0,
|
||||
.token_end_position = 0,
|
||||
.reached_end = 0
|
||||
};
|
||||
return (TSLexer) { .chunk = NULL,
|
||||
.debug = 0,
|
||||
.chunk_start = 0,
|
||||
.chunk_size = 0,
|
||||
.position_in_chunk = 0,
|
||||
.token_start_position = 0,
|
||||
.token_end_position = 0,
|
||||
.reached_end = 0 };
|
||||
}
|
||||
|
||||
int ts_lexer_advance(TSLexer *lexer) {
|
||||
|
|
@ -33,11 +31,10 @@ int ts_lexer_advance(TSLexer *lexer) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
TSTree * ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
|
||||
TSTree *ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
|
||||
size_t current_position = ts_lexer_position(lexer);
|
||||
size_t size = current_position - lexer->token_start_position;
|
||||
size_t offset = lexer->token_start_position - lexer->token_end_position;
|
||||
lexer->token_end_position = current_position;
|
||||
return ts_tree_make_leaf(symbol, size, offset, is_hidden);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,34 +1,33 @@
|
|||
#include "runtime/node.h"
|
||||
#include "runtime/tree.h"
|
||||
|
||||
TSNode * ts_node_make(const TSTree *tree, TSNode *parent, size_t index, size_t start_position, const char **names) {
|
||||
if (parent) ts_node_retain(parent);
|
||||
TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index,
|
||||
size_t start_position, const char **names) {
|
||||
if (parent)
|
||||
ts_node_retain(parent);
|
||||
TSNode *result = malloc(sizeof(TSNode));
|
||||
*result = (TSNode) {
|
||||
.ref_count = 1,
|
||||
.parent = parent,
|
||||
.index = index,
|
||||
.content = tree,
|
||||
.start_position = start_position,
|
||||
.names = names,
|
||||
};
|
||||
*result = (TSNode) { .ref_count = 1,
|
||||
.parent = parent,
|
||||
.index = index,
|
||||
.content = tree,
|
||||
.start_position = start_position,
|
||||
.names = names, };
|
||||
return result;
|
||||
}
|
||||
|
||||
TSNode * ts_node_make_root(const TSTree *tree, const char **names) {
|
||||
TSNode *ts_node_make_root(const TSTree *tree, const char **names) {
|
||||
while (ts_tree_is_wrapper(tree))
|
||||
tree = tree->children[0];
|
||||
return ts_node_make(tree, NULL, 0, 0, names);
|
||||
}
|
||||
|
||||
void ts_node_retain(TSNode *node) {
|
||||
node->ref_count++;
|
||||
}
|
||||
void ts_node_retain(TSNode *node) { node->ref_count++; }
|
||||
|
||||
void ts_node_release(TSNode *node) {
|
||||
node->ref_count--;
|
||||
if (node->ref_count == 0) {
|
||||
if (node->parent) ts_node_release(node->parent);
|
||||
if (node->parent)
|
||||
ts_node_release(node->parent);
|
||||
free(node);
|
||||
}
|
||||
}
|
||||
|
|
@ -37,31 +36,27 @@ size_t ts_node_pos(const TSNode *node) {
|
|||
return node->start_position + node->content->offset;
|
||||
}
|
||||
|
||||
size_t ts_node_size(const TSNode *node) {
|
||||
return node->content->size;
|
||||
}
|
||||
size_t ts_node_size(const TSNode *node) { return node->content->size; }
|
||||
|
||||
/*
 * Compares two nodes by passing their `content` trees to ts_tree_equals and
 * returning its result. The nodes' parent, index and start_position fields
 * are not consulted.
 */
int ts_node_eq(const TSNode *left, const TSNode *right) {
|
||||
return ts_tree_equals(left->content, right->content);
|
||||
}
|
||||
|
||||
const char * ts_node_name(const TSNode *node) {
|
||||
const char *ts_node_name(const TSNode *node) {
|
||||
return node->names[node->content->symbol];
|
||||
}
|
||||
|
||||
const char * ts_node_string(const TSNode *node) {
|
||||
const char *ts_node_string(const TSNode *node) {
|
||||
return ts_tree_string(node->content, node->names);
|
||||
}
|
||||
|
||||
TSNode * ts_node_parent(TSNode *child) {
|
||||
return child->parent;
|
||||
}
|
||||
TSNode *ts_node_parent(TSNode *child) { return child->parent; }
|
||||
|
||||
TSNode * ts_node_prev_sibling(TSNode *child) {
|
||||
TSNode *ts_node_prev_sibling(TSNode *child) {
|
||||
return ts_node_child(child->parent, child->index - 1);
|
||||
}
|
||||
|
||||
TSNode * ts_node_next_sibling(TSNode *child) {
|
||||
TSNode *ts_node_next_sibling(TSNode *child) {
|
||||
return ts_node_child(child->parent, child->index + 1);
|
||||
}
|
||||
|
||||
|
|
@ -71,25 +66,29 @@ size_t ts_node_child_count(const TSNode *parent) {
|
|||
return result;
|
||||
}
|
||||
|
||||
TSNode * ts_node_child(TSNode *parent, size_t index) {
|
||||
TSNode *ts_node_child(TSNode *parent, size_t index) {
|
||||
size_t child_count;
|
||||
TSChildWithPosition *children = ts_tree_visible_children(parent->content, &child_count);
|
||||
TSChildWithPosition *children =
|
||||
ts_tree_visible_children(parent->content, &child_count);
|
||||
if (child_count <= index)
|
||||
return NULL;
|
||||
size_t position = parent->start_position + children[index].position;
|
||||
return ts_node_make(children[index].tree, parent, index, position, parent->names);
|
||||
return ts_node_make(children[index].tree, parent, index, position,
|
||||
parent->names);
|
||||
}
|
||||
|
||||
TSNode * ts_node_leaf_at_pos(TSNode *parent, size_t position) {
|
||||
TSNode *ts_node_leaf_at_pos(TSNode *parent, size_t position) {
|
||||
size_t child_count;
|
||||
TSChildWithPosition *children = ts_tree_visible_children(parent->content, &child_count);
|
||||
TSChildWithPosition *children =
|
||||
ts_tree_visible_children(parent->content, &child_count);
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
TSChildWithPosition child = children[i];
|
||||
size_t child_left = child.position + child.tree->offset;
|
||||
if (child_left > position)
|
||||
break;
|
||||
if (child_left + child.tree->size > position) {
|
||||
TSNode *node = ts_node_make(child.tree, parent, i, child.position, parent->names);
|
||||
TSNode *node =
|
||||
ts_node_make(child.tree, parent, i, child.position, parent->names);
|
||||
TSNode *result = ts_node_leaf_at_pos(node, position);
|
||||
ts_node_release(node);
|
||||
return result;
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue