In parse table, store symbols as Symbol objects, not strings

This commit is contained in:
Max Brunsfeld 2014-01-27 12:40:06 -08:00
parent 0877d01194
commit ca33c3942a
20 changed files with 262 additions and 262 deletions

View file

@ -30,22 +30,22 @@ describe("computing closures of item sets", []() {
it("computes the item set closure", [&]() {
ParseItemSet item_set = item_set_closure(ParseItemSet({
ParseItem("E", grammar.rule("E"), 0, "__END__")
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), 0, Symbol("__END__"))
}), grammar);
AssertThat(item_set, Equals(ParseItemSet({
ParseItem("F", grammar.rule("F"), 0, "__END__"),
ParseItem("F", grammar.rule("F"), 0, "+"),
ParseItem("F", grammar.rule("F"), 0, "*"),
ParseItem("T", grammar.rule("T"), 0, "__END__"),
ParseItem("T", grammar.rule("T"), 0, "+"),
ParseItem("E", grammar.rule("E"), 0, "__END__"),
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("__END__")),
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("+")),
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("*")),
ParseItem(Symbol("T"), grammar.rule(Symbol("T")), 0, Symbol("__END__")),
ParseItem(Symbol("T"), grammar.rule(Symbol("T")), 0, Symbol("+")),
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), 0, Symbol("__END__")),
})));
ParseItemSet next_item_set = *sym_transitions(item_set, grammar)[rules::Symbol("v")];
AssertThat(next_item_set, Equals(ParseItemSet({
ParseItem("F", rules::blank(), 1, "__END__"),
ParseItem("F", rules::blank(), 1, "*"),
ParseItem("F", rules::blank(), 1, "+"),
ParseItem(Symbol("F"), rules::blank(), 1, Symbol("__END__")),
ParseItem(Symbol("F"), rules::blank(), 1, Symbol("*")),
ParseItem(Symbol("F"), rules::blank(), 1, Symbol("+")),
})));
});
});

View file

@ -8,8 +8,8 @@ using namespace rules;
typedef unordered_set<ParseAction> parse_actions;
typedef unordered_set<LexAction> lex_actions;
static unordered_set<string> keys(const unordered_map<string, parse_actions> &map) {
unordered_set<string> result;
static unordered_set<Symbol> keys(const unordered_map<Symbol, parse_actions> &map) {
unordered_set<Symbol> result;
for (auto pair : map) {
result.insert(pair.first);
}
@ -51,7 +51,6 @@ describe("building parse and lex tables", []() {
{ "left-paren", str("(") },
{ "right-paren", str(")") }
});
ParseTable table;
LexTable lex_table;
@ -72,12 +71,12 @@ describe("building parse and lex tables", []() {
};
it("has the right starting state", [&]() {
AssertThat(keys(parse_state(0).actions), Equals(unordered_set<string>({
"expression",
"term",
"number",
"variable",
"left-paren",
AssertThat(keys(parse_state(0).actions), Equals(unordered_set<Symbol>({
Symbol("expression"),
Symbol("term"),
Symbol("number"),
Symbol("variable"),
Symbol("left-paren"),
})));
AssertThat(keys(lex_state(0).actions), Equals(unordered_set<CharMatch>({
@ -94,15 +93,15 @@ describe("building parse and lex tables", []() {
});
it("accepts when the start symbol is reduced", [&]() {
AssertThat(parse_state(1).actions, Equals(unordered_map<string, parse_actions>({
{ "__END__", parse_actions({ ParseAction::Accept() }) }
AssertThat(parse_state(1).actions, Equals(unordered_map<Symbol, parse_actions>({
{ Symbol("__END__"), parse_actions({ ParseAction::Accept() }) }
})));
});
it("has the right next states", [&]() {
AssertThat(parse_state(2).actions, Equals(unordered_map<string, parse_actions>({
{ "plus", parse_actions({ ParseAction::Shift(3) }) },
{ "__END__", parse_actions({ ParseAction::Reduce("expression", 1) }) },
AssertThat(parse_state(2).actions, Equals(unordered_map<Symbol, parse_actions>({
{ Symbol("plus"), parse_actions({ ParseAction::Shift(3) }) },
{ Symbol("__END__"), parse_actions({ ParseAction::Reduce(Symbol("expression"), 1) }) },
})));
});
});

View file

@ -3,28 +3,28 @@
enum ts_symbol {
ts_symbol_factor,
ts_symbol_plus,
ts_symbol_number,
ts_symbol___END__,
ts_symbol_variable,
ts_symbol_1,
ts_symbol_2,
ts_symbol_term,
ts_symbol_number,
ts_symbol_variable,
ts_symbol_times,
ts_symbol_term,
ts_symbol_expression,
ts_symbol_plus,
ts_symbol_2,
ts_symbol___END__,
};
static const char *ts_symbol_names[] = {
"factor",
"plus",
"number",
"__END__",
"variable",
"1",
"2",
"term",
"number",
"variable",
"times",
"term",
"expression",
"plus",
"2",
"__END__",
};
static void ts_lex(TSParser *parser) {
@ -118,18 +118,18 @@ static TSParseResult ts_parse(const char *input) {
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(45);
case ts_symbol_variable:
SHIFT(41);
case ts_symbol_term:
SHIFT(2);
case ts_symbol_1:
SHIFT(42);
case ts_symbol_number:
SHIFT(41);
case ts_symbol_variable:
SHIFT(41);
case ts_symbol_term:
SHIFT(2);
case ts_symbol_expression:
SHIFT(1);
default:
PARSE_ERROR(6, EXPECT({"expression", "number", "1", "term", "variable", "factor"}));
PARSE_ERROR(6, EXPECT({"expression", "factor", "variable", "number", "1", "term"}));
}
case 1:
SET_LEX_STATE(0);
@ -152,18 +152,18 @@ static TSParseResult ts_parse(const char *input) {
case 3:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(34);
case ts_symbol_variable:
SHIFT(5);
case ts_symbol_1:
SHIFT(6);
case ts_symbol_number:
SHIFT(5);
case ts_symbol_variable:
SHIFT(5);
case ts_symbol_factor:
SHIFT(34);
case ts_symbol_term:
SHIFT(4);
default:
PARSE_ERROR(5, EXPECT({"term", "number", "1", "variable", "factor"}));
PARSE_ERROR(5, EXPECT({"term", "factor", "variable", "number", "1"}));
}
case 4:
SET_LEX_STATE(0);
@ -186,20 +186,20 @@ static TSParseResult ts_parse(const char *input) {
case 6:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_expression:
SHIFT(32);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
default:
PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"}));
PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"}));
}
case 7:
SET_LEX_STATE(9);
@ -214,18 +214,18 @@ static TSParseResult ts_parse(const char *input) {
case 8:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(25);
case ts_symbol_variable:
SHIFT(10);
case ts_symbol_1:
SHIFT(11);
case ts_symbol_number:
SHIFT(10);
case ts_symbol_variable:
SHIFT(10);
case ts_symbol_factor:
SHIFT(25);
case ts_symbol_term:
SHIFT(9);
default:
PARSE_ERROR(5, EXPECT({"term", "number", "1", "variable", "factor"}));
PARSE_ERROR(5, EXPECT({"term", "factor", "variable", "number", "1"}));
}
case 9:
SET_LEX_STATE(4);
@ -248,20 +248,20 @@ static TSParseResult ts_parse(const char *input) {
case 11:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_expression:
SHIFT(23);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
default:
PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"}));
PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"}));
}
case 12:
SET_LEX_STATE(7);
@ -278,20 +278,20 @@ static TSParseResult ts_parse(const char *input) {
case 13:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_expression:
SHIFT(14);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
default:
PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"}));
PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"}));
}
case 14:
SET_LEX_STATE(4);
@ -352,20 +352,20 @@ static TSParseResult ts_parse(const char *input) {
case 19:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_expression:
SHIFT(20);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
default:
PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"}));
PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"}));
}
case 20:
SET_LEX_STATE(4);
@ -448,20 +448,20 @@ static TSParseResult ts_parse(const char *input) {
case 28:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_expression:
SHIFT(29);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
default:
PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"}));
PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"}));
}
case 29:
SET_LEX_STATE(4);
@ -540,20 +540,20 @@ static TSParseResult ts_parse(const char *input) {
case 37:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_expression:
SHIFT(38);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
default:
PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"}));
PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"}));
}
case 38:
SET_LEX_STATE(4);
@ -594,20 +594,20 @@ static TSParseResult ts_parse(const char *input) {
case 42:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_expression:
SHIFT(43);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
default:
PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"}));
PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"}));
}
case 43:
SET_LEX_STATE(4);
@ -668,20 +668,20 @@ static TSParseResult ts_parse(const char *input) {
case 48:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_expression:
SHIFT(49);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
SHIFT(7);
default:
PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"}));
PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"}));
}
case 49:
SET_LEX_STATE(4);

View file

@ -4,37 +4,37 @@
enum ts_symbol {
ts_symbol_number,
ts_symbol_string,
ts_symbol_array,
ts_symbol_7,
ts_symbol_object,
ts_symbol_repeat_helper2,
ts_symbol_6,
ts_symbol_4,
ts_symbol_repeat_helper1,
ts_symbol_3,
ts_symbol_6,
ts_symbol_7,
ts_symbol_4,
ts_symbol___END__,
ts_symbol_array,
ts_symbol_2,
ts_symbol_5,
ts_symbol_1,
ts_symbol_value,
ts_symbol___END__,
};
static const char *ts_symbol_names[] = {
"number",
"string",
"array",
"7",
"object",
"repeat_helper2",
"6",
"4",
"repeat_helper1",
"3",
"6",
"7",
"4",
"__END__",
"array",
"2",
"5",
"1",
"value",
"__END__",
};
static void ts_lex(TSParser *parser) {
@ -116,22 +116,22 @@ static TSParseResult ts_parse(const char *input) {
case 0:
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
SHIFT(31);
case ts_symbol_string:
SHIFT(31);
case ts_symbol_array:
SHIFT(31);
case ts_symbol_object:
SHIFT(31);
case ts_symbol_4:
SHIFT(25);
case ts_symbol_number:
SHIFT(31);
case ts_symbol_1:
SHIFT(2);
case ts_symbol_string:
SHIFT(31);
case ts_symbol_object:
SHIFT(31);
case ts_symbol_value:
SHIFT(1);
default:
PARSE_ERROR(7, EXPECT({"value", "1", "object", "array", "4", "string", "number"}));
PARSE_ERROR(7, EXPECT({"value", "object", "string", "number", "1", "4", "array"}));
}
case 1:
SET_LEX_STATE(0);
@ -144,42 +144,42 @@ static TSParseResult ts_parse(const char *input) {
case 2:
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
case ts_symbol_array:
SHIFT(19);
case ts_symbol_string:
SHIFT(19);
case ts_symbol_array:
SHIFT(19);
case ts_symbol_object:
SHIFT(19);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_value:
SHIFT(22);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_number:
SHIFT(19);
case ts_symbol_1:
SHIFT(3);
default:
PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"}));
PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"}));
}
case 3:
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
case ts_symbol_array:
SHIFT(19);
case ts_symbol_string:
SHIFT(19);
case ts_symbol_array:
SHIFT(19);
case ts_symbol_object:
SHIFT(19);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_value:
SHIFT(4);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_number:
SHIFT(19);
case ts_symbol_1:
SHIFT(3);
default:
PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"}));
PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"}));
}
case 4:
SET_LEX_STATE(2);
@ -214,22 +214,22 @@ static TSParseResult ts_parse(const char *input) {
case 7:
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
case ts_symbol_array:
SHIFT(19);
case ts_symbol_string:
SHIFT(19);
case ts_symbol_array:
SHIFT(19);
case ts_symbol_object:
SHIFT(19);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_value:
SHIFT(20);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_number:
SHIFT(19);
case ts_symbol_1:
SHIFT(3);
default:
PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"}));
PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"}));
}
case 8:
SET_LEX_STATE(17);
@ -250,22 +250,22 @@ static TSParseResult ts_parse(const char *input) {
case 10:
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
case ts_symbol_array:
SHIFT(19);
case ts_symbol_string:
SHIFT(19);
case ts_symbol_array:
SHIFT(19);
case ts_symbol_object:
SHIFT(19);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_value:
SHIFT(11);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_number:
SHIFT(19);
case ts_symbol_1:
SHIFT(3);
default:
PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"}));
PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"}));
}
case 11:
SET_LEX_STATE(2);
@ -316,22 +316,22 @@ static TSParseResult ts_parse(const char *input) {
case 16:
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
case ts_symbol_array:
SHIFT(19);
case ts_symbol_string:
SHIFT(19);
case ts_symbol_array:
SHIFT(19);
case ts_symbol_object:
SHIFT(19);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_value:
SHIFT(17);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_number:
SHIFT(19);
case ts_symbol_1:
SHIFT(3);
default:
PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"}));
PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"}));
}
case 17:
SET_LEX_STATE(2);
@ -430,22 +430,22 @@ static TSParseResult ts_parse(const char *input) {
case 27:
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_number:
case ts_symbol_array:
SHIFT(19);
case ts_symbol_string:
SHIFT(19);
case ts_symbol_array:
SHIFT(19);
case ts_symbol_object:
SHIFT(19);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_value:
SHIFT(28);
case ts_symbol_4:
SHIFT(8);
case ts_symbol_number:
SHIFT(19);
case ts_symbol_1:
SHIFT(3);
default:
PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"}));
PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"}));
}
case 28:
SET_LEX_STATE(2);

View file

@ -23,7 +23,7 @@ describe("json", []() {
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (2) (6)))"));
TSDocumentSetText(document, "{\"key1\":1,\"key2\":2}");
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (6) (7)))"));
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (repeat_helper2 (7) (string) (5) (value (number)) (2)) (6)))"));
});
});

View file

@ -24,7 +24,7 @@ namespace tree_sitter {
void visit(const Symbol *rule) {
if (grammar.has_definition(*rule)) {
value = apply(grammar.rule(rule->name), grammar);
value = apply(grammar.rule(*rule), grammar);
} else {
value = set<Symbol>({ *rule });
}

View file

@ -3,8 +3,6 @@
#include "rule_transitions.h"
#include "grammar.h"
#include "stream_methods.h"
using std::unordered_map;
using std::set;
using std::dynamic_pointer_cast;
@ -22,7 +20,7 @@ namespace tree_sitter {
if (symbol && grammar.has_definition(*symbol)) {
auto following_non_terminals = first_set(pair.second, grammar);
if (rule_can_be_blank(pair.second)) {
following_non_terminals.insert(item.lookahead_sym_name);
following_non_terminals.insert(item.lookahead_sym);
}
result.insert({ *symbol, following_non_terminals });
}

View file

@ -5,11 +5,12 @@
using std::string;
using std::to_string;
using std::ostream;
using tree_sitter::rules::Symbol;
namespace tree_sitter {
namespace build_tables {
Item::Item(const string &rule_name, const rules::rule_ptr rule) :
rule_name(rule_name),
Item::Item(const Symbol &lhs, const rules::rule_ptr rule) :
lhs(lhs),
rule(rule) {};
bool Item::is_done() const {
@ -19,7 +20,7 @@ namespace tree_sitter {
ostream& operator<<(ostream &stream, const LexItem &item) {
return stream <<
string("#<item '") <<
item.rule_name <<
item.lhs <<
string("' ") <<
*item.rule <<
string(">");
@ -28,53 +29,53 @@ namespace tree_sitter {
ostream& operator<<(ostream &stream, const ParseItem &item) {
return stream <<
string("#<item '") <<
item.rule_name <<
item.lhs <<
string("' ") <<
*item.rule <<
string(" ") <<
to_string(item.consumed_sym_count) <<
string(" ") <<
item.lookahead_sym_name <<
item.lookahead_sym <<
string(">");
}
bool LexItem::operator<(const LexItem &other) const {
if (rule_name < other.rule_name) return true;
if (rule_name > other.rule_name) return false;
if (lhs < other.lhs) return true;
if (other.lhs < lhs) return false;
if (rule->to_string() < other.rule->to_string()) return true;
return false;
}
bool ParseItem::operator<(const ParseItem &other) const {
if (rule_name < other.rule_name) return true;
if (rule_name > other.rule_name) return false;
if (lhs < other.lhs) return true;
if (other.lhs < lhs) return false;
if (rule->to_string() < other.rule->to_string()) return true;
if (rule->to_string() > other.rule->to_string()) return false;
if (consumed_sym_count < other.consumed_sym_count) return true;
if (consumed_sym_count > other.consumed_sym_count) return false;
if (lookahead_sym_name < other.lookahead_sym_name) return true;
if (lookahead_sym < other.lookahead_sym) return true;
return false;
}
LexItem::LexItem(const std::string &rule_name, const rules::rule_ptr rule) : Item(rule_name, rule) {}
LexItem::LexItem(const Symbol &lhs, const rules::rule_ptr rule) : Item(lhs, rule) {}
bool LexItem::operator==(const LexItem &other) const {
bool rule_names_eq = other.rule_name == rule_name;
bool lhs_eq = other.lhs == lhs;
bool rules_eq = (*other.rule == *rule);
return rule_names_eq && rules_eq;
return lhs_eq && rules_eq;
}
ParseItem::ParseItem(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count, const std::string &lookahead_sym_name) :
Item(rule_name, rule),
ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym) :
Item(lhs, rule),
consumed_sym_count(consumed_sym_count),
lookahead_sym_name(lookahead_sym_name) {}
lookahead_sym(lookahead_sym) {}
bool ParseItem::operator==(const ParseItem &other) const {
bool rule_names_eq = other.rule_name == rule_name;
bool lhs_eq = other.lhs == lhs;
bool rules_eq = (*other.rule == *rule);
bool consumed_sym_counts_eq = (other.consumed_sym_count == consumed_sym_count);
bool lookaheads_eq = other.lookahead_sym_name == lookahead_sym_name;
return rule_names_eq && rules_eq && consumed_sym_counts_eq && lookaheads_eq;
bool lookaheads_eq = other.lookahead_sym == lookahead_sym;
return lhs_eq && rules_eq && consumed_sym_counts_eq && lookaheads_eq;
}
}
}

View file

@ -5,6 +5,7 @@
#include "rule.h"
#include <set>
#include <unordered_set>
#include "symbol.h"
namespace tree_sitter {
class Grammar;
@ -12,28 +13,28 @@ namespace tree_sitter {
namespace build_tables {
class Item {
public:
Item(const std::string &rule_name, const rules::rule_ptr rule);
Item(const rules::Symbol &lhs, const rules::rule_ptr rule);
bool is_done() const;
const std::string rule_name;
const rules::Symbol lhs;
const rules::rule_ptr rule;
};
class LexItem : public Item {
public:
LexItem(const std::string &rule_name, const rules::rule_ptr rule);
LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule);
bool operator<(const LexItem &other) const;
bool operator==(const LexItem &other) const;
};
class ParseItem : public Item {
public:
ParseItem(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count, const std::string &lookahead_sym_name);
ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym);
bool operator<(const ParseItem &other) const;
bool operator==(const ParseItem &other) const;
const int consumed_sym_count;
const std::string lookahead_sym_name;
const rules::Symbol lookahead_sym;
};
typedef std::set<ParseItem> ParseItemSet;
@ -49,8 +50,8 @@ namespace std {
struct hash<tree_sitter::build_tables::LexItem> {
size_t operator()(const tree_sitter::build_tables::Item &item) const {
return
hash<std::string>()(item.rule_name) ^
hash<tree_sitter::rules::Rule>()(*item.rule);
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
hash<tree_sitter::rules::Rule>()(*item.rule);
}
};
@ -58,10 +59,10 @@ namespace std {
struct hash<tree_sitter::build_tables::ParseItem> {
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
return
hash<std::string>()(item.rule_name) ^
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
hash<tree_sitter::rules::Rule>()(*item.rule) ^
hash<size_t>()(item.consumed_sym_count) ^
hash<std::string>()(item.lookahead_sym_name);
hash<tree_sitter::rules::Symbol>()(item.lookahead_sym);
}
};

View file

@ -20,7 +20,7 @@ namespace tree_sitter {
auto non_terminal = pair.first;
auto terminals = pair.second;
for (rules::Symbol terminal : terminals) {
auto next_item = ParseItem(non_terminal.name, grammar.rule(non_terminal.name), 0, terminal.name);
auto next_item = ParseItem(non_terminal, grammar.rule(non_terminal), 0, terminal);
add_item(item_set, next_item, grammar);
}
}

View file

@ -22,7 +22,7 @@ namespace tree_sitter {
for (auto transition : rule_transitions(item.rule)) {
auto rule = dynamic_pointer_cast<const rules::Character>(transition.first);
if (rule.get()) {
auto new_item = LexItem(item.rule_name, transition.second);
auto new_item = LexItem(item.lhs, transition.second);
auto new_item_set = LexItemSet({ new_item });
item_transitions.add(rule, make_shared<LexItemSet>(new_item_set));
}
@ -43,7 +43,7 @@ namespace tree_sitter {
for (auto transition : rule_transitions(item.rule)) {
auto rule = dynamic_pointer_cast<const rules::Symbol>(transition.first);
if (rule.get()) {
auto new_item = ParseItem(item.rule_name, transition.second, item.consumed_sym_count + 1, item.lookahead_sym_name);
auto new_item = ParseItem(item.lhs, transition.second, item.consumed_sym_count + 1, item.lookahead_sym);
auto new_item_set = item_set_closure(ParseItemSet({ new_item }), grammar);
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
}

View file

@ -40,7 +40,7 @@ namespace tree_sitter {
rules::Symbol symbol = *transition.first;
ParseItemSet item_set = *transition.second;
size_t new_state_index = add_parse_state(item_set);
parse_table.add_action(state_index, symbol.name, ParseAction::Shift(new_state_index));
parse_table.add_action(state_index, symbol, ParseAction::Shift(new_state_index));
}
}
@ -56,7 +56,7 @@ namespace tree_sitter {
void add_accept_token_actions(const LexItemSet &item_set, size_t state_index) {
for (LexItem item : item_set) {
if (item.is_done()) {
lex_table.add_default_action(state_index, LexAction::Accept(item.rule_name));
lex_table.add_default_action(state_index, LexAction::Accept(item.lhs));
}
}
}
@ -64,10 +64,10 @@ namespace tree_sitter {
void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) {
for (ParseItem item : item_set) {
if (item.is_done()) {
ParseAction action = (item.rule_name == START) ?
ParseAction action = (item.lhs.name == START) ?
ParseAction::Accept() :
ParseAction::Reduce(item.rule_name, item.consumed_sym_count);
parse_table.add_action(state_index, item.lookahead_sym_name, action);
ParseAction::Reduce(item.lhs, item.consumed_sym_count);
parse_table.add_action(state_index, item.lookahead_sym, action);
}
}
}
@ -75,12 +75,11 @@ namespace tree_sitter {
void assign_lex_state(size_t state_index) {
ParseState &state = parse_table.states[state_index];
LexItemSet item_set;
for (auto pair : state.actions) {
auto symbol = rules::Symbol(pair.first);
for (auto &symbol : state.expected_inputs()) {
if (symbol.name == END_OF_INPUT)
item_set.insert(LexItem(symbol.name, rules::character('\0')));
item_set.insert(LexItem(symbol, rules::character('\0')));
if (lex_grammar.has_definition(symbol))
item_set.insert(LexItem(symbol.name, lex_grammar.rule(symbol.name)));
item_set.insert(LexItem(symbol, lex_grammar.rule(symbol)));
}
state.lex_state_index = add_lex_state(item_set);

View file

@ -83,8 +83,8 @@ namespace tree_sitter {
lex_table(lex_table)
{}
string symbol_id(string symbol_name) {
return "ts_symbol_" + symbol_name;
string symbol_id(rules::Symbol symbol) {
return "ts_symbol_" + symbol.name;
}
string character_code(char character) {
@ -115,7 +115,7 @@ namespace tree_sitter {
}
}
string code_for_parse_actions(const unordered_set<ParseAction> &actions, const unordered_set<string> &expected_inputs) {
string code_for_parse_actions(const unordered_set<ParseAction> &actions, const unordered_set<rules::Symbol> &expected_inputs) {
auto action = actions.begin();
if (action == actions.end()) {
return parse_error_call(expected_inputs);
@ -126,20 +126,20 @@ namespace tree_sitter {
case ParseActionTypeShift:
return "SHIFT(" + to_string(action->state_index) + ");";
case ParseActionTypeReduce:
return "REDUCE(" + symbol_id(action->symbol_name) + ", " + std::to_string(action->child_symbol_count) + ");";
return "REDUCE(" + symbol_id(action->symbol) + ", " + std::to_string(action->child_symbol_count) + ");";
default:
return "";
}
}
}
string parse_error_call(const unordered_set<string> &expected_inputs) {
string parse_error_call(const unordered_set<rules::Symbol> &expected_inputs) {
string result = "PARSE_ERROR(" + to_string(expected_inputs.size()) + ", EXPECT({";
bool started = false;
for (auto symbol_name : expected_inputs) {
for (auto symbol : expected_inputs) {
if (started) result += ", ";
started = true;
result += "\"" + symbol_name + "\"";
result += "\"" + symbol.name + "\"";
}
result += "}));";
return result;
@ -171,7 +171,7 @@ namespace tree_sitter {
case LexActionTypeAdvance:
return "ADVANCE(" + to_string(action->state_index) + ");";
case LexActionTypeAccept:
return "ACCEPT_TOKEN(" + symbol_id(action->symbol_name) + ");";
return "ACCEPT_TOKEN(" + symbol_id(action->symbol) + ");";
case LexActionTypeError:
return "";
}
@ -215,15 +215,15 @@ namespace tree_sitter {
string symbol_enum() {
string result = "enum ts_symbol {\n";
for (string rule_name : parse_table.symbol_names)
result += indent(symbol_id(rule_name)) + ",\n";
for (auto symbol : parse_table.symbols)
result += indent(symbol_id(symbol)) + ",\n";
return result + "};";
}
string rule_names_list() {
string result = "static const char *ts_symbol_names[] = {\n";
for (string rule_name : parse_table.symbol_names)
result += indent(string("\"") + rule_name) + "\",\n";
for (auto symbol : parse_table.symbols)
result += indent(string("\"") + symbol.name) + "\",\n";
return result + "};";
}

View file

@ -16,8 +16,8 @@ namespace tree_sitter {
rules(rules),
start_rule_name(start_rule_name) {}
const rules::rule_ptr Grammar::rule(const string &name) const {
auto iter = rules.find(name);
const rules::rule_ptr Grammar::rule(const rules::Symbol &symbol) const {
auto iter = rules.find(symbol.name);
return (iter == rules.end()) ?
rules::rule_ptr(nullptr) :
iter->second;

View file

@ -11,7 +11,7 @@ namespace tree_sitter {
public:
Grammar(const rule_map_init_list &rules);
Grammar(std::string start_rule_name, const std::unordered_map<std::string, const rules::rule_ptr> &rules);
const rules::rule_ptr rule(const std::string &) const;
const rules::rule_ptr rule(const rules::Symbol &) const;
const std::string start_rule_name;
std::vector<std::string> rule_names() const;
bool operator==(const Grammar &other) const;

View file

@ -5,31 +5,32 @@ using std::to_string;
using std::unordered_map;
using std::unordered_set;
using std::vector;
using tree_sitter::rules::Symbol;
namespace tree_sitter {
// Action
LexAction::LexAction(LexActionType type, size_t state_index, std::string symbol_name) :
type(type),
state_index(state_index),
symbol_name(symbol_name) {}
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol) :
type(type),
state_index(state_index),
symbol(symbol) {}
LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, "");
return LexAction(LexActionTypeError, -1, Symbol(""));
}
LexAction LexAction::Advance(size_t state_index) {
return LexAction(LexActionTypeAdvance, state_index, "");
return LexAction(LexActionTypeAdvance, state_index, Symbol(""));
}
LexAction LexAction::Accept(std::string symbol_name) {
return LexAction(LexActionTypeAccept, -1, symbol_name);
LexAction LexAction::Accept(Symbol symbol) {
return LexAction(LexActionTypeAccept, -1, symbol);
}
bool LexAction::operator==(const LexAction &other) const {
return
(type == other.type) &&
(state_index == other.state_index) &&
(symbol_name == other.symbol_name);
(symbol == other.symbol);
}
std::ostream& operator<<(std::ostream &stream, const LexAction &action) {
@ -37,7 +38,7 @@ namespace tree_sitter {
case LexActionTypeError:
return stream << string("#<error>");
case LexActionTypeAccept:
return stream << string("#<accept ") + action.symbol_name + ">";
return stream << string("#<accept ") + action.symbol.name + ">";
case LexActionTypeAdvance:
return stream << string("#<advance ") + to_string(action.state_index) + ">";
}

View file

@ -6,6 +6,7 @@
#include <string>
#include <unordered_set>
#include "char_match.h"
#include "symbol.h"
namespace tree_sitter {
typedef enum {
@ -15,15 +16,15 @@ namespace tree_sitter {
} LexActionType;
class LexAction {
LexAction(LexActionType type, size_t state_index, std::string symbol_name);
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol);
public:
static LexAction Accept(std::string symbol_name);
static LexAction Accept(rules::Symbol symbol);
static LexAction Error();
static LexAction Advance(size_t state_index);
bool operator==(const LexAction &action) const;
LexActionType type;
std::string symbol_name;
rules::Symbol symbol;
size_t state_index;
};
@ -34,9 +35,8 @@ namespace std {
template<>
struct hash<tree_sitter::LexAction> {
size_t operator()(const tree_sitter::LexAction &action) const {
return (
hash<int>()(action.type) ^
hash<string>()(action.symbol_name) ^
return (hash<int>()(action.type) ^
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
hash<size_t>()(action.state_index));
}
};

View file

@ -4,29 +4,30 @@ using std::string;
using std::ostream;
using std::to_string;
using std::unordered_set;
using tree_sitter::rules::Symbol;
namespace tree_sitter {
// Action
// Constructor: copies each argument into the member of the same name.
// NOTE(review): two definitions follow; they appear to be the pre-change
// (string symbol_name) and post-change (rules::Symbol symbol) versions from a
// diff.
// NOTE(review): C++ initializes members in class declaration order, not in
// the order written in this list; harmless here because each initializer only
// reads its own parameter. The trailing `;` after `{}` is an empty declaration.
ParseAction::ParseAction(ParseActionType type, size_t state_index, string symbol_name, size_t child_symbol_count) :
type(type),
state_index(state_index),
symbol_name(symbol_name),
child_symbol_count(child_symbol_count) {};
ParseAction::ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count) :
type(type),
state_index(state_index),
symbol(symbol),
child_symbol_count(child_symbol_count) {};
// Factory for an error action: type ParseActionTypeError with an empty-named
// symbol. State index and child count are passed as -1, which is stored as
// the largest size_t value because both parameters are size_t.
// NOTE(review): the two return lines appear to be the pre- and post-change
// versions of one line in a diff.
ParseAction ParseAction::Error() {
return ParseAction(ParseActionTypeError, -1, "", -1);
return ParseAction(ParseActionTypeError, -1, Symbol(""), -1);
}
// Factory for an accept action: type ParseActionTypeAccept with an
// empty-named symbol; state index and child count are both passed as -1
// (stored as the largest size_t value).
// NOTE(review): the two return lines appear to be the pre- and post-change
// versions of one line in a diff.
ParseAction ParseAction::Accept() {
return ParseAction(ParseActionTypeAccept, -1, "", -1);
return ParseAction(ParseActionTypeAccept, -1, Symbol(""), -1);
}
// Factory for a shift action: type ParseActionTypeShift carrying the given
// state_index, an empty-named symbol and a child count of -1 (stored as the
// largest size_t value).
// NOTE(review): the two return lines appear to be the pre- and post-change
// versions of one line in a diff.
ParseAction ParseAction::Shift(size_t state_index) {
return ParseAction(ParseActionTypeShift, state_index, "", -1);
return ParseAction(ParseActionTypeShift, state_index, Symbol(""), -1);
}
// Factory for a reduce action: type ParseActionTypeReduce carrying the given
// symbol and child_symbol_count; the state index is passed as -1 (stored as
// the largest size_t value).
// NOTE(review): this span appears to interleave the pre-change
// (std::string symbol_name) and post-change (Symbol symbol) lines of a diff.
ParseAction ParseAction::Reduce(std::string symbol_name, size_t child_symbol_count) {
return ParseAction(ParseActionTypeReduce, -1, symbol_name, child_symbol_count);
ParseAction ParseAction::Reduce(Symbol symbol, size_t child_symbol_count) {
return ParseAction(ParseActionTypeReduce, -1, symbol, child_symbol_count);
}
bool ParseAction::operator==(const ParseAction &other) const {
@ -45,15 +46,15 @@ namespace tree_sitter {
case ParseActionTypeShift:
return stream << (string("#<shift ") + to_string(action.state_index) + ">");
case ParseActionTypeReduce:
return stream << (string("#<reduce ") + action.symbol_name + ">");
return stream << (string("#<reduce ") + action.symbol.name + ">");
}
}
// State
// Default-constructs a state whose lex_state_index is -1; the member is a
// size_t, so the stored value is the largest size_t.
ParseState::ParseState()
    : lex_state_index(static_cast<size_t>(-1)) {}
unordered_set<string> ParseState::expected_inputs() const {
unordered_set<string> result;
unordered_set<rules::Symbol> ParseState::expected_inputs() const {
unordered_set<rules::Symbol> result;
for (auto pair : actions)
result.insert(pair.first);
return result;
@ -84,8 +85,8 @@ namespace tree_sitter {
return states.size() - 1;
}
// Registers `action` for `symbol` in the state at `state_index`:
//  - records the symbol in the table-wide set of symbols, and
//  - inserts the action into that state's action set for the symbol
//    (operator[] on the map creates an empty set on first use).
// `states` is indexed with operator[], so state_index is not bounds-checked.
// NOTE(review): this span appears to interleave the pre-change
// (string sym_name / symbol_names) and post-change (rules::Symbol symbol /
// symbols) lines of a diff.
void ParseTable::add_action(size_t state_index, string sym_name, ParseAction action) {
symbol_names.insert(sym_name);
states[state_index].actions[sym_name].insert(action);
void ParseTable::add_action(size_t state_index, rules::Symbol symbol, ParseAction action) {
symbols.insert(symbol);
states[state_index].actions[symbol].insert(action);
}
}

View file

@ -4,7 +4,7 @@
#include <unordered_map>
#include <vector>
#include <unordered_set>
#include "rule.h"
#include "symbol.h"
namespace tree_sitter {
typedef enum {
@ -15,17 +15,17 @@ namespace tree_sitter {
} ParseActionType;
// Value type holding an action's kind (ParseActionType), a symbol, a state
// index and a child-symbol count. Supports equality comparison.
// NOTE(review): the std::string symbol_name / rules::Symbol symbol pairs
// below appear to be pre- and post-change lines of a diff.
class ParseAction {
// Declared before `public:`, so the constructor is private; instances are
// created through the static factories below.
ParseAction(ParseActionType type, size_t state_index, std::string symbol_name, size_t child_symbol_count);
ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count);
public:
// Factories, one per kind of action.
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(size_t state_index);
static ParseAction Reduce(std::string symbol_name, size_t child_symbol_count);
static ParseAction Reduce(rules::Symbol symbol, size_t child_symbol_count);
bool operator==(const ParseAction &action) const;
// Publicly readable fields. Members are initialized in this declaration
// order regardless of the order used in the constructor's initializer list.
ParseActionType type;
size_t child_symbol_count;
std::string symbol_name;
rules::Symbol symbol;
size_t state_index;
};
@ -38,7 +38,7 @@ namespace std {
// Hashes a ParseAction by XOR-ing the hashes of its type, symbol, state_index
// and child_symbol_count.
// NOTE(review): with plain XOR, fields holding the same value cancel out when
// hashed with the same hasher (e.g. state_index and child_symbol_count both
// -1), which weakens the hash; consider a mixing combine.
// NOTE(review): the symbol_name / symbol lines appear to be the pre- and
// post-change versions of one line in a diff.
size_t operator()(const tree_sitter::ParseAction &action) const {
return (
hash<int>()(action.type) ^
hash<string>()(action.symbol_name) ^
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
hash<size_t>()(action.state_index) ^
hash<size_t>()(action.child_symbol_count));
}
@ -49,8 +49,8 @@ namespace tree_sitter {
// One state of the parse table: a map from symbol to the set of actions
// recorded for it, plus the index of an associated lex state.
// NOTE(review): the std::string / rules::Symbol line pairs below appear to be
// pre- and post-change lines of a diff.
class ParseState {
public:
ParseState();
// Actions keyed by symbol; each key maps to a set of ParseActions.
std::unordered_map<std::string, std::unordered_set<ParseAction>> actions;
std::unordered_set<std::string> expected_inputs() const;
std::unordered_map<rules::Symbol, std::unordered_set<ParseAction>> actions;
// Returns a set of symbols.
std::unordered_set<rules::Symbol> expected_inputs() const;
size_t lex_state_index;
};
@ -59,10 +59,10 @@ namespace tree_sitter {
// Container for the parse states together with the set of every symbol.
// NOTE(review): the std::string / rules::Symbol line pairs below appear to be
// pre- and post-change lines of a diff.
class ParseTable {
public:
// Returns a size_t (presumably the new state's index; the definition is
// only partly visible here).
size_t add_state();
// Adds `action` under the given symbol to the state at `state_index`.
void add_action(size_t state_index, std::string symbol_name, ParseAction action);
void add_action(size_t state_index, rules::Symbol symbol, ParseAction action);
std::vector<ParseState> states;
std::unordered_set<std::string> symbol_names;
std::unordered_set<rules::Symbol> symbols;
};
}

View file

@ -16,7 +16,7 @@ namespace tree_sitter {
void accept(Visitor &visitor) const;
bool operator<(const Symbol &other) const;
const std::string name;
std::string name;
};
}
}