From ca33c3942a31214f6781237a3d6a6ae4b9e15155 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 27 Jan 2014 12:40:06 -0800 Subject: [PATCH] In parse table, store symbols as Symbol objects, not strings --- .../build_tables/item_set_closure_spec.cpp | 20 +-- spec/compiler/build_tables/perform_spec.cpp | 27 ++- spec/fixtures/parsers/arithmetic.c | 166 +++++++++--------- spec/fixtures/parsers/json.c | 106 +++++------ spec/runtime/json_spec.cpp | 2 +- src/compiler/build_tables/first_set.cpp | 2 +- src/compiler/build_tables/follow_sets.cpp | 4 +- src/compiler/build_tables/item.cpp | 39 ++-- src/compiler/build_tables/item.h | 19 +- .../build_tables/item_set_closure.cpp | 2 +- .../build_tables/item_set_transitions.cpp | 4 +- src/compiler/build_tables/perform.cpp | 17 +- src/compiler/generate_code/c_code.cpp | 24 +-- src/compiler/grammar.cpp | 4 +- src/compiler/grammar.h | 2 +- src/compiler/lex_table.cpp | 21 +-- src/compiler/lex_table.h | 12 +- src/compiler/parse_table.cpp | 33 ++-- src/compiler/parse_table.h | 18 +- src/compiler/rules/symbol.h | 2 +- 20 files changed, 262 insertions(+), 262 deletions(-) diff --git a/spec/compiler/build_tables/item_set_closure_spec.cpp b/spec/compiler/build_tables/item_set_closure_spec.cpp index 31395dde..8daf4fe5 100644 --- a/spec/compiler/build_tables/item_set_closure_spec.cpp +++ b/spec/compiler/build_tables/item_set_closure_spec.cpp @@ -30,22 +30,22 @@ describe("computing closures of item sets", []() { it("computes the item set closure", [&]() { ParseItemSet item_set = item_set_closure(ParseItemSet({ - ParseItem("E", grammar.rule("E"), 0, "__END__") + ParseItem(Symbol("E"), grammar.rule(Symbol("E")), 0, Symbol("__END__")) }), grammar); AssertThat(item_set, Equals(ParseItemSet({ - ParseItem("F", grammar.rule("F"), 0, "__END__"), - ParseItem("F", grammar.rule("F"), 0, "+"), - ParseItem("F", grammar.rule("F"), 0, "*"), - ParseItem("T", grammar.rule("T"), 0, "__END__"), - ParseItem("T", grammar.rule("T"), 0, "+"), - ParseItem("E", grammar.rule("E"), 0, "__END__"), + ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("__END__")), + ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("+")), + ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("*")), + ParseItem(Symbol("T"), grammar.rule(Symbol("T")), 0, Symbol("__END__")), + ParseItem(Symbol("T"), grammar.rule(Symbol("T")), 0, Symbol("+")), + ParseItem(Symbol("E"), grammar.rule(Symbol("E")), 0, Symbol("__END__")), }))); ParseItemSet next_item_set = *sym_transitions(item_set, grammar)[rules::Symbol("v")]; AssertThat(next_item_set, Equals(ParseItemSet({ - ParseItem("F", rules::blank(), 1, "__END__"), - ParseItem("F", rules::blank(), 1, "*"), - ParseItem("F", rules::blank(), 1, "+"), + ParseItem(Symbol("F"), rules::blank(), 1, Symbol("__END__")), + ParseItem(Symbol("F"), rules::blank(), 1, Symbol("*")), + ParseItem(Symbol("F"), rules::blank(), 1, Symbol("+")), }))); }); }); diff --git a/spec/compiler/build_tables/perform_spec.cpp b/spec/compiler/build_tables/perform_spec.cpp index a0eb7b93..c7d708f5 100644 --- a/spec/compiler/build_tables/perform_spec.cpp +++ b/spec/compiler/build_tables/perform_spec.cpp @@ -8,8 +8,8 @@ using namespace rules; typedef unordered_set parse_actions; typedef unordered_set lex_actions; -static unordered_set keys(const unordered_map &map) { - unordered_set result; +static unordered_set keys(const unordered_map &map) { + unordered_set result; for (auto pair : map) { result.insert(pair.first); } @@ -51,7 +51,6 @@ describe("building parse and lex tables", []() { { "left-paren", str("(") }, { "right-paren", str(")") } }); - ParseTable table; LexTable lex_table; @@ -72,12 +71,12 @@ describe("building parse and lex tables", []() { }; it("has the right starting state", [&]() { - AssertThat(keys(parse_state(0).actions), Equals(unordered_set({ - "expression", - "term", - "number", - "variable", - "left-paren", + AssertThat(keys(parse_state(0).actions), Equals(unordered_set({ + Symbol("expression"), + Symbol("term"), + Symbol("number"), + Symbol("variable"), + Symbol("left-paren"), }))); AssertThat(keys(lex_state(0).actions), Equals(unordered_set({ @@ -94,15 +93,15 @@ describe("building parse and lex tables", []() { }); it("accepts when the start symbol is reduced", [&]() { - AssertThat(parse_state(1).actions, Equals(unordered_map({ - { "__END__", parse_actions({ ParseAction::Accept() }) } + AssertThat(parse_state(1).actions, Equals(unordered_map({ + { Symbol("__END__"), parse_actions({ ParseAction::Accept() }) } }))); }); it("has the right next states", [&]() { - AssertThat(parse_state(2).actions, Equals(unordered_map({ - { "plus", parse_actions({ ParseAction::Shift(3) }) }, - { "__END__", parse_actions({ ParseAction::Reduce("expression", 1) }) }, + AssertThat(parse_state(2).actions, Equals(unordered_map({ + { Symbol("plus"), parse_actions({ ParseAction::Shift(3) }) }, + { Symbol("__END__"), parse_actions({ ParseAction::Reduce(Symbol("expression"), 1) }) }, }))); }); }); diff --git a/spec/fixtures/parsers/arithmetic.c b/spec/fixtures/parsers/arithmetic.c index 2a253996..212897ef 100644 --- a/spec/fixtures/parsers/arithmetic.c +++ b/spec/fixtures/parsers/arithmetic.c @@ -3,28 +3,28 @@ enum ts_symbol { ts_symbol_factor, - ts_symbol_plus, - ts_symbol_number, - ts_symbol___END__, - ts_symbol_variable, ts_symbol_1, - ts_symbol_2, - ts_symbol_term, + ts_symbol_number, + ts_symbol_variable, ts_symbol_times, + ts_symbol_term, ts_symbol_expression, + ts_symbol_plus, + ts_symbol_2, + ts_symbol___END__, }; static const char *ts_symbol_names[] = { "factor", - "plus", - "number", - "__END__", - "variable", "1", - "2", - "term", + "number", + "variable", "times", + "term", "expression", + "plus", + "2", + "__END__", }; static void ts_lex(TSParser *parser) { @@ -118,18 +118,18 @@ static TSParseResult ts_parse(const char *input) { switch (LOOKAHEAD_SYM()) { case ts_symbol_factor: SHIFT(45); - case ts_symbol_variable: - SHIFT(41); - case ts_symbol_term: - SHIFT(2); case ts_symbol_1: SHIFT(42); case ts_symbol_number: SHIFT(41); + case ts_symbol_variable: + SHIFT(41); + case ts_symbol_term: + SHIFT(2); case ts_symbol_expression: SHIFT(1); default: - PARSE_ERROR(6, EXPECT({"expression", "number", "1", "term", "variable", "factor"})); + PARSE_ERROR(6, EXPECT({"expression", "factor", "variable", "number", "1", "term"})); } case 1: SET_LEX_STATE(0); @@ -152,18 +152,18 @@ static TSParseResult ts_parse(const char *input) { case 3: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_factor: - SHIFT(34); - case ts_symbol_variable: - SHIFT(5); case ts_symbol_1: SHIFT(6); case ts_symbol_number: SHIFT(5); + case ts_symbol_variable: + SHIFT(5); + case ts_symbol_factor: + SHIFT(34); case ts_symbol_term: SHIFT(4); default: - PARSE_ERROR(5, EXPECT({"term", "number", "1", "variable", "factor"})); + PARSE_ERROR(5, EXPECT({"term", "factor", "variable", "number", "1"})); } case 4: SET_LEX_STATE(0); @@ -186,20 +186,20 @@ static TSParseResult ts_parse(const char *input) { case 6: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_factor: - SHIFT(16); case ts_symbol_expression: SHIFT(32); - case ts_symbol_variable: - SHIFT(12); - case ts_symbol_term: - SHIFT(7); + case ts_symbol_factor: + SHIFT(16); case ts_symbol_1: SHIFT(13); case ts_symbol_number: SHIFT(12); + case ts_symbol_variable: + SHIFT(12); + case ts_symbol_term: + SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); } case 7: SET_LEX_STATE(9); @@ -214,18 +214,18 @@ static TSParseResult ts_parse(const char *input) { case 8: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_factor: - SHIFT(25); - case ts_symbol_variable: - SHIFT(10); case ts_symbol_1: SHIFT(11); case ts_symbol_number: SHIFT(10); + case ts_symbol_variable: + SHIFT(10); + case ts_symbol_factor: + SHIFT(25); case ts_symbol_term: SHIFT(9); default: - PARSE_ERROR(5, EXPECT({"term", "number", "1", "variable", "factor"})); + PARSE_ERROR(5, EXPECT({"term", "factor", "variable", "number", "1"})); } case 9: SET_LEX_STATE(4); @@ -248,20 +248,20 @@ static TSParseResult ts_parse(const char *input) { case 11: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_factor: - SHIFT(16); case ts_symbol_expression: SHIFT(23); - case ts_symbol_variable: - SHIFT(12); - case ts_symbol_term: - SHIFT(7); + case ts_symbol_factor: + SHIFT(16); case ts_symbol_1: SHIFT(13); case ts_symbol_number: SHIFT(12); + case ts_symbol_variable: + SHIFT(12); + case ts_symbol_term: + SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); } case 12: SET_LEX_STATE(7); @@ -278,20 +278,20 @@ static TSParseResult ts_parse(const char *input) { case 13: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_factor: - SHIFT(16); case ts_symbol_expression: SHIFT(14); - case ts_symbol_variable: - SHIFT(12); - case ts_symbol_term: - SHIFT(7); + case ts_symbol_factor: + SHIFT(16); case ts_symbol_1: SHIFT(13); case ts_symbol_number: SHIFT(12); + case ts_symbol_variable: + SHIFT(12); + case ts_symbol_term: + SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); } case 14: SET_LEX_STATE(4); @@ -352,20 +352,20 @@ static TSParseResult ts_parse(const char *input) { case 19: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_factor: - SHIFT(16); case ts_symbol_expression: SHIFT(20); - case ts_symbol_variable: - SHIFT(12); - case ts_symbol_term: - SHIFT(7); + case ts_symbol_factor: + SHIFT(16); case ts_symbol_1: SHIFT(13); case ts_symbol_number: SHIFT(12); + case ts_symbol_variable: + SHIFT(12); + case ts_symbol_term: + SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); } case 20: SET_LEX_STATE(4); @@ -448,20 +448,20 @@ static TSParseResult ts_parse(const char *input) { case 28: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_factor: - SHIFT(16); case ts_symbol_expression: SHIFT(29); - case ts_symbol_variable: - SHIFT(12); - case ts_symbol_term: - SHIFT(7); + case ts_symbol_factor: + SHIFT(16); case ts_symbol_1: SHIFT(13); case ts_symbol_number: SHIFT(12); + case ts_symbol_variable: + SHIFT(12); + case ts_symbol_term: + SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); } case 29: SET_LEX_STATE(4); @@ -540,20 +540,20 @@ static TSParseResult ts_parse(const char *input) { case 37: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_factor: - SHIFT(16); case ts_symbol_expression: SHIFT(38); - case ts_symbol_variable: - SHIFT(12); - case ts_symbol_term: - SHIFT(7); + case ts_symbol_factor: + SHIFT(16); case ts_symbol_1: SHIFT(13); case ts_symbol_number: SHIFT(12); + case ts_symbol_variable: + SHIFT(12); + case ts_symbol_term: + SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); } case 38: SET_LEX_STATE(4); @@ -594,20 +594,20 @@ static TSParseResult ts_parse(const char *input) { case 42: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_factor: - SHIFT(16); case ts_symbol_expression: SHIFT(43); - case ts_symbol_variable: - SHIFT(12); - case ts_symbol_term: - SHIFT(7); + case ts_symbol_factor: + SHIFT(16); case ts_symbol_1: SHIFT(13); case ts_symbol_number: SHIFT(12); + case ts_symbol_variable: + SHIFT(12); + case ts_symbol_term: + SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); } case 43: SET_LEX_STATE(4); @@ -668,20 +668,20 @@ static TSParseResult ts_parse(const char *input) { case 48: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_factor: - SHIFT(16); case ts_symbol_expression: SHIFT(49); - case ts_symbol_variable: - SHIFT(12); - case ts_symbol_term: - SHIFT(7); + case ts_symbol_factor: + SHIFT(16); case ts_symbol_1: SHIFT(13); case ts_symbol_number: SHIFT(12); + case ts_symbol_variable: + SHIFT(12); + case ts_symbol_term: + SHIFT(7); default: - PARSE_ERROR(6, EXPECT({"number", "1", "term", "variable", "expression", "factor"})); + PARSE_ERROR(6, EXPECT({"term", "variable", "number", "factor", "1", "expression"})); } case 49: SET_LEX_STATE(4); diff --git a/spec/fixtures/parsers/json.c b/spec/fixtures/parsers/json.c index f4806708..5e5ab323 100644 --- a/spec/fixtures/parsers/json.c +++ b/spec/fixtures/parsers/json.c @@ -4,37 +4,37 @@ enum ts_symbol { ts_symbol_number, ts_symbol_string, - ts_symbol_array, - ts_symbol_7, ts_symbol_object, ts_symbol_repeat_helper2, - ts_symbol_6, - ts_symbol_4, ts_symbol_repeat_helper1, ts_symbol_3, + ts_symbol_6, + ts_symbol_7, + ts_symbol_4, + ts_symbol___END__, + ts_symbol_array, ts_symbol_2, ts_symbol_5, ts_symbol_1, ts_symbol_value, - ts_symbol___END__, }; static const char *ts_symbol_names[] = { "number", "string", - "array", - "7", "object", "repeat_helper2", - "6", - "4", "repeat_helper1", "3", + "6", + "7", + "4", + "__END__", + "array", "2", "5", "1", "value", - "__END__", }; static void ts_lex(TSParser *parser) { @@ -116,22 +116,22 @@ static TSParseResult ts_parse(const char *input) { case 0: SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: - SHIFT(31); - case ts_symbol_string: - SHIFT(31); case ts_symbol_array: SHIFT(31); - case ts_symbol_object: - SHIFT(31); case ts_symbol_4: SHIFT(25); + case ts_symbol_number: + SHIFT(31); case ts_symbol_1: SHIFT(2); + case ts_symbol_string: + SHIFT(31); + case ts_symbol_object: + SHIFT(31); case ts_symbol_value: SHIFT(1); default: - PARSE_ERROR(7, EXPECT({"value", "1", "object", "array", "4", "string", "number"})); + PARSE_ERROR(7, EXPECT({"value", "object", "string", "number", "1", "4", "array"})); } case 1: SET_LEX_STATE(0); @@ -144,42 +144,42 @@ static TSParseResult ts_parse(const char *input) { case 2: SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: + case ts_symbol_array: SHIFT(19); case ts_symbol_string: SHIFT(19); - case ts_symbol_array: - SHIFT(19); case ts_symbol_object: SHIFT(19); - case ts_symbol_4: - SHIFT(8); case ts_symbol_value: SHIFT(22); + case ts_symbol_4: + SHIFT(8); + case ts_symbol_number: + SHIFT(19); case ts_symbol_1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"})); + PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); } case 3: SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: + case ts_symbol_array: SHIFT(19); case ts_symbol_string: SHIFT(19); - case ts_symbol_array: - SHIFT(19); case ts_symbol_object: SHIFT(19); - case ts_symbol_4: - SHIFT(8); case ts_symbol_value: SHIFT(4); + case ts_symbol_4: + SHIFT(8); + case ts_symbol_number: + SHIFT(19); case ts_symbol_1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"})); + PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); } case 4: SET_LEX_STATE(2); @@ -214,22 +214,22 @@ static TSParseResult ts_parse(const char *input) { case 7: SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: + case ts_symbol_array: SHIFT(19); case ts_symbol_string: SHIFT(19); - case ts_symbol_array: - SHIFT(19); case ts_symbol_object: SHIFT(19); - case ts_symbol_4: - SHIFT(8); case ts_symbol_value: SHIFT(20); + case ts_symbol_4: + SHIFT(8); + case ts_symbol_number: + SHIFT(19); case ts_symbol_1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"})); + PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); } case 8: SET_LEX_STATE(17); @@ -250,22 +250,22 @@ static TSParseResult ts_parse(const char *input) { case 10: SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: + case ts_symbol_array: SHIFT(19); case ts_symbol_string: SHIFT(19); - case ts_symbol_array: - SHIFT(19); case ts_symbol_object: SHIFT(19); - case ts_symbol_4: - SHIFT(8); case ts_symbol_value: SHIFT(11); + case ts_symbol_4: + SHIFT(8); + case ts_symbol_number: + SHIFT(19); case ts_symbol_1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"})); + PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); } case 11: SET_LEX_STATE(2); @@ -316,22 +316,22 @@ static TSParseResult ts_parse(const char *input) { case 16: SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: + case ts_symbol_array: SHIFT(19); case ts_symbol_string: SHIFT(19); - case ts_symbol_array: - SHIFT(19); case ts_symbol_object: SHIFT(19); - case ts_symbol_4: - SHIFT(8); case ts_symbol_value: SHIFT(17); + case ts_symbol_4: + SHIFT(8); + case ts_symbol_number: + SHIFT(19); case ts_symbol_1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"})); + PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); } case 17: SET_LEX_STATE(2); @@ -430,22 +430,22 @@ static TSParseResult ts_parse(const char *input) { case 27: SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_symbol_number: + case ts_symbol_array: SHIFT(19); case ts_symbol_string: SHIFT(19); - case ts_symbol_array: - SHIFT(19); case ts_symbol_object: SHIFT(19); - case ts_symbol_4: - SHIFT(8); case ts_symbol_value: SHIFT(28); + case ts_symbol_4: + SHIFT(8); + case ts_symbol_number: + SHIFT(19); case ts_symbol_1: SHIFT(3); default: - PARSE_ERROR(7, EXPECT({"1", "value", "object", "array", "4", "string", "number"})); + PARSE_ERROR(7, EXPECT({"1", "number", "4", "value", "object", "string", "array"})); } case 28: SET_LEX_STATE(2); diff --git a/spec/runtime/json_spec.cpp b/spec/runtime/json_spec.cpp index 10a9c116..44e620eb 100644 --- a/spec/runtime/json_spec.cpp +++ b/spec/runtime/json_spec.cpp @@ -23,7 +23,7 @@ describe("json", []() { AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (2) (6)))")); TSDocumentSetText(document, "{\"key1\":1,\"key2\":2}"); - AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (6) (7)))")); + AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (repeat_helper2 (7) (string) (5) (value (number)) (2)) (6)))")); }); }); diff --git a/src/compiler/build_tables/first_set.cpp b/src/compiler/build_tables/first_set.cpp index 045973b2..d94fb2ec 100644 --- a/src/compiler/build_tables/first_set.cpp +++ b/src/compiler/build_tables/first_set.cpp @@ -24,7 +24,7 @@ namespace tree_sitter { void visit(const Symbol *rule) { if (grammar.has_definition(*rule)) { - value = apply(grammar.rule(rule->name), grammar); + value = apply(grammar.rule(*rule), grammar); } else { value = set({ *rule }); } diff --git a/src/compiler/build_tables/follow_sets.cpp b/src/compiler/build_tables/follow_sets.cpp index e5441030..b9a68c12 100644 --- a/src/compiler/build_tables/follow_sets.cpp +++ b/src/compiler/build_tables/follow_sets.cpp @@ -3,8 +3,6 @@ #include "rule_transitions.h" #include "grammar.h" -#include "stream_methods.h" - using std::unordered_map; using std::set; using std::dynamic_pointer_cast; @@ -22,7 +20,7 @@ namespace tree_sitter { if (symbol && grammar.has_definition(*symbol)) { auto following_non_terminals = first_set(pair.second, grammar); if (rule_can_be_blank(pair.second)) { - following_non_terminals.insert(item.lookahead_sym_name); + following_non_terminals.insert(item.lookahead_sym); } result.insert({ *symbol, following_non_terminals }); } diff --git a/src/compiler/build_tables/item.cpp b/src/compiler/build_tables/item.cpp index 4ac701a1..91cb003c 100644 --- a/src/compiler/build_tables/item.cpp +++ b/src/compiler/build_tables/item.cpp @@ -5,11 +5,12 @@ using std::string; using std::to_string; using std::ostream; +using tree_sitter::rules::Symbol; namespace tree_sitter { namespace build_tables { - Item::Item(const string &rule_name, const rules::rule_ptr rule) : - rule_name(rule_name), + Item::Item(const Symbol &lhs, const rules::rule_ptr rule) : + lhs(lhs), rule(rule) {}; bool Item::is_done() const { @@ -19,7 +20,7 @@ namespace tree_sitter { ostream& operator<<(ostream &stream, const LexItem &item) { return stream << string("#"); @@ -28,53 +29,53 @@ namespace tree_sitter { ostream& operator<<(ostream &stream, const ParseItem &item) { return stream << string("#"); } bool LexItem::operator<(const LexItem &other) const { - if (rule_name < other.rule_name) return true; - if (rule_name > other.rule_name) return false; + if (lhs < other.lhs) return true; + if (other.lhs < lhs) return false; if (rule->to_string() < other.rule->to_string()) return true; return false; } bool ParseItem::operator<(const ParseItem &other) const { - if (rule_name < other.rule_name) return true; - if (rule_name > other.rule_name) return false; + if (lhs < other.lhs) return true; + if (other.lhs < lhs) return false; if (rule->to_string() < other.rule->to_string()) return true; if (rule->to_string() > other.rule->to_string()) return false; if (consumed_sym_count < other.consumed_sym_count) return true; if (consumed_sym_count > other.consumed_sym_count) return false; - if (lookahead_sym_name < other.lookahead_sym_name) return true; + if (lookahead_sym < other.lookahead_sym) return true; return false; } - LexItem::LexItem(const std::string &rule_name, const rules::rule_ptr rule) : Item(rule_name, rule) {} + LexItem::LexItem(const Symbol &lhs, const rules::rule_ptr rule) : Item(lhs, rule) {} bool LexItem::operator==(const LexItem &other) const { - bool rule_names_eq = other.rule_name == rule_name; + bool lhs_eq = other.lhs == lhs; bool rules_eq = (*other.rule == *rule); - return rule_names_eq && rules_eq; + return lhs_eq && rules_eq; } - ParseItem::ParseItem(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count, const std::string &lookahead_sym_name) : - Item(rule_name, rule), + ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym) : + Item(lhs, rule), consumed_sym_count(consumed_sym_count), - lookahead_sym_name(lookahead_sym_name) {} + lookahead_sym(lookahead_sym) {} bool ParseItem::operator==(const ParseItem &other) const { - bool rule_names_eq = other.rule_name == rule_name; + bool lhs_eq = other.lhs == lhs; bool rules_eq = (*other.rule == *rule); bool consumed_sym_counts_eq = (other.consumed_sym_count == consumed_sym_count); - bool lookaheads_eq = other.lookahead_sym_name == lookahead_sym_name; - return rule_names_eq && rules_eq && consumed_sym_counts_eq && lookaheads_eq; + bool lookaheads_eq = other.lookahead_sym == lookahead_sym; + return lhs_eq && rules_eq && consumed_sym_counts_eq && lookaheads_eq; } } } diff --git a/src/compiler/build_tables/item.h b/src/compiler/build_tables/item.h index 537ec7e2..d15da384 100644 --- a/src/compiler/build_tables/item.h +++ b/src/compiler/build_tables/item.h @@ -5,6 +5,7 @@ #include "rule.h" #include #include +#include "symbol.h" namespace tree_sitter { class Grammar; @@ -12,28 +13,28 @@ namespace tree_sitter { namespace build_tables { class Item { public: - Item(const std::string &rule_name, const rules::rule_ptr rule); + Item(const rules::Symbol &lhs, const rules::rule_ptr rule); bool is_done() const; - const std::string rule_name; + const rules::Symbol lhs; const rules::rule_ptr rule; }; class LexItem : public Item { public: - LexItem(const std::string &rule_name, const rules::rule_ptr rule); + LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule); bool operator<(const LexItem &other) const; bool operator==(const LexItem &other) const; }; class ParseItem : public Item { public: - ParseItem(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count, const std::string &lookahead_sym_name); + ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym); bool operator<(const ParseItem &other) const; bool operator==(const ParseItem &other) const; const int consumed_sym_count; - const std::string lookahead_sym_name; + const rules::Symbol lookahead_sym; }; typedef std::set ParseItemSet; @@ -49,8 +50,8 @@ namespace std { struct hash { size_t operator()(const tree_sitter::build_tables::Item &item) const { return - hash()(item.rule_name) ^ - hash()(*item.rule); + hash()(item.lhs) ^ + hash()(*item.rule); } }; @@ -58,10 +59,10 @@ namespace std { struct hash { size_t operator()(const tree_sitter::build_tables::ParseItem &item) const { return - hash()(item.rule_name) ^ + hash()(item.lhs) ^ hash()(*item.rule) ^ hash()(item.consumed_sym_count) ^ - hash()(item.lookahead_sym_name); + hash()(item.lookahead_sym); } }; diff --git a/src/compiler/build_tables/item_set_closure.cpp b/src/compiler/build_tables/item_set_closure.cpp index 83c9275b..1b4b081b 100644 --- a/src/compiler/build_tables/item_set_closure.cpp +++ b/src/compiler/build_tables/item_set_closure.cpp @@ -20,7 +20,7 @@ namespace tree_sitter { auto non_terminal = pair.first; auto terminals = pair.second; for (rules::Symbol terminal : terminals) { - auto next_item = ParseItem(non_terminal.name, grammar.rule(non_terminal.name), 0, terminal.name); + auto next_item = ParseItem(non_terminal, grammar.rule(non_terminal), 0, terminal); add_item(item_set, next_item, grammar); } } diff --git a/src/compiler/build_tables/item_set_transitions.cpp b/src/compiler/build_tables/item_set_transitions.cpp index ef139614..410b630b 100644 --- a/src/compiler/build_tables/item_set_transitions.cpp +++ b/src/compiler/build_tables/item_set_transitions.cpp @@ -22,7 +22,7 @@ namespace tree_sitter { for (auto transition : rule_transitions(item.rule)) { auto rule = dynamic_pointer_cast(transition.first); if (rule.get()) { - auto new_item = LexItem(item.rule_name, transition.second); + auto new_item = LexItem(item.lhs, transition.second); auto new_item_set = LexItemSet({ new_item }); item_transitions.add(rule, make_shared(new_item_set)); } @@ -43,7 +43,7 @@ namespace tree_sitter { for (auto transition : rule_transitions(item.rule)) { auto rule = dynamic_pointer_cast(transition.first); if (rule.get()) { - auto new_item = ParseItem(item.rule_name, transition.second, item.consumed_sym_count + 1, item.lookahead_sym_name); + auto new_item = ParseItem(item.lhs, transition.second, item.consumed_sym_count + 1, item.lookahead_sym); auto new_item_set = item_set_closure(ParseItemSet({ new_item }), grammar); item_transitions.add(rule, make_shared(new_item_set)); } diff --git a/src/compiler/build_tables/perform.cpp b/src/compiler/build_tables/perform.cpp index c8f01a37..ca13114f 100644 --- a/src/compiler/build_tables/perform.cpp +++ b/src/compiler/build_tables/perform.cpp @@ -40,7 +40,7 @@ namespace tree_sitter { rules::Symbol symbol = *transition.first; ParseItemSet item_set = *transition.second; size_t new_state_index = add_parse_state(item_set); - parse_table.add_action(state_index, symbol.name, ParseAction::Shift(new_state_index)); + parse_table.add_action(state_index, symbol, ParseAction::Shift(new_state_index)); } } @@ -56,7 +56,7 @@ namespace tree_sitter { void add_accept_token_actions(const LexItemSet &item_set, size_t state_index) { for (LexItem item : item_set) { if (item.is_done()) { - lex_table.add_default_action(state_index, LexAction::Accept(item.rule_name)); + lex_table.add_default_action(state_index, LexAction::Accept(item.lhs)); } } } @@ -64,10 +64,10 @@ namespace tree_sitter { void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) { for (ParseItem item : item_set) { if (item.is_done()) { - ParseAction action = (item.rule_name == START) ? + ParseAction action = (item.lhs.name == START) ? ParseAction::Accept() : - ParseAction::Reduce(item.rule_name, item.consumed_sym_count); - parse_table.add_action(state_index, item.lookahead_sym_name, action); + ParseAction::Reduce(item.lhs, item.consumed_sym_count); + parse_table.add_action(state_index, item.lookahead_sym, action); } } } @@ -75,12 +75,11 @@ namespace tree_sitter { void assign_lex_state(size_t state_index) { ParseState &state = parse_table.states[state_index]; LexItemSet item_set; - for (auto pair : state.actions) { - auto symbol = rules::Symbol(pair.first); + for (auto &symbol : state.expected_inputs()) { if (symbol.name == END_OF_INPUT) - item_set.insert(LexItem(symbol.name, rules::character('\0'))); + item_set.insert(LexItem(symbol, rules::character('\0'))); if (lex_grammar.has_definition(symbol)) - item_set.insert(LexItem(symbol.name, lex_grammar.rule(symbol.name))); + item_set.insert(LexItem(symbol, lex_grammar.rule(symbol))); } state.lex_state_index = add_lex_state(item_set); diff --git a/src/compiler/generate_code/c_code.cpp b/src/compiler/generate_code/c_code.cpp index 1ccd0e99..2ae8485e 100644 --- a/src/compiler/generate_code/c_code.cpp +++ b/src/compiler/generate_code/c_code.cpp @@ -83,8 +83,8 @@ namespace tree_sitter { lex_table(lex_table) {} - string symbol_id(string symbol_name) { - return "ts_symbol_" + symbol_name; + string symbol_id(rules::Symbol symbol) { + return "ts_symbol_" + symbol.name; } string character_code(char character) { @@ -115,7 +115,7 @@ namespace tree_sitter { } } - string code_for_parse_actions(const unordered_set &actions, const unordered_set &expected_inputs) { + string code_for_parse_actions(const unordered_set &actions, const unordered_set &expected_inputs) { auto action = actions.begin(); if (action == actions.end()) { return parse_error_call(expected_inputs); @@ -126,20 +126,20 @@ namespace tree_sitter { case ParseActionTypeShift: return "SHIFT(" + to_string(action->state_index) + ");"; case ParseActionTypeReduce: - return "REDUCE(" + symbol_id(action->symbol_name) + ", " + std::to_string(action->child_symbol_count) + ");"; + return "REDUCE(" + symbol_id(action->symbol) + ", " + std::to_string(action->child_symbol_count) + ");"; default: return ""; } } } - string parse_error_call(const unordered_set &expected_inputs) { + string parse_error_call(const unordered_set &expected_inputs) { string result = "PARSE_ERROR(" + to_string(expected_inputs.size()) + ", EXPECT({"; bool started = false; - for (auto symbol_name : expected_inputs) { + for (auto symbol : expected_inputs) { if (started) result += ", "; started = true; - result += "\"" + symbol_name + "\""; + result += "\"" + symbol.name + "\""; } result += "}));"; return result; @@ -171,7 +171,7 @@ namespace tree_sitter { case LexActionTypeAdvance: return "ADVANCE(" + to_string(action->state_index) + ");"; case LexActionTypeAccept: - return "ACCEPT_TOKEN(" + symbol_id(action->symbol_name) + ");"; + return "ACCEPT_TOKEN(" + symbol_id(action->symbol) + ");"; case LexActionTypeError: return ""; } @@ -215,15 +215,15 @@ namespace tree_sitter { string symbol_enum() { string result = "enum ts_symbol {\n"; - for (string rule_name : parse_table.symbol_names) - result += indent(symbol_id(rule_name)) + ",\n"; + for (auto symbol : parse_table.symbols) + result += indent(symbol_id(symbol)) + ",\n"; return result + "};"; } string rule_names_list() { string result = "static const char *ts_symbol_names[] = {\n"; - for (string rule_name : parse_table.symbol_names) - result += indent(string("\"") + rule_name) + "\",\n"; + for (auto symbol : parse_table.symbols) + result += indent(string("\"") + symbol.name) + "\",\n"; return result + "};"; } diff --git a/src/compiler/grammar.cpp b/src/compiler/grammar.cpp index 940c0920..097c6611 100644 --- a/src/compiler/grammar.cpp +++ b/src/compiler/grammar.cpp @@ -16,8 +16,8 @@ namespace tree_sitter { rules(rules), start_rule_name(start_rule_name) {} - const rules::rule_ptr Grammar::rule(const string &name) const { - auto iter = rules.find(name); + const rules::rule_ptr Grammar::rule(const rules::Symbol &symbol) const { + auto iter = rules.find(symbol.name); return (iter == rules.end()) ? rules::rule_ptr(nullptr) : iter->second; diff --git a/src/compiler/grammar.h b/src/compiler/grammar.h index 0754f229..71cd1d14 100644 --- a/src/compiler/grammar.h +++ b/src/compiler/grammar.h @@ -11,7 +11,7 @@ namespace tree_sitter { public: Grammar(const rule_map_init_list &rules); Grammar(std::string start_rule_name, const std::unordered_map &rules); - const rules::rule_ptr rule(const std::string &) const; + const rules::rule_ptr rule(const rules::Symbol &) const; const std::string start_rule_name; std::vector rule_names() const; bool operator==(const Grammar &other) const; diff --git a/src/compiler/lex_table.cpp b/src/compiler/lex_table.cpp index 8ee09dbb..5a211c6c 100644 --- a/src/compiler/lex_table.cpp +++ b/src/compiler/lex_table.cpp @@ -5,31 +5,32 @@ using std::to_string; using std::unordered_map; using std::unordered_set; using std::vector; +using tree_sitter::rules::Symbol; namespace tree_sitter { // Action - LexAction::LexAction(LexActionType type, size_t state_index, std::string symbol_name) : - type(type), - state_index(state_index), - symbol_name(symbol_name) {} + LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol) : + type(type), + state_index(state_index), + symbol(symbol) {} LexAction LexAction::Error() { - return LexAction(LexActionTypeError, -1, ""); + return LexAction(LexActionTypeError, -1, Symbol("")); } LexAction LexAction::Advance(size_t state_index) { - return LexAction(LexActionTypeAdvance, state_index, ""); + return LexAction(LexActionTypeAdvance, state_index, Symbol("")); } - LexAction LexAction::Accept(std::string symbol_name) { - return LexAction(LexActionTypeAccept, -1, symbol_name); + LexAction LexAction::Accept(Symbol symbol) { + return LexAction(LexActionTypeAccept, -1, symbol); } bool LexAction::operator==(const LexAction &other) const { return (type == other.type) && (state_index == other.state_index) && - (symbol_name == other.symbol_name); + (symbol == other.symbol); } std::ostream& operator<<(std::ostream &stream, const LexAction &action) { @@ -37,7 +38,7 @@ namespace tree_sitter { case LexActionTypeError: return stream << string("#"); case LexActionTypeAccept: - return stream << string("#"; + return stream << string("#"; case LexActionTypeAdvance: return stream << string("#"; } diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index 67b4c9ef..55fe48a2 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -6,6 +6,7 @@ #include #include #include "char_match.h" +#include "symbol.h" namespace tree_sitter { typedef enum { @@ -15,15 +16,15 @@ namespace tree_sitter { } LexActionType; class LexAction { - LexAction(LexActionType type, size_t state_index, std::string symbol_name); + LexAction(LexActionType type, size_t state_index, rules::Symbol symbol); public: - static LexAction Accept(std::string symbol_name); + static LexAction Accept(rules::Symbol symbol); static LexAction Error(); static LexAction Advance(size_t state_index); bool operator==(const LexAction &action) const; LexActionType type; - std::string symbol_name; + rules::Symbol symbol; size_t state_index; }; @@ -34,9 +35,8 @@ namespace std { template<> struct hash { size_t operator()(const tree_sitter::LexAction &action) const { - return ( - hash()(action.type) ^ - hash()(action.symbol_name) ^ + return (hash()(action.type) ^ + hash()(action.symbol) ^ hash()(action.state_index)); } }; diff --git a/src/compiler/parse_table.cpp b/src/compiler/parse_table.cpp index 4aaa83fd..de93abde 100644 --- a/src/compiler/parse_table.cpp +++ b/src/compiler/parse_table.cpp @@ -4,29 +4,30 @@ using std::string; using std::ostream; using std::to_string; using std::unordered_set; +using tree_sitter::rules::Symbol; namespace tree_sitter { // Action - ParseAction::ParseAction(ParseActionType type, size_t state_index, string symbol_name, size_t child_symbol_count) : - type(type), - state_index(state_index), - symbol_name(symbol_name), - child_symbol_count(child_symbol_count) {}; + ParseAction::ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count) : + type(type), + state_index(state_index), + symbol(symbol), + child_symbol_count(child_symbol_count) {}; ParseAction ParseAction::Error() { - return ParseAction(ParseActionTypeError, -1, "", -1); + return ParseAction(ParseActionTypeError, -1, Symbol(""), -1); } ParseAction ParseAction::Accept() { - return ParseAction(ParseActionTypeAccept, -1, "", -1); + return ParseAction(ParseActionTypeAccept, -1, Symbol(""), -1); } ParseAction ParseAction::Shift(size_t state_index) { - return ParseAction(ParseActionTypeShift, state_index, "", -1); + return ParseAction(ParseActionTypeShift, state_index, Symbol(""), -1); } - ParseAction ParseAction::Reduce(std::string symbol_name, size_t child_symbol_count) { - return ParseAction(ParseActionTypeReduce, -1, symbol_name, child_symbol_count); + ParseAction ParseAction::Reduce(Symbol symbol, size_t child_symbol_count) { + return ParseAction(ParseActionTypeReduce, -1, symbol, child_symbol_count); } bool ParseAction::operator==(const ParseAction &other) const { @@ -45,15 +46,15 @@ namespace tree_sitter { case ParseActionTypeShift: return stream << (string("#"); case ParseActionTypeReduce: - return stream << (string("#"); + return stream << (string("#"); } } // State ParseState::ParseState() : lex_state_index(-1) {} - unordered_set ParseState::expected_inputs() const { - unordered_set result; + unordered_set ParseState::expected_inputs() const { + unordered_set result; for (auto pair : actions) result.insert(pair.first); return result; @@ -84,8 +85,8 @@ namespace tree_sitter { return states.size() - 1; } - void ParseTable::add_action(size_t state_index, string sym_name, ParseAction action) { - symbol_names.insert(sym_name); - states[state_index].actions[sym_name].insert(action); + void ParseTable::add_action(size_t state_index, rules::Symbol symbol, ParseAction action) { + symbols.insert(symbol); + states[state_index].actions[symbol].insert(action); } } diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 82225bd6..41847044 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -4,7 +4,7 @@ #include #include #include -#include "rule.h" +#include "symbol.h" namespace tree_sitter { typedef enum { @@ -15,17 +15,17 @@ namespace tree_sitter { } ParseActionType; class ParseAction { - ParseAction(ParseActionType type, size_t state_index, std::string symbol_name, size_t child_symbol_count); + ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count); public: static ParseAction Accept(); static ParseAction Error(); static ParseAction Shift(size_t state_index); - static ParseAction Reduce(std::string symbol_name, size_t child_symbol_count); + static ParseAction Reduce(rules::Symbol symbol, size_t child_symbol_count); bool operator==(const ParseAction &action) const; ParseActionType type; size_t child_symbol_count; - std::string symbol_name; + rules::Symbol symbol; size_t state_index; }; @@ -38,7 +38,7 @@ namespace std { size_t operator()(const tree_sitter::ParseAction &action) const { return ( hash()(action.type) ^ - hash()(action.symbol_name) ^ + hash()(action.symbol) ^ hash()(action.state_index) ^ hash()(action.child_symbol_count)); } @@ -49,8 +49,8 @@ namespace tree_sitter { class ParseState { public: ParseState(); - std::unordered_map> actions; - std::unordered_set expected_inputs() const; + std::unordered_map> actions; + std::unordered_set expected_inputs() const; size_t lex_state_index; }; @@ -59,10 +59,10 @@ namespace tree_sitter { class ParseTable { public: size_t add_state(); - void add_action(size_t state_index, std::string symbol_name, ParseAction action); + void add_action(size_t state_index, rules::Symbol symbol, ParseAction action); std::vector states; - std::unordered_set symbol_names; + std::unordered_set symbols; }; } diff --git a/src/compiler/rules/symbol.h b/src/compiler/rules/symbol.h index fe2584f4..0ad5bbdf 100644 --- a/src/compiler/rules/symbol.h +++ b/src/compiler/rules/symbol.h @@ -16,7 +16,7 @@ namespace tree_sitter { void accept(Visitor &visitor) const; bool operator<(const Symbol &other) const; - const std::string name; + std::string name; }; } }