diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index 514f57a2..dbd72e4e 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -53,18 +53,11 @@ namespace tree_sitter { namespace tree_sitter { class Grammar { - typedef std::map rule_map; public: - Grammar(std::string start_rule_name, const rule_map &rules); - Grammar(std::string start_rule_name, const rule_map &rules, const rule_map &aux_rules); - + Grammar(std::string start_rule_name, const std::map &rules); bool operator==(const Grammar &other) const; - bool has_definition(const rules::Symbol &symbol) const; - const rules::rule_ptr rule(const rules::Symbol &symbol) const; - const std::string start_rule_name; - const rule_map rules; - const rule_map aux_rules; + const std::map rules; }; std::ostream& operator<<(std::ostream &stream, const Grammar &grammar); diff --git a/spec/compiler/build_tables/first_set_spec.cpp b/spec/compiler/build_tables/first_set_spec.cpp index 6af27d0a..51be7b98 100644 --- a/spec/compiler/build_tables/first_set_spec.cpp +++ b/spec/compiler/build_tables/first_set_spec.cpp @@ -1,4 +1,5 @@ #include "spec_helper.h" +#include "prepared_grammar.h" #include "build_tables/first_set.h" using std::set; @@ -8,7 +9,7 @@ using namespace rules; START_TEST describe("computing FIRST sets", []() { - const Grammar null_grammar("", {{ "something", blank() }}); + const PreparedGrammar null_grammar("", {}, {}); describe("for a sequence AB", [&]() { it("ignores B when A cannot be blank", [&]() { diff --git a/spec/compiler/build_tables/item_set_closure_spec.cpp b/spec/compiler/build_tables/item_set_closure_spec.cpp index 21bc7345..891edfc7 100644 --- a/spec/compiler/build_tables/item_set_closure_spec.cpp +++ b/spec/compiler/build_tables/item_set_closure_spec.cpp @@ -1,4 +1,5 @@ #include "spec_helper.h" +#include "prepared_grammar.h" #include "build_tables/item_set_closure.h" #include "build_tables/item_set_transitions.h" @@ -8,7 +9,7 @@ using namespace rules; START_TEST describe("computing closures of item sets", []() { - Grammar grammar("E", { + PreparedGrammar grammar("E", { { "E", choice({ seq({ sym("T"), @@ -24,7 +25,7 @@ describe("computing closures of item sets", []() { { "F", choice({ sym("v"), sym("n") }) } - }); + }, {}); it("computes the item set closure", [&]() { ParseItemSet item_set = item_set_closure(ParseItemSet({ diff --git a/spec/compiler/build_tables/perform_spec.cpp b/spec/compiler/build_tables/perform_spec.cpp index e981ec3a..12539c49 100644 --- a/spec/compiler/build_tables/perform_spec.cpp +++ b/spec/compiler/build_tables/perform_spec.cpp @@ -1,6 +1,7 @@ #include "spec_helper.h" -#include +#include "prepared_grammar.h" #include "build_tables/perform.h" +#include using build_tables::perform; using namespace rules; @@ -19,7 +20,7 @@ static set keys(const map &map) { START_TEST describe("building parse and lex tables", []() { - Grammar grammar("expression", { + PreparedGrammar grammar("expression", { { "expression", choice({ seq({ sym("term"), @@ -34,15 +35,15 @@ describe("building parse and lex tables", []() { sym("expression"), sym("right-paren") }) }) } - }); + }, {}); - Grammar lex_grammar("", { + PreparedGrammar lex_grammar("", { { "plus", str("+") }, { "variable", pattern("\\w+") }, { "number", pattern("\\d+") }, { "left-paren", str("(") }, { "right-paren", str(")") } - }); + }, {}); ParseTable table; LexTable lex_table; diff --git a/spec/compiler/prepare_grammar_spec.cpp b/spec/compiler/prepare_grammar_spec.cpp index 63b06ba1..b9e8f36a 100644 --- a/spec/compiler/prepare_grammar_spec.cpp +++ b/spec/compiler/prepare_grammar_spec.cpp @@ -1,4 +1,5 @@ #include "spec_helper.h" +#include "prepared_grammar.h" #include "prepare_grammar/perform.h" #include "rules/symbol.h" @@ -10,7 +11,7 @@ using prepare_grammar::perform; describe("preparing a grammar", []() { describe("extracting tokens", []() { it("moves sub-rules that don't contain symbols into a separate 'lexical' grammar", [&]() { - pair result = perform(Grammar("rule1", { + pair result = perform(Grammar("rule1", { { "rule1", seq({ character({ 'a' }), character({ 'b' }), @@ -22,16 +23,16 @@ describe("preparing a grammar", []() { character({ 'b' }) }) }) } })); - AssertThat(result.first, Equals(Grammar("rule1", { + AssertThat(result.first, Equals(PreparedGrammar("rule1", { { "rule1", seq({ make_shared("token1", SymbolTypeAuxiliary), seq({ sym("rule2"), sym("rule3") }), make_shared("token1", SymbolTypeAuxiliary) }) } - }))); + }, {}))); - AssertThat(result.second, Equals(Grammar("", map(), { + AssertThat(result.second, Equals(PreparedGrammar("", {}, { { "token1", rules::seq({ rules::character({ 'a' }), rules::character({ 'b' }) }) }, @@ -46,52 +47,33 @@ describe("preparing a grammar", []() { character({ 'b' }) }) } })); - AssertThat(result.first, Equals(Grammar("rule1", { + AssertThat(result.first, Equals(PreparedGrammar("rule1", { { "rule1", sym("rule2") } - }))); + }, {}))); - AssertThat(result.second, Equals(Grammar("", { + AssertThat(result.second, Equals(PreparedGrammar("", { { "rule2", seq({ character({ 'a' }), character({ 'b' }) }) }, - }))); - }); - - it("moves parts of auxiliary rules into auxiliary lexical rules", []() { - auto result = perform(Grammar("rule1", map(), { - { "rule1", sym("rule2") }, - { "rule2", seq({ - character({ 'a' }), - character({ 'b' }) }) } - })); - - AssertThat(result.first, Equals(Grammar("rule1", map(), { - { "rule1", sym("rule2") } - }))); - - AssertThat(result.second, Equals(Grammar("", map(), { - { "rule2", seq({ - character({ 'a' }), - character({ 'b' }) }) }, - }))); + }, {}))); }); it("does not extract blanks into tokens", [&]() { - pair result = perform(Grammar("rule1", { + pair result = perform(Grammar("rule1", { { "rule1", choice({ sym("rule2"), blank() }) }, })); - AssertThat(result.first, Equals(Grammar("rule1", { + AssertThat(result.first, Equals(PreparedGrammar("rule1", { { "rule1", choice({ sym("rule2"), blank() }) }, - }))); + }, {}))); - AssertThat(result.second, Equals(Grammar("", map()))); + AssertThat(result.second, Equals(PreparedGrammar("", {}, {}))); }); }); describe("expanding repeats", []() { it("replaces repeat rules with pairs of recursive rules", [&]() { - Grammar result = perform(Grammar("rule1", { + PreparedGrammar result = perform(Grammar("rule1", { { "rule1", seq({ sym("x"), repeat(seq({ sym("a"), sym("b") })), @@ -99,7 +81,7 @@ describe("preparing a grammar", []() { }) }, })).first; - AssertThat(result, Equals(Grammar("rule1", { + AssertThat(result, Equals(PreparedGrammar("rule1", { { "rule1", seq({ sym("x"), make_shared("repeat_helper1", SymbolTypeAuxiliary), @@ -117,7 +99,7 @@ describe("preparing a grammar", []() { }); it("does not replace repeat rules that can be moved into the lexical grammar", [&]() { - pair result = perform(Grammar("rule1", { + pair result = perform(Grammar("rule1", { { "rule1", seq({ sym("x"), repeat(seq({ str("a"), str("b") })), @@ -125,15 +107,15 @@ describe("preparing a grammar", []() { }) }, })); - AssertThat(result.first, Equals(Grammar("rule1", { + AssertThat(result.first, Equals(PreparedGrammar("rule1", { { "rule1", seq({ sym("x"), make_shared("token1", SymbolTypeAuxiliary), sym("y") }) }, - }))); + }, {}))); - AssertThat(result.second, Equals(Grammar("", map(), { + AssertThat(result.second, Equals(PreparedGrammar("", {}, { { "token1", repeat(seq({ str("a"), str("b") })) }, }))); }); diff --git a/src/compiler/build_tables/first_set.cpp b/src/compiler/build_tables/first_set.cpp index ed548d19..57a65067 100644 --- a/src/compiler/build_tables/first_set.cpp +++ b/src/compiler/build_tables/first_set.cpp @@ -1,5 +1,6 @@ #include "first_set.h" #include "tree_sitter/compiler.h" +#include "prepared_grammar.h" #include "rule_can_be_blank.h" #include "rules/visitor.h" #include "rules/seq.h" @@ -12,9 +13,9 @@ namespace tree_sitter { namespace build_tables { class FirstSetVisitor : Visitor { set value; - const Grammar grammar; + const PreparedGrammar grammar; - FirstSetVisitor(const Grammar &grammar) : grammar(grammar) {} + FirstSetVisitor(const PreparedGrammar &grammar) : grammar(grammar) {} set set_union(const set &left, const set &right) { set result = left; @@ -42,14 +43,14 @@ namespace tree_sitter { } public: - static set apply(const rule_ptr rule, const Grammar &grammar) { + static set apply(const rule_ptr rule, const PreparedGrammar &grammar) { FirstSetVisitor visitor(grammar); rule->accept(visitor); return visitor.value; } }; - set first_set(const rule_ptr &rule, const Grammar &grammar) { + set first_set(const rule_ptr &rule, const PreparedGrammar &grammar) { return FirstSetVisitor::apply(rule, grammar); } } diff --git a/src/compiler/build_tables/first_set.h b/src/compiler/build_tables/first_set.h index c7af3352..f755e836 100644 --- a/src/compiler/build_tables/first_set.h +++ b/src/compiler/build_tables/first_set.h @@ -5,10 +5,10 @@ #include namespace tree_sitter { - class Grammar; + class PreparedGrammar; namespace build_tables { - std::set first_set(const rules::rule_ptr &rule, const Grammar &grammar); + std::set first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar); } } diff --git a/src/compiler/build_tables/follow_sets.cpp b/src/compiler/build_tables/follow_sets.cpp index 5deb8581..8f52a4f9 100644 --- a/src/compiler/build_tables/follow_sets.cpp +++ b/src/compiler/build_tables/follow_sets.cpp @@ -2,7 +2,7 @@ #include "first_set.h" #include "rule_transitions.h" #include "rule_can_be_blank.h" -#include "tree_sitter/compiler.h" +#include "prepared_grammar.h" namespace tree_sitter { using std::set; @@ -10,10 +10,8 @@ namespace tree_sitter { using rules::Symbol; using rules::rule_ptr; - class Grammar; - namespace build_tables { - map> follow_sets(const ParseItem &item, const Grammar &grammar) { + map> follow_sets(const ParseItem &item, const PreparedGrammar &grammar) { map> result; for (auto &pair : sym_transitions(item.rule)) { diff --git a/src/compiler/build_tables/follow_sets.h b/src/compiler/build_tables/follow_sets.h index b5609e98..cc633438 100644 --- a/src/compiler/build_tables/follow_sets.h +++ b/src/compiler/build_tables/follow_sets.h @@ -7,10 +7,10 @@ #include namespace tree_sitter { - class Grammar; + class PreparedGrammar; namespace build_tables { - std::map> follow_sets(const ParseItem &item, const Grammar &grammar); + std::map> follow_sets(const ParseItem &item, const PreparedGrammar &grammar); } } diff --git a/src/compiler/build_tables/item_set_closure.cpp b/src/compiler/build_tables/item_set_closure.cpp index 3ca97f26..de417ac8 100644 --- a/src/compiler/build_tables/item_set_closure.cpp +++ b/src/compiler/build_tables/item_set_closure.cpp @@ -2,6 +2,7 @@ #include "tree_sitter/compiler.h" #include "./follow_sets.h" #include "item.h" +#include "prepared_grammar.h" #include namespace tree_sitter { @@ -13,7 +14,7 @@ namespace tree_sitter { return items.size() > 0 && (std::find(items.begin(), items.end(), item) != items.end()); } - static void add_item(ParseItemSet &item_set, const ParseItem &item, const Grammar &grammar) { + static void add_item(ParseItemSet &item_set, const ParseItem &item, const PreparedGrammar &grammar) { if (!contains(item_set, item)) { item_set.insert(item); for (auto pair : follow_sets(item, grammar)) { @@ -27,7 +28,7 @@ namespace tree_sitter { } } - const ParseItemSet item_set_closure(const ParseItemSet &item_set, const Grammar &grammar) { + const ParseItemSet item_set_closure(const ParseItemSet &item_set, const PreparedGrammar &grammar) { ParseItemSet result; for (ParseItem item : item_set) add_item(result, item, grammar); diff --git a/src/compiler/build_tables/item_set_closure.h b/src/compiler/build_tables/item_set_closure.h index aa0fc307..714bdd4a 100644 --- a/src/compiler/build_tables/item_set_closure.h +++ b/src/compiler/build_tables/item_set_closure.h @@ -4,10 +4,10 @@ #include "item.h" namespace tree_sitter { - class Grammar; + class PreparedGrammar; namespace build_tables { - const ParseItemSet item_set_closure(const ParseItemSet &item_set, const Grammar &grammar); + const ParseItemSet item_set_closure(const ParseItemSet &item_set, const PreparedGrammar &grammar); } } diff --git a/src/compiler/build_tables/item_set_transitions.cpp b/src/compiler/build_tables/item_set_transitions.cpp index ea09ab09..1d2f9661 100644 --- a/src/compiler/build_tables/item_set_transitions.cpp +++ b/src/compiler/build_tables/item_set_transitions.cpp @@ -19,7 +19,7 @@ namespace tree_sitter { return result; } - map sym_transitions(const ParseItem &item, const Grammar &grammar) { + map sym_transitions(const ParseItem &item, const PreparedGrammar &grammar) { map result; for (auto transition : sym_transitions(item.rule)) { Symbol rule = transition.first; @@ -38,7 +38,7 @@ namespace tree_sitter { return result; } - map char_transitions(const LexItemSet &item_set, const Grammar &grammar) { + map char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar) { map result; for (const LexItem &item : item_set) { map item_transitions = char_transitions(item); @@ -49,7 +49,7 @@ namespace tree_sitter { return result; } - map sym_transitions(const ParseItemSet &item_set, const Grammar &grammar) { + map sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar) { map result; for (const ParseItem &item : item_set) { map item_transitions = sym_transitions(item, grammar); diff --git a/src/compiler/build_tables/item_set_transitions.h b/src/compiler/build_tables/item_set_transitions.h index 95b59eaa..18cd0b5c 100644 --- a/src/compiler/build_tables/item_set_transitions.h +++ b/src/compiler/build_tables/item_set_transitions.h @@ -5,14 +5,15 @@ #include namespace tree_sitter { + class PreparedGrammar; namespace rules { class CharacterSet; class Symbol; } - + namespace build_tables { - std::map char_transitions(const LexItemSet &item_set, const Grammar &grammar); - std::map sym_transitions(const ParseItemSet &item_set, const Grammar &grammar); + std::map char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar); + std::map sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar); } } diff --git a/src/compiler/build_tables/perform.cpp b/src/compiler/build_tables/perform.cpp index fbe354dc..5987ecb2 100644 --- a/src/compiler/build_tables/perform.cpp +++ b/src/compiler/build_tables/perform.cpp @@ -1,4 +1,5 @@ #include "./perform.h" +#include "prepared_grammar.h" #include "item.h" #include "item_set_closure.h" #include "item_set_transitions.h" @@ -18,8 +19,8 @@ namespace tree_sitter { static Symbol END_OF_INPUT("end", rules::SymbolTypeAuxiliary); class TableBuilder { - const Grammar grammar; - const Grammar lex_grammar; + const PreparedGrammar grammar; + const PreparedGrammar lex_grammar; map parse_state_indices; map lex_state_indices; ParseTable parse_table; @@ -126,7 +127,7 @@ namespace tree_sitter { public: - TableBuilder(const Grammar &grammar, const Grammar &lex_grammar) : + TableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) : grammar(grammar), lex_grammar(lex_grammar) {}; @@ -138,7 +139,7 @@ namespace tree_sitter { } }; - pair perform(const Grammar &grammar, const Grammar &lex_grammar) { + pair perform(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) { return TableBuilder(grammar, lex_grammar).build(); } } diff --git a/src/compiler/build_tables/perform.h b/src/compiler/build_tables/perform.h index 1fcc8094..ca5a132c 100644 --- a/src/compiler/build_tables/perform.h +++ b/src/compiler/build_tables/perform.h @@ -5,10 +5,10 @@ #include "lex_table.h" namespace tree_sitter { - class Grammar; + class PreparedGrammar; namespace build_tables { - std::pair perform(const Grammar &grammar, const Grammar &lex_grammar); + std::pair perform(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar); } } diff --git a/src/compiler/build_tables/rule_can_be_blank.cpp b/src/compiler/build_tables/rule_can_be_blank.cpp index c935d564..80dfdb14 100644 --- a/src/compiler/build_tables/rule_can_be_blank.cpp +++ b/src/compiler/build_tables/rule_can_be_blank.cpp @@ -1,5 +1,6 @@ #include "rule_can_be_blank.h" #include "tree_sitter/compiler.h" +#include "prepared_grammar.h" #include "rules/symbol.h" #include "rules/visitor.h" #include "rules/seq.h" @@ -41,7 +42,7 @@ namespace tree_sitter { return visitor.value; } - bool rule_can_be_blank(const rule_ptr &rule, const Grammar &grammar) { + bool rule_can_be_blank(const rule_ptr &rule, const PreparedGrammar &grammar) { if (rule_can_be_blank(rule)) return true; auto symbol = std::dynamic_pointer_cast(rule); return (symbol.get() && grammar.has_definition(*symbol) && rule_can_be_blank(grammar.rule(*symbol), grammar)); diff --git a/src/compiler/build_tables/rule_can_be_blank.h b/src/compiler/build_tables/rule_can_be_blank.h index 94175b33..b0145b46 100644 --- a/src/compiler/build_tables/rule_can_be_blank.h +++ b/src/compiler/build_tables/rule_can_be_blank.h @@ -4,9 +4,11 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { + class PreparedGrammar; + namespace build_tables { bool rule_can_be_blank(const rules::rule_ptr &rule); - bool rule_can_be_blank(const rules::rule_ptr &rule, const Grammar &grammar); + bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar); } } diff --git a/src/compiler/compile.cpp b/src/compiler/compile.cpp index 1873f369..a935caa3 100644 --- a/src/compiler/compile.cpp +++ b/src/compiler/compile.cpp @@ -2,6 +2,7 @@ #include "prepare_grammar/perform.h" #include "build_tables/perform.h" #include "generate_code/c_code.h" +#include "prepared_grammar.h" namespace tree_sitter { std::string compile(const Grammar &grammar, std::string name) { diff --git a/src/compiler/grammar.cpp b/src/compiler/grammar.cpp index b8d6e71b..62114b58 100644 --- a/src/compiler/grammar.cpp +++ b/src/compiler/grammar.cpp @@ -1,63 +1,31 @@ #include "tree_sitter/compiler.h" -#include "rules/symbol.h" +#include "rules/rule.h" namespace tree_sitter { - using std::vector; using std::string; - using std::pair; - using std::initializer_list; using std::map; using std::ostream; using rules::rule_ptr; - using rules::Symbol; - Grammar::Grammar(std::string start_rule_name, - const std::map &rules) : + Grammar::Grammar(std::string start_rule_name, const map &rules) : start_rule_name(start_rule_name), rules(rules) {} - - Grammar::Grammar(std::string start_rule_name, - const map &rules, - const map &aux_rules) : - start_rule_name(start_rule_name), - rules(rules), - aux_rules(aux_rules) {} - - const rule_ptr Grammar::rule(const Symbol &symbol) const { - auto map = symbol.is_auxiliary() ? aux_rules : rules; - auto iter = map.find(symbol.name); - if (iter != map.end()) - return iter->second; - else - return rule_ptr(); - } bool Grammar::operator==(const Grammar &other) const { if (other.start_rule_name != start_rule_name) return false; if (other.rules.size() != rules.size()) return false; - if (other.aux_rules.size() != aux_rules.size()) return false; for (auto pair : rules) { auto other_pair = other.rules.find(pair.first); if (other_pair == other.rules.end()) return false; if (!other_pair->second->operator==(*pair.second)) return false; } - for (auto pair : aux_rules) { - auto other_pair = other.aux_rules.find(pair.first); - if (other_pair == other.aux_rules.end()) return false; - if (!other_pair->second->operator==(*pair.second)) return false; - } return true; } - bool Grammar::has_definition(const Symbol &symbol) const { - return rule(symbol).get() != nullptr; - } - ostream& operator<<(ostream &stream, const Grammar &grammar) { stream << string("# "); - stream << pair.second; - started = true; - } - stream << string("}"); - - return stream << string(">"); + return stream << string("}>"); } } diff --git a/src/compiler/prepare_grammar/expand_repeats.cpp b/src/compiler/prepare_grammar/expand_repeats.cpp index a680f8aa..92cb7b2e 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cpp +++ b/src/compiler/prepare_grammar/expand_repeats.cpp @@ -1,11 +1,12 @@ #include "expand_repeats.h" -#include +#include "prepared_grammar.h" #include "rules/visitor.h" #include "rules/seq.h" #include "rules/symbol.h" #include "rules/choice.h" #include "rules/blank.h" #include "rules/repeat.h" +#include namespace tree_sitter { using std::string; @@ -51,7 +52,7 @@ namespace tree_sitter { } }; - Grammar expand_repeats(const Grammar &grammar) { + PreparedGrammar expand_repeats(const PreparedGrammar &grammar) { map rules; map aux_rules(grammar.aux_rules); RepeatExpander visitor; @@ -61,7 +62,7 @@ namespace tree_sitter { aux_rules.insert(visitor.aux_rules.begin(), visitor.aux_rules.end()); - return Grammar(grammar.start_rule_name, rules, aux_rules); + return PreparedGrammar(grammar.start_rule_name, rules, aux_rules); } } } \ No newline at end of file diff --git a/src/compiler/prepare_grammar/expand_repeats.h b/src/compiler/prepare_grammar/expand_repeats.h index b9529d6a..dab4018c 100644 --- a/src/compiler/prepare_grammar/expand_repeats.h +++ b/src/compiler/prepare_grammar/expand_repeats.h @@ -4,8 +4,10 @@ #include "tree_sitter/compiler.h" namespace tree_sitter { + class PreparedGrammar; + namespace prepare_grammar { - Grammar expand_repeats(const Grammar &); + PreparedGrammar expand_repeats(const PreparedGrammar &); } } diff --git a/src/compiler/prepare_grammar/extract_tokens.cpp b/src/compiler/prepare_grammar/extract_tokens.cpp index 7687f58d..74364b93 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cpp +++ b/src/compiler/prepare_grammar/extract_tokens.cpp @@ -1,13 +1,14 @@ #include "extract_tokens.h" #include "search_for_symbols.h" -#include #include "tree_sitter/compiler.h" +#include "prepared_grammar.h" #include "rules/visitor.h" #include "rules/seq.h" #include "rules/choice.h" #include "rules/repeat.h" #include "rules/blank.h" #include "rules/symbol.h" +#include namespace tree_sitter { using std::pair; @@ -67,7 +68,7 @@ namespace tree_sitter { } }; - pair extract_tokens(const Grammar &input_grammar) { + pair extract_tokens(const PreparedGrammar &input_grammar) { TokenExtractor extractor; map rules; map tokens; @@ -97,8 +98,8 @@ namespace tree_sitter { aux_tokens.insert(extractor.tokens.begin(), extractor.tokens.end()); return { - Grammar(input_grammar.start_rule_name, rules, aux_rules), - Grammar("", tokens, aux_tokens) + PreparedGrammar(input_grammar.start_rule_name, rules, aux_rules), + PreparedGrammar("", tokens, aux_tokens) }; } } diff --git a/src/compiler/prepare_grammar/extract_tokens.h b/src/compiler/prepare_grammar/extract_tokens.h index e07b2119..e45efda5 100644 --- a/src/compiler/prepare_grammar/extract_tokens.h +++ b/src/compiler/prepare_grammar/extract_tokens.h @@ -4,10 +4,10 @@ #include namespace tree_sitter { - class Grammar; + class PreparedGrammar; namespace prepare_grammar { - std::pair extract_tokens(const Grammar &); + std::pair extract_tokens(const PreparedGrammar &); } } diff --git a/src/compiler/prepare_grammar/perform.cpp b/src/compiler/prepare_grammar/perform.cpp index 5946a884..6b388554 100644 --- a/src/compiler/prepare_grammar/perform.cpp +++ b/src/compiler/prepare_grammar/perform.cpp @@ -1,3 +1,4 @@ +#include "prepared_grammar.h" #include "./perform.h" #include "./extract_tokens.h" #include "./expand_repeats.h" @@ -6,8 +7,9 @@ namespace tree_sitter { using std::pair; namespace prepare_grammar { - pair perform(const Grammar &input_grammar) { - auto grammars = prepare_grammar::extract_tokens(input_grammar); + pair perform(const Grammar &input_grammar) { + PreparedGrammar grammar(input_grammar); + auto grammars = prepare_grammar::extract_tokens(grammar); auto rule_grammar = expand_repeats(grammars.first); auto lex_grammar = grammars.second; return { rule_grammar, lex_grammar }; diff --git a/src/compiler/prepare_grammar/perform.h b/src/compiler/prepare_grammar/perform.h index e065e3b1..fe60f121 100644 --- a/src/compiler/prepare_grammar/perform.h +++ b/src/compiler/prepare_grammar/perform.h @@ -5,9 +5,10 @@ namespace tree_sitter { class Grammar; + class PreparedGrammar; namespace prepare_grammar { - std::pair perform(const Grammar &); + std::pair perform(const Grammar &); } } diff --git a/src/compiler/prepared_grammar.cpp b/src/compiler/prepared_grammar.cpp new file mode 100644 index 00000000..ae766f98 --- /dev/null +++ b/src/compiler/prepared_grammar.cpp @@ -0,0 +1,88 @@ +#include "prepared_grammar.h" +#include "rules/symbol.h" + +namespace tree_sitter { + using std::string; + using std::initializer_list; + using std::pair; + using std::map; + using std::ostream; + using rules::rule_ptr; + using rules::Symbol; + + PreparedGrammar::PreparedGrammar(std::string start_rule_name, + const map &rules, + const map &aux_rules) : + Grammar(start_rule_name, rules), + aux_rules(aux_rules) {} + + PreparedGrammar::PreparedGrammar(std::string start_rule_name, + const initializer_list> &rules, + const initializer_list> &aux_rules) : + Grammar(start_rule_name, rules), + aux_rules(aux_rules) {} + + PreparedGrammar::PreparedGrammar(const Grammar &grammar) : + Grammar(grammar), + aux_rules({}) {} + + const rule_ptr PreparedGrammar::rule(const Symbol &symbol) const { + auto map = symbol.is_auxiliary() ? aux_rules : rules; + auto iter = map.find(symbol.name); + if (iter != map.end()) + return iter->second; + else + return rule_ptr(); + } + + bool PreparedGrammar::operator==(const PreparedGrammar &other) const { + if (other.start_rule_name != start_rule_name) return false; + if (other.rules.size() != rules.size()) return false; + if (other.aux_rules.size() != aux_rules.size()) return false; + + for (auto pair : rules) { + auto other_pair = other.rules.find(pair.first); + if (other_pair == other.rules.end()) return false; + if (!other_pair->second->operator==(*pair.second)) return false; + } + for (auto pair : aux_rules) { + auto other_pair = other.aux_rules.find(pair.first); + if (other_pair == other.aux_rules.end()) return false; + if (!other_pair->second->operator==(*pair.second)) return false; + } + + return true; + } + + bool PreparedGrammar::has_definition(const Symbol &symbol) const { + return rule(symbol).get() != nullptr; + } + + ostream& operator<<(ostream &stream, const PreparedGrammar &grammar) { + stream << string("# "); + stream << pair.second; + started = true; + } + stream << string("}"); + + stream << string(" aux_rules: {"); + started = false; + for (auto pair : grammar.aux_rules) { + if (started) stream << string(", "); + stream << pair.first; + stream << string(" => "); + stream << pair.second; + started = true; + } + stream << string("}"); + + return stream << string(">"); + } +} \ No newline at end of file diff --git a/src/compiler/prepared_grammar.h b/src/compiler/prepared_grammar.h new file mode 100644 index 00000000..e45acce1 --- /dev/null +++ b/src/compiler/prepared_grammar.h @@ -0,0 +1,29 @@ +#ifndef __tree_sitter__prepared_grammar__ +#define __tree_sitter__prepared_grammar__ + +#include "tree_sitter/compiler.h" + +namespace tree_sitter { + class PreparedGrammar : public Grammar { + public: + PreparedGrammar(std::string start_rule_name, + const std::map &rules, + const std::map &aux_rules); + PreparedGrammar(std::string start_rule_name, + const std::initializer_list> &rules, + const std::initializer_list> &aux_rules); + PreparedGrammar(const Grammar &grammar); + + bool operator==(const PreparedGrammar &other) const; + bool has_definition(const rules::Symbol &symbol) const; + const rules::rule_ptr rule(const rules::Symbol &symbol) const; + + const std::map aux_rules; + }; + + std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar); + + std::string compile(const Grammar &grammar, std::string name); +} + +#endif diff --git a/tree_sitter.xcodeproj/project.pbxproj b/tree_sitter.xcodeproj/project.pbxproj index fd764217..2bef22c3 100644 --- a/tree_sitter.xcodeproj/project.pbxproj +++ b/tree_sitter.xcodeproj/project.pbxproj @@ -16,6 +16,7 @@ 1214930E181E200B008E9BDA /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492E9181E200B008E9BDA /* main.cpp */; }; 1225CC6418765693000D4723 /* prepare_grammar_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */; }; 1236A7C518B287DC00593ABB /* character_range.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1236A7C318B287DC00593ABB /* character_range.cpp */; }; + 1236A7D218B554C800593ABB /* prepared_grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1236A7D018B554C800593ABB /* prepared_grammar.cpp */; }; 1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; }; 12661BF418A1505A00A259FB /* character_set_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12661BF318A1505A00A259FB /* character_set_spec.cpp */; }; 127528B318AACAAA006B682B /* rule_can_be_blank.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 127528B118AACAAA006B682B /* rule_can_be_blank.cpp */; }; @@ -102,6 +103,8 @@ 1236A7CA18B2ABB900593ABB /* equals_pointer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = equals_pointer.h; sourceTree = ""; }; 1236A7CC18B2B4F000593ABB /* Makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = Makefile; sourceTree = ""; }; 1236A7CE18B3CC4800593ABB /* .travis.yml */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .travis.yml; sourceTree = ""; }; + 1236A7D018B554C800593ABB /* prepared_grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prepared_grammar.cpp; sourceTree = ""; }; + 1236A7D118B554C800593ABB /* prepared_grammar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = prepared_grammar.h; sourceTree = ""; }; 1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = ""; }; 125120A3183083BD00C9B56A /* arithmetic.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; lineEnding = 0; path = arithmetic.hpp; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12661BF318A1505A00A259FB /* character_set_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = character_set_spec.cpp; path = spec/compiler/rules/character_set_spec.cpp; sourceTree = SOURCE_ROOT; }; @@ -384,6 +387,8 @@ 12EDCF9D18820116005A7A07 /* parse_table.cpp */, 12EDCF9E18820116005A7A07 /* parse_table.h */, 12ED72A5186FC6D90089229B /* prepare_grammar */, + 1236A7D018B554C800593ABB /* prepared_grammar.cpp */, + 1236A7D118B554C800593ABB /* prepared_grammar.h */, 12130602182C344400FCF928 /* rules */, ); path = compiler; @@ -535,6 +540,7 @@ 1225CC6418765693000D4723 /* prepare_grammar_spec.cpp in Sources */, 12EDCF9A1881FCD9005A7A07 /* search_for_symbols.cpp in Sources */, 12EDCFB21882039A005A7A07 /* perform.cpp in Sources */, + 1236A7D218B554C800593ABB /* prepared_grammar.cpp in Sources */, 12FD40E718639B910041A84E /* visitor.cpp in Sources */, 12EDCF991881FCD9005A7A07 /* perform.cpp in Sources */, 12EDCFBC188205BF005A7A07 /* rule_transitions_spec.cpp in Sources */,