diff --git a/character_set_spec.cpp b/character_set_spec.cpp index 772db771..206e27e7 100644 --- a/character_set_spec.cpp +++ b/character_set_spec.cpp @@ -1,5 +1,5 @@ #include "spec_helper.h" -#include "rules.h" +#include "character_set.h" using namespace rules; @@ -13,8 +13,8 @@ describe("character sets", []() { CharacterSet set1({ '\0' }); auto set2 = set1.complement(); AssertThat(set2, Equals(CharacterSet({ - { 1, max_char }, - }, true))); + { 1, max_char } + }))); AssertThat(set2.complement(), Equals(set1)); }); @@ -31,56 +31,56 @@ describe("character sets", []() { describe("computing unions", [&]() { it("works for disjoint sets", [&]() { - CharacterSet set({ {'a', 'z'} }, true); - set.add_set(CharacterSet({ {'A', 'Z'} }, true)); - AssertThat(set, Equals(CharacterSet({ {'a', 'z'}, {'A', 'Z'}, }))); + CharacterSet set({ {'a', 'z'} }); + set.add_set(CharacterSet({ {'A', 'Z'} })); + AssertThat(set, Equals(CharacterSet({ {'a', 'z'}, {'A', 'Z'} }))); }); it("works for sets with adjacent ranges", [&]() { - CharacterSet set({ {'a', 'r'} }, true); - set.add_set(CharacterSet({ {'s', 'z'} }, true)); - AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }, true))); + CharacterSet set({ CharacterRange('a', 'r') }); + set.add_set(CharacterSet({ CharacterRange('s', 'z') })); + AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }))); set = CharacterSet({ 'c' }); auto c = set.complement(); set.add_set(c); - AssertThat(set, Equals(CharacterSet({ {0, max_char} }, true))); + AssertThat(set, Equals(CharacterSet({ {0, max_char} }))); }); it("works when the result becomes a continuous range", []() { - CharacterSet set({ {'a', 'd'}, {'f', 'z'} }, true); - set.add_set(CharacterSet({ {'c', 'g'} }, true)); - AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }, true))); + CharacterSet set({ {'a', 'd'}, {'f', 'z'} }); + set.add_set(CharacterSet({ {'c', 'g'} })); + AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }))); }); it("does nothing for the set of all characters", [&]() { CharacterSet set({ 'a' }); set.add_set(set.complement()); - AssertThat(set, Equals(CharacterSet({ {'\0', max_char} }, true))); + AssertThat(set, Equals(CharacterSet({ {'\0', max_char} }))); }); }); describe("computing differences", []() { it("works for disjoint sets", []() { - CharacterSet set1({ {'a','z'} }, true); - set1.remove_set(CharacterSet({ {'A','Z'} }, true)); - AssertThat(set1, Equals(CharacterSet({ {'a', 'z'} }, true))); + CharacterSet set1({ {'a','z'} }); + set1.remove_set(CharacterSet({ {'A','Z'} })); + AssertThat(set1, Equals(CharacterSet({ {'a', 'z'} }))); }); it("works when one set spans the other", []() { - CharacterSet set1({ {'a','z'} }, true); - set1.remove_set(CharacterSet({ {'d','s'} }, true)); + CharacterSet set1({ {'a','z'} }); + set1.remove_set(CharacterSet({ {'d','s'} })); AssertThat(set1, Equals(CharacterSet({ {'a', 'c'}, {'t', 'z'} }))); }); it("works for sets that overlap", []() { - CharacterSet set1({ {'a','s'} }, true); - set1.remove_set(CharacterSet({ {'m','z'} }, true)); - AssertThat(set1, Equals(CharacterSet({ {'a', 'l'} }, true))); + CharacterSet set1({ {'a','s'} }); + set1.remove_set(CharacterSet({ {'m','z'} })); + AssertThat(set1, Equals(CharacterSet({ {'a', 'l'} }))); - CharacterSet set2({ {'m','z'} }, true); - set2.remove_set(CharacterSet({ {'a','s'} }, true)); - AssertThat(set2, Equals(CharacterSet({ {'t', 'z'} }, true))); + CharacterSet set2({ {'m','z'} }); + set2.remove_set(CharacterSet({ {'a','s'} })); + AssertThat(set2, Equals(CharacterSet({ {'t', 'z'} }))); }); it("works for sets with multiple ranges", []() { @@ -92,20 +92,20 @@ describe("character sets", []() { describe("computing intersections", []() { it("returns an empty set for disjoint sets", []() { - CharacterSet set1({ {'a','d'} }, true); - CharacterSet set2({ {'e','x'} }, true); + CharacterSet set1({ {'a','d'} }); + CharacterSet set2({ {'e','x'} }); AssertThat(set1.intersect(set2), Equals(CharacterSet())); }); it("works for sets with a single overlapping range", []() { - CharacterSet set1({ {'a','e'} }, true); - CharacterSet set2({ {'c','x'} }, true); - AssertThat(set1.intersect(set2), Equals(CharacterSet({ {'c', 'e'} }, true))); + CharacterSet set1({ {'a','e'} }); + CharacterSet set2({ {'c','x'} }); + AssertThat(set1.intersect(set2), Equals(CharacterSet({ {'c', 'e'} }))); }); it("works for sets with two overlapping ranges", []() { - CharacterSet set1({ {'a','e'}, {'w','z'} }, true); - CharacterSet set2({ {'c','y'} }, true); + CharacterSet set1({ {'a','e'}, {'w','z'} }); + CharacterSet set2({ {'c','y'} }); AssertThat(set1.intersect(set2), Equals(CharacterSet({ {'c', 'e'}, {'w', 'y'} }))); }); }); diff --git a/include/compiler.h b/include/compiler.h new file mode 100644 index 00000000..4b8742d3 --- /dev/null +++ b/include/compiler.h @@ -0,0 +1,74 @@ +#ifndef tree_sitter_compiler_h +#define tree_sitter_compiler_h + +#include +#include +#include +#include + +namespace tree_sitter { + namespace rules { + class Rule; + class Symbol; + + struct CharacterRange { + char min; + char max; + CharacterRange(char); + CharacterRange(char, char); + bool operator==(const CharacterRange &) const; + bool operator<(const CharacterRange &) const; + std::string to_string() const; + }; + } +} + +namespace std { + template<> + struct hash { + size_t operator()(const tree_sitter::rules::CharacterRange &range) const { + return (hash()(range.min) ^ hash()(range.max)); + } + }; +} + +namespace tree_sitter { + namespace rules { + typedef std::shared_ptr rule_ptr; + std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule); + + rule_ptr blank(); + rule_ptr character(const std::set &matches); + rule_ptr character(const std::set &matches, bool); + rule_ptr choice(const std::vector &rules); + rule_ptr pattern(const std::string &value); + rule_ptr repeat(const rule_ptr content); + rule_ptr seq(const std::vector &rules); + rule_ptr str(const std::string &value); + rule_ptr sym(const std::string &name); + rule_ptr aux_sym(const std::string &name); + } +} + +namespace tree_sitter { + class Grammar { + typedef std::map rule_map; + public: + Grammar(std::string start_rule_name, const rule_map &rules); + Grammar(std::string start_rule_name, const rule_map &rules, const rule_map &aux_rules); + + bool operator==(const Grammar &other) const; + bool has_definition(const rules::Symbol &symbol) const; + const rules::rule_ptr rule(const rules::Symbol &symbol) const; + + const std::string start_rule_name; + const rule_map rules; + const rule_map aux_rules; + }; + + std::ostream& operator<<(std::ostream &stream, const Grammar &grammar); + + std::string compile(const Grammar &grammar, std::string name); +} + +#endif diff --git a/spec/compiler/build_tables/first_set_spec.cpp b/spec/compiler/build_tables/first_set_spec.cpp index dfd84aab..6af27d0a 100644 --- a/spec/compiler/build_tables/first_set_spec.cpp +++ b/spec/compiler/build_tables/first_set_spec.cpp @@ -1,7 +1,5 @@ #include "spec_helper.h" #include "build_tables/first_set.h" -#include "grammar.h" -#include "rules.h" using std::set; using namespace build_tables; @@ -10,7 +8,7 @@ using namespace rules; START_TEST describe("computing FIRST sets", []() { - const Grammar null_grammar({{ "something", blank() }}); + const Grammar null_grammar("", {{ "something", blank() }}); describe("for a sequence AB", [&]() { it("ignores B when A cannot be blank", [&]() { @@ -42,7 +40,7 @@ describe("computing FIRST sets", []() { sym("A") }), sym("A") }); - Grammar grammar({ + Grammar grammar("A", { { "A", choice({ seq({ sym("y"), @@ -57,7 +55,7 @@ describe("computing FIRST sets", []() { }); it("includes FIRST(B) when A is a non-terminal and its expansion can be blank", [&]() { - Grammar grammar({{ "A", choice({ sym("x"), blank() }) }}); + Grammar grammar("A", {{ "A", choice({ sym("x"), blank() }) }}); auto rule = seq({ sym("A"), diff --git a/spec/compiler/build_tables/item_set_closure_spec.cpp b/spec/compiler/build_tables/item_set_closure_spec.cpp index d242ac47..21bc7345 100644 --- a/spec/compiler/build_tables/item_set_closure_spec.cpp +++ b/spec/compiler/build_tables/item_set_closure_spec.cpp @@ -1,8 +1,6 @@ #include "spec_helper.h" #include "build_tables/item_set_closure.h" #include "build_tables/item_set_transitions.h" -#include "grammar.h" -#include "rules.h" using namespace build_tables; using namespace rules; @@ -10,7 +8,7 @@ using namespace rules; START_TEST describe("computing closures of item sets", []() { - Grammar grammar({ + Grammar grammar("E", { { "E", choice({ seq({ sym("T"), diff --git a/spec/compiler/build_tables/perform_spec.cpp b/spec/compiler/build_tables/perform_spec.cpp index 39695270..e981ec3a 100644 --- a/spec/compiler/build_tables/perform_spec.cpp +++ b/spec/compiler/build_tables/perform_spec.cpp @@ -19,7 +19,7 @@ static set keys(const map &map) { START_TEST describe("building parse and lex tables", []() { - Grammar grammar({ + Grammar grammar("expression", { { "expression", choice({ seq({ sym("term"), @@ -36,7 +36,7 @@ describe("building parse and lex tables", []() { }) }) } }); - Grammar lex_grammar({ + Grammar lex_grammar("", { { "plus", str("+") }, { "variable", pattern("\\w+") }, { "number", pattern("\\d+") }, @@ -72,9 +72,9 @@ describe("building parse and lex tables", []() { }))); AssertThat(lex_state(0).expected_inputs(), Equals(set({ - CharacterSet({ '(' }, true), - CharacterSet({ {'0', '9'} }, true), - CharacterSet({ {'a', 'z'}, {'A', 'Z'} }, true), + CharacterSet({ '(' }), + CharacterSet({ CharacterRange('0', '9') }), + CharacterSet({ {'a', 'z'}, {'A', 'Z'} }), }))); }); }); diff --git a/spec/compiler/build_tables/rule_transitions_spec.cpp b/spec/compiler/build_tables/rule_transitions_spec.cpp index 16c6acfe..6df19872 100644 --- a/spec/compiler/build_tables/rule_transitions_spec.cpp +++ b/spec/compiler/build_tables/rule_transitions_spec.cpp @@ -90,7 +90,7 @@ describe("rule transitions", []() { it("handles characters", [&]() { AssertThat( - char_transitions(character('1')), + char_transitions(character({ '1' })), Equals(rule_map({ { CharacterSet({ '1' }), blank() } }))); @@ -100,7 +100,7 @@ describe("rule transitions", []() { AssertThat( char_transitions(str("bad")), Equals(rule_map({ - { CharacterSet({ 'b' }, true), seq({ character('a'), character('d') }) } + { CharacterSet({ 'b' }), seq({ character({ 'a' }), character({ 'd' }) }) } }))); }); @@ -124,9 +124,9 @@ describe("rule transitions", []() { character({ { 'm', 'z' } }), sym("y") }) })), Equals(rule_map({ - { CharacterSet({ {'a','l'} }, true), sym("x") }, - { CharacterSet({ {'m','s'} }, true), choice({ sym("x"), sym("y") }) }, - { CharacterSet({ {'t','z'} }, true), sym("y") }, + { CharacterSet({ {'a','l'} }), sym("x") }, + { CharacterSet({ {'m','s'} }), choice({ sym("x"), sym("y") }) }, + { CharacterSet({ {'t','z'} }), sym("y") }, }))); }); @@ -138,7 +138,7 @@ describe("rule transitions", []() { { CharacterSet({ 'a' }), seq({ - character('b'), + character({ 'b' }), choice({ rule, blank() @@ -166,17 +166,17 @@ describe("rule transitions", []() { repeat(character({ '"' }, false)), blank(), }), - character('"'), + character({ '"' }), }); AssertThat(char_transitions(rule), Equals(rule_map({ - { CharacterSet({ '"' }, false), seq({ + { CharacterSet({ '"' }).complement(), seq({ choice({ repeat(character({ '"' }, false)), blank(), }), - character('"'), }) }, - { CharacterSet({ '"' }, true), blank() }, + character({ '"' }), }) }, + { CharacterSet({ '"' }), blank() }, }))); }); }); diff --git a/spec/compiler/compile_fixtures.cpp b/spec/compiler/compile_fixtures.cpp index 1851d1d9..1d11b8d1 100644 --- a/spec/compiler/compile_fixtures.cpp +++ b/spec/compiler/compile_fixtures.cpp @@ -1,5 +1,4 @@ #include "spec_helper.h" -#include "compile.h" #include "../fixtures/grammars/test_grammars.h" #include diff --git a/spec/compiler/prepare_grammar_spec.cpp b/spec/compiler/prepare_grammar_spec.cpp index dfec69af..a501c3cd 100644 --- a/spec/compiler/prepare_grammar_spec.cpp +++ b/spec/compiler/prepare_grammar_spec.cpp @@ -9,19 +9,19 @@ using prepare_grammar::perform; describe("preparing a grammar", []() { describe("extracting tokens", []() { it("moves sub-rules that don't contain symbols into a separate 'lexical' grammar", [&]() { - pair result = perform(Grammar({ + pair result = perform(Grammar("rule1", { { "rule1", seq({ - character('a'), - character('b'), + character({ 'a' }), + character({ 'b' }), seq({ sym("rule2"), sym("rule3") }), seq({ - character('a'), - character('b') }) }) } + character({ 'a' }), + character({ 'b' }) }) }) } })); - AssertThat(result.first, Equals(Grammar({ + AssertThat(result.first, Equals(Grammar("rule1", { { "rule1", seq({ aux_sym("token1"), seq({ @@ -32,27 +32,27 @@ describe("preparing a grammar", []() { AssertThat(result.second, Equals(Grammar("", map(), { { "token1", rules::seq({ - rules::character('a'), - rules::character('b') }) }, + rules::character({ 'a' }), + rules::character({ 'b' }) }) }, }))); }); it("moves entire rules into the lexical grammar when possible, preserving their names", [&]() { - auto result = perform(Grammar({ + auto result = perform(Grammar("rule1", { { "rule1", sym("rule2") }, { "rule2", seq({ - character('a'), - character('b') }) } + character({ 'a' }), + character({ 'b' }) }) } })); - AssertThat(result.first, Equals(Grammar({ + AssertThat(result.first, Equals(Grammar("rule1", { { "rule1", sym("rule2") } }))); AssertThat(result.second, Equals(Grammar("", { { "rule2", seq({ - character('a'), - character('b') }) }, + character({ 'a' }), + character({ 'b' }) }) }, }))); }); @@ -60,8 +60,8 @@ describe("preparing a grammar", []() { auto result = perform(Grammar("rule1", map(), { { "rule1", sym("rule2") }, { "rule2", seq({ - character('a'), - character('b') }) } + character({ 'a' }), + character({ 'b' }) }) } })); AssertThat(result.first, Equals(Grammar("rule1", map(), { @@ -70,13 +70,13 @@ describe("preparing a grammar", []() { AssertThat(result.second, Equals(Grammar("", map(), { { "rule2", seq({ - character('a'), - character('b') }) }, + character({ 'a' }), + character({ 'b' }) }) }, }))); }); it("does not extract blanks into tokens", [&]() { - pair result = perform(Grammar({ + pair result = perform(Grammar("rule1", { { "rule1", choice({ sym("rule2"), blank() }) }, })); @@ -90,7 +90,7 @@ describe("preparing a grammar", []() { describe("expanding repeats", []() { it("replaces repeat rules with pairs of recursive rules", [&]() { - Grammar result = perform(Grammar({ + Grammar result = perform(Grammar("rule1", { { "rule1", seq({ sym("x"), repeat(seq({ sym("a"), sym("b") })), @@ -116,7 +116,7 @@ describe("preparing a grammar", []() { }); it("does not replace repeat rules that can be moved into the lexical grammar", [&]() { - pair result = perform(Grammar({ + pair result = perform(Grammar("rule1", { { "rule1", seq({ sym("x"), repeat(seq({ str("a"), str("b") })), diff --git a/spec/compiler/rules/pattern_spec.cpp b/spec/compiler/rules/pattern_spec.cpp index f64ffeca..432aead0 100644 --- a/spec/compiler/rules/pattern_spec.cpp +++ b/spec/compiler/rules/pattern_spec.cpp @@ -1,5 +1,6 @@ #include "spec_helper.h" -#include "rules.h" +#include "pattern.h" +#include "character_set.h" using namespace rules; @@ -11,9 +12,9 @@ describe("parsing pattern rules", []() { AssertThat( rule.to_rule_tree(), EqualsPointer(seq({ - character('a'), - character('b'), - character('c') + character({ 'a' }), + character({ 'b' }), + character({ 'c' }) }))); }); @@ -23,7 +24,7 @@ describe("parsing pattern rules", []() { rule.to_rule_tree(), EqualsPointer(seq({ character({ {'a', 'z'}, {'A', 'Z'} }), - character('-'), + character({ '-' }), character({ {'0', '9'} }) }))); }); @@ -34,16 +35,16 @@ describe("parsing pattern rules", []() { rule.to_rule_tree(), EqualsPointer(choice({ seq({ - character('a'), - character('b'), + character({ 'a' }), + character({ 'b' }), }), seq({ - character('c'), - character('d') + character({ 'c' }), + character({ 'd' }) }), seq({ - character('e'), - character('f') + character({ 'e' }), + character({ 'f' }) }) }))); }); @@ -73,7 +74,7 @@ describe("parsing pattern rules", []() { Pattern rule("\\\\"); AssertThat( rule.to_rule_tree(), - EqualsPointer(character('\\'))); + EqualsPointer(character({ '\\' }))); }); it("parses character groups in sequences", []() { @@ -81,12 +82,12 @@ describe("parsing pattern rules", []() { AssertThat( rule.to_rule_tree(), EqualsPointer(seq({ - character('"'), + character({ '"' }), repeat(choice({ character({ '"' }, false), - seq({ character('\\'), character('"') }) + seq({ character({ '\\' }), character({ '"' }) }) })), - character('"') + character({ '"' }) }))); }); @@ -96,11 +97,11 @@ describe("parsing pattern rules", []() { rule.to_rule_tree(), EqualsPointer(seq({ choice({ - character('a'), - character('b'), + character({ 'a' }), + character({ 'b' }), }), - character('c'), - character('d') + character({ 'c' }), + character({ 'd' }) }))); }); @@ -109,9 +110,9 @@ describe("parsing pattern rules", []() { AssertThat( rule.to_rule_tree(), EqualsPointer(seq({ - character('a'), - character('('), - character('b') + character({ 'a' }), + character({ '(' }), + character({ 'b' }) }))); }); @@ -122,12 +123,12 @@ describe("parsing pattern rules", []() { EqualsPointer( seq({ repeat(seq({ - character('a'), - character('b') + character({ 'a' }), + character({ 'b' }) })), repeat(seq({ - character('c'), - character('d') + character({ 'c' }), + character({ 'd' }) })), }) )); diff --git a/spec/compiler/rules/rules_spec.cpp b/spec/compiler/rules/rules_spec.cpp index 2d497a55..457c0d45 100644 --- a/spec/compiler/rules/rules_spec.cpp +++ b/spec/compiler/rules/rules_spec.cpp @@ -1,4 +1,5 @@ #include "spec_helper.h" +#include "rule.h" using namespace rules; diff --git a/spec/fixtures/grammars/arithmetic.cpp b/spec/fixtures/grammars/arithmetic.cpp index a0b33c8f..a0d1a3b8 100644 --- a/spec/fixtures/grammars/arithmetic.cpp +++ b/spec/fixtures/grammars/arithmetic.cpp @@ -1,12 +1,12 @@ #include "test_grammars.h" -#include "rules.h" +#include "compiler.h" using namespace tree_sitter; using namespace rules; namespace test_grammars { Grammar arithmetic() { - return Grammar({ + return Grammar("expression", { { "expression", choice({ seq({ sym("term"), diff --git a/spec/fixtures/grammars/json.cpp b/spec/fixtures/grammars/json.cpp index f623f9a0..57bafba4 100644 --- a/spec/fixtures/grammars/json.cpp +++ b/spec/fixtures/grammars/json.cpp @@ -1,5 +1,5 @@ #include "test_grammars.h" -#include "rules.h" +#include "compiler.h" using namespace tree_sitter; using namespace rules; @@ -34,12 +34,12 @@ namespace test_grammars { comma_sep(sym("value")), aux_sym("right_bracket"), }) }, { "string", seq({ - character('"'), + character({ '"' }), repeat(choice({ pattern("[^\"]"), str("\\\""), })), - character('"') }) }, + character({ '"' }) }) }, { "number", pattern("\\d+") } }, { { "comma", str(",") }, diff --git a/spec/fixtures/grammars/test_grammars.h b/spec/fixtures/grammars/test_grammars.h index dec8649a..d4ac381c 100644 --- a/spec/fixtures/grammars/test_grammars.h +++ b/spec/fixtures/grammars/test_grammars.h @@ -1,7 +1,7 @@ #ifndef TreeSitter_test_grammars_h #define TreeSitter_test_grammars_h -#include "grammar.h" +#include "compiler.h" namespace test_grammars { tree_sitter::Grammar arithmetic(); diff --git a/spec/spec_helper.h b/spec/spec_helper.h index 56e12806..3ae4fa3f 100644 --- a/spec/spec_helper.h +++ b/spec/spec_helper.h @@ -4,7 +4,7 @@ #include "bandit/bandit.h" #include "stream_methods.h" #include -#include "grammar.h" +#include "compiler.h" using namespace tree_sitter; using namespace std; diff --git a/src/compiler/build_tables/first_set.cpp b/src/compiler/build_tables/first_set.cpp index a756e5d4..cd4a3b79 100644 --- a/src/compiler/build_tables/first_set.cpp +++ b/src/compiler/build_tables/first_set.cpp @@ -1,7 +1,9 @@ #include "first_set.h" +#include "compiler.h" #include "rule_can_be_blank.h" -#include "grammar.h" - +#include "rules/visitor.h" +#include "rules/seq.h" +#include "rules/choice.h" namespace tree_sitter { using std::set; diff --git a/src/compiler/build_tables/follow_sets.cpp b/src/compiler/build_tables/follow_sets.cpp index a32dba01..9d9feb93 100644 --- a/src/compiler/build_tables/follow_sets.cpp +++ b/src/compiler/build_tables/follow_sets.cpp @@ -2,7 +2,7 @@ #include "first_set.h" #include "rule_transitions.h" #include "rule_can_be_blank.h" -#include "grammar.h" +#include "compiler.h" namespace tree_sitter { using std::set; diff --git a/src/compiler/build_tables/item.cpp b/src/compiler/build_tables/item.cpp index b3b49f1e..016113c8 100644 --- a/src/compiler/build_tables/item.cpp +++ b/src/compiler/build_tables/item.cpp @@ -1,5 +1,5 @@ #include "item.h" -#include "grammar.h" +#include "compiler.h" #include "rule_can_be_blank.h" namespace tree_sitter { diff --git a/src/compiler/build_tables/item.h b/src/compiler/build_tables/item.h index 79d7ab5c..206d0422 100644 --- a/src/compiler/build_tables/item.h +++ b/src/compiler/build_tables/item.h @@ -1,10 +1,9 @@ -#ifndef __TreeSitter__item__ -#define __TreeSitter__item__ +#ifndef __tree_sitter__item__ +#define __tree_sitter__item__ -#include -#include "rule.h" -#include #include "symbol.h" +#include +#include #include namespace tree_sitter { @@ -77,5 +76,4 @@ namespace std { }; } - #endif diff --git a/src/compiler/build_tables/item_set_closure.cpp b/src/compiler/build_tables/item_set_closure.cpp index ca2e1170..d3a55378 100644 --- a/src/compiler/build_tables/item_set_closure.cpp +++ b/src/compiler/build_tables/item_set_closure.cpp @@ -1,6 +1,6 @@ #include "item_set_closure.h" +#include "compiler.h" #include "./follow_sets.h" -#include "grammar.h" #include "item.h" namespace tree_sitter { diff --git a/src/compiler/build_tables/item_set_transitions.h b/src/compiler/build_tables/item_set_transitions.h index eeba842d..95b59eaa 100644 --- a/src/compiler/build_tables/item_set_transitions.h +++ b/src/compiler/build_tables/item_set_transitions.h @@ -1,12 +1,15 @@ #ifndef __tree_sitter__item_set_transitions__ #define __tree_sitter__item_set_transitions__ -#include "character_set.h" -#include "symbol.h" #include "item.h" #include namespace tree_sitter { + namespace rules { + class CharacterSet; + class Symbol; + } + namespace build_tables { std::map char_transitions(const LexItemSet &item_set, const Grammar &grammar); std::map sym_transitions(const ParseItemSet &item_set, const Grammar &grammar); diff --git a/src/compiler/build_tables/perform.cpp b/src/compiler/build_tables/perform.cpp index c483d80c..5e1281c5 100644 --- a/src/compiler/build_tables/perform.cpp +++ b/src/compiler/build_tables/perform.cpp @@ -2,8 +2,7 @@ #include "item.h" #include "item_set_closure.h" #include "item_set_transitions.h" -#include "rules.h" -#include "grammar.h" +#include "compiler.h" #include "stream_methods.h" @@ -11,6 +10,7 @@ namespace tree_sitter { using std::pair; using std::string; using std::map; + using std::make_shared; using rules::Symbol; using rules::CharacterSet; @@ -79,7 +79,7 @@ namespace tree_sitter { LexItemSet item_set; for (auto &symbol : state.expected_inputs()) { if (symbol == END_OF_INPUT) - item_set.insert(LexItem(symbol, rules::character('\0'))); + item_set.insert(LexItem(symbol, make_shared(std::set{ '\0' }))); if (lex_grammar.has_definition(symbol)) item_set.insert(LexItem(symbol, lex_grammar.rule(symbol))); } @@ -134,7 +134,7 @@ namespace tree_sitter { lex_grammar(lex_grammar) {}; pair build() { - auto item = ParseItem(START, rules::sym(grammar.start_rule_name), {}, END_OF_INPUT); + auto item = ParseItem(START, make_shared(grammar.start_rule_name), {}, END_OF_INPUT); ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar); add_parse_state(item_set); return pair(parse_table, lex_table); diff --git a/src/compiler/build_tables/rule_can_be_blank.cpp b/src/compiler/build_tables/rule_can_be_blank.cpp index 2e636cca..f6359165 100644 --- a/src/compiler/build_tables/rule_can_be_blank.cpp +++ b/src/compiler/build_tables/rule_can_be_blank.cpp @@ -1,6 +1,10 @@ #include "rule_can_be_blank.h" -#include "grammar.h" -#include "rules.h" +#include "compiler.h" +#include "rules/symbol.h" +#include "rules/visitor.h" +#include "rules/seq.h" +#include "rules/choice.h" +#include "rules/blank.h" namespace tree_sitter { using namespace rules; diff --git a/src/compiler/build_tables/rule_can_be_blank.h b/src/compiler/build_tables/rule_can_be_blank.h index ab0a2579..faa2d044 100644 --- a/src/compiler/build_tables/rule_can_be_blank.h +++ b/src/compiler/build_tables/rule_can_be_blank.h @@ -1,11 +1,9 @@ #ifndef __tree_sitter__rule_can_be_blank__ #define __tree_sitter__rule_can_be_blank__ -#include "rule.h" +#include "compiler.h" namespace tree_sitter { - class Grammar; - namespace build_tables { bool rule_can_be_blank(const rules::rule_ptr &rule); bool rule_can_be_blank(const rules::rule_ptr &rule, const Grammar &grammar); diff --git a/src/compiler/build_tables/rule_transitions.cpp b/src/compiler/build_tables/rule_transitions.cpp index bc71fd06..56b88423 100644 --- a/src/compiler/build_tables/rule_transitions.cpp +++ b/src/compiler/build_tables/rule_transitions.cpp @@ -1,10 +1,19 @@ -#include "rules.h" #include "rule_transitions.h" #include "rule_can_be_blank.h" #include "merge_transitions.h" +#include "rules/blank.h" +#include "rules/choice.h" +#include "rules/seq.h" +#include "rules/string.h" +#include "rules/repeat.h" +#include "rules/pattern.h" +#include "rules/visitor.h" +#include "rules/character_set.h" namespace tree_sitter { using std::map; + using std::set; + using std::make_shared; using namespace rules; namespace build_tables { @@ -18,7 +27,7 @@ namespace tree_sitter { template<> map merge_transitions(const map &left, const map &right) { auto transitions = merge_char_transitions(left, right, [](rule_ptr left, rule_ptr right) -> rule_ptr { - return choice({ left, right }); + return make_shared(left, right); }); return *static_cast *>(&transitions); } @@ -26,7 +35,7 @@ namespace tree_sitter { template<> map merge_transitions(const map &left, const map &right) { auto transitions = merge_sym_transitions(left, right, [](rule_ptr left, rule_ptr right) -> rule_ptr { - return choice({ left, right }); + return make_shared(left, right); }); return *static_cast *>(&transitions); } @@ -54,7 +63,7 @@ namespace tree_sitter { auto atom = dynamic_cast(rule); if (atom) { value = map(); - value.insert({ *atom, blank() }); + value.insert({ *atom, make_shared() }); } } @@ -72,11 +81,8 @@ namespace tree_sitter { } void visit(const Seq *rule) { - value = map_transitions(transitions(rule->left), [&](const rule_ptr left_rule) -> rule_ptr { - if (is_blank(left_rule)) - return rule->right; - else - return seq({ left_rule, rule->right }); + value = map_transitions(transitions(rule->left), [&](const rule_ptr left_rule) { + return Seq::Build({ left_rule, rule->right }); }); if (rule_can_be_blank(rule->left)) { value = merge_transitions(value, transitions(rule->right)); @@ -84,15 +90,17 @@ namespace tree_sitter { } void visit(const Repeat *rule) { - value = map_transitions(transitions(rule->content), [&](const rule_ptr &value) -> rule_ptr { - return seq({ value, choice({ rule->copy(), blank() }) }); + value = map_transitions(transitions(rule->content), [&](const rule_ptr &value) { + return Seq::Build({ + value, + make_shared(rule->copy(), make_shared()) }); }); } void visit(const String *rule) { - rule_ptr result = character(rule->value[0]); - for (int i = 1; i < rule->value.length(); i++) - result = seq({ result, character(rule->value[i]) }); + rule_ptr result = make_shared(); + for (char val : rule->value) + result = Seq::Build({ result, make_shared(set({ val })) }); value = transitions(result); } diff --git a/src/compiler/compile.cpp b/src/compiler/compile.cpp index a344368a..6169200f 100644 --- a/src/compiler/compile.cpp +++ b/src/compiler/compile.cpp @@ -1,6 +1,4 @@ -#include "compile.h" - -#include "grammar.h" +#include "compiler.h" #include "prepare_grammar/perform.h" #include "build_tables/perform.h" #include "generate_code/c_code.h" diff --git a/src/compiler/compile.h b/src/compiler/compile.h deleted file mode 100644 index d02a95a6..00000000 --- a/src/compiler/compile.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __tree_sitter__compile__ -#define __tree_sitter__compile__ - -#include - -namespace tree_sitter { - class Grammar; - - std::string compile(const Grammar &grammar, std::string name); -} - -#endif diff --git a/src/compiler/generate_code/c_code.h b/src/compiler/generate_code/c_code.h index 8d093d65..1c76345a 100644 --- a/src/compiler/generate_code/c_code.h +++ b/src/compiler/generate_code/c_code.h @@ -1,7 +1,6 @@ #ifndef __tree_sitter__code_generator__ #define __tree_sitter__code_generator__ -#include "grammar.h" #include "parse_table.h" #include "lex_table.h" diff --git a/src/compiler/grammar.cpp b/src/compiler/grammar.cpp index d986d9d6..7dc36385 100644 --- a/src/compiler/grammar.cpp +++ b/src/compiler/grammar.cpp @@ -1,40 +1,35 @@ -#include "grammar.h" +#include "compiler.h" +#include "symbol.h" namespace tree_sitter { using std::vector; using std::string; using std::pair; using std::initializer_list; + using std::map; using std::ostream; + using rules::rule_ptr; + using rules::Symbol; - Grammar::Grammar(const initializer_list> &rules) : - rules(rules), - start_rule_name(rules.begin()->first) {} - - Grammar::Grammar(std::string start_rule_name, const rule_map &rules) : + Grammar::Grammar(std::string start_rule_name, + const std::map &rules) : rules(rules), start_rule_name(start_rule_name) {} - Grammar::Grammar(std::string start_rule_name, const rule_map &rules, const rule_map &aux_rules) : + Grammar::Grammar(std::string start_rule_name, + const map &rules, + const map &aux_rules) : rules(rules), aux_rules(aux_rules), start_rule_name(start_rule_name) {} - const rules::rule_ptr Grammar::rule(const rules::Symbol &symbol) const { + const rule_ptr Grammar::rule(const Symbol &symbol) const { auto map = symbol.is_auxiliary ? aux_rules : rules; auto iter = map.find(symbol.name); if (iter != map.end()) return iter->second; else - return rules::rule_ptr(); - } - - vector Grammar::rule_names() const { - vector result; - for (auto pair : rules) { - result.push_back(pair.first); - } - return result; + return rule_ptr(); } bool Grammar::operator==(const Grammar &other) const { @@ -56,7 +51,7 @@ namespace tree_sitter { return true; } - bool Grammar::has_definition(const rules::Symbol &symbol) const { + bool Grammar::has_definition(const Symbol &symbol) const { return rule(symbol).get() != nullptr; } diff --git a/src/compiler/grammar.h b/src/compiler/grammar.h deleted file mode 100644 index f87da155..00000000 --- a/src/compiler/grammar.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef __TreeSitter__grammar__ -#define __TreeSitter__grammar__ - -#include -#include -#include "rules.h" - -namespace tree_sitter { - class Grammar { - typedef std::initializer_list> rule_map_init_list; - typedef std::map rule_map; - - public: - Grammar(const rule_map_init_list &rules); - Grammar(std::string start_rule_name, const rule_map &rules); - Grammar(std::string start_rule_name, const rule_map &rules, const rule_map &aux_rules); - - const std::string start_rule_name; - std::vector rule_names() const; - bool operator==(const Grammar &other) const; - bool has_definition(const rules::Symbol &symbol) const; - const rules::rule_ptr rule(const rules::Symbol &symbol) const; - - rule_map rules; - rule_map aux_rules; - }; - - std::ostream& operator<<(std::ostream &stream, const Grammar &grammar); -} - -#endif diff --git a/src/compiler/prepare_grammar/expand_repeats.cpp b/src/compiler/prepare_grammar/expand_repeats.cpp index 5f297050..4b7f52a0 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cpp +++ b/src/compiler/prepare_grammar/expand_repeats.cpp @@ -1,10 +1,17 @@ #include "expand_repeats.h" #include +#include "rules/visitor.h" +#include "rules/seq.h" +#include "rules/symbol.h" +#include "rules/choice.h" +#include "rules/blank.h" +#include "rules/repeat.h" namespace tree_sitter { using std::string; using std::to_string; using std::map; + using std::make_shared; using namespace rules; namespace prepare_grammar { @@ -19,28 +26,24 @@ namespace tree_sitter { } rule_ptr make_repeat_helper(string name, const rule_ptr &rule) { - return choice({ - seq({ - rule, - aux_sym(name), - }), - blank(), - }); + return Choice::Build({ + Seq::Build({ rule, make_shared(name, true) }), + make_shared() }); } void visit(const Repeat *rule) { rule_ptr inner_rule = apply(rule->content); string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1); aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) }); - value = aux_sym(helper_rule_name); + value = make_shared(helper_rule_name, true); } void visit(const Seq *rule) { - value = seq({ apply(rule->left), apply(rule->right) }); + value = Seq::Build({ apply(rule->left), apply(rule->right) }); } void visit(const Choice *rule) { - value = choice({ apply(rule->left), apply(rule->right) }); + value = Choice::Build({ apply(rule->left), apply(rule->right) }); } void default_visit(const Rule *rule) { diff --git a/src/compiler/prepare_grammar/expand_repeats.h b/src/compiler/prepare_grammar/expand_repeats.h index 69eef554..7d075d5e 100644 --- a/src/compiler/prepare_grammar/expand_repeats.h +++ b/src/compiler/prepare_grammar/expand_repeats.h @@ -1,7 +1,7 @@ #ifndef __tree_sitter__expand_repeats__ #define __tree_sitter__expand_repeats__ -#include "grammar.h" +#include "compiler.h" namespace tree_sitter { namespace prepare_grammar { diff --git a/src/compiler/prepare_grammar/extract_tokens.cpp b/src/compiler/prepare_grammar/extract_tokens.cpp index 0b97b750..654519a7 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cpp +++ b/src/compiler/prepare_grammar/extract_tokens.cpp @@ -1,12 +1,20 @@ #include "extract_tokens.h" #include "search_for_symbols.h" #include +#include "compiler.h" +#include "rules/visitor.h" +#include "rules/seq.h" +#include "rules/choice.h" +#include "rules/repeat.h" +#include "rules/blank.h" +#include "rules/symbol.h" namespace tree_sitter { using std::pair; using std::string; using std::to_string; using std::map; + using std::make_shared; using namespace rules; namespace prepare_grammar { @@ -29,7 +37,7 @@ namespace tree_sitter { return value; } else { string token_name = add_token(rule); - return aux_sym(token_name); + return make_shared(token_name, true); } } @@ -47,15 +55,15 @@ namespace tree_sitter { } void visit(const Choice *rule) { - value = choice({ apply(rule->left), apply(rule->right) }); + value = Choice::Build({ apply(rule->left), apply(rule->right) }); } void visit(const Seq *rule) { - value = seq({ apply(rule->left), apply(rule->right) }); + value = Seq::Build({ apply(rule->left), apply(rule->right) }); } void visit(const Repeat *rule) { - value = repeat(apply(rule->content)); + value = make_shared(apply(rule->content)); } }; diff --git a/src/compiler/prepare_grammar/extract_tokens.h b/src/compiler/prepare_grammar/extract_tokens.h index 63dd21f7..e07b2119 100644 --- a/src/compiler/prepare_grammar/extract_tokens.h +++ b/src/compiler/prepare_grammar/extract_tokens.h @@ -1,9 +1,11 @@ #ifndef __tree_sitter__extract_tokens__ #define __tree_sitter__extract_tokens__ -#include "grammar.h" +#include namespace tree_sitter { + class Grammar; + namespace prepare_grammar { std::pair extract_tokens(const Grammar &); } diff --git a/src/compiler/prepare_grammar/perform.h b/src/compiler/prepare_grammar/perform.h index c46c7d42..e065e3b1 100644 --- a/src/compiler/prepare_grammar/perform.h +++ b/src/compiler/prepare_grammar/perform.h @@ -1,9 +1,11 @@ #ifndef __tree_sitter__prepare_grammar__ #define __tree_sitter__prepare_grammar__ -#include "grammar.h" +#include namespace tree_sitter { + class Grammar; + namespace prepare_grammar { std::pair perform(const Grammar &); } diff --git a/src/compiler/prepare_grammar/search_for_symbols.cpp b/src/compiler/prepare_grammar/search_for_symbols.cpp index eef4c9ff..302d4da6 100644 --- a/src/compiler/prepare_grammar/search_for_symbols.cpp +++ b/src/compiler/prepare_grammar/search_for_symbols.cpp @@ -1,10 +1,14 @@ #include "search_for_symbols.h" +#include "rules/visitor.h" +#include "choice.h" +#include "seq.h" +#include "repeat.h" namespace tree_sitter { using namespace rules; namespace prepare_grammar { - class SymbolSearcher : Visitor { + class SymbolSearcher : rules::Visitor { public: bool value; diff --git a/src/compiler/prepare_grammar/search_for_symbols.h b/src/compiler/prepare_grammar/search_for_symbols.h index 7d532e17..14d53a3a 100644 --- a/src/compiler/prepare_grammar/search_for_symbols.h +++ b/src/compiler/prepare_grammar/search_for_symbols.h @@ -1,7 +1,7 @@ #ifndef __tree_sitter__search_for_symbols__ #define __tree_sitter__search_for_symbols__ -#include "rules.h" +#include "rule.h" namespace tree_sitter { namespace prepare_grammar { diff --git a/src/compiler/rules/blank.cpp b/src/compiler/rules/blank.cpp index dbd68ffe..9a4a9162 100644 --- a/src/compiler/rules/blank.cpp +++ b/src/compiler/rules/blank.cpp @@ -1,4 +1,5 @@ -#include "rules.h" +#include "blank.h" +#include "visitor.h" namespace tree_sitter { namespace rules { diff --git a/src/compiler/rules/character_range.cpp b/src/compiler/rules/character_range.cpp new file mode 100644 index 00000000..d5466746 --- /dev/null +++ b/src/compiler/rules/character_range.cpp @@ -0,0 +1,44 @@ +#include "compiler.h" + +namespace tree_sitter { + using std::string; + + namespace rules { + static const char MAX_CHAR = '\xff'; + + CharacterRange::CharacterRange(char value) : min(value), max(value) {} + CharacterRange::CharacterRange(char min, char max) : min(min), max(max) {} + + bool CharacterRange::operator==(const CharacterRange &other) const { + return min == other.min && max == other.max; + } + + bool CharacterRange::operator<(const CharacterRange &other) const { + if (min < other.min) return true; + if (min > other.min) return false; + if (max < other.max) return true; + return false; + } + + string escape_character(char input) { + switch (input) { + case '\0': + return ""; + case MAX_CHAR: + return ""; + default: + return string() + input; + } + } + + string CharacterRange::to_string() const { + if (min == 0 && max == MAX_CHAR) + return ""; + if (min == max) + return escape_character(min); + else + return string() + escape_character(min) + "-" + escape_character(max); + } + } +} + diff --git a/src/compiler/rules/character_set.cpp b/src/compiler/rules/character_set.cpp index b12f367a..468349ee 100644 --- a/src/compiler/rules/character_set.cpp +++ b/src/compiler/rules/character_set.cpp @@ -1,42 +1,16 @@ -#include "rules.h" +#include "character_set.h" +#include "visitor.h" using std::string; using std::hash; using std::set; using std::pair; +using std::initializer_list; namespace tree_sitter { namespace rules { - const char MAX_CHAR = '\xff'; - - CharacterRange::CharacterRange(char value) : min(value), max(value) {} - CharacterRange::CharacterRange(char min, char max) : - min(min), - max(max) - {} - - bool CharacterRange::operator==(const CharacterRange &other) const { - return min == other.min && max == other.max; - } + static const char MAX_CHAR = '\xff'; - bool CharacterRange::operator<(const CharacterRange &other) const { - if (min < other.min) return true; - if (min > other.min) return false; - if (max < other.max) return true; - return false; - } - - string escape_character(char input) { - switch (input) { - case '\0': - return ""; - case MAX_CHAR: - return ""; - default: - return string() + input; - } - } - int max_int(const CharacterRange &range) { return range.max == MAX_CHAR ? 255 : (int)range.max; } @@ -45,19 +19,9 @@ namespace tree_sitter { return (int)range.min; } - string CharacterRange::to_string() const { - if (min == 0 && max == MAX_CHAR) - return ""; - if (min == max) - return escape_character(min); - else - return string() + escape_character(min) + "-" + escape_character(max); - } - CharacterSet::CharacterSet() : ranges({}) {} CharacterSet::CharacterSet(const set &ranges) : ranges(ranges) {} - CharacterSet::CharacterSet(const set &ranges, bool sign) : - ranges(sign ? ranges : CharacterSet(ranges).complement().ranges) {} + CharacterSet::CharacterSet(const initializer_list &ranges) : ranges(ranges) {} bool CharacterSet::operator==(const Rule &rule) const { const CharacterSet *other = dynamic_cast(&rule); @@ -89,7 +53,7 @@ namespace tree_sitter { } CharacterSet CharacterSet::complement() const { - CharacterSet result({ {0, MAX_CHAR} }, true); + CharacterSet result({ {0, MAX_CHAR} }); result.remove_set(*this); return result; } diff --git a/src/compiler/rules/character_set.h b/src/compiler/rules/character_set.h index 021c41b0..6252ec37 100644 --- a/src/compiler/rules/character_set.h +++ b/src/compiler/rules/character_set.h @@ -1,31 +1,10 @@ #ifndef __tree_sitter__character_set__ #define __tree_sitter__character_set__ +#include "compiler.h" #include "rule.h" #include - -namespace tree_sitter { - namespace rules { - struct CharacterRange { - char min; - char max; - CharacterRange(char); - CharacterRange(char, char); - bool operator==(const CharacterRange &) const; - bool operator<(const CharacterRange &) const; - std::string to_string() const; - }; - } -} - -namespace std { - template<> - struct hash { - size_t operator()(const tree_sitter::rules::CharacterRange &range) const { - return (hash()(range.min) ^ hash()(range.max)); - } - }; -} +#include namespace tree_sitter { namespace rules { @@ -33,15 +12,7 @@ namespace tree_sitter { public: CharacterSet(); CharacterSet(const std::set &ranges); - CharacterSet(const std::set &ranges, bool); - - CharacterSet complement() const; - CharacterSet intersect(const CharacterSet &) const; - std::pair most_compact_representation() const; - bool is_empty() const; - - void add_set(const CharacterSet &other); - CharacterSet remove_set(const CharacterSet &other); + CharacterSet(const std::initializer_list &ranges); bool operator==(const Rule& other) const; bool operator<(const CharacterSet &) const; @@ -50,6 +21,13 @@ namespace tree_sitter { std::string to_string() const; void accept(Visitor &visitor) const; + void add_set(const CharacterSet &other); + CharacterSet remove_set(const CharacterSet &other); + CharacterSet complement() const; + CharacterSet intersect(const CharacterSet &) const; + std::pair most_compact_representation() const; + bool is_empty() const; + std::set ranges; }; } diff --git a/src/compiler/rules/choice.cpp b/src/compiler/rules/choice.cpp index f654fed7..e7a372ff 100644 --- a/src/compiler/rules/choice.cpp +++ b/src/compiler/rules/choice.cpp @@ -1,11 +1,21 @@ -#include "rules.h" - -using std::string; +#include "choice.h" +#include "visitor.h" namespace tree_sitter { + using std::string; + using std::make_shared; + using std::vector; + namespace rules { Choice::Choice(rule_ptr left, rule_ptr right) : left(left), right(right) {}; + rule_ptr Choice::Build(const vector &rules) { + rule_ptr result; + for (auto rule : rules) + result = result.get() ? make_shared(result, rule) : rule; + return result; + } + bool Choice::operator==(const Rule &rule) const { const Choice *other = dynamic_cast(&rule); return other && (*other->left == *left) && (*other->right == *right); diff --git a/src/compiler/rules/choice.h b/src/compiler/rules/choice.h index bca1f6d9..8f0cf235 100644 --- a/src/compiler/rules/choice.h +++ b/src/compiler/rules/choice.h @@ -2,12 +2,14 @@ #define __tree_sitter__choice__ #include "rule.h" +#include namespace tree_sitter { namespace rules { class Choice : public Rule { public: Choice(rule_ptr left, rule_ptr right); + static rule_ptr Build(const std::vector &rules); bool operator==(const Rule& other) const; size_t hash_code() const; diff --git a/src/compiler/rules/pattern.cpp b/src/compiler/rules/pattern.cpp index 88666799..fb5a8d2f 100644 --- a/src/compiler/rules/pattern.cpp +++ b/src/compiler/rules/pattern.cpp @@ -1,11 +1,18 @@ -#include "rules.h" - -using std::string; -using std::hash; -using std::set; +#include "pattern.h" +#include "visitor.h" +#include "choice.h" +#include "seq.h" +#include "repeat.h" +#include "character_set.h" +#include namespace tree_sitter { namespace rules { + using std::string; + using std::hash; + using std::make_shared; + using std::set; + class PatternParser { public: PatternParser(const string &input) : @@ -17,7 +24,7 @@ namespace tree_sitter { auto result = term(); while (has_more_input() && peek() == '|') { next(); - result = choice({ result, term() }); + result = make_shared(result, term()); } return result; } @@ -26,7 +33,7 @@ namespace tree_sitter { rule_ptr term() { rule_ptr result = factor(); while (has_more_input() && (peek() != '|') && (peek() != ')')) - result = seq({ result, factor() }); + result = Seq::Build({ result, factor() }); return result; } @@ -34,7 +41,7 @@ namespace tree_sitter { rule_ptr result = atom(); if (has_more_input() && (peek() == '+')) { next(); - result = repeat(result); + result = make_shared(result); } return result; } @@ -92,7 +99,7 @@ namespace tree_sitter { next(); if (peek() == '-') { next(); - value = CharacterSet({ {first_char, peek()} }, true); + value = CharacterSet({ CharacterRange(first_char, peek()) }); next(); } else { value = CharacterSet({ first_char }); @@ -108,9 +115,9 @@ namespace tree_sitter { case ')': return CharacterSet({ value }); case 'w': - return CharacterSet({{'a', 'z'}, {'A', 'Z'}}, true); + return CharacterSet({{'a', 'z'}, {'A', 'Z'}}); case 'd': - return CharacterSet({{'0', '9'}}, true); + return CharacterSet({CharacterRange('0', '9')}); default: error("unrecognized escape sequence"); return CharacterSet(); diff --git a/src/compiler/rules/repeat.cpp b/src/compiler/rules/repeat.cpp index d3c929ef..76c1c3cc 100644 --- a/src/compiler/rules/repeat.cpp +++ b/src/compiler/rules/repeat.cpp @@ -1,4 +1,5 @@ -#include "rules.h" +#include "repeat.h" +#include "visitor.h" using std::string; diff --git a/src/compiler/rules/rules.cpp b/src/compiler/rules/rules.cpp index 8e6a3fe0..74a6e798 100644 --- a/src/compiler/rules/rules.cpp +++ b/src/compiler/rules/rules.cpp @@ -1,34 +1,38 @@ -#include "rules.h" - -using std::make_shared; -using std::string; -using std::set; -using std::vector; +#include "compiler.h" +#include "rule.h" +#include "blank.h" +#include "symbol.h" +#include "choice.h" +#include "seq.h" +#include "string.h" +#include "pattern.h" +#include "character_set.h" +#include "repeat.h" namespace tree_sitter { + using std::make_shared; + using std::string; + using std::set; + using std::vector; + namespace rules { rule_ptr blank() { return make_shared(); } - rule_ptr character(char value) { - set ranges = { value }; - return make_shared(ranges); - } - rule_ptr character(const set &ranges) { return make_shared(ranges); } rule_ptr character(const set &ranges, bool sign) { - return make_shared(ranges, sign); + if (sign) + return character(ranges); + else + return CharacterSet(ranges).complement().copy(); } rule_ptr choice(const vector &rules) { - rule_ptr result; - for (auto rule : rules) - result = result.get() ? make_shared(result, rule) : rule; - return result; + return Choice::Build(rules); } rule_ptr pattern(const string &value) { @@ -40,12 +44,7 @@ namespace tree_sitter { } rule_ptr seq(const vector &rules) { - rule_ptr result = blank(); - for (auto rule : rules) - result = (typeid(*result) != typeid(Blank)) ? - make_shared(result, rule) : - rule; - return result; + return Seq::Build(rules); } rule_ptr str(const string &value) { diff --git a/src/compiler/rules/rules.h b/src/compiler/rules/rules.h deleted file mode 100644 index 02fee8b1..00000000 --- a/src/compiler/rules/rules.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef __TreeSitter__rules__ -#define __TreeSitter__rules__ - -#include "rule.h" -#include "blank.h" -#include "symbol.h" -#include "choice.h" -#include "seq.h" -#include "string.h" -#include "pattern.h" -#include "character_set.h" -#include "repeat.h" -#include "visitor.h" -#include - -namespace tree_sitter { - namespace rules { - rule_ptr blank(); - rule_ptr character(char value); - rule_ptr character(const std::set &matches); - rule_ptr character(const std::set &matches, bool); - - rule_ptr choice(const std::vector &rules); - rule_ptr pattern(const std::string &value); - rule_ptr repeat(const rule_ptr content); - rule_ptr seq(const std::vector &rules); - rule_ptr str(const std::string &value); - rule_ptr sym(const std::string &name); - rule_ptr aux_sym(const std::string &name); - } -} - -#endif diff --git a/src/compiler/rules/seq.cpp b/src/compiler/rules/seq.cpp index aa4583c8..29e2878c 100644 --- a/src/compiler/rules/seq.cpp +++ b/src/compiler/rules/seq.cpp @@ -1,11 +1,22 @@ -#include "rules.h" - -using std::string; +#include "seq.h" +#include "visitor.h" +#include "blank.h" namespace tree_sitter { + using std::make_shared; + using std::string; + using std::vector; + namespace rules { Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {}; + rule_ptr Seq::Build(const std::vector &rules) { + rule_ptr result = make_shared(); + for (auto &rule : rules) + result = (typeid(*result) != typeid(Blank)) ? make_shared(result, rule) : rule; + return result; + } + bool Seq::operator==(const Rule &rule) const { const Seq *other = dynamic_cast(&rule); return other && (*other->left == *left) && (*other->right == *right); diff --git a/src/compiler/rules/seq.h b/src/compiler/rules/seq.h index db77dda1..2c9fbf02 100644 --- a/src/compiler/rules/seq.h +++ b/src/compiler/rules/seq.h @@ -2,12 +2,14 @@ #define __tree_sitter__seq__ #include "rule.h" +#include namespace tree_sitter { namespace rules { class Seq : public Rule { public: Seq(rule_ptr left, rule_ptr right); + static rule_ptr Build(const std::vector &rules); bool operator==(const Rule& other) const; size_t hash_code() const; diff --git a/src/compiler/rules/string.cpp b/src/compiler/rules/string.cpp index df501fba..c4e5ead5 100644 --- a/src/compiler/rules/string.cpp +++ b/src/compiler/rules/string.cpp @@ -1,4 +1,5 @@ -#include "rules.h" +#include "string.h" +#include "visitor.h" using std::string; using std::hash; diff --git a/src/compiler/rules/symbol.cpp b/src/compiler/rules/symbol.cpp index 7521ead5..eac7f799 100644 --- a/src/compiler/rules/symbol.cpp +++ b/src/compiler/rules/symbol.cpp @@ -1,4 +1,5 @@ -#include "rules.h" +#include "symbol.h" +#include "visitor.h" using std::string; using std::hash; diff --git a/src/compiler/rules/visitor.cpp b/src/compiler/rules/visitor.cpp index 49b48d9d..e7ee297f 100644 --- a/src/compiler/rules/visitor.cpp +++ b/src/compiler/rules/visitor.cpp @@ -1,4 +1,13 @@ #include "visitor.h" +#include "rule.h" +#include "blank.h" +#include "symbol.h" +#include "choice.h" +#include "seq.h" +#include "string.h" +#include "pattern.h" +#include "character_set.h" +#include "repeat.h" namespace tree_sitter { namespace rules { diff --git a/src/compiler/rules/visitor.h b/src/compiler/rules/visitor.h index de23e56c..b5aa5e21 100644 --- a/src/compiler/rules/visitor.h +++ b/src/compiler/rules/visitor.h @@ -1,10 +1,18 @@ #ifndef __tree_sitter__rule_visitor__ #define __tree_sitter__rule_visitor__ -#include "rules.h" - namespace tree_sitter { namespace rules { + class Rule; + class Blank; + class Symbol; + class CharacterSet; + class Choice; + class Repeat; + class Seq; + class String; + class Pattern; + class Visitor { public: virtual void default_visit(const Rule *rule); diff --git a/tree_sitter.xcodeproj/project.pbxproj b/tree_sitter.xcodeproj/project.pbxproj index a1c95752..4d7dcd37 100644 --- a/tree_sitter.xcodeproj/project.pbxproj +++ b/tree_sitter.xcodeproj/project.pbxproj @@ -15,6 +15,7 @@ 12130617182C3D2900FCF928 /* string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130615182C3D2900FCF928 /* string.cpp */; }; 1214930E181E200B008E9BDA /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492E9181E200B008E9BDA /* main.cpp */; }; 1225CC6418765693000D4723 /* prepare_grammar_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */; }; + 1236A7C518B287DC00593ABB /* character_range.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1236A7C318B287DC00593ABB /* character_range.cpp */; }; 1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; }; 125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; }; 12661BF418A1505A00A259FB /* character_set_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12661BF318A1505A00A259FB /* character_set_spec.cpp */; }; @@ -83,8 +84,7 @@ /* Begin PBXFileReference section */ 12130603182C348F00FCF928 /* character_set.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = character_set.cpp; sourceTree = ""; }; - 12130604182C348F00FCF928 /* character_set.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = character_set.h; sourceTree = ""; }; - 12130607182C374800FCF928 /* rule.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rule.h; sourceTree = ""; }; + 12130604182C348F00FCF928 /* character_set.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = character_set.h; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; }; 12130609182C389100FCF928 /* symbol.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = symbol.cpp; sourceTree = ""; }; 1213060A182C389100FCF928 /* symbol.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = symbol.h; sourceTree = ""; }; 1213060C182C398300FCF928 /* choice.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = choice.cpp; sourceTree = ""; }; @@ -99,16 +99,18 @@ 121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/compiler/rules/rules_spec.cpp; sourceTree = SOURCE_ROOT; }; 121D8B3018795CC0003CF44B /* parser.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parser.h; sourceTree = ""; }; 1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prepare_grammar_spec.cpp; sourceTree = ""; }; + 1236A7C318B287DC00593ABB /* character_range.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = character_range.cpp; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; + 1236A7C918B2A79F00593ABB /* rule.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rule.h; sourceTree = ""; }; 1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = ""; }; - 125120A218307FFD00C9B56A /* test_grammars.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = test_grammars.h; path = spec/fixtures/grammars/test_grammars.h; sourceTree = SOURCE_ROOT; }; - 125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/fixtures/grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; }; + 125120A218307FFD00C9B56A /* test_grammars.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; lineEnding = 0; name = test_grammars.h; path = spec/fixtures/grammars/test_grammars.h; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; }; + 125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = arithmetic.cpp; path = spec/fixtures/grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12661BF318A1505A00A259FB /* character_set_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = character_set_spec.cpp; sourceTree = SOURCE_ROOT; }; 127528AF18A6F9C6006B682B /* merge_transitions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = merge_transitions.h; sourceTree = ""; }; - 127528B118AACAAA006B682B /* rule_can_be_blank.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule_can_be_blank.cpp; sourceTree = ""; }; - 127528B218AACAAA006B682B /* rule_can_be_blank.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rule_can_be_blank.h; sourceTree = ""; }; + 127528B118AACAAA006B682B /* rule_can_be_blank.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = rule_can_be_blank.cpp; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; + 127528B218AACAAA006B682B /* rule_can_be_blank.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = rule_can_be_blank.h; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; }; 127528B418AACB70006B682B /* rule_can_be_blank_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule_can_be_blank_spec.cpp; sourceTree = ""; }; 127528B918B041B6006B682B /* runtime.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = runtime.h; sourceTree = ""; }; - 12AB465D188BD03E00DE79DF /* follow_sets.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = follow_sets.cpp; sourceTree = ""; }; + 12AB465D188BD03E00DE79DF /* follow_sets.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = follow_sets.cpp; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12AB465E188BD03E00DE79DF /* follow_sets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = follow_sets.h; sourceTree = ""; }; 12AB4660188CB3A300DE79DF /* item_set_closure_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = item_set_closure_spec.cpp; sourceTree = ""; }; 12AB4663188DCB9800DE79DF /* stream_methods.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = stream_methods.h; sourceTree = ""; }; @@ -119,15 +121,15 @@ 12D136A2183678A2005F3369 /* repeat.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = repeat.cpp; sourceTree = ""; }; 12D136A3183678A2005F3369 /* repeat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = repeat.h; sourceTree = ""; }; 12E71794181D02A80051A649 /* compiler_specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = compiler_specs; sourceTree = BUILT_PRODUCTS_DIR; }; - 12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = rules.h; path = src/compiler/rules/rules.h; sourceTree = SOURCE_ROOT; }; - 12E75A961891BD32001B8F10 /* json.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = json.cpp; sourceTree = ""; }; + 12E71852181D081C0051A649 /* compiler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = compiler.h; path = include/compiler.h; sourceTree = SOURCE_ROOT; }; + 12E75A961891BD32001B8F10 /* json.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = json.cpp; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12E75A981891BF3B001B8F10 /* json.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = json.c; sourceTree = ""; }; 12E75A9B1891C17D001B8F10 /* json_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = json_spec.cpp; sourceTree = ""; }; 12E75AA018930931001B8F10 /* expand_repeats.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = expand_repeats.cpp; path = src/compiler/prepare_grammar/expand_repeats.cpp; sourceTree = SOURCE_ROOT; }; - 12E75AA118930931001B8F10 /* expand_repeats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_repeats.h; path = src/compiler/prepare_grammar/expand_repeats.h; sourceTree = SOURCE_ROOT; }; + 12E75AA118930931001B8F10 /* expand_repeats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; name = expand_repeats.h; path = src/compiler/prepare_grammar/expand_repeats.h; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; }; 12EDCF89187B498C005A7A07 /* tree_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tree_spec.cpp; sourceTree = ""; }; 12EDCF8C187C6282005A7A07 /* document.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = document.c; sourceTree = ""; }; - 12EDCF8F1881FCCA005A7A07 /* extract_tokens.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = extract_tokens.cpp; path = src/compiler/prepare_grammar/extract_tokens.cpp; sourceTree = SOURCE_ROOT; }; + 12EDCF8F1881FCCA005A7A07 /* extract_tokens.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = extract_tokens.cpp; path = src/compiler/prepare_grammar/extract_tokens.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12EDCF901881FCCA005A7A07 /* extract_tokens.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = extract_tokens.h; path = src/compiler/prepare_grammar/extract_tokens.h; sourceTree = SOURCE_ROOT; }; 12EDCF911881FCCA005A7A07 /* perform.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = perform.cpp; path = src/compiler/prepare_grammar/perform.cpp; sourceTree = SOURCE_ROOT; }; 12EDCF921881FCCA005A7A07 /* perform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = perform.h; path = src/compiler/prepare_grammar/perform.h; sourceTree = SOURCE_ROOT; }; @@ -136,26 +138,24 @@ 12EDCF9C18820116005A7A07 /* lex_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = lex_table.h; sourceTree = ""; }; 12EDCF9D18820116005A7A07 /* parse_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table.cpp; sourceTree = ""; }; 12EDCF9E18820116005A7A07 /* parse_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_table.h; sourceTree = ""; }; - 12EDCFA218820137005A7A07 /* item.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item.cpp; path = src/compiler/build_tables/item.cpp; sourceTree = SOURCE_ROOT; }; + 12EDCFA218820137005A7A07 /* item.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = item.cpp; path = src/compiler/build_tables/item.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12EDCFA318820137005A7A07 /* item.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = item.h; path = src/compiler/build_tables/item.h; sourceTree = SOURCE_ROOT; }; - 12EDCFA418820137005A7A07 /* perform.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = perform.cpp; path = src/compiler/build_tables/perform.cpp; sourceTree = SOURCE_ROOT; }; + 12EDCFA418820137005A7A07 /* perform.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = perform.cpp; path = src/compiler/build_tables/perform.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12EDCFA518820137005A7A07 /* perform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = perform.h; path = src/compiler/build_tables/perform.h; sourceTree = SOURCE_ROOT; }; 12EDCFA618820137005A7A07 /* rule_transitions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rule_transitions.cpp; path = src/compiler/build_tables/rule_transitions.cpp; sourceTree = SOURCE_ROOT; }; 12EDCFA718820137005A7A07 /* rule_transitions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = rule_transitions.h; path = src/compiler/build_tables/rule_transitions.h; sourceTree = SOURCE_ROOT; }; - 12EDCFAC18820181005A7A07 /* compile.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = compile.cpp; sourceTree = ""; }; - 12EDCFAD18820181005A7A07 /* compile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = compile.h; sourceTree = ""; }; + 12EDCFAC18820181005A7A07 /* compile.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = compile.cpp; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12EDCFB6188205BA005A7A07 /* rule_transitions_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule_transitions_spec.cpp; sourceTree = ""; }; 12EDCFB7188205BA005A7A07 /* perform_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = perform_spec.cpp; sourceTree = ""; }; - 12EDCFBE18820880005A7A07 /* item_set_closure.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = item_set_closure.cpp; sourceTree = ""; }; + 12EDCFBE18820880005A7A07 /* item_set_closure.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = item_set_closure.cpp; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12EDCFBF18820880005A7A07 /* item_set_closure.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = item_set_closure.h; sourceTree = ""; }; 12EDCFC118820A70005A7A07 /* item_set_transitions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = item_set_transitions.cpp; sourceTree = ""; }; 12EDCFC218820A70005A7A07 /* item_set_transitions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = item_set_transitions.h; sourceTree = ""; }; - 12EDCFC41882153D005A7A07 /* first_set.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = first_set.cpp; sourceTree = ""; }; + 12EDCFC41882153D005A7A07 /* first_set.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = first_set.cpp; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12EDCFC51882153D005A7A07 /* first_set.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = first_set.h; sourceTree = ""; }; 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/spec_helper.cpp; sourceTree = SOURCE_ROOT; }; - 12F9A64D182DD5FD00FAF50C /* spec_helper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = spec_helper.h; path = spec/spec_helper.h; sourceTree = SOURCE_ROOT; }; - 12F9A64F182DD6BC00FAF50C /* grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = grammar.cpp; path = src/compiler/grammar.cpp; sourceTree = SOURCE_ROOT; }; - 12F9A650182DD6BC00FAF50C /* grammar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = grammar.h; path = src/compiler/grammar.h; sourceTree = SOURCE_ROOT; }; + 12F9A64D182DD5FD00FAF50C /* spec_helper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; name = spec_helper.h; path = spec/spec_helper.h; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; }; + 12F9A64F182DD6BC00FAF50C /* grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = grammar.cpp; path = src/compiler/grammar.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12FD405F185E68470041A84E /* c_code.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = c_code.cpp; path = src/compiler/generate_code/c_code.cpp; sourceTree = SOURCE_ROOT; }; 12FD4060185E68470041A84E /* c_code.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; name = c_code.h; path = src/compiler/generate_code/c_code.h; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; }; 12FD4063185E75290041A84E /* compile_fixtures.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = compile_fixtures.cpp; path = spec/compiler/compile_fixtures.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; @@ -165,7 +165,7 @@ 12FD40DE1860064C0041A84E /* tree.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = tree.c; sourceTree = ""; }; 12FD40E41862B3530041A84E /* visitor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = visitor.h; sourceTree = ""; }; 12FD40E618639B910041A84E /* visitor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = visitor.cpp; sourceTree = ""; }; - 12FD40E818641FB70041A84E /* rules.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rules.cpp; sourceTree = ""; }; + 12FD40E818641FB70041A84E /* rules.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = rules.cpp; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 12FD40F5186A16020041A84E /* lex_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = lex_table.cpp; sourceTree = ""; }; 27A340F3EEB184C040521323 /* pattern.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pattern.cpp; sourceTree = ""; }; 27A3438C4FA59A3882E8493B /* pattern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pattern.h; sourceTree = ""; }; @@ -194,6 +194,7 @@ children = ( 1213060F182C3A1100FCF928 /* blank.cpp */, 12130610182C3A1100FCF928 /* blank.h */, + 1236A7C318B287DC00593ABB /* character_range.cpp */, 12130603182C348F00FCF928 /* character_set.cpp */, 12130604182C348F00FCF928 /* character_set.h */, 1213060C182C398300FCF928 /* choice.cpp */, @@ -203,9 +204,8 @@ 12D136A2183678A2005F3369 /* repeat.cpp */, 12D136A3183678A2005F3369 /* repeat.h */, 1251209A1830145300C9B56A /* rule.cpp */, - 12130607182C374800FCF928 /* rule.h */, + 1236A7C918B2A79F00593ABB /* rule.h */, 12FD40E818641FB70041A84E /* rules.cpp */, - 12E71852181D081C0051A649 /* rules.h */, 12130612182C3A1700FCF928 /* seq.cpp */, 12130613182C3A1700FCF928 /* seq.h */, 12130615182C3D2900FCF928 /* string.cpp */, @@ -234,10 +234,10 @@ 127528AF18A6F9C6006B682B /* merge_transitions.h */, 12EDCFA418820137005A7A07 /* perform.cpp */, 12EDCFA518820137005A7A07 /* perform.h */, - 12EDCFA618820137005A7A07 /* rule_transitions.cpp */, - 12EDCFA718820137005A7A07 /* rule_transitions.h */, 127528B118AACAAA006B682B /* rule_can_be_blank.cpp */, 127528B218AACAAA006B682B /* rule_can_be_blank.h */, + 12EDCFA618820137005A7A07 /* rule_transitions.cpp */, + 12EDCFA718820137005A7A07 /* rule_transitions.h */, ); path = build_tables; sourceTree = ""; @@ -365,10 +365,8 @@ children = ( 12130618182C84B700FCF928 /* build_tables */, 12EDCFAC18820181005A7A07 /* compile.cpp */, - 12EDCFAD18820181005A7A07 /* compile.h */, 12FD4067185E8AF40041A84E /* generate_code */, 12F9A64F182DD6BC00FAF50C /* grammar.cpp */, - 12F9A650182DD6BC00FAF50C /* grammar.h */, 12FD40F5186A16020041A84E /* lex_table.cpp */, 12EDCF9C18820116005A7A07 /* lex_table.h */, 12EDCF9D18820116005A7A07 /* parse_table.cpp */, @@ -422,6 +420,7 @@ 12FD40D3185FED630041A84E /* include */ = { isa = PBXGroup; children = ( + 12E71852181D081C0051A649 /* compiler.h */, 121D8B3018795CC0003CF44B /* parser.h */, 127528B918B041B6006B682B /* runtime.h */, ); @@ -536,6 +535,7 @@ 12BC470718830BC5005AC502 /* first_set_spec.cpp in Sources */, 1213060B182C389100FCF928 /* symbol.cpp in Sources */, 127528B518AACB70006B682B /* rule_can_be_blank_spec.cpp in Sources */, + 1236A7C518B287DC00593ABB /* character_range.cpp in Sources */, 1251209B1830145300C9B56A /* rule.cpp in Sources */, 27A343CA69E17E0F9EBEDF1C /* pattern.cpp in Sources */, );