Make separate types for syntax and lexical grammars
This way, the separator characters can be added as a field of lexical grammars only.
This commit is contained in:
parent
d5674d33c4
commit
7df35f9b8d
49 changed files with 467 additions and 395 deletions
|
|
@ -10,16 +10,16 @@ using namespace build_tables;
|
|||
START_TEST
|
||||
|
||||
describe("building parse tables", []() {
|
||||
auto parse_grammar = PreparedGrammar({
|
||||
SyntaxGrammar parse_grammar({
|
||||
{ "rule0", choice({ i_sym(1), i_sym(2) }) },
|
||||
{ "rule1", i_token(0) },
|
||||
{ "rule2", i_token(1) },
|
||||
}, {}).ubiquitous_tokens({ Symbol(2, SymbolOptionToken) });
|
||||
}, {}, { Symbol(2, SymbolOptionToken) });
|
||||
|
||||
PreparedGrammar lex_grammar({
|
||||
LexicalGrammar lex_grammar({
|
||||
{ "token0", pattern("[a-c]") },
|
||||
{ "token1", pattern("[b-d]") },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
it("first looks for the start rule and its item set closure", [&]() {
|
||||
auto result = build_parse_table(parse_grammar, lex_grammar);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/parse_conflict_manager.h"
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
|
@ -10,16 +11,16 @@ START_TEST
|
|||
describe("resolving parse conflicts", []() {
|
||||
bool update;
|
||||
|
||||
PreparedGrammar parse_grammar({
|
||||
SyntaxGrammar parse_grammar({
|
||||
{ "rule1", seq({ sym("rule2"), sym("token2") }) },
|
||||
{ "rule2", sym("token1") },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
PreparedGrammar lex_grammar({
|
||||
LexicalGrammar lex_grammar({
|
||||
{ "token1", pattern("[a-c]") },
|
||||
{ "token2", pattern("[b-d]") },
|
||||
{ "token3", keyword("stuff") },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
describe("lexical conflicts", [&]() {
|
||||
Symbol sym1(0, SymbolOptionToken);
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("computing FIRST sets", []() {
|
||||
const PreparedGrammar null_grammar({}, {});
|
||||
const SyntaxGrammar null_grammar;
|
||||
|
||||
describe("for a sequence AB", [&]() {
|
||||
it("ignores B when A cannot be blank", [&]() {
|
||||
|
|
@ -41,12 +41,12 @@ describe("computing FIRST sets", []() {
|
|||
i_token(1) }),
|
||||
i_sym(0) });
|
||||
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", seq({
|
||||
i_token(2),
|
||||
i_token(3),
|
||||
i_token(4) }) }
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0, SymbolOptionToken),
|
||||
|
|
@ -59,11 +59,11 @@ describe("computing FIRST sets", []() {
|
|||
i_sym(0),
|
||||
i_token(1) });
|
||||
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", choice({
|
||||
i_token(0),
|
||||
blank() }) }
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0, SymbolOptionToken),
|
||||
|
|
@ -74,12 +74,12 @@ describe("computing FIRST sets", []() {
|
|||
|
||||
describe("when there are left-recursive rules", [&]() {
|
||||
it("terminates", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", choice({
|
||||
seq({ i_sym(0), i_token(10) }),
|
||||
i_token(11),
|
||||
}) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
auto rule = i_sym(0);
|
||||
|
||||
|
|
|
|||
|
|
@ -9,14 +9,14 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("computing closures of item sets", []() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "E", seq({
|
||||
i_sym(1),
|
||||
i_token(11) }) },
|
||||
{ "T", seq({
|
||||
i_token(12),
|
||||
i_token(13) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
it("adds items at the beginnings of referenced rules", [&]() {
|
||||
ParseItemSet item_set = item_set_closure(ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0),
|
||||
|
|
|
|||
|
|
@ -8,15 +8,13 @@ using namespace build_tables;
|
|||
START_TEST
|
||||
|
||||
describe("lexical item set transitions", []() {
|
||||
PreparedGrammar grammar({}, {});
|
||||
|
||||
describe("when two items in the set have transitions on the same character", [&]() {
|
||||
it("merges the transitions by computing the union of the two item sets", [&]() {
|
||||
LexItemSet set1({
|
||||
LexItem(Symbol(1), character({ {'a', 'f'} })),
|
||||
LexItem(Symbol(2), character({ {'e', 'x'} })) });
|
||||
|
||||
AssertThat(char_transitions(set1, grammar), Equals(map<CharacterSet, LexItemSet>({
|
||||
AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
|
||||
{ CharacterSet({ {'a', 'd'} }), LexItemSet({
|
||||
LexItem(Symbol(1), blank()) }) },
|
||||
{ CharacterSet({ {'e', 'f'} }), LexItemSet({
|
||||
|
|
@ -30,10 +28,10 @@ describe("lexical item set transitions", []() {
|
|||
});
|
||||
|
||||
describe("syntactic item set transitions", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "A", blank() },
|
||||
{ "B", i_token(21) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
it("computes the closure of the new item sets", [&]() {
|
||||
ParseItemSet set1({
|
||||
|
|
|
|||
|
|
@ -56,14 +56,14 @@ describe("checking if rules can be blank", [&]() {
|
|||
});
|
||||
|
||||
describe("checking recursively (by expanding non-terminals)", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "A", choice({
|
||||
seq({ i_sym(0), i_token(11) }),
|
||||
blank() }) },
|
||||
{ "B", choice({
|
||||
seq({ i_sym(1), i_token(12) }),
|
||||
i_token(13) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
it("terminates for left-recursive rules that can be blank", [&]() {
|
||||
rule = i_sym(0);
|
||||
|
|
|
|||
|
|
@ -1,26 +1,11 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
template<typename K>
|
||||
class rule_map : public map<K, rule_ptr> {
|
||||
public:
|
||||
bool operator==(const map<K, rule_ptr> &other) const {
|
||||
if (this->size() != other.size()) return false;
|
||||
for (const auto &pair : *this) {
|
||||
auto other_pair = other.find(pair.first);
|
||||
if (other_pair == other.end()) return false;
|
||||
if (!pair.second->operator==(*other_pair->second)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}
|
||||
};
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("rule transitions", []() {
|
||||
|
|
|
|||
52
spec/compiler/helpers/containers.h
Normal file
52
spec/compiler/helpers/containers.h
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
#ifndef HELPERS_CONTAINERS_H_
|
||||
#define HELPERS_CONTAINERS_H_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <initializer_list>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/rule.h"
|
||||
|
||||
using std::map;
|
||||
using std::vector;
|
||||
using std::string;
|
||||
using std::initializer_list;
|
||||
using std::pair;
|
||||
using tree_sitter::rules::rule_ptr;
|
||||
|
||||
template<typename K>
|
||||
class rule_map : public map<K, rule_ptr> {
|
||||
public:
|
||||
bool operator==(const map<K, rule_ptr> &other) const {
|
||||
if (this->size() != other.size()) return false;
|
||||
for (const auto &pair : *this) {
|
||||
auto other_pair = other.find(pair.first);
|
||||
if (other_pair == other.end()) return false;
|
||||
if (!pair.second->operator==(*other_pair->second)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}
|
||||
};
|
||||
|
||||
// An ordered list of (name, rule) pairs with an equality operator suited to
// spec assertions. Two lists are equal when they have the same length and,
// position by position, the names are identical and operator== on the
// pointed-to rules reports them equal.
// NOTE(review): the rule pointers are dereferenced without a null check.
class rule_list : public vector<pair<string, rule_ptr>> {
 public:
  // Allows `rule_list({ { "name", rule }, ... })` in expectations.
  rule_list(const initializer_list<pair<string, rule_ptr>> &list) :
    vector<pair<string, rule_ptr>>(list) {}

  bool operator==(const vector<pair<string, rule_ptr>> &other) const {
    if (this->size() != other.size()) return false;

    for (size_t i = 0; i < this->size(); i++) {
      // Bind by reference: copying a pair would copy the name string and the
      // rule pointer on every iteration for no benefit.
      const auto &entry = (*this)[i];
      const auto &other_entry = other[i];

      // The name is part of the entry's identity; without this check two
      // lists holding the same rules under different names compared equal.
      if (entry.first != other_entry.first)
        return false;

      // Compare the rules themselves rather than the pointer values.
      if (!entry.second->operator==(*other_entry.second))
        return false;
    }

    return true;
  }
};
|
||||
|
||||
|
||||
#endif // HELPERS_CONTAINERS_H_
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -9,29 +10,33 @@ using prepare_grammar::expand_repeats;
|
|||
|
||||
describe("expanding repeat rules in a grammar", []() {
|
||||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", repeat(i_token(0)) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", i_aux_sym(0) },
|
||||
}, {
|
||||
{ "rule0_repeat0", choice({
|
||||
seq({
|
||||
i_token(0),
|
||||
i_aux_sym(0) }),
|
||||
blank() }) },
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", choice({ seq({ i_token(0), i_aux_sym(0) }), blank() }) },
|
||||
})));
|
||||
});
|
||||
|
||||
it("replaces repeats inside of sequences", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", seq({ i_token(10), repeat(i_token(11)) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", seq({ i_token(10), i_aux_sym(0) }) },
|
||||
}, {
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", choice({
|
||||
seq({ i_token(11), i_aux_sym(0) }),
|
||||
blank() }) },
|
||||
|
|
@ -39,13 +44,17 @@ describe("expanding repeat rules in a grammar", []() {
|
|||
});
|
||||
|
||||
it("replaces repeats inside of choices", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", choice({ i_token(10), i_aux_sym(0) }) },
|
||||
}, {
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", choice({
|
||||
seq({ i_token(11), i_aux_sym(0) }),
|
||||
blank() }) },
|
||||
|
|
@ -53,13 +62,17 @@ describe("expanding repeat rules in a grammar", []() {
|
|||
});
|
||||
|
||||
it("can replace multiple repeats in the same rule", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", seq({ repeat(i_token(10)), repeat(i_token(11)) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", seq({ i_aux_sym(0), i_aux_sym(1) }) },
|
||||
}, {
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", choice({
|
||||
seq({
|
||||
i_token(10),
|
||||
|
|
@ -74,15 +87,19 @@ describe("expanding repeat rules in a grammar", []() {
|
|||
});
|
||||
|
||||
it("can replace repeats in multiple rules", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", repeat(i_token(10)) },
|
||||
{ "rule1", repeat(i_token(11)) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", i_aux_sym(0) },
|
||||
{ "rule1", i_aux_sym(1) },
|
||||
}, {
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", choice({
|
||||
seq({ i_token(10), i_aux_sym(0) }),
|
||||
blank() }) },
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
|
||||
START_TEST
|
||||
|
|
@ -9,50 +10,50 @@ using prepare_grammar::expand_tokens;
|
|||
|
||||
describe("expanding token rules", []() {
|
||||
it("replaces regex patterns with their expansion", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
LexicalGrammar grammar({
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
pattern("x*"),
|
||||
i_sym(11) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
repeat(character({ 'x' })),
|
||||
i_sym(11) }) },
|
||||
}, {})));
|
||||
})));
|
||||
});
|
||||
|
||||
it("replaces string rules with a sequence of characters", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
LexicalGrammar grammar({
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
str("xyz"),
|
||||
i_sym(11) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
seq({ character({ 'x' }), character({ 'y' }), character({ 'z' }) }),
|
||||
i_sym(11) }) },
|
||||
}, {})));
|
||||
})));
|
||||
});
|
||||
|
||||
it("returns an error when the grammar contains an invalid regex", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
LexicalGrammar grammar({
|
||||
{ "rule_A", seq({
|
||||
pattern("("),
|
||||
str("xyz"),
|
||||
pattern("[") }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,160 +1,172 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::extract_tokens;
|
||||
using prepare_grammar::InternedGrammar;
|
||||
|
||||
describe("extracting tokens from a grammar", []() {
|
||||
it("moves string rules into the lexical grammar", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
|
||||
}, {}));
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty())
|
||||
AssertThat(result.second.rules, IsEmpty())
|
||||
AssertThat(result.second.aux_rules, Equals(rule_list({
|
||||
{ "'ab'", str("ab") },
|
||||
})));
|
||||
});
|
||||
|
||||
it("moves pattern rules into the lexical grammar", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
|
||||
}, {}));
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty())
|
||||
AssertThat(result.second.rules, IsEmpty())
|
||||
AssertThat(result.second.aux_rules, Equals(rule_list({
|
||||
{ "/a+/", pattern("a+") },
|
||||
})));
|
||||
});
|
||||
|
||||
it("moves other rules marked as tokens into the lexical grammar", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", seq({
|
||||
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
|
||||
i_sym(0) }) }
|
||||
}, {}));
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", seq({
|
||||
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
|
||||
i_sym(0) }) }
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty())
|
||||
AssertThat(result.second.rules, IsEmpty())
|
||||
AssertThat(result.second.aux_rules, Equals(rule_list({
|
||||
{ "(seq /./ (choice 'a' 'b'))", token(seq({ pattern("."), choice({ str("a"), str("b") }) })) },
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not extract blanks", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", choice({ i_sym(0), blank() }) },
|
||||
}, {}));
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", choice({ i_sym(0), blank() }) },
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", choice({ i_sym(0), blank() }) },
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {})));
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty())
|
||||
AssertThat(result.second.rules, IsEmpty())
|
||||
AssertThat(result.second.aux_rules, IsEmpty())
|
||||
});
|
||||
|
||||
it("does not create duplicate tokens in the lexical grammar", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
|
||||
}, {}));
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty())
|
||||
AssertThat(result.second.rules, IsEmpty())
|
||||
AssertThat(result.second.aux_rules, Equals(rule_list({
|
||||
{ "'ab'", str("ab") },
|
||||
})));
|
||||
});
|
||||
|
||||
it("extracts tokens from the grammar's auxiliary rules", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({}, {
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({}, {
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
{ "'ab'", str("ab") },
|
||||
})));
|
||||
})))
|
||||
});
|
||||
|
||||
describe("when an entire rule can be extracted", [&]() {
|
||||
it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", i_sym(1) },
|
||||
{ "rule_B", pattern("a|b") },
|
||||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
}, {}));
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", i_sym(1) },
|
||||
{ "rule_B", pattern("a|b") },
|
||||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", i_token(0) }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty());
|
||||
AssertThat(result.second.rules, Equals(rule_list({
|
||||
{ "rule_B", pattern("a|b") },
|
||||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
}, {})));
|
||||
})));
|
||||
AssertThat(result.second.aux_rules, IsEmpty());
|
||||
});
|
||||
|
||||
it("updates symbols whose indices need to change due to deleted rules", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_sym(0) },
|
||||
{ "rule_C", i_sym(1) },
|
||||
}, {}));
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_sym(0) },
|
||||
{ "rule_C", i_sym(1) },
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_B", i_token(0) },
|
||||
{ "rule_C", i_sym(0) },
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty());
|
||||
AssertThat(result.second.rules, Equals(rule_list({
|
||||
{ "rule_A", str("ab") },
|
||||
}, {})));
|
||||
})));
|
||||
AssertThat(result.second.aux_rules, IsEmpty());
|
||||
});
|
||||
|
||||
it("updates the grammar's ubiquitous_tokens", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_sym(0) },
|
||||
{ "rule_C", i_sym(1) },
|
||||
}, {}).ubiquitous_tokens({ Symbol(0) }));
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_sym(0) },
|
||||
{ "rule_C", i_sym(1) },
|
||||
},
|
||||
{ Symbol(0) },
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
|
||||
AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
|
||||
{ Symbol(0, SymbolOptionToken) }
|
||||
})));
|
||||
});
|
||||
|
||||
it("extracts entire auxiliary rules", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({}, {
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_aux_sym(0) },
|
||||
{ "rule_C", i_aux_sym(1) },
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({}, {
|
||||
{ "rule_B", i_aux_token(0) },
|
||||
{ "rule_C", i_aux_sym(0) },
|
||||
})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
{ "rule_A", str("ab") },
|
||||
})));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -20,11 +21,11 @@ describe("interning symbols in a grammar", []() {
|
|||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "x", choice({ i_sym(1), i_sym(2) }) },
|
||||
{ "y", i_sym(2) },
|
||||
{ "z", str("stuff") },
|
||||
}, {})));
|
||||
})));
|
||||
});
|
||||
|
||||
describe("when there are symbols that reference undefined rules", [&]() {
|
||||
|
|
@ -49,10 +50,20 @@ describe("interning symbols in a grammar", []() {
|
|||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
|
||||
AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
|
||||
Symbol(2)
|
||||
})));
|
||||
});
|
||||
|
||||
it("preserves the grammar's separator character set", [&]() {
|
||||
auto grammar = Grammar({
|
||||
{ "z", str("stuff") }
|
||||
}).separators({ 'x', 'y' });
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.first.separators, Equals(vector<char>({ 'x', 'y' })))
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue