Rename spec -> test
'Test' is a much more straightforward name.
This commit is contained in:
parent
7d8daf573e
commit
6dc0ff359d
109 changed files with 44 additions and 44 deletions
89
test/compiler/build_tables/lex_conflict_manager_test.cc
Normal file
89
test/compiler/build_tables/lex_conflict_manager_test.cc
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
#include "test_helper.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
// Specs for LexConflictManager::resolve(). Each example passes the new action
// before the existing one and checks the returned flag; going by the example
// names, `true` means the new action is favored.
describe("LexConflictManager::resolve(new_action, old_action)", []() {
  LexConflictManager manager;
  Symbol token_a(0, Symbol::Terminal);
  Symbol token_b(1, Symbol::Terminal);
  Symbol token_c(2, Symbol::Terminal);
  Symbol token_d(3, Symbol::Terminal);

  // Item set handed to the advance/accept overload; it holds one item for token_d.
  LexItemSet pending_items({ LexItem(token_d, blank()) });

  // Every example starts from a freshly constructed manager.
  before_each([&]() {
    manager = LexConflictManager();
  });

  it("favors advance actions over empty accept token actions", [&]() {
    AssertThat(
      manager.resolve(pending_items, AdvanceAction(2, {0, 0}, true), AcceptTokenAction()),
      IsTrue());
  });

  describe("accept-token/accept-token conflicts", [&]() {
    describe("when the tokens' precedence values differ", [&]() {
      it("favors the token with higher precedence", [&]() {
        AcceptTokenAction low_precedence(token_b, 1, false);
        AcceptTokenAction high_precedence(token_a, 2, false);

        AssertThat(manager.resolve(low_precedence, high_precedence), IsFalse());
        AssertThat(manager.resolve(high_precedence, low_precedence), IsTrue());
      });

      it("adds the preferred token as a possible homonym for the discarded one", [&]() {
        manager.resolve(AcceptTokenAction(token_b, 1, false), AcceptTokenAction(token_a, 2, false));
        AssertThat(manager.possible_homonyms[token_b.index], Contains(token_a.index));
      });
    });

    describe("when one token is string-based and the other is regexp-based", [&]() {
      it("favors the string-based token", [&]() {
        AcceptTokenAction regexp_token(token_a, 0, false);
        AcceptTokenAction string_token(token_b, 0, true);

        AssertThat(manager.resolve(regexp_token, string_token), IsFalse());
        AssertThat(manager.resolve(string_token, regexp_token), IsTrue());
      });
    });

    describe("when the tokens have equal precedence", [&]() {
      it("favors the token listed earlier in the grammar", [&]() {
        AcceptTokenAction earlier_token(token_a, 0, false);
        AcceptTokenAction later_token(token_b, 0, false);

        AssertThat(manager.resolve(later_token, earlier_token), IsFalse());
        AssertThat(manager.resolve(earlier_token, later_token), IsTrue());
      });
    });
  });

  describe("advance/accept-token conflicts", [&]() {
    describe("when the token to accept has higher precedence", [&]() {
      it("prefers the accept-token action", [&]() {
        // Nothing is recorded before or after a conflict won by the accept action.
        AssertThat(manager.possible_extensions, IsEmpty());
        AssertThat(
          manager.resolve(pending_items, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(token_c, 3, true)),
          IsFalse());
        AssertThat(manager.possible_extensions, IsEmpty());
      });
    });

    describe("when the token to accept does not have a higher precedence", [&]() {
      it("favors the advance action and adds the in-progress tokens as possible extensions of the discarded token", [&]() {
        AssertThat(
          manager.resolve(pending_items, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(token_c, 2, true)),
          IsTrue());
        AssertThat(manager.possible_extensions[token_c.index], Contains(token_d.index));
      });
    });
  });
});
|
||||
|
||||
END_TEST
|
||||
514
test/compiler/build_tables/lex_item_test.cc
Normal file
514
test/compiler/build_tables/lex_item_test.cc
Normal file
|
|
@ -0,0 +1,514 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
typedef LexItemSet::Transition Transition;
|
||||
|
||||
START_TEST
|
||||
|
||||
// Specs for LexItem::completion_status(): checks the `is_done` flag and the
// `precedence` range reported for three different rules.
describe("LexItem", []() {
  describe("completion_status()", [&]() {
    it("indicates whether the item is done and its precedence", [&]() {
      // A plain character set: not done, default precedence range.
      LexItem item1(Symbol(0, Symbol::Terminal), character({ 'a', 'b', 'c' }));
      AssertThat(item1.completion_status().is_done, IsFalse());
      AssertThat(item1.completion_status().precedence, Equals(PrecedenceRange()));

      MetadataParams params;
      params.precedence = 3;
      params.has_precedence = true;
      params.is_string = true;  // set with `true`, matching how the flag is set elsewhere

      // A choice whose first branch is a blank carrying precedence 3:
      // done, and the reported range is that precedence.
      LexItem item2(Symbol(0, Symbol::Terminal), choice({
        metadata(blank(), params),
        character({ 'a', 'b', 'c' })
      }));

      AssertThat(item2.completion_status().is_done, IsTrue());
      AssertThat(item2.completion_status().precedence, Equals(PrecedenceRange(3)));

      // A repeat: done, default precedence range.
      LexItem item3(Symbol(0, Symbol::Terminal), repeat(character({ ' ', '\t' })));
      AssertThat(item3.completion_status().is_done, IsTrue());
      AssertThat(item3.completion_status().precedence, Equals(PrecedenceRange()));
    });
  });
});
|
||||
|
||||
describe("LexItemSet::transitions()", [&]() {
|
||||
// A one-character rule yields a single transition on that character, leading to
// an item whose remaining rule is blank.
it("handles single characters", [&]() {
  Symbol rule_symbol(1, Symbol::NonTerminal);
  LexItemSet start({
    LexItem(rule_symbol, character({ 'x' })),
  });

  LexItemSet::TransitionMap expected({
    {
      CharacterSet().include('x'),
      Transition{
        LexItemSet({ LexItem(rule_symbol, blank()) }),
        PrecedenceRange(),
        false
      }
    }
  });

  AssertThat(start.transitions(), Equals(expected));
});
|
||||
|
||||
// When the consumed character sits inside metadata with `is_main_token` set,
// the transition's final field is expected to be true.
it("marks transitions that are within the main token (as opposed to separators)", [&]() {
  MetadataParams params;
  params.is_main_token = true;

  Symbol rule_symbol(1, Symbol::NonTerminal);
  LexItemSet start({
    LexItem(rule_symbol, metadata(character({ 'x' }), params)),
  });

  LexItemSet::TransitionMap expected({
    {
      CharacterSet().include('x'),
      Transition{
        LexItemSet({ LexItem(rule_symbol, metadata(blank(), params)) }),
        PrecedenceRange(),
        true
      }
    }
  });

  AssertThat(start.transitions(), Equals(expected));
});
|
||||
|
||||
// Consuming the first character of a sequence leaves the rest of the sequence.
it("handles sequences", [&]() {
  Symbol rule_symbol(1, Symbol::NonTerminal);
  LexItemSet start({
    LexItem(rule_symbol, seq({
      character({ 'w' }),
      character({ 'x' }),
      character({ 'y' }),
      character({ 'z' }),
    })),
  });

  LexItemSet::TransitionMap expected({
    {
      CharacterSet().include('w'),
      Transition{
        LexItemSet({
          LexItem(rule_symbol, seq({
            character({ 'x' }),
            character({ 'y' }),
            character({ 'z' }),
          })),
        }),
        PrecedenceRange(),
        false
      }
    }
  });

  AssertThat(start.transitions(), Equals(expected));
});
|
||||
|
||||
// Walks the rule v (w x) y z one character at a time, where "v (w x) y" has
// precedence 3 and "(w x)" has precedence 4, checking the destination item set
// and the precedence range attached to every step.
it("handles sequences with nested precedence", [&]() {
  Symbol rule_symbol(1, Symbol::NonTerminal);
  LexItemSet start({
    LexItem(rule_symbol, seq({
      prec(3, seq({
        character({ 'v' }),
        prec(4, seq({
          character({ 'w' }),
          character({ 'x' }) })),
        character({ 'y' }) })),
      character({ 'z' }),
    })),
  });

  // Step 1: consume 'v'.
  auto from_start = start.transitions();
  AssertThat(
    from_start,
    Equals(LexItemSet::TransitionMap({
      {
        CharacterSet().include('v'),
        Transition{
          // The outer precedence becomes 'active' because we are now inside
          // the rule it wraps.
          LexItemSet({
            LexItem(rule_symbol, seq({
              active_prec(3, seq({
                prec(4, seq({
                  character({ 'w' }),
                  character({ 'x' }) })),
                character({ 'y' }) })),
              character({ 'z' }),
            })),
          }),

          // Entering a rule applies no precedence.
          PrecedenceRange(),
          false
        }
      }
    })));

  // Step 2: consume 'w'.
  LexItemSet after_v = from_start[CharacterSet().include('v')].destination;
  auto from_v = after_v.transitions();
  AssertThat(
    from_v,
    Equals(LexItemSet::TransitionMap({
      {
        CharacterSet().include('w'),
        Transition{
          // The inner precedence becomes 'active' as well.
          LexItemSet({
            LexItem(rule_symbol, seq({
              active_prec(3, seq({
                active_prec(4, character({ 'x' })),
                character({ 'y' }) })),
              character({ 'z' }),
            })),
          }),

          // The outer precedence applies to this step.
          PrecedenceRange(3),
          false
        }
      }
    })));

  // Step 3: consume 'x'.
  LexItemSet after_w = from_v[CharacterSet().include('w')].destination;
  auto from_w = after_w.transitions();
  AssertThat(
    from_w,
    Equals(LexItemSet::TransitionMap({
      {
        CharacterSet().include('x'),
        Transition{
          LexItemSet({
            LexItem(rule_symbol, seq({
              active_prec(3, character({ 'y' })),
              character({ 'z' }),
            })),
          }),

          // The inner precedence applies to this step.
          PrecedenceRange(4),
          false
        }
      }
    })));

  // Step 4: consume 'y'; only the trailing 'z' remains.
  LexItemSet after_x = from_w[CharacterSet().include('x')].destination;
  auto from_x = after_x.transitions();
  AssertThat(
    from_x,
    Equals(LexItemSet::TransitionMap({
      {
        CharacterSet().include('y'),
        Transition{
          LexItemSet({
            LexItem(rule_symbol, character({ 'z' })),
          }),
          PrecedenceRange(3),
          false
        }
      }
    })));
});
|
||||
|
||||
// When the first element of a sequence may be blank, transitions exist both for
// that element's character and for the character of the element after it.
it("handles sequences where the left hand side can be blank", [&]() {
  Symbol rule_symbol(1, Symbol::NonTerminal);
  LexItemSet start({
    LexItem(rule_symbol, seq({
      choice({
        character({ 'x' }),
        blank(),
      }),
      character({ 'y' }),
      character({ 'z' }),
    })),
  });

  LexItemSet::TransitionMap expected({
    {
      CharacterSet().include('x'),
      Transition{
        LexItemSet({
          LexItem(rule_symbol, seq({
            character({ 'y' }),
            character({ 'z' }),
          })),
        }),
        PrecedenceRange(),
        false
      }
    },
    {
      CharacterSet().include('y'),
      Transition{
        LexItemSet({
          LexItem(rule_symbol, character({ 'z' })),
        }),
        PrecedenceRange(),
        false
      }
    }
  });

  AssertThat(start.transitions(), Equals(expected));
});
|
||||
|
||||
// An item whose rule is blank has no outgoing transitions.
it("handles blanks", [&]() {
  Symbol rule_symbol(1, Symbol::NonTerminal);
  LexItemSet blank_only({
    LexItem(rule_symbol, blank()),
  });

  AssertThat(blank_only.transitions(), IsEmpty());
});
|
||||
|
||||
// After one character of a repeat1, the destination holds two items: one that
// continues into another repetition and one that finishes the current pass.
it("handles repeats", [&]() {
  Symbol pair_rule(1, Symbol::NonTerminal);
  Symbol single_rule(2, Symbol::NonTerminal);

  LexItemSet start({
    LexItem(pair_rule, repeat1(seq({
      character({ 'a' }),
      character({ 'b' }),
    }))),
    LexItem(single_rule, repeat1(character({ 'c' }))),
  });

  LexItemSet::TransitionMap expected({
    {
      CharacterSet().include('a'),
      Transition{
        LexItemSet({
          LexItem(pair_rule, seq({
            character({ 'b' }),
            repeat1(seq({
              character({ 'a' }),
              character({ 'b' }),
            }))
          })),
          LexItem(pair_rule, character({ 'b' })),
        }),
        PrecedenceRange(),
        false
      }
    },
    {
      CharacterSet().include('c'),
      Transition{
        LexItemSet({
          LexItem(single_rule, repeat1(character({ 'c' }))),
          LexItem(single_rule, blank()),
        }),
        PrecedenceRange(),
        false
      }
    }
  });

  AssertThat(start.transitions(), Equals(expected));
});
|
||||
|
||||
// A repeat wrapped in an active precedence keeps that wrapper on both resulting
// items, and the transition carries the same precedence.
it("handles repeats with precedence", [&]() {
  Symbol rule_symbol(1, Symbol::NonTerminal);
  LexItemSet start({
    LexItem(rule_symbol, active_prec(-1, repeat1(character({ 'a' }))))
  });

  LexItemSet::TransitionMap expected({
    {
      CharacterSet().include('a'),
      Transition{
        LexItemSet({
          LexItem(rule_symbol, active_prec(-1, repeat1(character({ 'a' })))),
          LexItem(rule_symbol, active_prec(-1, blank())),
        }),
        PrecedenceRange(-1),
        false
      }
    }
  });

  AssertThat(start.transitions(), Equals(expected));
});
|
||||
|
||||
// Two choice branches share the characters c-d, so the expected map has three
// keys: a-b (first branch only), c-d (both branches, precedence range 2..3)
// and e-f (second branch only).
it("handles choices between overlapping character sets", [&]() {
  Symbol rule_symbol(1, Symbol::NonTerminal);
  LexItemSet start({
    LexItem(rule_symbol, choice({
      active_prec(2, seq({
        character({ 'a', 'b', 'c', 'd' }),
        character({ 'x' }),
      })),
      active_prec(3, seq({
        character({ 'c', 'd', 'e', 'f' }),
        character({ 'y' }),
      })),
    }))
  });

  LexItemSet::TransitionMap expected({
    {
      CharacterSet().include('a', 'b'),
      Transition{
        LexItemSet({
          LexItem(rule_symbol, active_prec(2, character({ 'x' }))),
        }),
        PrecedenceRange(2),
        false
      }
    },
    {
      CharacterSet().include('c', 'd'),
      Transition{
        LexItemSet({
          LexItem(rule_symbol, active_prec(2, character({ 'x' }))),
          LexItem(rule_symbol, active_prec(3, character({ 'y' }))),
        }),
        PrecedenceRange(2, 3),
        false
      }
    },
    {
      CharacterSet().include('e', 'f'),
      Transition{
        LexItemSet({
          LexItem(rule_symbol, active_prec(3, character({ 'y' }))),
        }),
        PrecedenceRange(3),
        false
      }
    },
  });

  AssertThat(start.transitions(), Equals(expected));
});
|
||||
|
||||
// One branch accepts b-d and the other a-f. The expected map has one key for
// the characters only the wider branch accepts (a, e-f) and one for b-d.
it("handles choices between a subset and a superset of characters", [&]() {
  Symbol rule_symbol(1, Symbol::NonTerminal);
  LexItemSet start({
    LexItem(rule_symbol, choice({
      seq({
        character({ 'b', 'c', 'd' }),
        character({ 'x' }),
      }),
      seq({
        character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
        character({ 'y' }),
      }),
    })),
  });

  LexItemSet::TransitionMap expected({
    {
      CharacterSet().include('a').include('e', 'f'),
      Transition{
        LexItemSet({
          LexItem(rule_symbol, character({ 'y' })),
        }),
        PrecedenceRange(),
        false
      }
    },
    {
      CharacterSet().include('b', 'd'),
      Transition{
        LexItemSet({
          LexItem(rule_symbol, character({ 'x' })),
          LexItem(rule_symbol, character({ 'y' })),
        }),
        PrecedenceRange(),
        false
      }
    },
  });

  AssertThat(start.transitions(), Equals(expected));
});
|
||||
|
||||
// Mixes an "anything but '/'" set with a branch that starts with a backslash.
// The backslash gets its own key because both branches can consume it.
it("handles choices between whitelisted and blacklisted character sets", [&]() {
  Symbol rule_symbol(1, Symbol::NonTerminal);
  LexItemSet start({
    LexItem(rule_symbol, seq({
      choice({
        character({ '/' }, false),
        seq({
          character({ '\\' }),
          character({ '/' }),
        }),
      }),
      character({ '/' }),
    }))
  });

  LexItemSet::TransitionMap expected({
    {
      CharacterSet().include_all().exclude('/').exclude('\\'),
      Transition{
        LexItemSet({
          LexItem(rule_symbol, character({ '/' })),
        }),
        PrecedenceRange(),
        false
      }
    },
    {
      CharacterSet().include('\\'),
      Transition{
        LexItemSet({
          LexItem(rule_symbol, character({ '/' })),
          LexItem(rule_symbol, seq({ character({ '/' }), character({ '/' }) })),
        }),
        PrecedenceRange(),
        false
      }
    },
  });

  AssertThat(start.transitions(), Equals(expected));
});
|
||||
|
||||
// Two separate items overlap on e-f, so the expected map has three keys:
// a-d (first item), e-f (both items) and g-i (second item).
it("handles different items with overlapping character sets", [&]() {
  Symbol first_rule(1, Symbol::NonTerminal);
  Symbol second_rule(2, Symbol::NonTerminal);

  LexItemSet start({
    LexItem(first_rule, character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
    LexItem(second_rule, character({ 'e', 'f', 'g', 'h', 'i' }))
  });

  LexItemSet::TransitionMap expected({
    {
      CharacterSet().include('a', 'd'),
      Transition{
        LexItemSet({
          LexItem(first_rule, blank()),
        }),
        PrecedenceRange(),
        false
      }
    },
    {
      CharacterSet().include('e', 'f'),
      Transition{
        LexItemSet({
          LexItem(first_rule, blank()),
          LexItem(second_rule, blank()),
        }),
        PrecedenceRange(),
        false
      }
    },
    {
      CharacterSet().include('g', 'i'),
      Transition{
        LexItemSet({
          LexItem(second_rule, blank()),
        }),
        PrecedenceRange(),
        false
      }
    },
  });

  AssertThat(start.transitions(), Equals(expected));
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
134
test/compiler/build_tables/parse_item_set_builder_test.cc
Normal file
134
test/compiler/build_tables/parse_item_set_builder_test.cc
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
|
||||
using namespace build_tables;
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
// Specs for ParseItemSetBuilder::apply_transitive_closure(), run against small
// hand-built syntax grammars plus a lexical grammar of twenty blank tokens.
describe("ParseItemSetBuilder", []() {
  vector<LexicalVariable> tokens;
  for (size_t index = 0; index < 20; index++) {
    tokens.push_back(LexicalVariable{
      "token_" + to_string(index),
      VariableTypeNamed,
      blank(),
      false
    });
  }

  LexicalGrammar lexical_grammar{tokens, {}};

  // Shorthand constructors for the two symbol kinds used in these grammars.
  auto terminal = [](int index) { return Symbol(index, Symbol::Terminal); };
  auto non_terminal = [](int index) { return Symbol(index, Symbol::NonTerminal); };

  it("adds items at the beginnings of referenced rules", [&]() {
    SyntaxGrammar grammar{{
      SyntaxVariable{"rule0", VariableTypeNamed, {
        Production({
          {non_terminal(1), 0, AssociativityNone},
          {terminal(11), 0, AssociativityNone},
        }),
      }},
      SyntaxVariable{"rule1", VariableTypeNamed, {
        Production({
          {terminal(12), 0, AssociativityNone},
          {terminal(13), 0, AssociativityNone},
        }),
        Production({
          {non_terminal(2), 0, AssociativityNone},
        })
      }},
      SyntaxVariable{"rule2", VariableTypeNamed, {
        Production({
          {terminal(14), 0, AssociativityNone},
          {terminal(15), 0, AssociativityNone},
        })
      }},
    }, {}, {}, {}};

    // Returns a reference to a production stored inside `grammar`.
    auto production = [&](int variable_index, int production_index) -> const Production & {
      return grammar.variables[variable_index].productions[production_index];
    };

    // Seed: the start of rule0's only production, with token 10 as lookahead.
    ParseItemSet closure({
      {
        ParseItem(non_terminal(0), production(0, 0), 0),
        LookaheadSet({ terminal(10) }),
      }
    });

    ParseItemSetBuilder builder(grammar, lexical_grammar);
    builder.apply_transitive_closure(&closure);

    // rule0 begins with rule1, and rule1's second production begins with rule2,
    // so items for both are expected, each with token 11 as lookahead.
    ParseItemSet expected({
      {
        ParseItem(non_terminal(0), production(0, 0), 0),
        LookaheadSet({ terminal(10) })
      },
      {
        ParseItem(non_terminal(1), production(1, 0), 0),
        LookaheadSet({ terminal(11) })
      },
      {
        ParseItem(non_terminal(1), production(1, 1), 0),
        LookaheadSet({ terminal(11) })
      },
      {
        ParseItem(non_terminal(2), production(2, 0), 0),
        LookaheadSet({ terminal(11) })
      },
    });

    AssertThat(closure, Equals(expected));
  });

  it("handles rules with empty productions", [&]() {
    SyntaxGrammar grammar{{
      SyntaxVariable{"rule0", VariableTypeNamed, {
        Production({
          {non_terminal(1), 0, AssociativityNone},
          {terminal(11), 0, AssociativityNone},
        }),
      }},
      SyntaxVariable{"rule1", VariableTypeNamed, {
        Production({
          {terminal(12), 0, AssociativityNone},
          {terminal(13), 0, AssociativityNone},
        }),
        Production({})
      }},
    }, {}, {}, {}};

    // Returns a reference to a production stored inside `grammar`.
    auto production = [&](int variable_index, int production_index) -> const Production & {
      return grammar.variables[variable_index].productions[production_index];
    };

    ParseItemSet closure({
      {
        ParseItem(non_terminal(0), production(0, 0), 0),
        LookaheadSet({ terminal(10) }),
      }
    });

    ParseItemSetBuilder builder(grammar, lexical_grammar);
    builder.apply_transitive_closure(&closure);

    // Both of rule1's productions, including the empty one, are expected.
    ParseItemSet expected({
      {
        ParseItem(non_terminal(0), production(0, 0), 0),
        LookaheadSet({ terminal(10) })
      },
      {
        ParseItem(non_terminal(1), production(1, 0), 0),
        LookaheadSet({ terminal(11) })
      },
      {
        ParseItem(non_terminal(1), production(1, 1), 0),
        LookaheadSet({ terminal(11) })
      },
    });

    AssertThat(closure, Equals(expected));
  });
});
|
||||
|
||||
END_TEST
|
||||
60
test/compiler/build_tables/rule_can_be_blank_test.cc
Normal file
60
test/compiler/build_tables/rule_can_be_blank_test.cc
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
|
||||
using namespace rules;
|
||||
using build_tables::rule_can_be_blank;
|
||||
|
||||
START_TEST
|
||||
|
||||
// Specs for build_tables::rule_can_be_blank(), one example group per rule kind.
describe("rule_can_be_blank", [&]() {
  it("returns false for basic rules", [&]() {
    AssertThat(rule_can_be_blank(i_sym(3)), IsFalse());
    AssertThat(rule_can_be_blank(str("x")), IsFalse());
    AssertThat(rule_can_be_blank(pattern("x")), IsFalse());
  });

  it("returns true for blanks", [&]() {
    AssertThat(rule_can_be_blank(blank()), IsTrue());
  });

  it("returns true for repeats", [&]() {
    AssertThat(rule_can_be_blank(repeat(str("x"))), IsTrue());
  });

  it("returns true for choices iff one or more sides can be blank", [&]() {
    // The blank may appear on either side of the choice.
    AssertThat(rule_can_be_blank(choice({ sym("x"), blank() })), IsTrue());
    AssertThat(rule_can_be_blank(choice({ blank(), sym("x") })), IsTrue());
    AssertThat(rule_can_be_blank(choice({ sym("x"), sym("y") })), IsFalse());
  });

  it("returns true for sequences iff both sides can be blank", [&]() {
    // A single non-blank member on either side makes the sequence non-blank.
    AssertThat(rule_can_be_blank(seq({ blank(), str("x") })), IsFalse());
    AssertThat(rule_can_be_blank(seq({ str("x"), blank() })), IsFalse());
    AssertThat(rule_can_be_blank(seq({ blank(), choice({ sym("x"), blank() }) })), IsTrue());
  });

  it("ignores metadata rules", [&]() {
    // The answer follows the wrapped rule, not the metadata wrapper.
    rule_ptr wrapped_blank = make_shared<rules::Metadata>(blank(), MetadataParams());
    AssertThat(rule_can_be_blank(wrapped_blank), IsTrue());

    rule_ptr wrapped_symbol = make_shared<rules::Metadata>(sym("one"), MetadataParams());
    AssertThat(rule_can_be_blank(wrapped_symbol), IsFalse());
  });
});
|
||||
|
||||
END_TEST
|
||||
171
test/compiler/prepare_grammar/expand_repeats_test.cc
Normal file
171
test/compiler/prepare_grammar/expand_repeats_test.cc
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
using prepare_grammar::expand_repeats;
|
||||
|
||||
// Specs for prepare_grammar::expand_repeats(). Each example feeds in a grammar
// containing repeat1 rules and checks the variables of the result: repeats are
// replaced by symbol references, and auxiliary "<rule>_repeat<N>" variables
// are appended after the original ones.
describe("expand_repeats", []() {
  // Builds the rule expected for an auxiliary variable: either the auxiliary
  // symbol followed by the repeated token, or the token alone.
  auto repeat_rule = [](int aux_index, int token_index) {
    return choice({
      seq({ i_sym(aux_index), i_token(token_index) }),
      i_token(token_index),
    });
  };

  it("replaces repeat rules with pairs of recursive rules", [&]() {
    InitialSyntaxGrammar input{
      {
        Variable{"rule0", VariableTypeNamed, repeat1(i_token(0))},
      },
      {}, {}, {}
    };

    auto expanded = expand_repeats(input);

    vector<Variable> expected{
      Variable{"rule0", VariableTypeNamed, i_sym(1)},
      Variable{"rule0_repeat1", VariableTypeAuxiliary, repeat_rule(1, 0)},
    };
    AssertThat(expanded.variables, Equals(expected));
  });

  it("replaces repeats inside of sequences", [&]() {
    InitialSyntaxGrammar input{
      {
        Variable{"rule0", VariableTypeNamed, seq({
          i_token(10),
          repeat1(i_token(11)),
        })},
      },
      {}, {}, {}
    };

    auto expanded = expand_repeats(input);

    vector<Variable> expected{
      Variable{"rule0", VariableTypeNamed, seq({
        i_token(10),
        i_sym(1),
      })},
      Variable{"rule0_repeat1", VariableTypeAuxiliary, repeat_rule(1, 11)},
    };
    AssertThat(expanded.variables, Equals(expected));
  });

  it("replaces repeats inside of choices", [&]() {
    InitialSyntaxGrammar input{
      {
        Variable{"rule0", VariableTypeNamed, choice({
          i_token(10),
          repeat1(i_token(11))
        })},
      },
      {}, {}, {}
    };

    auto expanded = expand_repeats(input);

    vector<Variable> expected{
      Variable{"rule0", VariableTypeNamed, choice({
        i_token(10),
        i_sym(1),
      })},
      Variable{"rule0_repeat1", VariableTypeAuxiliary, repeat_rule(1, 11)},
    };
    AssertThat(expanded.variables, Equals(expected));
  });

  it("does not create redundant auxiliary rules", [&]() {
    // The same repeat appears three times across two rules.
    InitialSyntaxGrammar input{
      {
        Variable{"rule0", VariableTypeNamed, choice({
          seq({ i_token(1), repeat1(i_token(4)) }),
          seq({ i_token(2), repeat1(i_token(4)) }),
        })},
        Variable{"rule1", VariableTypeNamed, seq({
          i_token(3),
          repeat1(i_token(4))
        })},
      },
      {}, {}, {}
    };

    auto expanded = expand_repeats(input);

    // Only one auxiliary variable is expected, shared by all three uses.
    vector<Variable> expected{
      Variable{"rule0", VariableTypeNamed, choice({
        seq({ i_token(1), i_sym(2) }),
        seq({ i_token(2), i_sym(2) }),
      })},
      Variable{"rule1", VariableTypeNamed, seq({
        i_token(3),
        i_sym(2),
      })},
      Variable{"rule0_repeat1", VariableTypeAuxiliary, repeat_rule(2, 4)},
    };
    AssertThat(expanded.variables, Equals(expected));
  });

  it("can replace multiple repeats in the same rule", [&]() {
    InitialSyntaxGrammar input{
      {
        Variable{"rule0", VariableTypeNamed, seq({
          repeat1(i_token(10)),
          repeat1(i_token(11)),
        })},
      },
      {}, {}, {}
    };

    auto expanded = expand_repeats(input);

    vector<Variable> expected{
      Variable{"rule0", VariableTypeNamed, seq({
        i_sym(1),
        i_sym(2),
      })},
      Variable{"rule0_repeat1", VariableTypeAuxiliary, repeat_rule(1, 10)},
      Variable{"rule0_repeat2", VariableTypeAuxiliary, repeat_rule(2, 11)},
    };
    AssertThat(expanded.variables, Equals(expected));
  });

  it("can replace repeats in multiple rules", [&]() {
    InitialSyntaxGrammar input{
      {
        Variable{"rule0", VariableTypeNamed, repeat1(i_token(10))},
        Variable{"rule1", VariableTypeNamed, repeat1(i_token(11))},
      },
      {}, {}, {}
    };

    auto expanded = expand_repeats(input);

    vector<Variable> expected{
      Variable{"rule0", VariableTypeNamed, i_sym(2)},
      Variable{"rule1", VariableTypeNamed, i_sym(3)},
      Variable{"rule0_repeat1", VariableTypeAuxiliary, repeat_rule(2, 10)},
      Variable{"rule1_repeat1", VariableTypeAuxiliary, repeat_rule(3, 11)},
    };
    AssertThat(expanded.variables, Equals(expected));
  });
});
|
||||
|
||||
END_TEST
|
||||
169
test/compiler/prepare_grammar/expand_tokens_test.cc
Normal file
169
test/compiler/prepare_grammar/expand_tokens_test.cc
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::expand_tokens;
|
||||
|
||||
describe("expand_tokens", []() {
|
||||
MetadataParams string_token_params;
|
||||
string_token_params.is_string = true;
|
||||
string_token_params.is_token = true;
|
||||
|
||||
describe("string rules", [&]() {
|
||||
// A string inside a sequence is expected to be replaced in place by a sequence
// of single-character sets wrapped in the string-token metadata; the symbols
// around it stay untouched and no error is reported.
it("replaces strings with sequences of character sets", [&]() {
  LexicalVariable source_variable{
    "rule_A",
    VariableTypeNamed,
    seq({
      i_sym(10),
      str("xyz"),
      i_sym(11),
    }),
    false
  };
  LexicalGrammar input{ { source_variable }, {} };

  auto expanded = expand_tokens(input);

  vector<LexicalVariable> expected{
    LexicalVariable{
      "rule_A",
      VariableTypeNamed,
      seq({
        i_sym(10),
        metadata(seq({
          character({ 'x' }),
          character({ 'y' }),
          character({ 'z' }),
        }), string_token_params),
        i_sym(11),
      }),
      false
    }
  };

  AssertThat(expanded.second, Equals(CompileError::none()));
  AssertThat(expanded.first.variables, Equals(expected));
});
|
||||
|
||||
// A string holding Greek alpha, a space and Greek beta is expected to expand
// into one character set per code point (945, ' ', 946) rather than one per
// UTF-8 byte, wrapped in the string-token metadata.
it("handles strings containing non-ASCII UTF8 characters", [&]() {
  LexicalGrammar grammar{
    {
      LexicalVariable{
        "rule_A",
        VariableTypeNamed,
        str("\u03B1 \u03B2"),
        false
      },
    },
    {}
  };

  auto result = expand_tokens(grammar);

  // Check the error slot first, as the ASCII string spec does, so a failure
  // reports the compile error instead of only a variables mismatch.
  AssertThat(result.second, Equals(CompileError::none()));
  AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
    LexicalVariable{
      "rule_A",
      VariableTypeNamed,
      metadata(seq({
        character({ 945 }),
        character({ ' ' }),
        character({ 946 }),
      }), string_token_params),
      false
    }
  }));
});
|
||||
});
|
||||
|
||||
describe("regexp rules", [&]() {
|
||||
it("replaces regexps with the equivalent rule tree", [&]() {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
i_sym(10),
|
||||
pattern("x*"),
|
||||
i_sym(11),
|
||||
}),
|
||||
false
|
||||
}
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
i_sym(10),
|
||||
repeat(character({ 'x' })),
|
||||
i_sym(11),
|
||||
}),
|
||||
false
|
||||
}
|
||||
}));
|
||||
});
|
||||
|
||||
// A negated range of non-ASCII characters is expected to expand to a repeated
// character set built from every code point in the range (945 through 948)
// with the second `character` argument set to false.
it("handles regexps containing non-ASCII UTF8 characters", [&]() {
  LexicalGrammar grammar{
    {
      LexicalVariable{
        "rule_A",
        VariableTypeNamed,
        pattern("[^\u03B1-\u03B4]*"),
        false
      }
    },
    {}
  };

  auto result = expand_tokens(grammar);

  // Check the error slot first, as the sibling ASCII regexp test does, so a
  // regex parse failure is reported as an error instead of a variable mismatch.
  AssertThat(result.second, Equals(CompileError::none()));
  AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
    LexicalVariable{
      "rule_A",
      VariableTypeNamed,
      repeat(character({ 945, 946, 947, 948 }, false)),
      false
    }
  }));
});
|
||||
|
||||
it("returns an error when the grammar contains an invalid regex", [&]() {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
pattern("("),
|
||||
str("xyz"),
|
||||
pattern("["),
|
||||
}),
|
||||
false
|
||||
},
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError(TSCompileErrorTypeInvalidRegex, "unmatched open paren")));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
106
test/compiler/prepare_grammar/extract_choices_test.cc
Normal file
106
test/compiler/prepare_grammar/extract_choices_test.cc
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/prepare_grammar/extract_choices.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::extract_choices;
|
||||
|
||||
// Expected-value wrapper for use with Equals(): a vector of rule pointers
// whose equality compares the pointed-to rules rather than the pointers.
class rule_vector : public vector<rule_ptr> {
 public:
  // Returns true when both vectors have the same length and each rule in this
  // vector equals the rule at the same index in `other`.
  bool operator==(const vector<rule_ptr> &other) const {
    if (this->size() != other.size()) return false;
    for (size_t i = 0; i < this->size(); i++) {
      const rule_ptr &rule = (*this)[i];
      const rule_ptr &other_rule = other[i];
      // Compare against the element taken from `other`; comparing a rule with
      // itself would make every same-length comparison succeed.
      if (!rule->operator==(*other_rule))
        return false;
    }
    return true;
  }

  // Builds the expected vector directly from a braced list of rules.
  rule_vector(const initializer_list<rule_ptr> &list) :
    vector<rule_ptr>(list) {}
};
|
||||
|
||||
describe("extract_choices", []() {
|
||||
it("expands rules containing choices into multiple rules", [&]() {
|
||||
auto rule = seq({
|
||||
sym("a"),
|
||||
choice({ sym("b"), sym("c"), sym("d") }),
|
||||
sym("e")
|
||||
});
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
seq({ sym("a"), sym("b"), sym("e") }),
|
||||
seq({ sym("a"), sym("c"), sym("e") }),
|
||||
seq({ sym("a"), sym("d"), sym("e") }),
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles metadata rules", [&]() {
|
||||
auto rule = prec(5, choice({ sym("b"), sym("c"), sym("d") }));
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
prec(5, sym("b")),
|
||||
prec(5, sym("c")),
|
||||
prec(5, sym("d")),
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles nested choices", [&]() {
|
||||
auto rule = choice({
|
||||
seq({ choice({ sym("a"), sym("b") }), sym("c") }),
|
||||
sym("d")
|
||||
});
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
seq({ sym("a"), sym("c") }),
|
||||
seq({ sym("b"), sym("c") }),
|
||||
sym("d"),
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles blank rules", [&]() {
|
||||
AssertThat(extract_choices(blank()), Equals(rule_vector({
|
||||
blank(),
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not move choices outside of repeats", [&]() {
|
||||
auto rule = seq({
|
||||
choice({ sym("a"), sym("b") }),
|
||||
repeat1(seq({
|
||||
sym("c"),
|
||||
choice({
|
||||
sym("d"),
|
||||
sym("e"),
|
||||
}),
|
||||
sym("f"),
|
||||
})),
|
||||
sym("g"),
|
||||
});
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
seq({
|
||||
sym("a"),
|
||||
repeat1(choice({
|
||||
seq({ sym("c"), sym("d"), sym("f") }),
|
||||
seq({ sym("c"), sym("e"), sym("f") }),
|
||||
})),
|
||||
sym("g"),
|
||||
}),
|
||||
seq({
|
||||
sym("b"),
|
||||
repeat1(choice({
|
||||
seq({ sym("c"), sym("d"), sym("f") }),
|
||||
seq({ sym("c"), sym("e"), sym("f") }),
|
||||
})),
|
||||
sym("g"),
|
||||
}),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
276
test/compiler/prepare_grammar/extract_tokens_test.cc
Normal file
276
test/compiler/prepare_grammar/extract_tokens_test.cc
Normal file
|
|
@ -0,0 +1,276 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/equals_pointer.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::extract_tokens;
|
||||
using prepare_grammar::InternedGrammar;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
|
||||
describe("extract_tokens", []() {
|
||||
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
|
||||
str("ab"),
|
||||
pattern("cd*"),
|
||||
choice({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
token(repeat1(choice({ str("ef"), str("gh") }))),
|
||||
}),
|
||||
}))},
|
||||
Variable{"rule_B", VariableTypeNamed, pattern("ij+")},
|
||||
Variable{"rule_C", VariableTypeNamed, choice({ str("kl"), blank() })},
|
||||
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(3))},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
CompileError error = get<2>(result);
|
||||
|
||||
AssertThat(error, Equals(CompileError::none()));
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>{
|
||||
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
|
||||
|
||||
// This string is now the first token in the lexical grammar.
|
||||
i_token(0),
|
||||
|
||||
// This pattern is now the second rule in the lexical grammar.
|
||||
i_token(1),
|
||||
|
||||
choice({
|
||||
// Rule 1, which this symbol pointed to, has been moved to the
|
||||
// lexical grammar.
|
||||
i_token(3),
|
||||
|
||||
// This symbol's index has been decremented, because a previous rule
|
||||
// was moved to the lexical grammar.
|
||||
i_sym(1),
|
||||
|
||||
// This token rule is now the third rule in the lexical grammar.
|
||||
i_token(2),
|
||||
}),
|
||||
}))},
|
||||
|
||||
Variable{"rule_C", VariableTypeNamed, choice({ i_token(4), blank() })},
|
||||
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(2))},
|
||||
}));
|
||||
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable>({
|
||||
// Strings become anonymous rules.
|
||||
LexicalVariable{"ab", VariableTypeAnonymous, str("ab"), true},
|
||||
|
||||
// Patterns become hidden rules.
|
||||
LexicalVariable{"/cd*/", VariableTypeAuxiliary, pattern("cd*"), false},
|
||||
|
||||
// Rules marked as tokens become hidden rules.
|
||||
LexicalVariable{"/(ef|gh)*/", VariableTypeAuxiliary, repeat1(choice({
|
||||
str("ef"),
|
||||
str("gh")
|
||||
})), false},
|
||||
|
||||
// This named rule was moved wholesale to the lexical grammar.
|
||||
LexicalVariable{"rule_B", VariableTypeNamed, pattern("ij+"), false},
|
||||
|
||||
// Strings become anonymous rules.
|
||||
LexicalVariable{"kl", VariableTypeAnonymous, str("kl"), true},
|
||||
})));
|
||||
});
|
||||
|
||||
// The same string appearing twice in a rule is expected to produce a single
// lexical variable, with both occurrences replaced by the same token index.
it("does not create duplicate tokens in the lexical grammar", [&]() {
  auto result = extract_tokens(InternedGrammar{
    {
      Variable{"rule_A", VariableTypeNamed, seq({
        str("ab"),
        i_sym(0),
        str("ab"),
      })},
    },
    {},
    {},
    {}
  });

  InitialSyntaxGrammar &syntax_grammar = get<0>(result);
  LexicalGrammar &lexical_grammar = get<1>(result);

  AssertThat(syntax_grammar.variables, Equals(vector<Variable> {
    Variable {"rule_A", VariableTypeNamed, seq({ i_token(0), i_sym(0), i_token(0) })},
  }));

  // Terminated explicitly so the statement does not depend on the assertion
  // macro supplying its own trailing semicolon.
  AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable> {
    LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true},
  }));
});
|
||||
|
||||
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })},
|
||||
Variable{"rule_B", VariableTypeNamed, str("cd")},
|
||||
Variable{"rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })},
|
||||
}, {}, {}, {}});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })},
|
||||
Variable{"rule_B", VariableTypeNamed, i_token(1)},
|
||||
Variable{"rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })},
|
||||
})));
|
||||
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable> {
|
||||
LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true},
|
||||
LexicalVariable {"cd", VariableTypeAnonymous, str("cd"), true},
|
||||
LexicalVariable {"ef", VariableTypeAnonymous, str("ef"), true},
|
||||
}));
|
||||
});
|
||||
|
||||
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, str("ok")},
|
||||
Variable{"rule_B", VariableTypeNamed, repeat(i_sym(0))},
|
||||
Variable{"rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))},
|
||||
},
|
||||
{
|
||||
str(" ")
|
||||
},
|
||||
{
|
||||
{ Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) }
|
||||
},
|
||||
{}
|
||||
});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(2));
|
||||
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
|
||||
{ Symbol(0, Symbol::NonTerminal), Symbol(1, Symbol::NonTerminal) },
|
||||
})));
|
||||
});
|
||||
|
||||
describe("handling extra tokens", [&]() {
|
||||
it("adds inline extra tokens to the lexical grammar's separators", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
},
|
||||
{
|
||||
str("y"),
|
||||
pattern("\\s+"),
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(2));
|
||||
AssertThat(get<1>(result).separators[0], EqualsPointer(str("y")));
|
||||
AssertThat(get<1>(result).separators[1], EqualsPointer(pattern("\\s+")));
|
||||
|
||||
AssertThat(get<0>(result).extra_tokens, IsEmpty());
|
||||
});
|
||||
|
||||
it("handles inline extra tokens that match tokens in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
},
|
||||
{
|
||||
str("y"),
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, Symbol::Terminal) })));
|
||||
});
|
||||
|
||||
it("updates extra symbols according to the new symbol numbers", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
Variable{"rule_C", VariableTypeNamed, str("z")},
|
||||
},
|
||||
{
|
||||
i_sym(2),
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
|
||||
{ Symbol(3, Symbol::Terminal) },
|
||||
})));
|
||||
|
||||
AssertThat(get<1>(result).separators, IsEmpty());
|
||||
});
|
||||
|
||||
it("returns an error if any extra tokens are non-token symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
|
||||
}, { i_sym(1) }, {}, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
AssertThat(get<2>(result), Equals(
|
||||
CompileError(TSCompileErrorTypeInvalidExtraToken,
|
||||
"Not a token: rule_B")));
|
||||
});
|
||||
|
||||
it("returns an error if any extra tokens are non-token rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
}, { choice({ i_sym(1), blank() }) }, {}, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
AssertThat(get<2>(result), Equals(CompileError(
|
||||
TSCompileErrorTypeInvalidExtraToken,
|
||||
"Not a token: (choice (non-terminal 1) (blank))"
|
||||
)));
|
||||
});
|
||||
});
|
||||
|
||||
it("returns an error if an external token has the same name as a non-terminal rule", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
{
|
||||
ExternalToken {"rule_A", VariableTypeNamed, Symbol(0, Symbol::NonTerminal)}
|
||||
}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError(
|
||||
TSCompileErrorTypeInvalidExternalToken,
|
||||
"Name 'rule_A' cannot be used for both an external token and a non-terminal rule"
|
||||
)));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
89
test/compiler/prepare_grammar/flatten_grammar_test.cc
Normal file
89
test/compiler/prepare_grammar/flatten_grammar_test.cc
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/prepare_grammar/flatten_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::flatten_rule;
|
||||
|
||||
describe("flatten_grammar", []() {
|
||||
// The rule contains one choice with two alternatives, so two productions are
// expected: one through symbols 3 and 4, one through symbol 5.
it("associates each symbol with the precedence and associativity binding it to its successor", [&]() {
  SyntaxVariable result = flatten_rule(Variable{
    "test",
    VariableTypeNamed,
    seq({
      i_sym(1),
      prec_left(101, seq({
        i_sym(2),
        choice({
          prec_right(102, seq({
            i_sym(3),
            i_sym(4)
          })),
          i_sym(5),
        }),
        i_sym(6),
      })),
      i_sym(7),
    })
  });

  AssertThat(result.name, Equals("test"));
  AssertThat(result.type, Equals(VariableTypeNamed));

  // Terminated explicitly so the statement does not depend on the assertion
  // macro supplying its own trailing semicolon.
  AssertThat(result.productions, Equals(vector<Production>({
    Production({
      {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
      {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
      {Symbol(3, Symbol::NonTerminal), 102, AssociativityRight},
      {Symbol(4, Symbol::NonTerminal), 101, AssociativityLeft},
      {Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
      {Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
    }),
    Production({
      {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
      {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
      {Symbol(5, Symbol::NonTerminal), 101, AssociativityLeft},
      {Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
      {Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
    })
  })));
});
|
||||
|
||||
// When a whole rule is wrapped in prec_left(101, ...), every symbol of the
// production, including the final one, is expected to carry precedence 101
// with left associativity. Checked for a two-symbol and a one-symbol sequence.
it("uses the last assigned precedence", [&]() {
  SyntaxVariable result = flatten_rule(Variable{
    "test1",
    VariableTypeNamed,
    prec_left(101, seq({
      i_sym(1),
      i_sym(2),
    }))
  });

  // Both assertions below are terminated explicitly so they do not depend on
  // the assertion macro supplying its own trailing semicolon.
  AssertThat(result.productions, Equals(vector<Production>({
    Production({
      {Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
      {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
    })
  })));

  result = flatten_rule(Variable{
    "test2",
    VariableTypeNamed,
    prec_left(101, seq({
      i_sym(1),
    }))
  });

  AssertThat(result.productions, Equals(vector<Production>({
    Production({
      {Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
    })
  })));
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
103
test/compiler/prepare_grammar/intern_symbols_test.cc
Normal file
103
test/compiler/prepare_grammar/intern_symbols_test.cc
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "helpers/equals_pointer.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::intern_symbols;
|
||||
|
||||
describe("intern_symbols", []() {
|
||||
it("replaces named symbols with numerically-indexed symbols", [&]() {
|
||||
Grammar grammar{
|
||||
{
|
||||
{"x", choice({ sym("y"), sym("_z") })},
|
||||
{"y", sym("_z")},
|
||||
{"_z", str("stuff")}
|
||||
}, {}, {}, {}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>{
|
||||
Variable{"x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })},
|
||||
Variable{"y", VariableTypeNamed, i_sym(2)},
|
||||
Variable{"_z", VariableTypeHidden, str("stuff")},
|
||||
}));
|
||||
});
|
||||
|
||||
describe("when there are symbols that reference undefined rules", [&]() {
|
||||
it("returns an error", []() {
|
||||
Grammar grammar{
|
||||
{
|
||||
{"x", sym("y")},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second.message, Equals("Undefined rule 'y'"));
|
||||
});
|
||||
});
|
||||
|
||||
it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() {
|
||||
Grammar grammar{
|
||||
{
|
||||
{"x", choice({ sym("y"), sym("z") })},
|
||||
{"y", sym("z")},
|
||||
{"z", str("stuff")}
|
||||
},
|
||||
{
|
||||
sym("z")
|
||||
},
|
||||
{}, {}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.extra_tokens.size(), Equals<size_t>(1));
|
||||
AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
|
||||
});
|
||||
|
||||
// External token "z" shares its name with the third rule, so it is expected to
// be paired with Symbol(2, NonTerminal); "w" matches no rule and is expected
// to be paired with rules::NONE().
it("records any rule names that match external token names", [&]() {
  Grammar grammar{
    {
      {"x", choice({ sym("y"), sym("z") })},
      {"y", sym("z")},
      {"z", str("stuff")},
    },
    {},
    {},
    {
      "w",
      "z"
    }
  };

  auto result = intern_symbols(grammar);

  // Terminated explicitly so the statement does not depend on the assertion
  // macro supplying its own trailing semicolon.
  AssertThat(result.first.external_tokens, Equals(vector<ExternalToken>{
    ExternalToken{
      "w",
      VariableTypeNamed,
      rules::NONE()
    },
    ExternalToken{
      "z",
      VariableTypeNamed,
      Symbol(2, Symbol::NonTerminal)
    },
  }));
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
245
test/compiler/prepare_grammar/parse_regex_test.cc
Normal file
245
test/compiler/prepare_grammar/parse_regex_test.cc
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/prepare_grammar/parse_regex.h"
|
||||
#include "helpers/equals_pointer.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::parse_regex;
|
||||
|
||||
describe("parse_regex", []() {
|
||||
struct ValidInputRow {
|
||||
string description;
|
||||
string pattern;
|
||||
rule_ptr rule;
|
||||
};
|
||||
|
||||
vector<ValidInputRow> valid_inputs = {
|
||||
{
|
||||
"character sets",
|
||||
"[aAeE]",
|
||||
character({ 'a', 'A', 'e', 'E' })
|
||||
},
|
||||
|
||||
{
|
||||
"'.' characters as wildcards",
|
||||
".",
|
||||
character({ '\n' }, false)
|
||||
},
|
||||
|
||||
{
|
||||
"character classes",
|
||||
"\\w-\\d-\\s-\\W-\\D-\\S",
|
||||
seq({
|
||||
character({
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
|
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }),
|
||||
character({ '-' }),
|
||||
character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }),
|
||||
character({ '-' }),
|
||||
character({ ' ', '\t', '\r', '\n' }),
|
||||
character({ '-' }),
|
||||
character({
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
|
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }, false),
|
||||
character({ '-' }),
|
||||
character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false),
|
||||
character({ '-' }),
|
||||
character({ ' ', '\t', '\r', '\n' }, false),
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
"choices",
|
||||
"ab|cd|ef",
|
||||
choice({
|
||||
seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }) }),
|
||||
seq({
|
||||
character({ 'c' }),
|
||||
character({ 'd' }) }),
|
||||
seq({
|
||||
character({ 'e' }),
|
||||
character({ 'f' }) }) })
|
||||
},
|
||||
|
||||
{
|
||||
"simple sequences",
|
||||
"abc",
|
||||
seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }),
|
||||
character({ 'c' }) })
|
||||
},
|
||||
|
||||
{
|
||||
"character ranges",
|
||||
"[12a-dA-D3]",
|
||||
character({
|
||||
'1', '2', '3',
|
||||
'a', 'b', 'c', 'd',
|
||||
'A', 'B', 'C', 'D' })
|
||||
},
|
||||
|
||||
{
|
||||
"negated characters",
|
||||
"[^a\\d]",
|
||||
character({ 'a', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false)
|
||||
},
|
||||
|
||||
{
|
||||
"backslashes",
|
||||
"\\\\",
|
||||
character({ '\\' })
|
||||
},
|
||||
|
||||
{
|
||||
"character groups in sequences",
|
||||
"x([^x]|\\\\x)*x",
|
||||
seq({
|
||||
character({ 'x' }),
|
||||
repeat(choice({
|
||||
character({ 'x' }, false),
|
||||
seq({ character({ '\\' }), character({ 'x' }) }) })),
|
||||
character({ 'x' }) })
|
||||
},
|
||||
|
||||
{
|
||||
"choices in sequences",
|
||||
"(a|b)cd",
|
||||
seq({
|
||||
choice({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }) }),
|
||||
character({ 'c' }),
|
||||
character({ 'd' }) })
|
||||
},
|
||||
|
||||
{
|
||||
"escaped parentheses",
|
||||
"a\\(b",
|
||||
seq({
|
||||
character({ 'a' }),
|
||||
character({ '(' }),
|
||||
character({ 'b' }) })
|
||||
},
|
||||
|
||||
{
|
||||
"escaped periods",
|
||||
"a\\.",
|
||||
seq({
|
||||
character({ 'a' }),
|
||||
character({ '.' }) })
|
||||
},
|
||||
|
||||
{
|
||||
"escaped characters",
|
||||
"\\t\\n\\r",
|
||||
seq({
|
||||
character({ '\t' }),
|
||||
character({ '\n' }),
|
||||
character({ '\r' }) })
|
||||
},
|
||||
|
||||
{
|
||||
"plus repeats",
|
||||
"(ab)+(cd)+",
|
||||
seq({
|
||||
repeat1(seq({ character({ 'a' }), character({ 'b' }) })),
|
||||
repeat1(seq({ character({ 'c' }), character({ 'd' }) })) })
|
||||
},
|
||||
|
||||
{
|
||||
"asterix repeats",
|
||||
"(ab)*(cd)*",
|
||||
seq({
|
||||
repeat(seq({ character({ 'a' }), character({ 'b' }) })),
|
||||
repeat(seq({ character({ 'c' }), character({ 'd' }) })) })
|
||||
},
|
||||
|
||||
{
|
||||
"optional rules",
|
||||
"a(bc)?",
|
||||
seq({
|
||||
character({ 'a' }),
|
||||
choice({
|
||||
seq({ character({ 'b' }), character({ 'c' }) }),
|
||||
blank() }) })
|
||||
},
|
||||
|
||||
{
|
||||
"choices containing negated character classes",
|
||||
"/([^/]|(\\\\/))*/",
|
||||
seq({
|
||||
character({ '/' }),
|
||||
repeat(choice({
|
||||
character({ '/' }, false),
|
||||
seq({ character({ '\\' }), character({ '/' }) }) })),
|
||||
character({ '/' }), }),
|
||||
},
|
||||
};
|
||||
|
||||
struct InvalidInputRow {
|
||||
string description;
|
||||
string pattern;
|
||||
const char *message;
|
||||
};
|
||||
|
||||
// Malformed patterns paired with a substring expected in the resulting error
// message. Each description becomes part of a generated test name, so the
// descriptions are kept distinct to make failures attributable to one row.
vector<InvalidInputRow> invalid_inputs = {
  {
    "mismatched open parens",
    "(a",
    "unmatched open paren",
  },
  {
    "mismatched nested open parens",
    "((a) (b)",
    "unmatched open paren",
  },
  {
    "mismatched close parens",
    "a)",
    "unmatched close paren",
  },
  {
    "mismatched nested close parens",
    "((a) b))",
    "unmatched close paren",
  },
  {
    "mismatched open brackets for character classes",
    "[a",
    "unmatched open square bracket",
  },
  {
    "mismatched close brackets for character classes",
    "a]",
    "unmatched close square bracket",
  },
};
|
||||
|
||||
for (auto &row : valid_inputs) {
|
||||
it(("parses " + row.description).c_str(), [&]() {
|
||||
auto result = parse_regex(row.pattern);
|
||||
AssertThat(result.first, EqualsPointer(row.rule));
|
||||
});
|
||||
}
|
||||
|
||||
for (auto &row : invalid_inputs) {
|
||||
it(("handles invalid regexes with " + row.description).c_str(), [&]() {
|
||||
auto result = parse_regex(row.pattern);
|
||||
AssertThat(result.second.type, Equals(TSCompileErrorTypeInvalidRegex));
|
||||
AssertThat(result.second.message, Contains(row.message));
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
END_TEST
|
||||
337
test/compiler/rules/character_set_test.cc
Normal file
337
test/compiler/rules/character_set_test.cc
Normal file
|
|
@ -0,0 +1,337 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("CharacterSet", []() {
|
||||
// Equality is checked in both directions wherever the sets are expected to
// differ, so an asymmetric comparison would be caught.
describe("equality", [&]() {
  it("returns true for identical character sets", [&]() {
    auto lhs = CharacterSet().include('a', 'd').include('f', 'm');
    auto rhs = CharacterSet().include('a', 'd').include('f', 'm');

    AssertThat(lhs, Equals(rhs));
  });

  it("returns false for character sets that include different ranges", [&]() {
    auto lhs = CharacterSet().include('a', 'd').include('f', 'm');
    auto rhs = CharacterSet().include('a', 'c').include('f', 'm');

    AssertThat(lhs, !Equals(rhs));
    AssertThat(rhs, !Equals(lhs));
  });

  it("returns false for character sets that exclude different ranges", [&]() {
    auto lhs = CharacterSet().include_all().exclude('a', 'd').exclude('f', 'm');
    auto rhs = CharacterSet().include_all().exclude('a', 'c').exclude('f', 'm');

    AssertThat(lhs, !Equals(rhs));
    AssertThat(rhs, !Equals(lhs));
  });

  it("returns false for character sets with different sign", [&]() {
    auto everything = CharacterSet().include_all();
    auto nothing = CharacterSet();

    AssertThat(everything, !Equals(nothing));
    AssertThat(nothing, !Equals(everything));
  });
});
|
||||
|
||||
// Mirrors the equality examples, but compares hash_code() values instead of
// the sets themselves.
describe("hashing", [&]() {
  it("returns the same number for identical character sets", [&]() {
    auto lhs = CharacterSet().include('a', 'd').include('f', 'm');
    auto rhs = CharacterSet().include('a', 'd').include('f', 'm');

    AssertThat(lhs.hash_code(), Equals(rhs.hash_code()));
  });

  it("returns different numbers for character sets that include different ranges", [&]() {
    auto lhs = CharacterSet().include('a', 'd').include('f', 'm');
    auto rhs = CharacterSet().include('a', 'c').include('f', 'm');

    AssertThat(lhs.hash_code(), !Equals(rhs.hash_code()));
    AssertThat(rhs.hash_code(), !Equals(lhs.hash_code()));
  });

  it("returns different numbers for character sets that exclude different ranges", [&]() {
    auto lhs = CharacterSet().include_all().exclude('a', 'd').exclude('f', 'm');
    auto rhs = CharacterSet().include_all().exclude('a', 'c').exclude('f', 'm');

    AssertThat(lhs.hash_code(), !Equals(rhs.hash_code()));
    AssertThat(rhs.hash_code(), !Equals(lhs.hash_code()));
  });

  it("returns different numbers for character sets with different sign", [&]() {
    auto everything = CharacterSet().include_all();
    auto nothing = CharacterSet();

    AssertThat(everything.hash_code(), !Equals(nothing.hash_code()));
    AssertThat(nothing.hash_code(), !Equals(everything.hash_code()));
  });
});
|
||||
|
||||
// is_empty() is checked for a fresh set, a set built with include_all(), and
// a set holding a single character.
describe("::is_empty", [&]() {
  it("returns true for empty character sets", [&]() {
    auto nothing = CharacterSet();
    AssertThat(nothing.is_empty(), Equals(true));
  });

  it("returns false for full character sets", [&]() {
    auto everything = CharacterSet().include_all();
    AssertThat(everything.is_empty(), Equals(false));
  });

  it("returns false for character sets that include some characters", [&]() {
    auto just_x = CharacterSet().include('x');
    AssertThat(just_x.is_empty(), Equals(false));
  });
});
|
||||
|
||||
// Range-based include() calls are compared against sets assembled one
// character at a time.
describe("::include", [&]() {
  describe("when the set has a whitelist of characters", [&]() {
    it("adds included characters", [&]() {
      auto actual = CharacterSet().include('a', 'd');
      auto expected = CharacterSet()
        .include('a')
        .include('b')
        .include('c')
        .include('d');

      AssertThat(actual, Equals(expected));
    });
  });

  describe("when the set has a blacklist of characters", [&]() {
    it("removes excluded characters", [&]() {
      // Excluding a-g and then re-including c-e leaves a, b, f, g excluded.
      auto actual = CharacterSet()
        .include_all()
        .exclude('a', 'g')
        .include('c', 'e');
      auto expected = CharacterSet()
        .include_all()
        .exclude('a')
        .exclude('b')
        .exclude('f')
        .exclude('g');

      AssertThat(actual, Equals(expected));
    });

    it("does nothing if the character are already not excluded", [&]() {
      auto actual = CharacterSet().include_all().include('a', 'c');
      auto expected = CharacterSet().include_all();

      AssertThat(actual, Equals(expected));
    });
  });
});
|
||||
|
||||
// Range-based exclude() calls are compared against sets assembled one
// character at a time.
describe("::exclude", [&]() {
  describe("when the set has a whitelist of characters", [&]() {
    it("removes included characters", [&]() {
      // Including a-g and then excluding c-e leaves a, b, f, g included.
      auto actual = CharacterSet().include('a', 'g').exclude('c', 'e');
      auto expected = CharacterSet()
        .include('a')
        .include('b')
        .include('f')
        .include('g');

      AssertThat(actual, Equals(expected));
    });

    it("does nothing if the character's are already not included", [&]() {
      auto actual = CharacterSet().exclude('a', 'c');
      auto expected = CharacterSet();

      AssertThat(actual, Equals(expected));
    });
  });

  describe("when the set has a blacklist of characters", [&]() {
    it("removes excluded characters", [&]() {
      auto actual = CharacterSet().include_all().exclude('a', 'd');
      auto expected = CharacterSet()
        .include_all()
        .exclude('a')
        .exclude('b')
        .exclude('c')
        .exclude('d');

      AssertThat(actual, Equals(expected));
    });
  });
});
|
||||
|
||||
// remove_set() is exercised for every whitelist/blacklist pairing of receiver
// and argument. Each example checks the receiver after the call, the value
// the call returned, or both.
describe("::remove_set", []() {
  describe("for a set with whitelisted characters", [&]() {
    describe("when the subtractend has whitelisted characters", [&]() {
      it("removes the included characters that the other set also includes", [&]() {
        auto receiver = CharacterSet().include('a', 'z');
        receiver.remove_set(CharacterSet().include('d', 's'));

        auto expected = CharacterSet().include('a', 'c').include('t', 'z');
        AssertThat(receiver, Equals(expected));
      });

      it("returns the characters that were removed", [&]() {
        auto receiver = CharacterSet().include('a', 'z');
        CharacterSet removed = receiver.remove_set(CharacterSet().include('d', 's'));

        AssertThat(removed, Equals(CharacterSet().include('d', 's')));
      });

      it("returns the empty set when the sets are disjoint", [&]() {
        auto receiver = CharacterSet().include('a', 'z');
        CharacterSet removed = receiver.remove_set(CharacterSet().include('A', 'Z'));

        AssertThat(receiver, Equals(CharacterSet().include('a', 'z')));
        AssertThat(removed, Equals(CharacterSet()));
      });
    });

    describe("when the subtractend has blacklisted characters", [&]() {
      it("removes the included characters that are not excluded by the other set", [&]() {
        auto receiver = CharacterSet().include('a', 'f');
        CharacterSet removed = receiver.remove_set(
          CharacterSet().include_all().exclude('d', 'z'));

        AssertThat(receiver, Equals(CharacterSet().include('d', 'f')));
        AssertThat(removed, Equals(CharacterSet().include('a', 'c')));
      });
    });
  });

  describe("for a set with blacklisted characters", [&]() {
    describe("when the subtractend has whitelisted characters", [&]() {
      it("adds the subtractend's inclusions to the receiver's exclusions", [&]() {
        auto receiver = CharacterSet().include_all().exclude('a', 'f');
        CharacterSet removed = receiver.remove_set(CharacterSet().include('x', 'z'));

        auto expected_receiver = CharacterSet()
          .include_all()
          .exclude('a', 'f')
          .exclude('x', 'z');
        AssertThat(receiver, Equals(expected_receiver));
        AssertThat(removed, Equals(CharacterSet().include('x', 'z')));
      });
    });

    describe("when the subtractend has blacklisted characters", [&]() {
      it("includes only the characters excluded by the subtractend but not by the receiver", [&]() {
        auto receiver = CharacterSet().include_all().exclude('a', 'm');
        receiver.remove_set(CharacterSet().include_all().exclude('d', 'z'));

        AssertThat(receiver, Equals(CharacterSet().include('n', 'z')));
      });

      it("returns the characters excluded by neither set", [&]() {
        auto receiver = CharacterSet().include_all().exclude('a', 'm');
        CharacterSet removed = receiver.remove_set(
          CharacterSet().include_all().exclude('d', 'z'));

        auto expected_removed = CharacterSet().include_all().exclude('a', 'z');
        AssertThat(removed, Equals(expected_removed));
      });

      // NOTE(review): the operands here are the same overlapping pair used by
      // the two examples above (exclusions a-m and d-z), so this does not look
      // like a disjoint case -- confirm the intended inputs or description.
      it("works when the sets are disjoint", [&]() {
        auto receiver = CharacterSet().include_all().exclude('a', 'm');
        CharacterSet removed = receiver.remove_set(
          CharacterSet().include_all().exclude('d', 'z'));

        auto expected_removed = CharacterSet().include_all().exclude('a', 'z');
        AssertThat(receiver, Equals(CharacterSet().include('n', 'z')));
        AssertThat(removed, Equals(expected_removed));
      });
    });
  });
});
|
||||
|
||||
// included_ranges() should merge runs of three or more consecutive characters
// into one CharacterRange and report shorter runs character by character.
describe("::included_ranges", [&]() {
  it("consolidates sequences of 3 or more consecutive characters into ranges", [&]() {
    auto chars = CharacterSet().include('a', 'c').include('g').include('z');

    vector<CharacterRange> expected({
      CharacterRange('a', 'c'),
      CharacterRange('g'),
      CharacterRange('z'),
    });
    AssertThat(chars.included_ranges(), Equals(expected));
  });

  it("doesn't consolidate sequences of 2 consecutive characters", [&]() {
    auto chars = CharacterSet().include('a', 'b').include('g').include('z');

    vector<CharacterRange> expected({
      CharacterRange('a'),
      CharacterRange('b'),
      CharacterRange('g'),
      CharacterRange('z'),
    });
    AssertThat(chars.included_ranges(), Equals(expected));
  });
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
53
test/compiler/rules/choice_test.cc
Normal file
53
test/compiler/rules/choice_test.cc
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/equals_pointer.h"
|
||||
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
// Choice::build is expected to drop duplicate members, including duplicates
// that arrive through a nested choice, and to return the lone member itself
// when only one unique member remains.
describe("Choice", []() {
  describe("constructing choices", [&]() {
    // Builds a fresh `one two` sequence on every call so that each use is a
    // separately constructed rule, as in a hand-written literal.
    auto one_then_two = []() { return seq({ sym("one"), sym("two") }); };

    it("eliminates duplicate members", [&]() {
      auto built = Choice::build({
        one_then_two(),
        sym("three"),
        one_then_two()
      });

      auto expected = choice({
        one_then_two(),
        sym("three"),
      });
      AssertThat(built, EqualsPointer(expected));
    });

    it("eliminates duplicates within nested choices", [&]() {
      auto built = Choice::build({
        one_then_two(),
        Choice::build({
          sym("three"),
          one_then_two()
        })
      });

      auto expected = choice({
        one_then_two(),
        sym("three"),
      });
      AssertThat(built, EqualsPointer(expected));
    });

    it("doesn't construct a choice if there's only one unique member", [&]() {
      auto built = Choice::build({
        sym("one"),
        Choice::build({
          sym("one"),
        })
      });

      AssertThat(built, EqualsPointer(sym("one")));
    });
  });
});
|
||||
|
||||
END_TEST
|
||||
22
test/compiler/rules/repeat_test.cc
Normal file
22
test/compiler/rules/repeat_test.cc
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
// Wrapping an existing repeat in another Repeat::build call should hand back
// a rule equal to the inner repeat rather than nesting a second one.
describe("Repeat", []() {
  describe("constructing repeats", [&]() {
    it("doesn't create redundant repeats", [&]() {
      auto inner_symbol = make_shared<Symbol>(1, Symbol::NonTerminal);
      auto single = Repeat::build(inner_symbol);
      auto doubled = Repeat::build(single);

      AssertThat(single, !Equals(inner_symbol));
      AssertThat(doubled, Equals(single));
    });
  });
});
|
||||
|
||||
END_TEST
|
||||
26
test/compiler/util/string_helpers_test.cc
Normal file
26
test/compiler/util/string_helpers_test.cc
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
using util::escape_char;
|
||||
|
||||
START_TEST
|
||||
|
||||
// escape_char is checked for a plain character, for the characters that need
// a backslash escape, and for a code point above the single-byte range.
describe("escape_char", []() {
  it("returns ascii characters as strings", [&]() {
    auto plain = escape_char('x');
    AssertThat(plain, Equals("'x'"));
  });

  it("escapes special characters with backslashes", [&]() {
    auto backslash = escape_char('\\');
    AssertThat(backslash, Equals("'\\\\'"));

    auto newline = escape_char('\n');
    AssertThat(newline, Equals("'\\n'"));

    auto tab = escape_char('\t');
    AssertThat(tab, Equals("'\\t'"));

    auto carriage_return = escape_char('\r');
    AssertThat(carriage_return, Equals("'\\r'"));

    auto single_quote = escape_char('\'');
    AssertThat(single_quote, Equals("'\\''"));
  });

  it("prints non-ascii characters as numbers", [&]() {
    auto numeric = escape_char(256);
    AssertThat(numeric, Equals("256"));
  });
});
|
||||
|
||||
END_TEST
|
||||
Loading…
Add table
Add a link
Reference in a new issue