Rename spec -> test

'Test' is a much more straightforward name.
This commit is contained in:
Max Brunsfeld 2017-03-09 20:40:01 -08:00
parent 7d8daf573e
commit 6dc0ff359d
109 changed files with 44 additions and 44 deletions

View file

@ -0,0 +1,89 @@
#include "test_helper.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/parse_table.h"
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/lex_item.h"
using namespace rules;
using namespace build_tables;
START_TEST
// Unit tests for LexConflictManager::resolve, which arbitrates between two
// conflicting lexer actions (advance vs. accept-token, or two accept-token
// actions) and records bookkeeping about tokens that can shadow one another
// ("possible_homonyms") or be extended into longer tokens
// ("possible_extensions").
describe("LexConflictManager::resolve(new_action, old_action)", []() {
LexConflictManager conflict_manager;
// `resolve` returns true iff the *new* (first) action should win.
bool update;
// Four distinct terminal symbols used as stand-in tokens throughout.
Symbol sym1(0, Symbol::Terminal);
Symbol sym2(1, Symbol::Terminal);
Symbol sym3(2, Symbol::Terminal);
Symbol sym4(3, Symbol::Terminal);
// An item set whose only in-progress token is sym4; used by the
// advance/accept-token examples below.
LexItemSet item_set({ LexItem(sym4, blank() )});
before_each([&]() {
// Start each example with a fresh manager so recorded state can't leak
// between examples.
conflict_manager = LexConflictManager();
});
it("favors advance actions over empty accept token actions", [&]() {
update = conflict_manager.resolve(item_set, AdvanceAction(2, {0, 0}, true), AcceptTokenAction());
AssertThat(update, IsTrue());
});
describe("accept-token/accept-token conflicts", [&]() {
describe("when the tokens' precedence values differ", [&]() {
it("favors the token with higher precedence", [&]() {
// AcceptTokenAction(symbol, precedence, is_string): sym2 with
// precedence 1 loses to sym1 with precedence 2, in either order.
update = conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false));
AssertThat(update, IsFalse());
update = conflict_manager.resolve(AcceptTokenAction(sym1, 2, false), AcceptTokenAction(sym2, 1, false));
AssertThat(update, IsTrue());
});
it("adds the preferred token as a possible homonym for the discarded one", [&]() {
conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false));
AssertThat(conflict_manager.possible_homonyms[sym2.index], Contains(sym1.index));
});
});
describe("when one token is string-based and the other is regexp-based", [&]() {
it("favors the string-based token", [&]() {
// The trailing boolean marks the accept action as string-based —
// the `true` action wins in both directions here.
update = conflict_manager.resolve(AcceptTokenAction(sym1, 0, false), AcceptTokenAction(sym2, 0, true));
AssertThat(update, IsFalse());
update = conflict_manager.resolve(AcceptTokenAction(sym2, 0, true), AcceptTokenAction(sym1, 0, false));
AssertThat(update, IsTrue());
});
});
describe("when the tokens have equal precedence", [&]() {
it("favors the token listed earlier in the grammar", [&]() {
// Ties are broken by symbol index: sym1 (index 0) beats sym2 (index 1).
update = conflict_manager.resolve(AcceptTokenAction(sym2, 0, false), AcceptTokenAction(sym1, 0, false));
AssertThat(update, IsFalse());
update = conflict_manager.resolve(AcceptTokenAction(sym1, 0, false), AcceptTokenAction(sym2, 0, false));
AssertThat(update, IsTrue());
});
});
});
describe("advance/accept-token conflicts", [&]() {
describe("when the token to accept has higher precedence", [&]() {
it("prefers the accept-token action", [&]() {
AssertThat(conflict_manager.possible_extensions, IsEmpty());
// Accepting sym3 at precedence 3 beats an advance whose precedence
// range is {1, 2}, so nothing is recorded as a possible extension.
update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
AssertThat(update, IsFalse());
AssertThat(conflict_manager.possible_extensions, IsEmpty());
});
});
describe("when the token to accept does not have a higher precedence", [&]() {
it("favors the advance action and adds the in-progress tokens as possible extensions of the discarded token", [&]() {
// The item set's in-progress token (sym4) may extend the discarded
// token sym3, so that relationship is recorded.
update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 2, true));
AssertThat(update, IsTrue());
AssertThat(conflict_manager.possible_extensions[sym3.index], Contains(sym4.index));
});
});
});
});
END_TEST

View file

@ -0,0 +1,514 @@
#include "test_helper.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
using namespace rules;
using namespace build_tables;
typedef LexItemSet::Transition Transition;
START_TEST
// Tests for LexItem::completion_status(), which reports whether a lex item's
// rule can match the empty string at its current position ("done") and, if
// so, with what precedence range.
describe("LexItem", []() {
describe("completion_status()", [&]() {
it("indicates whether the item is done and its precedence", [&]() {
// A bare character set still requires input, so the item is not done.
LexItem item1(Symbol(0, Symbol::Terminal), character({ 'a', 'b', 'c' }));
AssertThat(item1.completion_status().is_done, IsFalse());
AssertThat(item1.completion_status().precedence, Equals(PrecedenceRange()));
MetadataParams params;
params.precedence = 3;
params.has_precedence = true;
params.is_string = 1;
// A choice with a precedence-annotated blank branch can complete
// immediately, and reports that branch's precedence.
LexItem item2(Symbol(0, Symbol::Terminal), choice({
metadata(blank(), params),
character({ 'a', 'b', 'c' })
}));
AssertThat(item2.completion_status().is_done, IsTrue());
AssertThat(item2.completion_status().precedence, Equals(PrecedenceRange(3)));
// A repeat may match zero occurrences, so it is also complete, with no
// precedence applied.
LexItem item3(Symbol(0, Symbol::Terminal), repeat(character({ ' ', '\t' })));
AssertThat(item3.completion_status().is_done, IsTrue());
AssertThat(item3.completion_status().precedence, Equals(PrecedenceRange()));
});
});
});
// Tests for LexItemSet::transitions(): given a set of in-progress lex items,
// compute a map from *disjoint* character sets to a Transition holding the
// successor item set, the precedence range applied when consuming those
// characters, and whether the transition is within the main token (as
// opposed to a separator).
describe("LexItemSet::transitions()", [&]() {
it("handles single characters", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
});
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('x'),
Transition{
LexItemSet({
// Consuming 'x' leaves a completed (blank) item.
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
}),
PrecedenceRange(),
false
}
}
})));
});
it("marks transitions that are within the main token (as opposed to separators)", [&]() {
MetadataParams params;
params.is_main_token = true;
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), metadata(character({ 'x' }), params)),
});
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('x'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), metadata(blank(), params)),
}),
PrecedenceRange(),
// The in-main-token flag is propagated from the metadata rule.
true
}
}
})));
});
it("handles sequences", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'w' }),
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
})),
});
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('w'),
Transition{
LexItemSet({
// Only the head of the sequence is consumed.
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
})),
}),
PrecedenceRange(),
false
}
}
})));
});
it("handles sequences with nested precedence", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
prec(3, seq({
character({ 'v' }),
prec(4, seq({
character({ 'w' }),
character({ 'x' }) })),
character({ 'y' }) })),
character({ 'z' }),
})),
});
// This example walks the item set through four consecutive transitions,
// checking at each step which precedence becomes 'active' and which is
// applied to the transition itself.
auto transitions = item_set.transitions();
AssertThat(
transitions,
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('v'),
Transition{
// The outer precedence is now 'active', because we are within its
// contained rule.
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
active_prec(3, seq({
prec(4, seq({
character({ 'w' }),
character({ 'x' }) })),
character({ 'y' }) })),
character({ 'z' }),
})),
}),
// No precedence is applied upon entering a rule.
PrecedenceRange(),
false
}
}
})));
LexItemSet item_set2 = transitions[CharacterSet().include('v')].destination;
transitions = item_set2.transitions();
AssertThat(
transitions,
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('w'),
Transition{
// The inner precedence is now 'active'
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
active_prec(3, seq({
active_prec(4, character({ 'x' })),
character({ 'y' }) })),
character({ 'z' }),
})),
}),
// The outer precedence is applied.
PrecedenceRange(3),
false
}
}
})));
LexItemSet item_set3 = transitions[CharacterSet().include('w')].destination;
transitions = item_set3.transitions();
AssertThat(
transitions,
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('x'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
active_prec(3, character({ 'y' })),
character({ 'z' }),
})),
}),
// The inner precedence is applied.
PrecedenceRange(4),
false
}
}
})));
LexItemSet item_set4 = transitions[CharacterSet().include('x')].destination;
transitions = item_set4.transitions();
AssertThat(
transitions,
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('y'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
}),
PrecedenceRange(3),
false
}
}
})));
});
it("handles sequences where the left hand side can be blank", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
choice({
character({ 'x' }),
blank(),
}),
character({ 'y' }),
character({ 'z' }),
})),
});
// Because the leading choice can be blank, 'y' is also a valid first
// character, so two disjoint transitions are produced.
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('x'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'y' }),
character({ 'z' }),
})),
}),
PrecedenceRange(),
false
}
},
{
CharacterSet().include('y'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
}),
PrecedenceRange(),
false
}
}
})));
});
it("handles blanks", [&]() {
// A fully-completed item has nowhere to go.
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
});
AssertThat(item_set.transitions(), IsEmpty());
});
it("handles repeats", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), repeat1(seq({
character({ 'a' }),
character({ 'b' }),
}))),
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
});
// Each repeat expands into two successor items: one that loops back into
// the repeat and one that finishes after the current iteration.
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'b' }),
repeat1(seq({
character({ 'a' }),
character({ 'b' }),
}))
})),
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'b' })),
}),
PrecedenceRange(),
false
}
},
{
CharacterSet().include('c'),
Transition{
LexItemSet({
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
}),
PrecedenceRange(),
false
}
}
})));
});
it("handles repeats with precedence", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' }))))
});
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' })))),
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, blank())),
}),
// The already-active (negative) precedence applies to the transition.
PrecedenceRange(-1),
false
}
}
})));
});
it("handles choices between overlapping character sets", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), choice({
active_prec(2, seq({
character({ 'a', 'b', 'c', 'd' }),
character({ 'x' }),
})),
active_prec(3, seq({
character({ 'c', 'd', 'e', 'f' }),
character({ 'y' }),
})),
}))
});
// Overlapping sets are split into three disjoint ranges; the overlap
// ('c', 'd') leads to both branches and merges their precedences.
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a', 'b'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
}),
PrecedenceRange(2),
false
}
},
{
CharacterSet().include('c', 'd'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
}),
PrecedenceRange(2, 3),
false
}
},
{
CharacterSet().include('e', 'f'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
}),
PrecedenceRange(3),
false
}
},
})));
});
it("handles choices between a subset and a superset of characters", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), choice({
seq({
character({ 'b', 'c', 'd' }),
character({ 'x' }),
}),
seq({
character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
character({ 'y' }),
}),
})),
});
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
// Characters only in the superset lead only to the second branch.
CharacterSet().include('a').include('e', 'f'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
}),
PrecedenceRange(),
false
}
},
{
// The shared characters lead to both branches.
CharacterSet().include('b', 'd'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
}),
PrecedenceRange(),
false
}
},
})));
});
it("handles choices between whitelisted and blacklisted character sets", [&]() {
LexItemSet item_set({
// Models a regex-literal-like rule: any non-'/' character (the second
// argument `false` makes the set an exclusion), or an escaped '/'.
LexItem(Symbol(1, Symbol::NonTerminal), seq({
choice({
character({ '/' }, false),
seq({
character({ '\\' }),
character({ '/' }),
}),
}),
character({ '/' }),
}))
});
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
// '\\' is carved out of the negated set because it also starts the
// escape branch.
CharacterSet().include_all().exclude('/').exclude('\\'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
}),
PrecedenceRange(),
false
}
},
{
CharacterSet().include('\\'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
LexItem(Symbol(1, Symbol::NonTerminal), seq({ character({ '/' }), character({ '/' }) })),
}),
PrecedenceRange(),
false
}
},
})));
});
it("handles different items with overlapping character sets", [&]() {
LexItemSet set1({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
LexItem(Symbol(2, Symbol::NonTerminal), character({ 'e', 'f', 'g', 'h', 'i' }))
});
// As with overlapping choices, the sets are split so each transition's
// characters are disjoint; the overlap advances both items.
AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a', 'd'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
}),
PrecedenceRange(),
false
}
},
{
CharacterSet().include('e', 'f'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
}),
PrecedenceRange(),
false
}
},
{
CharacterSet().include('g', 'i'),
Transition{
LexItemSet({
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
}),
PrecedenceRange(),
false
}
},
})));
});
});
END_TEST

View file

@ -0,0 +1,134 @@
#include "test_helper.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/build_tables/parse_item_set_builder.h"
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/rules/built_in_symbols.h"
#include "helpers/rule_helpers.h"
using namespace build_tables;
using namespace rules;
START_TEST
// Tests for ParseItemSetBuilder::apply_transitive_closure, which expands a
// kernel parse-item set by adding items at position 0 for every non-terminal
// that appears immediately after a dot, with the correct lookahead sets.
describe("ParseItemSetBuilder", []() {
// A pool of 20 trivial named tokens so the grammars below can reference
// terminal symbols 10..15 without constructing each one by hand.
vector<LexicalVariable> lexical_variables;
for (size_t i = 0; i < 20; i++) {
lexical_variables.push_back({
"token_" + to_string(i),
VariableTypeNamed,
blank(),
false
});
}
LexicalGrammar lexical_grammar{lexical_variables, {}};
it("adds items at the beginnings of referenced rules", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production({
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
}),
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production({
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
}),
Production({
{Symbol(2, Symbol::NonTerminal), 0, AssociativityNone},
})
}},
SyntaxVariable{"rule2", VariableTypeNamed, {
Production({
{Symbol(14, Symbol::Terminal), 0, AssociativityNone},
{Symbol(15, Symbol::Terminal), 0, AssociativityNone},
})
}},
}, {}, {}, {}};
// Shorthand accessor for the grammar's productions by index.
auto production = [&](int variable_index, int production_index) -> const Production & {
return grammar.variables[variable_index].productions[production_index];
};
// Kernel: rule0's production at position 0, lookahead token 10.
ParseItemSet item_set({
{
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
}
});
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
item_set_builder.apply_transitive_closure(&item_set);
// The closure adds rule1's productions (rule1 follows the dot in rule0,
// with token 11 following it), and in turn rule2's production (reached
// through rule1's second production, inheriting the same lookahead).
AssertThat(item_set, Equals(ParseItemSet({
{
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) })
},
{
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
},
{
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
},
{
ParseItem(Symbol(2, Symbol::NonTerminal), production(2, 0), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
},
})));
});
it("handles rules with empty productions", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production({
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
}),
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production({
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
}),
// An empty production: rule1 may match nothing at all.
Production({})
}},
}, {}, {}, {}};
auto production = [&](int variable_index, int production_index) -> const Production & {
return grammar.variables[variable_index].productions[production_index];
};
ParseItemSet item_set({
{
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
}
});
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
item_set_builder.apply_transitive_closure(&item_set);
// The empty production is included as an (already-complete) item with the
// inherited lookahead, and does not derail the closure.
AssertThat(item_set, Equals(ParseItemSet({
{
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) })
},
{
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
},
{
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
},
})));
});
});
END_TEST

View file

@ -0,0 +1,60 @@
#include "test_helper.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules.h"
#include "helpers/rule_helpers.h"
using namespace rules;
using build_tables::rule_can_be_blank;
START_TEST
// Tests for build_tables::rule_can_be_blank, a predicate that reports whether
// a rule can match the empty string.
describe("rule_can_be_blank", [&]() {
rule_ptr rule;
it("returns false for basic rules", [&]() {
// Symbols, strings, and patterns all require at least some input.
AssertThat(rule_can_be_blank(i_sym(3)), IsFalse());
AssertThat(rule_can_be_blank(str("x")), IsFalse());
AssertThat(rule_can_be_blank(pattern("x")), IsFalse());
});
it("returns true for blanks", [&]() {
AssertThat(rule_can_be_blank(blank()), IsTrue());
});
it("returns true for repeats", [&]() {
// `repeat` allows zero occurrences (unlike repeat1).
AssertThat(rule_can_be_blank(repeat(str("x"))), IsTrue());
});
it("returns true for choices iff one or more sides can be blank", [&]() {
rule = choice({ sym("x"), blank() });
AssertThat(rule_can_be_blank(rule), IsTrue());
rule = choice({ blank(), sym("x") });
AssertThat(rule_can_be_blank(rule), IsTrue());
rule = choice({ sym("x"), sym("y") });
AssertThat(rule_can_be_blank(rule), IsFalse());
});
it("returns true for sequences iff both sides can be blank", [&]() {
rule = seq({ blank(), str("x") });
AssertThat(rule_can_be_blank(rule), IsFalse());
rule = seq({ str("x"), blank() });
AssertThat(rule_can_be_blank(rule), IsFalse());
rule = seq({ blank(), choice({ sym("x"), blank() }) });
AssertThat(rule_can_be_blank(rule), IsTrue());
});
it("ignores metadata rules", [&]() {
// Metadata wrappers are transparent: the answer depends only on the
// wrapped rule.
rule = make_shared<rules::Metadata>(blank(), MetadataParams());
AssertThat(rule_can_be_blank(rule), IsTrue());
rule = make_shared<rules::Metadata>(sym("one"), MetadataParams());
AssertThat(rule_can_be_blank(rule), IsFalse());
});
});
END_TEST

View file

@ -0,0 +1,171 @@
#include "test_helper.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/prepare_grammar/expand_repeats.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
START_TEST
using namespace rules;
using prepare_grammar::InitialSyntaxGrammar;
using prepare_grammar::expand_repeats;
// Tests for prepare_grammar::expand_repeats, which rewrites `repeat1` rules
// into auxiliary left-recursive variables (named "<rule>_repeatN") so that
// the resulting grammar contains no repeat nodes.
describe("expand_repeats", []() {
it("replaces repeat rules with pairs of recursive rules", [&]() {
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, repeat1(i_token(0))},
},
{}, {}, {}
};
auto result = expand_repeats(grammar);
// rule0 now refers to a new auxiliary variable (symbol 1) which matches
// either "itself followed by the token" or the token alone.
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, i_sym(1)},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(1), i_token(0) }),
i_token(0),
})},
}));
});
it("replaces repeats inside of sequences", [&]() {
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, seq({
i_token(10),
repeat1(i_token(11)),
})},
},
{}, {}, {}
};
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, seq({
i_token(10),
i_sym(1),
})},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(1), i_token(11) }),
i_token(11)
})},
}));
});
it("replaces repeats inside of choices", [&]() {
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, choice({
i_token(10),
repeat1(i_token(11))
})},
},
{}, {}, {}
};
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, choice({
i_token(10),
i_sym(1),
})},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(1), i_token(11) }),
i_token(11),
})},
}));
});
it("does not create redundant auxiliary rules", [&]() {
// Three occurrences of repeat1(i_token(4)) across two rules should share
// a single auxiliary variable.
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, choice({
seq({ i_token(1), repeat1(i_token(4)) }),
seq({ i_token(2), repeat1(i_token(4)) }),
})},
Variable{"rule1", VariableTypeNamed, seq({
i_token(3),
repeat1(i_token(4))
})},
},
{}, {}, {}
};
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, choice({
seq({ i_token(1), i_sym(2) }),
seq({ i_token(2), i_sym(2) }),
})},
Variable{"rule1", VariableTypeNamed, seq({
i_token(3),
i_sym(2),
})},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(2), i_token(4) }),
i_token(4),
})},
}));
});
it("can replace multiple repeats in the same rule", [&]() {
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, seq({
repeat1(i_token(10)),
repeat1(i_token(11)),
})},
},
{}, {}, {}
};
auto result = expand_repeats(grammar);
// Distinct repeats get distinct auxiliary variables, numbered in order.
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, seq({
i_sym(1),
i_sym(2),
})},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(1), i_token(10) }),
i_token(10),
})},
Variable{"rule0_repeat2", VariableTypeAuxiliary, choice({
seq({ i_sym(2), i_token(11) }),
i_token(11),
})},
}));
});
it("can replace repeats in multiple rules", [&]() {
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, repeat1(i_token(10))},
Variable{"rule1", VariableTypeNamed, repeat1(i_token(11))},
},
{}, {}, {}
};
auto result = expand_repeats(grammar);
// Auxiliary variables are appended after all original rules, so the new
// symbols are numbered 2 and 3.
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, i_sym(2)},
Variable{"rule1", VariableTypeNamed, i_sym(3)},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(2), i_token(10) }),
i_token(10),
})},
Variable{"rule1_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(3), i_token(11) }),
i_token(11),
})},
}));
});
});
END_TEST

View file

@ -0,0 +1,169 @@
#include "test_helper.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/expand_tokens.h"
#include "helpers/rule_helpers.h"
START_TEST
using namespace rules;
using prepare_grammar::expand_tokens;
// Tests for prepare_grammar::expand_tokens, which lowers string and regexp
// rules in the lexical grammar into trees of character-set rules, returning
// the transformed grammar plus a CompileError (none on success).
describe("expand_tokens", []() {
// Strings are expanded into metadata-wrapped sequences carrying these flags.
MetadataParams string_token_params;
string_token_params.is_string = true;
string_token_params.is_token = true;
describe("string rules", [&]() {
it("replaces strings with sequences of character sets", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"rule_A",
VariableTypeNamed,
seq({
i_sym(10),
str("xyz"),
i_sym(11),
}),
false
}
},
{}
};
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
LexicalVariable{
"rule_A",
VariableTypeNamed,
seq({
i_sym(10),
// The string becomes one character rule per character, wrapped
// in metadata marking it as a string token.
metadata(seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
}), string_token_params),
i_sym(11),
}),
false
}
}));
});
it("handles strings containing non-ASCII UTF8 characters", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"rule_A",
VariableTypeNamed,
str("\u03B1 \u03B2"),
false
},
},
{}
};
auto result = expand_tokens(grammar);
// Each multi-byte UTF-8 character becomes a single character rule with
// its Unicode code point (945 = U+03B1, 946 = U+03B2).
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
LexicalVariable{
"rule_A",
VariableTypeNamed,
metadata(seq({
character({ 945 }),
character({ ' ' }),
character({ 946 }),
}), string_token_params),
false
}
}));
});
});
describe("regexp rules", [&]() {
it("replaces regexps with the equivalent rule tree", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"rule_A",
VariableTypeNamed,
seq({
i_sym(10),
pattern("x*"),
i_sym(11),
}),
false
}
},
{}
};
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals(CompileError::none()));
// Note: unlike strings, regexps are not wrapped in string metadata.
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
LexicalVariable{
"rule_A",
VariableTypeNamed,
seq({
i_sym(10),
repeat(character({ 'x' })),
i_sym(11),
}),
false
}
}));
});
it("handles regexps containing non-ASCII UTF8 characters", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"rule_A",
VariableTypeNamed,
pattern("[^\u03B1-\u03B4]*"),
false
}
},
{}
};
auto result = expand_tokens(grammar);
// The negated class becomes an excluded character set (second argument
// `false`) over code points 945..948 (U+03B1..U+03B4).
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
LexicalVariable{
"rule_A",
VariableTypeNamed,
repeat(character({ 945, 946, 947, 948 }, false)),
false
}
}));
});
it("returns an error when the grammar contains an invalid regex", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"rule_A",
VariableTypeNamed,
seq({
pattern("("),
str("xyz"),
pattern("["),
}),
false
},
},
{}
};
auto result = expand_tokens(grammar);
// Only the first invalid pattern is reported.
AssertThat(result.second, Equals(CompileError(TSCompileErrorTypeInvalidRegex, "unmatched open paren")));
});
});
});
END_TEST

View file

@ -0,0 +1,106 @@
#include "test_helper.h"
#include "compiler/prepare_grammar/extract_choices.h"
#include "helpers/rule_helpers.h"
START_TEST
using namespace rules;
using prepare_grammar::extract_choices;
// Helper vector of rule pointers whose equality compares the pointed-to
// rules by *value* (via Rule::operator==) rather than by pointer identity,
// so test expectations can be written with freshly-constructed rules.
class rule_vector : public vector<rule_ptr> {
 public:
  bool operator==(const vector<rule_ptr> &other) const {
    if (this->size() != other.size()) return false;
    for (size_t i = 0; i < this->size(); i++) {
      auto rule = this->operator[](i);
      auto other_rule = other[i];
      // BUG FIX: this previously compared `*rule` against itself
      // (`rule->operator==(*rule)`), which is vacuously true, so the whole
      // comparison degenerated to a size check. Compare against the
      // corresponding element of `other` instead.
      if (!rule->operator==(*other_rule))
        return false;
    }
    return true;
  }

  // Allow brace-list construction, e.g. rule_vector({ sym("a"), sym("b") }).
  rule_vector(const initializer_list<rule_ptr> &list) :
    vector<rule_ptr>(list) {}
};
// Tests for prepare_grammar::extract_choices, which rewrites one rule into
// the list of choice-free alternatives it denotes (distributing choices over
// the surrounding sequence/metadata structure).
describe("extract_choices", []() {
it("expands rules containing choices into multiple rules", [&]() {
auto rule = seq({
sym("a"),
choice({ sym("b"), sym("c"), sym("d") }),
sym("e")
});
// One output rule per alternative of the embedded choice.
AssertThat(extract_choices(rule), Equals(rule_vector({
seq({ sym("a"), sym("b"), sym("e") }),
seq({ sym("a"), sym("c"), sym("e") }),
seq({ sym("a"), sym("d"), sym("e") }),
})));
});
it("handles metadata rules", [&]() {
// Metadata (here, precedence) is duplicated onto each alternative.
auto rule = prec(5, choice({ sym("b"), sym("c"), sym("d") }));
AssertThat(extract_choices(rule), Equals(rule_vector({
prec(5, sym("b")),
prec(5, sym("c")),
prec(5, sym("d")),
})));
});
it("handles nested choices", [&]() {
auto rule = choice({
seq({ choice({ sym("a"), sym("b") }), sym("c") }),
sym("d")
});
AssertThat(extract_choices(rule), Equals(rule_vector({
seq({ sym("a"), sym("c") }),
seq({ sym("b"), sym("c") }),
sym("d"),
})));
});
it("handles blank rules", [&]() {
AssertThat(extract_choices(blank()), Equals(rule_vector({
blank(),
})));
});
it("does not move choices outside of repeats", [&]() {
// Choices inside a repeat are expanded only within the repeat body —
// hoisting them out would change how alternatives may interleave across
// iterations.
auto rule = seq({
choice({ sym("a"), sym("b") }),
repeat1(seq({
sym("c"),
choice({
sym("d"),
sym("e"),
}),
sym("f"),
})),
sym("g"),
});
AssertThat(extract_choices(rule), Equals(rule_vector({
seq({
sym("a"),
repeat1(choice({
seq({ sym("c"), sym("d"), sym("f") }),
seq({ sym("c"), sym("e"), sym("f") }),
})),
sym("g"),
}),
seq({
sym("b"),
repeat1(choice({
seq({ sym("c"), sym("d"), sym("f") }),
seq({ sym("c"), sym("e"), sym("f") }),
})),
sym("g"),
}),
})));
});
});
END_TEST

View file

@ -0,0 +1,276 @@
#include "test_helper.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/prepare_grammar/extract_tokens.h"
#include "helpers/rule_helpers.h"
#include "helpers/equals_pointer.h"
#include "helpers/stream_methods.h"
START_TEST
using namespace rules;
using prepare_grammar::extract_tokens;
using prepare_grammar::InternedGrammar;
using prepare_grammar::InitialSyntaxGrammar;
describe("extract_tokens", []() {
// extract_tokens splits an interned grammar into a syntactic part and a
// lexical part; this example covers the main renumbering behavior when
// strings, patterns, token()-marked rules, and whole token-only rules are
// moved into the lexical grammar.
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
str("ab"),
pattern("cd*"),
choice({
i_sym(1),
i_sym(2),
token(repeat1(choice({ str("ef"), str("gh") }))),
}),
}))},
Variable{"rule_B", VariableTypeNamed, pattern("ij+")},
Variable{"rule_C", VariableTypeNamed, choice({ str("kl"), blank() })},
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(3))},
},
{},
{},
{}
});
// extract_tokens returns (syntax grammar, lexical grammar, error).
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
CompileError error = get<2>(result);
AssertThat(error, Equals(CompileError::none()));
AssertThat(syntax_grammar.variables, Equals(vector<Variable>{
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
// This string is now the first token in the lexical grammar.
i_token(0),
// This pattern is now the second rule in the lexical grammar.
i_token(1),
choice({
// Rule 1, which this symbol pointed to, has been moved to the
// lexical grammar.
i_token(3),
// This symbol's index has been decremented, because a previous rule
// was moved to the lexical grammar.
i_sym(1),
// This token rule is now the third rule in the lexical grammar.
i_token(2),
}),
}))},
Variable{"rule_C", VariableTypeNamed, choice({ i_token(4), blank() })},
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(2))},
}));
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable>({
// Strings become anonymous rules.
LexicalVariable{"ab", VariableTypeAnonymous, str("ab"), true},
// Patterns become hidden rules.
LexicalVariable{"/cd*/", VariableTypeAuxiliary, pattern("cd*"), false},
// Rules marked as tokens become hidden rules.
LexicalVariable{"/(ef|gh)*/", VariableTypeAuxiliary, repeat1(choice({
str("ef"),
str("gh")
})), false},
// This named rule was moved wholesale to the lexical grammar.
LexicalVariable{"rule_B", VariableTypeNamed, pattern("ij+"), false},
// Strings become anonymous rules.
LexicalVariable{"kl", VariableTypeAnonymous, str("kl"), true},
})));
});
// Identical string literals occurring multiple times must map to a single
// lexical-grammar entry, referenced by the same token symbol.
it("does not create duplicate tokens in the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{"rule_A", VariableTypeNamed, seq({
str("ab"),
i_sym(0),
str("ab"),
})},
},
{},
{},
{}
});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
// Both occurrences of "ab" are replaced by the same token symbol.
AssertThat(syntax_grammar.variables, Equals(vector<Variable> {
Variable {"rule_A", VariableTypeNamed, seq({ i_token(0), i_sym(0), i_token(0) })},
}));
// BUG FIX: this statement was missing its terminating semicolon, which
// made the file fail to compile.
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable> {
LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true},
}));
});
// A named rule whose entire content is a token is normally moved wholesale,
// but not when the same content ("cd" here) is also used as an inline token
// elsewhere — then the rule stays syntactic and references the shared token.
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })},
Variable{"rule_B", VariableTypeNamed, str("cd")},
Variable{"rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })},
}, {}, {}, {}});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
// All three rules remain in the syntax grammar; rule_B just wraps the
// shared token symbol.
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })},
Variable{"rule_B", VariableTypeNamed, i_token(1)},
Variable{"rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })},
})));
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable> {
LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true},
LexicalVariable {"cd", VariableTypeAnonymous, str("cd"), true},
LexicalVariable {"ef", VariableTypeAnonymous, str("ef"), true},
}));
});
// When a rule is moved to the lexical grammar, the remaining non-terminals
// are renumbered, so symbol pairs in `expected_conflicts` must be updated
// to match.
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{"rule_A", VariableTypeNamed, str("ok")},
Variable{"rule_B", VariableTypeNamed, repeat(i_sym(0))},
Variable{"rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))},
},
{
str(" ")
},
{
{ Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) }
},
{}
});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
// rule_A was moved out, so rule_B/rule_C shift from symbols 1/2 to 0/1.
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(2));
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
{ Symbol(0, Symbol::NonTerminal), Symbol(1, Symbol::NonTerminal) },
})));
});
// Tests for how extract_tokens treats the grammar's "extra" (skippable)
// tokens: inline rules become lexical separators, rules that match existing
// tokens become extra symbols, and non-token extras are compile errors.
describe("handling extra tokens", [&]() {
  it("adds inline extra tokens to the lexical grammar's separators", [&]() {
    auto result = extract_tokens(InternedGrammar{
      {
        Variable{"rule_A", VariableTypeNamed, str("x")},
      },
      {
        str("y"),
        pattern("\\s+"),
      },
      {},
      {}
    });

    AssertThat(get<2>(result), Equals(CompileError::none()));
    // Both inline extras become separators rather than named extra tokens.
    AssertThat(get<1>(result).separators.size(), Equals<size_t>(2));
    AssertThat(get<1>(result).separators[0], EqualsPointer(str("y")));
    AssertThat(get<1>(result).separators[1], EqualsPointer(pattern("\\s+")));
    AssertThat(get<0>(result).extra_tokens, IsEmpty());
  });

  it("handles inline extra tokens that match tokens in the grammar", [&]() {
    auto result = extract_tokens(InternedGrammar{
      {
        Variable{"rule_A", VariableTypeNamed, str("x")},
        Variable{"rule_B", VariableTypeNamed, str("y")},
      },
      {
        str("y"),
      },
      {},
      {}
    });

    AssertThat(get<2>(result), Equals(CompileError::none()));
    // "y" already exists as a token (from rule_B), so it is referenced as an
    // extra symbol instead of being duplicated as a separator.
    AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
    AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, Symbol::Terminal) })));
  });

  it("updates extra symbols according to the new symbol numbers", [&]() {
    auto result = extract_tokens(InternedGrammar{
      {
        Variable{"rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })},
        Variable{"rule_B", VariableTypeNamed, str("y")},
        Variable{"rule_C", VariableTypeNamed, str("z")},
      },
      {
        // Refers to rule_C, which gets moved into the lexical grammar.
        i_sym(2),
      },
      {},
      {}
    });

    AssertThat(get<2>(result), Equals(CompileError::none()));
    // rule_C ends up as terminal 3 after "w", "x", and "y" are extracted.
    AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
      { Symbol(3, Symbol::Terminal) },
    })));
    AssertThat(get<1>(result).separators, IsEmpty());
  });

  it("returns an error if any extra tokens are non-token symbols", [&]() {
    auto result = extract_tokens(InternedGrammar{{
      Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
      Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
    }, { i_sym(1) }, {}, {}});

    // rule_B is a sequence, not a token, so it cannot be an extra.
    AssertThat(get<2>(result), !Equals(CompileError::none()));
    AssertThat(get<2>(result), Equals(
      CompileError(TSCompileErrorTypeInvalidExtraToken,
        "Not a token: rule_B")));
  });

  it("returns an error if any extra tokens are non-token rules", [&]() {
    auto result = extract_tokens(InternedGrammar{{
      Variable{"rule_A", VariableTypeNamed, str("x")},
      Variable{"rule_B", VariableTypeNamed, str("y")},
    }, { choice({ i_sym(1), blank() }) }, {}, {}});

    // An inline extra containing a non-terminal reference is also rejected.
    AssertThat(get<2>(result), !Equals(CompileError::none()));
    AssertThat(get<2>(result), Equals(CompileError(
      TSCompileErrorTypeInvalidExtraToken,
      "Not a token: (choice (non-terminal 1) (blank))"
    )));
  });
});
// An external token may not share its name with a rule that remains a
// non-terminal after extraction; that ambiguity is reported as a compile
// error.
it("returns an error if an external token has the same name as a non-terminal rule", [&]() {
  auto result = extract_tokens(InternedGrammar{
    {
      Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
      Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
    },
    {},
    {},
    {
      // External token deliberately named after the non-terminal rule_A.
      ExternalToken {"rule_A", VariableTypeNamed, Symbol(0, Symbol::NonTerminal)}
    }
  });

  AssertThat(get<2>(result), Equals(CompileError(
    TSCompileErrorTypeInvalidExternalToken,
    "Name 'rule_A' cannot be used for both an external token and a non-terminal rule"
  )));
});
});
END_TEST

View file

@ -0,0 +1,89 @@
#include "test_helper.h"
#include "compiler/prepare_grammar/flatten_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/syntax_grammar.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
START_TEST
using namespace rules;
using prepare_grammar::flatten_rule;
// Tests for prepare_grammar::flatten_rule, which converts a nested rule tree
// into a flat list of productions. Each production entry records the
// precedence and associativity binding that symbol to its successor.
describe("flatten_grammar", []() {
  it("associates each symbol with the precedence and associativity binding it to its successor", [&]() {
    SyntaxVariable result = flatten_rule(Variable{
      "test",
      VariableTypeNamed,
      seq({
        i_sym(1),
        prec_left(101, seq({
          i_sym(2),
          choice({
            prec_right(102, seq({
              i_sym(3),
              i_sym(4)
            })),
            i_sym(5),
          }),
          i_sym(6),
        })),
        i_sym(7),
      })
    });

    AssertThat(result.name, Equals("test"));
    AssertThat(result.type, Equals(VariableTypeNamed));

    // The embedded choice yields two productions: one through symbols 3 and 4,
    // one through symbol 5.
    // Fix: this assertion (and the two below) was missing its trailing
    // semicolon.
    AssertThat(result.productions, Equals(vector<Production>({
      Production({
        {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
        {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
        {Symbol(3, Symbol::NonTerminal), 102, AssociativityRight},
        {Symbol(4, Symbol::NonTerminal), 101, AssociativityLeft},
        {Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
        {Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
      }),
      Production({
        {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
        {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
        {Symbol(5, Symbol::NonTerminal), 101, AssociativityLeft},
        {Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
        {Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
      })
    })));
  });

  it("uses the last assigned precedence", [&]() {
    SyntaxVariable result = flatten_rule(Variable{
      "test1",
      VariableTypeNamed,
      prec_left(101, seq({
        i_sym(1),
        i_sym(2),
      }))
    });

    // A precedence wrapping the whole sequence applies to every entry.
    AssertThat(result.productions, Equals(vector<Production>({
      Production({
        {Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
        {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
      })
    })));

    result = flatten_rule(Variable{
      "test2",
      VariableTypeNamed,
      prec_left(101, seq({
        i_sym(1),
      }))
    });

    // Even a single-symbol sequence retains the assigned precedence.
    AssertThat(result.productions, Equals(vector<Production>({
      Production({
        {Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
      })
    })));
  });
});
END_TEST

View file

@ -0,0 +1,103 @@
#include "test_helper.h"
#include "compiler/prepare_grammar/intern_symbols.h"
#include "compiler/grammar.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
#include "helpers/equals_pointer.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
START_TEST
using namespace rules;
using prepare_grammar::intern_symbols;
// Tests for prepare_grammar::intern_symbols, which replaces name-based symbol
// references in a Grammar with numerically-indexed Symbol values.
describe("intern_symbols", []() {
  it("replaces named symbols with numerically-indexed symbols", [&]() {
    Grammar grammar{
      {
        {"x", choice({ sym("y"), sym("_z") })},
        {"y", sym("_z")},
        {"_z", str("stuff")}
      }, {}, {}, {}
    };

    auto result = intern_symbols(grammar);

    AssertThat(result.second, Equals(CompileError::none()));
    // Rules whose names start with '_' become hidden variables.
    AssertThat(result.first.variables, Equals(vector<Variable>{
      Variable{"x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })},
      Variable{"y", VariableTypeNamed, i_sym(2)},
      Variable{"_z", VariableTypeHidden, str("stuff")},
    }));
  });

  describe("when there are symbols that reference undefined rules", [&]() {
    it("returns an error", []() {
      Grammar grammar{
        {
          {"x", sym("y")},
        },
        {}, {}, {}
      };

      auto result = intern_symbols(grammar);

      AssertThat(result.second.message, Equals("Undefined rule 'y'"));
    });
  });

  it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() {
    Grammar grammar{
      {
        {"x", choice({ sym("y"), sym("z") })},
        {"y", sym("z")},
        {"z", str("stuff")}
      },
      {
        sym("z")
      },
      {}, {}
    };

    auto result = intern_symbols(grammar);

    AssertThat(result.second, Equals(CompileError::none()));
    AssertThat(result.first.extra_tokens.size(), Equals<size_t>(1));
    AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
  });

  it("records any rule names that match external token names", [&]() {
    Grammar grammar{
      {
        {"x", choice({ sym("y"), sym("z") })},
        {"y", sym("z")},
        {"z", str("stuff")},
      },
      {},
      {},
      {
        "w",
        "z"
      }
    };

    auto result = intern_symbols(grammar);

    // "w" matches no rule, so its corresponding symbol is NONE; "z" matches
    // the rule at index 2.
    // Fix: this assertion was missing its trailing semicolon.
    AssertThat(result.first.external_tokens, Equals(vector<ExternalToken>{
      ExternalToken{
        "w",
        VariableTypeNamed,
        rules::NONE()
      },
      ExternalToken{
        "z",
        VariableTypeNamed,
        Symbol(2, Symbol::NonTerminal)
      },
    }));
  });
});
END_TEST

View file

@ -0,0 +1,245 @@
#include "test_helper.h"
#include "compiler/prepare_grammar/parse_regex.h"
#include "helpers/equals_pointer.h"
#include "helpers/rule_helpers.h"
START_TEST
using namespace rules;
using prepare_grammar::parse_regex;
// Table-driven tests for prepare_grammar::parse_regex, which converts a regex
// pattern string into the compiler's internal rule tree. Valid patterns are
// checked against an expected rule; invalid patterns are checked for the
// expected error message.
describe("parse_regex", []() {
  // One row per valid pattern: a human-readable description, the pattern
  // itself, and the rule tree it should parse to.
  struct ValidInputRow {
    string description;
    string pattern;
    rule_ptr rule;
  };

  vector<ValidInputRow> valid_inputs = {
    {
      "character sets",
      "[aAeE]",
      character({ 'a', 'A', 'e', 'E' })
    },
    {
      // '.' matches everything except newline (second arg false = negated set).
      "'.' characters as wildcards",
      ".",
      character({ '\n' }, false)
    },
    {
      // \w, \d, \s and their negations expand to explicit character sets.
      "character classes",
      "\\w-\\d-\\s-\\W-\\D-\\S",
      seq({
        character({
          'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
          'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
          'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
          'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
          '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }),
        character({ '-' }),
        character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }),
        character({ '-' }),
        character({ ' ', '\t', '\r', '\n' }),
        character({ '-' }),
        character({
          'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
          'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
          'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
          'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
          '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }, false),
        character({ '-' }),
        character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false),
        character({ '-' }),
        character({ ' ', '\t', '\r', '\n' }, false),
      })
    },
    {
      "choices",
      "ab|cd|ef",
      choice({
        seq({
          character({ 'a' }),
          character({ 'b' }) }),
        seq({
          character({ 'c' }),
          character({ 'd' }) }),
        seq({
          character({ 'e' }),
          character({ 'f' }) }) })
    },
    {
      "simple sequences",
      "abc",
      seq({
        character({ 'a' }),
        character({ 'b' }),
        character({ 'c' }) })
    },
    {
      "character ranges",
      "[12a-dA-D3]",
      character({
        '1', '2', '3',
        'a', 'b', 'c', 'd',
        'A', 'B', 'C', 'D' })
    },
    {
      "negated characters",
      "[^a\\d]",
      character({ 'a', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false)
    },
    {
      "backslashes",
      "\\\\",
      character({ '\\' })
    },
    {
      "character groups in sequences",
      "x([^x]|\\\\x)*x",
      seq({
        character({ 'x' }),
        repeat(choice({
          character({ 'x' }, false),
          seq({ character({ '\\' }), character({ 'x' }) }) })),
        character({ 'x' }) })
    },
    {
      "choices in sequences",
      "(a|b)cd",
      seq({
        choice({
          character({ 'a' }),
          character({ 'b' }) }),
        character({ 'c' }),
        character({ 'd' }) })
    },
    {
      "escaped parentheses",
      "a\\(b",
      seq({
        character({ 'a' }),
        character({ '(' }),
        character({ 'b' }) })
    },
    {
      "escaped periods",
      "a\\.",
      seq({
        character({ 'a' }),
        character({ '.' }) })
    },
    {
      "escaped characters",
      "\\t\\n\\r",
      seq({
        character({ '\t' }),
        character({ '\n' }),
        character({ '\r' }) })
    },
    {
      "plus repeats",
      "(ab)+(cd)+",
      seq({
        repeat1(seq({ character({ 'a' }), character({ 'b' }) })),
        repeat1(seq({ character({ 'c' }), character({ 'd' }) })) })
    },
    {
      "asterix repeats",
      "(ab)*(cd)*",
      seq({
        repeat(seq({ character({ 'a' }), character({ 'b' }) })),
        repeat(seq({ character({ 'c' }), character({ 'd' }) })) })
    },
    {
      // '?' becomes a choice between the group and blank().
      "optional rules",
      "a(bc)?",
      seq({
        character({ 'a' }),
        choice({
          seq({ character({ 'b' }), character({ 'c' }) }),
          blank() }) })
    },
    {
      "choices containing negated character classes",
      "/([^/]|(\\\\/))*/",
      seq({
        character({ '/' }),
        repeat(choice({
          character({ '/' }, false),
          seq({ character({ '\\' }), character({ '/' }) }) })),
        character({ '/' }), }),
    },
  };

  // One row per invalid pattern: description, pattern, and a substring that
  // must appear in the resulting error message.
  struct InvalidInputRow {
    string description;
    string pattern;
    const char *message;
  };

  vector<InvalidInputRow> invalid_inputs = {
    {
      "mismatched open parens",
      "(a",
      "unmatched open paren",
    },
    {
      "mismatched nested open parens",
      "((a) (b)",
      "unmatched open paren",
    },
    {
      "mismatched close parens",
      "a)",
      "unmatched close paren",
    },
    {
      "mismatched nested close parens",
      "((a) b))",
      "unmatched close paren",
    },
    {
      "mismatched brackets for character classes",
      "[a",
      "unmatched open square bracket",
    },
    {
      "mismatched brackets for character classes",
      "a]",
      "unmatched close square bracket",
    },
  };

  // Generate one test case per table row.
  for (auto &row : valid_inputs) {
    it(("parses " + row.description).c_str(), [&]() {
      auto result = parse_regex(row.pattern);
      AssertThat(result.first, EqualsPointer(row.rule));
    });
  }

  for (auto &row : invalid_inputs) {
    it(("handles invalid regexes with " + row.description).c_str(), [&]() {
      auto result = parse_regex(row.pattern);
      AssertThat(result.second.type, Equals(TSCompileErrorTypeInvalidRegex));
      AssertThat(result.second.message, Contains(row.message));
    });
  }
});
END_TEST

View file

@ -0,0 +1,337 @@
#include "test_helper.h"
#include "compiler/rules/character_set.h"
using namespace rules;
START_TEST
// Tests for rules::CharacterSet, a set of characters represented either as a
// whitelist (included characters) or, after include_all(), as a blacklist
// (excluded characters). Covers equality, hashing, emptiness, mutation, set
// subtraction, and range consolidation.
describe("CharacterSet", []() {
  describe("equality", [&]() {
    it("returns true for identical character sets", [&]() {
      CharacterSet set1 = CharacterSet()
        .include('a', 'd')
        .include('f', 'm');
      CharacterSet set2 = CharacterSet()
        .include('a', 'd')
        .include('f', 'm');
      AssertThat(set1, Equals(set2));
    });

    it("returns false for character sets that include different ranges", [&]() {
      CharacterSet set1 = CharacterSet()
        .include('a', 'd')
        .include('f', 'm');
      CharacterSet set2 = CharacterSet()
        .include('a', 'c')
        .include('f', 'm');
      // Check both directions to ensure operator== is symmetric.
      AssertThat(set1, !Equals(set2));
      AssertThat(set2, !Equals(set1));
    });

    it("returns false for character sets that exclude different ranges", [&]() {
      CharacterSet set1 = CharacterSet()
        .include_all()
        .exclude('a', 'd')
        .exclude('f', 'm');
      CharacterSet set2 = CharacterSet()
        .include_all()
        .exclude('a', 'c')
        .exclude('f', 'm');
      AssertThat(set1, !Equals(set2));
      AssertThat(set2, !Equals(set1));
    });

    it("returns false for character sets with different sign", [&]() {
      // The full set and the empty set differ only in sign.
      CharacterSet set1 = CharacterSet().include_all();
      CharacterSet set2 = CharacterSet();
      AssertThat(set1, !Equals(set2));
      AssertThat(set2, !Equals(set1));
    });
  });

  describe("hashing", [&]() {
    it("returns the same number for identical character sets", [&]() {
      CharacterSet set1 = CharacterSet()
        .include('a', 'd')
        .include('f', 'm');
      CharacterSet set2 = CharacterSet()
        .include('a', 'd')
        .include('f', 'm');
      AssertThat(set1.hash_code(), Equals(set2.hash_code()));
    });

    it("returns different numbers for character sets that include different ranges", [&]() {
      CharacterSet set1 = CharacterSet()
        .include('a', 'd')
        .include('f', 'm');
      CharacterSet set2 = CharacterSet()
        .include('a', 'c')
        .include('f', 'm');
      AssertThat(set1.hash_code(), !Equals(set2.hash_code()));
      AssertThat(set2.hash_code(), !Equals(set1.hash_code()));
    });

    it("returns different numbers for character sets that exclude different ranges", [&]() {
      CharacterSet set1 = CharacterSet()
        .include_all()
        .exclude('a', 'd')
        .exclude('f', 'm');
      CharacterSet set2 = CharacterSet()
        .include_all()
        .exclude('a', 'c')
        .exclude('f', 'm');
      AssertThat(set1.hash_code(), !Equals(set2.hash_code()));
      AssertThat(set2.hash_code(), !Equals(set1.hash_code()));
    });

    it("returns different numbers for character sets with different sign", [&]() {
      CharacterSet set1 = CharacterSet().include_all();
      CharacterSet set2 = CharacterSet();
      AssertThat(set1.hash_code(), !Equals(set2.hash_code()));
      AssertThat(set2.hash_code(), !Equals(set1.hash_code()));
    });
  });

  describe("::is_empty", [&]() {
    it("returns true for empty character sets", [&]() {
      AssertThat(CharacterSet().is_empty(), Equals(true));
    });

    it("returns false for full character sets", [&]() {
      AssertThat(CharacterSet().include_all().is_empty(), Equals(false));
    });

    it("returns false for character sets that include some characters", [&]() {
      AssertThat(CharacterSet().include('x').is_empty(), Equals(false));
    });
  });

  describe("::include", [&]() {
    describe("when the set has a whitelist of characters", [&]() {
      it("adds included characters", [&]() {
        // include(first, last) is equivalent to including each character in
        // the range individually.
        CharacterSet set1 = CharacterSet().include('a', 'd');
        AssertThat(set1, Equals(CharacterSet()
          .include('a')
          .include('b')
          .include('c')
          .include('d')));
      });
    });

    describe("when the set has a blacklist of characters", [&]() {
      it("removes excluded characters", [&]() {
        CharacterSet set1 = CharacterSet()
          .include_all()
          .exclude('a', 'g')
          .include('c', 'e');
        AssertThat(set1, Equals(CharacterSet()
          .include_all()
          .exclude('a')
          .exclude('b')
          .exclude('f')
          .exclude('g')));
      });

      // Fix: test name typo — "the character are" -> "the characters are".
      it("does nothing if the characters are already not excluded", [&]() {
        CharacterSet set1 = CharacterSet()
          .include_all()
          .include('a', 'c');
        AssertThat(set1, Equals(CharacterSet().include_all()));
      });
    });
  });

  describe("::exclude", [&]() {
    describe("when the set has a whitelist of characters", [&]() {
      it("removes included characters", [&]() {
        CharacterSet set1 = CharacterSet()
          .include('a', 'g')
          .exclude('c', 'e');
        AssertThat(set1, Equals(CharacterSet()
          .include('a')
          .include('b')
          .include('f')
          .include('g')));
      });

      // Fix: test name typo — "the character's are" -> "the characters are".
      it("does nothing if the characters are already not included", [&]() {
        CharacterSet set1 = CharacterSet().exclude('a', 'c');
        AssertThat(set1, Equals(CharacterSet()));
      });
    });

    describe("when the set has a blacklist of characters", [&]() {
      it("removes excluded characters", [&]() {
        CharacterSet set1 = CharacterSet()
          .include_all()
          .exclude('a', 'd');
        AssertThat(set1, Equals(CharacterSet()
          .include_all()
          .exclude('a')
          .exclude('b')
          .exclude('c')
          .exclude('d')));
      });
    });
  });

  describe("::remove_set", []() {
    // remove_set subtracts its argument in place and returns the
    // intersection (the characters that were actually removed).
    CharacterSet intersection;

    describe("for a set with whitelisted characters", [&]() {
      describe("when the subtractend has whitelisted characters", [&]() {
        it("removes the included characters that the other set also includes", [&]() {
          CharacterSet set1 = CharacterSet().include('a', 'z');
          set1.remove_set(CharacterSet().include('d', 's'));
          AssertThat(set1, Equals(CharacterSet()
            .include('a', 'c')
            .include('t', 'z')));
        });

        it("returns the characters that were removed", [&]() {
          CharacterSet set1 = CharacterSet().include('a', 'z');
          intersection = set1.remove_set(CharacterSet().include('d', 's'));
          AssertThat(intersection, Equals(CharacterSet()
            .include('d', 's')));
        });

        it("returns the empty set when the sets are disjoint", [&]() {
          CharacterSet set1 = CharacterSet().include('a', 'z');
          intersection = set1.remove_set(CharacterSet().include('A', 'Z'));
          AssertThat(set1, Equals(CharacterSet().include('a', 'z')));
          AssertThat(intersection, Equals(CharacterSet()));
        });
      });

      describe("when the subtractend has blacklisted characters", [&]() {
        it("removes the included characters that are not excluded by the other set", [&]() {
          CharacterSet set1 = CharacterSet().include('a', 'f');
          intersection = set1.remove_set(CharacterSet()
            .include_all()
            .exclude('d', 'z'));
          AssertThat(set1, Equals(CharacterSet()
            .include('d', 'f')));
          AssertThat(intersection, Equals(CharacterSet()
            .include('a', 'c')));
        });
      });
    });

    describe("for a set with blacklisted characters", [&]() {
      describe("when the subtractend has whitelisted characters", [&]() {
        it("adds the subtractend's inclusions to the receiver's exclusions", [&]() {
          CharacterSet set1 = CharacterSet()
            .include_all()
            .exclude('a', 'f');
          intersection = set1.remove_set(CharacterSet()
            .include('x', 'z'));
          AssertThat(set1, Equals(CharacterSet()
            .include_all()
            .exclude('a', 'f')
            .exclude('x', 'z')));
          AssertThat(intersection, Equals(CharacterSet().include('x', 'z')));
        });
      });

      describe("when the subtractend has blacklisted characters", [&]() {
        it("includes only the characters excluded by the subtractend but not by the receiver", [&]() {
          CharacterSet set1 = CharacterSet()
            .include_all()
            .exclude('a', 'm');
          set1.remove_set(CharacterSet()
            .include_all()
            .exclude('d', 'z'));
          AssertThat(set1, Equals(CharacterSet()
            .include('n', 'z')));
        });

        it("returns the characters excluded by neither set", [&]() {
          CharacterSet set1 = CharacterSet()
            .include_all()
            .exclude('a', 'm');
          intersection = set1.remove_set(CharacterSet()
            .include_all()
            .exclude('d', 'z'));
          AssertThat(intersection, Equals(CharacterSet()
            .include_all()
            .exclude('a', 'z')));
        });

        // NOTE(review): this case uses the same overlapping ranges as the two
        // tests above, so the "disjoint" description looks inaccurate —
        // confirm the intended fixture.
        it("works when the sets are disjoint", [&]() {
          CharacterSet set1 = CharacterSet()
            .include_all()
            .exclude('a', 'm');
          intersection = set1.remove_set(CharacterSet()
            .include_all()
            .exclude('d', 'z'));
          AssertThat(set1, Equals(CharacterSet()
            .include('n', 'z')));
          AssertThat(intersection, Equals(CharacterSet()
            .include_all()
            .exclude('a', 'z')));
        });
      });
    });
  });

  describe("::included_ranges", [&]() {
    it("consolidates sequences of 3 or more consecutive characters into ranges", [&]() {
      CharacterSet set1 = CharacterSet()
        .include('a', 'c')
        .include('g')
        .include('z');
      AssertThat(set1.included_ranges(), Equals(vector<CharacterRange>({
        CharacterRange('a', 'c'),
        CharacterRange('g'),
        CharacterRange('z'),
      })));
    });

    it("doesn't consolidate sequences of 2 consecutive characters", [&]() {
      CharacterSet set1 = CharacterSet()
        .include('a', 'b')
        .include('g')
        .include('z');
      AssertThat(set1.included_ranges(), Equals(vector<CharacterRange>({
        CharacterRange('a'),
        CharacterRange('b'),
        CharacterRange('g'),
        CharacterRange('z'),
      })));
    });
  });
});
END_TEST

View file

@ -0,0 +1,53 @@
#include "test_helper.h"
#include "compiler/rules/choice.h"
#include "helpers/rule_helpers.h"
#include "helpers/equals_pointer.h"
using namespace rules;
START_TEST
// Tests for Choice::build, the factory that constructs choice rules while
// normalizing them: deduplicating members, flattening nested choices, and
// collapsing single-member choices.
describe("Choice", []() {
  describe("constructing choices", [&]() {
    it("eliminates duplicate members", [&]() {
      auto rule = Choice::build({
        seq({ sym("one"), sym("two") }),
        sym("three"),
        seq({ sym("one"), sym("two") })
      });

      // The repeated seq appears only once; member order is preserved.
      AssertThat(rule, EqualsPointer(choice({
        seq({ sym("one"), sym("two") }),
        sym("three"),
      })));
    });

    it("eliminates duplicates within nested choices", [&]() {
      auto rule = Choice::build({
        seq({ sym("one"), sym("two") }),
        Choice::build({
          sym("three"),
          seq({ sym("one"), sym("two") })
        })
      });

      // The inner choice is flattened into the outer one before dedup.
      AssertThat(rule, EqualsPointer(choice({
        seq({ sym("one"), sym("two") }),
        sym("three"),
      })));
    });

    it("doesn't construct a choice if there's only one unique member", [&]() {
      auto rule = Choice::build({
        sym("one"),
        Choice::build({
          sym("one"),
        })
      });

      // A one-member choice collapses to the member itself.
      AssertThat(rule, EqualsPointer(sym("one")));
    });
  });
});
END_TEST

View file

@ -0,0 +1,22 @@
#include "test_helper.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/symbol.h"
using namespace rules;
START_TEST
// Tests for Repeat::build, which wraps a rule in a repetition but avoids
// nesting a repeat directly inside another repeat.
describe("Repeat", []() {
  describe("constructing repeats", [&]() {
    it("doesn't create redundant repeats", [&]() {
      auto sym = make_shared<Symbol>(1, Symbol::NonTerminal);
      auto repeat = Repeat::build(sym);
      // Building a repeat of a repeat returns the inner repeat unchanged.
      auto outer_repeat = Repeat::build(repeat);
      AssertThat(repeat, !Equals(sym));
      AssertThat(outer_repeat, Equals(repeat));
    });
  });
});
END_TEST

View file

@ -0,0 +1,26 @@
#include "test_helper.h"
#include "compiler/util/string_helpers.h"
using util::escape_char;
START_TEST
// Tests for util::escape_char, which renders a character code as a
// single-quoted C-style literal (escaping where needed), or as a plain
// number for values outside the ASCII range.
describe("escape_char", []() {
  it("returns ascii characters as strings", [&]() {
    AssertThat(escape_char('x'), Equals("'x'"));
  });

  it("escapes special characters with backslashes", [&]() {
    AssertThat(escape_char('\\'), Equals("'\\\\'"));
    AssertThat(escape_char('\n'), Equals("'\\n'"));
    AssertThat(escape_char('\t'), Equals("'\\t'"));
    AssertThat(escape_char('\r'), Equals("'\\r'"));
    AssertThat(escape_char('\''), Equals("'\\''"));
  });

  it("prints non-ascii characters as numbers", [&]() {
    AssertThat(escape_char(256), Equals("256"));
  });
});
END_TEST