Separate syntax rules into flat lists of symbols
This way, every ParseItem can be associated with a particular production for its non-terminal. That lets us keep track of which productions are involved in shift/reduce conflicts.
This commit is contained in:
parent
68a0e16d1e
commit
52daffb3f3
37 changed files with 842 additions and 426 deletions
|
|
@ -2,7 +2,6 @@
|
|||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/action_takes_precedence.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
|
|
|||
|
|
@ -12,11 +12,11 @@ describe("build_conflict", []() {
|
|||
Conflict conflict("");
|
||||
|
||||
SyntaxGrammar parse_grammar({
|
||||
{ "in_progress_rule1", i_token(0) },
|
||||
{ "in_progress_rule2", i_token(0) },
|
||||
{ "reduced_rule", i_token(0) },
|
||||
{ "other_ruel1", i_token(0) },
|
||||
{ "other_rule2", i_token(0) },
|
||||
{ "in_progress_rule1", {} },
|
||||
{ "in_progress_rule2", {} },
|
||||
{ "reduced_rule", {} },
|
||||
{ "other_ruel1", {} },
|
||||
{ "other_rule2", {} },
|
||||
}, {}, { Symbol(2, SymbolOptionToken) });
|
||||
|
||||
LexicalGrammar lex_grammar({
|
||||
|
|
@ -30,15 +30,15 @@ describe("build_conflict", []() {
|
|||
ParseAction::Reduce(Symbol(2), 1, 0), // reduced_rule
|
||||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), blank(), 2), // in_progress_rule1
|
||||
ParseItem(Symbol(0), 0, 0, 2), // in_progress_rule1
|
||||
set<Symbol>({ Symbol(2, SymbolOptionToken) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), blank(), 2), // in_progress_rule2
|
||||
ParseItem(Symbol(1), 0, 0, 2), // in_progress_rule2
|
||||
set<Symbol>({ Symbol(2, SymbolOptionToken) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(3), blank(), 0), // other_rule1
|
||||
ParseItem(Symbol(3), 0, 0, 0), // other_rule1
|
||||
set<Symbol>({ Symbol(2, SymbolOptionToken) })
|
||||
},
|
||||
}),
|
||||
|
|
@ -58,11 +58,11 @@ describe("build_conflict", []() {
|
|||
ParseAction::Shift(2, set<int>()),
|
||||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), blank(), 2), // in_progress_rule1
|
||||
ParseItem(Symbol(0), 0, 0, 2), // in_progress_rule1
|
||||
set<Symbol>({ Symbol(2, SymbolOptionToken) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), blank(), 2), // in_progress_rule2
|
||||
ParseItem(Symbol(1), 0, 0, 2), // in_progress_rule2
|
||||
set<Symbol>({ Symbol(2, SymbolOptionToken) })
|
||||
},
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -12,9 +12,25 @@ START_TEST
|
|||
|
||||
describe("build_parse_table", []() {
|
||||
SyntaxGrammar parse_grammar({
|
||||
{ "rule0", choice({ i_sym(1), i_sym(2) }) },
|
||||
{ "rule1", i_token(0) },
|
||||
{ "rule2", i_token(1) },
|
||||
{
|
||||
"rule0",
|
||||
{
|
||||
Production({ {Symbol(1), 0, 1} }, 0),
|
||||
Production({ {Symbol(2), 0, 2} }, 0)
|
||||
}
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
{
|
||||
Production({ {Symbol(0, SymbolOptionToken), 0, 3} }, 0)
|
||||
}
|
||||
},
|
||||
{
|
||||
"rule2",
|
||||
{
|
||||
Production({ {Symbol(1, SymbolOptionToken), 0, 4} }, 0)
|
||||
}
|
||||
},
|
||||
}, {}, { Symbol(2, SymbolOptionToken) });
|
||||
|
||||
LexicalGrammar lex_grammar({
|
||||
|
|
|
|||
|
|
@ -1,103 +0,0 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/build_tables/first_symbols.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
using namespace build_tables;
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("first_symbols", []() {
|
||||
const SyntaxGrammar null_grammar;
|
||||
|
||||
describe("for a sequence AB", [&]() {
|
||||
it("ignores B when A cannot be blank", [&]() {
|
||||
auto rule = seq({ i_token(0), i_token(1) });
|
||||
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(0, SymbolOptionToken),
|
||||
})));
|
||||
});
|
||||
|
||||
it("includes first_symbols(B) when A can be blank", [&]() {
|
||||
auto rule = seq({
|
||||
choice({
|
||||
i_token(0),
|
||||
blank() }),
|
||||
i_token(1) });
|
||||
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(0, SymbolOptionToken),
|
||||
Symbol(1, SymbolOptionToken)
|
||||
})));
|
||||
});
|
||||
|
||||
it("includes first_symbols(A's right hand side) when A is a non-terminal", [&]() {
|
||||
auto rule = choice({
|
||||
seq({
|
||||
i_token(0),
|
||||
i_token(1) }),
|
||||
i_sym(0) });
|
||||
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", seq({
|
||||
i_token(2),
|
||||
i_token(3),
|
||||
i_token(4) }) }
|
||||
}, {});
|
||||
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(0, SymbolOptionToken),
|
||||
Symbol(2, SymbolOptionToken),
|
||||
})));
|
||||
});
|
||||
|
||||
it("includes first_symbols(B) when A is a non-terminal and its expansion can be blank", [&]() {
|
||||
auto rule = seq({
|
||||
i_sym(0),
|
||||
i_token(1) });
|
||||
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", choice({
|
||||
i_token(0),
|
||||
blank() }) }
|
||||
}, {});
|
||||
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(0, SymbolOptionToken),
|
||||
Symbol(1, SymbolOptionToken),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
describe("when there are left-recursive rules", [&]() {
|
||||
it("terminates", [&]() {
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", choice({
|
||||
seq({ i_sym(0), i_token(10) }),
|
||||
i_token(11),
|
||||
}) },
|
||||
}, {});
|
||||
|
||||
auto rule = i_sym(0);
|
||||
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(11, SymbolOptionToken)
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
it("ignores metadata rules", [&]() {
|
||||
auto rule = make_shared<Metadata>(i_token(3), map<rules::MetadataKey, int>());
|
||||
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(3, SymbolOptionToken),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -10,30 +10,61 @@ START_TEST
|
|||
|
||||
describe("item_set_closure", []() {
|
||||
SyntaxGrammar grammar({
|
||||
{ "E", seq({
|
||||
i_sym(1),
|
||||
i_token(11) }) },
|
||||
{ "T", seq({
|
||||
i_token(12),
|
||||
i_token(13) }) },
|
||||
}, {});
|
||||
{
|
||||
"rule0",
|
||||
{
|
||||
Production({
|
||||
{Symbol(1), 0, 100},
|
||||
{Symbol(11, SymbolOptionToken), 0, 101}
|
||||
}, 107),
|
||||
}
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
{
|
||||
Production({
|
||||
{Symbol(12, SymbolOptionToken), 0, 102},
|
||||
{Symbol(13, SymbolOptionToken), 0, 103}
|
||||
}, 108),
|
||||
Production({
|
||||
{Symbol(2), 0, 104},
|
||||
}, 109)
|
||||
}
|
||||
},
|
||||
{
|
||||
"rule2",
|
||||
{
|
||||
Production({
|
||||
{Symbol(14, SymbolOptionToken), 0, 105},
|
||||
{Symbol(15, SymbolOptionToken), 0, 106}
|
||||
}, 110)
|
||||
}
|
||||
},
|
||||
}, {}, set<Symbol>());
|
||||
|
||||
it("adds items at the beginnings of referenced rules", [&]() {
|
||||
ParseItemSet item_set = item_set_closure(
|
||||
ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0),
|
||||
set<Symbol>({ Symbol(10, SymbolOptionToken) }),
|
||||
grammar
|
||||
);
|
||||
ParseItem(Symbol(0), 0, 100, 0),
|
||||
set<Symbol>({ Symbol(10, SymbolOptionToken) }),
|
||||
grammar);
|
||||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(1), grammar.rule(Symbol(1)), 0),
|
||||
set<Symbol>({ Symbol(11, SymbolOptionToken) }),
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0),
|
||||
set<Symbol>({ Symbol(10, SymbolOptionToken) }),
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(0), 0, 100, 0),
|
||||
set<Symbol>({ Symbol(10, SymbolOptionToken) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), 0, 102, 0),
|
||||
set<Symbol>({ Symbol(11, SymbolOptionToken) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), 1, 104, 0),
|
||||
set<Symbol>({ Symbol(11, SymbolOptionToken) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(2), 0, 105, 0),
|
||||
set<Symbol>({ Symbol(11, SymbolOptionToken) })
|
||||
},
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -12,63 +12,79 @@ describe("char_transitions(LexItemSet)", []() {
|
|||
describe("when two items in the set have transitions on the same character", [&]() {
|
||||
it("merges the transitions by computing the union of the two item sets", [&]() {
|
||||
LexItemSet set1({
|
||||
LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()),
|
||||
LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
|
||||
LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()),
|
||||
LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
|
||||
});
|
||||
|
||||
AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('a', 'd'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('e', 'f'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('g', 'x'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('a', 'd'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('e', 'f'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('g', 'x'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
})));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("sym_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
|
||||
SyntaxGrammar grammar({
|
||||
{ "A", blank() },
|
||||
{ "B", i_token(21) },
|
||||
}, {}, set<Symbol>());
|
||||
|
||||
describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
|
||||
it("computes the closure of the new item sets", [&]() {
|
||||
ParseItemSet set1({
|
||||
{
|
||||
ParseItem(Symbol(0), seq({ i_token(22), i_sym(1) }), 3),
|
||||
set<Symbol>({ Symbol(23, SymbolOptionToken) })
|
||||
SyntaxGrammar grammar({
|
||||
{
|
||||
"A", {
|
||||
Production({
|
||||
{Symbol(11, SymbolOptionToken), 0, 101},
|
||||
{Symbol(12, SymbolOptionToken), 0, 102},
|
||||
{Symbol(13, SymbolOptionToken), 0, 103},
|
||||
{Symbol(1), 0, 104},
|
||||
{Symbol(14, SymbolOptionToken), 0, 105},
|
||||
}, 1)
|
||||
},
|
||||
},
|
||||
{
|
||||
"B", {
|
||||
Production({
|
||||
{Symbol(15, SymbolOptionToken), 0, 106},
|
||||
}, 2)
|
||||
},
|
||||
}
|
||||
}, {}, set<Symbol>());
|
||||
|
||||
ParseItemSet set1({
|
||||
{
|
||||
ParseItem(Symbol(0), 0, 103, 2),
|
||||
set<Symbol>({ Symbol(16, SymbolOptionToken) })
|
||||
}
|
||||
});
|
||||
|
||||
AssertThat(sym_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
|
||||
{
|
||||
Symbol(22, SymbolOptionToken),
|
||||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), i_sym(1), 4),
|
||||
set<Symbol>({ Symbol(23, SymbolOptionToken) }),
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), i_token(21), 0),
|
||||
set<Symbol>({ Symbol(23, SymbolOptionToken) })
|
||||
},
|
||||
})
|
||||
},
|
||||
{
|
||||
Symbol(13, SymbolOptionToken),
|
||||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), 0, 104, 3),
|
||||
set<Symbol>({ Symbol(16, SymbolOptionToken) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), 0, 106, 0),
|
||||
set<Symbol>({ Symbol(14, SymbolOptionToken) })
|
||||
},
|
||||
})
|
||||
},
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using build_tables::rule_can_be_blank;
|
||||
|
|
@ -54,27 +53,6 @@ describe("rule_can_be_blank", [&]() {
|
|||
rule = make_shared<rules::Metadata>(sym("one"), map<rules::MetadataKey, int>());
|
||||
AssertThat(rule_can_be_blank(rule), IsFalse());
|
||||
});
|
||||
|
||||
describe("checking recursively (by expanding non-terminals)", [&]() {
|
||||
SyntaxGrammar grammar({
|
||||
{ "A", choice({
|
||||
seq({ i_sym(0), i_token(11) }),
|
||||
blank() }) },
|
||||
{ "B", choice({
|
||||
seq({ i_sym(1), i_token(12) }),
|
||||
i_token(13) }) },
|
||||
}, {}, set<Symbol>());
|
||||
|
||||
it("terminates for left-recursive rules that can be blank", [&]() {
|
||||
rule = i_sym(0);
|
||||
AssertThat(rule_can_be_blank(rule, grammar), IsTrue());
|
||||
});
|
||||
|
||||
it("terminates for left-recursive rules that can't be blank", [&]() {
|
||||
rule = i_sym(1);
|
||||
AssertThat(rule_can_be_blank(rule, grammar), IsFalse());
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
|
|||
|
|
@ -15,6 +15,15 @@ using std::initializer_list;
|
|||
using std::pair;
|
||||
using tree_sitter::rules::rule_ptr;
|
||||
|
||||
// Applies `f` to every element of `v`, in order, and returns the results as a
// new vector with one entry per input element.
//
// The result's element type is deduced from calling `f` with a `const T &`
// (exactly how the loop below invokes it) and is then decayed, so a callable
// that returns a reference or a const-qualified value still produces a vector
// of plain values. This replaces `std::result_of<Func(T)>`, which deduced the
// type for an rvalue `T` argument and is deprecated as of C++17.
template<typename T, typename Func>
auto collect(const std::vector<T> &v, Func f)
    -> std::vector<typename std::decay<decltype(f(v.front()))>::type> {
  // `v.front()` appears only inside decltype, so it is never evaluated and is
  // safe even when `v` is empty.
  typedef typename std::decay<decltype(f(v.front()))>::type Result;

  std::vector<Result> result;
  result.reserve(v.size());  // final size is known; avoid repeated regrowth
  for (const T &item : v)
    result.push_back(f(item));
  return result;
}
|
||||
|
||||
template<typename K>
|
||||
class rule_map : public map<K, rule_ptr> {
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -36,8 +36,8 @@ inline std::ostream& operator<<(std::ostream &stream, const std::set<T> &set) {
|
|||
return stream << ")";
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline std::ostream& operator<<(std::ostream &stream, const std::unordered_set<T> &set) {
|
||||
template<typename T, typename H, typename E>
|
||||
inline std::ostream& operator<<(std::ostream &stream, const std::unordered_set<T, H, E> &set) {
|
||||
stream << std::string("(set: ");
|
||||
bool started = false;
|
||||
for (auto item : set) {
|
||||
|
|
|
|||
|
|
@ -1,16 +1,17 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
using prepare_grammar::expand_repeats;
|
||||
|
||||
describe("expand_repeats", []() {
|
||||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
SyntaxGrammar grammar({
|
||||
InitialSyntaxGrammar grammar({
|
||||
{ "rule0", repeat(i_token(0)) },
|
||||
}, {}, set<Symbol>());
|
||||
|
||||
|
|
@ -28,7 +29,7 @@ describe("expand_repeats", []() {
|
|||
});
|
||||
|
||||
it("replaces repeats inside of sequences", [&]() {
|
||||
SyntaxGrammar grammar({
|
||||
InitialSyntaxGrammar grammar({
|
||||
{ "rule0", seq({
|
||||
i_token(10),
|
||||
repeat(i_token(11)) }) },
|
||||
|
|
@ -50,7 +51,7 @@ describe("expand_repeats", []() {
|
|||
});
|
||||
|
||||
it("replaces repeats inside of choices", [&]() {
|
||||
SyntaxGrammar grammar({
|
||||
InitialSyntaxGrammar grammar({
|
||||
{ "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
|
||||
}, {}, set<Symbol>());
|
||||
|
||||
|
|
@ -68,7 +69,7 @@ describe("expand_repeats", []() {
|
|||
});
|
||||
|
||||
it("can replace multiple repeats in the same rule", [&]() {
|
||||
SyntaxGrammar grammar({
|
||||
InitialSyntaxGrammar grammar({
|
||||
{ "rule0", seq({
|
||||
repeat(i_token(10)),
|
||||
repeat(i_token(11)) }) },
|
||||
|
|
@ -93,7 +94,7 @@ describe("expand_repeats", []() {
|
|||
});
|
||||
|
||||
it("can replace repeats in multiple rules", [&]() {
|
||||
SyntaxGrammar grammar({
|
||||
InitialSyntaxGrammar grammar({
|
||||
{ "rule0", repeat(i_token(10)) },
|
||||
{ "rule1", repeat(i_token(11)) },
|
||||
}, {}, set<Symbol>());
|
||||
|
|
|
|||
58
spec/compiler/prepare_grammar/extract_choices_spec.cc
Normal file
58
spec/compiler/prepare_grammar/extract_choices_spec.cc
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepare_grammar/extract_choices.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::extract_choices;
|
||||
|
||||
// Spec for prepare_grammar::extract_choices: each case builds one rule tree,
// calls extract_choices on it, and compares the returned rule_vector with the
// expected list of choice-free rules.
describe("extract_choices", []() {
|
||||
// A choice nested in a sequence: the expected output is one sequence per
// alternative, with the surrounding symbols ("a" and "e") kept in place.
it("expands rules containing choices into multiple rules", [&]() {
|
||||
auto rule = seq({
|
||||
sym("a"),
|
||||
choice({ sym("b"), sym("c"), sym("d") }),
|
||||
sym("e")
|
||||
});
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
seq({ sym("a"), sym("b"), sym("e") }),
|
||||
seq({ sym("a"), sym("c"), sym("e") }),
|
||||
seq({ sym("a"), sym("d"), sym("e") }),
|
||||
})));
|
||||
});
|
||||
|
||||
// A choice wrapped in prec(5, ...): every expected alternative is wrapped in
// the same prec(5, ...).
it("handles metadata rules", [&]() {
|
||||
auto rule = prec(5, choice({ sym("b"), sym("c"), sym("d") }));
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
prec(5, sym("b")),
|
||||
prec(5, sym("c")),
|
||||
prec(5, sym("d")),
|
||||
})));
|
||||
});
|
||||
|
||||
// A choice whose first alternative is a sequence that itself contains a
// choice: the expected output is a single flat list of three rules.
it("handles nested choices", [&]() {
|
||||
auto rule = choice({
|
||||
seq({ choice({ sym("a"), sym("b") }), sym("c") }),
|
||||
sym("d")
|
||||
});
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
seq({ sym("a"), sym("c") }),
|
||||
seq({ sym("b"), sym("c") }),
|
||||
sym("d"),
|
||||
})));
|
||||
});
|
||||
|
||||
// A choice inside repeat(...): the expected output is one repeat per
// alternative.
it("handles repeats", [&]() {
|
||||
auto rule = repeat(choice({ sym("a"), sym("b") }));
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
repeat(sym("a")),
|
||||
repeat(sym("b")),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
|
|
@ -8,10 +8,11 @@ START_TEST
|
|||
|
||||
using namespace rules;
|
||||
using prepare_grammar::extract_tokens;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
|
||||
describe("extract_tokens", []() {
|
||||
it("moves string rules into the lexical grammar", [&]() {
|
||||
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> result =
|
||||
tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> result =
|
||||
extract_tokens(Grammar({
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
|
||||
}));
|
||||
|
|
|
|||
109
spec/compiler/prepare_grammar/flatten_grammar_spec.cc
Normal file
109
spec/compiler/prepare_grammar/flatten_grammar_spec.cc
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepare_grammar/flatten_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::flatten_grammar;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
|
||||
// Spec for prepare_grammar::flatten_grammar: all cases flatten the same
// two-rule InitialSyntaxGrammar and inspect the productions of the resulting
// SyntaxGrammar.
describe("flatten_grammar", []() {
|
||||
// Shared fixture. "rule1" is a sequence with one two-way choice and no
// precedence. "rule2" wraps part of its sequence in prec(50, ...) and, inside
// that, one choice alternative in prec(100, ...).
InitialSyntaxGrammar input_grammar({
|
||||
{ "rule1", seq({
|
||||
i_sym(1),
|
||||
choice({ i_sym(2), i_sym(3) }),
|
||||
i_sym(4) }) },
|
||||
{ "rule2", seq({
|
||||
i_sym(1),
|
||||
prec(50, seq({
|
||||
i_sym(2),
|
||||
choice({
|
||||
prec(100, seq({
|
||||
i_sym(3),
|
||||
i_sym(4)
|
||||
})),
|
||||
i_sym(5),
|
||||
}),
|
||||
i_sym(6) })),
|
||||
i_sym(7) }) },
|
||||
}, {});
|
||||
|
||||
// Checks rule names are preserved and that each rule becomes two productions,
// one per choice alternative, compared by their symbol lists.
it("turns each rule into a list of possible symbol sequences", [&]() {
|
||||
SyntaxGrammar grammar = flatten_grammar(input_grammar);
|
||||
|
||||
// For the rule at rule_index, returns one vector of entry symbols per
// production.
auto get_symbol_lists = [&](int rule_index) {
|
||||
return collect(grammar.rules[rule_index].second, [](Production p) {
|
||||
return collect(p.entries, [](ProductionEntry e) {
|
||||
return e.symbol;
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
AssertThat(grammar.rules[0].first, Equals("rule1"));
|
||||
AssertThat(grammar.rules[1].first, Equals("rule2"));
|
||||
|
||||
AssertThat(
|
||||
get_symbol_lists(0),
|
||||
Equals(vector<vector<Symbol>>({
|
||||
{ Symbol(1), Symbol(2), Symbol(4) },
|
||||
{ Symbol(1), Symbol(3), Symbol(4) }
|
||||
})));
|
||||
|
||||
AssertThat(
|
||||
get_symbol_lists(1),
|
||||
Equals(vector<vector<Symbol>>({
|
||||
{ Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7) },
|
||||
{ Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7) }
|
||||
})));
|
||||
});
|
||||
|
||||
// Checks the per-entry precedence values: all zero for "rule1"; for "rule2"
// the expected lists contain the 50 and 100 values taken from the fixture's
// prec(...) wrappers.
it("associates each symbol with the precedence binding it to its previous neighbor", [&]() {
|
||||
SyntaxGrammar grammar = flatten_grammar(input_grammar);
|
||||
|
||||
// For the rule at rule_index, returns one vector of entry precedences per
// production.
auto get_precedence_lists = [&](int rule_index) {
|
||||
return collect(grammar.rules[rule_index].second, [](Production p) {
|
||||
return collect(p.entries, [](ProductionEntry e) {
|
||||
return e.precedence;
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
AssertThat(
|
||||
get_precedence_lists(0),
|
||||
Equals(vector<vector<int>>({
|
||||
{ 0, 0, 0 },
|
||||
{ 0, 0, 0 }
|
||||
})));
|
||||
|
||||
AssertThat(
|
||||
get_precedence_lists(1),
|
||||
Equals(vector<vector<int>>({
|
||||
{ 0, 0, 50, 100, 50, 0 },
|
||||
{ 0, 0, 50, 50, 0 }
|
||||
})));
|
||||
});
|
||||
|
||||
// Compares rule_id_at() values between the two productions of each rule:
// positions before the productions' shared tail must differ, positions in the
// shared tail must match. Middle positions of rule 2 are not asserted.
it("associates each unique subsequence of symbols and precedences with a rule_id", [&]() {
|
||||
SyntaxGrammar grammar = flatten_grammar(input_grammar);
|
||||
|
||||
// Shorthand for rule_id_at(symbol_index) on a given rule's production.
auto rule_id = [&](int rule_index, int production_index, int symbol_index) {
|
||||
return grammar.rules[rule_index].second[production_index].rule_id_at(symbol_index);
|
||||
};
|
||||
|
||||
// Rule 1: last symbol is the same for both productions.
|
||||
AssertThat(rule_id(0, 0, 0), !Equals(rule_id(0, 1, 0)));
|
||||
AssertThat(rule_id(0, 0, 1), !Equals(rule_id(0, 1, 1)));
|
||||
AssertThat(rule_id(0, 0, 2), Equals(rule_id(0, 1, 2)));
|
||||
|
||||
// Rule 2: last two symbols are the same for both productions.
|
||||
AssertThat(rule_id(1, 0, 0), !Equals(rule_id(1, 1, 0)));
|
||||
AssertThat(rule_id(1, 0, 1), !Equals(rule_id(1, 1, 1)));
|
||||
AssertThat(rule_id(1, 0, 4), Equals(rule_id(1, 1, 3)));
|
||||
AssertThat(rule_id(1, 0, 5), Equals(rule_id(1, 1, 4)));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
Loading…
Add table
Add a link
Reference in a new issue