Compute conflicting tokens more precisely

While generating the parse table, keep track of which tokens can follow one another.
Then use this information to evaluate token conflicts more precisely. This will
result in a smaller parse table than the previous, overly-conservative approach.
This commit is contained in:
Max Brunsfeld 2017-07-07 17:54:24 -07:00
parent a98abde529
commit 1586d70cbe
6 changed files with 160 additions and 61 deletions

View file

@ -16,7 +16,7 @@ describe("LexTableBuilder::detect_conflict", []() {
auto builder = LexTableBuilder::create(LexicalGrammar{
{
LexicalVariable{
"token_1",
"token_0",
VariableTypeNamed,
Rule::seq({
CharacterSet({ 'a' }),
@ -26,7 +26,7 @@ describe("LexTableBuilder::detect_conflict", []() {
false
},
LexicalVariable{
"token_2",
"token_1",
VariableTypeNamed,
Rule::seq({
CharacterSet({ 'b' }),
@ -39,22 +39,22 @@ describe("LexTableBuilder::detect_conflict", []() {
separators
});
AssertThat(builder->detect_conflict(0, 1), IsFalse());
AssertThat(builder->detect_conflict(1, 0), IsFalse());
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsFalse());
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
});
it("returns true when one token matches a string that the other matches, "
"plus some addition content that begins with a separator character", [&]() {
it("returns true when the left token can match a string that the right token matches, "
"plus a separator character", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"token_1",
"token_0",
VariableTypeNamed,
Rule::repeat(CharacterSet().include_all().exclude('\n')), // regex: /.+/
false
},
LexicalVariable{
"token_2",
"token_1",
VariableTypeNamed,
Rule::seq({ CharacterSet({ 'a' }), CharacterSet({ 'b' }), CharacterSet({ 'c' }) }), // string: 'abc'
true
@ -64,24 +64,32 @@ describe("LexTableBuilder::detect_conflict", []() {
};
auto builder = LexTableBuilder::create(grammar);
AssertThat(builder->detect_conflict(0, 1), IsTrue());
AssertThat(builder->detect_conflict(1, 0), IsFalse());
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
grammar.variables[1].is_string = false;
AssertThat(builder->detect_conflict(0, 1), IsTrue());
AssertThat(builder->detect_conflict(1, 0), IsFalse());
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
});
it("returns true when one token matches a string that the other matches, "
"plus some addition content that matches another one-character token", [&]() {
it("returns true when the left token matches a string that the right token matches, "
"plus the first character of some token that can follow the right token", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"token_0",
VariableTypeNamed,
Rule::seq({
CharacterSet({ '>' }),
CharacterSet({ '=' }),
}),
true
},
LexicalVariable{
"token_1",
VariableTypeNamed,
Rule::seq({
CharacterSet({ '>' }),
CharacterSet({ '>' }),
}),
true
},
@ -89,7 +97,7 @@ describe("LexTableBuilder::detect_conflict", []() {
"token_2",
VariableTypeNamed,
Rule::seq({
CharacterSet({ '>' }),
CharacterSet({ '=' }),
}),
true
},
@ -97,9 +105,17 @@ describe("LexTableBuilder::detect_conflict", []() {
separators
};
// If no tokens can follow token_1, then there's no conflict
auto builder = LexTableBuilder::create(grammar);
AssertThat(builder->detect_conflict(0, 1), IsTrue());
AssertThat(builder->detect_conflict(1, 0), IsFalse());
vector<set<Symbol::Index>> following_tokens_by_token_index(3);
AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsFalse());
AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse());
// If token_2 can follow token_1, then token_0 conflicts with token_1
builder = LexTableBuilder::create(grammar);
following_tokens_by_token_index[1].insert(2);
AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsTrue());
AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse());
});
});