tree-sitter/test/compiler/build_tables/lex_table_builder_test.cc
Max Brunsfeld 1586d70cbe Compute conflicting tokens more precisely
While generating the parse table, keep track of which tokens can follow one another.
Then use this information to evaluate token conflicts more precisely. This will
result in a smaller parse table than the previous, overly-conservative approach.
2017-07-07 17:54:24 -07:00

122 lines
3.5 KiB
C++

#include "test_helper.h"
#include "compiler/lexical_grammar.h"
#include "compiler/build_tables/lex_table_builder.h"
using namespace build_tables;
using namespace rules;
START_TEST
describe("LexTableBuilder::detect_conflict", []() {
// Separator rules passed to every LexicalGrammar built in this suite:
// a single character set containing the space and tab characters.
vector<Rule> separators({
CharacterSet({ ' ', '\t' }),
});
it("returns false for tokens that don't match the same string", [&] {
  // token_0 is the character sequence 'a' 'b' 'c'.
  const LexicalVariable abc_token{
    "token_0",
    VariableTypeNamed,
    Rule::seq({ CharacterSet({ 'a' }), CharacterSet({ 'b' }), CharacterSet({ 'c' }) }),
    false
  };

  // token_1 is the character sequence 'b' 'c' 'd'.
  const LexicalVariable bcd_token{
    "token_1",
    VariableTypeNamed,
    Rule::seq({ CharacterSet({ 'b' }), CharacterSet({ 'c' }), CharacterSet({ 'd' }) }),
    false
  };

  LexicalGrammar grammar{
    { abc_token, bcd_token },
    separators
  };
  auto builder = LexTableBuilder::create(grammar);

  // Neither ordering of the pair is reported as a conflict; both calls
  // receive two empty sets of following tokens.
  AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsFalse());
  AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
});
it("returns true when the left token can match a string that the right token matches, "
"plus a separator character", [&]() {
  LexicalGrammar grammar{
    {
      LexicalVariable{
        "token_0",
        VariableTypeNamed,
        Rule::repeat(CharacterSet().include_all().exclude('\n')), // regex: /.+/
        false
      },
      LexicalVariable{
        "token_1",
        VariableTypeNamed,
        Rule::seq({ CharacterSet({ 'a' }), CharacterSet({ 'b' }), CharacterSet({ 'c' }) }), // string: 'abc'
        true
      },
    },
    separators
  };

  // First pass: token_1 has is_string == true.
  auto builder = LexTableBuilder::create(grammar);
  AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
  AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());

  // Second pass: the same expectations must hold when token_1 is not a string.
  // The builder was created before this mutation, so it is rebuilt here to
  // guarantee that the assertions below run against the modified grammar
  // rather than against whatever state the first builder captured.
  grammar.variables[1].is_string = false;
  builder = LexTableBuilder::create(grammar);
  AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
  AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
});
it("returns true when the left token matches a string that the right token matches, "
"plus the first character of some token that can follow the right token", [&] {
  // Positions of the three tokens inside the grammar's variable list.
  const Symbol::Index ge_index = 0;  // token_0: '>' '='
  const Symbol::Index gt_index = 1;  // token_1: '>'
  const Symbol::Index eq_index = 2;  // token_2: '='

  const LexicalVariable ge_token{
    "token_0",
    VariableTypeNamed,
    Rule::seq({ CharacterSet({ '>' }), CharacterSet({ '=' }) }),
    true
  };
  const LexicalVariable gt_token{
    "token_1",
    VariableTypeNamed,
    Rule::seq({ CharacterSet({ '>' }) }),
    true
  };
  const LexicalVariable eq_token{
    "token_2",
    VariableTypeNamed,
    Rule::seq({ CharacterSet({ '=' }) }),
    true
  };

  LexicalGrammar grammar{
    { ge_token, gt_token, eq_token },
    separators
  };

  // With nothing allowed to follow token_1, neither ordering conflicts.
  auto builder = LexTableBuilder::create(grammar);
  // One follow set per token, all initially empty.
  vector<set<Symbol::Index>> follow_sets(3);
  AssertThat(builder->detect_conflict(ge_index, gt_index, follow_sets), IsFalse());
  AssertThat(builder->detect_conflict(gt_index, ge_index, follow_sets), IsFalse());

  // Once token_2 may follow token_1, token_0 conflicts with token_1
  // (but not the other way around). A fresh builder is used for this pass.
  builder = LexTableBuilder::create(grammar);
  follow_sets[gt_index].insert(eq_index);
  AssertThat(builder->detect_conflict(ge_index, gt_index, follow_sets), IsTrue());
  AssertThat(builder->detect_conflict(gt_index, ge_index, follow_sets), IsFalse());
});
});
END_TEST