tree-sitter/test/compiler/build_tables/lex_table_builder_test.cc
Max Brunsfeld 2c043803f1 Be more conservative about avoiding lexing conflicts when merging states
This fixes a bug in the C++ grammar where the `>>` token was merged into
a state where it was previously not valid, but the `>` token *was*
valid. This caused nested templates like -

std::vector<std::pair<int, int>>

to not parse correctly.
2017-06-22 15:32:13 -07:00

106 lines
2.8 KiB
C++

#include "test_helper.h"
#include "compiler/lexical_grammar.h"
#include "compiler/build_tables/lex_table_builder.h"
using namespace build_tables;
using namespace rules;
START_TEST
describe("LexTableBuilder::detect_conflict", []() {
// Separator rules passed to every grammar built in this suite: one character
// set containing the space and tab characters.
vector<Rule> separators{
  CharacterSet({ ' ', '\t' }),
};
// Variables 0 and 1 are the three-character sequences 'abc' and 'bcd'.
// The test expects detect_conflict to report no conflict in either direction.
it("returns false for tokens that don't match the same string", [&]() {
  // Sequence matching 'a', 'b', 'c'.
  auto abc_rule = Rule::seq({
    CharacterSet({ 'a' }),
    CharacterSet({ 'b' }),
    CharacterSet({ 'c' }),
  });

  // Sequence matching 'b', 'c', 'd'.
  auto bcd_rule = Rule::seq({
    CharacterSet({ 'b' }),
    CharacterSet({ 'c' }),
    CharacterSet({ 'd' }),
  });

  LexicalGrammar grammar{
    {
      LexicalVariable{ "token_1", VariableTypeNamed, abc_rule, false },
      LexicalVariable{ "token_2", VariableTypeNamed, bcd_rule, false },
    },
    separators
  };

  auto builder = LexTableBuilder::create(grammar);

  // Check both argument orders; neither should be flagged.
  AssertThat(builder->detect_conflict(0, 1), IsFalse());
  AssertThat(builder->detect_conflict(1, 0), IsFalse());
});
// Variable 0 is a repeat over every character except newline; variable 1 is
// the literal sequence 'abc'. The test expects detect_conflict(0, 1) to be
// true and detect_conflict(1, 0) to be false, both before and after the
// is_string flag of variable 1 is cleared on the local grammar.
it("returns true when one token matches a string that the other matches, "
   "plus some addition content that begins with a separator character", [&]() {
  // regex: /.+/
  auto any_char_rule = Rule::repeat(CharacterSet().include_all().exclude('\n'));

  // string: 'abc'
  auto abc_rule = Rule::seq({
    CharacterSet({ 'a' }),
    CharacterSet({ 'b' }),
    CharacterSet({ 'c' }),
  });

  LexicalGrammar grammar{
    {
      LexicalVariable{ "token_1", VariableTypeNamed, any_char_rule, false },
      LexicalVariable{ "token_2", VariableTypeNamed, abc_rule, true },
    },
    separators
  };

  auto builder = LexTableBuilder::create(grammar);

  // First pass: token_2 is flagged as a string.
  AssertThat(builder->detect_conflict(0, 1), IsTrue());
  AssertThat(builder->detect_conflict(1, 0), IsFalse());

  // Second pass: same expectations with token_2 no longer flagged as a string.
  // NOTE(review): the builder was created before this mutation; whether it
  // observes the change depends on how LexTableBuilder::create stores the
  // grammar (copy vs. reference) -- confirm against the builder implementation.
  grammar.variables[1].is_string = false;
  AssertThat(builder->detect_conflict(0, 1), IsTrue());
  AssertThat(builder->detect_conflict(1, 0), IsFalse());
});
// Variable 0 is the two-character sequence '>>' and variable 1 is the single
// character '>'; both are flagged as strings. The test expects
// detect_conflict(0, 1) to be true and detect_conflict(1, 0) to be false.
it("returns true when one token matches a string that the other matches, "
   "plus some addition content that matches another one-character token", [&]() {
  // Sequence matching '>' followed by '>'.
  auto double_gt_rule = Rule::seq({
    CharacterSet({ '>' }),
    CharacterSet({ '>' }),
  });

  // Sequence containing only '>'.
  auto single_gt_rule = Rule::seq({
    CharacterSet({ '>' }),
  });

  LexicalGrammar grammar{
    {
      LexicalVariable{ "token_1", VariableTypeNamed, double_gt_rule, true },
      LexicalVariable{ "token_2", VariableTypeNamed, single_gt_rule, true },
    },
    separators
  };

  auto builder = LexTableBuilder::create(grammar);

  AssertThat(builder->detect_conflict(0, 1), IsTrue());
  AssertThat(builder->detect_conflict(1, 0), IsFalse());
});
});
END_TEST