In lexer, prefer tokens to skipped separator characters
Previously, newlines in Go and JavaScript were being parsed as meaningless separator characters instead of as statement terminators.
parent 220e081c49 · commit e93e254518
26 changed files with 5559 additions and 6650 deletions
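The change is easiest to see as a tie-breaking rule: when a character could either be skipped as a separator or consumed on the way to a real token, the lexer now takes the token path. The sketch below is illustrative only — the enum, struct, and resolve function are hypothetical stand-ins, not tree-sitter's actual API:

#include <cassert>

// Hypothetical stand-ins for the two competing interpretations of '\n'.
enum class ActionKind { SkipSeparator, AdvanceTowardToken };

struct CandidateAction {
  ActionKind kind;
};

// The commit's rule: an action that makes progress toward a token beats
// one that merely skips the character as a separator.
CandidateAction resolve(CandidateAction a, CandidateAction b) {
  if (a.kind == ActionKind::AdvanceTowardToken) return a;
  return b;
}

int main() {
  // In Go and JavaScript, '\n' matches both a skipped separator rule and a
  // statement-terminator token; preferring the token keeps the newline.
  CandidateAction separator = {ActionKind::SkipSeparator};
  CandidateAction terminator = {ActionKind::AdvanceTowardToken};
  assert(resolve(separator, terminator).kind == ActionKind::AdvanceTowardToken);
}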
@@ -8,7 +8,7 @@ using namespace build_tables;
START_TEST

describe("resolving parse conflicts", []() {
  bool should_update;
  bool update;

  PreparedGrammar parse_grammar({
    { "rule1", seq({ sym("rule2"), sym("token2") }) },
@@ -37,31 +37,67 @@ describe("resolving parse conflicts", []() {
  });

  it("favors non-errors over lexical errors", [&]() {
    should_update = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2));
    AssertThat(should_update, IsTrue());
    update = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2, {0}));
    AssertThat(update, IsTrue());

    should_update = manager->resolve_lex_action(LexAction::Advance(2), LexAction::Error());
    AssertThat(should_update, IsFalse());
    update = manager->resolve_lex_action(LexAction::Advance(2, {0}), LexAction::Error());
    AssertThat(update, IsFalse());
  });

  describe("accept-token/advance conflicts", [&]() {
    describe("when the accept-token has higher precedence", [&]() {
      it("prefers the accept", [&]() {
        update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -1 }));
        AssertThat(update, IsFalse());

        update = manager->resolve_lex_action(LexAction::Advance(1, { -1 }), LexAction::Accept(sym3, 2));
        AssertThat(update, IsTrue());
      });
    });

    describe("when the actions have the same precedence", [&]() {
      it("prefers the advance", [&]() {
        update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { 0 }));
        AssertThat(update, IsTrue());

        update = manager->resolve_lex_action(LexAction::Advance(1, { 0 }), LexAction::Accept(sym3, 0));
        AssertThat(update, IsFalse());
      });
    });

    describe("when the advance has conflicting precedences compared to the accept", [&]() {
      it("prefers the advance", [&]() {
        update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -2, 2 }));
        AssertThat(update, IsTrue());

        update = manager->resolve_lex_action(LexAction::Advance(1, { -2, 2 }), LexAction::Accept(sym3, 0));
        AssertThat(update, IsFalse());
      });

      it_skip("records a conflict", [&]() {
        manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -2, 2 }));
      });
    });
  });

  describe("accept-token/accept-token conflicts", [&]() {
    describe("when one token has a higher precedence than the other", [&]() {
      it("prefers the token with the higher precedence", [&]() {
        should_update = manager->resolve_lex_action(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0));
        AssertThat(should_update, IsFalse());
        update = manager->resolve_lex_action(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0));
        AssertThat(update, IsFalse());

        should_update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym3, 2));
        AssertThat(should_update, IsTrue());
        update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym3, 2));
        AssertThat(update, IsTrue());
      });
    });

    describe("when both tokens have the same precedence", [&]() {
      it("prefers the token listed earlier in the grammar", [&]() {
        should_update = manager->resolve_lex_action(LexAction::Accept(sym1, 0), LexAction::Accept(sym2, 0));
        AssertThat(should_update, IsFalse());
        update = manager->resolve_lex_action(LexAction::Accept(sym1, 0), LexAction::Accept(sym2, 0));
        AssertThat(update, IsFalse());

        should_update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym1, 0));
        AssertThat(should_update, IsTrue());
        update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym1, 0));
        AssertThat(update, IsTrue());
      });
    });
  });
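Taken together, the lexical-conflict tests above pin down a small precedence order on lex actions. The following condensed paraphrase is inferred purely from the assertions; the field names and the real ConflictManager implementation may differ:

#include <algorithm>
#include <vector>

enum class Kind { Error, Advance, Accept };

struct Action {
  Kind kind;
  int precedence = 0;           // Accept: the token's precedence
  int symbol_index = 0;         // Accept: the token's position in the grammar
  std::vector<int> precedences; // Advance: precedences of in-progress tokens
};

static int max_precedence(const std::vector<int> &precs) {
  return precs.empty() ? 0 : *std::max_element(precs.begin(), precs.end());
}

// Returns true when new_action should replace old_action.
bool resolve_sketch(const Action &old_action, const Action &new_action) {
  // 1. Non-errors always beat lexical errors.
  if (old_action.kind == Kind::Error) return true;
  if (new_action.kind == Kind::Error) return false;

  // 2. Accept vs. advance: the accept survives only when its precedence
  //    beats every in-progress token; ties and conflicting precedences
  //    both go to the advance (longest-match behavior).
  if (old_action.kind == Kind::Accept && new_action.kind == Kind::Advance)
    return old_action.precedence <= max_precedence(new_action.precedences);
  if (old_action.kind == Kind::Advance && new_action.kind == Kind::Accept)
    return new_action.precedence > max_precedence(old_action.precedences);

  // 3. Accept vs. accept (the only remaining pairing exercised above):
  //    higher precedence wins; on a tie, the token listed earlier in the
  //    grammar (the lower symbol index) wins.
  if (new_action.precedence != old_action.precedence)
    return new_action.precedence > old_action.precedence;
  return new_action.symbol_index < old_action.symbol_index;
}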
@@ -81,11 +117,11 @@ describe("resolving parse conflicts", []() {
  });

  it("favors non-errors over parse errors", [&]() {
    should_update = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2, { 0 }));
    AssertThat(should_update, IsTrue());
    update = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2, { 0 }));
    AssertThat(update, IsTrue());

    should_update = manager->resolve_parse_action(sym1, ParseAction::Shift(2, { 0 }), ParseAction::Error());
    AssertThat(should_update, IsFalse());
    update = manager->resolve_parse_action(sym1, ParseAction::Shift(2, { 0 }), ParseAction::Error());
    AssertThat(update, IsFalse());
  });

  describe("shift/reduce conflicts", [&]() {
@@ -34,7 +34,7 @@ namespace tree_sitter {
  rule_ptr i_aux_token(size_t index) {
    return make_shared<rules::Symbol>(index, SymbolOption(SymbolOptionAuxiliary|SymbolOptionToken));
  }

  rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
    return make_shared<Metadata>(rule, values);
  }
@@ -15,9 +15,9 @@ describe("expanding token rules", []() {
        pattern("x*"),
        i_sym(11) }) },
    }, {});

    auto result = expand_tokens(grammar);

    AssertThat(result.second, Equals((const GrammarError *)nullptr));
    AssertThat(result.first, Equals(PreparedGrammar({
      { "rule_A", seq({
@@ -26,7 +26,7 @@ describe("expanding token rules", []() {
        i_sym(11) }) },
    }, {})));
  });

  it("replaces string rules with a sequence of characters", [&]() {
    PreparedGrammar grammar({
      { "rule_A", seq({
@@ -34,9 +34,9 @@ describe("expanding token rules", []() {
        str("xyz"),
        i_sym(11) }) },
    }, {});

    auto result = expand_tokens(grammar);

    AssertThat(result.second, Equals((const GrammarError *)nullptr));
    AssertThat(result.first, Equals(PreparedGrammar({
      { "rule_A", seq({
@@ -45,7 +45,7 @@ describe("expanding token rules", []() {
        i_sym(11) }) },
    }, {})));
  });

  it("returns an error when the grammar contains an invalid regex", [&]() {
    PreparedGrammar grammar({
      { "rule_A", seq({
@@ -53,7 +53,7 @@ describe("expanding token rules", []() {
        str("xyz"),
        pattern("[") }) },
    }, {});

    auto result = expand_tokens(grammar);

    AssertThat(result.second, EqualsPointer(new GrammarError(GrammarErrorTypeRegex, "unmatched open paren")));
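The expand_tokens tests above describe a mechanical rewrite: pattern rules are parsed into regex rule trees, string rules become explicit character sequences, and an unparseable regex surfaces as a GrammarError. A hypothetical helper for just the string case, reusing the spec's character and seq helpers and assuming seq accepts a vector of rules:

// Hypothetical sketch of the string expansion described above:
// str("xyz") -> seq({ character({'x'}), character({'y'}), character({'z'}) }).
// Not the library's actual implementation.
rule_ptr expand_string_sketch(const std::string &text) {
  std::vector<rule_ptr> parts;
  for (char c : text)
    parts.push_back(character({ c }));  // one single-character rule per char
  return seq(parts);
}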
@@ -82,11 +82,11 @@ describe("extracting tokens from a grammar", []() {
    pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({}, {
      { "rule_A", seq({ str("ab"), i_sym(0) }) }
    }));

    AssertThat(result.first, Equals(PreparedGrammar({}, {
      { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
    })));

    AssertThat(result.second, Equals(PreparedGrammar({}, {
      { "'ab'", str("ab") },
    })));
@@ -99,34 +99,34 @@ describe("extracting tokens from a grammar", []() {
{ "rule_B", pattern("a|b") },
|
||||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
}, {}));
|
||||
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "rule_A", i_token(0) }
|
||||
}, {})));
|
||||
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({
|
||||
{ "rule_B", pattern("a|b") },
|
||||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
}, {})));
|
||||
});
|
||||
|
||||
|
||||
it("updates symbols whose indices need to change due to deleted rules", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_sym(0) },
|
||||
{ "rule_C", i_sym(1) },
|
||||
}, {}));
|
||||
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "rule_B", i_token(0) },
|
||||
{ "rule_C", i_sym(0) },
|
||||
}, {})));
|
||||
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({
|
||||
{ "rule_A", str("ab") },
|
||||
}, {})));
|
||||
});
|
||||
|
||||
|
||||
it("updates the grammar's ubiquitous_tokens", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", str("ab") },
|
||||
|
|
@@ -135,24 +135,24 @@ describe("extracting tokens from a grammar", []() {
    }, {}, PreparedGrammarOptions({
      { Symbol(0) }
    })));

    AssertThat(result.first.options.ubiquitous_tokens, Equals(vector<Symbol>({
      { Symbol(0, SymbolOptionToken) }
    })));
  });

  it("extracts entire auxiliary rules", [&]() {
    auto result = extract_tokens(PreparedGrammar({}, {
      { "rule_A", str("ab") },
      { "rule_B", i_aux_sym(0) },
      { "rule_C", i_aux_sym(1) },
    }));

    AssertThat(result.first, Equals(PreparedGrammar({}, {
      { "rule_B", i_aux_token(0) },
      { "rule_C", i_aux_sym(0) },
    })));

    AssertThat(result.second, Equals(PreparedGrammar({}, {
      { "rule_A", str("ab") },
    })));
@@ -13,13 +13,13 @@ describe("parsing regex patterns", []() {
"[aAeE]",
|
||||
character({ 'a', 'A', 'e', 'E' })
|
||||
},
|
||||
|
||||
|
||||
{
|
||||
"'.' characters as wildcards",
|
||||
".",
|
||||
CharacterSet({'\n'}).complement().copy()
|
||||
},
|
||||
|
||||
|
||||
{
|
||||
"character classes",
|
||||
"\\w-\\d",
|
||||
|
|
@@ -28,7 +28,7 @@ describe("parsing regex patterns", []() {
        character({ '-' }),
        character({ {'0', '9'} }) })
    },

    {
      "choices",
      "ab|cd|ef",
@@ -47,7 +47,7 @@ describe("parsing regex patterns", []() {
        })
      })
    },

    {
      "simple sequences",
      "abc",
@@ -56,25 +56,25 @@ describe("parsing regex patterns", []() {
        character({ 'b' }),
        character({ 'c' }) })
    },

    {
      "character ranges",
      "[12a-dA-D3]",
      character({ {'1', '3'}, {'a', 'd'}, { 'A', 'D' }, })
    },

    {
      "negated characters",
      "[^a\\d]",
      character({ {'a'}, {'0', '9'} }, false)
    },

    {
      "backslashes",
      "\\\\",
      character({ '\\' })
    },

    {
      "character groups in sequences",
      "x([^x]|\\\\x)*x",
@@ -87,7 +87,7 @@ describe("parsing regex patterns", []() {
        character({ 'x' })
      })
    },

    {
      "choices in sequences",
      "(a|b)cd",
@@ -100,7 +100,7 @@ describe("parsing regex patterns", []() {
        character({ 'd' })
      })
    },

    {
      "escaped parentheses",
      "a\\(b",
@@ -110,7 +110,7 @@ describe("parsing regex patterns", []() {
        character({ 'b' })
      })
    },

    {
      "escaped periods",
      "a\\.",
@@ -119,7 +119,7 @@ describe("parsing regex patterns", []() {
        character({ '.' })
      })
    },

    {
      "plus repeats",
      "(ab)+(cd)+",
@@ -134,7 +134,7 @@ describe("parsing regex patterns", []() {
        }),
      })
    },

    {
      "asterisk repeats",
      "(ab)*(cd)*",
@@ -143,7 +143,7 @@ describe("parsing regex patterns", []() {
        repeat(seq({ character({ 'c' }), character({ 'd' }) })),
      })
    },

    {
      "optional rules",
      "a(bc)?",
@@ -156,7 +156,7 @@ describe("parsing regex patterns", []() {
      })
    }
  };

  vector<tuple<string, string, const char *>> invalid_inputs = {
    {
      "mismatched open parens",
@@ -189,23 +189,23 @@ describe("parsing regex patterns", []() {
"unmatched close square bracket",
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
for (auto &triple : valid_inputs) {
|
||||
string description = get<0>(triple);
|
||||
string regex = get<1>(triple);
|
||||
rule_ptr rule = get<2>(triple);
|
||||
|
||||
|
||||
it(("parses " + description).c_str(), [&]() {
|
||||
auto result = parse_regex(regex);
|
||||
AssertThat(result.first, EqualsPointer(rule));
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
for (auto &triple : invalid_inputs) {
|
||||
string description = get<0>(triple);
|
||||
string regex = get<1>(triple);
|
||||
const char *expected_message = get<2>(triple);
|
||||
|
||||
|
||||
it(("handles invalid regexes with " + description).c_str(), [&]() {
|
||||
auto result = parse_regex(regex);
|
||||
AssertThat(result.second, !Equals((const GrammarError *)nullptr));
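Both loops rely on the same parse_regex shape: a pair<rule_ptr, const GrammarError *> where exactly one side is meaningful. A hypothetical call site under that assumption (GrammarError's message member is likewise assumed, based on the string argument to its constructor seen above):

#include <stdexcept>
#include <string>

// Hypothetical wrapper around parse_regex, based only on the signature the
// assertions imply; the `message` member of GrammarError is an assumption.
rule_ptr rule_for_pattern(const std::string &regex) {
  auto result = parse_regex(regex);
  if (result.second != nullptr)
    throw std::runtime_error(result.second->message);  // e.g. "unmatched open paren"
  return result.first;  // a tree of seq / choice / repeat / character rules
}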