Make grammars' separator characters configurable
This commit is contained in:
parent
8aea89750d
commit
a9dff20658
5 changed files with 24 additions and 5 deletions
|
|
@ -107,6 +107,18 @@ describe("extracting tokens from a grammar", []() {
|
|||
})))
|
||||
});
|
||||
|
||||
it("preserves the separator characters in the lexical grammar", [&]() {
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", str("ab") },
|
||||
},
|
||||
{},
|
||||
{ 'x', 'y', 'z' }
|
||||
});
|
||||
|
||||
AssertThat(result.second.separators, Equals(vector<char>({ 'x', 'y', 'z' })));
|
||||
});
|
||||
|
||||
describe("when an entire rule can be extracted", [&]() {
|
||||
it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
|
|
|
|||
|
|
@ -93,11 +93,17 @@ namespace tree_sitter {
|
|||
lex_table.state(state_id).is_token_start = true;
|
||||
}
|
||||
|
||||
CharacterSet separator_set() const {
|
||||
set<rules::CharacterRange> ranges;
|
||||
for (char c : lex_grammar.separators)
|
||||
ranges.insert(c);
|
||||
return CharacterSet(ranges);
|
||||
}
|
||||
|
||||
rules::rule_ptr after_separators(rules::rule_ptr rule) {
|
||||
return rules::Seq::Build({
|
||||
make_shared<rules::Metadata>(
|
||||
make_shared<rules::Repeat>(
|
||||
CharacterSet({ ' ', '\t', '\n', '\r' }).copy()),
|
||||
make_shared<rules::Repeat>(separator_set().copy()),
|
||||
map<rules::MetadataKey, int>({
|
||||
{rules::START_TOKEN, 1},
|
||||
{rules::PRECEDENCE, -1},
|
||||
|
|
|
|||
|
|
@ -34,8 +34,8 @@ namespace tree_sitter {
|
|||
ParseStateId state_id = parse_table.add_state();
|
||||
parse_state_ids[item_set] = state_id;
|
||||
add_shift_actions(item_set, state_id);
|
||||
add_ubiquitous_token_actions(item_set, state_id);
|
||||
add_reduce_actions(item_set, state_id);
|
||||
add_ubiquitous_token_actions(item_set, state_id);
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ namespace tree_sitter {
|
|||
|
||||
Grammar::Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules) :
|
||||
rules_(rules),
|
||||
ubiquitous_tokens_({}) {}
|
||||
ubiquitous_tokens_({}),
|
||||
separators_({ ' ', '\r', '\t', '\n' }) {}
|
||||
|
||||
bool Grammar::operator==(const Grammar &other) const {
|
||||
if (other.rules_.size() != rules_.size()) return false;
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ namespace tree_sitter {
|
|||
|
||||
return {
|
||||
SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
|
||||
LexicalGrammar(tokens, aux_tokens, {}),
|
||||
LexicalGrammar(tokens, aux_tokens, input_grammar.separators),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue