Make grammars' separator characters configurable

This commit is contained in:
Max Brunsfeld 2014-06-26 07:31:08 -07:00
parent 8aea89750d
commit a9dff20658
5 changed files with 24 additions and 5 deletions

View file

@ -107,6 +107,18 @@ describe("extracting tokens from a grammar", []() {
})))
});
// Checks that the separator characters given to extract_tokens come back
// unchanged on the lexical grammar half of the result.
it("preserves the separator characters in the lexical grammar", [&]() {
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
{
{ "rule_A", str("ab") },
},
{},
// Third field: presumably the grammar's separator characters (the
// assertion below expects exactly these values back) -- confirm
// against the InternedGrammar declaration.
{ 'x', 'y', 'z' }
});
// result.second is the LexicalGrammar of the returned pair.
AssertThat(result.second.separators, Equals(vector<char>({ 'x', 'y', 'z' })));
});
describe("when an entire rule can be extracted", [&]() {
it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
auto result = extract_tokens(InternedGrammar{

View file

@ -93,11 +93,17 @@ namespace tree_sitter {
lex_table.state(state_id).is_token_start = true;
}
// Collects each separator character of the lexical grammar into a
// CharacterSet, one range entry per character.
CharacterSet separator_set() const {
    set<rules::CharacterRange> separator_ranges;
    for (const char separator : lex_grammar.separators) {
        // Relies on the char -> CharacterRange conversion accepted by insert().
        separator_ranges.insert(separator);
    }
    return CharacterSet(separator_ranges);
}
rules::rule_ptr after_separators(rules::rule_ptr rule) {
return rules::Seq::Build({
make_shared<rules::Metadata>(
make_shared<rules::Repeat>(
CharacterSet({ ' ', '\t', '\n', '\r' }).copy()),
make_shared<rules::Repeat>(separator_set().copy()),
map<rules::MetadataKey, int>({
{rules::START_TOKEN, 1},
{rules::PRECEDENCE, -1},

View file

@ -34,8 +34,8 @@ namespace tree_sitter {
ParseStateId state_id = parse_table.add_state();
parse_state_ids[item_set] = state_id;
add_shift_actions(item_set, state_id);
add_ubiquitous_token_actions(item_set, state_id);
add_reduce_actions(item_set, state_id);
add_ubiquitous_token_actions(item_set, state_id);
return state_id;
} else {
return pair->second;

View file

@ -10,7 +10,8 @@ namespace tree_sitter {
Grammar::Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules) :
rules_(rules),
ubiquitous_tokens_({}) {}
ubiquitous_tokens_({}),
separators_({ ' ', '\r', '\t', '\n' }) {}
bool Grammar::operator==(const Grammar &other) const {
if (other.rules_.size() != rules_.size()) return false;

View file

@ -124,7 +124,7 @@ namespace tree_sitter {
return {
SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
LexicalGrammar(tokens, aux_tokens, {}),
LexicalGrammar(tokens, aux_tokens, input_grammar.separators),
};
}
}