Define repeat rule in terms of repeat1 rule

This commit is contained in:
Max Brunsfeld 2015-10-12 15:33:00 -07:00
parent 5c67f58a4b
commit 82726ad53b
20 changed files with 7861 additions and 7961 deletions

View file

@ -20,6 +20,7 @@ enum Associativity {
rule_ptr blank();
rule_ptr choice(const std::vector<rule_ptr> &);
rule_ptr repeat(const rule_ptr &);
rule_ptr repeat1(const rule_ptr &);
rule_ptr seq(const std::vector<rule_ptr> &);
rule_ptr sym(const std::string &);
rule_ptr pattern(const std::string &);

View file

@ -127,11 +127,11 @@ describe("LexItemSet::transitions()", [&]() {
it("handles repeats", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), repeat(seq({
LexItem(Symbol(1), repeat1(seq({
character({ 'a' }),
character({ 'b' }),
}))),
LexItem(Symbol(2), repeat(character({ 'c' }))),
LexItem(Symbol(2), repeat1(character({ 'c' }))),
});
AssertThat(
@ -142,17 +142,19 @@ describe("LexItemSet::transitions()", [&]() {
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'b' }),
repeat(seq({
repeat1(seq({
character({ 'a' }),
character({ 'b' }),
}))
}))
})),
LexItem(Symbol(1), character({ 'b' })),
})
},
{
CharacterSet().include('c'),
LexItemSet({
LexItem(Symbol(2), repeat(character({ 'c' }))),
LexItem(Symbol(2), repeat1(character({ 'c' }))),
LexItem(Symbol(2), blank()),
})
}
})));

View file

@ -11,13 +11,13 @@ using prepare_grammar::expand_repeats;
describe("expand_repeats", []() {
it("replaces repeat rules with pairs of recursive rules", [&]() {
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, repeat(i_token(0))),
Variable("rule0", VariableTypeNamed, repeat1(i_token(0))),
}, {}, {}};
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, choice({ i_sym(1), blank() })),
Variable("rule0", VariableTypeNamed, i_sym(1)),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(0),
choice({ i_sym(1), blank() })
@ -29,7 +29,7 @@ describe("expand_repeats", []() {
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, seq({
i_token(10),
repeat(i_token(11)),
repeat1(i_token(11)),
})),
}, {}, {}};
@ -38,7 +38,7 @@ describe("expand_repeats", []() {
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, seq({
i_token(10),
choice({ i_sym(1), blank() })
i_sym(1),
})),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(11),
@ -51,14 +51,17 @@ describe("expand_repeats", []() {
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, choice({
i_token(10),
repeat(i_token(11))
repeat1(i_token(11))
})),
}, {}, {}};
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, choice({ i_token(10), i_sym(1), blank() })),
Variable("rule0", VariableTypeNamed, choice({
i_token(10),
i_sym(1),
})),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(11),
choice({ i_sym(1), blank() }),
@ -69,12 +72,12 @@ describe("expand_repeats", []() {
it("does not create redundant auxiliary rules", [&]() {
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, choice({
seq({ i_token(1), repeat(i_token(4)) }),
seq({ i_token(2), repeat(i_token(4)) }),
seq({ i_token(1), repeat1(i_token(4)) }),
seq({ i_token(2), repeat1(i_token(4)) }),
})),
Variable("rule1", VariableTypeNamed, seq({
i_token(3),
repeat(i_token(4))
repeat1(i_token(4))
})),
}, {}, {}};
@ -82,12 +85,12 @@ describe("expand_repeats", []() {
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, choice({
seq({ i_token(1), choice({ i_sym(2), blank() }) }),
seq({ i_token(2), choice({ i_sym(2), blank() }) }),
seq({ i_token(1), i_sym(2) }),
seq({ i_token(2), i_sym(2) }),
})),
Variable("rule1", VariableTypeNamed, seq({
i_token(3),
choice({ i_sym(2), blank() })
i_sym(2),
})),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(4),
@ -99,8 +102,8 @@ describe("expand_repeats", []() {
it("can replace multiple repeats in the same rule", [&]() {
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, seq({
repeat(i_token(10)),
repeat(i_token(11)),
repeat1(i_token(10)),
repeat1(i_token(11)),
})),
}, {}, {}};
@ -108,8 +111,8 @@ describe("expand_repeats", []() {
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, seq({
choice({ i_sym(1), blank() }),
choice({ i_sym(2), blank() }),
i_sym(1),
i_sym(2),
})),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(10),
@ -124,21 +127,15 @@ describe("expand_repeats", []() {
it("can replace repeats in multiple rules", [&]() {
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, repeat(i_token(10))),
Variable("rule1", VariableTypeNamed, repeat(i_token(11))),
Variable("rule0", VariableTypeNamed, repeat1(i_token(10))),
Variable("rule1", VariableTypeNamed, repeat1(i_token(11))),
}, {}, {}};
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>({
Variable("rule0", VariableTypeNamed, choice({
i_sym(2),
blank(),
})),
Variable("rule1", VariableTypeNamed, choice({
i_sym(3),
blank(),
})),
Variable("rule0", VariableTypeNamed, i_sym(2)),
Variable("rule1", VariableTypeNamed, i_sym(3)),
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
i_token(10),
choice({ i_sym(2), blank() }),

View file

@ -64,7 +64,7 @@ describe("extract_choices", []() {
it("does not move choices outside of repeats", [&]() {
auto rule = seq({
choice({ sym("a"), sym("b") }),
repeat(seq({
repeat1(seq({
sym("c"),
choice({
sym("d"),
@ -78,7 +78,7 @@ describe("extract_choices", []() {
AssertThat(extract_choices(rule), Equals(rule_vector({
seq({
sym("a"),
repeat(choice({
repeat1(choice({
seq({ sym("c"), sym("d"), sym("f") }),
seq({ sym("c"), sym("e"), sym("f") }),
})),
@ -86,7 +86,7 @@ describe("extract_choices", []() {
}),
seq({
sym("b"),
repeat(choice({
repeat1(choice({
seq({ sym("c"), sym("d"), sym("f") }),
seq({ sym("c"), sym("e"), sym("f") }),
})),

View file

@ -14,18 +14,18 @@ using prepare_grammar::InitialSyntaxGrammar;
describe("extract_tokens", []() {
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
Variable("rule_A", VariableTypeNamed, repeat(seq({
Variable("rule_A", VariableTypeNamed, repeat1(seq({
str("ab"),
pattern("cd*"),
choice({
i_sym(1),
i_sym(2),
token(repeat(choice({ str("ef"), str("gh") }))),
token(repeat1(choice({ str("ef"), str("gh") }))),
}),
}))),
Variable("rule_B", VariableTypeNamed, pattern("ij+")),
Variable("rule_C", VariableTypeNamed, choice({ str("kl"), blank() })),
Variable("rule_D", VariableTypeNamed, repeat(i_sym(3)))
Variable("rule_D", VariableTypeNamed, repeat1(i_sym(3)))
}, {}, {}});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
@ -35,7 +35,7 @@ describe("extract_tokens", []() {
AssertThat(error, Equals<const GrammarError *>(nullptr));
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, repeat(seq({
Variable("rule_A", VariableTypeNamed, repeat1(seq({
// This string is now the first token in the lexical grammar.
i_token(0),
@ -58,7 +58,7 @@ describe("extract_tokens", []() {
}))),
Variable("rule_C", VariableTypeNamed, choice({ i_token(4), blank() })),
Variable("rule_D", VariableTypeNamed, repeat(i_sym(2))),
Variable("rule_D", VariableTypeNamed, repeat1(i_sym(2))),
})));
AssertThat(lexical_grammar.variables, Equals(vector<Variable>({
@ -69,7 +69,7 @@ describe("extract_tokens", []() {
Variable("/cd*/", VariableTypeAuxiliary, pattern("cd*")),
// Rules marked as tokens become hidden rules.
Variable("/(ef|gh)*/", VariableTypeAuxiliary, repeat(choice({
Variable("/(ef|gh)*/", VariableTypeAuxiliary, repeat1(choice({
str("ef"),
str("gh")
}))),

View file

@ -139,12 +139,8 @@ describe("parse_regex", []() {
"plus repeats",
"(ab)+(cd)+",
seq({
seq({
seq({ character({ 'a' }), character({ 'b' }) }),
repeat(seq({ character({ 'a' }), character({ 'b' }) })) }),
seq({
seq({ character({ 'c' }), character({ 'd' }) }),
repeat(seq({ character({ 'c' }), character({ 'd' }) })) }) })
repeat1(seq({ character({ 'a' }), character({ 'b' }) })),
repeat1(seq({ character({ 'c' }), character({ 'd' }) })) })
},
{

View file

@ -1,3 +1,23 @@
======================================
strings with escaped quotes
======================================
"ok\""
---
(string)
======================================
strings with other escaped characters
======================================
"ok\n\r\n\\ok"
---
(string)
=============================
floating point numbers
=============================

View file

@ -4,10 +4,6 @@ namespace tree_sitter_examples {
using namespace tree_sitter;
// One-or-more repetition, expressed as: element followed by zero-or-more elements.
// NOTE(review): this examples-helper appears to be deleted by this commit (hunk
// shrinks -4,10 to +4,6) now that repeat1 is a core rule — confirm against the diff.
rule_ptr repeat1(rule_ptr element) {
return seq({ element, repeat(element) });
}
// One-or-more comma-separated occurrences: element ("," element)*.
rule_ptr comma_sep1(rule_ptr element) {
return seq({ element, repeat(seq({ str(","), element })) });
}

View file

@ -7,7 +7,6 @@ namespace tree_sitter_examples {
using namespace tree_sitter;
rule_ptr repeat1(rule_ptr element);
rule_ptr comma_sep1(rule_ptr element);
rule_ptr comma_sep(rule_ptr element);
rule_ptr optional(rule_ptr rule);

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -16,6 +16,7 @@
#include "compiler/rules/metadata.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/blank.h"
namespace tree_sitter {
namespace build_tables {
@ -35,17 +36,16 @@ class LexTableBuilder {
ParseTable *parse_table;
unordered_map<const LexItemSet, LexStateId, LexItemSet::Hash> lex_state_ids;
LexTable lex_table;
rule_ptr separator_rule;
vector<rule_ptr> separator_rules;
public:
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
: lex_grammar(lex_grammar),
conflict_manager(lex_grammar),
parse_table(parse_table) {
vector<rule_ptr> separators;
for (const rule_ptr &rule : lex_grammar.separators)
separators.push_back(rules::Repeat::build(rule));
separator_rule = rules::Choice::build(separators);
separator_rules.push_back(rules::Repeat::build(rule));
separator_rules.push_back(rules::Blank::build());
}
LexTable build() {
@ -64,15 +64,34 @@ class LexTableBuilder {
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
LexItemSet result;
for (const Symbol &symbol : symbols) {
if (symbol == rules::ERROR())
vector<rule_ptr> rules;
if (symbol == rules::ERROR()) {
continue;
else if (symbol == rules::END_OF_INPUT())
result.entries.insert(
LexItem(symbol, after_separators(CharacterSet().include(0).copy())));
else if (symbol.is_token)
result.entries.insert(LexItem(
symbol, after_separators(lex_grammar.variables[symbol.index].rule)));
} else if (symbol == rules::END_OF_INPUT()) {
rules.push_back(CharacterSet().include(0).copy());
} else if (symbol.is_token) {
rule_ptr rule = lex_grammar.variables[symbol.index].rule;
auto choice = rule->as<rules::Choice>();
if (choice)
for (const rule_ptr &element : choice->elements)
rules.push_back(element);
else
rules.push_back(rule);
}
for (const rule_ptr &rule : rules)
for (const rule_ptr &separator_rule : separator_rules)
result.entries.insert(LexItem(
symbol, rules::Seq::build({
rules::Metadata::build(
separator_rule,
{
{ rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 },
}),
rule,
})));
}
return result;
}
@ -126,16 +145,6 @@ class LexTableBuilder {
lex_table.state(state_id).is_token_start = true;
}
rule_ptr after_separators(rule_ptr rule) {
return rules::Seq::build({
make_shared<rules::Metadata>(
separator_rule, map<rules::MetadataKey, int>({
{ rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 },
})),
rule,
});
}
PrecedenceRange precedence_range_for_item_set(const LexItemSet &item_set) const {
PrecedenceRange result;
for (const auto &item : item_set.entries) {

View file

@ -3,6 +3,7 @@
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/repeat.h"
namespace tree_sitter {
namespace build_tables {
@ -29,7 +30,7 @@ class GetCompletionStatus : public rules::RuleFn<CompletionStatus> {
}
CompletionStatus apply_to(const rules::Repeat *rule) {
return { true, 0, AssociativityNone };
return apply(rule->content);
}
CompletionStatus apply_to(const rules::Blank *rule) {

View file

@ -79,7 +79,7 @@ class LexItemTransitions : public rules::RuleFn<void> {
void apply_to(const rules::Seq *rule) {
map<CharacterSet, LexItemSet> left_transitions;
LexItemTransitions(&left_transitions, item_lhs).apply(rule->left);
for (auto &pair : left_transitions)
for (const auto &pair : left_transitions)
merge_transition(
transitions, pair.first,
transform_item_set(pair.second, [&rule](rule_ptr item_rule) {
@ -93,22 +93,24 @@ class LexItemTransitions : public rules::RuleFn<void> {
void apply_to(const rules::Repeat *rule) {
map<CharacterSet, LexItemSet> content_transitions;
LexItemTransitions(&content_transitions, item_lhs).apply(rule->content);
for (auto &pair : content_transitions)
for (const auto &pair : content_transitions) {
merge_transition(transitions, pair.first, pair.second);
merge_transition(
transitions, pair.first,
transform_item_set(pair.second, [&rule](rule_ptr item_rule) {
return rules::Seq::build({ item_rule, rule->copy() });
}));
}
}
void apply_to(const rules::Metadata *rule) {
map<CharacterSet, LexItemSet> content_transitions;
LexItemTransitions(&content_transitions, item_lhs).apply(rule->rule);
for (auto &pair : content_transitions)
for (const auto &pair : content_transitions)
merge_transition(
transitions, pair.first,
transform_item_set(pair.second, [&rule](rule_ptr item_rule) {
return make_shared<rules::Metadata>(item_rule, rule->value);
return rules::Metadata::build(item_rule, rule->value);
}));
}

View file

@ -6,6 +6,7 @@
#include "compiler/rules/choice.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/repeat.h"
namespace tree_sitter {
namespace build_tables {
@ -17,7 +18,7 @@ class CanBeBlank : public rules::RuleFn<bool> {
}
bool apply_to(const rules::Repeat *rule) {
return true;
return apply(rule->content);
}
bool apply_to(const rules::Choice *rule) {

View file

@ -30,16 +30,14 @@ class ExpandRepeats : public rules::IdentityRuleFn {
size_t repeat_count;
vector<pair<rule_ptr, Symbol>> existing_repeats;
rule_ptr expand_repeat(const Repeat *rule) {
for (const auto pair : existing_repeats) {
rule_ptr apply_to(const Repeat *rule) {
for (const auto pair : existing_repeats)
if (pair.first->operator==(*rule))
return pair.second.copy();
}
rule_ptr inner_rule = apply(rule->content);
size_t index = aux_rules.size();
string helper_rule_name =
rule_name + string("_repeat") + to_string(++repeat_count);
string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
Symbol repeat_symbol(offset + index);
existing_repeats.push_back({ rule->copy(), repeat_symbol });
aux_rules.push_back(Variable(
@ -49,10 +47,6 @@ class ExpandRepeats : public rules::IdentityRuleFn {
return repeat_symbol.copy();
}
rule_ptr apply_to(const Repeat *rule) {
return Choice::build({ expand_repeat(rule), make_shared<Blank>() });
}
public:
explicit ExpandRepeats(size_t offset) : offset(offset) {}

View file

@ -76,15 +76,15 @@ class PatternParser {
switch (peek()) {
case '*':
next();
result = Repeat::build(result);
result = Choice::build({ Repeat::build(result), Blank::build() });
break;
case '+':
next();
result = make_shared<Seq>(result, Repeat::build(result));
result = Repeat::build(result);
break;
case '?':
next();
result = Choice::build({ result, make_shared<Blank>() });
result = Choice::build({ result, Blank::build() });
break;
}
}

View file

@ -2,6 +2,7 @@
#include <string>
#include <map>
#include "compiler/rules/visitor.h"
#include "compiler/rules/blank.h"
namespace tree_sitter {
namespace rules {

View file

@ -36,6 +36,10 @@ rule_ptr choice(const vector<rule_ptr> &rules) {
}
// Zero-or-more repetition, defined in terms of repeat1 (the point of this
// commit): content* == (content+ | blank).
rule_ptr repeat(const rule_ptr &content) {
return choice({ repeat1(content), blank() });
}
// One-or-more repetition: the core rules::Repeat node now denotes content+
// directly (callers that need content* go through repeat()).
rule_ptr repeat1(const rule_ptr &content) {
return rules::Repeat::build(content);
}