From a77ca1ee08c8047efb32c99c54b5f41efed1a275 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Feb 2014 13:09:00 -0800 Subject: [PATCH] Allow users to put their own auxiliary rules in grammars --- spec/compiler/prepare_grammar_spec.cpp | 221 ++++++++++-------- .../prepare_grammar/expand_repeats.cpp | 9 +- .../prepare_grammar/extract_tokens.cpp | 17 +- 3 files changed, 138 insertions(+), 109 deletions(-) diff --git a/spec/compiler/prepare_grammar_spec.cpp b/spec/compiler/prepare_grammar_spec.cpp index 996ca0b8..e915d391 100644 --- a/spec/compiler/prepare_grammar_spec.cpp +++ b/spec/compiler/prepare_grammar_spec.cpp @@ -7,112 +7,135 @@ using namespace rules; using prepare_grammar::perform; describe("preparing a grammar", []() { - it("moves sub-rules that don't contain symbols into a separate 'lexical' grammar", [&]() { - pair result = perform(Grammar({ - { "rule1", seq({ - character('a'), - character('b'), - seq({ - sym("rule2"), - sym("rule3") }), - seq({ + describe("extracting tokens", []() { + it("moves sub-rules that don't contain symbols into a separate 'lexical' grammar", [&]() { + pair result = perform(Grammar({ + { "rule1", seq({ character('a'), - character('b') }) }) } - })); + character('b'), + seq({ + sym("rule2"), + sym("rule3") }), + seq({ + character('a'), + character('b') }) }) } + })); + + AssertThat(result.first, Equals(Grammar({ + { "rule1", seq({ + aux_sym("token1"), + seq({ + sym("rule2"), + sym("rule3") }), + aux_sym("token1") }) } + }))); + + AssertThat(result.second, Equals(Grammar("", map(), { + { "token1", rules::seq({ + rules::character('a'), + rules::character('b') }) }, + }))); + }); - AssertThat(result.first, Equals(Grammar({ - { "rule1", seq({ - aux_sym("token1"), - seq({ - sym("rule2"), - sym("rule3") }), - aux_sym("token1") }) } - }))); + it("moves entire rules into the lexical grammar when possible, preserving their names", [&]() { + auto result = perform(Grammar({ + { "rule1", sym("rule2") }, + { "rule2", seq({ + character('a'), + character('b') }) } + })); + + AssertThat(result.first, Equals(Grammar({ + { "rule1", sym("rule2") } + }))); + + AssertThat(result.second, Equals(Grammar("", { + { "rule2", seq({ + character('a'), + character('b') }) }, + }))); + }); - AssertThat(result.second, Equals(Grammar("", map(), { - { "token1", rules::seq({ - rules::character('a'), - rules::character('b') }) }, - }))); - }); - - it("moves entire rules into the lexical grammar when possible, preserving their names", [&]() { - auto result = perform(Grammar({ - { "rule1", sym("rule2") }, - { "rule2", seq({ - character('a'), - character('b') }) } - })); + it("moves parts of auxiliary rules into auxiliary lexical rules", []() { + auto result = perform(Grammar("rule1", map(), { + { "rule1", sym("rule2") }, + { "rule2", seq({ + character('a'), + character('b') }) } + })); + + AssertThat(result.first, Equals(Grammar("rule1", map(), { + { "rule1", sym("rule2") } + }))); + + AssertThat(result.second, Equals(Grammar("", map(), { + { "rule2", seq({ + character('a'), + character('b') }) }, + }))); + }); - AssertThat(result.first, Equals(Grammar({ - { "rule1", sym("rule2") } - }))); - - AssertThat(result.second, Equals(Grammar("", { - { "rule2", seq({ - character('a'), - character('b') }) }, - }))); - }); - - it("replaces repeat rules with pairs of recursive rules", [&]() { - Grammar result = perform(Grammar({ - { "rule1", seq({ - sym("x"), - repeat(seq({ sym("a"), sym("b") })), - sym("y") - }) }, - })).first; - - AssertThat(result, Equals(Grammar("rule1", { - { "rule1", seq({ - sym("x"), - aux_sym("repeat_helper1"), - sym("y") - }) }, - }, { - { "repeat_helper1", seq({ - seq({ sym("a"), sym("b") }), - choice({ - aux_sym("repeat_helper1") , - blank() - }), - }) } - }))); + it("does not extract blanks into tokens", [&]() { + pair result = perform(Grammar({ + { "rule1", choice({ sym("rule2"), blank() }) }, + })); + + AssertThat(result.first, Equals(Grammar("rule1", { + { "rule1", choice({ sym("rule2"), blank() }) }, + }))); + + AssertThat(result.second, Equals(Grammar("", map()))); + }); }); - it("does not replace repeat rules that can be moved into the lexical grammar", [&]() { - pair result = perform(Grammar({ - { "rule1", seq({ - sym("x"), - repeat(seq({ str("a"), str("b") })), - sym("y") - }) }, - })); - - AssertThat(result.first, Equals(Grammar("rule1", { - { "rule1", seq({ - sym("x"), - aux_sym("token1"), - sym("y") - }) }, - }))); + describe("expanding repeats", []() { + it("replaces repeat rules with pairs of recursive rules", [&]() { + Grammar result = perform(Grammar({ + { "rule1", seq({ + sym("x"), + repeat(seq({ sym("a"), sym("b") })), + sym("y") + }) }, + })).first; + + AssertThat(result, Equals(Grammar("rule1", { + { "rule1", seq({ + sym("x"), + aux_sym("repeat_helper1"), + sym("y") + }) }, + }, { + { "repeat_helper1", seq({ + seq({ sym("a"), sym("b") }), + choice({ + aux_sym("repeat_helper1"), + blank(), + }), + }) } + }))); + }); - AssertThat(result.second, Equals(Grammar("", map(), { - { "token1", repeat(seq({ str("a"), str("b") })) }, - }))); - }); - - it("does not extract blanks into tokens", [&]() { - pair result = perform(Grammar({ - { "rule1", choice({ sym("rule2"), blank() }) }, - })); - - AssertThat(result.first, Equals(Grammar("rule1", { - { "rule1", choice({ sym("rule2"), blank() }) }, - }))); - - AssertThat(result.second, Equals(Grammar("", map()))); + it("does not replace repeat rules that can be moved into the lexical grammar", [&]() { + pair result = perform(Grammar({ + { "rule1", seq({ + sym("x"), + repeat(seq({ str("a"), str("b") })), + sym("y") + }) }, + })); + + AssertThat(result.first, Equals(Grammar("rule1", { + { "rule1", seq({ + sym("x"), + aux_sym("token1"), + sym("y") + }) }, + }))); + + AssertThat(result.second, Equals(Grammar("", map(), { + { "token1", repeat(seq({ str("a"), str("b") })) }, + }))); + }); }); }); diff --git a/src/compiler/prepare_grammar/expand_repeats.cpp b/src/compiler/prepare_grammar/expand_repeats.cpp index 3520a54d..53db30de 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cpp +++ b/src/compiler/prepare_grammar/expand_repeats.cpp @@ -46,13 +46,16 @@ namespace tree_sitter { }; Grammar expand_repeats(const Grammar &grammar) { - map result; + map rules; + map aux_rules(grammar.aux_rules); RepeatExpander visitor; for (auto pair : grammar.rules) - result.insert({ pair.first, visitor.apply(pair.second) }); + rules.insert({ pair.first, visitor.apply(pair.second) }); + + aux_rules.insert(visitor.aux_rules.begin(), visitor.aux_rules.end()); - return Grammar(grammar.start_rule_name, result, visitor.aux_rules); + return Grammar(grammar.start_rule_name, rules, aux_rules); } } } \ No newline at end of file diff --git a/src/compiler/prepare_grammar/extract_tokens.cpp b/src/compiler/prepare_grammar/extract_tokens.cpp index d512db72..0b97b750 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cpp +++ b/src/compiler/prepare_grammar/extract_tokens.cpp @@ -62,13 +62,14 @@ namespace tree_sitter { pair extract_tokens(const Grammar &input_grammar) { TokenExtractor extractor; map rules; - map aux_rules; map tokens; + map aux_rules; + map aux_tokens; for (auto pair : input_grammar.rules) { string name = pair.first; rule_ptr rule = pair.second; - auto new_rule = extractor.initial_apply(rule); + rule_ptr new_rule = extractor.initial_apply(rule); if (new_rule.get()) rules.insert({ name, new_rule }); else @@ -78,16 +79,18 @@ namespace tree_sitter { for (auto pair : input_grammar.aux_rules) { string name = pair.first; rule_ptr rule = pair.second; - auto new_rule = extractor.initial_apply(rule); + rule_ptr new_rule = extractor.initial_apply(rule); if (new_rule.get()) aux_rules.insert({ name, new_rule }); else - tokens.insert({ name, rule }); + aux_tokens.insert({ name, rule }); } - + + aux_tokens.insert(extractor.tokens.begin(), extractor.tokens.end()); + return { - Grammar(input_grammar.start_rule_name, rules), - Grammar("", tokens, extractor.tokens) + Grammar(input_grammar.start_rule_name, rules, aux_rules), + Grammar("", tokens, aux_tokens) }; } }