Make separate types for syntax and lexical grammars

This way, the separator characters can be added as a field to
lexical grammars only.
This commit is contained in:
Max Brunsfeld 2014-06-25 13:27:16 -07:00
parent d5674d33c4
commit 7df35f9b8d
49 changed files with 467 additions and 395 deletions

View file

@ -10,16 +10,16 @@ using namespace build_tables;
START_TEST
describe("building parse tables", []() {
auto parse_grammar = PreparedGrammar({
SyntaxGrammar parse_grammar({
{ "rule0", choice({ i_sym(1), i_sym(2) }) },
{ "rule1", i_token(0) },
{ "rule2", i_token(1) },
}, {}).ubiquitous_tokens({ Symbol(2, SymbolOptionToken) });
}, {}, { Symbol(2, SymbolOptionToken) });
PreparedGrammar lex_grammar({
LexicalGrammar lex_grammar({
{ "token0", pattern("[a-c]") },
{ "token1", pattern("[b-d]") },
}, {});
}, {}, {});
it("first looks for the start rule and its item set closure", [&]() {
auto result = build_parse_table(parse_grammar, lex_grammar);

View file

@ -1,6 +1,7 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/parse_conflict_manager.h"
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using namespace build_tables;
@ -10,16 +11,16 @@ START_TEST
describe("resolving parse conflicts", []() {
bool update;
PreparedGrammar parse_grammar({
SyntaxGrammar parse_grammar({
{ "rule1", seq({ sym("rule2"), sym("token2") }) },
{ "rule2", sym("token1") },
}, {});
}, {}, {});
PreparedGrammar lex_grammar({
LexicalGrammar lex_grammar({
{ "token1", pattern("[a-c]") },
{ "token2", pattern("[b-d]") },
{ "token3", keyword("stuff") },
}, {});
}, {}, {});
describe("lexical conflicts", [&]() {
Symbol sym1(0, SymbolOptionToken);

View file

@ -10,7 +10,7 @@ using namespace rules;
START_TEST
describe("computing FIRST sets", []() {
const PreparedGrammar null_grammar({}, {});
const SyntaxGrammar null_grammar;
describe("for a sequence AB", [&]() {
it("ignores B when A cannot be blank", [&]() {
@ -41,12 +41,12 @@ describe("computing FIRST sets", []() {
i_token(1) }),
i_sym(0) });
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", seq({
i_token(2),
i_token(3),
i_token(4) }) }
}, {});
}, {}, {});
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
Symbol(0, SymbolOptionToken),
@ -59,11 +59,11 @@ describe("computing FIRST sets", []() {
i_sym(0),
i_token(1) });
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", choice({
i_token(0),
blank() }) }
}, {});
}, {}, {});
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
Symbol(0, SymbolOptionToken),
@ -74,12 +74,12 @@ describe("computing FIRST sets", []() {
describe("when there are left-recursive rules", [&]() {
it("terminates", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", choice({
seq({ i_sym(0), i_token(10) }),
i_token(11),
}) },
}, {});
}, {}, {});
auto rule = i_sym(0);

View file

@ -9,14 +9,14 @@ using namespace rules;
START_TEST
describe("computing closures of item sets", []() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "E", seq({
i_sym(1),
i_token(11) }) },
{ "T", seq({
i_token(12),
i_token(13) }) },
}, {});
}, {}, {});
it("adds items at the beginnings of referenced rules", [&]() {
ParseItemSet item_set = item_set_closure(ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0),

View file

@ -8,15 +8,13 @@ using namespace build_tables;
START_TEST
describe("lexical item set transitions", []() {
PreparedGrammar grammar({}, {});
describe("when two items in the set have transitions on the same character", [&]() {
it("merges the transitions by computing the union of the two item sets", [&]() {
LexItemSet set1({
LexItem(Symbol(1), character({ {'a', 'f'} })),
LexItem(Symbol(2), character({ {'e', 'x'} })) });
AssertThat(char_transitions(set1, grammar), Equals(map<CharacterSet, LexItemSet>({
AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
{ CharacterSet({ {'a', 'd'} }), LexItemSet({
LexItem(Symbol(1), blank()) }) },
{ CharacterSet({ {'e', 'f'} }), LexItemSet({
@ -30,10 +28,10 @@ describe("lexical item set transitions", []() {
});
describe("syntactic item set transitions", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "A", blank() },
{ "B", i_token(21) },
}, {});
}, {}, {});
it("computes the closure of the new item sets", [&]() {
ParseItemSet set1({

View file

@ -56,14 +56,14 @@ describe("checking if rules can be blank", [&]() {
});
describe("checking recursively (by expanding non-terminals)", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "A", choice({
seq({ i_sym(0), i_token(11) }),
blank() }) },
{ "B", choice({
seq({ i_sym(1), i_token(12) }),
i_token(13) }) },
}, {});
}, {}, {});
it("terminates for left-recursive rules that can be blank", [&]() {
rule = i_sym(0);

View file

@ -1,26 +1,11 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/rules/metadata.h"
#include "compiler/helpers/containers.h"
using namespace rules;
using namespace build_tables;
// A map from keys of type K to rules that can be compared against a plain
// map<K, rule_ptr>. Two maps are equal when they hold the same number of
// entries and every key of this map is present in the other with a rule that
// compares equal through the pointed-to rule's own operator==.
template<typename K>
class rule_map : public map<K, rule_ptr> {
public:
    // Builds the map from a braced list of { key, rule } entries.
    rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}

    bool operator==(const map<K, rule_ptr> &other) const {
        if (other.size() != this->size()) return false;
        for (auto entry = this->begin(); entry != this->end(); ++entry) {
            auto match = other.find(entry->first);
            // A key missing from the other map means the maps differ.
            if (match == other.end()) return false;
            // Compare the rules themselves, not the rule_ptr handles.
            if (!entry->second->operator==(*match->second)) return false;
        }
        return true;
    }
};
START_TEST
describe("rule transitions", []() {

View file

@ -0,0 +1,52 @@
#ifndef HELPERS_CONTAINERS_H_
#define HELPERS_CONTAINERS_H_
#include <map>
#include <vector>
#include <string>
#include <initializer_list>
#include "tree_sitter/compiler.h"
#include "compiler/rules/rule.h"
using std::map;
using std::vector;
using std::string;
using std::initializer_list;
using std::pair;
using tree_sitter::rules::rule_ptr;
// A map from keys of type K to rules that can be compared against a plain
// map<K, rule_ptr>. Two maps are equal when they hold the same number of
// entries and every key of this map is present in the other with a rule that
// compares equal through the pointed-to rule's own operator==.
template<typename K>
class rule_map : public map<K, rule_ptr> {
public:
    // Builds the map from a braced list of { key, rule } entries.
    rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}

    bool operator==(const map<K, rule_ptr> &other) const {
        if (other.size() != this->size()) return false;
        for (auto entry = this->begin(); entry != this->end(); ++entry) {
            auto match = other.find(entry->first);
            // A key missing from the other map means the maps differ.
            if (match == other.end()) return false;
            // Compare the rules themselves, not the rule_ptr handles.
            if (!entry->second->operator==(*match->second)) return false;
        }
        return true;
    }
};
// An ordered list of (name, rule) entries that can be compared against a plain
// vector<pair<string, rule_ptr>>; the specs pass it to Equals(...) as the
// expected value. Equality is positional: both lists must have the same length
// and, at every index, the same name and rules that compare equal through the
// pointed-to rule's own operator==.
class rule_list : public vector<pair<string, rule_ptr>> {
public:
    bool operator==(const vector<pair<string, rule_ptr>> &other) const {
        if (this->size() != other.size()) return false;
        for (size_t i = 0; i < this->size(); i++) {
            // Bind by reference: copying an entry would duplicate its name
            // string and its rule_ptr on every iteration.
            const auto &entry = this->operator[](i);
            const auto &other_entry = other[i];
            // The name is part of the entry. Without this check, lists whose
            // rules match but whose names differ would compare equal, and the
            // names written in expected values would never be verified.
            if (entry.first != other_entry.first)
                return false;
            // Compare the rules themselves, not the rule_ptr handles.
            if (!entry.second->operator==(*other_entry.second))
                return false;
        }
        return true;
    }

    // Builds the list from a braced list of { name, rule } entries.
    rule_list(const initializer_list<pair<string, rule_ptr>> &list) :
        vector<pair<string, rule_ptr>>(list) {}
};
#endif // HELPERS_CONTAINERS_H_

View file

@ -1,6 +1,7 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/prepare_grammar/expand_repeats.h"
#include "compiler/helpers/containers.h"
START_TEST
@ -9,29 +10,33 @@ using prepare_grammar::expand_repeats;
describe("expanding repeat rules in a grammar", []() {
it("replaces repeat rules with pairs of recursive rules", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", repeat(i_token(0)) },
}, {});
}, {}, {});
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", i_aux_sym(0) },
}, {
{ "rule0_repeat0", choice({
seq({
i_token(0),
i_aux_sym(0) }),
blank() }) },
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", choice({ seq({ i_token(0), i_aux_sym(0) }), blank() }) },
})));
});
it("replaces repeats inside of sequences", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", seq({ i_token(10), repeat(i_token(11)) }) },
}, {});
}, {}, {});
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", seq({ i_token(10), i_aux_sym(0) }) },
}, {
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", choice({
seq({ i_token(11), i_aux_sym(0) }),
blank() }) },
@ -39,13 +44,17 @@ describe("expanding repeat rules in a grammar", []() {
});
it("replaces repeats inside of choices", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
}, {});
}, {}, {});
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", choice({ i_token(10), i_aux_sym(0) }) },
}, {
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", choice({
seq({ i_token(11), i_aux_sym(0) }),
blank() }) },
@ -53,13 +62,17 @@ describe("expanding repeat rules in a grammar", []() {
});
it("can replace multiple repeats in the same rule", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", seq({ repeat(i_token(10)), repeat(i_token(11)) }) },
}, {});
}, {}, {});
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", seq({ i_aux_sym(0), i_aux_sym(1) }) },
}, {
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", choice({
seq({
i_token(10),
@ -74,15 +87,19 @@ describe("expanding repeat rules in a grammar", []() {
});
it("can replace repeats in multiple rules", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", repeat(i_token(10)) },
{ "rule1", repeat(i_token(11)) },
}, {});
}, {}, {});
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", i_aux_sym(0) },
{ "rule1", i_aux_sym(1) },
}, {
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", choice({
seq({ i_token(10), i_aux_sym(0) }),
blank() }) },

View file

@ -1,5 +1,6 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/helpers/containers.h"
#include "compiler/prepare_grammar/expand_tokens.h"
START_TEST
@ -9,50 +10,50 @@ using prepare_grammar::expand_tokens;
describe("expanding token rules", []() {
it("replaces regex patterns with their expansion", [&]() {
PreparedGrammar grammar({
LexicalGrammar grammar({
{ "rule_A", seq({
i_sym(10),
pattern("x*"),
i_sym(11) }) },
}, {});
}, {}, {});
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({
i_sym(10),
repeat(character({ 'x' })),
i_sym(11) }) },
}, {})));
})));
});
it("replaces string rules with a sequence of characters", [&]() {
PreparedGrammar grammar({
LexicalGrammar grammar({
{ "rule_A", seq({
i_sym(10),
str("xyz"),
i_sym(11) }) },
}, {});
}, {}, {});
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({
i_sym(10),
seq({ character({ 'x' }), character({ 'y' }), character({ 'z' }) }),
i_sym(11) }) },
}, {})));
})));
});
it("returns an error when the grammar contains an invalid regex", [&]() {
PreparedGrammar grammar({
LexicalGrammar grammar({
{ "rule_A", seq({
pattern("("),
str("xyz"),
pattern("[") }) },
}, {});
}, {}, {});
auto result = expand_tokens(grammar);

View file

@ -1,160 +1,172 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/prepare_grammar/extract_tokens.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepared_grammar.h"
#include "compiler/helpers/containers.h"
START_TEST
using namespace rules;
using prepare_grammar::extract_tokens;
using prepare_grammar::InternedGrammar;
describe("extracting tokens from a grammar", []() {
it("moves string rules into the lexical grammar", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
}, {}));
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
{
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
})));
AssertThat(result.first.aux_rules, IsEmpty())
AssertThat(result.second.rules, IsEmpty())
AssertThat(result.second.aux_rules, Equals(rule_list({
{ "'ab'", str("ab") },
})));
});
it("moves pattern rules into the lexical grammar", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
}, {}));
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
{
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
})));
AssertThat(result.first.aux_rules, IsEmpty())
AssertThat(result.second.rules, IsEmpty())
AssertThat(result.second.aux_rules, Equals(rule_list({
{ "/a+/", pattern("a+") },
})));
});
it("moves other rules marked as tokens into the lexical grammar", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
{ "rule_A", seq({
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
i_sym(0) }) }
}, {}));
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
{
{ "rule_A", seq({
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
i_sym(0) }) }
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
})));
AssertThat(result.first.aux_rules, IsEmpty())
AssertThat(result.second.rules, IsEmpty())
AssertThat(result.second.aux_rules, Equals(rule_list({
{ "(seq /./ (choice 'a' 'b'))", token(seq({ pattern("."), choice({ str("a"), str("b") }) })) },
})));
});
it("does not extract blanks", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
{ "rule_A", choice({ i_sym(0), blank() }) },
}, {}));
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
{
{ "rule_A", choice({ i_sym(0), blank() }) },
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", choice({ i_sym(0), blank() }) },
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({}, {})));
})));
AssertThat(result.first.aux_rules, IsEmpty())
AssertThat(result.second.rules, IsEmpty())
AssertThat(result.second.aux_rules, IsEmpty())
});
it("does not create duplicate tokens in the lexical grammar", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
}, {}));
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
{
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
})));
AssertThat(result.first.aux_rules, IsEmpty())
AssertThat(result.second.rules, IsEmpty())
AssertThat(result.second.aux_rules, Equals(rule_list({
{ "'ab'", str("ab") },
})));
});
it("extracts tokens from the grammar's auxiliary rules", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({}, {
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
}));
AssertThat(result.first, Equals(PreparedGrammar({}, {
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
{ "'ab'", str("ab") },
})));
})))
});
describe("when an entire rule can be extracted", [&]() {
it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", i_sym(1) },
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}, {}));
auto result = extract_tokens(InternedGrammar{
{
{ "rule_A", i_sym(1) },
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", i_token(0) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({
})));
AssertThat(result.first.aux_rules, IsEmpty());
AssertThat(result.second.rules, Equals(rule_list({
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}, {})));
})));
AssertThat(result.second.aux_rules, IsEmpty());
});
it("updates symbols whose indices need to change due to deleted rules", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
}, {}));
auto result = extract_tokens(InternedGrammar{
{
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_B", i_token(0) },
{ "rule_C", i_sym(0) },
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({
})));
AssertThat(result.first.aux_rules, IsEmpty());
AssertThat(result.second.rules, Equals(rule_list({
{ "rule_A", str("ab") },
}, {})));
})));
AssertThat(result.second.aux_rules, IsEmpty());
});
it("updates the grammar's ubiquitous_tokens", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
}, {}).ubiquitous_tokens({ Symbol(0) }));
auto result = extract_tokens(InternedGrammar{
{
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
},
{ Symbol(0) },
{}
});
AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
{ Symbol(0, SymbolOptionToken) }
})));
});
it("extracts entire auxiliary rules", [&]() {
auto result = extract_tokens(PreparedGrammar({}, {
{ "rule_A", str("ab") },
{ "rule_B", i_aux_sym(0) },
{ "rule_C", i_aux_sym(1) },
}));
AssertThat(result.first, Equals(PreparedGrammar({}, {
{ "rule_B", i_aux_token(0) },
{ "rule_C", i_aux_sym(0) },
})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
{ "rule_A", str("ab") },
})));
});
});
});

View file

@ -3,6 +3,7 @@
#include "compiler/prepare_grammar/intern_symbols.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/symbol.h"
#include "compiler/helpers/containers.h"
START_TEST
@ -20,11 +21,11 @@ describe("interning symbols in a grammar", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "x", choice({ i_sym(1), i_sym(2) }) },
{ "y", i_sym(2) },
{ "z", str("stuff") },
}, {})));
})));
});
describe("when there are symbols that reference undefined rules", [&]() {
@ -49,10 +50,20 @@ describe("interning symbols in a grammar", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
Symbol(2)
})));
});
it("preserves the grammar's separator character set", [&]() {
auto grammar = Grammar({
{ "z", str("stuff") }
}).separators({ 'x', 'y' });
auto result = intern_symbols(grammar);
AssertThat(result.first.separators, Equals(vector<char>({ 'x', 'y' })))
});
});
END_TEST