Make separate types for syntax and lexical grammars
This way, the separator characters can be added as a field to lexical grammars only
This commit is contained in:
parent
d5674d33c4
commit
7df35f9b8d
49 changed files with 467 additions and 395 deletions
|
|
@ -191,5 +191,7 @@ namespace tree_sitter_examples {
|
|||
{ "null", keyword("null") },
|
||||
{ "true", keyword("true") },
|
||||
{ "false", keyword("false") },
|
||||
}).ubiquitous_tokens({ "comment" });
|
||||
})
|
||||
.ubiquitous_tokens({ "comment" })
|
||||
.separators({ ' ', '\t', '\r' });
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ namespace tree_sitter {
|
|||
protected:
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
|
||||
std::vector<std::string> ubiquitous_tokens_;
|
||||
std::vector<char> separators_;
|
||||
|
||||
public:
|
||||
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules);
|
||||
|
|
@ -37,9 +38,11 @@ namespace tree_sitter {
|
|||
std::string start_rule_name() const;
|
||||
const rules::rule_ptr rule(const std::string &name) const;
|
||||
|
||||
const std::vector<std::string> & ubiquitous_tokens() const;
|
||||
const Grammar & ubiquitous_tokens(const std::vector<std::string> &ubiquitous_tokens);
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> & rules() const;
|
||||
const std::vector<std::string> & ubiquitous_tokens() const;
|
||||
Grammar & ubiquitous_tokens(const std::vector<std::string> &ubiquitous_tokens);
|
||||
const std::vector<char> & separators() const;
|
||||
Grammar & separators(const std::vector<char> &separators);
|
||||
};
|
||||
|
||||
struct Conflict {
|
||||
|
|
|
|||
|
|
@ -10,16 +10,16 @@ using namespace build_tables;
|
|||
START_TEST
|
||||
|
||||
describe("building parse tables", []() {
|
||||
auto parse_grammar = PreparedGrammar({
|
||||
SyntaxGrammar parse_grammar({
|
||||
{ "rule0", choice({ i_sym(1), i_sym(2) }) },
|
||||
{ "rule1", i_token(0) },
|
||||
{ "rule2", i_token(1) },
|
||||
}, {}).ubiquitous_tokens({ Symbol(2, SymbolOptionToken) });
|
||||
}, {}, { Symbol(2, SymbolOptionToken) });
|
||||
|
||||
PreparedGrammar lex_grammar({
|
||||
LexicalGrammar lex_grammar({
|
||||
{ "token0", pattern("[a-c]") },
|
||||
{ "token1", pattern("[b-d]") },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
it("first looks for the start rule and its item set closure", [&]() {
|
||||
auto result = build_parse_table(parse_grammar, lex_grammar);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/parse_conflict_manager.h"
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
|
@ -10,16 +11,16 @@ START_TEST
|
|||
describe("resolving parse conflicts", []() {
|
||||
bool update;
|
||||
|
||||
PreparedGrammar parse_grammar({
|
||||
SyntaxGrammar parse_grammar({
|
||||
{ "rule1", seq({ sym("rule2"), sym("token2") }) },
|
||||
{ "rule2", sym("token1") },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
PreparedGrammar lex_grammar({
|
||||
LexicalGrammar lex_grammar({
|
||||
{ "token1", pattern("[a-c]") },
|
||||
{ "token2", pattern("[b-d]") },
|
||||
{ "token3", keyword("stuff") },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
describe("lexical conflicts", [&]() {
|
||||
Symbol sym1(0, SymbolOptionToken);
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("computing FIRST sets", []() {
|
||||
const PreparedGrammar null_grammar({}, {});
|
||||
const SyntaxGrammar null_grammar;
|
||||
|
||||
describe("for a sequence AB", [&]() {
|
||||
it("ignores B when A cannot be blank", [&]() {
|
||||
|
|
@ -41,12 +41,12 @@ describe("computing FIRST sets", []() {
|
|||
i_token(1) }),
|
||||
i_sym(0) });
|
||||
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", seq({
|
||||
i_token(2),
|
||||
i_token(3),
|
||||
i_token(4) }) }
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0, SymbolOptionToken),
|
||||
|
|
@ -59,11 +59,11 @@ describe("computing FIRST sets", []() {
|
|||
i_sym(0),
|
||||
i_token(1) });
|
||||
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", choice({
|
||||
i_token(0),
|
||||
blank() }) }
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0, SymbolOptionToken),
|
||||
|
|
@ -74,12 +74,12 @@ describe("computing FIRST sets", []() {
|
|||
|
||||
describe("when there are left-recursive rules", [&]() {
|
||||
it("terminates", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", choice({
|
||||
seq({ i_sym(0), i_token(10) }),
|
||||
i_token(11),
|
||||
}) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
auto rule = i_sym(0);
|
||||
|
||||
|
|
|
|||
|
|
@ -9,14 +9,14 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("computing closures of item sets", []() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "E", seq({
|
||||
i_sym(1),
|
||||
i_token(11) }) },
|
||||
{ "T", seq({
|
||||
i_token(12),
|
||||
i_token(13) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
it("adds items at the beginnings of referenced rules", [&]() {
|
||||
ParseItemSet item_set = item_set_closure(ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0),
|
||||
|
|
|
|||
|
|
@ -8,15 +8,13 @@ using namespace build_tables;
|
|||
START_TEST
|
||||
|
||||
describe("lexical item set transitions", []() {
|
||||
PreparedGrammar grammar({}, {});
|
||||
|
||||
describe("when two items in the set have transitions on the same character", [&]() {
|
||||
it("merges the transitions by computing the union of the two item sets", [&]() {
|
||||
LexItemSet set1({
|
||||
LexItem(Symbol(1), character({ {'a', 'f'} })),
|
||||
LexItem(Symbol(2), character({ {'e', 'x'} })) });
|
||||
|
||||
AssertThat(char_transitions(set1, grammar), Equals(map<CharacterSet, LexItemSet>({
|
||||
AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
|
||||
{ CharacterSet({ {'a', 'd'} }), LexItemSet({
|
||||
LexItem(Symbol(1), blank()) }) },
|
||||
{ CharacterSet({ {'e', 'f'} }), LexItemSet({
|
||||
|
|
@ -30,10 +28,10 @@ describe("lexical item set transitions", []() {
|
|||
});
|
||||
|
||||
describe("syntactic item set transitions", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "A", blank() },
|
||||
{ "B", i_token(21) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
it("computes the closure of the new item sets", [&]() {
|
||||
ParseItemSet set1({
|
||||
|
|
|
|||
|
|
@ -56,14 +56,14 @@ describe("checking if rules can be blank", [&]() {
|
|||
});
|
||||
|
||||
describe("checking recursively (by expanding non-terminals)", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "A", choice({
|
||||
seq({ i_sym(0), i_token(11) }),
|
||||
blank() }) },
|
||||
{ "B", choice({
|
||||
seq({ i_sym(1), i_token(12) }),
|
||||
i_token(13) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
it("terminates for left-recursive rules that can be blank", [&]() {
|
||||
rule = i_sym(0);
|
||||
|
|
|
|||
|
|
@ -1,26 +1,11 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
// A std::map from keys of type K to rule_ptr whose equality compares the
// pointed-to rules rather than the pointers themselves.
// operator== returns true when both maps have the same size and, for every
// key in this map, `other` holds the same key and the two rules compare equal
// through the rule's own operator==.
// The constructor forwards an initializer list of (key, rule) pairs to the
// underlying map.
template<typename K>
|
||||
class rule_map : public map<K, rule_ptr> {
|
||||
public:
|
||||
bool operator==(const map<K, rule_ptr> &other) const {
|
||||
if (this->size() != other.size()) return false;
|
||||
for (const auto &pair : *this) {
|
||||
auto other_pair = other.find(pair.first);
|
||||
if (other_pair == other.end()) return false;
|
||||
if (!pair.second->operator==(*other_pair->second)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}
|
||||
};
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("rule transitions", []() {
|
||||
|
|
|
|||
52
spec/compiler/helpers/containers.h
Normal file
52
spec/compiler/helpers/containers.h
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
#ifndef HELPERS_CONTAINERS_H_
|
||||
#define HELPERS_CONTAINERS_H_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <initializer_list>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/rule.h"
|
||||
|
||||
using std::map;
|
||||
using std::vector;
|
||||
using std::string;
|
||||
using std::initializer_list;
|
||||
using std::pair;
|
||||
using tree_sitter::rules::rule_ptr;
|
||||
|
||||
template<typename K>
|
||||
class rule_map : public map<K, rule_ptr> {
|
||||
public:
|
||||
bool operator==(const map<K, rule_ptr> &other) const {
|
||||
if (this->size() != other.size()) return false;
|
||||
for (const auto &pair : *this) {
|
||||
auto other_pair = other.find(pair.first);
|
||||
if (other_pair == other.end()) return false;
|
||||
if (!pair.second->operator==(*other_pair->second)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}
|
||||
};
|
||||
|
||||
/*
 * An ordered list of (name, rule) pairs with an equality operator that
 * compares the rules themselves instead of the pointers that hold them.
 *
 * Two lists are equal when they have the same length and the rules at each
 * position compare equal through the rule's own operator==.
 *
 * NOTE(review): only the rules (pair.second) are compared; the names
 * (pair.first) are ignored. Confirm that this is intended before relying
 * on this operator to check rule names.
 */
class rule_list : public vector<pair<string, rule_ptr>> {
public:
    bool operator==(const vector<pair<string, rule_ptr>> &other) const {
        if (this->size() != other.size()) return false;

        for (size_t i = 0; i < this->size(); i++) {
            // Bind by reference: copying each pair would duplicate its name
            // string and its rule pointer on every iteration for no benefit.
            const rule_ptr &rule = this->operator[](i).second;
            const rule_ptr &other_rule = other[i].second;

            if (!rule->operator==(*other_rule))
                return false;
        }

        return true;
    }

    // Builds the list from a braced list of (name, rule) entries.
    rule_list(const initializer_list<pair<string, rule_ptr>> &list) :
        vector<pair<string, rule_ptr>>(list) {}
};
|
||||
|
||||
|
||||
#endif // HELPERS_CONTAINERS_H_
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -9,29 +10,33 @@ using prepare_grammar::expand_repeats;
|
|||
|
||||
describe("expanding repeat rules in a grammar", []() {
|
||||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", repeat(i_token(0)) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", i_aux_sym(0) },
|
||||
}, {
|
||||
{ "rule0_repeat0", choice({
|
||||
seq({
|
||||
i_token(0),
|
||||
i_aux_sym(0) }),
|
||||
blank() }) },
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", choice({ seq({ i_token(0), i_aux_sym(0) }), blank() }) },
|
||||
})));
|
||||
});
|
||||
|
||||
it("replaces repeats inside of sequences", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", seq({ i_token(10), repeat(i_token(11)) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", seq({ i_token(10), i_aux_sym(0) }) },
|
||||
}, {
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", choice({
|
||||
seq({ i_token(11), i_aux_sym(0) }),
|
||||
blank() }) },
|
||||
|
|
@ -39,13 +44,17 @@ describe("expanding repeat rules in a grammar", []() {
|
|||
});
|
||||
|
||||
it("replaces repeats inside of choices", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", choice({ i_token(10), i_aux_sym(0) }) },
|
||||
}, {
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", choice({
|
||||
seq({ i_token(11), i_aux_sym(0) }),
|
||||
blank() }) },
|
||||
|
|
@ -53,13 +62,17 @@ describe("expanding repeat rules in a grammar", []() {
|
|||
});
|
||||
|
||||
it("can replace multiple repeats in the same rule", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", seq({ repeat(i_token(10)), repeat(i_token(11)) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", seq({ i_aux_sym(0), i_aux_sym(1) }) },
|
||||
}, {
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", choice({
|
||||
seq({
|
||||
i_token(10),
|
||||
|
|
@ -74,15 +87,19 @@ describe("expanding repeat rules in a grammar", []() {
|
|||
});
|
||||
|
||||
it("can replace repeats in multiple rules", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", repeat(i_token(10)) },
|
||||
{ "rule1", repeat(i_token(11)) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", i_aux_sym(0) },
|
||||
{ "rule1", i_aux_sym(1) },
|
||||
}, {
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", choice({
|
||||
seq({ i_token(10), i_aux_sym(0) }),
|
||||
blank() }) },
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
|
||||
START_TEST
|
||||
|
|
@ -9,50 +10,50 @@ using prepare_grammar::expand_tokens;
|
|||
|
||||
describe("expanding token rules", []() {
|
||||
it("replaces regex patterns with their expansion", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
LexicalGrammar grammar({
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
pattern("x*"),
|
||||
i_sym(11) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
repeat(character({ 'x' })),
|
||||
i_sym(11) }) },
|
||||
}, {})));
|
||||
})));
|
||||
});
|
||||
|
||||
it("replaces string rules with a sequence of characters", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
LexicalGrammar grammar({
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
str("xyz"),
|
||||
i_sym(11) }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
seq({ character({ 'x' }), character({ 'y' }), character({ 'z' }) }),
|
||||
i_sym(11) }) },
|
||||
}, {})));
|
||||
})));
|
||||
});
|
||||
|
||||
it("returns an error when the grammar contains an invalid regex", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
LexicalGrammar grammar({
|
||||
{ "rule_A", seq({
|
||||
pattern("("),
|
||||
str("xyz"),
|
||||
pattern("[") }) },
|
||||
}, {});
|
||||
}, {}, {});
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,160 +1,172 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::extract_tokens;
|
||||
using prepare_grammar::InternedGrammar;
|
||||
|
||||
describe("extracting tokens from a grammar", []() {
|
||||
it("moves string rules into the lexical grammar", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
|
||||
}, {}));
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty())
|
||||
AssertThat(result.second.rules, IsEmpty())
|
||||
AssertThat(result.second.aux_rules, Equals(rule_list({
|
||||
{ "'ab'", str("ab") },
|
||||
})));
|
||||
});
|
||||
|
||||
it("moves pattern rules into the lexical grammar", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
|
||||
}, {}));
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty())
|
||||
AssertThat(result.second.rules, IsEmpty())
|
||||
AssertThat(result.second.aux_rules, Equals(rule_list({
|
||||
{ "/a+/", pattern("a+") },
|
||||
})));
|
||||
});
|
||||
|
||||
it("moves other rules marked as tokens into the lexical grammar", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", seq({
|
||||
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
|
||||
i_sym(0) }) }
|
||||
}, {}));
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", seq({
|
||||
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
|
||||
i_sym(0) }) }
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty())
|
||||
AssertThat(result.second.rules, IsEmpty())
|
||||
AssertThat(result.second.aux_rules, Equals(rule_list({
|
||||
{ "(seq /./ (choice 'a' 'b'))", token(seq({ pattern("."), choice({ str("a"), str("b") }) })) },
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not extract blanks", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", choice({ i_sym(0), blank() }) },
|
||||
}, {}));
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", choice({ i_sym(0), blank() }) },
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", choice({ i_sym(0), blank() }) },
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {})));
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty())
|
||||
AssertThat(result.second.rules, IsEmpty())
|
||||
AssertThat(result.second.aux_rules, IsEmpty())
|
||||
});
|
||||
|
||||
it("does not create duplicate tokens in the lexical grammar", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
|
||||
}, {}));
|
||||
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty())
|
||||
AssertThat(result.second.rules, IsEmpty())
|
||||
AssertThat(result.second.aux_rules, Equals(rule_list({
|
||||
{ "'ab'", str("ab") },
|
||||
})));
|
||||
});
|
||||
|
||||
it("extracts tokens from the grammar's auxiliary rules", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({}, {
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({}, {
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
{ "'ab'", str("ab") },
|
||||
})));
|
||||
})))
|
||||
});
|
||||
|
||||
describe("when an entire rule can be extracted", [&]() {
|
||||
it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", i_sym(1) },
|
||||
{ "rule_B", pattern("a|b") },
|
||||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
}, {}));
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", i_sym(1) },
|
||||
{ "rule_B", pattern("a|b") },
|
||||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", i_token(0) }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty());
|
||||
AssertThat(result.second.rules, Equals(rule_list({
|
||||
{ "rule_B", pattern("a|b") },
|
||||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
}, {})));
|
||||
})));
|
||||
AssertThat(result.second.aux_rules, IsEmpty());
|
||||
});
|
||||
|
||||
it("updates symbols whose indices need to change due to deleted rules", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_sym(0) },
|
||||
{ "rule_C", i_sym(1) },
|
||||
}, {}));
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_sym(0) },
|
||||
{ "rule_C", i_sym(1) },
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_B", i_token(0) },
|
||||
{ "rule_C", i_sym(0) },
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({
|
||||
})));
|
||||
AssertThat(result.first.aux_rules, IsEmpty());
|
||||
AssertThat(result.second.rules, Equals(rule_list({
|
||||
{ "rule_A", str("ab") },
|
||||
}, {})));
|
||||
})));
|
||||
AssertThat(result.second.aux_rules, IsEmpty());
|
||||
});
|
||||
|
||||
it("updates the grammar's ubiquitous_tokens", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_sym(0) },
|
||||
{ "rule_C", i_sym(1) },
|
||||
}, {}).ubiquitous_tokens({ Symbol(0) }));
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_sym(0) },
|
||||
{ "rule_C", i_sym(1) },
|
||||
},
|
||||
{ Symbol(0) },
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
|
||||
AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
|
||||
{ Symbol(0, SymbolOptionToken) }
|
||||
})));
|
||||
});
|
||||
|
||||
it("extracts entire auxiliary rules", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({}, {
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_aux_sym(0) },
|
||||
{ "rule_C", i_aux_sym(1) },
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({}, {
|
||||
{ "rule_B", i_aux_token(0) },
|
||||
{ "rule_C", i_aux_sym(0) },
|
||||
})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
{ "rule_A", str("ab") },
|
||||
})));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -20,11 +21,11 @@ describe("interning symbols in a grammar", []() {
|
|||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "x", choice({ i_sym(1), i_sym(2) }) },
|
||||
{ "y", i_sym(2) },
|
||||
{ "z", str("stuff") },
|
||||
}, {})));
|
||||
})));
|
||||
});
|
||||
|
||||
describe("when there are symbols that reference undefined rules", [&]() {
|
||||
|
|
@ -49,10 +50,20 @@ describe("interning symbols in a grammar", []() {
|
|||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
|
||||
AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
|
||||
Symbol(2)
|
||||
})));
|
||||
});
|
||||
|
||||
it("preserves the grammar's separator character set", [&]() {
|
||||
auto grammar = Grammar({
|
||||
{ "z", str("stuff") }
|
||||
}).separators({ 'x', 'y' });
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.first.separators, Equals(vector<char>({ 'x', 'y' })))
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
class LexTableBuilder {
|
||||
const PreparedGrammar lex_grammar;
|
||||
const LexicalGrammar lex_grammar;
|
||||
ParseTable *parse_table;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
|
||||
|
|
@ -65,7 +65,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
auto transitions = char_transitions(item_set, lex_grammar);
|
||||
auto transitions = char_transitions(item_set);
|
||||
for (const auto &transition : transitions) {
|
||||
CharacterSet rule = transition.first;
|
||||
LexItemSet new_item_set = transition.second;
|
||||
|
|
@ -114,7 +114,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
public:
|
||||
LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) :
|
||||
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) :
|
||||
lex_grammar(lex_grammar),
|
||||
parse_table(parse_table),
|
||||
conflict_manager(LexConflictManager(lex_grammar)) {}
|
||||
|
|
@ -129,7 +129,7 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
LexTable build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar) {
|
||||
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar) {
|
||||
return LexTableBuilder(parse_table, lex_grammar).build();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,12 +5,11 @@
|
|||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class LexicalGrammar;
|
||||
class ParseTable;
|
||||
|
||||
namespace build_tables {
|
||||
LexTable
|
||||
build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar);
|
||||
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
class ParseTableBuilder {
|
||||
const PreparedGrammar grammar;
|
||||
const SyntaxGrammar grammar;
|
||||
ParseConflictManager conflict_manager;
|
||||
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
|
||||
ParseTable parse_table;
|
||||
|
|
@ -59,7 +59,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void add_ubiquitous_token_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const Symbol &symbol : grammar.ubiquitous_tokens()) {
|
||||
for (const Symbol &symbol : grammar.ubiquitous_tokens) {
|
||||
auto &actions = parse_table.states[state_id].actions;
|
||||
if (actions.find(symbol) == actions.end())
|
||||
parse_table.add_action(state_id, symbol, ParseAction::Shift(state_id, { 0 }));
|
||||
|
|
@ -99,7 +99,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
public:
|
||||
ParseTableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
|
||||
ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) :
|
||||
grammar(grammar),
|
||||
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
|
||||
|
||||
|
|
@ -111,7 +111,7 @@ namespace tree_sitter {
|
|||
};
|
||||
|
||||
pair<ParseTable, vector<Conflict>>
|
||||
build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) {
|
||||
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
|
||||
return ParseTableBuilder(grammar, lex_grammar).build();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,11 +7,12 @@
|
|||
#include "compiler/parse_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
std::pair<ParseTable, std::vector<Conflict>>
|
||||
build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar);
|
||||
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler/build_tables/build_tables.h"
|
||||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include "compiler/build_tables/build_lex_table.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::tuple;
|
||||
|
|
@ -9,8 +10,8 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
tuple<ParseTable, LexTable, vector<Conflict>>
|
||||
build_tables(const PreparedGrammar &grammar,
|
||||
const PreparedGrammar &lex_grammar) {
|
||||
build_tables(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lex_grammar) {
|
||||
auto parse_table_result = build_parse_table(grammar, lex_grammar);
|
||||
ParseTable parse_table = parse_table_result.first;
|
||||
vector<Conflict> conflicts = parse_table_result.second;
|
||||
|
|
|
|||
|
|
@ -8,12 +8,13 @@
|
|||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
std::tuple<ParseTable, LexTable, std::vector<Conflict>>
|
||||
build_tables(const PreparedGrammar &grammar,
|
||||
const PreparedGrammar &lex_grammar);
|
||||
build_tables(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,11 +14,11 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
class FirstSet : public rules::RuleFn<set<Symbol>> {
|
||||
const PreparedGrammar *grammar;
|
||||
const SyntaxGrammar *grammar;
|
||||
set<Symbol> visited_symbols;
|
||||
|
||||
public:
|
||||
explicit FirstSet(const PreparedGrammar *grammar) : grammar(grammar) {}
|
||||
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
|
||||
|
||||
set<Symbol> apply_to(const Symbol *rule) {
|
||||
auto insertion_result = visited_symbols.insert(*rule);
|
||||
|
|
@ -54,7 +54,7 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
set<Symbol> first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
|
||||
set<Symbol> first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
|
||||
return FirstSet(&grammar).apply(rule);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,17 +6,17 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
/*
|
||||
* Returns the set of terminal symbols that can appear at
|
||||
* the beginning of a string derivable from a given rule,
|
||||
* in a given gramamr.
|
||||
* in a given grammar.
|
||||
*/
|
||||
std::set<rules::Symbol>
|
||||
first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
|
||||
first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ namespace tree_sitter {
|
|||
namespace build_tables {
|
||||
const ParseItemSet item_set_closure(const ParseItem &starting_item,
|
||||
const set<Symbol> &starting_lookahead_symbols,
|
||||
const PreparedGrammar &grammar) {
|
||||
const SyntaxGrammar &grammar) {
|
||||
ParseItemSet result;
|
||||
|
||||
vector<pair<ParseItem, set<Symbol>>> items_to_process = {{starting_item, starting_lookahead_symbols}};
|
||||
|
|
|
|||
|
|
@ -6,12 +6,12 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
const ParseItemSet item_set_closure(const ParseItem &item,
|
||||
const std::set<rules::Symbol> &lookahead_symbols,
|
||||
const PreparedGrammar &grammar);
|
||||
const SyntaxGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/build_tables/merge_transitions.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::map;
|
||||
|
|
@ -13,7 +14,7 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
map<Symbol, ParseItemSet>
|
||||
sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar) {
|
||||
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
|
||||
map<Symbol, ParseItemSet> result;
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
|
|
@ -31,7 +32,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
map<CharacterSet, LexItemSet>
|
||||
char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar) {
|
||||
char_transitions(const LexItemSet &item_set) {
|
||||
map<CharacterSet, LexItemSet> result;
|
||||
for (const LexItem &item : item_set) {
|
||||
for (auto &transition : char_transitions(item.rule)) {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
namespace rules {
|
||||
class CharacterSet;
|
||||
class Symbol;
|
||||
|
|
@ -14,10 +14,10 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
std::map<rules::Symbol, ParseItemSet>
|
||||
sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar);
|
||||
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar);
|
||||
|
||||
std::map<rules::CharacterSet, LexItemSet>
|
||||
char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar);
|
||||
char_transitions(const LexItemSet &item_set);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include <string>
|
||||
#include <set>
|
||||
#include "compiler/util/string_helpers.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -13,7 +14,7 @@ namespace tree_sitter {
|
|||
using std::set;
|
||||
using std::vector;
|
||||
|
||||
LexConflictManager::LexConflictManager(const PreparedGrammar &grammar) :
|
||||
LexConflictManager::LexConflictManager(const LexicalGrammar &grammar) :
|
||||
grammar(grammar) {}
|
||||
|
||||
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
|
||||
|
|
|
|||
|
|
@ -8,10 +8,10 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class LexConflictManager {
|
||||
const PreparedGrammar grammar;
|
||||
const LexicalGrammar grammar;
|
||||
|
||||
public:
|
||||
explicit LexConflictManager(const PreparedGrammar &grammar);
|
||||
explicit LexConflictManager(const LexicalGrammar &grammar);
|
||||
bool resolve_lex_action(const LexAction &old_action,
|
||||
const LexAction &new_action);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include <string>
|
||||
#include <set>
|
||||
#include "compiler/util/string_helpers.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -13,8 +14,8 @@ namespace tree_sitter {
|
|||
using std::set;
|
||||
using std::vector;
|
||||
|
||||
ParseConflictManager::ParseConflictManager(const PreparedGrammar &parse_grammar,
|
||||
const PreparedGrammar &lex_grammar) :
|
||||
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar,
|
||||
const LexicalGrammar &lex_grammar) :
|
||||
parse_grammar(parse_grammar),
|
||||
lex_grammar(lex_grammar) {}
|
||||
|
||||
|
|
@ -87,7 +88,7 @@ namespace tree_sitter {
|
|||
return precedences + ")";
|
||||
}
|
||||
|
||||
string message_for_action(const ParseAction &action, const PreparedGrammar &parse_grammar) {
|
||||
string message_for_action(const ParseAction &action, const SyntaxGrammar &parse_grammar) {
|
||||
switch (action.type) {
|
||||
case ParseActionTypeShift:
|
||||
return "shift " + precedence_string(action);
|
||||
|
|
|
|||
|
|
@ -13,13 +13,13 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class ParseConflictManager {
|
||||
const PreparedGrammar parse_grammar;
|
||||
const PreparedGrammar lex_grammar;
|
||||
const SyntaxGrammar parse_grammar;
|
||||
const LexicalGrammar lex_grammar;
|
||||
std::set<Conflict> conflicts_;
|
||||
|
||||
public:
|
||||
ParseConflictManager(const PreparedGrammar &parse_grammar,
|
||||
const PreparedGrammar &lex_grammar);
|
||||
ParseConflictManager(const SyntaxGrammar &parse_grammar,
|
||||
const LexicalGrammar &lex_grammar);
|
||||
bool resolve_parse_action(const rules::Symbol &symbol,
|
||||
const ParseAction &old_action,
|
||||
const ParseAction &new_action);
|
||||
|
|
|
|||
|
|
@ -39,13 +39,13 @@ namespace tree_sitter {
|
|||
};
|
||||
|
||||
class CanBeBlankRecursive : public CanBeBlank {
|
||||
const PreparedGrammar *grammar;
|
||||
const SyntaxGrammar *grammar;
|
||||
set<rules::Symbol> visited_symbols;
|
||||
using CanBeBlank::visit;
|
||||
|
||||
public:
|
||||
using CanBeBlank::apply_to;
|
||||
explicit CanBeBlankRecursive(const PreparedGrammar *grammar) : grammar(grammar) {}
|
||||
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar) : grammar(grammar) {}
|
||||
|
||||
bool apply_to(const rules::Symbol *rule) {
|
||||
if (visited_symbols.find(*rule) == visited_symbols.end()) {
|
||||
|
|
@ -61,7 +61,7 @@ namespace tree_sitter {
|
|||
return CanBeBlank().apply(rule);
|
||||
}
|
||||
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
|
||||
return CanBeBlankRecursive(&grammar).apply(rule);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,11 +4,11 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule);
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ namespace tree_sitter {
|
|||
tuple<string, vector<Conflict>, const GrammarError *>
|
||||
compile(const Grammar &grammar, std::string name) {
|
||||
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
|
||||
const PreparedGrammar &syntax_grammar = get<0>(prepare_grammar_result);
|
||||
const PreparedGrammar &lexical_grammar = get<1>(prepare_grammar_result);
|
||||
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
|
||||
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
|
||||
const GrammarError *error = get<2>(prepare_grammar_result);
|
||||
|
||||
if (error)
|
||||
|
|
|
|||
|
|
@ -27,16 +27,16 @@ namespace tree_sitter {
|
|||
const string name;
|
||||
const ParseTable parse_table;
|
||||
const LexTable lex_table;
|
||||
const PreparedGrammar syntax_grammar;
|
||||
const PreparedGrammar lexical_grammar;
|
||||
const SyntaxGrammar syntax_grammar;
|
||||
const LexicalGrammar lexical_grammar;
|
||||
map<string, string> sanitized_names;
|
||||
|
||||
public:
|
||||
CCodeGenerator(string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const PreparedGrammar &syntax_grammar,
|
||||
const PreparedGrammar &lexical_grammar) :
|
||||
const SyntaxGrammar &syntax_grammar,
|
||||
const LexicalGrammar &lexical_grammar) :
|
||||
indent_level(0),
|
||||
name(name),
|
||||
parse_table(parse_table),
|
||||
|
|
@ -107,7 +107,7 @@ namespace tree_sitter {
|
|||
void ubiquitous_symbols_list() {
|
||||
line("UBIQUITOUS_SYMBOLS = {");
|
||||
indent([&]() {
|
||||
for (auto &symbol : syntax_grammar.ubiquitous_tokens())
|
||||
for (auto &symbol : syntax_grammar.ubiquitous_tokens)
|
||||
line("[" + symbol_id(symbol) + "] = 1,");
|
||||
});
|
||||
line("};");
|
||||
|
|
@ -118,7 +118,7 @@ namespace tree_sitter {
|
|||
line("HIDDEN_SYMBOLS = {");
|
||||
indent([&]() {
|
||||
for (auto &symbol : parse_table.symbols)
|
||||
if (!symbol.is_built_in() && (symbol.is_auxiliary() || grammar_for_symbol(symbol).rule_name(symbol)[0] == '_'))
|
||||
if (!symbol.is_built_in() && (symbol.is_auxiliary() || rule_name(symbol)[0] == '_'))
|
||||
line("[" + symbol_id(symbol) + "] = 1,");
|
||||
});
|
||||
line("};");
|
||||
|
|
@ -178,8 +178,10 @@ namespace tree_sitter {
|
|||
line();
|
||||
}
|
||||
|
||||
const PreparedGrammar & grammar_for_symbol(const rules::Symbol &symbol) {
|
||||
return symbol.is_token() ? lexical_grammar : syntax_grammar;
|
||||
string rule_name(const rules::Symbol &symbol) {
|
||||
return symbol.is_token() ?
|
||||
lexical_grammar.rule_name(symbol) :
|
||||
syntax_grammar.rule_name(symbol);
|
||||
}
|
||||
|
||||
string symbol_id(const rules::Symbol &symbol) {
|
||||
|
|
@ -188,7 +190,7 @@ namespace tree_sitter {
|
|||
"ts_builtin_sym_error" :
|
||||
"ts_builtin_sym_end";
|
||||
} else {
|
||||
string name = sanitize_name(grammar_for_symbol(symbol).rule_name(symbol));
|
||||
string name = sanitize_name(rule_name(symbol));
|
||||
if (symbol.is_auxiliary())
|
||||
return "ts_aux_sym_" + name;
|
||||
else
|
||||
|
|
@ -238,9 +240,9 @@ namespace tree_sitter {
|
|||
if (symbol.is_built_in()) {
|
||||
return (symbol == rules::ERROR()) ? "error" : "end";
|
||||
} else if (symbol.is_token() && symbol.is_auxiliary()) {
|
||||
return grammar_for_symbol(symbol).rule_name(symbol);
|
||||
return rule_name(symbol);
|
||||
} else {
|
||||
return grammar_for_symbol(symbol).rule_name(symbol);
|
||||
return rule_name(symbol);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -397,8 +399,8 @@ namespace tree_sitter {
|
|||
string c_code(string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const PreparedGrammar &syntax_grammar,
|
||||
const PreparedGrammar &lexical_grammar) {
|
||||
const SyntaxGrammar &syntax_grammar,
|
||||
const LexicalGrammar &lexical_grammar) {
|
||||
return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, lexical_grammar).code();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,14 +7,15 @@
|
|||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace generate_code {
|
||||
std::string c_code(std::string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const PreparedGrammar &syntax_grammar,
|
||||
const PreparedGrammar &lexical_grammar);
|
||||
const SyntaxGrammar &syntax_grammar,
|
||||
const LexicalGrammar &lexical_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -62,11 +62,20 @@ namespace tree_sitter {
|
|||
return ubiquitous_tokens_;
|
||||
}
|
||||
|
||||
const Grammar & Grammar::ubiquitous_tokens(const vector<string> &ubiquitous_tokens) {
|
||||
Grammar & Grammar::ubiquitous_tokens(const vector<string> &ubiquitous_tokens) {
|
||||
ubiquitous_tokens_ = ubiquitous_tokens;
|
||||
return *this;
|
||||
}
|
||||
|
||||
const vector<char> & Grammar::separators() const {
|
||||
return separators_;
|
||||
}
|
||||
|
||||
Grammar & Grammar::separators(const vector<char> &separators) {
|
||||
separators_ = separators;
|
||||
return *this;
|
||||
}
|
||||
|
||||
const vector<pair<string, rule_ptr>> & Grammar::rules() const {
|
||||
return rules_;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,17 +50,16 @@ namespace tree_sitter {
|
|||
vector<pair<string, rules::rule_ptr>> aux_rules;
|
||||
};
|
||||
|
||||
PreparedGrammar expand_repeats(const PreparedGrammar &grammar) {
|
||||
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules());
|
||||
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
|
||||
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
|
||||
|
||||
for (auto &pair : grammar.rules()) {
|
||||
for (auto &pair : grammar.rules) {
|
||||
ExpandRepeats expander(pair.first, aux_rules.size());
|
||||
rules.push_back({ pair.first, expander.apply(pair.second) });
|
||||
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
|
||||
}
|
||||
|
||||
return PreparedGrammar(rules, aux_rules).
|
||||
ubiquitous_tokens(grammar.ubiquitous_tokens());
|
||||
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
PreparedGrammar expand_repeats(const PreparedGrammar &);
|
||||
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -43,28 +43,29 @@ namespace tree_sitter {
|
|||
ExpandTokens() : error(nullptr) {}
|
||||
};
|
||||
|
||||
pair<PreparedGrammar, const GrammarError *>
|
||||
expand_tokens(const PreparedGrammar &grammar) {
|
||||
pair<LexicalGrammar, const GrammarError *>
|
||||
expand_tokens(const LexicalGrammar &grammar) {
|
||||
vector<pair<string, rule_ptr>> rules, aux_rules;
|
||||
ExpandTokens expander;
|
||||
|
||||
for (auto &pair : grammar.rules()) {
|
||||
for (auto &pair : grammar.rules) {
|
||||
auto rule = expander.apply(pair.second);
|
||||
if (expander.error)
|
||||
return { PreparedGrammar(), expander.error };
|
||||
return { LexicalGrammar({}, {}, {}), expander.error };
|
||||
rules.push_back({ pair.first, rule });
|
||||
}
|
||||
|
||||
for (auto &pair : grammar.aux_rules()) {
|
||||
for (auto &pair : grammar.aux_rules) {
|
||||
auto rule = expander.apply(pair.second);
|
||||
if (expander.error)
|
||||
return { PreparedGrammar(), expander.error };
|
||||
return { LexicalGrammar({}, {}, {}), expander.error };
|
||||
aux_rules.push_back({ pair.first, rule });
|
||||
}
|
||||
|
||||
return {
|
||||
PreparedGrammar(rules, aux_rules).ubiquitous_tokens(grammar.ubiquitous_tokens()),
|
||||
nullptr };
|
||||
LexicalGrammar(rules, aux_rules, grammar.separators),
|
||||
nullptr,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,11 +5,11 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<PreparedGrammar, const GrammarError *>
|
||||
expand_tokens(const PreparedGrammar &);
|
||||
std::pair<LexicalGrammar, const GrammarError *>
|
||||
expand_tokens(const LexicalGrammar &);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepare_grammar/token_description.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -93,15 +94,15 @@ namespace tree_sitter {
|
|||
vector<pair<string, rule_ptr>> tokens;
|
||||
};
|
||||
|
||||
pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &input_grammar) {
|
||||
pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &input_grammar) {
|
||||
vector<pair<string, rule_ptr>> rules, tokens, aux_rules, aux_tokens;
|
||||
vector<Symbol> ubiquitous_tokens;
|
||||
|
||||
TokenExtractor extractor;
|
||||
map<Symbol, Symbol> symbol_replacements;
|
||||
|
||||
for (size_t i = 0; i < input_grammar.rules().size(); i++) {
|
||||
auto pair = input_grammar.rules()[i];
|
||||
for (size_t i = 0; i < input_grammar.rules.size(); i++) {
|
||||
auto pair = input_grammar.rules[i];
|
||||
if (IsToken().apply(pair.second)) {
|
||||
tokens.push_back(pair);
|
||||
symbol_replacements.insert({
|
||||
|
|
@ -113,32 +114,17 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < input_grammar.aux_rules().size(); i++) {
|
||||
auto pair = input_grammar.aux_rules()[i];
|
||||
if (IsToken().apply(pair.second)) {
|
||||
aux_tokens.push_back(pair);
|
||||
symbol_replacements.insert({
|
||||
Symbol(i, rules::SymbolOptionAuxiliary),
|
||||
Symbol(aux_tokens.size() - 1, rules::SymbolOption(rules::SymbolOptionAuxiliary|rules::SymbolOptionToken))
|
||||
});
|
||||
} else {
|
||||
aux_rules.push_back({ pair.first, extractor.apply(pair.second) });
|
||||
}
|
||||
}
|
||||
|
||||
aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());
|
||||
|
||||
SymbolInliner inliner(symbol_replacements);
|
||||
for (auto &pair : rules)
|
||||
pair.second = inliner.apply(pair.second);
|
||||
for (auto &pair : aux_rules)
|
||||
pair.second = inliner.apply(pair.second);
|
||||
for (auto &symbol : input_grammar.ubiquitous_tokens())
|
||||
for (auto &symbol : input_grammar.ubiquitous_tokens)
|
||||
ubiquitous_tokens.push_back(inliner.replace_symbol(symbol));
|
||||
|
||||
return {
|
||||
PreparedGrammar(rules, aux_rules).ubiquitous_tokens(ubiquitous_tokens),
|
||||
PreparedGrammar(tokens, aux_tokens)
|
||||
SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
|
||||
LexicalGrammar(tokens, aux_tokens, {}),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,12 +2,14 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_
|
||||
|
||||
#include <utility>
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &);
|
||||
std::pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include <memory>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
|
|
@ -37,15 +38,16 @@ namespace tree_sitter {
|
|||
string missing_rule_name;
|
||||
};
|
||||
|
||||
pair<PreparedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
|
||||
pair<InternedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
|
||||
InternedGrammar grammar;
|
||||
return {
|
||||
PreparedGrammar({}, {}),
|
||||
grammar,
|
||||
new GrammarError(GrammarErrorTypeUndefinedSymbol,
|
||||
"Undefined rule '" + rule_name + "'")
|
||||
};
|
||||
}
|
||||
|
||||
pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
|
||||
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
|
||||
InternSymbols interner(grammar);
|
||||
vector<pair<string, rule_ptr>> rules;
|
||||
|
||||
|
|
@ -64,10 +66,12 @@ namespace tree_sitter {
|
|||
ubiquitous_tokens.push_back(*token);
|
||||
}
|
||||
|
||||
return {
|
||||
PreparedGrammar(rules, {}).ubiquitous_tokens(ubiquitous_tokens),
|
||||
nullptr
|
||||
};
|
||||
InternedGrammar result;
|
||||
result.rules = rules;
|
||||
result.ubiquitous_tokens = ubiquitous_tokens;
|
||||
result.separators = grammar.separators();
|
||||
|
||||
return { result, nullptr };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,13 +4,13 @@
|
|||
#include <utility>
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &);
|
||||
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
21
src/compiler/prepare_grammar/interned_grammar.h
Normal file
21
src/compiler/prepare_grammar/interned_grammar.h
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
class InternedGrammar {
|
||||
public:
|
||||
std::vector<std::pair<std::string, rules::rule_ptr>> rules;
|
||||
std::vector<rules::Symbol> ubiquitous_tokens;
|
||||
std::vector<char> separators;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
|
||||
|
|
@ -4,29 +4,31 @@
|
|||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::tuple;
|
||||
using std::make_tuple;
|
||||
|
||||
namespace prepare_grammar {
|
||||
tuple<PreparedGrammar, PreparedGrammar, const GrammarError *>
|
||||
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
|
||||
prepare_grammar(const Grammar &input_grammar) {
|
||||
auto result = intern_symbols(input_grammar);
|
||||
const PreparedGrammar &grammar = result.first;
|
||||
const InternedGrammar &grammar = result.first;
|
||||
const GrammarError *error = result.second;
|
||||
|
||||
if (error)
|
||||
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
auto grammars = extract_tokens(grammar);
|
||||
const PreparedGrammar &rule_grammar = expand_repeats(grammars.first);
|
||||
const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first);
|
||||
auto expand_tokens_result = expand_tokens(grammars.second);
|
||||
const PreparedGrammar &lex_grammar = expand_tokens_result.first;
|
||||
const LexicalGrammar &lex_grammar = expand_tokens_result.first;
|
||||
error = expand_tokens_result.second;
|
||||
|
||||
if (error)
|
||||
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
return make_tuple(rule_grammar, lex_grammar, nullptr);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,14 +2,14 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
|
||||
|
||||
#include <utility>
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class GrammarError;
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::tuple<PreparedGrammar, PreparedGrammar, const GrammarError *>
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
|
||||
prepare_grammar(const Grammar &);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,98 +7,41 @@
|
|||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::pair;
|
||||
using std::ostream;
|
||||
using std::vector;
|
||||
using rules::rule_ptr;
|
||||
using rules::Symbol;
|
||||
|
||||
PreparedGrammar::PreparedGrammar() :
|
||||
rules_({}),
|
||||
aux_rules_({}),
|
||||
ubiquitous_tokens_({}) {}
|
||||
|
||||
PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules) :
|
||||
rules_(rules),
|
||||
aux_rules_(aux_rules),
|
||||
ubiquitous_tokens_({}) {}
|
||||
|
||||
const rule_ptr & PreparedGrammar::rule(const Symbol &symbol) const {
|
||||
const rules::rule_ptr & PreparedGrammar::rule(const rules::Symbol &symbol) const {
|
||||
return symbol.is_auxiliary() ?
|
||||
aux_rules_[symbol.index].second :
|
||||
rules_[symbol.index].second;
|
||||
aux_rules[symbol.index].second :
|
||||
rules[symbol.index].second;
|
||||
}
|
||||
|
||||
const string & PreparedGrammar::rule_name(const Symbol &symbol) const {
|
||||
const string & PreparedGrammar::rule_name(const rules::Symbol &symbol) const {
|
||||
return symbol.is_auxiliary() ?
|
||||
aux_rules_[symbol.index].first :
|
||||
rules_[symbol.index].first;
|
||||
aux_rules[symbol.index].first :
|
||||
rules[symbol.index].first;
|
||||
}
|
||||
|
||||
bool PreparedGrammar::operator==(const PreparedGrammar &other) const {
|
||||
if (other.rules_.size() != rules_.size()) return false;
|
||||
PreparedGrammar::PreparedGrammar() {}
|
||||
SyntaxGrammar::SyntaxGrammar() {}
|
||||
LexicalGrammar::LexicalGrammar() {}
|
||||
|
||||
for (size_t i = 0; i < rules_.size(); i++) {
|
||||
auto &pair = rules_[i];
|
||||
auto &other_pair = other.rules_[i];
|
||||
if (other_pair.first != pair.first) return false;
|
||||
if (!other_pair.second->operator==(*pair.second)) return false;
|
||||
}
|
||||
PreparedGrammar::PreparedGrammar(
|
||||
const vector<pair<string, rules::rule_ptr>> &rules,
|
||||
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
|
||||
rules(rules),
|
||||
aux_rules(aux_rules) {}
|
||||
|
||||
if (other.aux_rules_.size() != aux_rules_.size()) return false;
|
||||
for (size_t i = 0; i < aux_rules_
|
||||
.size(); i++) {
|
||||
auto &pair = aux_rules_[i];
|
||||
auto &other_pair = other.aux_rules_[i];
|
||||
if (other_pair.first != pair.first) return false;
|
||||
if (!other_pair.second->operator==(*pair.second)) return false;
|
||||
}
|
||||
SyntaxGrammar::SyntaxGrammar(
|
||||
const vector<pair<string, rules::rule_ptr>> &rules,
|
||||
const vector<pair<string, rules::rule_ptr>> &aux_rules,
|
||||
const vector<rules::Symbol> &ubiquitous_tokens) :
|
||||
PreparedGrammar(rules, aux_rules),
|
||||
ubiquitous_tokens(ubiquitous_tokens) {}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
const vector<pair<string, rule_ptr>> & PreparedGrammar::rules() const {
|
||||
return rules_;
|
||||
}
|
||||
|
||||
const vector<pair<string, rule_ptr>> & PreparedGrammar::aux_rules() const {
|
||||
return aux_rules_;
|
||||
}
|
||||
|
||||
const vector<Symbol> & PreparedGrammar::ubiquitous_tokens() const {
|
||||
return ubiquitous_tokens_;
|
||||
}
|
||||
|
||||
const PreparedGrammar & PreparedGrammar::ubiquitous_tokens(const vector<Symbol> &ubiquitous_tokens) {
|
||||
ubiquitous_tokens_ = ubiquitous_tokens;
|
||||
return *this;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const PreparedGrammar &grammar) {
|
||||
stream << string("#<grammar");
|
||||
|
||||
stream << string(" rules: {");
|
||||
bool started = false;
|
||||
for (auto pair : grammar.rules()) {
|
||||
if (started) stream << string(", ");
|
||||
stream << pair.first;
|
||||
stream << string(" => ");
|
||||
stream << pair.second;
|
||||
started = true;
|
||||
}
|
||||
stream << string("}");
|
||||
|
||||
stream << string(" aux_rules: {");
|
||||
started = false;
|
||||
for (auto pair : grammar.aux_rules()) {
|
||||
if (started) stream << string(", ");
|
||||
stream << pair.first;
|
||||
stream << string(" => ");
|
||||
stream << pair.second;
|
||||
started = true;
|
||||
}
|
||||
stream << string("}");
|
||||
|
||||
return stream << string(">");
|
||||
}
|
||||
LexicalGrammar::LexicalGrammar(
|
||||
const vector<pair<string, rules::rule_ptr>> &rules,
|
||||
const vector<pair<string, rules::rule_ptr>> &aux_rules,
|
||||
const vector<char> &separators) :
|
||||
PreparedGrammar(rules, aux_rules),
|
||||
separators(separators) {}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,25 +9,40 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar {
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules_;
|
||||
std::vector<rules::Symbol> ubiquitous_tokens_;
|
||||
|
||||
public:
|
||||
PreparedGrammar();
|
||||
PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
|
||||
PreparedGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
|
||||
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
|
||||
|
||||
bool operator==(const PreparedGrammar &other) const;
|
||||
const std::string & rule_name(const rules::Symbol &symbol) const;
|
||||
const rules::rule_ptr & rule(const rules::Symbol &symbol) const;
|
||||
const std::vector<rules::Symbol> & ubiquitous_tokens() const;
|
||||
const PreparedGrammar & ubiquitous_tokens(const std::vector<rules::Symbol> &ubiquitous_tokens);
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> & rules() const;
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> & aux_rules() const;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar);
|
||||
class SyntaxGrammar : public PreparedGrammar {
|
||||
public:
|
||||
SyntaxGrammar();
|
||||
SyntaxGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
|
||||
const std::vector<rules::Symbol> &ubiquitous_tokens);
|
||||
|
||||
std::vector<rules::Symbol> ubiquitous_tokens;
|
||||
};
|
||||
|
||||
class LexicalGrammar : public PreparedGrammar {
|
||||
public:
|
||||
LexicalGrammar();
|
||||
LexicalGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
|
||||
const std::vector<char> &separators);
|
||||
|
||||
std::vector<char> separators;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // COMPILER_PREPARED_GRAMMAR_H_
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue