Clean up extract_tokens function

This commit is contained in:
Max Brunsfeld 2014-09-09 12:57:29 -07:00
parent 9ee0665fad
commit 8f109504a8

View file

@ -30,8 +30,7 @@ using rules::Symbol;
using rules::SymbolOptionToken;
using rules::SymbolOptionAuxToken;
class SymbolInliner : public rules::IdentityRuleFn {
map<Symbol, Symbol> replacements;
class SymbolReplacer : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
int new_index_for_symbol(const Symbol &symbol) {
@ -46,6 +45,8 @@ class SymbolInliner : public rules::IdentityRuleFn {
rule_ptr apply_to(const Symbol *rule) { return replace_symbol(*rule).copy(); }
public:
map<Symbol, Symbol> replacements;
Symbol replace_symbol(const Symbol &rule) {
if (rule.is_built_in())
return rule;
@ -55,9 +56,6 @@ class SymbolInliner : public rules::IdentityRuleFn {
else
return Symbol(new_index_for_symbol(rule), rule.options);
}
explicit SymbolInliner(const map<Symbol, Symbol> &replacements)
: replacements(replacements) {}
};
class TokenExtractor : public rules::IdentityRuleFn {
@ -93,57 +91,56 @@ class TokenExtractor : public rules::IdentityRuleFn {
vector<pair<string, rule_ptr>> tokens;
};
static const GrammarError *ubiq_token_err(const string &msg) {
return new GrammarError(GrammarErrorTypeInvalidUbiquitousToken, msg);
static tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
ubiq_token_err(const rule_ptr rule) {
return make_tuple(SyntaxGrammar(), LexicalGrammar(),
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
"Not a token: " + rule->to_string()));
}
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
const Grammar &grammar) {
vector<pair<string, rule_ptr>> rules, tokens, aux_rules, aux_tokens;
vector<pair<string, rule_ptr>> rules, tokens;
vector<rule_ptr> separators;
set<Symbol> ubiquitous_tokens;
map<Symbol, Symbol> symbol_replacements;
SymbolReplacer symbol_replacer;
TokenExtractor extractor;
for (size_t i = 0; i < grammar.rules().size(); i++) {
auto pair = grammar.rules()[i];
size_t i = 0;
for (auto &pair : grammar.rules()) {
if (is_token(pair.second)) {
tokens.push_back(pair);
symbol_replacements.insert(
{ Symbol(i), Symbol(tokens.size() - 1, SymbolOptionToken) });
symbol_replacer.replacements.insert(
{ Symbol(i), Symbol(tokens.size() - 1, SymbolOptionToken), });
} else {
rules.push_back({ pair.first, extractor.apply(pair.second) });
}
i++;
}
aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(),
extractor.tokens.end());
SymbolInliner inliner(symbol_replacements);
for (auto &pair : rules)
pair.second = inliner.apply(pair.second);
pair.second = symbol_replacer.apply(pair.second);
for (auto rule : grammar.ubiquitous_tokens()) {
for (auto &rule : grammar.ubiquitous_tokens()) {
if (is_token(rule)) {
separators.push_back(rule);
} else {
auto sym = dynamic_pointer_cast<const Symbol>(extractor.apply(rule));
if (!sym.get())
return make_tuple(SyntaxGrammar(), LexicalGrammar(),
ubiq_token_err("Not a token: " + rule->to_string()));
return ubiq_token_err(rule);
Symbol symbol = inliner.replace_symbol(*sym);
Symbol symbol = symbol_replacer.replace_symbol(*sym);
if (!symbol.is_token())
return make_tuple(SyntaxGrammar(), LexicalGrammar(),
ubiq_token_err("Not a token: " + symbol.to_string()));
return ubiq_token_err(rule);
ubiquitous_tokens.insert(symbol);
}
}
return make_tuple(SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
LexicalGrammar(tokens, aux_tokens, separators), nullptr);
return make_tuple(SyntaxGrammar(rules, {}, ubiquitous_tokens),
LexicalGrammar(tokens, extractor.tokens, separators),
nullptr);
}
} // namespace prepare_grammar