Allow anonymous tokens to be used in grammars' external token lists

This commit is contained in:
Max Brunsfeld 2017-03-17 16:31:29 -07:00
parent e2baf0930b
commit ed8fbff175
24 changed files with 282 additions and 183 deletions

View file

@ -85,7 +85,7 @@ class ExpandRepeats {
return apply(rule);
}
vector<InitialSyntaxGrammar::Variable> aux_rules;
vector<Variable> aux_rules;
};
InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {

View file

@ -156,7 +156,7 @@ class TokenExtractor {
}
vector<size_t> token_usage_counts;
vector<InternedGrammar::Variable> tokens;
vector<Variable> tokens;
};
tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
@ -167,8 +167,8 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
SymbolReplacer symbol_replacer;
TokenExtractor extractor;
// First, extract all of the grammar's tokens into the lexical grammar.
vector<InitialSyntaxGrammar::Variable> processed_variables;
// Extract all of the grammar's tokens into the lexical grammar.
vector<Variable> processed_variables;
for (const auto &variable : grammar.variables) {
processed_variables.push_back({
variable.name,
@ -177,6 +177,15 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
});
}
vector<Variable> processed_external_tokens;
for (const auto &external_token : grammar.external_tokens) {
processed_external_tokens.push_back({
external_token.name,
external_token.type,
extractor.apply(external_token.rule)
});
}
for (const auto &extracted_token : extractor.tokens) {
auto expansion = expand_token(extracted_token.rule);
if (expansion.error) return make_tuple(
@ -269,12 +278,22 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
if (error) return make_tuple(syntax_grammar, lexical_grammar, error);
}
for (const ExternalToken &external_token : grammar.external_tokens) {
Symbol internal_token = symbol_replacer.replace_symbol(
external_token.corresponding_internal_token
);
for (const auto &external_token : processed_external_tokens) {
Rule new_rule = symbol_replacer.apply(external_token.rule);
if (internal_token.is_non_terminal()) {
if (!new_rule.is<Symbol>()) {
return make_tuple(
syntax_grammar,
lexical_grammar,
CompileError(
TSCompileErrorTypeInvalidExternalToken,
"Non-symbol rule expressions can't be used as external tokens"
)
);
}
Symbol symbol = new_rule.get_unchecked<Symbol>();
if (symbol.is_non_terminal()) {
return make_tuple(
syntax_grammar,
lexical_grammar,
@ -285,11 +304,19 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
);
}
syntax_grammar.external_tokens.push_back(ExternalToken{
external_token.name,
external_token.type,
internal_token
});
if (symbol.is_external()) {
syntax_grammar.external_tokens.push_back(ExternalToken{
external_token.name,
external_token.type,
rules::NONE()
});
} else {
syntax_grammar.external_tokens.push_back(ExternalToken{
lexical_grammar.variables[symbol.index].name,
external_token.type,
symbol
});
}
}
return make_tuple(syntax_grammar, lexical_grammar, CompileError::none());

View file

@ -89,7 +89,7 @@ class FlattenRule {
}
};
SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable) {
SyntaxVariable flatten_rule(const Variable &variable) {
vector<Production> productions;
for (const Rule &rule_component : extract_choices(variable.rule)) {

View file

@ -11,7 +11,7 @@
namespace tree_sitter {
namespace prepare_grammar {
SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable);
SyntaxVariable flatten_rule(const Variable &variable);
std::pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &);
} // namespace prepare_grammar

View file

@ -5,22 +5,13 @@
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {
struct InitialSyntaxGrammar {
struct Variable {
std::string name;
VariableType type;
rules::Rule rule;
inline bool operator==(const Variable &other) const {
return name == other.name && type == other.type && rule == other.rule;
}
};
std::vector<Variable> variables;
std::set<rules::Symbol> extra_tokens;
std::set<std::set<rules::Symbol>> expected_conflicts;

View file

@ -21,14 +21,21 @@ class SymbolInterner {
public:
Rule apply(const Rule &rule) {
return rule.match(
[&](const rules::Blank &blank) -> Rule { return blank; },
[&](const rules::Blank &blank) -> Rule {
return blank;
},
[&](const rules::NamedSymbol &symbol) {
return intern_symbol(symbol);
},
[&](const rules::String &string) { return string; },
[&](const rules::Pattern &pattern) { return pattern; },
[&](const rules::String &string) {
return string;
},
[&](const rules::Pattern &pattern) {
return pattern;
},
[&](const rules::Choice &choice) {
vector<rules::Rule> elements;
@ -58,12 +65,18 @@ class SymbolInterner {
}
Symbol intern_symbol(rules::NamedSymbol named_symbol) {
for (size_t i = 0; i < grammar.variables.size(); i++)
if (grammar.variables[i].name == named_symbol.value)
for (size_t i = 0; i < grammar.variables.size(); i++) {
if (grammar.variables[i].name == named_symbol.value) {
return Symbol::non_terminal(i);
for (size_t i = 0; i < grammar.external_tokens.size(); i++)
if (grammar.external_tokens[i].name == named_symbol.value)
}
}
for (size_t i = 0; i < grammar.external_tokens.size(); i++) {
if (grammar.external_tokens[i].name == named_symbol.value) {
return Symbol::external(i);
}
}
missing_rule_name = named_symbol.value;
return rules::NONE();
}
@ -81,23 +94,21 @@ CompileError missing_rule_error(string rule_name) {
pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &grammar) {
InternedGrammar result;
SymbolInterner interner(grammar);
for (auto &external_token : grammar.external_tokens) {
Symbol corresponding_internal_token = rules::NONE();
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
if (grammar.variables[i].name == external_token.name) {
corresponding_internal_token = Symbol::non_terminal(i);
break;
}
auto new_rule = interner.apply(external_token.rule);
if (!interner.missing_rule_name.empty()) {
return { result, missing_rule_error(interner.missing_rule_name) };
}
result.external_tokens.push_back(ExternalToken{
result.external_tokens.push_back(Variable{
external_token.name,
external_token.name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
corresponding_internal_token
external_token.name[0] == '_' ? VariableTypeHidden : external_token.type,
new_rule
});
}
SymbolInterner interner(grammar);
for (auto &variable : grammar.variables) {
auto new_rule = interner.apply(variable.rule);
@ -105,7 +116,7 @@ pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &grammar)
return { result, missing_rule_error(interner.missing_rule_name) };
}
result.variables.push_back(InternedGrammar::Variable{
result.variables.push_back(Variable{
variable.name,
variable.name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
new_rule
@ -131,7 +142,7 @@ pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &grammar)
result.expected_conflicts.insert(entry);
}
return { result, CompileError::none() };
return {result, CompileError::none()};
}
} // namespace prepare_grammar

View file

@ -11,20 +11,10 @@ namespace tree_sitter {
namespace prepare_grammar {
struct InternedGrammar {
struct Variable {
std::string name;
VariableType type;
rules::Rule rule;
bool operator==(const Variable &other) const {
return name == other.name && type == other.type && rule == other.rule;
}
};
std::vector<Variable> variables;
std::vector<rules::Rule> extra_tokens;
std::set<std::set<rules::Symbol>> expected_conflicts;
std::vector<ExternalToken> external_tokens;
std::vector<Variable> external_tokens;
};
} // namespace prepare_grammar