Refactor logic for marking '_'-prefixed rules as hidden

This commit is contained in:
Max Brunsfeld 2015-09-06 16:46:29 -07:00
parent 9591c88f39
commit f9316933ad
16 changed files with 22975 additions and 22952 deletions

View file

@ -13,7 +13,6 @@
namespace tree_sitter {
namespace generate_code {
using std::function;
using std::map;
using std::set;
@ -22,6 +21,14 @@ using std::to_string;
using std::vector;
using util::escape_char;
// Stand-in rule entry for the built-in error symbol. entry_for_symbol()
// returns it when the symbol equals rules::ERROR(), so callers can treat
// built-in and grammar-defined symbols uniformly. It carries a
// default-constructed rule_ptr and is typed as Named, which the node-type
// table maps to TSNodeTypeNormal.
static RuleEntry ERROR_ENTRY{
"error", rule_ptr(), RuleEntryTypeNamed,
};
// Stand-in rule entry for the built-in end-of-input symbol. entry_for_symbol()
// returns it when the symbol equals rules::END_OF_INPUT(). It carries a
// default-constructed rule_ptr and is typed as Auxiliary, which the node-type
// table maps to TSNodeTypeHidden.
static RuleEntry EOF_ENTRY{
"end", rule_ptr(), RuleEntryTypeAuxiliary,
};
static const map<char, string> REPLACEMENTS({
{ '~', "TILDE" },
{ '`', "BQUOTE" },
@ -142,21 +149,7 @@ class CCodeGenerator {
for (const auto &symbol : parse_table.symbols) {
line("[" + symbol_id(symbol) + "] = ");
if (symbol == rules::ERROR()) {
add("TSNodeTypeNormal,");
continue;
} else if (symbol == rules::END_OF_INPUT()) {
add("TSNodeTypeHidden,");
continue;
}
RuleEntry entry = entry_for_symbol(symbol);
if (entry.name[0] == '_') {
add("TSNodeTypeHidden,");
continue;
}
switch (entry.type) {
switch (entry_for_symbol(symbol).type) {
case RuleEntryTypeNamed:
add("TSNodeTypeNormal,");
break;
@ -164,6 +157,7 @@ class CCodeGenerator {
add("TSNodeTypeConcrete,");
break;
case RuleEntryTypeHidden:
case RuleEntryTypeAuxiliary:
add("TSNodeTypeHidden,");
break;
}
@ -344,36 +338,34 @@ class CCodeGenerator {
}
string symbol_id(const rules::Symbol &symbol) {
if (symbol.is_built_in()) {
if (symbol == rules::ERROR())
return "ts_builtin_sym_error";
else if (symbol == rules::END_OF_INPUT())
return "ts_builtin_sym_end";
else
return "";
} else {
string name = sanitize_name(rule_name(symbol));
if (entry_for_symbol(symbol).type == RuleEntryTypeNamed)
return "sym_" + name;
else
RuleEntry entry = entry_for_symbol(symbol);
string name = sanitize_name(entry.name);
if (symbol.is_built_in())
return "ts_builtin_sym_" + name;
switch (entry.type) {
case RuleEntryTypeAuxiliary:
return "aux_sym_" + name;
case RuleEntryTypeAnonymous:
return "anon_sym_" + name;
default:
return "sym_" + name;
}
}
string symbol_name(const rules::Symbol &symbol) {
if (symbol.is_built_in()) {
if (symbol == rules::ERROR())
return "ERROR";
else if (symbol == rules::END_OF_INPUT())
return "END";
else
return "";
} else {
return rule_name(symbol);
}
if (symbol == rules::ERROR())
return "ERROR";
if (symbol == rules::END_OF_INPUT())
return "END";
return entry_for_symbol(symbol).name;
}
const RuleEntry &entry_for_symbol(const rules::Symbol &symbol) {
if (symbol == rules::ERROR())
return ERROR_ENTRY;
if (symbol == rules::END_OF_INPUT())
return EOF_ENTRY;
if (symbol.is_token)
return lexical_grammar.rules[symbol.index];
else

View file

@ -46,7 +46,7 @@ class ExpandRepeats : public rules::IdentityRuleFn {
helper_rule_name,
Seq::build({ inner_rule, Choice::build({ repeat_symbol.copy(),
make_shared<Blank>() }) }),
RuleEntryTypeHidden,
RuleEntryTypeAuxiliary,
});
return repeat_symbol.copy();
}

View file

@ -76,12 +76,12 @@ class TokenExtractor : public rules::IdentityRuleFn {
}
rule_ptr apply_to(const rules::Pattern *rule) {
return apply_to_token(rule, RuleEntryTypeHidden);
return apply_to_token(rule, RuleEntryTypeAuxiliary);
}
rule_ptr apply_to(const rules::Metadata *rule) {
if (rule->value_for(rules::IS_TOKEN) > 0)
return apply_to_token(rule->rule.get(), RuleEntryTypeHidden);
return apply_to_token(rule->rule.get(), RuleEntryTypeAuxiliary);
else
return rules::IdentityRuleFn::apply_to(rule);
}
@ -107,9 +107,9 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
* First, extract all of the grammar's tokens into the lexical grammar.
*/
vector<RuleEntry> processed_rules;
for (const auto &pair : grammar.rules)
for (const RuleEntry &entry : grammar.rules)
processed_rules.push_back({
pair.first, extractor.apply(pair.second), RuleEntryTypeNamed,
entry.name, extractor.apply(entry.rule), entry.type,
});
lexical_grammar.rules = extractor.tokens;

View file

@ -55,7 +55,11 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &gramma
auto new_rule = interner.apply(pair.second);
if (!interner.missing_rule_name.empty())
return { result, missing_rule_error(interner.missing_rule_name) };
result.rules.push_back({ pair.first, new_rule });
result.rules.push_back({
pair.first, new_rule,
pair.first[0] == '_' ? RuleEntryTypeHidden : RuleEntryTypeNamed,
});
}
for (auto &rule : grammar.ubiquitous_tokens()) {

View file

@ -7,12 +7,13 @@
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace prepare_grammar {
struct InternedGrammar {
std::vector<std::pair<std::string, rule_ptr>> rules;
std::vector<RuleEntry> rules;
std::set<rule_ptr> ubiquitous_tokens;
std::set<std::set<rules::Symbol>> expected_conflicts;
};

View file

@ -13,6 +13,7 @@ enum RuleEntryType {
RuleEntryTypeNamed,
RuleEntryTypeAnonymous,
RuleEntryTypeHidden,
RuleEntryTypeAuxiliary,
};
struct RuleEntry {

View file

@ -16,10 +16,11 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength size, TSLength padding,
.child_count = 0,
.children = NULL,
.padding = padding,
.options = (TSTreeOptions){
.hidden = (node_type == TSNodeTypeHidden),
.concrete = (node_type == TSNodeTypeConcrete),
},
.options =
(TSTreeOptions){
.hidden = (node_type == TSNodeTypeHidden),
.concrete = (node_type == TSNodeTypeConcrete),
},
};
return result;
}