Rename RENAME rule to ALIAS, allow it to create anonymous nodes

This commit is contained in:
Max Brunsfeld 2017-07-31 11:45:24 -07:00
parent b5f421cafb
commit cb5fe80348
28 changed files with 304 additions and 270 deletions

View file

@ -64,7 +64,7 @@ class ParseTableBuilder {
pair<ParseTable, CompileError> build() {
// Ensure that the empty alias sequence has index 0.
parse_table.rename_sequences.push_back({});
parse_table.alias_sequences.push_back({});
// Ensure that the error state has index 0.
ParseStateId error_state_id = add_parse_state({}, ParseItemSet{});
@ -73,7 +73,7 @@ class ParseTableBuilder {
Symbol start_symbol = grammar.variables.empty() ?
Symbol::terminal(0) :
Symbol::non_terminal(0);
Production start_production{{{start_symbol, 0, rules::AssociativityNone, ""}}, 0};
Production start_production{{{start_symbol, 0, rules::AssociativityNone, {"", false}}}, 0};
add_parse_state({}, ParseItemSet{{
{
ParseItem(rules::START(), start_production, 0),
@ -204,7 +204,7 @@ class ParseTableBuilder {
item.precedence(),
item.production->dynamic_precedence,
item.associativity(),
get_rename_sequence_id(*item.production)
get_alias_sequence_id(*item.production)
);
lookahead_symbols.for_each([&](Symbol lookahead) {
@ -716,30 +716,30 @@ class ParseTableBuilder {
}
}
// Returns the index, within the parse table's list of sequences, of the
// sequence of per-step aliases that belongs to `production`. An equal sequence
// that is already stored is reused; otherwise the new one is appended.
// NOTE: this listing is a diff without +/- markers, so each removed
// `rename_*` line is directly followed by its `alias_*` replacement.
unsigned get_rename_sequence_id(const Production &production) {
bool has_rename = false;
RenameSequence rename_sequence;
unsigned get_alias_sequence_id(const Production &production) {
bool has_alias = false;
AliasSequence alias_sequence;
// Store each aliased step at the step's own index. The sequence only grows up
// to the last aliased step; earlier steps without an alias hold a
// default-constructed entry.
for (unsigned i = 0, n = production.size(); i < n; i++) {
auto &step = production.at(i);
if (!step.name_replacement.empty()) {
has_rename = true;
rename_sequence.resize(i + 1);
rename_sequence[i] = step.name_replacement;
if (!step.alias.value.empty()) {
has_alias = true;
alias_sequence.resize(i + 1);
alias_sequence[i] = step.alias;
}
}
// The recorded maximum is the full production length, not the (possibly
// shorter) length of the stored sequence.
if (has_rename && production.size() > parse_table.max_rename_sequence_length) {
parse_table.max_rename_sequence_length = production.size();
if (has_alias && production.size() > parse_table.max_alias_sequence_length) {
parse_table.max_alias_sequence_length = production.size();
}
// Linear search for an identical sequence. A production without any alias
// yields an empty sequence, which matches the empty entry that build() pushes
// first.
auto begin = parse_table.rename_sequences.begin();
auto end = parse_table.rename_sequences.end();
auto iter = find(begin, end, rename_sequence);
auto begin = parse_table.alias_sequences.begin();
auto end = parse_table.alias_sequences.end();
auto iter = find(begin, end, alias_sequence);
if (iter != end) {
return iter - begin;
} else {
parse_table.rename_sequences.push_back(move(rename_sequence));
return parse_table.rename_sequences.size() - 1;
parse_table.alias_sequences.push_back(move(alias_sequence));
return parse_table.alias_sequences.size() - 1;
}
}

View file

@ -28,9 +28,7 @@ bool ParseItem::operator==(const ParseItem &other) const {
if (variable_index != other.variable_index) return false;
if (production->size() != other.production->size()) return false;
for (size_t i = 0; i < step_index; i++) {
if (production->at(i).name_replacement != other.production->at(i).name_replacement) {
return false;
}
if (production->at(i).alias != other.production->at(i).alias) return false;
}
if (is_done()) {
if (!production->empty()) {
@ -53,8 +51,8 @@ bool ParseItem::operator<(const ParseItem &other) const {
if (production->size() < other.production->size()) return true;
if (other.production->size() < production->size()) return false;
for (size_t i = 0; i < step_index; i++) {
if (production->at(i).name_replacement < other.production->at(i).name_replacement) return true;
if (other.production->at(i).name_replacement < production->at(i).name_replacement) return false;
if (production->at(i).alias < other.production->at(i).alias) return true;
if (other.production->at(i).alias < production->at(i).alias) return false;
}
if (is_done()) {
if (!production->empty()) {
@ -158,7 +156,8 @@ struct hash<ParseItem> {
hash_combine(&result, item.production->dynamic_precedence);
hash_combine(&result, item.production->size());
for (size_t i = 0; i < item.step_index; i++) {
hash_combine(&result, item.production->at(i).name_replacement);
hash_combine(&result, item.production->at(i).alias.value);
hash_combine(&result, item.production->at(i).alias.is_named);
}
if (item.is_done()) {
if (!item.production->empty()) {

View file

@ -211,8 +211,8 @@ const vector<Production> &ParseItemSetBuilder::inline_production(const ParseItem
Production production{{begin, step}, item.production->dynamic_precedence};
for (auto &step : *production_to_insert) {
production.steps.push_back(step);
if (!inlined_step.name_replacement.empty()) {
production.steps.back().name_replacement = inlined_step.name_replacement;
if (!inlined_step.alias.value.empty()) {
production.steps.back().alias = inlined_step.alias;
}
}
production.back().precedence = inlined_step.precedence;

View file

@ -25,6 +25,7 @@ using std::to_string;
using std::vector;
using util::escape_char;
using rules::Symbol;
using rules::Alias;
static const map<char, string> REPLACEMENTS({
{ '~', "TILDE" },
@ -76,7 +77,7 @@ class CCodeGenerator {
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
vector<set<Symbol::Index>> external_scanner_states;
size_t next_parse_action_list_index;
set<string> unique_replacement_names;
set<Alias> unique_aliases;
public:
CCodeGenerator(string name, const ParseTable &parse_table,
@ -99,7 +100,7 @@ class CCodeGenerator {
add_symbol_enum();
add_symbol_names_list();
add_symbol_metadata_list();
add_rename_sequences();
add_alias_sequences();
add_lex_function();
add_lex_modes_list();
@ -141,10 +142,10 @@ class CCodeGenerator {
}
}
for (const RenameSequence &rename_sequence : parse_table.rename_sequences) {
for (const string &name_replacement : rename_sequence) {
if (!name_replacement.empty()) {
unique_replacement_names.insert(name_replacement);
for (const AliasSequence &alias_sequence : parse_table.alias_sequences) {
for (const Alias &alias : alias_sequence) {
if (!alias.value.empty()) {
unique_aliases.insert(alias);
}
}
}
@ -152,10 +153,10 @@ class CCodeGenerator {
line("#define LANGUAGE_VERSION " + to_string(TREE_SITTER_LANGUAGE_VERSION));
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
line("#define RENAME_SYMBOL_COUNT " + to_string(unique_replacement_names.size()));
line("#define ALIAS_COUNT " + to_string(unique_aliases.size()));
line("#define TOKEN_COUNT " + to_string(token_count));
line("#define EXTERNAL_TOKEN_COUNT " + to_string(syntax_grammar.external_tokens.size()));
line("#define MAX_RENAME_SEQUENCE_LENGTH " + to_string(parse_table.max_rename_sequence_length));
line("#define MAX_ALIAS_SEQUENCE_LENGTH " + to_string(parse_table.max_alias_sequence_length));
line();
}
@ -171,8 +172,8 @@ class CCodeGenerator {
}
}
for (const string &replacement_name : unique_replacement_names) {
line(rename_id(replacement_name) + " = " + to_string(i) + ",");
for (const Alias &alias : unique_aliases) {
line(alias_id(alias) + " = " + to_string(i) + ",");
i++;
}
});
@ -190,10 +191,10 @@ class CCodeGenerator {
);
}
for (const string &replacement_name : unique_replacement_names) {
for (const Alias &alias : unique_aliases) {
line(
"[" + rename_id(replacement_name) + "] = \"" +
sanitize_name_for_string(replacement_name) + "\","
"[" + alias_id(alias) + "] = \"" +
sanitize_name_for_string(alias.value) + "\","
);
}
});
@ -201,22 +202,21 @@ class CCodeGenerator {
line();
}
void add_rename_sequences() {
void add_alias_sequences() {
line(
"static TSSymbol ts_rename_sequences[" +
to_string(parse_table.rename_sequences.size()) +
"][MAX_RENAME_SEQUENCE_LENGTH] = {"
"static TSSymbol ts_alias_sequences[" +
to_string(parse_table.alias_sequences.size()) +
"][MAX_ALIAS_SEQUENCE_LENGTH] = {"
);
indent([&]() {
for (unsigned i = 1, n = parse_table.rename_sequences.size(); i < n; i++) {
const RenameSequence &sequence = parse_table.rename_sequences[i];
for (unsigned i = 1, n = parse_table.alias_sequences.size(); i < n; i++) {
const AliasSequence &sequence = parse_table.alias_sequences[i];
line("[" + to_string(i) + "] = {");
indent([&]() {
for (unsigned j = 0, n = sequence.size(); j < n; j++) {
if (!sequence[j].empty()) {
line("[" + to_string(j) + "] = " + rename_id(sequence[j]) + ",");
if (!sequence[j].value.empty()) {
line("[" + to_string(j) + "] = " + alias_id(sequence[j]) + ",");
}
}
});
@ -260,11 +260,11 @@ class CCodeGenerator {
line("},");
}
for (const string &replacement_name : unique_replacement_names) {
line("[" + rename_id(replacement_name) + "] = {");
for (const Alias &alias : unique_aliases) {
line("[" + alias_id(alias) + "] = {");
indent([&]() {
line(".visible = true,");
line(".named = true,");
line(".named = " + _boolean(alias.is_named) + ",");
line(".structural = true,");
line(".extra = true,");
});
@ -616,8 +616,8 @@ class CCodeGenerator {
add(", .dynamic_precedence = " + to_string(action.dynamic_precedence));
}
if (action.rename_sequence_id != 0) {
add(", .rename_sequence_id = " + to_string(action.rename_sequence_id));
if (action.alias_sequence_id != 0) {
add(", .alias_sequence_id = " + to_string(action.alias_sequence_id));
}
add(")");
@ -671,8 +671,12 @@ class CCodeGenerator {
}
}
// Builds the identifier under which an alias appears in the generated code
// (enum entry, name table, metadata table, alias sequences). The prefix depends
// on `is_named`, so two aliases that differ only in that flag get different
// prefixes.
// NOTE: this listing is a diff; the first two lines are the removed
// `rename_id` helper, the following lines are its `alias_id` replacement.
string rename_id(const string &name) {
return "rename_sym_" + sanitize_name(name);
string alias_id(const Alias &alias) {
if (alias.is_named) {
return "alias_sym_" + sanitize_name(alias.value);
} else {
return "anon_alias_sym_" + sanitize_name(alias.value);
}
}
string symbol_name(const Symbol &symbol) {

View file

@ -198,18 +198,27 @@ ParseRuleResult parse_rule(json_value *rule_json) {
return Rule(Metadata::prec_dynamic(precedence_json.u.integer, result.rule));
}
if (type == "RENAME") {
json_value name_json = rule_json->operator[]("value");
if (name_json.type != json_string) {
if (type == "ALIAS") {
json_value value_json = rule_json->operator[]("value");
if (value_json.type != json_string) {
return "Rename value must be a string";
}
json_value is_named_json = rule_json->operator[]("named");
if (is_named_json.type != json_boolean) {
return "Rename named value must be a boolean";
}
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid rename content: " + result.error_message;
}
return Rule(Metadata::rename(string(name_json.u.string.ptr), result.rule));
return Rule(Metadata::alias(
string(value_json.u.string.ptr),
is_named_json.u.boolean,
result.rule
));
}
return "Unknown rule type: " + type;

View file

@ -19,7 +19,7 @@ ParseAction::ParseAction() :
precedence(0),
dynamic_precedence(0),
associativity(rules::AssociativityNone),
rename_sequence_id(0),
alias_sequence_id(0),
fragile(false),
extra(false) {}
@ -56,7 +56,7 @@ ParseAction ParseAction::ShiftExtra() {
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
int precedence, int dynamic_precedence,
rules::Associativity associativity, unsigned rename_sequence_id) {
rules::Associativity associativity, unsigned alias_sequence_id) {
ParseAction result;
result.type = ParseActionTypeReduce;
result.symbol = symbol;
@ -64,7 +64,7 @@ ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
result.precedence = precedence;
result.dynamic_precedence = dynamic_precedence;
result.associativity = associativity;
result.rename_sequence_id = rename_sequence_id;
result.alias_sequence_id = alias_sequence_id;
return result;
}
@ -77,7 +77,7 @@ bool ParseAction::operator==(const ParseAction &other) const {
precedence == other.precedence &&
dynamic_precedence == other.dynamic_precedence &&
associativity == other.associativity &&
rename_sequence_id == other.rename_sequence_id &&
alias_sequence_id == other.alias_sequence_id &&
extra == other.extra &&
fragile == other.fragile;
}
@ -101,7 +101,7 @@ bool ParseAction::operator<(const ParseAction &other) const {
if (other.extra && !extra) return false;
if (fragile && !other.fragile) return true;
if (other.fragile && !fragile) return false;
return rename_sequence_id < other.rename_sequence_id;
return alias_sequence_id < other.alias_sequence_id;
}
ParseTableEntry::ParseTableEntry()

View file

@ -30,7 +30,7 @@ struct ParseAction {
static ParseAction Recover(ParseStateId state_index);
static ParseAction Reduce(rules::Symbol symbol, size_t child_count,
int precedence, int dynamic_precedence, rules::Associativity,
unsigned rename_sequence_id);
unsigned alias_sequence_id);
static ParseAction ShiftExtra();
bool operator==(const ParseAction &) const;
bool operator<(const ParseAction &) const;
@ -42,7 +42,7 @@ struct ParseAction {
int precedence;
int dynamic_precedence;
rules::Associativity associativity;
unsigned rename_sequence_id;
unsigned alias_sequence_id;
bool fragile;
bool extra;
};
@ -77,7 +77,7 @@ struct ParseTableSymbolMetadata {
bool structural;
};
using RenameSequence = std::vector<std::string>;
using AliasSequence = std::vector<rules::Alias>;
struct ParseTable {
ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
@ -85,8 +85,8 @@ struct ParseTable {
std::vector<ParseState> states;
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
std::vector<RenameSequence> rename_sequences;
unsigned max_rename_sequence_length = 0;
std::vector<AliasSequence> alias_sequences;
unsigned max_alias_sequence_length = 0;
};
} // namespace tree_sitter

View file

@ -22,7 +22,7 @@ class FlattenRule {
private:
vector<int> precedence_stack;
vector<rules::Associativity> associativity_stack;
vector<string> name_replacement_stack;
vector<rules::Alias> alias_stack;
Production production;
void apply(const Rule &rule, bool at_end) {
@ -32,7 +32,7 @@ class FlattenRule {
symbol,
precedence_stack.back(),
associativity_stack.back(),
name_replacement_stack.back()
alias_stack.back()
});
},
@ -45,8 +45,8 @@ class FlattenRule {
associativity_stack.push_back(metadata.params.associativity);
}
if (!metadata.params.name_replacement.empty()) {
name_replacement_stack.push_back(metadata.params.name_replacement);
if (!metadata.params.alias.value.empty()) {
alias_stack.push_back(metadata.params.alias);
}
if (abs(metadata.params.dynamic_precedence) > abs(production.dynamic_precedence)) {
@ -65,8 +65,8 @@ class FlattenRule {
if (!at_end) production.back().associativity = associativity_stack.back();
}
if (!metadata.params.name_replacement.empty()) {
name_replacement_stack.pop_back();
if (!metadata.params.alias.value.empty()) {
alias_stack.pop_back();
}
},
@ -87,7 +87,7 @@ class FlattenRule {
// Seeds every stack with one neutral entry (precedence 0, no associativity,
// default alias). apply() reads the stacks through back(), and metadata only
// pushes an alias when its value is non-empty, so this entry is what a step
// outside any alias metadata receives.
// NOTE: this listing is a diff; the last two lines are the removed and the
// added form of the same initializer.
FlattenRule() :
precedence_stack({0}),
associativity_stack({rules::AssociativityNone}),
name_replacement_stack({""}) {}
alias_stack({rules::Alias{}}) {}
Production flatten(const Rule &rule) {
apply(rule, true);

View file

@ -9,6 +9,20 @@ namespace rules {
using std::move;
using std::string;
// Two aliases are equal when both the flag and the text match.
bool Alias::operator==(const Alias &other) const {
  if (is_named != other.is_named) return false;
  return value == other.value;
}
// Negation of operator==.
bool Alias::operator!=(const Alias &other) const {
  const bool same = (*this == other);
  return !same;
}
// Orders aliases by text first; for equal text, the alias with
// is_named == false sorts before the one with is_named == true.
bool Alias::operator<(const Alias &other) const {
  const int text_order = value.compare(other.value);
  if (text_order != 0) return text_order < 0;
  return is_named < other.is_named;
}
Metadata::Metadata(const Rule &rule, MetadataParams params) :
rule(std::make_shared<Rule>(rule)), params(params) {}
@ -77,9 +91,9 @@ Metadata Metadata::main_token(const Rule &rule) {
return Metadata{rule, params};
}
// Wraps `rule` in a Metadata whose params carry the alias {value, is_named};
// every other param keeps its MetadataParams default. `value` is moved from.
// NOTE: this listing is a diff; each removed `rename` line is directly
// followed by its `alias` replacement.
Metadata Metadata::rename(string &&name, const Rule &rule) {
Metadata Metadata::alias(string &&value, bool is_named, const Rule &rule) {
MetadataParams params;
params.name_replacement = move(name);
params.alias = {move(value), is_named};
return Metadata{rule, params};
}

View file

@ -13,6 +13,14 @@ enum Associativity {
AssociativityRight,
};
// An alias attached to a rule: the alias text and a flag saying whether the
// aliased node is treated as named. An empty `value` is what the rest of the
// code checks to mean "no alias".
struct Alias {
  std::string value{};
  bool is_named{false};

  // Comparisons take both `value` and `is_named` into account.
  bool operator<(const Alias &) const;
  bool operator==(const Alias &) const;
  bool operator!=(const Alias &) const;
};
struct MetadataParams {
int precedence;
int dynamic_precedence;
@ -23,7 +31,7 @@ struct MetadataParams {
bool is_string;
bool is_active;
bool is_main_token;
std::string name_replacement;
Alias alias;
inline MetadataParams() :
precedence{0}, dynamic_precedence{0}, associativity{AssociativityNone},
@ -41,7 +49,7 @@ struct MetadataParams {
is_string == other.is_string &&
is_active == other.is_active &&
is_main_token == other.is_main_token &&
name_replacement == other.name_replacement
alias == other.alias
);
}
};
@ -62,7 +70,7 @@ struct Metadata {
static Metadata prec_dynamic(int precedence, const Rule &rule);
static Metadata separator(const Rule &rule);
static Metadata main_token(const Rule &rule);
static Metadata rename(std::string &&name, const Rule &rule);
static Metadata alias(std::string &&value, bool is_named, const Rule &rule);
bool operator==(const Metadata &other) const;
};

View file

@ -6,7 +6,7 @@ bool ProductionStep::operator==(const ProductionStep &other) const {
return symbol == other.symbol &&
precedence == other.precedence &&
associativity == other.associativity &&
name_replacement == other.name_replacement;
alias == other.alias;
}
bool ProductionStep::operator!=(const ProductionStep &other) const {
@ -20,7 +20,7 @@ bool ProductionStep::operator<(const ProductionStep &other) const {
if (other.precedence < precedence) return false;
if (associativity < other.associativity) return true;
if (other.associativity < associativity) return false;
return name_replacement < other.name_replacement;
return alias < other.alias;
}
bool Production::operator==(const Production &other) const {

View file

@ -13,7 +13,7 @@ struct ProductionStep {
rules::Symbol symbol;
int precedence;
rules::Associativity associativity;
std::string name_replacement;
rules::Alias alias;
bool operator==(const ProductionStep &) const;
bool operator!=(const ProductionStep &) const;