From b3a72954ff389a0655557c58266feea4a0cee852 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Jul 2017 17:17:22 -0700 Subject: [PATCH] Introduce RENAME rule type --- doc/grammar-schema.json | 18 +++ include/tree_sitter/parser.h | 122 +++++++++--------- project.gyp | 1 + .../build_tables/build_parse_table.cc | 49 +++++-- src/compiler/build_tables/parse_item.cc | 17 ++- src/compiler/build_tables/parse_item.h | 3 - .../build_tables/parse_item_set_builder.cc | 19 +-- src/compiler/generate_code/c_code.cc | 88 +++++++++++-- src/compiler/parse_grammar.cc | 14 ++ src/compiler/parse_table.cc | 84 ++++++------ src/compiler/parse_table.h | 4 + .../prepare_grammar/flatten_grammar.cc | 23 +++- src/compiler/rules/metadata.cc | 10 ++ src/compiler/rules/metadata.h | 6 +- src/compiler/syntax_grammar.cc | 36 ++++++ src/compiler/syntax_grammar.h | 40 ++---- src/runtime/language.c | 3 +- src/runtime/language.h | 2 +- src/runtime/node.c | 3 +- src/runtime/parser.c | 53 ++++---- src/runtime/reduce_action.h | 1 + src/runtime/tree.c | 34 ++--- src/runtime/tree.h | 4 +- .../parse_item_set_builder_test.cc | 41 +++--- .../test_grammars/renamed_rules/corpus.txt | 18 +++ .../test_grammars/renamed_rules/grammar.json | 69 ++++++++++ 26 files changed, 516 insertions(+), 246 deletions(-) create mode 100644 src/compiler/syntax_grammar.cc create mode 100644 test/fixtures/test_grammars/renamed_rules/corpus.txt create mode 100644 test/fixtures/test_grammars/renamed_rules/grammar.json diff --git a/doc/grammar-schema.json b/doc/grammar-schema.json index 282a51d0..0d910fd6 100644 --- a/doc/grammar-schema.json +++ b/doc/grammar-schema.json @@ -135,6 +135,23 @@ "required": ["type", "members"] }, + "rename-rule": { + "type": "object", + "properties": { + "type": { + "type": "string", + "pattern": "^RENAME$" + }, + "value": { + "type": "string" + }, + "content": { + "$ref": "#/definitions/rule" + } + }, + "required": ["type", "content", "value"] + }, + "repeat-rule": { "type": "object", 
"properties": { @@ -202,6 +219,7 @@ { "$ref": "#/definitions/symbol-rule" }, { "$ref": "#/definitions/seq-rule" }, { "$ref": "#/definitions/choice-rule" }, + { "$ref": "#/definitions/rename-rule" }, { "$ref": "#/definitions/repeat1-rule" }, { "$ref": "#/definitions/repeat-rule" }, { "$ref": "#/definitions/token-rule" }, diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 295b3c96..4772eec6 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -9,9 +9,8 @@ extern "C" { #include #include -typedef unsigned short TSSymbol; -typedef unsigned short TSStateId; - +typedef uint16_t TSSymbol; +typedef uint16_t TSStateId; typedef uint8_t TSExternalTokenState[16]; #define ts_builtin_sym_error ((TSSymbol)-1) @@ -40,16 +39,19 @@ typedef enum { typedef struct { union { - TSStateId to_state; struct { - short dynamic_precedence; - TSSymbol symbol; - unsigned short child_count; + TSStateId to_state; + bool extra : 1; }; - } params; + struct { + TSSymbol symbol; + uint16_t dynamic_precedence; + uint8_t child_count; + uint8_t rename_sequence_id : 7; + bool fragile : 1; + }; + }; TSParseActionType type : 4; - bool extra : 1; - bool fragile : 1; } TSParseAction; typedef struct { @@ -60,7 +62,7 @@ typedef struct { typedef union { TSParseAction action; struct { - unsigned short count; + uint8_t count; bool reusable : 1; bool depends_on_lookahead : 1; }; @@ -73,9 +75,11 @@ typedef struct TSLanguage { uint32_t external_token_count; const char **symbol_names; const TSSymbolMetadata *symbol_metadata; - const unsigned short *parse_table; + const uint16_t *parse_table; const TSParseActionEntry *parse_actions; const TSLexMode *lex_modes; + const TSSymbol *rename_sequences; + uint16_t max_rename_sequence_length; bool (*lex_fn)(TSLexer *, TSStateId); struct { const bool *states; @@ -127,70 +131,62 @@ typedef struct TSLanguage { #define STATE(id) id #define ACTIONS(id) id -#define SHIFT(to_state_value) \ - { \ - { \ - .type = 
TSParseActionTypeShift, .params = {.to_state = to_state_value } \ - } \ +#define SHIFT(to_state_value) \ + { \ + { \ + .type = TSParseActionTypeShift, \ + .to_state = to_state_value, \ + } \ } -#define RECOVER(to_state_value) \ - { \ - { \ - .type = TSParseActionTypeRecover, .params = {.to_state = to_state_value } \ - } \ +#define RECOVER(to_state_value) \ + { \ + { \ + .type = TSParseActionTypeRecover, \ + .to_state = to_state_value \ + } \ } -#define SHIFT_EXTRA() \ - { \ - { .type = TSParseActionTypeShift, .extra = true } \ +#define SHIFT_EXTRA() \ + { \ + { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ } -#define REDUCE(symbol_val, child_count_val, dynamic_precedence_val) \ - { \ - { \ - .type = TSParseActionTypeReduce, \ - .params = { \ - .symbol = symbol_val, \ - .child_count = child_count_val, \ - .dynamic_precedence = dynamic_precedence_val, \ - } \ - } \ +#define REDUCE(symbol_val, child_count_val, ...) \ + { \ + { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + } \ } -#define REDUCE_FRAGILE(symbol_val, child_count_val, dynamic_precedence_val) \ -{ \ - { \ - .type = TSParseActionTypeReduce, \ - .fragile = true, \ - .params = { \ - .symbol = symbol_val, \ - .child_count = child_count_val, \ - .dynamic_precedence = dynamic_precedence_val, \ - } \ - } \ -} - #define ACCEPT_INPUT() \ { \ { .type = TSParseActionTypeAccept } \ } -#define GET_LANGUAGE(...) \ - static TSLanguage language = { \ - .version = LANGUAGE_VERSION, \ - .symbol_count = SYMBOL_COUNT, \ - .token_count = TOKEN_COUNT, \ - .symbol_metadata = ts_symbol_metadata, \ - .parse_table = (const unsigned short *)ts_parse_table, \ - .parse_actions = ts_parse_actions, \ - .lex_modes = ts_lex_modes, \ - .symbol_names = ts_symbol_names, \ - .lex_fn = ts_lex, \ - .external_token_count = EXTERNAL_TOKEN_COUNT, \ - .external_scanner = {__VA_ARGS__} \ - }; \ - return &language \ +#define GET_LANGUAGE(...) 
\ + static TSLanguage language = { \ + .version = LANGUAGE_VERSION, \ + .symbol_count = SYMBOL_COUNT, \ + .token_count = TOKEN_COUNT, \ + .symbol_metadata = ts_symbol_metadata, \ + .parse_table = (const unsigned short *)ts_parse_table, \ + .parse_actions = ts_parse_actions, \ + .lex_modes = ts_lex_modes, \ + .symbol_names = ts_symbol_names, \ + .rename_sequences = (const TSSymbol *)ts_rename_sequences, \ + .max_rename_sequence_length = MAX_RENAME_SEQUENCE_LENGTH, \ + .lex_fn = ts_lex, \ + .external_token_count = EXTERNAL_TOKEN_COUNT, \ + .external_scanner = {__VA_ARGS__} \ + }; \ + return &language \ #ifdef __cplusplus } diff --git a/project.gyp b/project.gyp index 4e1ae182..609e28e8 100644 --- a/project.gyp +++ b/project.gyp @@ -38,6 +38,7 @@ 'src/compiler/prepare_grammar/prepare_grammar.cc', 'src/compiler/prepare_grammar/token_description.cc', 'src/compiler/rule.cc', + 'src/compiler/syntax_grammar.cc', 'src/compiler/rules/character_set.cc', 'src/compiler/rules/choice.cc', 'src/compiler/rules/metadata.cc', diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index a14563d2..9175cc4a 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -63,21 +63,26 @@ class ParseTableBuilder { processing_recovery_states(false) {} pair build() { + // Ensure that the empty rename sequence has index 0. + parse_table.rename_sequences.push_back({}); + + // Ensure that the error state has index 0. + ParseStateId error_state_id = add_parse_state({}, ParseItemSet{}); + + // Add the starting state. Symbol start_symbol = grammar.variables.empty() ? 
Symbol::terminal(0) : Symbol::non_terminal(0); - Production start_production{{{start_symbol, 0, rules::AssociativityNone}}, 0}; - - ParseStateId error_state_id = add_parse_state({}, ParseItemSet()); - add_parse_state({}, ParseItemSet({ + Production start_production{{{start_symbol, 0, rules::AssociativityNone, ""}}, 0}; + add_parse_state({}, ParseItemSet{{ { ParseItem(rules::START(), start_production, 0), LookaheadSet({END_OF_INPUT()}), }, - })); + }}); CompileError error = process_part_state_queue(); - if (error.type != TSCompileErrorTypeNone) return {parse_table, error}; + if (error) return {parse_table, error}; compute_unmergable_token_pairs(); @@ -191,9 +196,14 @@ class ParseTableBuilder { // If the item is finished, immediately add a Reduce or Accept action to // the parse table for each of its lookahead terminals. if (item.is_done()) { - ParseAction action = (item.lhs() == rules::START()) ? - ParseAction::Accept() : - ParseAction::Reduce(item.lhs(), item.step_index, *item.production); + ParseAction action; + + if (item.lhs() == rules::START()) { + action = ParseAction::Accept(); + } else { + action = ParseAction::Reduce(item.lhs(), item.step_index, *item.production); + action.rename_sequence_id = get_rename_sequence_id(*item.production); + } int precedence = item.precedence(); lookahead_symbols.for_each([&](Symbol lookahead) { @@ -688,6 +698,27 @@ class ParseTableBuilder { return fragile_productions.find(production) != fragile_productions.end(); } + unsigned get_rename_sequence_id(const Production &production) { + RenameSequence rename_sequence; + for (unsigned i = 0, n = production.size(); i < n; i++) { + auto &step = production.at(i); + if (!step.name_replacement.empty()) { + rename_sequence.resize(production.size()); + rename_sequence[i] = step.name_replacement; + } + } + + auto begin = parse_table.rename_sequences.begin(); + auto end = parse_table.rename_sequences.end(); + auto iter = find(begin, end, rename_sequence); + if (iter != end) { + return iter - 
begin; + } else { + parse_table.rename_sequences.push_back(move(rename_sequence)); + return parse_table.rename_sequences.size() - 1; + } + } + SymbolSequence append_symbol(const SymbolSequence &sequence, const Symbol &symbol) { if (!sequence.empty()) { const LookaheadSet &left_tokens = item_set_builder.get_last_set(sequence.back()); diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index f06898ac..5184a98a 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -27,6 +27,11 @@ bool ParseItem::operator==(const ParseItem &other) const { if (step_index != other.step_index) return false; if (variable_index != other.variable_index) return false; if (production->size() != other.production->size()) return false; + for (size_t i = 0; i < step_index; i++) { + if (production->at(i).name_replacement != other.production->at(i).name_replacement) { + return false; + } + } if (is_done()) { if (!production->empty()) { if (production->back().precedence != other.production->back().precedence) return false; @@ -47,6 +52,10 @@ bool ParseItem::operator<(const ParseItem &other) const { if (other.variable_index < variable_index) return false; if (production->size() < other.production->size()) return true; if (other.production->size() < production->size()) return false; + for (size_t i = 0; i < step_index; i++) { + if (production->at(i).name_replacement < other.production->at(i).name_replacement) return true; + if (other.production->at(i).name_replacement < production->at(i).name_replacement) return false; + } if (is_done()) { if (!production->empty()) { if (production->back().precedence < other.production->back().precedence) return true; @@ -106,11 +115,6 @@ Symbol ParseItem::next_symbol() const { return production->at(step_index).symbol; } -ParseItemSet::ParseItemSet() {} - -ParseItemSet::ParseItemSet(const map &entries) - : entries(entries) {} - bool ParseItemSet::operator==(const ParseItemSet 
&other) const { return entries == other.entries; } @@ -153,6 +157,9 @@ struct hash { hash_combine(&result, item.step_index); hash_combine(&result, item.production->dynamic_precedence); hash_combine(&result, item.production->size()); + for (size_t i = 0; i < item.step_index; i++) { + hash_combine(&result, item.production->at(i).name_replacement); + } if (item.is_done()) { if (!item.production->empty()) { hash_combine(&result, item.production->back().precedence); diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h index 5133970d..47d14078 100644 --- a/src/compiler/build_tables/parse_item.h +++ b/src/compiler/build_tables/parse_item.h @@ -36,9 +36,6 @@ struct ParseItem { }; struct ParseItemSet { - ParseItemSet(); - explicit ParseItemSet(const std::map &); - bool operator==(const ParseItemSet &) const; void add(const ParseItemSet &); size_t unfinished_item_signature() const; diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc index ec6003e4..31bb836b 100644 --- a/src/compiler/build_tables/parse_item_set_builder.cc +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -23,19 +23,22 @@ using rules::Symbol; static vector inline_production(const ParseItem &item, const SyntaxGrammar &grammar) { vector result; - for (const Production &production_to_insert : grammar.variables[item.next_symbol().index].productions) { + auto &inlined_step = item.production->at(item.step_index); + auto &productions_to_insert = grammar.variables[inlined_step.symbol.index].productions; + for (const Production &production_to_insert : productions_to_insert) { auto begin = item.production->steps.begin(); auto end = item.production->steps.end(); auto step = begin + item.step_index; Production production{{begin, step}, item.production->dynamic_precedence}; - production.steps.insert( - production.steps.end(), - production_to_insert.steps.begin(), - production_to_insert.steps.end() - ); - 
production.back().precedence = item.precedence(); - production.back().associativity = item.associativity(); + for (auto &step : production_to_insert) { + production.steps.push_back(step); + if (!inlined_step.name_replacement.empty()) { + production.steps.back().name_replacement = inlined_step.name_replacement; + } + } + production.back().precedence = inlined_step.precedence; + production.back().associativity = inlined_step.associativity; production.steps.insert( production.steps.end(), step + 1, diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 1d5ff690..3b6a03b8 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -76,6 +76,7 @@ class CCodeGenerator { vector> parse_table_entries; vector> external_scanner_states; size_t next_parse_action_list_index; + set unique_replacement_names; public: CCodeGenerator(string name, const ParseTable &parse_table, @@ -98,6 +99,7 @@ class CCodeGenerator { add_symbol_enum(); add_symbol_names_list(); add_symbol_metadata_list(); + add_rename_sequences(); add_lex_function(); add_lex_modes_list(); @@ -139,11 +141,22 @@ class CCodeGenerator { } } + unsigned max_rename_sequence_length = 0; + for (const RenameSequence &rename_sequence : parse_table.rename_sequences) { + if (rename_sequence.size() > max_rename_sequence_length) { + max_rename_sequence_length = rename_sequence.size(); + } + for (const string &name_replacement : rename_sequence) { + unique_replacement_names.insert(name_replacement); + } + } + line("#define LANGUAGE_VERSION " + to_string(TREE_SITTER_LANGUAGE_VERSION)); line("#define STATE_COUNT " + to_string(parse_table.states.size())); line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size())); line("#define TOKEN_COUNT " + to_string(token_count)); line("#define EXTERNAL_TOKEN_COUNT " + to_string(syntax_grammar.external_tokens.size())); + line("#define MAX_RENAME_SEQUENCE_LENGTH " + to_string(max_rename_sequence_length)); line(); } 
@@ -158,6 +171,11 @@ class CCodeGenerator { i++; } } + + for (const string &replacement_name : unique_replacement_names) { + line(rename_id(replacement_name) + " = " + to_string(i) + ","); + i++; + } }); line("};"); line(); @@ -166,9 +184,45 @@ class CCodeGenerator { void add_symbol_names_list() { line("static const char *ts_symbol_names[] = {"); indent([&]() { - for (const auto &entry : parse_table.symbols) - line("[" + symbol_id(entry.first) + "] = \"" + - sanitize_name_for_string(symbol_name(entry.first)) + "\","); + for (const auto &entry : parse_table.symbols) { + line( + "[" + symbol_id(entry.first) + "] = \"" + + sanitize_name_for_string(symbol_name(entry.first)) + "\"," + ); + } + + for (const string &replacement_name : unique_replacement_names) { + line( + "[" + rename_id(replacement_name) + "] = \"" + + sanitize_name_for_string(replacement_name) + "\"," + ); + } + }); + line("};"); + line(); + } + + void add_rename_sequences() { + + line( + "static TSSymbol ts_rename_sequences[" + + to_string(parse_table.rename_sequences.size()) + + "][MAX_RENAME_SEQUENCE_LENGTH] = {" + ); + + indent([&]() { + for (unsigned i = 1, n = parse_table.rename_sequences.size(); i < n; i++) { + const RenameSequence &sequence = parse_table.rename_sequences[i]; + line("[" + to_string(i) + "] = {"); + indent([&]() { + for (unsigned j = 0, n = sequence.size(); j < n; j++) { + if (!sequence[j].empty()) { + line("[" + to_string(j) + "] = " + rename_id(sequence[j]) + ","); + } + } + }); + line("},"); + } }); line("};"); line(); @@ -332,7 +386,7 @@ class CCodeGenerator { add_parse_action_list_id(ParseTableEntry{ {}, false, false }); size_t state_id = 0; - line("static unsigned short ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {"); + line("static uint16_t ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {"); indent([&]() { for (const auto &state : parse_table.states) { @@ -543,17 +597,23 @@ class CCodeGenerator { } break; case ParseActionTypeReduce: - if (action.fragile) { - 
add("REDUCE_FRAGILE"); - } else { - add("REDUCE"); - } - - add("("); + add("REDUCE("); add(symbol_id(action.symbol)); add(", "); add(to_string(action.consumed_symbol_count)); - add(", " + to_string(action.dynamic_precedence)); + + if (action.fragile) { + add(", .fragile = true"); + } + + if (action.dynamic_precedence != 0) { + add(", .dynamic_precedence = " + to_string(action.dynamic_precedence)); + } + + if (action.rename_sequence_id != 0) { + add(", .rename_sequence_id = " + to_string(action.rename_sequence_id)); + } + add(")"); break; case ParseActionTypeRecover: @@ -605,6 +665,10 @@ class CCodeGenerator { } } + string rename_id(const string &name) { + return "rename_sym_" + sanitize_name(name); + } + string symbol_name(const Symbol &symbol) { if (symbol == rules::END_OF_INPUT()) return "END"; diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_grammar.cc index 43ceed51..08ebb6c7 100644 --- a/src/compiler/parse_grammar.cc +++ b/src/compiler/parse_grammar.cc @@ -198,6 +198,20 @@ ParseRuleResult parse_rule(json_value *rule_json) { return Rule(Metadata::prec_dynamic(precedence_json.u.integer, result.rule)); } + if (type == "RENAME") { + json_value name_json = rule_json->operator[]("value"); + if (name_json.type != json_string) { + return "Rename value must be a string"; + } + + json_value content_json = rule_json->operator[]("content"); + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid rename content: " + result.error_message; + } + return Rule(Metadata::rename(string(name_json.u.string.ptr), result.rule)); + } + return "Unknown rule type: " + type; } diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 28e0fbe9..35c3daba 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -6,21 +6,20 @@ namespace tree_sitter { using std::string; -using std::ostream; using std::to_string; -using std::set; using std::vector; using std::function; using rules::Symbol; 
ParseAction::ParseAction() - : production(nullptr), - consumed_symbol_count(0), - symbol(rules::NONE()), - type(ParseActionTypeError), - extra(false), - fragile(false), - state_index(-1) {} + : production(nullptr), + consumed_symbol_count(0), + symbol(rules::NONE()), + type(ParseActionTypeError), + extra(false), + fragile(false), + state_index(-1), + rename_sequence_id(0) {} ParseAction ParseAction::Error() { return ParseAction(); @@ -65,52 +64,49 @@ ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count, } bool ParseAction::operator==(const ParseAction &other) const { - return (type == other.type && extra == other.extra && - fragile == other.fragile && symbol == other.symbol && - state_index == other.state_index && production == other.production && - consumed_symbol_count == other.consumed_symbol_count); + return + type == other.type && + extra == other.extra && + fragile == other.fragile && + symbol == other.symbol && + state_index == other.state_index && + production == other.production && + consumed_symbol_count == other.consumed_symbol_count && + rename_sequence_id == other.rename_sequence_id; } bool ParseAction::operator<(const ParseAction &other) const { - if (type < other.type) - return true; - if (other.type < type) - return false; - if (extra && !other.extra) - return true; - if (other.extra && !extra) - return false; - if (fragile && !other.fragile) - return true; - if (other.fragile && !fragile) - return false; - if (symbol < other.symbol) - return true; - if (other.symbol < symbol) - return false; - if (state_index < other.state_index) - return true; - if (other.state_index < state_index) - return false; - if (production < other.production) - return true; - if (other.production < production) - return false; - return consumed_symbol_count < other.consumed_symbol_count; + if (type < other.type) return true; + if (other.type < type) return false; + if (extra && !other.extra) return true; + if (other.extra && !extra) return false; + if 
(fragile && !other.fragile) return true; + if (other.fragile && !fragile) return false; + if (symbol < other.symbol) return true; + if (other.symbol < symbol) return false; + if (state_index < other.state_index) return true; + if (other.state_index < state_index) return false; + if (production < other.production) return true; + if (other.production < production) return false; + if (consumed_symbol_count < other.consumed_symbol_count) return true; + if (other.consumed_symbol_count < consumed_symbol_count) return false; + return rename_sequence_id < other.rename_sequence_id; } ParseTableEntry::ParseTableEntry() - : reusable(true), depends_on_lookahead(false) {} + : reusable(true), depends_on_lookahead(false) {} ParseTableEntry::ParseTableEntry(const vector &actions, bool reusable, bool depends_on_lookahead) - : actions(actions), - reusable(reusable), - depends_on_lookahead(depends_on_lookahead) {} + : actions(actions), + reusable(reusable), + depends_on_lookahead(depends_on_lookahead) {} bool ParseTableEntry::operator==(const ParseTableEntry &other) const { - return actions == other.actions && reusable == other.reusable && - depends_on_lookahead == other.depends_on_lookahead; + return + actions == other.actions && + reusable == other.reusable && + depends_on_lookahead == other.depends_on_lookahead; } ParseState::ParseState() : lex_state_id(-1) {} diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index e1630b32..6642e325 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -41,6 +41,7 @@ struct ParseAction { bool extra; bool fragile; ParseStateId state_index; + unsigned rename_sequence_id; }; struct ParseTableEntry { @@ -73,12 +74,15 @@ struct ParseTableSymbolMetadata { bool structural; }; +using RenameSequence = std::vector; + struct ParseTable { ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction); void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId); std::vector 
states; std::map symbols; + std::vector rename_sequences; }; } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/flatten_grammar.cc b/src/compiler/prepare_grammar/flatten_grammar.cc index c4bc2c23..667dc9a7 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.cc +++ b/src/compiler/prepare_grammar/flatten_grammar.cc @@ -1,8 +1,9 @@ #include "compiler/prepare_grammar/flatten_grammar.h" -#include +#include #include #include -#include +#include +#include #include "compiler/prepare_grammar/extract_choices.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/grammar.h" @@ -13,6 +14,7 @@ namespace prepare_grammar { using std::find; using std::pair; +using std::string; using std::vector; using rules::Rule; @@ -20,6 +22,7 @@ class FlattenRule { private: vector precedence_stack; vector associativity_stack; + vector name_replacement_stack; Production production; void apply(const Rule &rule, bool at_end) { @@ -28,7 +31,8 @@ class FlattenRule { production.steps.push_back(ProductionStep{ symbol, precedence_stack.back(), - associativity_stack.back() + associativity_stack.back(), + name_replacement_stack.back() }); }, @@ -41,6 +45,10 @@ class FlattenRule { associativity_stack.push_back(metadata.params.associativity); } + if (!metadata.params.name_replacement.empty()) { + name_replacement_stack.push_back(metadata.params.name_replacement); + } + if (abs(metadata.params.dynamic_precedence) > abs(production.dynamic_precedence)) { production.dynamic_precedence = metadata.params.dynamic_precedence; } @@ -56,6 +64,10 @@ class FlattenRule { associativity_stack.pop_back(); if (!at_end) production.back().associativity = associativity_stack.back(); } + + if (!metadata.params.name_replacement.empty()) { + name_replacement_stack.pop_back(); + } }, [&](const rules::Seq &sequence) { @@ -72,7 +84,10 @@ class FlattenRule { } public: - FlattenRule() : precedence_stack({ 0 }), associativity_stack({ rules::AssociativityNone }) {} + FlattenRule() : + 
precedence_stack({0}), + associativity_stack({rules::AssociativityNone}), + name_replacement_stack({""}) {} Production flatten(const Rule &rule) { apply(rule, true); diff --git a/src/compiler/rules/metadata.cc b/src/compiler/rules/metadata.cc index 08baa8da..5a6e59b0 100644 --- a/src/compiler/rules/metadata.cc +++ b/src/compiler/rules/metadata.cc @@ -1,10 +1,14 @@ #include "compiler/rules/metadata.h" #include +#include #include "compiler/rule.h" namespace tree_sitter { namespace rules { +using std::move; +using std::string; + Metadata::Metadata(const Rule &rule, MetadataParams params) : rule(std::make_shared(rule)), params(params) {} @@ -73,5 +77,11 @@ Metadata Metadata::main_token(const Rule &rule) { return Metadata{rule, params}; } +Metadata Metadata::rename(string &&name, const Rule &rule) { + MetadataParams params; + params.name_replacement = move(name); + return Metadata{rule, params}; +} + } // namespace rules } // namespace tree_sitter diff --git a/src/compiler/rules/metadata.h b/src/compiler/rules/metadata.h index af0b3e4e..e9ff08f4 100644 --- a/src/compiler/rules/metadata.h +++ b/src/compiler/rules/metadata.h @@ -1,6 +1,7 @@ #ifndef COMPILER_RULES_METADATA_H_ #define COMPILER_RULES_METADATA_H_ +#include #include namespace tree_sitter { @@ -22,6 +23,7 @@ struct MetadataParams { bool is_string; bool is_active; bool is_main_token; + std::string name_replacement; inline MetadataParams() : precedence{0}, dynamic_precedence{0}, associativity{AssociativityNone}, @@ -38,7 +40,8 @@ struct MetadataParams { is_token == other.is_token && is_string == other.is_string && is_active == other.is_active && - is_main_token == other.is_main_token + is_main_token == other.is_main_token && + name_replacement == other.name_replacement ); } }; @@ -59,6 +62,7 @@ struct Metadata { static Metadata prec_dynamic(int precedence, const Rule &rule); static Metadata separator(const Rule &rule); static Metadata main_token(const Rule &rule); + static Metadata rename(std::string &&name, 
const Rule &rule); bool operator==(const Metadata &other) const; }; diff --git a/src/compiler/syntax_grammar.cc b/src/compiler/syntax_grammar.cc new file mode 100644 index 00000000..34534e3c --- /dev/null +++ b/src/compiler/syntax_grammar.cc @@ -0,0 +1,36 @@ +#include "compiler/syntax_grammar.h" + +namespace tree_sitter { + +bool ProductionStep::operator==(const ProductionStep &other) const { + return symbol == other.symbol && + precedence == other.precedence && + associativity == other.associativity && + name_replacement == other.name_replacement; +} + +bool ProductionStep::operator!=(const ProductionStep &other) const { + return !operator==(other); +} + +bool ProductionStep::operator<(const ProductionStep &other) const { + if (symbol < other.symbol) return true; + if (other.symbol < symbol) return false; + if (precedence < other.precedence) return true; + if (other.precedence < precedence) return false; + if (associativity < other.associativity) return true; + if (other.associativity < associativity) return false; + return name_replacement < other.name_replacement; +} + +bool Production::operator==(const Production &other) const { + return steps == other.steps && dynamic_precedence == other.dynamic_precedence; +} + +bool ExternalToken::operator==(const ExternalToken &other) const { + return name == other.name && + type == other.type && + corresponding_internal_token == other.corresponding_internal_token; +} + +} // namespace tree_sitter diff --git a/src/compiler/syntax_grammar.h b/src/compiler/syntax_grammar.h index 3c3d3b66..4fbd082d 100644 --- a/src/compiler/syntax_grammar.h +++ b/src/compiler/syntax_grammar.h @@ -10,43 +10,29 @@ namespace tree_sitter { struct ProductionStep { - inline bool operator==(const ProductionStep &other) const { - return symbol == other.symbol && - precedence == other.precedence && - associativity == other.associativity; - } - - inline bool operator!=(const ProductionStep &other) const { - return !operator==(other); - } - - inline bool 
operator<(const ProductionStep &other) const { - if (symbol < other.symbol) return true; - if (other.symbol < symbol) return false; - if (precedence < other.precedence) return true; - if (other.precedence < precedence) return false; - return associativity < other.associativity; - } - rules::Symbol symbol; int precedence; rules::Associativity associativity; + std::string name_replacement; + + bool operator==(const ProductionStep &) const; + bool operator!=(const ProductionStep &) const; + bool operator<(const ProductionStep &) const; }; struct Production { std::vector steps; int dynamic_precedence = 0; - inline bool operator==(const Production &other) const { - return steps == other.steps && dynamic_precedence == other.dynamic_precedence; - } - + bool operator==(const Production &) const; inline ProductionStep &back() { return steps.back(); } inline const ProductionStep &back() const { return steps.back(); } inline bool empty() const { return steps.empty(); } inline size_t size() const { return steps.size(); } inline const ProductionStep &operator[](int i) const { return steps[i]; } inline const ProductionStep &at(int i) const { return steps[i]; } + inline std::vector::const_iterator begin() const { return steps.begin(); } + inline std::vector::const_iterator end() const { return steps.end(); } }; struct SyntaxVariable { @@ -55,24 +41,18 @@ struct SyntaxVariable { std::vector productions; }; -using ConflictSet = std::set; - struct ExternalToken { std::string name; VariableType type; rules::Symbol corresponding_internal_token; - inline bool operator==(const ExternalToken &other) const { - return name == other.name && - type == other.type && - corresponding_internal_token == other.corresponding_internal_token; - } + bool operator==(const ExternalToken &) const; }; struct SyntaxGrammar { std::vector variables; std::set extra_tokens; - std::set expected_conflicts; + std::set> expected_conflicts; std::vector external_tokens; std::set variables_to_inline; }; diff --git 
a/src/runtime/language.c b/src/runtime/language.c index 8ca4de50..41a41b2b 100644 --- a/src/runtime/language.c +++ b/src/runtime/language.c @@ -3,7 +3,8 @@ #include "runtime/error_costs.h" static const TSParseAction SHIFT_ERROR = { - .type = TSParseActionTypeShift, .params = {.to_state = ERROR_STATE} + .type = TSParseActionTypeShift, + .to_state = ERROR_STATE, }; void ts_language_table_entry(const TSLanguage *self, TSStateId state, diff --git a/src/runtime/language.h b/src/runtime/language.h index 20e6ec5d..47850d30 100644 --- a/src/runtime/language.h +++ b/src/runtime/language.h @@ -44,7 +44,7 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self, if (count > 0) { TSParseAction action = actions[count - 1]; if (action.type == TSParseActionTypeShift || action.type == TSParseActionTypeRecover) { - return action.params.to_state; + return action.to_state; } } return 0; diff --git a/src/runtime/node.c b/src/runtime/node.c index 84d4e3a7..bfca082a 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -288,7 +288,8 @@ void ts_symbol_iterator_next(TSSymbolIterator *self) { } const char *ts_node_type(TSNode self, const TSDocument *document) { - TSSymbol symbol = ts_node__tree(self)->symbol; + const Tree *tree = ts_node__tree(self); + TSSymbol symbol = tree->context.rename_symbol ? 
tree->context.rename_symbol : tree->symbol; return ts_language_symbol_name(document->parser.language, symbol); } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 2b01dddb..28f660f7 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -558,8 +558,8 @@ static bool parser__switch_children(Parser *self, Tree *tree, static StackPopResult parser__reduce(Parser *self, StackVersion version, TSSymbol symbol, unsigned count, - bool fragile, int dynamic_precedence, - bool allow_skipping) { + int dynamic_precedence, unsigned short rename_sequence_id, + bool fragile, bool allow_skipping) { uint32_t initial_version_count = ts_stack_version_count(self->stack); StackPopResult pop = ts_stack_pop_count(self->stack, version, count); @@ -603,6 +603,7 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version, } parent->dynamic_precedence += dynamic_precedence; + parent->rename_sequence_id = rename_sequence_id; TSStateId state = ts_stack_top_state(self->stack, slice.version); TSStateId next_state = ts_language_next_state(language, state, symbol); @@ -699,12 +700,12 @@ static const TSParseAction *parser__reductions_after_sequence( (*count)--; } - while (*count > 0 && actions[0].params.child_count < child_count) { + while (*count > 0 && actions[0].child_count < child_count) { actions++; (*count)--; } - while (*count > 0 && actions[*count - 1].params.child_count > child_count) { + while (*count > 0 && actions[*count - 1].child_count > child_count) { (*count)--; } @@ -756,7 +757,7 @@ static StackIterateAction parser__repair_error_callback(void *payload, TSStateId } for (uint32_t j = 0; j < repair_reduction_count; j++) { - if (repair_reductions[j].params.symbol == repair->symbol) { + if (repair_reductions[j].symbol == repair->symbol) { result |= StackIteratePop; session->found_repair = true; session->best_repair = *repair; @@ -788,8 +789,8 @@ static bool parser__repair_error(Parser *self, StackSlice slice, array_clear(&self->reduce_actions); for 
(uint32_t i = 0; i < entry.action_count; i++) { if (entry.actions[i].type == TSParseActionTypeReduce) { - TSSymbol symbol = entry.actions[i].params.symbol; - uint32_t child_count = entry.actions[i].params.child_count; + TSSymbol symbol = entry.actions[i].symbol; + uint32_t child_count = entry.actions[i].child_count; if ((child_count > session.tree_count_above_error) || (child_count == session.tree_count_above_error && !ts_language_symbol_metadata(self->language, symbol).visible)) @@ -942,11 +943,12 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version) has_shift_action = true; break; case TSParseActionTypeReduce: - if (action.params.child_count > 0) + if (action.child_count > 0) ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){ - .symbol = action.params.symbol, - .count = action.params.child_count, - .dynamic_precedence = action.params.dynamic_precedence + .symbol = action.symbol, + .count = action.child_count, + .dynamic_precedence = action.dynamic_precedence, + .rename_sequence_id = action.rename_sequence_id, }); default: break; @@ -958,8 +960,9 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version) for (uint32_t i = 0; i < self->reduce_actions.size; i++) { ReduceAction action = self->reduce_actions.contents[i]; StackPopResult reduction = parser__reduce( - self, version, action.symbol, action.count, true, - action.dynamic_precedence, false + self, version, action.symbol, action.count, + action.dynamic_precedence, action.rename_sequence_id, + true, false ); if (reduction.stopped_at_error) { ts_tree_array_delete(&reduction.slices.contents[0].trees); @@ -1168,7 +1171,7 @@ static void parser__advance(Parser *self, StackVersion version, next_state = state; LOG("shift_extra"); } else { - next_state = action.params.to_state; + next_state = action.to_state; LOG("shift state:%u", next_state); } @@ -1195,18 +1198,14 @@ static void parser__advance(Parser *self, StackVersion version, } case 
TSParseActionTypeReduce: { - if (reduction_stopped_at_error) - continue; + if (reduction_stopped_at_error) continue; - unsigned child_count = action.params.child_count; - TSSymbol symbol = action.params.symbol; - unsigned dynamic_precedence = action.params.dynamic_precedence; - bool fragile = action.fragile; - - LOG("reduce sym:%s, child_count:%u", SYM_NAME(symbol), child_count); - - StackPopResult reduction = - parser__reduce(self, version, symbol, child_count, fragile, dynamic_precedence, true); + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol), action.child_count); + StackPopResult reduction = parser__reduce( + self, version, action.symbol, action.child_count, + action.dynamic_precedence, action.rename_sequence_id, + action.fragile, true + ); StackSlice slice = *array_front(&reduction.slices); if (reduction.stopped_at_error) { reduction_stopped_at_error = true; @@ -1237,7 +1236,7 @@ static void parser__advance(Parser *self, StackVersion version, ts_tree_retain(lookahead); } - parser__recover(self, version, action.params.to_state, lookahead); + parser__recover(self, version, action.to_state, lookahead); if (lookahead == reusable_node->tree) { reusable_node_pop(reusable_node); } @@ -1355,6 +1354,6 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err LOG_TREE(); ts_stack_clear(self->stack); parser__clear_cached_token(self); - ts_tree_assign_parents(self->finished_tree, &self->tree_path1); + ts_tree_assign_parents(self->finished_tree, &self->tree_path1, self->language); return self->finished_tree; } diff --git a/src/runtime/reduce_action.h b/src/runtime/reduce_action.h index aad1f619..5ae876f2 100644 --- a/src/runtime/reduce_action.h +++ b/src/runtime/reduce_action.h @@ -12,6 +12,7 @@ typedef struct { uint32_t count; TSSymbol symbol; int dynamic_precedence; + unsigned short rename_sequence_id; } ReduceAction; typedef Array(ReduceAction) ReduceActionSet; diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 
af9a5477..793deaa3 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -22,6 +22,7 @@ Tree *ts_tree_make_leaf(TSSymbol sym, Length padding, Length size, .visible_child_count = 0, .named_child_count = 0, .children = NULL, + .rename_sequence_id = 0, .padding = padding, .visible = metadata.visible, .named = metadata.named, @@ -120,18 +121,26 @@ Tree *ts_tree_make_copy(Tree *self) { return result; } -void ts_tree_assign_parents(Tree *self, TreePath *path) { +void ts_tree_assign_parents(Tree *self, TreePath *path, const TSLanguage *language) { array_clear(path); array_push(path, ((TreePathEntry){self, length_zero(), 0})); while (path->size > 0) { Tree *tree = array_pop(path).tree; Length offset = length_zero(); + const TSSymbol *rename_symbols = language->rename_sequences + + tree->rename_sequence_id * language->max_rename_sequence_length; for (uint32_t i = 0; i < tree->child_count; i++) { Tree *child = tree->children[i]; if (child->context.parent != tree || child->context.index != i) { child->context.parent = tree; child->context.index = i; child->context.offset = offset; + if (tree->rename_sequence_id && rename_symbols[i] != 0) { + child->context.rename_symbol = rename_symbols[i]; + } else { + /* Clear any rename left over from a previous parent. */ + child->context.rename_symbol = 0; + } array_push(path, ((TreePathEntry){child, length_zero(), 0})); } offset = length_add(offset, ts_tree_total_size(child)); @@ -472,36 +481,32 @@ static size_t ts_tree__write_to_string(const Tree *self, const TSLanguage *language, char *string, size_t limit, bool is_root, bool include_all) { - if (!self) - return snprintf(string, limit, "(NULL)"); + if (!self) return snprintf(string, limit, "(NULL)"); char *cursor = string; char **writer = (limit > 0) ? 
&cursor : &string; bool visible = include_all || is_root || (self->visible && self->named); - if (visible && !is_root) + if (visible && !is_root) { cursor += snprintf(*writer, limit, " "); + } if (visible) { - if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && - self->size.chars > 0) { + if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.chars > 0) { cursor += snprintf(*writer, limit, "(UNEXPECTED "); - cursor += - ts_tree__write_char_to_string(*writer, limit, self->lookahead_char); + cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char); } else { - cursor += snprintf(*writer, limit, "(%s", - ts_language_symbol_name(language, self->symbol)); + TSSymbol symbol = self->context.rename_symbol ? self->context.rename_symbol : self->symbol; + cursor += snprintf(*writer, limit, "(%s", ts_language_symbol_name(language, symbol)); } } for (uint32_t i = 0; i < self->child_count; i++) { Tree *child = self->children[i]; - cursor += ts_tree__write_to_string(child, language, *writer, limit, false, - include_all); + cursor += ts_tree__write_to_string(child, language, *writer, limit, false, include_all); } - if (visible) - cursor += snprintf(*writer, limit, ")"); + if (visible) cursor += snprintf(*writer, limit, ")"); return cursor - string; } @@ -518,8 +523,8 @@ char *ts_tree_string(const Tree *self, const TSLanguage *language, void ts_tree__print_dot_graph(const Tree *self, uint32_t byte_offset, const TSLanguage *language, FILE *f) { - fprintf(f, "tree_%p [label=\"%s\"", self, - ts_language_symbol_name(language, self->symbol)); + TSSymbol symbol = self->context.rename_symbol ? 
self->context.rename_symbol : self->symbol; + fprintf(f, "tree_%p [label=\"%s\"", self, ts_language_symbol_name(language, symbol)); if (self->child_count == 0) fprintf(f, ", shape=plaintext"); diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 3b217722..ab5f050b 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -19,6 +19,7 @@ typedef struct Tree { struct Tree *parent; uint32_t index; Length offset; + TSSymbol rename_symbol; } context; uint32_t child_count; @@ -26,6 +27,7 @@ typedef struct Tree { struct { uint32_t visible_child_count; uint32_t named_child_count; + unsigned short rename_sequence_id; struct Tree **children; }; TSExternalTokenState external_token_state; @@ -85,7 +87,7 @@ int ts_tree_compare(const Tree *tree1, const Tree *tree2); uint32_t ts_tree_start_column(const Tree *self); uint32_t ts_tree_end_column(const Tree *self); void ts_tree_set_children(Tree *, uint32_t, Tree **); -void ts_tree_assign_parents(Tree *, TreePath *); +void ts_tree_assign_parents(Tree *, TreePath *, const TSLanguage *); void ts_tree_edit(Tree *, const TSInputEdit *edit); char *ts_tree_string(const Tree *, const TSLanguage *, bool include_all); void ts_tree_print_dot_graph(const Tree *, const TSLanguage *, FILE *); diff --git a/test/compiler/build_tables/parse_item_set_builder_test.cc b/test/compiler/build_tables/parse_item_set_builder_test.cc index 4e2b28e6..efa1add4 100644 --- a/test/compiler/build_tables/parse_item_set_builder_test.cc +++ b/test/compiler/build_tables/parse_item_set_builder_test.cc @@ -12,6 +12,7 @@ START_TEST describe("ParseItemSetBuilder", []() { vector lexical_variables; + for (size_t i = 0; i < 20; i++) { lexical_variables.push_back({ "token_" + to_string(i), @@ -27,23 +28,23 @@ describe("ParseItemSetBuilder", []() { SyntaxGrammar grammar{{ SyntaxVariable{"rule0", VariableTypeNamed, { Production{{ - {Symbol::non_terminal(1), 0, AssociativityNone}, - {Symbol::terminal(11), 0, AssociativityNone}, + {Symbol::non_terminal(1), 0, 
AssociativityNone, ""}, + {Symbol::terminal(11), 0, AssociativityNone, ""}, }, 0}, }}, SyntaxVariable{"rule1", VariableTypeNamed, { Production{{ - {Symbol::terminal(12), 0, AssociativityNone}, - {Symbol::terminal(13), 0, AssociativityNone}, + {Symbol::terminal(12), 0, AssociativityNone, ""}, + {Symbol::terminal(13), 0, AssociativityNone, ""}, }, 0}, Production{{ - {Symbol::non_terminal(2), 0, AssociativityNone}, + {Symbol::non_terminal(2), 0, AssociativityNone, ""}, }, 0} }}, SyntaxVariable{"rule2", VariableTypeNamed, { Production{{ - {Symbol::terminal(14), 0, AssociativityNone}, - {Symbol::terminal(15), 0, AssociativityNone}, + {Symbol::terminal(14), 0, AssociativityNone, ""}, + {Symbol::terminal(15), 0, AssociativityNone, ""}, }, 0} }}, }, {}, {}, {}, {}}; @@ -52,21 +53,21 @@ describe("ParseItemSetBuilder", []() { return grammar.variables[variable_index].productions[production_index]; }; - ParseItemSet item_set({ + ParseItemSet item_set{{ { ParseItem(rules::START(), production(0, 0), 0), LookaheadSet({ Symbol::terminal(10) }), } - }); + }}; ParseItemSetBuilder item_set_builder(grammar, lexical_grammar); item_set_builder.apply_transitive_closure(&item_set); - AssertThat(item_set, Equals(ParseItemSet({ + AssertThat(item_set, Equals(ParseItemSet{{ { ParseItem(rules::START(), production(0, 0), 0), LookaheadSet({ Symbol::terminal(10) }) - }, + }, { ParseItem(Symbol::non_terminal(1), production(1, 0), 0), LookaheadSet({ Symbol::terminal(11) }) @@ -79,21 +80,21 @@ describe("ParseItemSetBuilder", []() { ParseItem(Symbol::non_terminal(2), production(2, 0), 0), LookaheadSet({ Symbol::terminal(11) }) }, - }))); + }})); }); it("handles rules with empty productions", [&]() { SyntaxGrammar grammar{{ SyntaxVariable{"rule0", VariableTypeNamed, { Production{{ - {Symbol::non_terminal(1), 0, AssociativityNone}, - {Symbol::terminal(11), 0, AssociativityNone}, + {Symbol::non_terminal(1), 0, AssociativityNone, ""}, + {Symbol::terminal(11), 0, AssociativityNone, ""}, }, 0}, }}, 
SyntaxVariable{"rule1", VariableTypeNamed, { Production{{ - {Symbol::terminal(12), 0, AssociativityNone}, - {Symbol::terminal(13), 0, AssociativityNone}, + {Symbol::terminal(12), 0, AssociativityNone, ""}, + {Symbol::terminal(13), 0, AssociativityNone, ""}, }, 0}, Production{{}, 0} }}, @@ -103,17 +104,17 @@ describe("ParseItemSetBuilder", []() { return grammar.variables[variable_index].productions[production_index]; }; - ParseItemSet item_set({ + ParseItemSet item_set{{ { ParseItem(rules::START(), production(0, 0), 0), LookaheadSet({ Symbol::terminal(10) }), } - }); + }}; ParseItemSetBuilder item_set_builder(grammar, lexical_grammar); item_set_builder.apply_transitive_closure(&item_set); - AssertThat(item_set, Equals(ParseItemSet({ + AssertThat(item_set, Equals(ParseItemSet{{ { ParseItem(rules::START(), production(0, 0), 0), LookaheadSet({ Symbol::terminal(10) }) @@ -126,7 +127,7 @@ describe("ParseItemSetBuilder", []() { ParseItem(Symbol::non_terminal(1), production(1, 1), 0), LookaheadSet({ Symbol::terminal(11) }) }, - }))); + }})); }); }); diff --git a/test/fixtures/test_grammars/renamed_rules/corpus.txt b/test/fixtures/test_grammars/renamed_rules/corpus.txt new file mode 100644 index 00000000..8789179d --- /dev/null +++ b/test/fixtures/test_grammars/renamed_rules/corpus.txt @@ -0,0 +1,18 @@ +====================================== +Method calls +====================================== + +a.b(c(d.e)); + +--- + +(statement + (call_expression + (member_expression + (variable_name) + (property_name)) + (call_expression + (variable_name) + (member_expression + (variable_name) + (property_name))))) diff --git a/test/fixtures/test_grammars/renamed_rules/grammar.json b/test/fixtures/test_grammars/renamed_rules/grammar.json new file mode 100644 index 00000000..c6e86052 --- /dev/null +++ b/test/fixtures/test_grammars/renamed_rules/grammar.json @@ -0,0 +1,69 @@ +{ + "name": "renamed_rules", + + "extras": [ + {"type": "PATTERN", "value": "\\s"} + ], + + "rules": { + 
"statement": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "_expression"}, + {"type": "STRING", "value": ";"} + ] + }, + + "_expression": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "call_expression"}, + {"type": "SYMBOL", "name": "member_expression"}, + { + "type": "RENAME", + "value": "variable_name", + "content": { + "type": "SYMBOL", + "name": "identifier" + } + } + ] + }, + + "call_expression": { + "type": "PREC_LEFT", + "value": 0, + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "_expression"}, + {"type": "STRING", "value": "("}, + {"type": "SYMBOL", "name": "_expression"}, + {"type": "STRING", "value": ")"} + ] + } + }, + + "member_expression": { + "type": "PREC_LEFT", + "value": 1, + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "_expression"}, + {"type": "STRING", "value": "."}, + { + "type": "RENAME", + "value": "property_name", + "content": { + "type": "SYMBOL", + "name": "identifier" + } + } + ] + } + }, + + "identifier": {"type": "PATTERN", "value": "\\a+"} + } +}