From db4b9ebc7c9548a395ff6621f78ef5d542161396 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Mar 2017 12:52:01 -0700 Subject: [PATCH 1/6] Implement Rule as a union rather than an abstract base class --- .clang_complete | 1 + include/tree_sitter/compiler.h | 1 + project.gyp | 15 +- .../build_tables/build_parse_table.cc | 19 +- .../build_tables/lex_conflict_manager.cc | 2 +- .../build_tables/lex_conflict_manager.h | 2 +- src/compiler/build_tables/lex_item.cc | 77 ++-- src/compiler/build_tables/lex_item.h | 7 +- .../build_tables/lex_item_transitions.cc | 208 +++++----- .../build_tables/lex_item_transitions.h | 5 +- .../build_tables/lex_table_builder.cc | 121 +++--- src/compiler/build_tables/lookahead_set.cc | 2 +- src/compiler/build_tables/lookahead_set.h | 2 +- src/compiler/build_tables/parse_item.cc | 4 +- src/compiler/build_tables/parse_item.h | 3 +- .../build_tables/parse_item_set_builder.cc | 10 +- .../build_tables/parse_item_set_builder.h | 2 +- .../build_tables/rule_can_be_blank.cc | 59 ++- src/compiler/build_tables/rule_can_be_blank.h | 2 +- src/compiler/compile.cc | 11 +- src/compiler/compile.h | 4 +- src/compiler/compile_error.h | 6 + src/compiler/generate_code/c_code.cc | 20 +- src/compiler/grammar.h | 36 +- src/compiler/lex_table.cc | 3 +- src/compiler/lex_table.h | 3 +- src/compiler/lexical_grammar.h | 11 +- src/compiler/parse_grammar.cc | 192 +++++----- src/compiler/parse_grammar.h | 2 +- src/compiler/parse_table.cc | 4 +- src/compiler/parse_table.h | 3 +- .../prepare_grammar/expand_repeats.cc | 100 +++-- src/compiler/prepare_grammar/expand_repeats.h | 2 - src/compiler/prepare_grammar/expand_tokens.cc | 118 +++--- src/compiler/prepare_grammar/expand_tokens.h | 14 +- .../prepare_grammar/extract_choices.cc | 72 ++-- .../prepare_grammar/extract_choices.h | 2 +- .../prepare_grammar/extract_tokens.cc | 299 +++++++++------ src/compiler/prepare_grammar/extract_tokens.h | 5 +- .../prepare_grammar/flatten_grammar.cc | 99 +++-- 
.../prepare_grammar/flatten_grammar.h | 6 +- .../prepare_grammar/initial_syntax_grammar.h | 17 +- .../prepare_grammar/intern_symbols.cc | 123 +++--- src/compiler/prepare_grammar/intern_symbols.h | 4 +- .../prepare_grammar/interned_grammar.h | 19 +- src/compiler/prepare_grammar/is_token.cc | 30 -- src/compiler/prepare_grammar/is_token.h | 14 - .../prepare_grammar/normalize_rules.cc | 4 +- src/compiler/prepare_grammar/parse_regex.cc | 103 ++--- src/compiler/prepare_grammar/parse_regex.h | 2 +- .../prepare_grammar/prepare_grammar.cc | 16 +- .../prepare_grammar/prepare_grammar.h | 7 +- .../prepare_grammar/token_description.cc | 104 ++--- .../prepare_grammar/token_description.h | 2 +- src/compiler/rule.cc | 232 +++++++++++- src/compiler/rule.h | 142 +++++-- src/compiler/rules.h | 28 -- src/compiler/rules/blank.cc | 36 -- src/compiler/rules/blank.h | 19 +- src/compiler/rules/built_in_symbols.cc | 19 - src/compiler/rules/built_in_symbols.h | 16 - src/compiler/rules/character_range.cc | 36 -- src/compiler/rules/character_range.h | 25 -- src/compiler/rules/character_set.cc | 89 ++--- src/compiler/rules/character_set.h | 44 +-- src/compiler/rules/choice.cc | 89 ++--- src/compiler/rules/choice.h | 20 +- src/compiler/rules/metadata.cc | 119 +++--- src/compiler/rules/metadata.h | 53 ++- src/compiler/rules/named_symbol.cc | 35 -- src/compiler/rules/named_symbol.h | 18 +- src/compiler/rules/pattern.cc | 36 -- src/compiler/rules/pattern.h | 18 +- src/compiler/rules/repeat.cc | 41 +- src/compiler/rules/repeat.h | 21 +- src/compiler/rules/rules.cc | 108 ------ src/compiler/rules/seq.cc | 69 ++-- src/compiler/rules/seq.h | 23 +- src/compiler/rules/string.cc | 35 -- src/compiler/rules/string.h | 18 +- src/compiler/rules/symbol.cc | 82 ---- src/compiler/rules/symbol.h | 91 +++-- src/compiler/rules/visitor.cc | 44 --- src/compiler/rules/visitor.h | 234 ------------ src/compiler/syntax_grammar.cc | 20 - src/compiler/syntax_grammar.h | 20 +- src/compiler/util/make_visitor.h | 38 ++ 
src/compiler/variable.h | 25 -- .../build_tables/lex_conflict_manager_test.cc | 13 +- test/compiler/build_tables/lex_item_test.cc | 228 +++++------ .../parse_item_set_builder_test.cc | 62 ++- .../build_tables/rule_can_be_blank_test.cc | 33 +- .../prepare_grammar/expand_repeats_test.cc | 130 ++++--- .../prepare_grammar/expand_tokens_test.cc | 188 +++------ .../prepare_grammar/extract_choices_test.cc | 119 +++--- .../prepare_grammar/extract_tokens_test.cc | 358 +++++++++++++----- .../prepare_grammar/flatten_grammar_test.cc | 67 ++-- .../prepare_grammar/intern_symbols_test.cc | 60 +-- .../prepare_grammar/parse_regex_test.cc | 204 ++++++---- test/compiler/rules/character_set_test.cc | 18 +- test/compiler/rules/choice_test.cc | 64 ++-- test/compiler/rules/repeat_test.cc | 11 +- test/helpers/equals_pointer.h | 37 -- test/helpers/rule_helpers.cc | 62 --- test/helpers/rule_helpers.h | 25 -- test/helpers/stream_methods.cc | 238 +++++++----- test/helpers/stream_methods.h | 46 ++- test/integration/test_grammars.cc | 4 +- tests.gyp | 4 +- 109 files changed, 2793 insertions(+), 3120 deletions(-) delete mode 100644 src/compiler/prepare_grammar/is_token.cc delete mode 100644 src/compiler/prepare_grammar/is_token.h delete mode 100644 src/compiler/rules.h delete mode 100644 src/compiler/rules/blank.cc delete mode 100644 src/compiler/rules/built_in_symbols.cc delete mode 100644 src/compiler/rules/built_in_symbols.h delete mode 100644 src/compiler/rules/character_range.cc delete mode 100644 src/compiler/rules/character_range.h delete mode 100644 src/compiler/rules/named_symbol.cc delete mode 100644 src/compiler/rules/pattern.cc delete mode 100644 src/compiler/rules/rules.cc delete mode 100644 src/compiler/rules/string.cc delete mode 100644 src/compiler/rules/symbol.cc delete mode 100644 src/compiler/rules/visitor.cc delete mode 100644 src/compiler/rules/visitor.h delete mode 100644 src/compiler/syntax_grammar.cc create mode 100644 src/compiler/util/make_visitor.h delete mode 100644 
src/compiler/variable.h delete mode 100644 test/helpers/equals_pointer.h delete mode 100644 test/helpers/rule_helpers.cc delete mode 100644 test/helpers/rule_helpers.h diff --git a/.clang_complete b/.clang_complete index 11bc7510..12c483f7 100644 --- a/.clang_complete +++ b/.clang_complete @@ -1,3 +1,4 @@ +-std=c++14 -Isrc -Itest -Iinclude diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index 1c287fd5..eda0e51a 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -15,6 +15,7 @@ typedef enum { TSCompileErrorTypeLexConflict, TSCompileErrorTypeParseConflict, TSCompileErrorTypeEpsilonRule, + TSCompileErrorTypeInvalidTokenContents, } TSCompileErrorType; typedef struct { diff --git a/project.gyp b/project.gyp index 8871fc16..d4292f39 100644 --- a/project.gyp +++ b/project.gyp @@ -33,39 +33,28 @@ 'src/compiler/prepare_grammar/extract_tokens.cc', 'src/compiler/prepare_grammar/flatten_grammar.cc', 'src/compiler/prepare_grammar/intern_symbols.cc', - 'src/compiler/prepare_grammar/is_token.cc', 'src/compiler/prepare_grammar/normalize_rules.cc', 'src/compiler/prepare_grammar/parse_regex.cc', 'src/compiler/prepare_grammar/prepare_grammar.cc', 'src/compiler/prepare_grammar/token_description.cc', 'src/compiler/rule.cc', - 'src/compiler/syntax_grammar.cc', - 'src/compiler/rules/blank.cc', - 'src/compiler/rules/built_in_symbols.cc', - 'src/compiler/rules/character_range.cc', 'src/compiler/rules/character_set.cc', 'src/compiler/rules/choice.cc', 'src/compiler/rules/metadata.cc', - 'src/compiler/rules/named_symbol.cc', - 'src/compiler/rules/pattern.cc', 'src/compiler/rules/repeat.cc', - 'src/compiler/rules/rules.cc', 'src/compiler/rules/seq.cc', - 'src/compiler/rules/string.cc', - 'src/compiler/rules/symbol.cc', - 'src/compiler/rules/visitor.cc', 'src/compiler/util/string_helpers.cc', 'externals/utf8proc/utf8proc.c', 'externals/json-parser/json.c', ], 'cflags_cc': [ - '-std=c++0x', + '-std=c++14', ], 'cflags_cc!': [ 
'-fno-rtti' ], 'xcode_settings': { - 'CLANG_CXX_LANGUAGE_STANDARD': 'c++11', + 'CLANG_CXX_LANGUAGE_STANDARD': 'c++14', 'GCC_ENABLE_CPP_RTTI': 'YES', 'GCC_ENABLE_CPP_EXCEPTIONS': 'NO', }, diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 1f8a6939..20d02fa1 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -10,8 +10,7 @@ #include "compiler/build_tables/parse_item_set_builder.h" #include "compiler/lexical_grammar.h" #include "compiler/syntax_grammar.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" #include "compiler/build_tables/lex_table_builder.h" namespace tree_sitter { @@ -53,8 +52,8 @@ class ParseTableBuilder { pair build() { Symbol start_symbol = grammar.variables.empty() ? - Symbol(0, Symbol::Terminal) : - Symbol(0, Symbol::NonTerminal); + Symbol::terminal(0) : + Symbol::non_terminal(0); Production start_production{ ProductionStep{start_symbol, 0, rules::AssociativityNone}, @@ -121,7 +120,7 @@ class ParseTableBuilder { } if (!has_non_reciprocal_conflict) { - add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::Terminal)); + add_out_of_context_parse_state(&error_state, Symbol::terminal(i)); } } @@ -132,11 +131,11 @@ class ParseTableBuilder { } for (size_t i = 0; i < grammar.external_tokens.size(); i++) { - add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::External)); + add_out_of_context_parse_state(&error_state, Symbol::external(i)); } for (size_t i = 0; i < grammar.variables.size(); i++) { - add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::NonTerminal)); + add_out_of_context_parse_state(&error_state, Symbol::non_terminal(i)); } error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0)); @@ -253,7 +252,7 @@ class ParseTableBuilder { ParseStateId next_state = add_parse_state(next_item_set); 
parse_table.set_nonterminal_action(state_id, lookahead, next_state); if (!allow_any_conflict) - recovery_states[Symbol(lookahead, Symbol::NonTerminal)].add(next_item_set); + recovery_states[Symbol::non_terminal(lookahead)].add(next_item_set); } for (Symbol lookahead : lookaheads_with_conflicts) { @@ -428,7 +427,7 @@ class ParseTableBuilder { if (lookahead.is_external()) return false; if (!lookahead.is_built_in()) { for (Symbol::Index incompatible_index : incompatible_token_indices) { - Symbol incompatible_symbol(incompatible_index, Symbol::Terminal); + Symbol incompatible_symbol = Symbol::terminal(incompatible_index); if (other.terminal_entries.count(incompatible_symbol)) return false; } } @@ -452,7 +451,7 @@ class ParseTableBuilder { if (lookahead.is_external()) return false; if (!lookahead.is_built_in()) { for (Symbol::Index incompatible_index : incompatible_token_indices) { - Symbol incompatible_symbol(incompatible_index, Symbol::Terminal); + Symbol incompatible_symbol = Symbol::terminal(incompatible_index); if (state.terminal_entries.count(incompatible_symbol)) return false; } } diff --git a/src/compiler/build_tables/lex_conflict_manager.cc b/src/compiler/build_tables/lex_conflict_manager.cc index 0fbdf4d9..82b5efaf 100644 --- a/src/compiler/build_tables/lex_conflict_manager.cc +++ b/src/compiler/build_tables/lex_conflict_manager.cc @@ -1,7 +1,7 @@ #include "compiler/build_tables/lex_conflict_manager.h" #include #include "compiler/parse_table.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" #include "compiler/build_tables/lex_item.h" namespace tree_sitter { diff --git a/src/compiler/build_tables/lex_conflict_manager.h b/src/compiler/build_tables/lex_conflict_manager.h index 0d3177dd..d15c5b58 100644 --- a/src/compiler/build_tables/lex_conflict_manager.h +++ b/src/compiler/build_tables/lex_conflict_manager.h @@ -4,7 +4,7 @@ #include #include #include "compiler/lexical_grammar.h" -#include "compiler/rules/symbol.h" +#include 
"compiler/rule.h" namespace tree_sitter { diff --git a/src/compiler/build_tables/lex_item.cc b/src/compiler/build_tables/lex_item.cc index 4c9056df..23d63b1b 100644 --- a/src/compiler/build_tables/lex_item.cc +++ b/src/compiler/build_tables/lex_item.cc @@ -2,12 +2,7 @@ #include #include "compiler/build_tables/lex_item_transitions.h" #include "compiler/build_tables/rule_can_be_blank.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/visitor.h" +#include "compiler/rule.h" #include "compiler/util/hash_combine.h" namespace tree_sitter { @@ -19,51 +14,63 @@ using std::unordered_set; using rules::CharacterSet; using rules::Symbol; -LexItem::LexItem(const rules::Symbol &lhs, const rule_ptr rule) +LexItem::LexItem(const rules::Symbol &lhs, const rules::Rule &rule) : lhs(lhs), rule(rule) {} bool LexItem::operator==(const LexItem &other) const { - return (other.lhs == lhs) && other.rule->operator==(*rule); + return lhs == other.lhs && rule == other.rule; } -LexItem::CompletionStatus LexItem::completion_status() const { - class GetCompletionStatus : public rules::RuleFn { - protected: - CompletionStatus apply_to(const rules::Choice *rule) { - for (const auto &element : rule->elements) { - CompletionStatus status = apply(element); +using CompletionStatus = LexItem::CompletionStatus; + +static CompletionStatus get_completion_status(const rules::Rule &rule) { + return rule.match( + [](rules::Choice choice) { + for (const auto &element : choice.elements) { + auto status = get_completion_status(element); if (status.is_done) return status; } - return { false, PrecedenceRange() }; - } + return CompletionStatus{false, PrecedenceRange()}; + }, - CompletionStatus apply_to(const rules::Metadata *rule) { - CompletionStatus result = apply(rule->rule); - if (result.is_done && result.precedence.empty && rule->params.has_precedence) { - 
result.precedence.add(rule->params.precedence); + [](rules::Metadata metadata) { + CompletionStatus result = get_completion_status(metadata.rule); + if (result.is_done && result.precedence.empty && metadata.params.has_precedence) { + result.precedence.add(metadata.params.precedence); } return result; - } + }, - CompletionStatus apply_to(const rules::Repeat *rule) { - return apply(rule->content); - } + [](rules::Repeat repeat) { + return get_completion_status(repeat.rule); + }, - CompletionStatus apply_to(const rules::Blank *rule) { - return { true, PrecedenceRange() }; - } - - CompletionStatus apply_to(const rules::Seq *rule) { - CompletionStatus left_status = apply(rule->left); + [](rules::Seq sequence) { + CompletionStatus left_status = get_completion_status(sequence.left); if (left_status.is_done) { - return apply(rule->right); + return get_completion_status(sequence.right); } else { - return { false, PrecedenceRange() }; + return CompletionStatus{false, PrecedenceRange()}; } - } - }; + }, - return GetCompletionStatus().apply(rule); + [](rules::Blank blank) { + return CompletionStatus{true, PrecedenceRange()}; + }, + + [](rules::CharacterSet) { + return CompletionStatus{false, PrecedenceRange()}; + }, + + [](auto) { + return CompletionStatus{false, PrecedenceRange()}; + } + ); +} + + +LexItem::CompletionStatus LexItem::completion_status() const { + return get_completion_status(rule); } LexItemSet::LexItemSet() {} diff --git a/src/compiler/build_tables/lex_item.h b/src/compiler/build_tables/lex_item.h index b6b07de7..89a943e3 100644 --- a/src/compiler/build_tables/lex_item.h +++ b/src/compiler/build_tables/lex_item.h @@ -5,8 +5,7 @@ #include #include #include -#include "compiler/rules/character_set.h" -#include "compiler/rules/symbol.h" +#include "compiler/rule.h" #include "compiler/precedence_range.h" namespace tree_sitter { @@ -14,7 +13,7 @@ namespace build_tables { class LexItem { public: - LexItem(const rules::Symbol &, rule_ptr); + LexItem(const 
rules::Symbol &, const rules::Rule &); struct CompletionStatus { bool is_done; @@ -25,7 +24,7 @@ class LexItem { CompletionStatus completion_status() const; rules::Symbol lhs; - rule_ptr rule; + rules::Rule rule; }; } // namespace build_tables diff --git a/src/compiler/build_tables/lex_item_transitions.cc b/src/compiler/build_tables/lex_item_transitions.cc index 9dc4f762..60c89f9d 100644 --- a/src/compiler/build_tables/lex_item_transitions.cc +++ b/src/compiler/build_tables/lex_item_transitions.cc @@ -4,47 +4,34 @@ #include #include #include "compiler/build_tables/rule_can_be_blank.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/visitor.h" +#include "compiler/rule.h" #include "compiler/build_tables/lex_item.h" namespace tree_sitter { namespace build_tables { using std::function; -using std::make_shared; using std::map; using std::pair; using std::vector; using rules::CharacterSet; -using rules::Symbol; -using rules::Blank; -using rules::Choice; -using rules::Seq; -using rules::Repeat; -using rules::Metadata; -typedef LexItemSet::Transition Transition; -typedef LexItemSet::TransitionMap TransitionMap; +using rules::Rule; +using Transition = LexItemSet::Transition; +using TransitionMap = LexItemSet::TransitionMap; -class TransitionBuilder : public rules::RuleFn { +class TransitionBuilder { TransitionMap *transitions; - const Symbol &item_lhs; + const rules::Symbol &item_lhs; vector *precedence_stack; bool in_main_token; - Transition transform_transition(const Transition &transition, - function callback) { + inline Transition transform_transition(const Transition &transition, + const function &callback) { LexItemSet destination; - for (const LexItem &item : transition.destination.entries) + for (const LexItem &item : 
transition.destination.entries) { destination.entries.insert(LexItem(item.lhs, callback(item.rule))); - return Transition{ destination, transition.precedence, - transition.in_main_token }; + } + return Transition{destination, transition.precedence, transition.in_main_token}; } void add_transition(TransitionMap *transitions, CharacterSet new_characters, @@ -89,82 +76,102 @@ class TransitionBuilder : public rules::RuleFn { transitions->insert({ new_characters, new_transition }); } - void apply_to(const CharacterSet *character_set) { - PrecedenceRange precedence; - if (!precedence_stack->empty()) - precedence.add(precedence_stack->back()); - - add_transition(transitions, *character_set, - Transition{ - LexItemSet({ LexItem(item_lhs, Blank::build()) }), - precedence, in_main_token, - }); - } - - void apply_to(const Choice *choice) { - for (const rule_ptr &element : choice->elements) - apply(element); - } - - void apply_to(const Seq *sequence) { - TransitionMap left_transitions; - TransitionBuilder(&left_transitions, this).apply(sequence->left); - - for (const auto &pair : left_transitions) { - add_transition( - transitions, pair.first, - transform_transition(pair.second, [&sequence](rule_ptr rule) { - return Seq::build({ rule, sequence->right }); - })); - } - - if (rule_can_be_blank(sequence->left)) - apply(sequence->right); - } - - void apply_to(const Repeat *repeat) { - TransitionMap content_transitions; - TransitionBuilder(&content_transitions, this).apply(repeat->content); - - for (const auto &pair : content_transitions) { - add_transition(transitions, pair.first, pair.second); - add_transition( - transitions, pair.first, - transform_transition(pair.second, [&repeat](rule_ptr item_rule) { - return Seq::build({ item_rule, repeat->copy() }); - })); - } - } - - void apply_to(const Metadata *metadata) { - bool has_active_precedence = metadata->params.is_active; - if (has_active_precedence) - precedence_stack->push_back(metadata->params.precedence); - - if 
(metadata->params.is_main_token) - in_main_token = true; - - rules::MetadataParams params = metadata->params; - if (params.has_precedence) - params.is_active = true; - - TransitionMap content_transitions; - TransitionBuilder(&content_transitions, this).apply(metadata->rule); - - for (const auto &pair : content_transitions) { - add_transition( - transitions, pair.first, - transform_transition(pair.second, [&params](rule_ptr rule) { - return Metadata::build(rule, params); - })); - } - - if (has_active_precedence) - precedence_stack->pop_back(); - } - public: - TransitionBuilder(TransitionMap *transitions, const Symbol &item_lhs, + void apply(const Rule &rule) { + rule.match( + [this](const rules::Blank &) {}, + + [this](const rules::CharacterSet &character_set) { + PrecedenceRange precedence; + if (!precedence_stack->empty()) { + precedence.add(precedence_stack->back()); + } + + add_transition( + transitions, + character_set, + Transition{ + LexItemSet({ LexItem(item_lhs, rules::Blank{}) }), + precedence, + in_main_token, + } + ); + }, + + [this](const rules::Choice &choice) { + for (const auto &element : choice.elements) { + apply(element); + } + }, + + [this](const rules::Seq &sequence) { + TransitionMap left_transitions; + TransitionBuilder(&left_transitions, this).apply(sequence.left); + + for (const auto &pair : left_transitions) { + add_transition( + transitions, + pair.first, + transform_transition(pair.second, [&sequence](Rule rule) -> Rule { + return rules::Seq::build({ rule, sequence.right }); + }) + ); + } + + if (rule_can_be_blank(sequence.left)) { + apply(sequence.right); + } + }, + + [this](const rules::Repeat &repeat) { + TransitionMap content_transitions; + TransitionBuilder(&content_transitions, this).apply(repeat.rule); + + for (const auto &pair : content_transitions) { + add_transition(transitions, pair.first, pair.second); + add_transition( + transitions, pair.first, + transform_transition(pair.second, [&repeat](Rule item_rule) { + return 
rules::Seq::build({ item_rule, repeat }); + }) + ); + } + }, + + [this](const rules::Metadata &metadata) { + bool has_active_precedence = metadata.params.is_active; + if (has_active_precedence) + precedence_stack->push_back(metadata.params.precedence); + + if (metadata.params.is_main_token) + in_main_token = true; + + auto params = metadata.params; + if (params.has_precedence) + params.is_active = true; + + TransitionMap content_transitions; + TransitionBuilder(&content_transitions, this).apply(metadata.rule); + + for (const auto &pair : content_transitions) { + add_transition( + transitions, pair.first, + transform_transition(pair.second, [&params](Rule rule) { + return rules::Metadata{rule, params}; + }) + ); + } + + if (has_active_precedence) { + precedence_stack->pop_back(); + } + }, + + [](auto) {} + ); + } + + TransitionBuilder(TransitionMap *transitions, const rules::Symbol &item_lhs, vector *precedence_stack, bool in_main_token) : transitions(transitions), item_lhs(item_lhs), @@ -180,8 +187,7 @@ class TransitionBuilder : public rules::RuleFn { void lex_item_transitions(TransitionMap *transitions, const LexItem &item) { vector precedence_stack; - TransitionBuilder(transitions, item.lhs, &precedence_stack, false) - .apply(item.rule); + TransitionBuilder(transitions, item.lhs, &precedence_stack, false).apply(item.rule); } } // namespace build_tables diff --git a/src/compiler/build_tables/lex_item_transitions.h b/src/compiler/build_tables/lex_item_transitions.h index b9dc25d0..2cd10917 100644 --- a/src/compiler/build_tables/lex_item_transitions.h +++ b/src/compiler/build_tables/lex_item_transitions.h @@ -1,15 +1,12 @@ #ifndef COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_ #define COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_ -#include "compiler/rules/character_set.h" -#include "compiler/rules/symbol.h" #include "compiler/build_tables/lex_item.h" namespace tree_sitter { namespace build_tables { -void lex_item_transitions(LexItemSet::TransitionMap *transitions, - 
const LexItem &); +void lex_item_transitions(LexItemSet::TransitionMap *transitions, const LexItem &); } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/build_tables/lex_table_builder.cc b/src/compiler/build_tables/lex_table_builder.cc index e0a18914..3f9e14bb 100644 --- a/src/compiler/build_tables/lex_table_builder.cc +++ b/src/compiler/build_tables/lex_table_builder.cc @@ -10,13 +10,7 @@ #include "compiler/build_tables/lex_item.h" #include "compiler/parse_table.h" #include "compiler/lexical_grammar.h" -#include "compiler/rules/built_in_symbols.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/visitor.h" +#include "compiler/rule.h" namespace tree_sitter { namespace build_tables { @@ -28,6 +22,7 @@ using std::string; using std::vector; using std::unordered_map; using std::unique_ptr; +using rules::Rule; using rules::Blank; using rules::Choice; using rules::CharacterSet; @@ -36,35 +31,45 @@ using rules::Symbol; using rules::Metadata; using rules::Seq; -class StartingCharacterAggregator : public rules::RuleFn { - void apply_to(const rules::Seq *rule) { - apply(rule->left); - } - - void apply_to(const rules::Choice *rule) { - for (const rule_ptr &element : rule->elements) apply(element); - } - - void apply_to(const rules::Repeat *rule) { - apply(rule->content); - } - - void apply_to(const rules::Metadata *rule) { - apply(rule->rule); - } - - void apply_to(const rules::CharacterSet *rule) { - result.add_set(*rule); - } - +class StartingCharacterAggregator { public: + void apply(const Rule &rule) { + rule.match( + [this](const Seq &sequence) { + apply(sequence.left); + }, + + [this](const rules::Choice &rule) { + for (const auto &element : rule.elements) { + apply(element); + } + }, + + [this](const rules::Repeat &rule) { + apply(rule.rule); + }, + + [this](const rules::Metadata &rule) 
{ + apply(rule.rule); + }, + + [this](const rules::CharacterSet &rule) { + result.add_set(rule); + }, + + [this](const rules::Blank) {}, + + [](auto) {} + ); + } + CharacterSet result; }; class LexTableBuilderImpl : public LexTableBuilder { LexTable lex_table; const LexicalGrammar grammar; - vector separator_rules; + vector separator_rules; CharacterSet first_separator_characters; LexConflictManager conflict_manager; unordered_map lex_state_ids; @@ -74,11 +79,11 @@ class LexTableBuilderImpl : public LexTableBuilder { LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) { StartingCharacterAggregator starting_character_aggregator; - for (const rule_ptr &rule : grammar.separators) { - separator_rules.push_back(Repeat::build(rule)); + for (const auto &rule : grammar.separators) { + separator_rules.push_back(Repeat{rule}); starting_character_aggregator.apply(rule); } - separator_rules.push_back(Blank::build()); + separator_rules.push_back(Blank{}); first_separator_characters = starting_character_aggregator.result; shadowed_token_indices.resize(grammar.variables.size()); } @@ -98,8 +103,18 @@ class LexTableBuilderImpl : public LexTableBuilder { clear(); map terminals; - terminals[Symbol(left, Symbol::Terminal)]; - terminals[Symbol(right, Symbol::Terminal)]; + terminals[Symbol::terminal(left)]; + terminals[Symbol::terminal(right)]; + + if (grammar.variables[left].is_string && grammar.variables[right].is_string) { + StartingCharacterAggregator left_starting_characters; + left_starting_characters.apply(grammar.variables[left].rule); + StartingCharacterAggregator right_starting_characters; + right_starting_characters.apply(grammar.variables[right].rule); + if (!(left_starting_characters.result == right_starting_characters.result)) { + return false; + } + } add_lex_state(item_set_for_terminals(terminals)); @@ -183,11 +198,11 @@ class LexTableBuilderImpl : public LexTableBuilder { for (ParseState &state : parse_table->states) { for (auto &entry : 
state.terminal_entries) { Symbol symbol = entry.first; - if (symbol.is_token()) { + if (symbol.is_terminal()) { auto homonyms = conflict_manager.possible_homonyms.find(symbol.index); if (homonyms != conflict_manager.possible_homonyms.end()) for (Symbol::Index homonym : homonyms->second) - if (state.terminal_entries.count(Symbol(homonym, Symbol::Terminal))) { + if (state.terminal_entries.count(Symbol::terminal(homonym))) { entry.second.reusable = false; break; } @@ -198,7 +213,7 @@ class LexTableBuilderImpl : public LexTableBuilder { auto extensions = conflict_manager.possible_extensions.find(symbol.index); if (extensions != conflict_manager.possible_extensions.end()) for (Symbol::Index extension : extensions->second) - if (state.terminal_entries.count(Symbol(extension, Symbol::Terminal))) { + if (state.terminal_entries.count(Symbol::terminal(extension))) { entry.second.depends_on_lookahead = true; break; } @@ -278,15 +293,18 @@ class LexTableBuilderImpl : public LexTableBuilder { LexItemSet result; for (const auto &pair : terminals) { Symbol symbol = pair.first; - if (symbol.is_token()) { - for (const rule_ptr &rule : rules_for_symbol(symbol)) { - for (const rule_ptr &separator_rule : separator_rules) { + if (symbol.is_terminal()) { + for (const auto &rule : rules_for_symbol(symbol)) { + for (const auto &separator_rule : separator_rules) { result.entries.insert(LexItem( symbol, Metadata::separator( Seq::build({ separator_rule, - Metadata::main_token(rule) })))); + Metadata::main_token(rule) + }) + ) + )); } } } @@ -294,17 +312,20 @@ class LexTableBuilderImpl : public LexTableBuilder { return result; } - vector rules_for_symbol(const rules::Symbol &symbol) { - if (symbol == rules::END_OF_INPUT()) - return { CharacterSet().include(0).copy() }; + vector rules_for_symbol(const rules::Symbol &symbol) { + if (symbol == rules::END_OF_INPUT()) { + return { CharacterSet().include(0) }; + } - rule_ptr rule = grammar.variables[symbol.index].rule; + return 
grammar.variables[symbol.index].rule.match( + [](const Choice &choice) { + return choice.elements; + }, - auto choice = rule->as(); - if (choice) - return choice->elements; - else - return { rule }; + [](auto rule) { + return vector{ rule }; + } + ); } }; diff --git a/src/compiler/build_tables/lookahead_set.cc b/src/compiler/build_tables/lookahead_set.cc index 239bc029..b9604c24 100644 --- a/src/compiler/build_tables/lookahead_set.cc +++ b/src/compiler/build_tables/lookahead_set.cc @@ -1,7 +1,7 @@ #include "compiler/build_tables/lookahead_set.h" #include #include -#include "compiler/rules/symbol.h" +#include "compiler/rule.h" namespace tree_sitter { namespace build_tables { diff --git a/src/compiler/build_tables/lookahead_set.h b/src/compiler/build_tables/lookahead_set.h index e62ee34d..74cd63e2 100644 --- a/src/compiler/build_tables/lookahead_set.h +++ b/src/compiler/build_tables/lookahead_set.h @@ -3,7 +3,7 @@ #include #include -#include "compiler/rules/symbol.h" +#include "compiler/rule.h" namespace tree_sitter { namespace build_tables { diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index b9c3831b..baf10a00 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -1,7 +1,7 @@ #include "compiler/build_tables/parse_item.h" #include #include "compiler/syntax_grammar.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" #include "compiler/util/hash_combine.h" namespace tree_sitter { @@ -41,7 +41,7 @@ bool ParseItem::operator<(const ParseItem &other) const { } Symbol ParseItem::lhs() const { - return Symbol(variable_index, Symbol::NonTerminal); + return Symbol{variable_index, Symbol::NonTerminal}; } bool ParseItem::is_done() const { diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h index fc3f0129..020afc07 100644 --- a/src/compiler/build_tables/parse_item.h +++ b/src/compiler/build_tables/parse_item.h @@ -4,8 
+4,7 @@ #include #include #include "compiler/build_tables/lookahead_set.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/metadata.h" +#include "compiler/rule.h" #include "compiler/syntax_grammar.h" #include "compiler/precedence_range.h" diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc index 0a2039d3..b0531d37 100644 --- a/src/compiler/build_tables/parse_item_set_builder.cc +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -4,7 +4,7 @@ #include #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" namespace tree_sitter { namespace build_tables { @@ -27,17 +27,17 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, set processed_non_terminals; for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) { - Symbol symbol(i, Symbol::Terminal); + Symbol symbol = Symbol::terminal(i); first_sets.insert({symbol, LookaheadSet({ symbol })}); } for (size_t i = 0, n = grammar.external_tokens.size(); i < n; i++) { - Symbol symbol(i, Symbol::External); + Symbol symbol = Symbol::external(i); first_sets.insert({symbol, LookaheadSet({ symbol })}); } for (size_t i = 0, n = grammar.variables.size(); i < n; i++) { - Symbol symbol(i, Symbol::NonTerminal); + Symbol symbol = Symbol::non_terminal(i); LookaheadSet first_set; processed_non_terminals.clear(); @@ -64,7 +64,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, vector components_to_process; for (size_t i = 0, n = grammar.variables.size(); i < n; i++) { - Symbol symbol(i, Symbol::NonTerminal); + Symbol symbol = Symbol::non_terminal(i); map> cache_entry; components_to_process.clear(); diff --git a/src/compiler/build_tables/parse_item_set_builder.h b/src/compiler/build_tables/parse_item_set_builder.h index 2a0de268..a319d698 100644 --- a/src/compiler/build_tables/parse_item_set_builder.h +++ 
b/src/compiler/build_tables/parse_item_set_builder.h @@ -2,7 +2,7 @@ #define COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_ #include "compiler/build_tables/parse_item.h" -#include "compiler/rules/symbol.h" +#include "compiler/rule.h" #include namespace tree_sitter { diff --git a/src/compiler/build_tables/rule_can_be_blank.cc b/src/compiler/build_tables/rule_can_be_blank.cc index 0ae95749..a72fc1ff 100644 --- a/src/compiler/build_tables/rule_can_be_blank.cc +++ b/src/compiler/build_tables/rule_can_be_blank.cc @@ -1,43 +1,42 @@ #include "compiler/build_tables/rule_can_be_blank.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/repeat.h" +#include "compiler/rule.h" namespace tree_sitter { namespace build_tables { -class CanBeBlank : public rules::RuleFn { - protected: - bool apply_to(const rules::Blank *) { - return true; - } +bool rule_can_be_blank(const rules::Rule &rule) { + return rule.match( + [](rules::Blank) { + return true; + }, - bool apply_to(const rules::Repeat *rule) { - return apply(rule->content); - } + [](rules::CharacterSet) { + return false; + }, - bool apply_to(const rules::Choice *rule) { - for (const auto &element : rule->elements) - if (apply(element)) - return true; - return false; - } + [](rules::Repeat repeat) { + return rule_can_be_blank(repeat.rule); + }, - bool apply_to(const rules::Seq *rule) { - return apply(rule->left) && apply(rule->right); - } + [](rules::Metadata metadata) { + return rule_can_be_blank(metadata.rule); + }, - bool apply_to(const rules::Metadata *rule) { - return apply(rule->rule); - } -}; + [](rules::Choice choice) { + for (const auto &element : choice.elements) { + if (rule_can_be_blank(element)) { + return true; + } + } + return false; + }, -bool rule_can_be_blank(const rule_ptr &rule) { - return CanBeBlank().apply(rule); + 
[](rules::Seq seq) { + return rule_can_be_blank(seq.left) && rule_can_be_blank(seq.right); + }, + + [](auto) { return false; } + ); } } // namespace build_tables diff --git a/src/compiler/build_tables/rule_can_be_blank.h b/src/compiler/build_tables/rule_can_be_blank.h index 96a4a279..768dc6df 100644 --- a/src/compiler/build_tables/rule_can_be_blank.h +++ b/src/compiler/build_tables/rule_can_be_blank.h @@ -6,7 +6,7 @@ namespace tree_sitter { namespace build_tables { -bool rule_can_be_blank(const rule_ptr &rule); +bool rule_can_be_blank(const rules::Rule &rule); } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc index cc9d8155..3edcf141 100644 --- a/src/compiler/compile.cc +++ b/src/compiler/compile.cc @@ -22,8 +22,7 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) { TSCompileErrorTypeInvalidGrammar }; } - auto prepare_grammar_result = - prepare_grammar::prepare_grammar(parse_result.grammar); + auto prepare_grammar_result = prepare_grammar::prepare_grammar(parse_result.grammar); const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result); const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result); CompileError error = get<2>(prepare_grammar_result); @@ -46,22 +45,20 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) { return { strdup(code.c_str()), nullptr, TSCompileErrorTypeNone }; } -pair compile(const Grammar &grammar, +pair compile(const InputGrammar &grammar, std::string name) { auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar); const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result); const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result); CompileError error = get<2>(prepare_grammar_result); - if (error.type) - return { "", error }; + if (error.type) return { "", error }; auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar); const ParseTable &parse_table = 
get<0>(table_build_result); const LexTable &lex_table = get<1>(table_build_result); error = get<2>(table_build_result); - if (error.type) - return { "", error }; + if (error.type) return { "", error }; string code = generate_code::c_code(name, parse_table, lex_table, syntax_grammar, lexical_grammar); diff --git a/src/compiler/compile.h b/src/compiler/compile.h index 649c38ea..5f182bc0 100644 --- a/src/compiler/compile.h +++ b/src/compiler/compile.h @@ -7,9 +7,9 @@ namespace tree_sitter { -struct Grammar; +struct InputGrammar; -std::pair compile(const Grammar &, std::string); +std::pair compile(const InputGrammar &, std::string); } // namespace tree_sitter diff --git a/src/compiler/compile_error.h b/src/compiler/compile_error.h index 7035d7a7..9797a459 100644 --- a/src/compiler/compile_error.h +++ b/src/compiler/compile_error.h @@ -8,6 +8,8 @@ namespace tree_sitter { class CompileError { public: + CompileError() : type(TSCompileErrorTypeNone) {} + CompileError(TSCompileErrorType type, std::string message) : type(type), message(message) {} @@ -15,6 +17,10 @@ class CompileError { return CompileError(TSCompileErrorTypeNone, ""); } + operator bool() const { + return type != TSCompileErrorTypeNone; + } + bool operator==(const CompileError &other) const { return type == other.type && message == other.message; } diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index d592966a..b51db626 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -9,7 +9,7 @@ #include "compiler/parse_table.h" #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" #include "compiler/util/string_helpers.h" #include "tree_sitter/runtime.h" @@ -129,7 +129,7 @@ class CCodeGenerator { size_t token_count = 0; for (const auto &entry : parse_table.symbols) { const Symbol &symbol = entry.first; - if (symbol.is_token()) { + if 
(symbol.is_terminal()) { token_count++; } else if (symbol.is_external()) { const ExternalToken &external_token = syntax_grammar.external_tokens[symbol.index]; @@ -256,7 +256,7 @@ class CCodeGenerator { if (symbol.is_external()) { needs_external_scanner = true; external_token_indices.insert(symbol.index); - } else if (symbol.is_token()) { + } else if (symbol.is_terminal()) { auto corresponding_external_token = external_tokens_by_corresponding_internal_token.find(symbol.index); if (corresponding_external_token != external_tokens_by_corresponding_internal_token.end()) { @@ -298,7 +298,7 @@ class CCodeGenerator { line("TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {"); indent([&]() { for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) { - line("[" + external_token_id(i) + "] = " + symbol_id(Symbol(i, Symbol::External)) + ","); + line("[" + external_token_id(i) + "] = " + symbol_id(Symbol::external(i)) + ","); } }); line("};"); @@ -339,7 +339,7 @@ class CCodeGenerator { line("[" + to_string(state_id++) + "] = {"); indent([&]() { for (const auto &entry : state.nonterminal_entries) { - line("[" + symbol_id(Symbol(entry.first, Symbol::NonTerminal)) + "] = STATE("); + line("[" + symbol_id(Symbol::non_terminal(entry.first)) + "] = STATE("); add(to_string(entry.second)); add("),"); } @@ -686,9 +686,13 @@ class CCodeGenerator { string c_code(string name, const ParseTable &parse_table, const LexTable &lex_table, const SyntaxGrammar &syntax_grammar, const LexicalGrammar &lexical_grammar) { - return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, - lexical_grammar) - .code(); + return CCodeGenerator( + name, + parse_table, + lex_table, + syntax_grammar, + lexical_grammar + ).code(); } } // namespace generate_code diff --git a/src/compiler/grammar.h b/src/compiler/grammar.h index 0a07280c..f24c0030 100644 --- a/src/compiler/grammar.h +++ b/src/compiler/grammar.h @@ -2,17 +2,43 @@ #define COMPILER_GRAMMAR_H_ #include +#include #include 
#include #include "compiler/rule.h" namespace tree_sitter { -struct Grammar { - std::vector> rules; - std::vector extra_tokens; - std::vector> expected_conflicts; - std::vector external_tokens; +enum VariableType { + VariableTypeHidden, + VariableTypeAuxiliary, + VariableTypeAnonymous, + VariableTypeNamed, +}; + +struct ExternalToken { + std::string name; + VariableType type; + rules::Symbol corresponding_internal_token; + + inline bool operator==(const ExternalToken &other) const { + return name == other.name && + type == other.type && + corresponding_internal_token == other.corresponding_internal_token; + } +}; + +struct InputGrammar { + struct Variable { + std::string name; + VariableType type; + rules::Rule rule; + }; + + std::vector variables; + std::vector extra_tokens; + std::vector> expected_conflicts; + std::vector external_tokens; }; } // namespace tree_sitter diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index ccca250d..daf4517a 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -1,6 +1,5 @@ #include "compiler/lex_table.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" namespace tree_sitter { diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index 7b87079a..9317c818 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -6,8 +6,7 @@ #include #include #include "compiler/precedence_range.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/character_set.h" +#include "compiler/rule.h" namespace tree_sitter { diff --git a/src/compiler/lexical_grammar.h b/src/compiler/lexical_grammar.h index 456e2089..78d3faa8 100644 --- a/src/compiler/lexical_grammar.h +++ b/src/compiler/lexical_grammar.h @@ -5,20 +5,25 @@ #include #include #include "compiler/rule.h" -#include "compiler/variable.h" +#include "compiler/grammar.h" namespace tree_sitter { struct LexicalVariable { std::string name; VariableType type; - rule_ptr 
rule; + rules::Rule rule; bool is_string; + + inline bool operator==(const LexicalVariable &other) const { + return other.name == name && other.type == type && other.rule == rule && + other.is_string == is_string; + } }; struct LexicalGrammar { std::vector variables; - std::vector separators; + std::vector separators; }; } // namespace tree_sitter diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_grammar.cc index 327c0f31..1a631c02 100644 --- a/src/compiler/parse_grammar.cc +++ b/src/compiler/parse_grammar.cc @@ -1,20 +1,34 @@ #include "compiler/parse_grammar.h" #include #include +#include #include #include "json.h" #include "compiler/rule.h" -#include "compiler/rules.h" namespace tree_sitter { using std::string; using std::vector; +using std::unordered_set; using std::pair; +using rules::Rule; +using rules::Blank; +using rules::Choice; +using rules::Repeat; +using rules::Seq; +using rules::Metadata; +using rules::Pattern; +using rules::String; +using rules::NamedSymbol; struct ParseRuleResult { - rule_ptr rule; + Rule rule; string error_message; + + ParseRuleResult(const string &error_message) : error_message(error_message) {} + ParseRuleResult(const char *error_message) : error_message(error_message) {} + ParseRuleResult(Rule rule) : rule(rule) {} }; ParseRuleResult parse_rule(json_value *rule_json) { @@ -23,193 +37,163 @@ ParseRuleResult parse_rule(json_value *rule_json) { string type; if (!rule_json) { - error_message = "Rule cannot be null"; - goto error; + return "Rule cannot be null"; } if (rule_json->type != json_object) { - error_message = "Rule type must be an object"; - goto error; + return "Rule type must be an object"; } rule_type_json = rule_json->operator[]("type"); if (rule_type_json.type != json_string) { - error_message = "Rule type must be a string"; - goto error; + return "Rule type must be a string"; } type = rule_type_json.u.string.ptr; if (type == "BLANK") { - return { blank(), "" }; + return Rule(Blank{}); } if (type == 
"CHOICE") { json_value members_json = rule_json->operator[]("members"); if (members_json.type != json_array) { - error_message = "Choice members must be an array"; - goto error; + return "Choice members must be an array"; } - vector members; + vector members; for (size_t i = 0, length = members_json.u.array.length; i < length; i++) { json_value *member_json = members_json.u.array.values[i]; - ParseRuleResult member = parse_rule(member_json); - if (member.rule.get()) { - members.push_back(member.rule); - } else { - error_message = "Invalid choice member: " + member.error_message; - goto error; + auto result = parse_rule(member_json); + if (!result.error_message.empty()) { + return "Invalid choice member: " + result.error_message; } + members.push_back(result.rule); } - return { choice(members), "" }; + return Rule(Choice{members}); } if (type == "SEQ") { json_value members_json = rule_json->operator[]("members"); if (members_json.type != json_array) { - error_message = "Seq members must be an array"; - goto error; + return "Seq members must be an array"; } - vector members; + vector members; for (size_t i = 0, length = members_json.u.array.length; i < length; i++) { json_value *member_json = members_json.u.array.values[i]; - ParseRuleResult member = parse_rule(member_json); - if (member.rule.get()) { - members.push_back(member.rule); - } else { - error_message = "Invalid seq member: " + member.error_message; - goto error; + auto result = parse_rule(member_json); + if (!result.error_message.empty()) { + return "Invalid choice member: " + result.error_message; } + members.push_back(result.rule); } - return { seq(members), "" }; + return *Seq::build(members); } if (type == "REPEAT") { json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (content.rule.get()) { - return { repeat(content.rule), "" }; - } else { - error_message = "Invalid repeat content: " + content.error_message; - goto error; + auto 
result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid repeat content: " + result.error_message; } + return Rule(Choice{{Repeat{result.rule}, Blank{}}}); } if (type == "REPEAT1") { json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (content.rule.get()) { - return { repeat1(content.rule), "" }; - } else { - error_message = "Invalid repeat1 content: " + content.error_message; - goto error; + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid repeat content: " + result.error_message; } + return Rule(Repeat{result.rule}); } if (type == "TOKEN") { json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (content.rule.get()) { - return { token(content.rule), "" }; - } else { - error_message = "Invalid token content: " + content.error_message; - goto error; + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid token content: " + result.error_message; } + return Rule(Metadata::token(result.rule)); } if (type == "PATTERN") { json_value value_json = rule_json->operator[]("value"); if (value_json.type == json_string) { - return { pattern(value_json.u.string.ptr), "" }; + return Rule(Pattern{value_json.u.string.ptr}); } else { - error_message = "Pattern value must be a string"; - goto error; + return "Pattern value must be a string"; } } if (type == "STRING") { json_value value_json = rule_json->operator[]("value"); if (value_json.type == json_string) { - return { str(value_json.u.string.ptr), "" }; + return Rule(String{value_json.u.string.ptr}); } else { - error_message = "String rule value must be a string"; - goto error; + return "String rule value must be a string"; } } if (type == "SYMBOL") { json_value value_json = rule_json->operator[]("name"); if (value_json.type == json_string) { - return { 
sym(value_json.u.string.ptr), "" }; + return Rule(NamedSymbol{value_json.u.string.ptr}); } else { - error_message = "Symbol value must be a string"; - goto error; + return "Symbol value must be a string"; } } if (type == "PREC") { json_value precedence_json = rule_json->operator[]("value"); if (precedence_json.type != json_integer) { - error_message = "Precedence value must be an integer"; - goto error; + return "Precedence value must be an integer"; } json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (!content.rule.get()) { - error_message = "Invalid precedence content: " + content.error_message; - goto error; + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid precedence content: " + result.error_message; } - - return { prec(precedence_json.u.integer, content.rule), "" }; + return Rule(Metadata::prec(precedence_json.u.integer, result.rule)); } if (type == "PREC_LEFT") { json_value precedence_json = rule_json->operator[]("value"); if (precedence_json.type != json_integer) { - error_message = "Precedence value must be an integer"; - goto error; + return "Precedence value must be an integer"; } json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (!content.rule.get()) { - error_message = "Invalid precedence content: " + content.error_message; - goto error; + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid precedence content: " + result.error_message; } - - return { prec_left(precedence_json.u.integer, content.rule), "" }; + return Rule(Metadata::prec_left(precedence_json.u.integer, result.rule)); } if (type == "PREC_RIGHT") { json_value precedence_json = rule_json->operator[]("value"); if (precedence_json.type != json_integer) { - error_message = "Precedence value must be an integer"; - goto error; + return "Precedence value must be an 
integer"; } json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (!content.rule.get()) { - error_message = "Invalid precedence content: " + content.error_message; - goto error; + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid precedence content: " + result.error_message; } - - return { prec_right(precedence_json.u.integer, content.rule), "" }; + return Rule(Metadata::prec_right(precedence_json.u.integer, result.rule)); } - error_message = "Unknown rule type " + type; - -error: - return { rule_ptr(), error_message }; + return "Unknown rule type: " + type; } ParseGrammarResult parse_grammar(const string &input) { string error_message; string name; - Grammar grammar; + InputGrammar grammar; json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json; json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 }; @@ -242,15 +226,16 @@ ParseGrammarResult parse_grammar(const string &input) { for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) { json_object_entry entry_json = rules_json.u.object.values[i]; - ParseRuleResult entry = parse_rule(entry_json.value); - - if (!entry.rule.get()) { - error_message = - string("Invalid rule '") + entry_json.name + "' " + entry.error_message; + auto result = parse_rule(entry_json.value); + if (!result.error_message.empty()) { + error_message = result.error_message; goto error; } - - grammar.rules.push_back({ string(entry_json.name), entry.rule }); + grammar.variables.push_back(InputGrammar::Variable{ + string(entry_json.name), + VariableTypeNamed, + result.rule + }); } extras_json = grammar_json->operator[]("extras"); @@ -262,13 +247,12 @@ ParseGrammarResult parse_grammar(const string &input) { for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) { json_value *extra_json = extras_json.u.array.values[i]; - ParseRuleResult extra = 
parse_rule(extra_json); - if (!extra.rule.get()) { - error_message = string("Invalid extra token: ") + extra.error_message; + auto result = parse_rule(extra_json); + if (!result.error_message.empty()) { + error_message = "Invalid extra token: " + result.error_message; goto error; } - - grammar.extra_tokens.push_back(extra.rule); + grammar.extra_tokens.push_back(result.rule); } } @@ -286,7 +270,7 @@ ParseGrammarResult parse_grammar(const string &input) { goto error; } - vector conflict; + unordered_set conflict; for (size_t j = 0, conflict_length = conflict_json->u.array.length; j < conflict_length; j++) { json_value *conflict_entry_json = conflict_json->u.array.values[j]; @@ -295,7 +279,9 @@ ParseGrammarResult parse_grammar(const string &input) { goto error; } - conflict.push_back(string(conflict_entry_json->u.string.ptr)); + conflict.insert(rules::NamedSymbol{ + string(conflict_entry_json->u.string.ptr) + }); } grammar.expected_conflicts.push_back(conflict); @@ -317,7 +303,11 @@ ParseGrammarResult parse_grammar(const string &input) { } string token_name = token_name_json->u.string.ptr; - grammar.external_tokens.push_back(token_name); + grammar.external_tokens.push_back({ + token_name, + VariableTypeNamed, + rules::NONE() + }); } } @@ -329,7 +319,7 @@ error: json_value_free(grammar_json); } - return { "", Grammar(), error_message }; + return { "", InputGrammar(), error_message }; } } // namespace tree_sitter diff --git a/src/compiler/parse_grammar.h b/src/compiler/parse_grammar.h index 284ca4b2..04e7672b 100644 --- a/src/compiler/parse_grammar.h +++ b/src/compiler/parse_grammar.h @@ -9,7 +9,7 @@ namespace tree_sitter { struct ParseGrammarResult { std::string name; - Grammar grammar; + InputGrammar grammar; std::string error_message; }; diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 57728e0f..37707ed0 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -1,7 +1,7 @@ #include "compiler/parse_table.h" #include 
#include "compiler/precedence_range.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" namespace tree_sitter { @@ -178,7 +178,7 @@ ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, void ParseTable::set_nonterminal_action(ParseStateId state_id, Symbol::Index lookahead, ParseStateId next_state_id) { - symbols[Symbol(lookahead, Symbol::NonTerminal)].structural = true; + symbols[Symbol::non_terminal(lookahead)].structural = true; states[state_id].nonterminal_entries[lookahead] = next_state_id; } diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 02501ebd..c00969d2 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -6,8 +6,7 @@ #include #include #include "compiler/lex_table.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/metadata.h" +#include "compiler/rule.h" #include "compiler/precedence_range.h" #include "compiler/syntax_grammar.h" diff --git a/src/compiler/prepare_grammar/expand_repeats.cc b/src/compiler/prepare_grammar/expand_repeats.cc index d01bb7a0..d81c9cfc 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cc +++ b/src/compiler/prepare_grammar/expand_repeats.cc @@ -2,14 +2,8 @@ #include #include #include -#include "compiler/prepare_grammar/initial_syntax_grammar.h" +#include "compiler/grammar.h" #include "compiler/rule.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/repeat.h" namespace tree_sitter { namespace prepare_grammar { @@ -24,47 +18,78 @@ using rules::Choice; using rules::Repeat; using rules::Seq; using rules::Symbol; +using rules::Rule; -class ExpandRepeats : public rules::IdentityRuleFn { +class ExpandRepeats { string rule_name; size_t offset; size_t repeat_count; - vector> existing_repeats; + vector> existing_repeats; - rule_ptr apply_to(const Repeat *rule) { - for (const auto 
pair : existing_repeats) - if (pair.first->operator==(*rule)) - return pair.second.copy(); + Rule apply(Rule rule) { + return rule.match( + [&](const rules::Blank &blank) -> Rule { return blank; }, + [&](const rules::Symbol &symbol) { return symbol; }, - rule_ptr inner_rule = apply(rule->content); - size_t index = aux_rules.size(); - string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count); - Symbol repeat_symbol(offset + index, Symbol::NonTerminal); - existing_repeats.push_back({ rule->copy(), repeat_symbol }); - aux_rules.push_back(Variable{ - helper_rule_name, - VariableTypeAuxiliary, - Choice::build({ - Seq::build({ - repeat_symbol.copy(), - inner_rule, - }), - inner_rule, - }) - }); - return repeat_symbol.copy(); + [&](const rules::Choice &choice) { + vector elements; + for (const auto &element : choice.elements) { + elements.push_back(apply(element)); + } + return rules::Choice::build(elements); + }, + + [&](const rules::Seq &sequence) { + return rules::Seq{ + apply(sequence.left), + apply(sequence.right) + }; + }, + + [&](const rules::Repeat &repeat) { + for (const auto pair : existing_repeats) { + if (pair.first == rule) { + return pair.second; + } + } + + Rule inner_rule = apply(repeat.rule); + size_t index = aux_rules.size(); + string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count); + Symbol repeat_symbol = Symbol::non_terminal(offset + index); + existing_repeats.push_back({repeat, repeat_symbol}); + aux_rules.push_back({ + helper_rule_name, + VariableTypeAuxiliary, + Choice{{ + Seq{repeat_symbol, inner_rule}, + inner_rule, + }} + }); + return repeat_symbol; + }, + + [&](const rules::Metadata &metadata) { + return rules::Metadata{apply(metadata.rule), metadata.params}; + }, + + [](auto) { + assert(false); + return Blank{}; + } + ); } public: explicit ExpandRepeats(size_t offset) : offset(offset) {} - rule_ptr expand(const rule_ptr &rule, const string &name) { + Rule expand(const Rule &rule, const string &name) { 
rule_name = name; repeat_count = 0; return apply(rule); } - vector aux_rules; + vector aux_rules; }; InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) { @@ -75,11 +100,16 @@ InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) { result.external_tokens = grammar.external_tokens; ExpandRepeats expander(result.variables.size()); - for (auto &variable : result.variables) + for (auto &variable : result.variables) { variable.rule = expander.expand(variable.rule, variable.name); + } + + result.variables.insert( + result.variables.end(), + expander.aux_rules.begin(), + expander.aux_rules.end() + ); - result.variables.insert(result.variables.end(), expander.aux_rules.begin(), - expander.aux_rules.end()); return result; } diff --git a/src/compiler/prepare_grammar/expand_repeats.h b/src/compiler/prepare_grammar/expand_repeats.h index 3efabd9a..57e1474f 100644 --- a/src/compiler/prepare_grammar/expand_repeats.h +++ b/src/compiler/prepare_grammar/expand_repeats.h @@ -6,8 +6,6 @@ namespace tree_sitter { namespace prepare_grammar { -struct InitialSyntaxGrammar; - InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &); } // namespace prepare_grammar diff --git a/src/compiler/prepare_grammar/expand_tokens.cc b/src/compiler/prepare_grammar/expand_tokens.cc index ff268782..e384dfed 100644 --- a/src/compiler/prepare_grammar/expand_tokens.cc +++ b/src/compiler/prepare_grammar/expand_tokens.cc @@ -2,15 +2,8 @@ #include #include #include -#include #include "compiler/lexical_grammar.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/pattern.h" -#include "compiler/rules/string.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/character_set.h" +#include "compiler/rule.h" #include "compiler/prepare_grammar/parse_regex.h" #include "utf8proc.h" @@ -19,70 +12,69 @@ namespace prepare_grammar { using std::string; using std::vector; -using 
std::map; -using std::pair; -using std::make_shared; -using rules::String; -using rules::Pattern; -using rules::Metadata; +using rules::Rule; -class ExpandTokens : public rules::IdentityRuleFn { - using rules::IdentityRuleFn::apply_to; +ExpandTokenResult expand_token(const rules::Rule &rule) { + return rule.match( + [](const rules::Blank &blank) -> ExpandTokenResult { return Rule(blank); }, - rule_ptr apply_to(const String *rule) { - vector elements; - const uint8_t *iter = reinterpret_cast(rule->value.data()); - const uint8_t *end = iter + rule->value.size(); + [](const rules::String &string) { + vector elements; + const uint8_t *iter = reinterpret_cast(string.value.data()); + const uint8_t *end = iter + string.value.size(); - while (iter < end) { - int32_t el; - size_t size = utf8proc_iterate(iter, (end - iter), &el); - if (!size) - break; - iter += size; + while (iter < end) { + int32_t el; + size_t size = utf8proc_iterate(iter, (end - iter), &el); + if (!size) + break; + iter += size; - elements.push_back(rules::CharacterSet().include(el).copy()); - } + elements.push_back(rules::CharacterSet().include(el)); + } - rules::MetadataParams params; - params.is_token = true; - params.is_string = true; + return *rules::Seq::build(elements); + }, - return rules::Metadata::build(rules::Seq::build(elements), params); - } + [](const rules::Pattern &pattern) -> ExpandTokenResult { + auto result = parse_regex(pattern.value); + if (result.second) return result.second; + return result.first; + }, - rule_ptr apply_to(const Pattern *rule) { - auto pair = parse_regex(rule->value); - if (!error.type) - error = pair.second; - return pair.first; - } + [](const rules::Repeat &rule) -> ExpandTokenResult { + auto result = expand_token(rule.rule); + if (result.error) return result.error; + return *rules::Repeat::build(result.rule); + }, - public: - CompileError error; - ExpandTokens() : error(CompileError::none()) {} + [](const rules::Metadata &rule) -> ExpandTokenResult { + auto result 
= expand_token(rule.rule); + if (result.error) return result.error; + return Rule(rules::Metadata{result.rule, rule.params}); + }, + + [](const rules::Seq &rule) -> ExpandTokenResult { + auto left_result = expand_token(rule.left); + if (left_result.error) return left_result.error; + auto right_result = expand_token(rule.right); + if (right_result.error) return right_result.error; + return Rule(rules::Seq{left_result.rule, right_result.rule}); + }, + + [](const rules::Choice &rule) -> ExpandTokenResult { + std::vector elements; + for (const auto &element : rule.elements) { + auto result = expand_token(element); + if (result.error) return result.error; + elements.push_back(result.rule); + } + return Rule(rules::Choice{elements}); + }, + + [](auto) { return CompileError(TSCompileErrorTypeInvalidTokenContents, ""); } + ); }; -pair expand_tokens(const LexicalGrammar &grammar) { - LexicalGrammar result; - ExpandTokens expander; - - for (const LexicalVariable &variable : grammar.variables) { - auto rule = expander.apply(variable.rule); - if (expander.error.type) - return { result, expander.error }; - result.variables.push_back({variable.name, variable.type, rule, variable.is_string}); - } - - for (auto &sep : grammar.separators) { - auto rule = expander.apply(sep); - if (expander.error.type) - return { result, expander.error }; - result.separators.push_back(rule); - } - - return { result, CompileError::none() }; -} - } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/expand_tokens.h b/src/compiler/prepare_grammar/expand_tokens.h index 2e400090..d1545cca 100644 --- a/src/compiler/prepare_grammar/expand_tokens.h +++ b/src/compiler/prepare_grammar/expand_tokens.h @@ -2,15 +2,21 @@ #define COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_ #include +#include "compiler/rule.h" #include "compiler/compile_error.h" namespace tree_sitter { - -struct LexicalGrammar; - namespace prepare_grammar { -std::pair expand_tokens(const LexicalGrammar 
&); +struct ExpandTokenResult { + rules::Rule rule; + CompileError error; + + ExpandTokenResult(const rules::Rule &rule) : rule(rule) {} + ExpandTokenResult(const CompileError &error) : error(error) {} +}; + +ExpandTokenResult expand_token(const rules::Rule &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/extract_choices.cc b/src/compiler/prepare_grammar/extract_choices.cc index 6262001c..04225b6e 100644 --- a/src/compiler/prepare_grammar/extract_choices.cc +++ b/src/compiler/prepare_grammar/extract_choices.cc @@ -1,54 +1,48 @@ #include "compiler/prepare_grammar/extract_choices.h" #include #include -#include "compiler/rules/visitor.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/repeat.h" +#include "compiler/rule.h" namespace tree_sitter { namespace prepare_grammar { using std::vector; +using rules::Rule; -class ExtractChoices : public rules::RuleFn> { - vector default_apply(const Rule *rule) { - return vector({ rule->copy() }); - } +vector extract_choices(const Rule &rule) { + return rule.match( + [](const rules::Seq &sequence) { + vector result; + for (auto &left_entry : extract_choices(sequence.left)) { + for (auto &right_entry : extract_choices(sequence.right)) { + result.push_back(rules::Seq::build({left_entry, right_entry})); + } + } + return result; + }, - vector apply_to(const rules::Seq *rule) { - vector result; - for (auto left_entry : apply(rule->left)) - for (auto right_entry : apply(rule->right)) - result.push_back(rules::Seq::build({ left_entry, right_entry })); - return result; - } + [](const rules::Metadata &rule) { + vector result; + for (auto &entry : extract_choices(rule.rule)) { + result.push_back(rules::Metadata{entry, rule.params}); + } + return result; + }, - vector apply_to(const rules::Metadata *rule) { - vector result; - for (auto entry : apply(rule->rule)) - 
result.push_back(rules::Metadata::build(entry, rule->params)); - return result; - } + [](const rules::Choice &choice) { + vector result; + for (auto &element : choice.elements) { + for (auto &entry : extract_choices(element)) { + result.push_back(entry); + } + } + return result; + }, - vector apply_to(const rules::Choice *rule) { - vector result; - for (auto element : rule->elements) - for (auto entry : apply(element)) - result.push_back(entry); - return result; - } - - vector apply_to(const rules::Repeat *rule) { - return vector({ - rules::Repeat::build(rules::Choice::build(apply(rule->content))), - }); - } -}; - -std::vector extract_choices(const rule_ptr &rule) { - return ExtractChoices().apply(rule); + [](const auto &rule) { + return vector({rule}); + } + ); } } // namespace prepare_grammar diff --git a/src/compiler/prepare_grammar/extract_choices.h b/src/compiler/prepare_grammar/extract_choices.h index 50b91467..3b0d12db 100644 --- a/src/compiler/prepare_grammar/extract_choices.h +++ b/src/compiler/prepare_grammar/extract_choices.h @@ -7,7 +7,7 @@ namespace tree_sitter { namespace prepare_grammar { -std::vector extract_choices(const rule_ptr &); +std::vector extract_choices(const rules::Rule &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index ec821ecc..a476d30c 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -1,24 +1,19 @@ #include "compiler/prepare_grammar/extract_tokens.h" #include #include +#include #include #include #include #include "tree_sitter/compiler.h" #include "compiler/lexical_grammar.h" -#include "compiler/prepare_grammar/initial_syntax_grammar.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/string.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/pattern.h" +#include "compiler/rule.h" 
#include "compiler/prepare_grammar/token_description.h" -#include "compiler/prepare_grammar/is_token.h" +#include "compiler/prepare_grammar/expand_tokens.h" namespace tree_sitter { namespace prepare_grammar { -using std::make_shared; using std::make_tuple; using std::map; using std::set; @@ -26,74 +21,146 @@ using std::string; using std::tuple; using std::vector; using rules::Symbol; +using rules::Rule; +using rules::Rule; -class SymbolReplacer : public rules::IdentityRuleFn { - using rules::IdentityRuleFn::apply_to; - - rule_ptr apply_to(const Symbol *rule) { - return replace_symbol(*rule).copy(); - } - +class SymbolReplacer { public: map replacements; + Rule apply(const Rule &rule) { + return rule.match( + [this](const rules::Blank &blank) -> Rule { + return blank; + }, + + [this](const rules::Symbol &symbol) { + return replace_symbol(symbol); + }, + + [this](const rules::Choice &choice) { + vector elements; + for (const auto &element : choice.elements) { + elements.push_back(apply(element)); + } + return rules::Choice::build(elements); + }, + + [this](const rules::Seq &sequence) { + return rules::Seq{ + apply(sequence.left), + apply(sequence.right) + }; + }, + + [this](const rules::Repeat &repeat) { + return rules::Repeat{apply(repeat.rule)}; + }, + + [this](const rules::Metadata &metadata) { + return rules::Metadata{apply(metadata.rule), metadata.params}; + }, + + [](auto) { + assert(false); + return rules::Blank{}; + } + ); + } + Symbol replace_symbol(const Symbol &symbol) { - if (!symbol.is_non_terminal()) - return symbol; + if (!symbol.is_non_terminal()) return symbol; auto replacement_pair = replacements.find(symbol); - if (replacement_pair != replacements.end()) + if (replacement_pair != replacements.end()) { return replacement_pair->second; + } int new_index = symbol.index; - for (const auto &pair : replacements) - if (pair.first.index < symbol.index) + for (const auto &pair : replacements) { + if (pair.first.index < symbol.index) { new_index--; - return 
Symbol(new_index, Symbol::NonTerminal); + } + } + + return Symbol::non_terminal(new_index); } }; -class TokenExtractor : public rules::IdentityRuleFn { - using rules::IdentityRuleFn::apply_to; - - rule_ptr apply_to_token(const Rule *input, VariableType entry_type, bool is_string) { - for (size_t i = 0; i < tokens.size(); i++) - if (tokens[i].rule->operator==(*input)) { +class TokenExtractor { + Symbol extract_token(const rules::Rule &input, VariableType entry_type) { + for (size_t i = 0; i < tokens.size(); i++) { + if (tokens[i].rule == input) { token_usage_counts[i]++; - return make_shared(i, Symbol::Terminal); + return Symbol::terminal(i); } - - rule_ptr rule = input->copy(); - size_t index = tokens.size(); - tokens.push_back({token_description(rule), entry_type, rule, is_string}); - token_usage_counts.push_back(1); - return make_shared(index, Symbol::Terminal); - } - - rule_ptr apply_to(const rules::String *rule) { - return apply_to_token(rule, VariableTypeAnonymous, true); - } - - rule_ptr apply_to(const rules::Pattern *rule) { - return apply_to_token(rule, VariableTypeAuxiliary, false); - } - - rule_ptr apply_to(const rules::Metadata *rule) { - if (rule->params.is_token) { - return apply_to_token(rule->rule.get(), VariableTypeAuxiliary, false); - } else { - return rules::IdentityRuleFn::apply_to(rule); } + + size_t index = tokens.size(); + tokens.push_back({ + token_description(input), + entry_type, + input + }); + token_usage_counts.push_back(1); + + return Symbol::terminal(index); } public: - vector token_usage_counts; - vector tokens; -}; + Rule apply(const rules::Rule &rule) { + return rule.match( + [this](const rules::Blank &blank) -> Rule { return blank; }, -static CompileError extra_token_error(const string &message) { - return CompileError(TSCompileErrorTypeInvalidExtraToken, "Not a token: " + message); -} + [this](const rules::Metadata &rule) -> Rule { + if (rule.params.is_token) { + return extract_token(rule.rule, VariableTypeAuxiliary); + } else { + 
return rules::Metadata{apply(rule.rule), rule.params}; + } + }, + + [this](const rules::String &rule) { + return extract_token(rule, VariableTypeAnonymous); + }, + + [this](const rules::Pattern &rule) { + return extract_token(rule, VariableTypeAuxiliary); + }, + + [this](const rules::Repeat &rule) { + return rules::Repeat{apply(rule.rule)}; + }, + + [this](const rules::Seq &rule) { + return rules::Seq{ + apply(rule.left), + apply(rule.right) + }; + }, + + [this](const rules::Choice &rule) { + std::vector elements; + for (const auto &element : rule.elements) { + elements.push_back(apply(element)); + } + return rules::Choice::build(elements); + }, + + [](const rules::Symbol &symbol) { + return symbol; + }, + + [](auto) { + assert(false); + return rules::Blank{}; + } + ); + } + + vector token_usage_counts; + vector tokens; +}; tuple extract_tokens( const InternedGrammar &grammar @@ -104,15 +171,29 @@ tuple extract_tokens( TokenExtractor extractor; // First, extract all of the grammar's tokens into the lexical grammar. 
- vector processed_variables; - for (const Variable &variable : grammar.variables) { - processed_variables.push_back(Variable{ + vector processed_variables; + for (const auto &variable : grammar.variables) { + processed_variables.push_back({ variable.name, variable.type, extractor.apply(variable.rule) }); } - lexical_grammar.variables = extractor.tokens; + + for (const auto &extracted_token : extractor.tokens) { + auto expansion = expand_token(extracted_token.rule); + if (expansion.error) return make_tuple( + syntax_grammar, + lexical_grammar, + expansion.error + ); + lexical_grammar.variables.push_back({ + extracted_token.name, + extracted_token.type, + expansion.rule, + extracted_token.type == VariableTypeAnonymous + }); + } // If a variable's entire rule was extracted as a token and that token didn't // appear within any other rule, then remove that variable from the syntax @@ -120,26 +201,28 @@ tuple extract_tokens( // that pointed to that variable will need to be updated to point to the // variable in the lexical grammar. Symbols that pointed to later variables // will need to have their indices decremented. 
- size_t i = 0; - for (const Variable &variable : processed_variables) { - auto symbol = variable.rule->as(); - if (symbol && symbol->is_token() && extractor.token_usage_counts[symbol->index] == 1) { - lexical_grammar.variables[symbol->index].type = variable.type; - lexical_grammar.variables[symbol->index].name = variable.name; - symbol_replacer.replacements.insert({ Symbol(i, Symbol::NonTerminal), *symbol }); - } else { - syntax_grammar.variables.push_back(variable); - } + size_t i = -1; + for (const auto &variable : processed_variables) { i++; + if (variable.rule.is()) { + auto symbol = variable.rule.get_unchecked(); + if (symbol.is_terminal() && extractor.token_usage_counts[symbol.index] == 1) { + lexical_grammar.variables[symbol.index].type = variable.type; + lexical_grammar.variables[symbol.index].name = variable.name; + symbol_replacer.replacements[Symbol::non_terminal(i)] = symbol; + continue; + } + } + syntax_grammar.variables.push_back(variable); } // Perform any replacements of symbols needed based on the previous step. - for (Variable &variable : syntax_grammar.variables) { + for (auto &variable : syntax_grammar.variables) { variable.rule = symbol_replacer.apply(variable.rule); } - for (const ConflictSet &conflict_set : grammar.expected_conflicts) { - ConflictSet new_conflict_set; + for (const auto &conflict_set : grammar.expected_conflicts) { + set new_conflict_set; for (const Symbol &symbol : conflict_set) { new_conflict_set.insert(symbol_replacer.replace_symbol(symbol)); } @@ -148,47 +231,51 @@ tuple extract_tokens( // The grammar's extra tokens can be either token rules or symbols // pointing to token rules. If they are symbols, then they'll be handled by - // the parser; add them to the syntax grammar's ubiqutous tokens. If they + // the parser; add them to the syntax grammar's extra tokens. If they // are anonymous rules, they can be handled by the lexer; add them to the // lexical grammar's separator rules. 
- for (const rule_ptr &rule : grammar.extra_tokens) { - int i = 0; - bool used_elsewhere_in_grammar = false; - for (const LexicalVariable &variable : lexical_grammar.variables) { - if (variable.rule->operator==(*rule)) { - syntax_grammar.extra_tokens.insert(Symbol(i, Symbol::Terminal)); - used_elsewhere_in_grammar = true; + for (const auto &rule : grammar.extra_tokens) { + CompileError error = rule.match( + [&](const Symbol &symbol) { + Symbol new_symbol = symbol_replacer.replace_symbol(symbol); + if (new_symbol.is_non_terminal()) { + return CompileError( + TSCompileErrorTypeInvalidExtraToken, + "Non-token symbol " + syntax_grammar.variables[new_symbol.index].name + " can't be used as an extra token" + ); + } else { + syntax_grammar.extra_tokens.insert(new_symbol); + return CompileError::none(); + } + }, + + [&](auto non_symbol) { + auto expansion = expand_token(non_symbol); + if (expansion.error) return CompileError( + TSCompileErrorTypeInvalidExtraToken, + "Non-token rule expression can't be used as an extra token" + ); + int i = 0; + for (const LexicalVariable &variable : lexical_grammar.variables) { + if (variable.rule == expansion.rule) { + syntax_grammar.extra_tokens.insert(Symbol::terminal(i)); + return CompileError::none(); + } + i++; + } + + lexical_grammar.separators.push_back(expansion.rule); + return CompileError::none(); } - i++; - } + ); - if (used_elsewhere_in_grammar) { - continue; - } - - if (is_token(rule)) { - lexical_grammar.separators.push_back(rule); - continue; - } - - auto symbol = rule->as(); - if (!symbol) { - return make_tuple(syntax_grammar, lexical_grammar, - extra_token_error(rule->to_string())); - } - - Symbol new_symbol = symbol_replacer.replace_symbol(*symbol); - if (new_symbol.is_non_terminal()) { - return make_tuple( - syntax_grammar, lexical_grammar, - extra_token_error(syntax_grammar.variables[new_symbol.index].name)); - } - - syntax_grammar.extra_tokens.insert(new_symbol); + if (error) return make_tuple(syntax_grammar, 
lexical_grammar, error); } for (const ExternalToken &external_token : grammar.external_tokens) { - Symbol internal_token = symbol_replacer.replace_symbol(external_token.corresponding_internal_token); + Symbol internal_token = symbol_replacer.replace_symbol( + external_token.corresponding_internal_token + ); if (internal_token.is_non_terminal()) { return make_tuple( diff --git a/src/compiler/prepare_grammar/extract_tokens.h b/src/compiler/prepare_grammar/extract_tokens.h index 733524cf..73da39fd 100644 --- a/src/compiler/prepare_grammar/extract_tokens.h +++ b/src/compiler/prepare_grammar/extract_tokens.h @@ -4,14 +4,15 @@ #include #include "compiler/compile_error.h" #include "compiler/lexical_grammar.h" -#include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/prepare_grammar/interned_grammar.h" +#include "compiler/prepare_grammar/initial_syntax_grammar.h" namespace tree_sitter { namespace prepare_grammar { std::tuple extract_tokens( - const InternedGrammar &); + const InternedGrammar & +); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/flatten_grammar.cc b/src/compiler/prepare_grammar/flatten_grammar.cc index fe49c7a3..81aeea75 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.cc +++ b/src/compiler/prepare_grammar/flatten_grammar.cc @@ -3,11 +3,8 @@ #include #include "compiler/prepare_grammar/extract_choices.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/grammar.h" +#include "compiler/rule.h" namespace tree_sitter { namespace prepare_grammar { @@ -15,8 +12,9 @@ namespace prepare_grammar { using std::find; using std::pair; using std::vector; +using rules::Rule; -class FlattenRule : public rules::RuleFn { +class FlattenRule { private: vector precedence_stack; 
vector associativity_stack; @@ -24,40 +22,50 @@ class FlattenRule : public rules::RuleFn { rules::Associativity last_associativity; Production production; - void apply_to(const rules::Symbol *sym) { - production.push_back(ProductionStep{ - *sym, - precedence_stack.back(), - associativity_stack.back() - }); - } + void apply(const Rule &rule) { + rule.match( + [&](const rules::Symbol &symbol) { + production.push_back(ProductionStep{ + symbol, + precedence_stack.back(), + associativity_stack.back() + }); + }, - void apply_to(const rules::Metadata *metadata) { - if (metadata->params.has_precedence) - precedence_stack.push_back(metadata->params.precedence); - if (metadata->params.has_associativity) - associativity_stack.push_back(metadata->params.associativity); + [&](const rules::Metadata &metadata) { + if (metadata.params.has_precedence) + precedence_stack.push_back(metadata.params.precedence); + if (metadata.params.has_associativity) + associativity_stack.push_back(metadata.params.associativity); - apply(metadata->rule); + apply(metadata.rule); - if (metadata->params.has_precedence) { - last_precedence = precedence_stack.back(); - precedence_stack.pop_back(); - production.back().precedence = precedence_stack.back(); - } + if (metadata.params.has_precedence) { + last_precedence = precedence_stack.back(); + precedence_stack.pop_back(); + if (!production.empty()) production.back().precedence = precedence_stack.back(); /* blank content adds no step; back() on an empty vector is undefined */ + } - if (metadata->params.has_associativity) { - last_associativity = associativity_stack.back(); - associativity_stack.pop_back(); - production.back().associativity = associativity_stack.back(); - } - } + if (metadata.params.has_associativity) { + last_associativity = associativity_stack.back(); + associativity_stack.pop_back(); + if (!production.empty()) production.back().associativity = associativity_stack.back(); /* same empty-production guard as for precedence */ + } + }, - void apply_to(const rules::Seq *seq) { - apply(seq->left); - last_precedence = 0; - last_associativity = rules::AssociativityNone; - apply(seq->right); + [&](const rules::Seq
&sequence) { + apply(sequence.left); + last_precedence = 0; + last_associativity = rules::AssociativityNone; + apply(sequence.right); + }, + + [&](const rules::Blank &blank) {}, + + [&](auto) { + assert(!"Unexpected rule type"); + } + ); } public: @@ -67,7 +75,7 @@ class FlattenRule : public rules::RuleFn { last_precedence(0), last_associativity(rules::AssociativityNone) {} - Production flatten(const rule_ptr &rule) { + Production flatten(const Rule &rule) { apply(rule); if (!production.empty()) { production.back().precedence = last_precedence; @@ -77,10 +85,10 @@ class FlattenRule : public rules::RuleFn { } }; -SyntaxVariable flatten_rule(const Variable &variable) { +SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable) { vector productions; - for (const rule_ptr &rule_component : extract_choices(variable.rule)) { + for (const Rule &rule_component : extract_choices(variable.rule)) { Production production = FlattenRule().flatten(rule_component); auto end = productions.end(); if (find(productions.begin(), end, production) == end) { @@ -93,12 +101,21 @@ SyntaxVariable flatten_rule(const Variable &variable) { pair flatten_grammar(const InitialSyntaxGrammar &grammar) { SyntaxGrammar result; - result.expected_conflicts = grammar.expected_conflicts; - result.extra_tokens = grammar.extra_tokens; result.external_tokens = grammar.external_tokens; + for (const auto &expected_conflict : grammar.expected_conflicts) { + result.expected_conflicts.insert({ + expected_conflict.begin(), + expected_conflict.end(), + }); + } + + for (const rules::Symbol &extra_token : grammar.extra_tokens) { + result.extra_tokens.insert(extra_token); + } + bool is_start = true; - for (const Variable &variable : grammar.variables) { + for (const auto &variable : grammar.variables) { SyntaxVariable syntax_variable = flatten_rule(variable); if (!is_start) { diff --git a/src/compiler/prepare_grammar/flatten_grammar.h b/src/compiler/prepare_grammar/flatten_grammar.h index 
b5501fb4..4efd9561 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.h +++ b/src/compiler/prepare_grammar/flatten_grammar.h @@ -4,14 +4,14 @@ #include #include "tree_sitter/compiler.h" #include "compiler/compile_error.h" +#include "compiler/grammar.h" +#include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/syntax_grammar.h" namespace tree_sitter { namespace prepare_grammar { -struct InitialSyntaxGrammar; - -SyntaxVariable flatten_rule(const Variable &variable); +SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable); std::pair flatten_grammar(const InitialSyntaxGrammar &); } // namespace prepare_grammar diff --git a/src/compiler/prepare_grammar/initial_syntax_grammar.h b/src/compiler/prepare_grammar/initial_syntax_grammar.h index 1ac319cb..bc200483 100644 --- a/src/compiler/prepare_grammar/initial_syntax_grammar.h +++ b/src/compiler/prepare_grammar/initial_syntax_grammar.h @@ -4,17 +4,26 @@ #include #include #include "tree_sitter/compiler.h" -#include "compiler/rules/symbol.h" -#include "compiler/syntax_grammar.h" -#include "compiler/variable.h" +#include "compiler/grammar.h" +#include "compiler/rule.h" namespace tree_sitter { namespace prepare_grammar { struct InitialSyntaxGrammar { + struct Variable { + std::string name; + VariableType type; + rules::Rule rule; + + inline bool operator==(const Variable &other) const { + return name == other.name && type == other.type && rule == other.rule; + } + }; + std::vector variables; std::set extra_tokens; - std::set expected_conflicts; + std::set> expected_conflicts; std::vector external_tokens; }; diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc index 0786982b..504e9b97 100644 --- a/src/compiler/prepare_grammar/intern_symbols.cc +++ b/src/compiler/prepare_grammar/intern_symbols.cc @@ -4,11 +4,7 @@ #include #include "tree_sitter/compiler.h" #include "compiler/grammar.h" -#include "compiler/rules/visitor.h" 
-#include "compiler/rules/blank.h" -#include "compiler/rules/named_symbol.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" namespace tree_sitter { namespace prepare_grammar { @@ -19,32 +15,64 @@ using std::set; using std::pair; using std::make_shared; using rules::Symbol; +using rules::Rule; -class SymbolInterner : public rules::IdentityRuleFn { - using rules::IdentityRuleFn::apply_to; - - rule_ptr apply_to(const rules::NamedSymbol *rule) { - auto result = symbol_for_rule_name(rule->name); - if (!result.get()) { - missing_rule_name = rule->name; - return rules::Blank::build(); - } - return result; - } - +class SymbolInterner { public: - std::shared_ptr symbol_for_rule_name(string rule_name) { - for (size_t i = 0; i < grammar.rules.size(); i++) - if (grammar.rules[i].first == rule_name) - return make_shared(i, Symbol::NonTerminal); - for (size_t i = 0; i < grammar.external_tokens.size(); i++) - if (grammar.external_tokens[i] == rule_name) - return make_shared(i, Symbol::External); - return nullptr; + Rule apply(const Rule &rule) { + return rule.match( + [&](const rules::Blank &blank) -> Rule { return blank; }, + + [&](const rules::NamedSymbol &symbol) { + return intern_symbol(symbol); + }, + + [&](const rules::String &string) { return string; }, + [&](const rules::Pattern &pattern) { return pattern; }, + + [&](const rules::Choice &choice) { + vector elements; + for (const auto &element : choice.elements) { + elements.push_back(apply(element)); + } + return rules::Choice{elements}; + }, + + [&](const rules::Seq &sequence) { + return rules::Seq{ + apply(sequence.left), + apply(sequence.right) + }; + }, + + [&](const rules::Repeat &repeat) { + return rules::Repeat{apply(repeat.rule)}; + }, + + [&](const rules::Metadata &metadata) { + return rules::Metadata{apply(metadata.rule), metadata.params}; + }, + + [](auto) { + assert(false); + return rules::Blank{}; + } + ); } - explicit SymbolInterner(const Grammar 
&grammar) : grammar(grammar) {} - const Grammar grammar; + Symbol intern_symbol(rules::NamedSymbol named_symbol) { + for (size_t i = 0; i < grammar.variables.size(); i++) + if (grammar.variables[i].name == named_symbol.value) + return Symbol::non_terminal(i); + for (size_t i = 0; i < grammar.external_tokens.size(); i++) + if (grammar.external_tokens[i].name == named_symbol.value) + return Symbol::external(i); + missing_rule_name = named_symbol.value; + return rules::NONE(); + } + + explicit SymbolInterner(const InputGrammar &grammar) : grammar(grammar) {} + const InputGrammar &grammar; string missing_rule_name; }; @@ -53,52 +81,55 @@ CompileError missing_rule_error(string rule_name) { "Undefined rule '" + rule_name + "'"); } -pair intern_symbols(const Grammar &grammar) { +pair intern_symbols(const InputGrammar &grammar) { InternedGrammar result; - for (auto &external_token_name : grammar.external_tokens) { + for (auto &external_token : grammar.external_tokens) { Symbol corresponding_internal_token = rules::NONE(); - for (size_t i = 0, n = grammar.rules.size(); i < n; i++) { - if (grammar.rules[i].first == external_token_name) { - corresponding_internal_token = Symbol(i, Symbol::NonTerminal); + for (size_t i = 0, n = grammar.variables.size(); i < n; i++) { + if (grammar.variables[i].name == external_token.name) { + corresponding_internal_token = Symbol::non_terminal(i); break; } } result.external_tokens.push_back(ExternalToken{ - external_token_name, - external_token_name[0] == '_' ? VariableTypeHidden : VariableTypeNamed, + external_token.name, + external_token.name[0] == '_' ? 
VariableTypeHidden : VariableTypeNamed, corresponding_internal_token }); } SymbolInterner interner(grammar); - for (auto &pair : grammar.rules) { - auto new_rule = interner.apply(pair.second); - if (!interner.missing_rule_name.empty()) + for (auto &variable : grammar.variables) { + auto new_rule = interner.apply(variable.rule); + if (!interner.missing_rule_name.empty()) { return { result, missing_rule_error(interner.missing_rule_name) }; + } - result.variables.push_back(Variable{ - pair.first, - pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed, + result.variables.push_back(InternedGrammar::Variable{ + variable.name, + variable.name[0] == '_' ? VariableTypeHidden : VariableTypeNamed, new_rule }); } for (auto &rule : grammar.extra_tokens) { auto new_rule = interner.apply(rule); - if (!interner.missing_rule_name.empty()) + if (!interner.missing_rule_name.empty()) { return { result, missing_rule_error(interner.missing_rule_name) }; + } result.extra_tokens.push_back(new_rule); } - for (auto &names : grammar.expected_conflicts) { + for (auto &expected_conflict : grammar.expected_conflicts) { set entry; - for (auto &name : names) { - auto symbol = interner.symbol_for_rule_name(name); - if (symbol.get()) - entry.insert(*symbol); + for (auto &named_symbol : expected_conflict) { + auto symbol = interner.intern_symbol(named_symbol); + if (symbol != rules::NONE()) { + entry.insert(symbol); + } } result.expected_conflicts.insert(entry); } diff --git a/src/compiler/prepare_grammar/intern_symbols.h b/src/compiler/prepare_grammar/intern_symbols.h index f42369a6..8e8f2abe 100644 --- a/src/compiler/prepare_grammar/intern_symbols.h +++ b/src/compiler/prepare_grammar/intern_symbols.h @@ -8,11 +8,11 @@ namespace tree_sitter { -struct Grammar; +struct InputGrammar; namespace prepare_grammar { -std::pair intern_symbols(const Grammar &); +std::pair intern_symbols(const InputGrammar &); } // namespace prepare_grammar } // namespace tree_sitter diff --git 
a/src/compiler/prepare_grammar/interned_grammar.h b/src/compiler/prepare_grammar/interned_grammar.h index c8a14647..f7abdd4f 100644 --- a/src/compiler/prepare_grammar/interned_grammar.h +++ b/src/compiler/prepare_grammar/interned_grammar.h @@ -4,17 +4,26 @@ #include #include #include "tree_sitter/compiler.h" -#include "compiler/rules/symbol.h" -#include "compiler/syntax_grammar.h" -#include "compiler/variable.h" +#include "compiler/grammar.h" +#include "compiler/rule.h" namespace tree_sitter { namespace prepare_grammar { struct InternedGrammar { + struct Variable { + std::string name; + VariableType type; + rules::Rule rule; + + bool operator==(const Variable &other) const { + return name == other.name && type == other.type && rule == other.rule; + } + }; + std::vector variables; - std::vector extra_tokens; - std::set expected_conflicts; + std::vector extra_tokens; + std::set> expected_conflicts; std::vector external_tokens; }; diff --git a/src/compiler/prepare_grammar/is_token.cc b/src/compiler/prepare_grammar/is_token.cc deleted file mode 100644 index 4d209882..00000000 --- a/src/compiler/prepare_grammar/is_token.cc +++ /dev/null @@ -1,30 +0,0 @@ -#include "compiler/prepare_grammar/is_token.h" -#include "tree_sitter/compiler.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/string.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/pattern.h" - -namespace tree_sitter { -namespace prepare_grammar { - -class IsToken : public rules::RuleFn { - bool apply_to(const rules::String *rule) { - return true; - } - - bool apply_to(const rules::Pattern *rule) { - return true; - } - - bool apply_to(const rules::Metadata *rule) { - return rule->params.is_token; - } -}; - -bool is_token(const rule_ptr &rule) { - return IsToken().apply(rule); -} - -} // namespace prepare_grammar -} // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/is_token.h b/src/compiler/prepare_grammar/is_token.h deleted file mode 100644 index 55d1b62f..00000000 
--- a/src/compiler/prepare_grammar/is_token.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_ -#define COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_ - -#include "compiler/rule.h" - -namespace tree_sitter { -namespace prepare_grammar { - -bool is_token(const rule_ptr &); - -} // namespace prepare_grammar -} // namespace tree_sitter - -#endif // COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_ diff --git a/src/compiler/prepare_grammar/normalize_rules.cc b/src/compiler/prepare_grammar/normalize_rules.cc index 2e4dd205..fd659a00 100644 --- a/src/compiler/prepare_grammar/normalize_rules.cc +++ b/src/compiler/prepare_grammar/normalize_rules.cc @@ -1,10 +1,12 @@ #include "compiler/prepare_grammar/normalize_rules.h" #include "compiler/prepare_grammar/extract_choices.h" -#include "compiler/rules/choice.h" namespace tree_sitter { namespace prepare_grammar { +using std::vector; +using rules::Rule; + LexicalGrammar normalize_rules(const LexicalGrammar &input_grammar) { LexicalGrammar result(input_grammar); diff --git a/src/compiler/prepare_grammar/parse_regex.cc b/src/compiler/prepare_grammar/parse_regex.cc index 9fdab0d1..7c9f6ce2 100644 --- a/src/compiler/prepare_grammar/parse_regex.cc +++ b/src/compiler/prepare_grammar/parse_regex.cc @@ -2,11 +2,7 @@ #include #include #include -#include "compiler/rules/choice.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/blank.h" +#include "compiler/rule.h" #include "compiler/util/string_helpers.h" #include "utf8proc.h" @@ -18,9 +14,10 @@ using std::vector; using std::pair; using std::make_shared; using rules::CharacterSet; -using rules::Seq; using rules::Blank; +using rules::Rule; using rules::Choice; +using rules::Seq; using rules::Repeat; class PatternParser { @@ -32,103 +29,121 @@ class PatternParser { next(); } - pair rule(bool nested) { - vector choices = {}; + pair rule(bool nested) { + vector choices; do { if (!choices.empty()) { - 
if (peek() == '|') + if (peek() == '|') { next(); - else + } else { break; + } } auto pair = term(nested); - if (pair.second.type) - return { Blank::build(), pair.second }; + if (pair.second.type) { + return {Blank{}, pair.second }; + } choices.push_back(pair.first); } while (has_more_input()); - auto rule = - (choices.size() > 1) ? make_shared(choices) : choices.front(); - return { rule, CompileError::none() }; + return {Choice::build(choices), CompileError::none()}; } private: - pair term(bool nested) { - rule_ptr result = Blank::build(); + pair term(bool nested) { + Rule result; do { if (peek() == '|') break; if (nested && peek() == ')') break; auto pair = factor(); - if (pair.second.type) - return { Blank::build(), pair.second }; - result = Seq::build({ result, pair.first }); + if (pair.second) { + return {Blank{}, pair.second}; + } + result = Seq::build({result, pair.first}); } while (has_more_input()); return { result, CompileError::none() }; } - pair factor() { + pair factor() { auto pair = atom(); - if (pair.second.type) - return { Blank::build(), pair.second }; - rule_ptr result = pair.first; + if (pair.second.type) { + return {Blank{}, pair.second}; + } + + Rule result = pair.first; if (has_more_input()) { switch (peek()) { case '*': next(); - result = Choice::build({ Repeat::build(result), Blank::build() }); + result = Choice::build({ + Repeat{result}, + Blank{} + }); break; case '+': next(); - result = Repeat::build(result); + result = Repeat{result}; break; case '?': next(); - result = Choice::build({ result, Blank::build() }); + result = Choice::build({result, Blank{}}); break; } } - return { result, CompileError::none() }; + + return {result, CompileError::none()}; } - pair atom() { + pair atom() { switch (peek()) { case '(': { next(); auto pair = rule(true); - if (pair.second.type) - return { Blank::build(), pair.second }; - if (peek() != ')') + if (pair.second.type) { + return {Blank{}, pair.second}; + } + if (peek() != ')') { return 
error("unmatched open paren"); + } next(); - return { pair.first, CompileError::none() }; + return {pair.first, CompileError::none()}; } + case '[': { next(); auto pair = char_set(); - if (pair.second.type) - return { Blank::build(), pair.second }; - if (peek() != ']') + if (pair.second.type) { + return {Blank{}, pair.second}; + } + if (peek() != ']') { return error("unmatched open square bracket"); + } next(); - return { pair.first.copy(), CompileError::none() }; + return {pair.first, CompileError::none()}; } + case ')': { return error("unmatched close paren"); } + case ']': { return error("unmatched close square bracket"); } + case '.': { next(); - return { CharacterSet().include_all().exclude('\n').copy(), - CompileError::none() }; + return { + CharacterSet().include_all().exclude('\n'), + CompileError::none() + }; } + default: { auto pair = single_char(); if (pair.second.type) - return { Blank::build(), pair.second }; - return { pair.first.copy(), CompileError::none() }; + return { Blank{}, pair.second }; + return {pair.first, CompileError::none()}; } } } @@ -234,8 +249,8 @@ class PatternParser { return lookahead && iter <= end; } - pair error(string msg) { - return { Blank::build(), CompileError(TSCompileErrorTypeInvalidRegex, msg) }; + pair error(string msg) { + return { Blank{}, CompileError(TSCompileErrorTypeInvalidRegex, msg) }; } string input; @@ -244,7 +259,7 @@ class PatternParser { int32_t lookahead; }; -pair parse_regex(const std::string &input) { +pair parse_regex(const std::string &input) { return PatternParser(input.c_str()).rule(false); } diff --git a/src/compiler/prepare_grammar/parse_regex.h b/src/compiler/prepare_grammar/parse_regex.h index 7db65dce..b1c03f30 100644 --- a/src/compiler/prepare_grammar/parse_regex.h +++ b/src/compiler/prepare_grammar/parse_regex.h @@ -9,7 +9,7 @@ namespace tree_sitter { namespace prepare_grammar { -std::pair parse_regex(const std::string &); +std::pair parse_regex(const std::string &); } // namespace 
prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/prepare_grammar.cc b/src/compiler/prepare_grammar/prepare_grammar.cc index 81750b58..ac573a28 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.cc +++ b/src/compiler/prepare_grammar/prepare_grammar.cc @@ -17,7 +17,7 @@ using std::get; using std::make_tuple; tuple prepare_grammar( - const Grammar &input_grammar) { + const InputGrammar &input_grammar) { /* * Convert all string-based `NamedSymbols` into numerical `Symbols` */ @@ -31,8 +31,9 @@ tuple prepare_grammar( */ auto extract_result = extract_tokens(intern_result.first); error = get<2>(extract_result); - if (error.type) + if (error.type) { return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); + } /* * Replace `Repeat` rules with pairs of recursive rules @@ -42,11 +43,12 @@ tuple prepare_grammar( /* * Expand `String` and `Pattern` rules into full rule trees */ - auto expand_tokens_result = expand_tokens(get<1>(extract_result)); - LexicalGrammar lex_grammar = expand_tokens_result.first; - error = expand_tokens_result.second; - if (error.type) - return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); + LexicalGrammar lex_grammar = get<1>(extract_result); + // auto expand_tokens_result = expand_tokens(get<1>(extract_result)); + // LexicalGrammar lex_grammar = expand_tokens_result.first; + // error = expand_tokens_result.second; + // if (error.type) + // return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); /* * Flatten syntax rules into lists of productions. 
diff --git a/src/compiler/prepare_grammar/prepare_grammar.h b/src/compiler/prepare_grammar/prepare_grammar.h index e9cb80ee..bed59a53 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.h +++ b/src/compiler/prepare_grammar/prepare_grammar.h @@ -2,18 +2,15 @@ #define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_ #include +#include "compiler/grammar.h" #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" #include "compiler/compile_error.h" namespace tree_sitter { - -struct Grammar; - namespace prepare_grammar { -std::tuple prepare_grammar( - const Grammar &); +std::tuple prepare_grammar(const InputGrammar &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/token_description.cc b/src/compiler/prepare_grammar/token_description.cc index 72127959..2aa43bd7 100644 --- a/src/compiler/prepare_grammar/token_description.cc +++ b/src/compiler/prepare_grammar/token_description.cc @@ -1,68 +1,82 @@ #include "compiler/prepare_grammar/token_description.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/pattern.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/string.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/metadata.h" +#include "compiler/rule.h" #include "compiler/util/string_helpers.h" namespace tree_sitter { namespace prepare_grammar { using std::string; +using rules::Rule; -class TokenDescription : public rules::RuleFn { - string apply_to(const rules::Pattern *rule) { - is_trivial = false; - return rule->value; - } +class TokenDescription { + bool is_trivial; - string apply_to(const rules::String *rule) { - return rule->value; - } + string apply(const Rule &rule) { + return rule.match( + [&](const rules::Blank) -> string { + return ""; + }, - string apply_to(const rules::Metadata *rule) { - return apply(rule->rule); - } + [&](const rules::Symbol) { + return ""; + }, - string apply_to(const rules::Seq *rule) { - 
is_trivial = false; - return apply(rule->left) + apply(rule->right); - } + [&](const rules::Pattern &rule) { + is_trivial = false; + return rule.value; + }, - string apply_to(const rules::Repeat *rule) { - is_trivial = false; - return apply(rule->content) + "*"; - } + [&](const rules::String &rule) { + return rule.value; + }, - string apply_to(const rules::Choice *rule) { - is_trivial = false; - string result = "("; - bool started = false; - for (auto &element : rule->elements) { - if (started) - result += "|"; - result += apply(element); - started = true; - } - return result + ")"; + [&](const rules::Metadata &rule) { + return apply(rule.rule); + }, + + [&](const rules::Seq &rule) { + is_trivial = false; + return apply(rule.left) + apply(rule.right); + }, + + [&](const rules::Repeat &rule) { + is_trivial = false; + return apply(rule.rule) + "+"; + }, + + [&](const rules::Choice &rule) { + is_trivial = false; + string result = "("; + bool started = false; + for (auto &element : rule.elements) { + if (started) result += "|"; + result += apply(element); + started = true; + } + return result + ")"; + }, + + [](auto) { + return ""; + } + ); } public: - bool is_trivial; + string describe(const Rule &rule) { + string result = apply(rule); + if (is_trivial) { + return result; + } else { + return "/" + result + "/"; + } + } TokenDescription() : is_trivial(true) {} }; -string token_description(const rule_ptr &rule) { - TokenDescription description; - string result = description.apply(rule); - if (description.is_trivial) - return result; - else - return "/" + result + "/"; +string token_description(const Rule &rule) { + return TokenDescription().describe(rule); } } // namespace prepare_grammar diff --git a/src/compiler/prepare_grammar/token_description.h b/src/compiler/prepare_grammar/token_description.h index 5e44141c..6d83f7a9 100644 --- a/src/compiler/prepare_grammar/token_description.h +++ b/src/compiler/prepare_grammar/token_description.h @@ -7,7 +7,7 @@ namespace 
tree_sitter { namespace prepare_grammar { -std::string token_description(const rule_ptr &); +std::string token_description(const rules::Rule &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/rule.cc b/src/compiler/rule.cc index 8cb8ce95..55508c77 100644 --- a/src/compiler/rule.cc +++ b/src/compiler/rule.cc @@ -1,15 +1,235 @@ #include "compiler/rule.h" -#include +#include "compiler/util/hash_combine.h" namespace tree_sitter { +namespace rules { -using std::ostream; -using std::string; +using std::move; +using util::hash_combine; -bool Rule::operator!=(const Rule &other) const { - return !this->operator==(other); +Rule::Rule(const Rule &other) : blank(Blank{}), type(BlankType) { + *this = other; } -Rule::~Rule() {} +Rule::Rule(Rule &&other) noexcept : blank(Blank{}), type(BlankType) { + *this = move(other); +} +static void destroy_value(Rule *rule) { + switch (rule->type) { + case Rule::BlankType: return rule->blank.~Blank(); + case Rule::CharacterSetType: return rule->character_set.~CharacterSet(); + case Rule::StringType: return rule->string .~String(); + case Rule::PatternType: return rule->pattern .~Pattern(); + case Rule::NamedSymbolType: return rule->named_symbol.~NamedSymbol(); + case Rule::SymbolType: return rule->symbol .~Symbol(); + case Rule::ChoiceType: return rule->choice .~Choice(); + case Rule::MetadataType: return rule->metadata .~Metadata(); + case Rule::RepeatType: return rule->repeat .~Repeat(); + case Rule::SeqType: return rule->seq .~Seq(); + } +} + +Rule &Rule::operator=(const Rule &other) { + destroy_value(this); + type = other.type; + switch (type) { + case BlankType: + new (&blank) Blank(other.blank); + break; + case CharacterSetType: + new (&character_set) CharacterSet(other.character_set); + break; + case StringType: + new (&string) String(other.string); + break; + case PatternType: + new (&pattern) Pattern(other.pattern); + break; + case NamedSymbolType: + new (&named_symbol) 
NamedSymbol(other.named_symbol); + break; + case SymbolType: + new (&symbol) Symbol(other.symbol); + break; + case ChoiceType: + new (&choice) Choice(other.choice); + break; + case MetadataType: + new (&metadata) Metadata(other.metadata); + break; + case RepeatType: + new (&repeat) Repeat(other.repeat); + break; + case SeqType: + new (&seq) Seq(other.seq); + break; + } + return *this; +} + +Rule &Rule::operator=(Rule &&other) noexcept { + destroy_value(this); + type = other.type; + switch (type) { + case BlankType: + new (&blank) Blank(move(other.blank)); + break; + case CharacterSetType: + new (&character_set) CharacterSet(move(other.character_set)); + break; + case StringType: + new (&string) String(move(other.string)); + break; + case PatternType: + new (&pattern) Pattern(move(other.pattern)); + break; + case NamedSymbolType: + new (&named_symbol) NamedSymbol(move(other.named_symbol)); + break; + case SymbolType: + new (&symbol) Symbol(move(other.symbol)); + break; + case ChoiceType: + new (&choice) Choice(move(other.choice)); + break; + case MetadataType: + new (&metadata) Metadata(move(other.metadata)); + break; + case RepeatType: + new (&repeat) Repeat(move(other.repeat)); + break; + case SeqType: + new (&seq) Seq(move(other.seq)); + break; + } + other.type = BlankType; + other.blank = Blank{}; + return *this; +} + +Rule::~Rule() noexcept { + destroy_value(this); +} + +bool Rule::operator==(const Rule &other) const { + if (type != other.type) return false; + switch (type) { + case Rule::BlankType: return blank == other.blank; + case Rule::CharacterSetType: return character_set == other.character_set; + case Rule::StringType: return string == other.string; + case Rule::PatternType: return pattern == other.pattern; + case Rule::NamedSymbolType: return named_symbol == other.named_symbol; + case Rule::SymbolType: return symbol == other.symbol; + case Rule::ChoiceType: return choice == other.choice; + case Rule::MetadataType: return metadata == other.metadata; + 
case Rule::RepeatType: return repeat == other.repeat; + case Rule::SeqType: return seq == other.seq; + } +} + +template <> +bool Rule::is() const { return type == BlankType; } + +template <> +bool Rule::is() const { return type == SymbolType; } + +template <> +bool Rule::is() const { return type == RepeatType; } + +template <> +const Symbol & Rule::get_unchecked() const { return symbol; } + +} // namespace rules } // namespace tree_sitter + +namespace std { + +size_t hash::operator()(const Symbol &symbol) const { + auto result = hash()(symbol.index); + hash_combine(&result, hash()(symbol.type)); + return result; +} + +size_t hash::operator()(const NamedSymbol &symbol) const { + return hash()(symbol.value); +} + +size_t hash::operator()(const Pattern &symbol) const { + return hash()(symbol.value); +} + +size_t hash::operator()(const String &symbol) const { + return hash()(symbol.value); +} + +size_t hash::operator()(const CharacterSet &character_set) const { + size_t result = 0; + hash_combine(&result, character_set.includes_all); + hash_combine(&result, character_set.included_chars.size()); + for (uint32_t c : character_set.included_chars) { + hash_combine(&result, c); + } + hash_combine(&result, character_set.excluded_chars.size()); + for (uint32_t c : character_set.excluded_chars) { + hash_combine(&result, c); + } + return result; +} + +size_t hash::operator()(const Blank &blank) const { + return 0; +} + +size_t hash::operator()(const Choice &choice) const { + size_t result = 0; + for (const auto &element : choice.elements) { + symmetric_hash_combine(&result, element); + } + return result; +} + +size_t hash::operator()(const Repeat &repeat) const { + size_t result = 0; + hash_combine(&result, *repeat.rule); + return result; +} + +size_t hash::operator()(const Seq &seq) const { + size_t result = 0; + hash_combine(&result, *seq.left); + hash_combine(&result, *seq.right); + return result; +} + +size_t hash::operator()(const Metadata &metadata) const { + size_t 
result = 0; + hash_combine(&result, *metadata.rule); + hash_combine(&result, metadata.params.precedence); + hash_combine(&result, metadata.params.associativity); + hash_combine(&result, metadata.params.has_precedence); + hash_combine(&result, metadata.params.has_associativity); + hash_combine(&result, metadata.params.is_token); + hash_combine(&result, metadata.params.is_string); + hash_combine(&result, metadata.params.is_active); + hash_combine(&result, metadata.params.is_main_token); + return result; +} + +size_t hash::operator()(const Rule &rule) const { + size_t result = hash()(rule.type); + switch (rule.type) { + case Rule::BlankType: return result ^ hash()(rule.blank); + case Rule::CharacterSetType: return result ^ hash()(rule.character_set); + case Rule::StringType: return result ^ hash()(rule.string); + case Rule::PatternType: return result ^ hash()(rule.pattern); + case Rule::NamedSymbolType: return result ^ hash()(rule.named_symbol); + case Rule::SymbolType: return result ^ hash()(rule.symbol); + case Rule::ChoiceType: return result ^ hash()(rule.choice); + case Rule::MetadataType: return result ^ hash()(rule.metadata); + case Rule::RepeatType: return result ^ hash()(rule.repeat); + case Rule::SeqType: return result ^ hash()(rule.seq); + } +} + +} // namespace std \ No newline at end of file diff --git a/src/compiler/rule.h b/src/compiler/rule.h index b77e54a4..19c45564 100644 --- a/src/compiler/rule.h +++ b/src/compiler/rule.h @@ -1,44 +1,140 @@ #ifndef COMPILER_RULE_H_ #define COMPILER_RULE_H_ -#include #include +#include "compiler/util/make_visitor.h" +#include "compiler/util/hash_combine.h" +#include "compiler/rules/blank.h" +#include "compiler/rules/character_set.h" +#include "compiler/rules/choice.h" +#include "compiler/rules/metadata.h" +#include "compiler/rules/named_symbol.h" +#include "compiler/rules/pattern.h" +#include "compiler/rules/repeat.h" +#include "compiler/rules/seq.h" +#include "compiler/rules/string.h" +#include 
"compiler/rules/symbol.h" namespace tree_sitter { - namespace rules { -class Visitor; -} // namespace rules -class Rule; -typedef std::shared_ptr rule_ptr; +struct Rule { + union { + Blank blank; + CharacterSet character_set; + String string; + Pattern pattern; + NamedSymbol named_symbol; + Symbol symbol; + Choice choice; + Metadata metadata; + Repeat repeat; + Seq seq; + }; -class Rule { - public: - virtual bool operator==(const Rule &other) const = 0; - bool operator!=(const Rule &other) const; - virtual size_t hash_code() const = 0; - virtual rule_ptr copy() const = 0; - virtual std::string to_string() const = 0; - virtual void accept(rules::Visitor *visitor) const = 0; - virtual ~Rule(); + enum { + BlankType, + CharacterSetType, + StringType, + PatternType, + NamedSymbolType, + SymbolType, + ChoiceType, + MetadataType, + RepeatType, + SeqType, + } type; - template - const T *as() const { - return dynamic_cast(this); + Rule() : blank(Blank{}), type(BlankType) {}; + Rule(const Blank &value) : blank(value), type(BlankType) {}; + Rule(const CharacterSet &value) : character_set(value), type(CharacterSetType) {}; + Rule(const String &value) : string(value), type(StringType) {}; + Rule(const Pattern &value) : pattern(value), type(PatternType) {}; + Rule(const NamedSymbol &value) : named_symbol(value), type(NamedSymbolType) {}; + Rule(const Symbol &value) : symbol(value), type(SymbolType) {}; + Rule(const Choice &value) : choice(value), type(ChoiceType) {}; + Rule(const Metadata &value) : metadata(value), type(MetadataType) {}; + Rule(const Repeat &value) : repeat(value), type(RepeatType) {}; + Rule(const Seq &value) : seq(value), type(SeqType) {}; + + Rule(const std::shared_ptr &value) : Rule(*value) {} + + Rule(const Rule &other); + Rule(Rule &&other) noexcept; + Rule &operator=(const Rule &other); + Rule &operator=(Rule &&other) noexcept; + ~Rule() noexcept; + + template + bool is() const; + + template + const RuleType & get_unchecked() const; + + template + auto 
accept(FunctionType function) const -> decltype(function(blank)) { + switch (type) { + case BlankType: return function(blank); + case CharacterSetType: return function(character_set); + case StringType: return function(string); + case PatternType: return function(pattern); + case NamedSymbolType: return function(named_symbol); + case SymbolType: return function(symbol); + case ChoiceType: return function(choice); + case MetadataType: return function(metadata); + case RepeatType: return function(repeat); + case SeqType: return function(seq); + } } + + template + auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward(functions)...))){ + return accept(util::make_visitor(std::forward(functions)...)); + } + + bool operator==(const Rule &other) const; }; +} // namespace rules } // namespace tree_sitter namespace std { +using namespace tree_sitter::rules; +using namespace tree_sitter::util; + template <> -struct hash { - size_t operator()(const tree_sitter::rule_ptr &rule) const { - return rule->hash_code(); - } -}; +struct hash { size_t operator()(const Symbol &) const; }; + +template <> +struct hash { size_t operator()(const NamedSymbol &) const; }; + +template <> +struct hash { size_t operator()(const Pattern &) const; }; + +template <> +struct hash { size_t operator()(const String &) const; }; + +template <> +struct hash { size_t operator()(const CharacterSet &) const; }; + +template <> +struct hash { size_t operator()(const Blank &) const; }; + +template <> +struct hash { size_t operator()(const Choice &) const; }; + +template <> +struct hash { size_t operator()(const Repeat &) const; }; + +template <> +struct hash { size_t operator()(const Seq &) const; }; + +template <> +struct hash { size_t operator()(const Metadata &) const; }; + +template <> +struct hash { size_t operator()(const Rule &) const; }; } // namespace std diff --git a/src/compiler/rules.h b/src/compiler/rules.h deleted file mode 100644 index 
d98a719a..00000000 --- a/src/compiler/rules.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef COMPILER_RULES_H_ -#define COMPILER_RULES_H_ - -#include -#include -#include -#include "compiler/rule.h" - -namespace tree_sitter { - -rule_ptr blank(); -rule_ptr choice(const std::vector &); -rule_ptr repeat(const rule_ptr &); -rule_ptr repeat1(const rule_ptr &); -rule_ptr seq(const std::vector &); -rule_ptr sym(const std::string &); -rule_ptr pattern(const std::string &); -rule_ptr str(const std::string &); -rule_ptr prec(int precedence, const rule_ptr &); -rule_ptr prec_left(const rule_ptr &); -rule_ptr prec_left(int precedence, const rule_ptr &); -rule_ptr prec_right(const rule_ptr &); -rule_ptr prec_right(int precedence, const rule_ptr &); -rule_ptr token(const rule_ptr &rule); - -} // namespace std - -#endif // COMPILER_RULES_H_ diff --git a/src/compiler/rules/blank.cc b/src/compiler/rules/blank.cc deleted file mode 100644 index 6348bf62..00000000 --- a/src/compiler/rules/blank.cc +++ /dev/null @@ -1,36 +0,0 @@ -#include "compiler/rules/blank.h" -#include -#include -#include "compiler/rules/visitor.h" - -namespace tree_sitter { -namespace rules { - -Blank::Blank() {} - -rule_ptr Blank::build() { - return std::make_shared(); -} - -bool Blank::operator==(const Rule &rule) const { - return rule.as() != nullptr; -} - -size_t Blank::hash_code() const { - return 0; -} - -rule_ptr Blank::copy() const { - return std::make_shared(); -} - -std::string Blank::to_string() const { - return "(blank)"; -} - -void Blank::accept(Visitor *visitor) const { - visitor->visit(this); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/blank.h b/src/compiler/rules/blank.h index e67d213d..aa7ed5c9 100644 --- a/src/compiler/rules/blank.h +++ b/src/compiler/rules/blank.h @@ -1,25 +1,16 @@ #ifndef COMPILER_RULES_BLANK_H_ #define COMPILER_RULES_BLANK_H_ -#include -#include "compiler/rule.h" - namespace tree_sitter { namespace rules { -class Blank : public Rule { - 
public: - Blank(); - static rule_ptr build(); - - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +struct Blank { + inline bool operator==(const Blank &other) const { + return true; + } }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_BLANK_H_ +#endif // COMPILER_RULES_BLANK_H_ \ No newline at end of file diff --git a/src/compiler/rules/built_in_symbols.cc b/src/compiler/rules/built_in_symbols.cc deleted file mode 100644 index 0fe45f68..00000000 --- a/src/compiler/rules/built_in_symbols.cc +++ /dev/null @@ -1,19 +0,0 @@ -#include "compiler/rules/built_in_symbols.h" - -namespace tree_sitter { -namespace rules { - -Symbol END_OF_INPUT() { - return Symbol(-1, Symbol::Terminal); -} - -Symbol START() { - return Symbol(-2, Symbol::NonTerminal); -} - -Symbol NONE() { - return Symbol(-3, Symbol::Type(-1)); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/built_in_symbols.h b/src/compiler/rules/built_in_symbols.h deleted file mode 100644 index bdaed01c..00000000 --- a/src/compiler/rules/built_in_symbols.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef COMPILER_RULES_BUILT_IN_SYMBOLS_H_ -#define COMPILER_RULES_BUILT_IN_SYMBOLS_H_ - -#include "compiler/rules/symbol.h" - -namespace tree_sitter { -namespace rules { - -Symbol END_OF_INPUT(); -Symbol START(); -Symbol NONE(); - -} // namespace rules -} // namespace tree_sitter - -#endif // COMPILER_RULES_BUILT_IN_SYMBOLS_H_ diff --git a/src/compiler/rules/character_range.cc b/src/compiler/rules/character_range.cc deleted file mode 100644 index 1f6292e3..00000000 --- a/src/compiler/rules/character_range.cc +++ /dev/null @@ -1,36 +0,0 @@ -#include "compiler/rules/character_range.h" -#include -#include "compiler/util/string_helpers.h" - -namespace tree_sitter { -namespace rules { - -using std::string; - -CharacterRange::CharacterRange(uint32_t value) : 
min(value), max(value) {} -CharacterRange::CharacterRange(uint32_t min, uint32_t max) - : min(min), max(max) {} - -bool CharacterRange::operator==(const CharacterRange &other) const { - return min == other.min && max == other.max; -} - -bool CharacterRange::operator<(const CharacterRange &other) const { - if (min < other.min) - return true; - if (min > other.min) - return false; - if (max < other.max) - return true; - return false; -} - -string CharacterRange::to_string() const { - if (min == max) - return util::escape_char(min); - else - return util::escape_char(min) + "-" + util::escape_char(max); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/character_range.h b/src/compiler/rules/character_range.h deleted file mode 100644 index ecb73eb0..00000000 --- a/src/compiler/rules/character_range.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef COMPILER_RULES_CHARACTER_RANGE_H_ -#define COMPILER_RULES_CHARACTER_RANGE_H_ - -#include -#include - -namespace tree_sitter { -namespace rules { - -struct CharacterRange { - uint32_t min; - uint32_t max; - - explicit CharacterRange(uint32_t value); - explicit CharacterRange(uint32_t min, uint32_t max); - - bool operator==(const CharacterRange &other) const; - bool operator<(const CharacterRange &others) const; - std::string to_string() const; -}; - -} // namespace rules -} // namespace tree_sitter - -#endif // COMPILER_RULES_CHARACTER_RANGE_H_ diff --git a/src/compiler/rules/character_set.cc b/src/compiler/rules/character_set.cc index cff3ab58..089fce6a 100644 --- a/src/compiler/rules/character_set.cc +++ b/src/compiler/rules/character_set.cc @@ -1,43 +1,40 @@ #include "compiler/rules/character_set.h" -#include -#include -#include -#include "compiler/rules/visitor.h" -#include "compiler/util/hash_combine.h" + +using std::set; +using std::vector; namespace tree_sitter { namespace rules { -using std::string; -using std::set; -using std::vector; -using util::hash_combine; - static void add_range(set 
*characters, uint32_t min, uint32_t max) { - for (uint32_t c = min; c <= max; c++) + for (uint32_t c = min; c <= max; c++) { characters->insert(c); + } } static void remove_range(set *characters, uint32_t min, uint32_t max) { - for (uint32_t c = min; c <= max; c++) + for (uint32_t c = min; c <= max; c++) { characters->erase(c); + } } -static set remove_chars(set *left, - const set &right) { +static set remove_chars(set *left, const set &right) { set result; for (uint32_t c : right) { - if (left->erase(c)) + if (left->erase(c)) { result.insert(c); + } } return result; } static set add_chars(set *left, const set &right) { set result; - for (uint32_t c : right) - if (left->insert(c).second) + for (uint32_t c : right) { + if (left->insert(c).second) { result.insert(c); + } + } return result; } @@ -50,10 +47,11 @@ static vector consolidate_ranges(const set &chars) { result.back().max = c; } else if (size >= 1) { CharacterRange &last = result.back(); - if (last.min < last.max && last.max == (c - 1)) + if (last.min < last.max && last.max == (c - 1)) { last.max = c; - else + } else { result.push_back(CharacterRange(c)); + } } else { result.push_back(CharacterRange(c)); } @@ -61,14 +59,14 @@ static vector consolidate_ranges(const set &chars) { return result; } -CharacterSet::CharacterSet() - : includes_all(false), included_chars({}), excluded_chars({}) {} +CharacterSet::CharacterSet() : includes_all(false) {} -bool CharacterSet::operator==(const Rule &rule) const { - const CharacterSet *other = rule.as(); - return other && (includes_all == other->includes_all) && - (included_chars == other->included_chars) && - (excluded_chars == other->excluded_chars); +CharacterSet::CharacterSet(const set &chars) : included_chars(chars), includes_all(false) {} + +bool CharacterSet::operator==(const CharacterSet &other) const { + return includes_all == other.includes_all && + included_chars == other.included_chars && + excluded_chars == other.excluded_chars; } bool 
CharacterSet::operator<(const CharacterSet &other) const { @@ -83,41 +81,6 @@ bool CharacterSet::operator<(const CharacterSet &other) const { return excluded_chars < other.excluded_chars; } -size_t CharacterSet::hash_code() const { - size_t result = 0; - hash_combine(&result, includes_all); - hash_combine(&result, included_chars.size()); - for (uint32_t c : included_chars) - hash_combine(&result, c); - hash_combine(&result, excluded_chars.size()); - for (uint32_t c : excluded_chars) - hash_combine(&result, c); - return result; -} - -rule_ptr CharacterSet::copy() const { - return std::make_shared(*this); -} - -string CharacterSet::to_string() const { - string result("(char"); - if (includes_all) - result += " include_all"; - if (!included_chars.empty()) { - result += " (include"; - for (auto r : included_ranges()) - result += string(" ") + r.to_string(); - result += ")"; - } - if (!excluded_chars.empty()) { - result += " (exclude"; - for (auto r : excluded_ranges()) - result += string(" ") + r.to_string(); - result += ")"; - } - return result + ")"; -} - CharacterSet &CharacterSet::include_all() { includes_all = true; included_chars = {}; @@ -212,9 +175,5 @@ vector CharacterSet::excluded_ranges() const { return consolidate_ranges(excluded_chars); } -void CharacterSet::accept(Visitor *visitor) const { - visitor->visit(this); -} - } // namespace rules } // namespace tree_sitter diff --git a/src/compiler/rules/character_set.h b/src/compiler/rules/character_set.h index ced343b0..7a8ae715 100644 --- a/src/compiler/rules/character_set.h +++ b/src/compiler/rules/character_set.h @@ -1,20 +1,27 @@ #ifndef COMPILER_RULES_CHARACTER_SET_H_ #define COMPILER_RULES_CHARACTER_SET_H_ -#include -#include -#include -#include #include -#include "compiler/rule.h" -#include "compiler/rules/character_range.h" +#include namespace tree_sitter { namespace rules { -class CharacterSet : public Rule { - public: +struct CharacterRange { + uint32_t min; + uint32_t max; + + inline explicit 
CharacterRange(uint32_t value) : min{value}, max{value} {} + inline CharacterRange(uint32_t min, uint32_t max) : min{min}, max{max} {} + + inline bool operator==(const CharacterRange &other) const { + return min == other.min && max == other.max; + } +}; + +struct CharacterSet { CharacterSet(); + CharacterSet(const std::set &); CharacterSet &include_all(); CharacterSet &include(uint32_t c); @@ -22,12 +29,8 @@ class CharacterSet : public Rule { CharacterSet &exclude(uint32_t c); CharacterSet &exclude(uint32_t min, uint32_t max); - bool operator==(const Rule &other) const; + bool operator==(const CharacterSet &) const; bool operator<(const CharacterSet &) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; void add_set(const CharacterSet &other); CharacterSet remove_set(const CharacterSet &other); @@ -37,23 +40,12 @@ class CharacterSet : public Rule { std::vector included_ranges() const; std::vector excluded_ranges() const; - bool includes_all; std::set included_chars; std::set excluded_chars; + bool includes_all; }; } // namespace rules } // namespace tree_sitter -namespace std { - -template <> -struct hash { - size_t operator()(const tree_sitter::rules::CharacterSet &rule) const { - return rule.hash_code(); - } -}; - -} // namespace std - -#endif // COMPILER_RULES_CHARACTER_SET_H_ +#endif // COMPILER_RULES_CHARACTER_SET_H_ \ No newline at end of file diff --git a/src/compiler/rules/choice.cc b/src/compiler/rules/choice.cc index c793aed1..cba1f9f7 100644 --- a/src/compiler/rules/choice.cc +++ b/src/compiler/rules/choice.cc @@ -1,77 +1,38 @@ #include "compiler/rules/choice.h" -#include -#include -#include "compiler/rules/visitor.h" -#include "compiler/util/hash_combine.h" +#include "compiler/rule.h" namespace tree_sitter { namespace rules { -using std::string; -using std::make_shared; -using std::vector; -using std::set; -using util::symmetric_hash_combine; +static inline void 
add_choice_element(std::vector *elements, const Rule &new_rule) { + new_rule.match( + [elements](Choice choice) { + for (auto &element : choice.elements) { + add_choice_element(elements, element); + } + }, -Choice::Choice(const vector &elements) : elements(elements) {} + [elements](auto rule) { + for (auto &element : *elements) { + if (element == rule) return; + } + elements->push_back(rule); + } + ); +} -void add_choice_element(vector *vec, const rule_ptr new_rule) { - auto choice = new_rule->as(); - if (choice) { - for (auto &child : choice->elements) - add_choice_element(vec, child); - } else { - for (auto &element : *vec) - if (element->operator==(*new_rule)) - return; - vec->push_back(new_rule); +std::shared_ptr Choice::build(const std::vector &rules) { + std::vector elements; + for (auto &element : rules) { + add_choice_element(&elements, element); } + return std::make_shared( + (elements.size() == 1) ? elements.front() : Choice{elements} + ); } -rule_ptr Choice::build(const vector &inputs) { - vector elements; - for (auto &el : inputs) - add_choice_element(&elements, el); - if (elements.size() == 1) - return elements.front(); - else - return make_shared(elements); -} - -bool Choice::operator==(const Rule &rule) const { - const Choice *other = rule.as(); - if (!other) - return false; - size_t size = elements.size(); - if (size != other->elements.size()) - return false; - for (size_t i = 0; i < size; i++) - if (!elements[i]->operator==(*other->elements[i])) - return false; - return true; -} - -size_t Choice::hash_code() const { - size_t result = 0; - symmetric_hash_combine(&result, elements.size()); - for (const auto &element : elements) - symmetric_hash_combine(&result, element); - return result; -} - -rule_ptr Choice::copy() const { - return std::make_shared(*this); -} - -string Choice::to_string() const { - string result = "(choice"; - for (const auto &element : elements) - result += " " + element->to_string(); - return result + ")"; -} - -void 
Choice::accept(Visitor *visitor) const { - visitor->visit(this); +bool Choice::operator==(const Choice &other) const { + return elements == other.elements; } } // namespace rules diff --git a/src/compiler/rules/choice.h b/src/compiler/rules/choice.h index 1139ae6a..3a883a53 100644 --- a/src/compiler/rules/choice.h +++ b/src/compiler/rules/choice.h @@ -1,28 +1,22 @@ #ifndef COMPILER_RULES_CHOICE_H_ #define COMPILER_RULES_CHOICE_H_ -#include +#include #include -#include "compiler/rule.h" namespace tree_sitter { namespace rules { -class Choice : public Rule { - public: - explicit Choice(const std::vector &elements); - static rule_ptr build(const std::vector &rules); +struct Rule; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +struct Choice { + std::vector elements; - const std::vector elements; + static std::shared_ptr build(const std::vector &rules); + bool operator==(const Choice &other) const; }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_CHOICE_H_ +#endif // COMPILER_RULES_CHOICE_H_ \ No newline at end of file diff --git a/src/compiler/rules/metadata.cc b/src/compiler/rules/metadata.cc index 5b9724d8..ff98a54b 100644 --- a/src/compiler/rules/metadata.cc +++ b/src/compiler/rules/metadata.cc @@ -1,97 +1,70 @@ #include "compiler/rules/metadata.h" -#include -#include #include -#include "compiler/rules/visitor.h" -#include "compiler/rules/blank.h" -#include "compiler/util/hash_combine.h" +#include "compiler/rule.h" namespace tree_sitter { namespace rules { -using std::make_shared; -using std::map; -using std::pair; -using util::hash_combine; +Metadata::Metadata(const Rule &rule, MetadataParams params) : + rule(std::make_shared(rule)), params(params) {} -MetadataParams::MetadataParams() : - precedence{0}, - associativity{AssociativityNone}, - has_precedence{false}, - has_associativity{false}, - is_token{false}, - 
is_string{false}, - is_active{false}, - is_main_token{false} {} - -bool MetadataParams::operator==(const MetadataParams &other) const { - return - precedence == other.precedence && - associativity == other.associativity && - has_precedence == other.has_precedence && - has_associativity == other.has_associativity && - is_token == other.is_token && - is_string == other.is_string && - is_active == other.is_active && - is_main_token == other.is_main_token; +bool Metadata::operator==(const Metadata &other) const { + return rule->operator==(*other.rule) && params == other.params; } -Metadata::Metadata(rule_ptr rule, MetadataParams params) - : rule(rule), params(params) {} - -rule_ptr Metadata::build(rule_ptr rule, MetadataParams params) { - return std::make_shared(rule, params); +Metadata Metadata::token(const Rule &rule) { + MetadataParams params; + params.is_token = true; + return Metadata{rule, params}; } -rule_ptr Metadata::main_token(rule_ptr rule) { +Metadata Metadata::active_prec(int precedence, const Rule &rule) { MetadataParams params; params.has_precedence = true; - params.precedence = 0; - params.is_main_token = true; - return Metadata::build(rule, params); + params.precedence = precedence; + params.is_active = true; + return Metadata{rule, params}; } -rule_ptr Metadata::separator(rule_ptr rule) { +Metadata Metadata::prec(int precedence, const Rule &rule) { + MetadataParams params; + params.has_precedence = true; + params.precedence = precedence; + return Metadata{rule, params}; +} + +Metadata Metadata::prec_left(int precedence, const Rule &rule) { + MetadataParams params; + params.has_precedence = true; + params.precedence = precedence; + params.has_associativity = true; + params.associativity = AssociativityLeft; + return Metadata{rule, params}; +} + +Metadata Metadata::prec_right(int precedence, const Rule &rule) { + MetadataParams params; + params.has_precedence = true; + params.precedence = precedence; + params.has_associativity = true; + 
params.associativity = AssociativityRight; + return Metadata{rule, params}; +} + +Metadata Metadata::separator(const Rule &rule) { MetadataParams params; params.has_precedence = true; params.precedence = INT_MIN; params.is_active = true; - return Metadata::build(rule, params); + return Metadata{rule, params}; } -bool Metadata::operator==(const Rule &rule) const { - auto other = rule.as(); - return other && other->params == params && other->rule->operator==(*this->rule); -} - -size_t Metadata::hash_code() const { - size_t result = 0; - hash_combine(&result, params.precedence); - hash_combine(&result, params.associativity); - hash_combine(&result, params.has_precedence); - hash_combine(&result, params.has_associativity); - hash_combine(&result, params.is_token); - hash_combine(&result, params.is_string); - hash_combine(&result, params.is_active); - hash_combine(&result, params.is_main_token); - return result; -} - -rule_ptr Metadata::copy() const { - return make_shared(rule->copy(), params); -} - -std::string Metadata::to_string() const { - if (params.has_precedence) { - return "(metadata prec:" + std::to_string(params.precedence) + " " + - rule->to_string() + ")"; - } else { - return "(metadata " + rule->to_string() + ")"; - } -} - -void Metadata::accept(Visitor *visitor) const { - visitor->visit(this); +Metadata Metadata::main_token(const Rule &rule) { + MetadataParams params; + params.has_precedence = true; + params.precedence = 0; + params.is_main_token = true; + return Metadata{rule, params}; } } // namespace rules diff --git a/src/compiler/rules/metadata.h b/src/compiler/rules/metadata.h index a9f43c2f..0d55dfd2 100644 --- a/src/compiler/rules/metadata.h +++ b/src/compiler/rules/metadata.h @@ -1,9 +1,7 @@ #ifndef COMPILER_RULES_METADATA_H_ #define COMPILER_RULES_METADATA_H_ -#include -#include -#include "compiler/rule.h" +#include namespace tree_sitter { namespace rules { @@ -24,28 +22,45 @@ struct MetadataParams { bool is_active; bool is_main_token; - 
MetadataParams(); - bool operator==(const MetadataParams &) const; + inline MetadataParams() : + precedence{0}, associativity{AssociativityNone}, has_precedence{false}, + has_associativity{false}, is_token{false}, is_string{false}, + is_active{false}, is_main_token{false} {} + + inline bool operator==(const MetadataParams &other) const { + return ( + precedence == other.precedence && + associativity == other.associativity && + has_precedence == other.has_precedence && + has_associativity == other.has_associativity && + is_token == other.is_token && + is_string == other.is_string && + is_active == other.is_active && + is_main_token == other.is_main_token + ); + } }; -class Metadata : public Rule { - public: - Metadata(rule_ptr rule, MetadataParams); - static rule_ptr build(rule_ptr rule, MetadataParams); - static rule_ptr main_token(rule_ptr rule); - static rule_ptr separator(rule_ptr rule); +struct Rule; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - - const rule_ptr rule; +struct Metadata { + std::shared_ptr rule; MetadataParams params; + + Metadata(const Rule &rule, MetadataParams params); + + static Metadata token(const Rule &rule); + static Metadata active_prec(int precedence, const Rule &rule); + static Metadata prec(int precedence, const Rule &rule); + static Metadata prec_left(int precedence, const Rule &rule); + static Metadata prec_right(int precedence, const Rule &rule); + static Metadata separator(const Rule &rule); + static Metadata main_token(const Rule &rule); + + bool operator==(const Metadata &other) const; }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_METADATA_H_ +#endif // COMPILER_RULES_METADATA_H_ \ No newline at end of file diff --git a/src/compiler/rules/named_symbol.cc b/src/compiler/rules/named_symbol.cc deleted file mode 100644 index d846580b..00000000 --- 
a/src/compiler/rules/named_symbol.cc +++ /dev/null @@ -1,35 +0,0 @@ -#include "compiler/rules/named_symbol.h" -#include -#include "compiler/rules/visitor.h" - -namespace tree_sitter { -namespace rules { - -using std::string; -using std::hash; - -NamedSymbol::NamedSymbol(const std::string &name) : name(name) {} - -bool NamedSymbol::operator==(const Rule &rule) const { - auto other = rule.as(); - return other && other->name == name; -} - -size_t NamedSymbol::hash_code() const { - return hash()(name); -} - -rule_ptr NamedSymbol::copy() const { - return std::make_shared(*this); -} - -string NamedSymbol::to_string() const { - return string("(sym '") + name + "')"; -} - -void NamedSymbol::accept(Visitor *visitor) const { - visitor->visit(this); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/named_symbol.h b/src/compiler/rules/named_symbol.h index 2d265d0f..dd668aea 100644 --- a/src/compiler/rules/named_symbol.h +++ b/src/compiler/rules/named_symbol.h @@ -2,25 +2,19 @@ #define COMPILER_RULES_NAMED_SYMBOL_H_ #include -#include "compiler/rule.h" namespace tree_sitter { namespace rules { -class NamedSymbol : public Rule { - public: - explicit NamedSymbol(const std::string &name); +struct NamedSymbol { + std::string value; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - - std::string name; + inline bool operator==(const NamedSymbol &other) const { + return value == other.value; + } }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_NAMED_SYMBOL_H_ +#endif // COMPILER_RULES_NAMED_SYMBOL_H_ \ No newline at end of file diff --git a/src/compiler/rules/pattern.cc b/src/compiler/rules/pattern.cc deleted file mode 100644 index 5ac8f97b..00000000 --- a/src/compiler/rules/pattern.cc +++ /dev/null @@ -1,36 +0,0 @@ -#include "compiler/rules/pattern.h" -#include -#include "compiler/rules/visitor.h" 
-#include "compiler/util/string_helpers.h" - -namespace tree_sitter { -namespace rules { - -using std::string; -using std::hash; - -Pattern::Pattern(const string &string) : value(string) {} - -bool Pattern::operator==(tree_sitter::Rule const &other) const { - auto pattern = other.as(); - return pattern && (pattern->value == value); -} - -size_t Pattern::hash_code() const { - return hash()(value); -} - -rule_ptr Pattern::copy() const { - return std::make_shared(*this); -} - -string Pattern::to_string() const { - return string("(pattern '") + util::escape_string(value) + "')"; -} - -void Pattern::accept(Visitor *visitor) const { - visitor->visit(this); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/pattern.h b/src/compiler/rules/pattern.h index 305f7024..60c773f6 100644 --- a/src/compiler/rules/pattern.h +++ b/src/compiler/rules/pattern.h @@ -2,25 +2,19 @@ #define COMPILER_RULES_PATTERN_H_ #include -#include "compiler/rule.h" namespace tree_sitter { namespace rules { -class Pattern : public Rule { - public: - explicit Pattern(const std::string &string); +struct Pattern { + std::string value; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - - const std::string value; + inline bool operator==(const Pattern &other) const { + return value == other.value; + } }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_PATTERN_H_ +#endif // COMPILER_RULES_PATTERN_H_ \ No newline at end of file diff --git a/src/compiler/rules/repeat.cc b/src/compiler/rules/repeat.cc index 64d793bb..5bdbf185 100644 --- a/src/compiler/rules/repeat.cc +++ b/src/compiler/rules/repeat.cc @@ -1,43 +1,20 @@ #include "compiler/rules/repeat.h" -#include -#include -#include "compiler/rules/visitor.h" +#include "compiler/rule.h" namespace tree_sitter { namespace rules { -using std::make_shared; -using std::string; 
+Repeat::Repeat(const Rule &rule) : + rule(std::make_shared(rule)) {} -Repeat::Repeat(const rule_ptr content) : content(content) {} - -rule_ptr Repeat::build(const rule_ptr &rule) { - auto inner_repeat = rule->as(); - if (inner_repeat) - return rule; - else - return make_shared(rule); +bool Repeat::operator==(const Repeat &other) const { + return rule->operator==(*other.rule); } -bool Repeat::operator==(const Rule &rule) const { - auto other = rule.as(); - return other && (*other->content == *content); -} - -size_t Repeat::hash_code() const { - return content->hash_code(); -} - -rule_ptr Repeat::copy() const { - return make_shared(*this); -} - -string Repeat::to_string() const { - return string("(repeat ") + content->to_string() + ")"; -} - -void Repeat::accept(Visitor *visitor) const { - visitor->visit(this); +std::shared_ptr Repeat::build(const Rule &rule) { + return std::make_shared( + rule.is() ? rule : Repeat{rule} + ); } } // namespace rules diff --git a/src/compiler/rules/repeat.h b/src/compiler/rules/repeat.h index 35e4531a..2eb03001 100644 --- a/src/compiler/rules/repeat.h +++ b/src/compiler/rules/repeat.h @@ -1,27 +1,22 @@ #ifndef COMPILER_RULES_REPEAT_H_ #define COMPILER_RULES_REPEAT_H_ -#include -#include "compiler/rule.h" +#include namespace tree_sitter { namespace rules { -class Repeat : public Rule { - public: - explicit Repeat(rule_ptr content); - static rule_ptr build(const rule_ptr &rule); +struct Rule; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +struct Repeat { + std::shared_ptr rule; - const rule_ptr content; + explicit Repeat(const Rule &rule); + static std::shared_ptr build(const Rule &rule); + bool operator==(const Repeat &other) const; }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_REPEAT_H_ +#endif // COMPILER_RULES_REPEAT_H_ \ No newline at end of file diff --git 
a/src/compiler/rules/rules.cc b/src/compiler/rules/rules.cc deleted file mode 100644 index fdb0ebdf..00000000 --- a/src/compiler/rules/rules.cc +++ /dev/null @@ -1,108 +0,0 @@ -#include -#include -#include -#include -#include "compiler/rule.h" -#include "compiler/rules.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/named_symbol.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/string.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/pattern.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/built_in_symbols.h" - -namespace tree_sitter { - -using std::make_shared; -using std::string; -using std::set; -using std::vector; -using std::map; -using rules::MetadataParams; - -static rule_ptr metadata(rule_ptr rule, MetadataParams params) { - return std::make_shared(rule, params); -} - -rule_ptr blank() { - return rules::Blank::build(); -} - -rule_ptr choice(const vector &rules) { - return rules::Choice::build(rules); -} - -rule_ptr repeat(const rule_ptr &content) { - return choice({ repeat1(content), blank() }); -} - -rule_ptr repeat1(const rule_ptr &content) { - return rules::Repeat::build(content); -} - -rule_ptr seq(const vector &rules) { - return rules::Seq::build(rules); -} - -rule_ptr sym(const string &name) { - return make_shared(name); -} - -rule_ptr pattern(const string &value) { - return make_shared(value); -} - -rule_ptr str(const string &value) { - return make_shared(value); -} - -rule_ptr prec_left(const rule_ptr &rule) { - MetadataParams params; - params.has_associativity = true; - params.associativity = rules::AssociativityLeft; - return metadata(rule, params); -} - -rule_ptr prec_left(int precedence, const rule_ptr &rule) { - MetadataParams params; - params.has_associativity = true; - params.associativity = rules::AssociativityLeft; - params.has_precedence = true; - params.precedence = precedence; - return metadata(rule, 
params); -} - -rule_ptr prec_right(const rule_ptr &rule) { - MetadataParams params; - params.has_associativity = true; - params.associativity = rules::AssociativityRight; - return metadata(rule, params); -} - -rule_ptr prec_right(int precedence, const rule_ptr &rule) { - MetadataParams params; - params.has_associativity = true; - params.associativity = rules::AssociativityRight; - params.has_precedence = true; - params.precedence = precedence; - return metadata(rule, params); -} - -rule_ptr prec(int precedence, const rule_ptr &rule) { - MetadataParams params; - params.has_precedence = true; - params.precedence = precedence; - return metadata(rule, params); -} - -rule_ptr token(const rule_ptr &rule) { - MetadataParams params; - params.is_token = true; - return metadata(rule, params); -} - -} // namespace tree_sitter diff --git a/src/compiler/rules/seq.cc b/src/compiler/rules/seq.cc index cc934a5c..4e3a8132 100644 --- a/src/compiler/rules/seq.cc +++ b/src/compiler/rules/seq.cc @@ -1,56 +1,37 @@ #include "compiler/rules/seq.h" -#include -#include "compiler/rules/visitor.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/metadata.h" +#include "compiler/rule.h" namespace tree_sitter { namespace rules { -using std::make_shared; -using std::string; -using std::vector; +Seq::Seq(const Rule &left, const Rule &right) : + left(std::make_shared(left)), + right(std::make_shared(right)) {} -Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {} - -rule_ptr Seq::build(const std::vector &rules) { - rule_ptr result = make_shared(); - for (auto &rule : rules) { - auto blank = rule->as(); - if (blank) - continue; - - auto metadata = rule->as(); - if (metadata && metadata->rule->as()) - continue; - - if (result->as()) - result = rule; - else - result = make_shared(result, rule); +std::shared_ptr Seq::build(const std::vector &rules) { + Rule result; + for (const auto &rule : rules) { + rule.match( + [](Blank) {}, + [&](Metadata metadata) { + if 
(!metadata.rule->is()) { + result = Seq{result, rule}; + } + }, + [&](auto) { + if (result.is()) { + result = rule; + } else { + result = Seq{result, rule}; + } + } + ); } - return result; + return std::make_shared(result); } -bool Seq::operator==(const Rule &rule) const { - const Seq *other = rule.as(); - return other && (*other->left == *left) && (*other->right == *right); -} - -size_t Seq::hash_code() const { - return left->hash_code() ^ right->hash_code(); -} - -rule_ptr Seq::copy() const { - return std::make_shared(*this); -} - -string Seq::to_string() const { - return string("(seq ") + left->to_string() + " " + right->to_string() + ")"; -} - -void Seq::accept(Visitor *visitor) const { - visitor->visit(this); +bool Seq::operator==(const Seq &other) const { + return left->operator==(*other.left) && right->operator==(*other.right); } } // namespace rules diff --git a/src/compiler/rules/seq.h b/src/compiler/rules/seq.h index b331cf77..036db44b 100644 --- a/src/compiler/rules/seq.h +++ b/src/compiler/rules/seq.h @@ -1,29 +1,24 @@ #ifndef COMPILER_RULES_SEQ_H_ #define COMPILER_RULES_SEQ_H_ -#include +#include #include -#include "compiler/rule.h" namespace tree_sitter { namespace rules { -class Seq : public Rule { - public: - Seq(rule_ptr left, rule_ptr right); - static rule_ptr build(const std::vector &rules); +struct Rule; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +struct Seq { + std::shared_ptr left; + std::shared_ptr right; - const rule_ptr left; - const rule_ptr right; + Seq(const Rule &left, const Rule &right); + static std::shared_ptr build(const std::vector &rules); + bool operator==(const Seq &other) const; }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_SEQ_H_ +#endif // COMPILER_RULES_SEQ_H_ \ No newline at end of file diff --git a/src/compiler/rules/string.cc b/src/compiler/rules/string.cc deleted 
file mode 100644 index 8a77b169..00000000 --- a/src/compiler/rules/string.cc +++ /dev/null @@ -1,35 +0,0 @@ -#include "compiler/rules/string.h" -#include -#include "compiler/rules/visitor.h" - -namespace tree_sitter { -namespace rules { - -using std::string; -using std::hash; - -String::String(string value) : value(value) {} - -bool String::operator==(const Rule &rule) const { - auto other = rule.as(); - return other && (other->value == value); -} - -size_t String::hash_code() const { - return hash()(value); -} - -rule_ptr String::copy() const { - return std::make_shared(*this); -} - -string String::to_string() const { - return string("(string '") + value + "')"; -} - -void String::accept(Visitor *visitor) const { - visitor->visit(this); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/string.h b/src/compiler/rules/string.h index 4398b560..9fbacd34 100644 --- a/src/compiler/rules/string.h +++ b/src/compiler/rules/string.h @@ -2,25 +2,19 @@ #define COMPILER_RULES_STRING_H_ #include -#include "compiler/rule.h" namespace tree_sitter { namespace rules { -class String : public Rule { - public: - explicit String(std::string value); +struct String { + std::string value; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - - const std::string value; + inline bool operator==(const String &other) const { + return value == other.value; + } }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_STRING_H_ +#endif // COMPILER_RULES_STRING_H_ \ No newline at end of file diff --git a/src/compiler/rules/symbol.cc b/src/compiler/rules/symbol.cc deleted file mode 100644 index e826cb0f..00000000 --- a/src/compiler/rules/symbol.cc +++ /dev/null @@ -1,82 +0,0 @@ -#include "compiler/rules/symbol.h" -#include -#include -#include "compiler/rules/visitor.h" -#include "compiler/util/hash_combine.h" - -namespace 
tree_sitter { -namespace rules { - -using std::string; -using std::to_string; -using util::hash_combine; - -Symbol::Symbol(Symbol::Index index, Symbol::Type type) : index(index), type(type) {} - -bool Symbol::operator==(const Symbol &other) const { - return (other.index == index) && (other.type == type); -} - -bool Symbol::operator==(const Rule &rule) const { - auto other = rule.as(); - return other && this->operator==(*other); -} - -size_t Symbol::hash_code() const { - size_t result = 0; - hash_combine(&result, index); - hash_combine(&result, type); - return result; -} - -rule_ptr Symbol::copy() const { - return std::make_shared(*this); -} - -string Symbol::to_string() const { - switch (type) { - case Symbol::Terminal: - return "(terminal " + std::to_string(index) + ")"; - case Symbol::NonTerminal: - return "(non-terminal " + std::to_string(index) + ")"; - case Symbol::External: - return "(external " + std::to_string(index) + ")"; - default: - return "(none)"; - } -} - -bool Symbol::operator<(const Symbol &other) const { - if (type < other.type) - return true; - if (other.type < type) - return false; - return (index < other.index); -} - -bool Symbol::is_built_in(Symbol::Index index) { - return index < 0; -} - -bool Symbol::is_built_in() const { - return is_built_in(index); -} - -bool Symbol::is_token() const { - return type == Symbol::Terminal; -} - -bool Symbol::is_external() const { - return type == Symbol::External; -} - -bool Symbol::is_non_terminal() const { - return type == Symbol::NonTerminal; -} - -void Symbol::accept(Visitor *visitor) const { - visitor->visit(this); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/symbol.h b/src/compiler/rules/symbol.h index a963433c..9df0dc84 100644 --- a/src/compiler/rules/symbol.h +++ b/src/compiler/rules/symbol.h @@ -1,55 +1,76 @@ #ifndef COMPILER_RULES_SYMBOL_H_ #define COMPILER_RULES_SYMBOL_H_ -#include -#include "compiler/rule.h" - namespace tree_sitter { namespace rules { 
-class Symbol : public Rule { - public: - typedef int Index; - - typedef enum { +struct Symbol { + using Index = int; + enum Type { External, Terminal, NonTerminal, - } Type; + }; - Symbol(Index index, Type type); + inline bool operator==(const Symbol &other) const { + return index == other.index && type == other.type; + } - bool operator==(const Symbol &other) const; - bool operator==(const Rule &other) const; + inline bool operator!=(const Symbol &other) const { + return !operator==(other); + } - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - - bool operator<(const Symbol &other) const; - static bool is_built_in(Index); - bool is_built_in() const; - bool is_token() const; - bool is_external() const; - bool is_non_terminal() const; + inline bool operator<(const Symbol &other) const { + if (type < other.type) return true; + if (type > other.type) return false; + return index < other.index; + } Index index; Type type; + + static Symbol terminal(Index index) { + return Symbol{index, Type::Terminal}; + } + + static Symbol external(Index index) { + return Symbol{index, Type::External}; + } + + static Symbol non_terminal(Index index) { + return Symbol{index, Type::NonTerminal}; + } + + bool is_non_terminal() const { + return type == Type::NonTerminal; + } + + bool is_terminal() const { + return type == Type::Terminal; + } + + bool is_external() const { + return type == Type::External; + } + + bool is_built_in() const { + return index < 0; + } }; +inline Symbol END_OF_INPUT() { + return Symbol{-1, Symbol::Terminal}; +} + +inline Symbol START() { + return Symbol{-2, Symbol::NonTerminal}; +} + +inline Symbol NONE() { + return Symbol{-3, Symbol::Type(-1)}; +} + } // namespace rules } // namespace tree_sitter -namespace std { - -template <> -struct hash { - size_t operator()(const tree_sitter::rules::Symbol &rule) const { - return rule.hash_code(); - } -}; - -} // std - -#endif // 
COMPILER_RULES_SYMBOL_H_ +#endif // COMPILER_RULES_SYMBOL_H_ \ No newline at end of file diff --git a/src/compiler/rules/visitor.cc b/src/compiler/rules/visitor.cc deleted file mode 100644 index e06f6c7f..00000000 --- a/src/compiler/rules/visitor.cc +++ /dev/null @@ -1,44 +0,0 @@ -#include "compiler/rules/visitor.h" -#include -#include "compiler/rule.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/string.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/pattern.h" -#include "compiler/rules/repeat.h" - -namespace tree_sitter { -namespace rules { - -using std::vector; - -Visitor::~Visitor() {} - -rule_ptr IdentityRuleFn::default_apply(const Rule *rule) { - return rule->copy(); -} - -rule_ptr IdentityRuleFn::apply_to(const Choice *rule) { - vector rules; - for (const auto &el : rule->elements) - rules.push_back(apply(el)); - return Choice::build(rules); -} - -rule_ptr IdentityRuleFn::apply_to(const Seq *rule) { - return Seq::build({ apply(rule->left), apply(rule->right) }); -} - -rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) { - return Repeat::build(apply(rule->content)); -} - -rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) { - return Metadata::build(apply(rule->rule), rule->params); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/visitor.h b/src/compiler/rules/visitor.h deleted file mode 100644 index c75e31dc..00000000 --- a/src/compiler/rules/visitor.h +++ /dev/null @@ -1,234 +0,0 @@ -#ifndef COMPILER_RULES_VISITOR_H_ -#define COMPILER_RULES_VISITOR_H_ - -#include "compiler/rule.h" - -namespace tree_sitter { -namespace rules { - -class Blank; -class NamedSymbol; -class CharacterSet; -class Choice; -class Repeat; -class Seq; -class String; -class Symbol; -class Pattern; -class Metadata; -class ExternalToken; - -class Visitor { - public: - virtual void visit(const Blank 
*rule) = 0; - virtual void visit(const CharacterSet *rule) = 0; - virtual void visit(const Choice *rule) = 0; - virtual void visit(const Metadata *rule) = 0; - virtual void visit(const Pattern *rule) = 0; - virtual void visit(const Repeat *rule) = 0; - virtual void visit(const Seq *rule) = 0; - virtual void visit(const String *rule) = 0; - virtual void visit(const NamedSymbol *rule) = 0; - virtual void visit(const Symbol *rule) = 0; - virtual void visit(const ExternalToken *rule) = 0; - virtual ~Visitor(); -}; - -template -class RuleFn : private Visitor { - public: - T apply(const rule_ptr &rule) { - value_ = T(); - rule->accept(this); - return value_; - } - - protected: - virtual T default_apply(const Rule *rule) { - return T(); - } - - virtual T apply_to(const Blank *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const CharacterSet *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Choice *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Metadata *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Pattern *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Repeat *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Seq *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const String *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const NamedSymbol *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Symbol *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const ExternalToken *rule) { - return default_apply((const Rule *)rule); - } - - void visit(const Blank *rule) { - value_ = apply_to(rule); - } - - void visit(const CharacterSet *rule) { - value_ = apply_to(rule); - } - - void visit(const Choice *rule) { - value_ = apply_to(rule); - } - - 
void visit(const Metadata *rule) { - value_ = apply_to(rule); - } - - void visit(const Pattern *rule) { - value_ = apply_to(rule); - } - - void visit(const Repeat *rule) { - value_ = apply_to(rule); - } - - void visit(const Seq *rule) { - value_ = apply_to(rule); - } - - void visit(const String *rule) { - value_ = apply_to(rule); - } - - void visit(const NamedSymbol *rule) { - value_ = apply_to(rule); - } - - void visit(const Symbol *rule) { - value_ = apply_to(rule); - } - - void visit(const ExternalToken *rule) { - value_ = apply_to(rule); - } - - private: - T value_; -}; - -template <> -class RuleFn : private Visitor { - public: - void apply(const rule_ptr &rule) { - rule->accept(this); - } - - protected: - virtual void default_apply(const Rule *rule) {} - - virtual void apply_to(const Blank *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const CharacterSet *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Choice *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Metadata *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Pattern *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Repeat *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Seq *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const String *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const NamedSymbol *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Symbol *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const ExternalToken *rule) { - return default_apply((const Rule *)rule); - } - - void visit(const Blank *rule) { - apply_to(rule); - } - void visit(const CharacterSet *rule) { - apply_to(rule); - } - void visit(const Choice *rule) { - apply_to(rule); - } 
- void visit(const Metadata *rule) { - apply_to(rule); - } - void visit(const Pattern *rule) { - apply_to(rule); - } - void visit(const Repeat *rule) { - apply_to(rule); - } - void visit(const Seq *rule) { - apply_to(rule); - } - void visit(const String *rule) { - apply_to(rule); - } - void visit(const NamedSymbol *rule) { - apply_to(rule); - } - void visit(const Symbol *rule) { - apply_to(rule); - } - void visit(const ExternalToken *rule) { - apply_to(rule); - } -}; - -class IdentityRuleFn : public RuleFn { - protected: - virtual rule_ptr default_apply(const Rule *rule); - virtual rule_ptr apply_to(const Choice *rule); - virtual rule_ptr apply_to(const Metadata *rule); - virtual rule_ptr apply_to(const Seq *rule); - virtual rule_ptr apply_to(const Repeat *rule); -}; - -} // namespace rules -} // namespace tree_sitter - -#endif // COMPILER_RULES_VISITOR_H_ diff --git a/src/compiler/syntax_grammar.cc b/src/compiler/syntax_grammar.cc deleted file mode 100644 index 254e1a34..00000000 --- a/src/compiler/syntax_grammar.cc +++ /dev/null @@ -1,20 +0,0 @@ -#include "compiler/syntax_grammar.h" -#include -#include -#include -#include "compiler/rules/symbol.h" -#include "compiler/rules/built_in_symbols.h" - -namespace tree_sitter { - -bool ExternalToken::operator==(const ExternalToken &other) const { - return name == other.name && type == other.type && - corresponding_internal_token == other.corresponding_internal_token; -} - -bool ProductionStep::operator==(const ProductionStep &other) const { - return symbol == other.symbol && precedence == other.precedence && - associativity == other.associativity; -} - -} // namespace tree_sitter diff --git a/src/compiler/syntax_grammar.h b/src/compiler/syntax_grammar.h index 9d154884..4099de18 100644 --- a/src/compiler/syntax_grammar.h +++ b/src/compiler/syntax_grammar.h @@ -4,22 +4,16 @@ #include #include #include -#include "compiler/rules/symbol.h" -#include "compiler/rules/metadata.h" -#include "compiler/variable.h" +#include 
"compiler/rule.h" +#include "compiler/grammar.h" namespace tree_sitter { -struct ExternalToken { - bool operator==(const ExternalToken &) const; - - std::string name; - VariableType type; - rules::Symbol corresponding_internal_token; -}; - struct ProductionStep { - bool operator==(const ProductionStep &) const; + inline bool operator==(const ProductionStep &other) const { + return symbol == other.symbol && precedence == other.precedence && + associativity == other.associativity; + } rules::Symbol symbol; int precedence; @@ -34,7 +28,7 @@ struct SyntaxVariable { std::vector productions; }; -typedef std::set ConflictSet; +using ConflictSet = std::set; struct SyntaxGrammar { std::vector variables; diff --git a/src/compiler/util/make_visitor.h b/src/compiler/util/make_visitor.h new file mode 100644 index 00000000..f55d4a09 --- /dev/null +++ b/src/compiler/util/make_visitor.h @@ -0,0 +1,38 @@ +#ifndef COMPILER_UTIL_MAKE_VISITOR_H_ +#define COMPILER_UTIL_MAKE_VISITOR_H_ + +namespace tree_sitter { +namespace util { + +template +struct visitor; + +template +struct visitor : Fn +{ + using type = Fn; + using Fn::operator(); + + visitor(Fn fn) : Fn(fn) {} +}; + +template +struct visitor : Fn, visitor +{ + using type = visitor; + using Fn::operator(); + using visitor::operator(); + + visitor(Fn fn, Fns... fns) : Fn(fn), visitor(fns...) {} +}; + +template +visitor make_visitor(Fns... 
fns) +{ + return visitor(fns...); +} + +} // namespace util +} // namespace tree_sitter + +#endif // COMPILER_UTIL_MAKE_VISITOR_H_ diff --git a/src/compiler/variable.h b/src/compiler/variable.h deleted file mode 100644 index 823852ff..00000000 --- a/src/compiler/variable.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef COMPILER_VARIABLE_H_ -#define COMPILER_VARIABLE_H_ - -#include -#include "compiler/rule.h" -#include "compiler/rules/symbol.h" - -namespace tree_sitter { - -enum VariableType { - VariableTypeHidden, - VariableTypeAuxiliary, - VariableTypeAnonymous, - VariableTypeNamed, -}; - -struct Variable { - std::string name; - VariableType type; - rule_ptr rule; -}; - -} // namespace tree_sitter - -#endif // COMPILER_VARIABLE_H_ diff --git a/test/compiler/build_tables/lex_conflict_manager_test.cc b/test/compiler/build_tables/lex_conflict_manager_test.cc index ca05a32c..7273a3c2 100644 --- a/test/compiler/build_tables/lex_conflict_manager_test.cc +++ b/test/compiler/build_tables/lex_conflict_manager_test.cc @@ -1,7 +1,6 @@ #include "test_helper.h" -#include "helpers/rule_helpers.h" #include "helpers/stream_methods.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" #include "compiler/parse_table.h" #include "compiler/build_tables/lex_conflict_manager.h" #include "compiler/build_tables/lex_item.h" @@ -14,11 +13,11 @@ START_TEST describe("LexConflictManager::resolve(new_action, old_action)", []() { LexConflictManager conflict_manager; bool update; - Symbol sym1(0, Symbol::Terminal); - Symbol sym2(1, Symbol::Terminal); - Symbol sym3(2, Symbol::Terminal); - Symbol sym4(3, Symbol::Terminal); - LexItemSet item_set({ LexItem(sym4, blank() )}); + Symbol sym1 = Symbol::terminal(0); + Symbol sym2 = Symbol::terminal(1); + Symbol sym3 = Symbol::terminal(2); + Symbol sym4 = Symbol::terminal(3); + LexItemSet item_set({ LexItem(sym4, Blank{} )}); before_each([&]() { conflict_manager = LexConflictManager(); diff --git 
a/test/compiler/build_tables/lex_item_test.cc b/test/compiler/build_tables/lex_item_test.cc index 55413759..3ca271fc 100644 --- a/test/compiler/build_tables/lex_item_test.cc +++ b/test/compiler/build_tables/lex_item_test.cc @@ -1,8 +1,6 @@ #include "test_helper.h" #include "compiler/build_tables/lex_item.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules.h" -#include "helpers/rule_helpers.h" +#include "compiler/rule.h" #include "helpers/stream_methods.h" using namespace rules; @@ -14,7 +12,7 @@ START_TEST describe("LexItem", []() { describe("completion_status()", [&]() { it("indicates whether the item is done and its precedence", [&]() { - LexItem item1(Symbol(0, Symbol::Terminal), character({ 'a', 'b', 'c' })); + LexItem item1(Symbol::terminal(0), CharacterSet({'a', 'b', 'c'})); AssertThat(item1.completion_status().is_done, IsFalse()); AssertThat(item1.completion_status().precedence, Equals(PrecedenceRange())); @@ -22,15 +20,18 @@ describe("LexItem", []() { params.precedence = 3; params.has_precedence = true; params.is_string = 1; - LexItem item2(Symbol(0, Symbol::Terminal), choice({ - metadata(blank(), params), - character({ 'a', 'b', 'c' }) - })); + LexItem item2(Symbol::terminal(0), Choice{{ + Metadata{Blank{}, params}, + CharacterSet{{ 'a', 'b', 'c' }} + }}); AssertThat(item2.completion_status().is_done, IsTrue()); AssertThat(item2.completion_status().precedence, Equals(PrecedenceRange(3))); - LexItem item3(Symbol(0, Symbol::Terminal), repeat(character({ ' ', '\t' }))); + LexItem item3(Symbol::terminal(0), Choice{{ + Blank{}, + Repeat{CharacterSet{{ ' ', '\t' }}}, + }}); AssertThat(item3.completion_status().is_done, IsTrue()); AssertThat(item3.completion_status().precedence, Equals(PrecedenceRange())); }); @@ -40,17 +41,17 @@ describe("LexItem", []() { describe("LexItemSet::transitions()", [&]() { it("handles single characters", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })), + 
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'x' }}), }); AssertThat( item_set.transitions(), Equals(LexItemSet::TransitionMap({ { - CharacterSet().include('x'), + CharacterSet{{'x'}}, Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(1), Blank{}), }), PrecedenceRange(), false @@ -64,7 +65,7 @@ describe("LexItemSet::transitions()", [&]() { params.is_main_token = true; LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), metadata(character({ 'x' }), params)), + LexItem(Symbol::non_terminal(1), Metadata{CharacterSet{{'x'}}, params}), }); AssertThat( @@ -74,7 +75,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('x'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), metadata(blank(), params)), + LexItem(Symbol::non_terminal(1), Metadata{Blank{}, params}), }), PrecedenceRange(), true @@ -85,11 +86,11 @@ describe("LexItemSet::transitions()", [&]() { it("handles sequences", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - character({ 'w' }), - character({ 'x' }), - character({ 'y' }), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Seq::build({ + CharacterSet{{ 'w' }}, + CharacterSet{{ 'x' }}, + CharacterSet{{ 'y' }}, + CharacterSet{{ 'z' }}, })), }); @@ -100,10 +101,10 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('w'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - character({ 'x' }), - character({ 'y' }), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Seq::build({ + CharacterSet{{ 'x' }}, + CharacterSet{{ 'y' }}, + CharacterSet{{ 'z' }}, })), }), PrecedenceRange(), @@ -115,14 +116,14 @@ describe("LexItemSet::transitions()", [&]() { it("handles sequences with nested precedence", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - prec(3, seq({ - character({ 'v' }), - prec(4, seq({ - character({ 'w' }), - character({ 'x' }) })), 
- character({ 'y' }) })), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Seq::build({ + Metadata::prec(3, Seq::build({ + CharacterSet{{ 'v' }}, + Metadata::prec(4, Seq::build({ + CharacterSet{{ 'w' }}, + CharacterSet{{ 'x' }} })), + CharacterSet{{ 'y' }} })), + CharacterSet{{ 'z' }}, })), }); @@ -137,13 +138,15 @@ describe("LexItemSet::transitions()", [&]() { // The outer precedence is now 'active', because we are within its // contained rule. LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - active_prec(3, seq({ - prec(4, seq({ - character({ 'w' }), - character({ 'x' }) })), - character({ 'y' }) })), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Seq::build({ + Metadata::active_prec(3, Seq::build({ + Metadata::prec(4, Seq::build({ + CharacterSet{{ 'w' }}, + CharacterSet{{ 'x' }} + })), + CharacterSet{{ 'y' }} + })), + CharacterSet{{ 'z' }}, })), }), @@ -165,11 +168,12 @@ describe("LexItemSet::transitions()", [&]() { Transition{ // The inner precedence is now 'active' LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - active_prec(3, seq({ - active_prec(4, character({ 'x' })), - character({ 'y' }) })), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Seq::build({ + Metadata::active_prec(3, Seq::build({ + Metadata::active_prec(4, CharacterSet{{'x'}}), + CharacterSet{{'y'}} + })), + CharacterSet{{'z'}}, })), }), @@ -190,9 +194,9 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('x'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - active_prec(3, character({ 'y' })), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Seq::build({ + Metadata::active_prec(3, CharacterSet{{'y'}}), + CharacterSet{{'z'}}, })), }), @@ -213,7 +217,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('y'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'z' }}), }), 
PrecedenceRange(3), false @@ -224,13 +228,13 @@ describe("LexItemSet::transitions()", [&]() { it("handles sequences where the left hand side can be blank", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - choice({ - character({ 'x' }), - blank(), + LexItem(Symbol::non_terminal(1), Seq::build({ + Choice::build({ + CharacterSet{{ 'x' }}, + Blank{}, }), - character({ 'y' }), - character({ 'z' }), + CharacterSet{{ 'y' }}, + CharacterSet{{ 'z' }}, })), }); @@ -241,9 +245,9 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('x'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - character({ 'y' }), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Seq::build({ + CharacterSet{{ 'y' }}, + CharacterSet{{ 'z' }}, })), }), PrecedenceRange(), @@ -254,7 +258,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('y'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'z' }}), }), PrecedenceRange(), false @@ -265,7 +269,7 @@ describe("LexItemSet::transitions()", [&]() { it("handles blanks", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(1), Blank{}), }); AssertThat(item_set.transitions(), IsEmpty()); @@ -273,11 +277,11 @@ describe("LexItemSet::transitions()", [&]() { it("handles repeats", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), repeat1(seq({ - character({ 'a' }), - character({ 'b' }), - }))), - LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))), + LexItem(Symbol::non_terminal(1), Repeat{Seq::build({ + CharacterSet{{ 'a' }}, + CharacterSet{{ 'b' }}, + })}), + LexItem(Symbol::non_terminal(2), Repeat{CharacterSet{{'c'}}}), }); AssertThat( @@ -287,14 +291,14 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('a'), Transition{ LexItemSet({ - LexItem(Symbol(1, 
Symbol::NonTerminal), seq({ - character({ 'b' }), - repeat1(seq({ - character({ 'a' }), - character({ 'b' }), - })) + LexItem(Symbol::non_terminal(1), Seq::build({ + CharacterSet{{ 'b' }}, + Repeat{Seq::build({ + CharacterSet{{ 'a' }}, + CharacterSet{{ 'b' }}, + })} })), - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'b' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'b' }}), }), PrecedenceRange(), false @@ -304,8 +308,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('c'), Transition{ LexItemSet({ - LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))), - LexItem(Symbol(2, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(2), Repeat{CharacterSet{{ 'c' }}}), + LexItem(Symbol::non_terminal(2), Blank{}), }), PrecedenceRange(), false @@ -316,7 +320,7 @@ describe("LexItemSet::transitions()", [&]() { it("handles repeats with precedence", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' })))) + LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Repeat{CharacterSet{{ 'a' }}})) }); AssertThat( @@ -326,8 +330,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('a'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' })))), - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, blank())), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Repeat{CharacterSet{{ 'a' }}})), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Blank{})), }), PrecedenceRange(-1), false @@ -338,14 +342,14 @@ describe("LexItemSet::transitions()", [&]() { it("handles choices between overlapping character sets", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), choice({ - active_prec(2, seq({ - character({ 'a', 'b', 'c', 'd' }), - character({ 'x' }), + LexItem(Symbol::non_terminal(1), Choice::build({ + Metadata::active_prec(2, Seq::build({ + 
CharacterSet{{ 'a', 'b', 'c', 'd' }}, + CharacterSet{{ 'x' }}, })), - active_prec(3, seq({ - character({ 'c', 'd', 'e', 'f' }), - character({ 'y' }), + Metadata::active_prec(3, Seq::build({ + CharacterSet{{ 'c', 'd', 'e', 'f' }}, + CharacterSet{{ 'y' }}, })), })) }); @@ -357,7 +361,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('a', 'b'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(2, CharacterSet{{ 'x' }})), }), PrecedenceRange(2), false @@ -367,8 +371,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('c', 'd'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))), - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(2, CharacterSet{{ 'x' }})), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(3, CharacterSet{{ 'y' }})), }), PrecedenceRange(2, 3), false @@ -378,7 +382,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('e', 'f'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(3, CharacterSet{{ 'y' }})), }), PrecedenceRange(3), false @@ -389,14 +393,14 @@ describe("LexItemSet::transitions()", [&]() { it("handles choices between a subset and a superset of characters", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), choice({ - seq({ - character({ 'b', 'c', 'd' }), - character({ 'x' }), + LexItem(Symbol::non_terminal(1), Choice::build({ + Seq::build({ + CharacterSet{{ 'b', 'c', 'd' }}, + CharacterSet{{ 'x' }}, }), - seq({ - character({ 'a', 'b', 'c', 'd', 'e', 'f' }), - character({ 'y' }), + Seq::build({ + CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f' }}, + CharacterSet{{ 'y' }}, }), })), }); @@ -408,7 +412,7 
@@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('a').include('e', 'f'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'y' }}), }), PrecedenceRange(), false @@ -418,8 +422,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('b', 'd'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })), - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'x' }}), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'y' }}), }), PrecedenceRange(), false @@ -430,15 +434,15 @@ describe("LexItemSet::transitions()", [&]() { it("handles choices between whitelisted and blacklisted character sets", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - choice({ - character({ '/' }, false), - seq({ - character({ '\\' }), - character({ '/' }), + LexItem(Symbol::non_terminal(1), Seq::build({ + Choice::build({ + CharacterSet().include_all().exclude('/'), + Seq::build({ + CharacterSet{{ '\\' }}, + CharacterSet{{ '/' }}, }), }), - character({ '/' }), + CharacterSet{{ '/' }}, })) }); @@ -449,7 +453,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include_all().exclude('/').exclude('\\'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ '/' }}), }), PrecedenceRange(), false @@ -459,8 +463,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('\\'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })), - LexItem(Symbol(1, Symbol::NonTerminal), seq({ character({ '/' }), character({ '/' }) })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ '/' }}), + LexItem(Symbol::non_terminal(1), Seq::build({ CharacterSet{{ '/' }}, CharacterSet{{ '/' }} })), }), PrecedenceRange(), false @@ -471,8 +475,8 
@@ describe("LexItemSet::transitions()", [&]() { it("handles different items with overlapping character sets", [&]() { LexItemSet set1({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'a', 'b', 'c', 'd', 'e', 'f' })), - LexItem(Symbol(2, Symbol::NonTerminal), character({ 'e', 'f', 'g', 'h', 'i' })) + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f' }}), + LexItem(Symbol::non_terminal(2), CharacterSet{{ 'e', 'f', 'g', 'h', 'i' }}) }); AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({ @@ -480,7 +484,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('a', 'd'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(1), Blank{}), }), PrecedenceRange(), false @@ -490,8 +494,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('e', 'f'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), blank()), - LexItem(Symbol(2, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(1), Blank{}), + LexItem(Symbol::non_terminal(2), Blank{}), }), PrecedenceRange(), false @@ -501,7 +505,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('g', 'i'), Transition{ LexItemSet({ - LexItem(Symbol(2, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(2), Blank{}), }), PrecedenceRange(), false diff --git a/test/compiler/build_tables/parse_item_set_builder_test.cc b/test/compiler/build_tables/parse_item_set_builder_test.cc index 9bc4715b..8583c7b1 100644 --- a/test/compiler/build_tables/parse_item_set_builder_test.cc +++ b/test/compiler/build_tables/parse_item_set_builder_test.cc @@ -3,8 +3,6 @@ #include "compiler/lexical_grammar.h" #include "compiler/build_tables/parse_item_set_builder.h" #include "compiler/build_tables/lookahead_set.h" -#include "compiler/rules/built_in_symbols.h" -#include "helpers/rule_helpers.h" using namespace build_tables; using namespace rules; @@ -17,7 +15,7 @@ 
describe("ParseItemSetBuilder", []() { lexical_variables.push_back({ "token_" + to_string(i), VariableTypeNamed, - blank(), + Blank{}, false }); } @@ -28,23 +26,23 @@ describe("ParseItemSetBuilder", []() { SyntaxGrammar grammar{{ SyntaxVariable{"rule0", VariableTypeNamed, { Production({ - {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(11, Symbol::Terminal), 0, AssociativityNone}, + {Symbol::non_terminal(1), 0, AssociativityNone}, + {Symbol::terminal(11), 0, AssociativityNone}, }), }}, SyntaxVariable{"rule1", VariableTypeNamed, { Production({ - {Symbol(12, Symbol::Terminal), 0, AssociativityNone}, - {Symbol(13, Symbol::Terminal), 0, AssociativityNone}, + {Symbol::terminal(12), 0, AssociativityNone}, + {Symbol::terminal(13), 0, AssociativityNone}, }), Production({ - {Symbol(2, Symbol::NonTerminal), 0, AssociativityNone}, + {Symbol::non_terminal(2), 0, AssociativityNone}, }) }}, SyntaxVariable{"rule2", VariableTypeNamed, { Production({ - {Symbol(14, Symbol::Terminal), 0, AssociativityNone}, - {Symbol(15, Symbol::Terminal), 0, AssociativityNone}, + {Symbol::terminal(14), 0, AssociativityNone}, + {Symbol::terminal(15), 0, AssociativityNone}, }) }}, }, {}, {}, {}}; @@ -55,8 +53,8 @@ describe("ParseItemSetBuilder", []() { ParseItemSet item_set({ { - ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0), - LookaheadSet({ Symbol(10, Symbol::Terminal) }), + ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + LookaheadSet({ Symbol::terminal(10) }), } }); @@ -65,20 +63,20 @@ describe("ParseItemSetBuilder", []() { AssertThat(item_set, Equals(ParseItemSet({ { - ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0), - LookaheadSet({ Symbol(10, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + LookaheadSet({ Symbol::terminal(10) }) }, { - ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0), - LookaheadSet({ Symbol(11, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(1), production(1, 0), 
0), + LookaheadSet({ Symbol::terminal(11) }) }, { - ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0), - LookaheadSet({ Symbol(11, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(1), production(1, 1), 0), + LookaheadSet({ Symbol::terminal(11) }) }, { - ParseItem(Symbol(2, Symbol::NonTerminal), production(2, 0), 0), - LookaheadSet({ Symbol(11, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(2), production(2, 0), 0), + LookaheadSet({ Symbol::terminal(11) }) }, }))); }); @@ -87,14 +85,14 @@ describe("ParseItemSetBuilder", []() { SyntaxGrammar grammar{{ SyntaxVariable{"rule0", VariableTypeNamed, { Production({ - {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(11, Symbol::Terminal), 0, AssociativityNone}, + {Symbol::non_terminal(1), 0, AssociativityNone}, + {Symbol::terminal(11), 0, AssociativityNone}, }), }}, SyntaxVariable{"rule1", VariableTypeNamed, { Production({ - {Symbol(12, Symbol::Terminal), 0, AssociativityNone}, - {Symbol(13, Symbol::Terminal), 0, AssociativityNone}, + {Symbol::terminal(12), 0, AssociativityNone}, + {Symbol::terminal(13), 0, AssociativityNone}, }), Production({}) }}, @@ -106,8 +104,8 @@ describe("ParseItemSetBuilder", []() { ParseItemSet item_set({ { - ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0), - LookaheadSet({ Symbol(10, Symbol::Terminal) }), + ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + LookaheadSet({ Symbol::terminal(10) }), } }); @@ -116,16 +114,16 @@ describe("ParseItemSetBuilder", []() { AssertThat(item_set, Equals(ParseItemSet({ { - ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0), - LookaheadSet({ Symbol(10, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + LookaheadSet({ Symbol::terminal(10) }) }, { - ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0), - LookaheadSet({ Symbol(11, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(1), production(1, 0), 0), + LookaheadSet({ Symbol::terminal(11) }) 
}, { - ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0), - LookaheadSet({ Symbol(11, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(1), production(1, 1), 0), + LookaheadSet({ Symbol::terminal(11) }) }, }))); }); diff --git a/test/compiler/build_tables/rule_can_be_blank_test.cc b/test/compiler/build_tables/rule_can_be_blank_test.cc index a3068f91..0fe07350 100644 --- a/test/compiler/build_tables/rule_can_be_blank_test.cc +++ b/test/compiler/build_tables/rule_can_be_blank_test.cc @@ -1,8 +1,6 @@ #include "test_helper.h" #include "compiler/build_tables/rule_can_be_blank.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules.h" -#include "helpers/rule_helpers.h" +#include "compiler/rule.h" using namespace rules; using build_tables::rule_can_be_blank; @@ -10,49 +8,48 @@ using build_tables::rule_can_be_blank; START_TEST describe("rule_can_be_blank", [&]() { - rule_ptr rule; + Rule rule; it("returns false for basic rules", [&]() { - AssertThat(rule_can_be_blank(i_sym(3)), IsFalse()); - AssertThat(rule_can_be_blank(str("x")), IsFalse()); - AssertThat(rule_can_be_blank(pattern("x")), IsFalse()); + AssertThat(rule_can_be_blank(CharacterSet{{'x'}}), IsFalse()); }); it("returns true for blanks", [&]() { - AssertThat(rule_can_be_blank(blank()), IsTrue()); + AssertThat(rule_can_be_blank(Blank{}), IsTrue()); }); - it("returns true for repeats", [&]() { - AssertThat(rule_can_be_blank(repeat(str("x"))), IsTrue()); + it("returns true for repeats iff the content can be blank", [&]() { + AssertThat(rule_can_be_blank(Repeat{CharacterSet{{'x'}}}), IsFalse()); + AssertThat(rule_can_be_blank(Repeat{Blank{}}), IsTrue()); }); it("returns true for choices iff one or more sides can be blank", [&]() { - rule = choice({ sym("x"), blank() }); + rule = Choice::build({ CharacterSet{{'x'}}, Blank{} }); AssertThat(rule_can_be_blank(rule), IsTrue()); - rule = choice({ blank(), sym("x") }); + rule = Choice::build({ Blank{}, CharacterSet{{'x'}} }); 
AssertThat(rule_can_be_blank(rule), IsTrue()); - rule = choice({ sym("x"), sym("y") }); + rule = Choice::build({ CharacterSet{{'x'}}, CharacterSet{{'y'}} }); AssertThat(rule_can_be_blank(rule), IsFalse()); }); it("returns true for sequences iff both sides can be blank", [&]() { - rule = seq({ blank(), str("x") }); + rule = Seq::build({ Blank{}, CharacterSet{{'x'}} }); AssertThat(rule_can_be_blank(rule), IsFalse()); - rule = seq({ str("x"), blank() }); + rule = Seq::build({ CharacterSet{{'x'}}, Blank{} }); AssertThat(rule_can_be_blank(rule), IsFalse()); - rule = seq({ blank(), choice({ sym("x"), blank() }) }); + rule = Seq::build({ Blank{}, Choice::build({ CharacterSet{{'x'}}, Blank{} }) }); AssertThat(rule_can_be_blank(rule), IsTrue()); }); it("ignores metadata rules", [&]() { - rule = make_shared(blank(), MetadataParams()); + rule = Metadata::prec(1, Blank{}); AssertThat(rule_can_be_blank(rule), IsTrue()); - rule = make_shared(sym("one"), MetadataParams()); + rule = Metadata::prec(1, CharacterSet{{'x'}}); AssertThat(rule_can_be_blank(rule), IsFalse()); }); }); diff --git a/test/compiler/prepare_grammar/expand_repeats_test.cc b/test/compiler/prepare_grammar/expand_repeats_test.cc index 7d735a4a..2c70c961 100644 --- a/test/compiler/prepare_grammar/expand_repeats_test.cc +++ b/test/compiler/prepare_grammar/expand_repeats_test.cc @@ -1,20 +1,24 @@ #include "test_helper.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/prepare_grammar/expand_repeats.h" -#include "helpers/rule_helpers.h" #include "helpers/stream_methods.h" -START_TEST - using namespace rules; using prepare_grammar::InitialSyntaxGrammar; using prepare_grammar::expand_repeats; +using Variable = InitialSyntaxGrammar::Variable; + +bool operator==(const Variable &left, const Variable &right) { + return left.name == right.name && left.rule == right.rule && left.type == right.type; +} + +START_TEST describe("expand_repeats", []() { it("replaces repeat rules with pairs of 
recursive rules", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, repeat1(i_token(0))}, + Variable{"rule0", VariableTypeNamed, Repeat{Symbol::terminal(0)}}, }, {}, {}, {} }; @@ -22,10 +26,10 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, i_sym(1)}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(1), i_token(0) }), - i_token(0), + Variable{"rule0", VariableTypeNamed, Symbol::non_terminal(1)}, + Variable{"rule0_repeat1", VariableTypeAuxiliary, Choice::build({ + Seq::build({ Symbol::non_terminal(1), Symbol::terminal(0) }), + Symbol::terminal(0), })}, })); }); @@ -33,9 +37,9 @@ describe("expand_repeats", []() { it("replaces repeats inside of sequences", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, seq({ - i_token(10), - repeat1(i_token(11)), + Variable{"rule0", VariableTypeNamed, Seq::build({ + Symbol::terminal(10), + Repeat{Symbol::terminal(11)}, })}, }, {}, {}, {} @@ -44,13 +48,13 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, seq({ - i_token(10), - i_sym(1), + Variable{"rule0", VariableTypeNamed, Seq::build({ + Symbol::terminal(10), + Symbol::non_terminal(1), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(1), i_token(11) }), - i_token(11) + Variable{"rule0_repeat1", VariableTypeAuxiliary, Choice::build({ + Seq::build({ Symbol::non_terminal(1), Symbol::terminal(11) }), + Symbol::terminal(11) })}, })); }); @@ -58,9 +62,9 @@ describe("expand_repeats", []() { it("replaces repeats inside of choices", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, choice({ - i_token(10), - repeat1(i_token(11)) + Variable{"rule0", VariableTypeNamed, Choice::build({ + Symbol::terminal(10), + 
Repeat{Symbol::terminal(11)} })}, }, {}, {}, {} @@ -69,13 +73,13 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, choice({ - i_token(10), - i_sym(1), + Variable{"rule0", VariableTypeNamed, Choice::build({ + Symbol::terminal(10), + Symbol::non_terminal(1), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(1), i_token(11) }), - i_token(11), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Choice::build({ + Seq::build({ Symbol::non_terminal(1), Symbol::terminal(11) }), + Symbol::terminal(11), })}, })); }); @@ -83,13 +87,13 @@ describe("expand_repeats", []() { it("does not create redundant auxiliary rules", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, choice({ - seq({ i_token(1), repeat1(i_token(4)) }), - seq({ i_token(2), repeat1(i_token(4)) }), + Variable{"rule0", VariableTypeNamed, Choice::build({ + Seq::build({ Symbol::terminal(1), Repeat{Symbol::terminal(4)} }), + Seq::build({ Symbol::terminal(2), Repeat{Symbol::terminal(4)} }), })}, - Variable{"rule1", VariableTypeNamed, seq({ - i_token(3), - repeat1(i_token(4)) + Variable{"rule1", VariableTypeNamed, Seq::build({ + Symbol::terminal(3), + Repeat{Symbol::terminal(4)} })}, }, {}, {}, {} @@ -98,17 +102,17 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, choice({ - seq({ i_token(1), i_sym(2) }), - seq({ i_token(2), i_sym(2) }), + Variable{"rule0", VariableTypeNamed, Choice::build({ + Seq::build({ Symbol::terminal(1), Symbol::non_terminal(2) }), + Seq::build({ Symbol::terminal(2), Symbol::non_terminal(2) }), })}, - Variable{"rule1", VariableTypeNamed, seq({ - i_token(3), - i_sym(2), + Variable{"rule1", VariableTypeNamed, Seq::build({ + Symbol::terminal(3), + Symbol::non_terminal(2), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, 
choice({ - seq({ i_sym(2), i_token(4) }), - i_token(4), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Choice::build({ + Seq::build({ Symbol::non_terminal(2), Symbol::terminal(4) }), + Symbol::terminal(4), })}, })); }); @@ -116,9 +120,9 @@ describe("expand_repeats", []() { it("can replace multiple repeats in the same rule", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, seq({ - repeat1(i_token(10)), - repeat1(i_token(11)), + Variable{"rule0", VariableTypeNamed, Seq::build({ + Repeat{Symbol::terminal(10)}, + Repeat{Symbol::terminal(11)}, })}, }, {}, {}, {} @@ -127,17 +131,17 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, seq({ - i_sym(1), - i_sym(2), + Variable{"rule0", VariableTypeNamed, Seq::build({ + Symbol::non_terminal(1), + Symbol::non_terminal(2), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(1), i_token(10) }), - i_token(10), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Choice::build({ + Seq::build({ Symbol::non_terminal(1), Symbol::terminal(10) }), + Symbol::terminal(10), })}, - Variable{"rule0_repeat2", VariableTypeAuxiliary, choice({ - seq({ i_sym(2), i_token(11) }), - i_token(11), + Variable{"rule0_repeat2", VariableTypeAuxiliary, Choice::build({ + Seq::build({ Symbol::non_terminal(2), Symbol::terminal(11) }), + Symbol::terminal(11), })}, })); }); @@ -145,8 +149,8 @@ describe("expand_repeats", []() { it("can replace repeats in multiple rules", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, repeat1(i_token(10))}, - Variable{"rule1", VariableTypeNamed, repeat1(i_token(11))}, + Variable{"rule0", VariableTypeNamed, Repeat{Symbol::terminal(10)}}, + Variable{"rule1", VariableTypeNamed, Repeat{Symbol::terminal(11)}}, }, {}, {}, {} }; @@ -154,15 +158,15 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); 
AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, i_sym(2)}, - Variable{"rule1", VariableTypeNamed, i_sym(3)}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(2), i_token(10) }), - i_token(10), + Variable{"rule0", VariableTypeNamed, Symbol::non_terminal(2)}, + Variable{"rule1", VariableTypeNamed, Symbol::non_terminal(3)}, + Variable{"rule0_repeat1", VariableTypeAuxiliary, Choice::build({ + Seq::build({ Symbol::non_terminal(2), Symbol::terminal(10) }), + Symbol::terminal(10), })}, - Variable{"rule1_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(3), i_token(11) }), - i_token(11), + Variable{"rule1_repeat1", VariableTypeAuxiliary, Choice::build({ + Seq::build({ Symbol::non_terminal(3), Symbol::terminal(11) }), + Symbol::terminal(11), })}, })); }); diff --git a/test/compiler/prepare_grammar/expand_tokens_test.cc b/test/compiler/prepare_grammar/expand_tokens_test.cc index 0c1e6209..aa273042 100644 --- a/test/compiler/prepare_grammar/expand_tokens_test.cc +++ b/test/compiler/prepare_grammar/expand_tokens_test.cc @@ -1,12 +1,13 @@ #include "test_helper.h" #include "compiler/lexical_grammar.h" #include "compiler/prepare_grammar/expand_tokens.h" -#include "helpers/rule_helpers.h" +#include "helpers/stream_methods.h" START_TEST using namespace rules; -using prepare_grammar::expand_tokens; +using prepare_grammar::expand_token; +using prepare_grammar::ExpandTokenResult; describe("expand_tokens", []() { MetadataParams string_token_params; @@ -15,153 +16,72 @@ describe("expand_tokens", []() { describe("string rules", [&]() { it("replaces strings with sequences of character sets", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "rule_A", - VariableTypeNamed, - seq({ - i_sym(10), - str("xyz"), - i_sym(11), - }), - false - } - }, - {} - }; - - auto result = expand_tokens(grammar); - - AssertThat(result.second, Equals(CompileError::none())); - AssertThat(result.first.variables, Equals(vector{ - 
LexicalVariable{ - "rule_A", - VariableTypeNamed, - seq({ - i_sym(10), - metadata(seq({ - character({ 'x' }), - character({ 'y' }), - character({ 'z' }), - }), string_token_params), - i_sym(11), + AssertThat( + expand_token(Seq::build({ + String{"a"}, + String{"bcd"}, + String{"e"} + })).rule, + Equals(*Seq::build({ + CharacterSet{{ 'a' }}, + Seq::build({ + CharacterSet{{ 'b' }}, + CharacterSet{{ 'c' }}, + CharacterSet{{ 'd' }}, }), - false - } - })); + CharacterSet{{ 'e' }}, + }))); }); it("handles strings containing non-ASCII UTF8 characters", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "rule_A", - VariableTypeNamed, - str("\u03B1 \u03B2"), - false - }, - }, - {} - }; - - auto result = expand_tokens(grammar); - - AssertThat(result.first.variables, Equals(vector{ - LexicalVariable{ - "rule_A", - VariableTypeNamed, - metadata(seq({ - character({ 945 }), - character({ ' ' }), - character({ 946 }), - }), string_token_params), - false - } - })); + AssertThat( + expand_token(String{"\u03B1 \u03B2"}).rule, + Equals(*Seq::build({ + CharacterSet{{ 945 }}, + CharacterSet{{ ' ' }}, + CharacterSet{{ 946 }}, + })) + ); }); }); describe("regexp rules", [&]() { it("replaces regexps with the equivalent rule tree", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "rule_A", - VariableTypeNamed, - seq({ - i_sym(10), - pattern("x*"), - i_sym(11), - }), - false - } - }, - {} - }; - - auto result = expand_tokens(grammar); - - AssertThat(result.second, Equals(CompileError::none())); - AssertThat(result.first.variables, Equals(vector{ - LexicalVariable{ - "rule_A", - VariableTypeNamed, - seq({ - i_sym(10), - repeat(character({ 'x' })), - i_sym(11), - }), - false - } - })); + AssertThat( + expand_token(Seq::build({ + String{"a"}, + Pattern{"x+"}, + String{"b"}, + })).rule, + Equals(*Seq::build({ + CharacterSet{{'a'}}, + Repeat{CharacterSet{{ 'x' }}}, + CharacterSet{{'b'}}, + })) + ); }); it("handles regexps containing non-ASCII UTF8 characters", [&]() { - 
LexicalGrammar grammar{ - { - LexicalVariable{ - "rule_A", - VariableTypeNamed, - pattern("[^\u03B1-\u03B4]*"), - false - } - }, - {} - }; - - auto result = expand_tokens(grammar); - - AssertThat(result.first.variables, Equals(vector{ - LexicalVariable{ - "rule_A", - VariableTypeNamed, - repeat(character({ 945, 946, 947, 948 }, false)), - false - } - })); + AssertThat( + expand_token(Pattern{"[^\u03B1-\u03B4]+"}).rule, + Equals(Rule(Repeat{ + CharacterSet().include_all().exclude(945, 948) + })) + ); }); it("returns an error when the grammar contains an invalid regex", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "rule_A", - VariableTypeNamed, - seq({ - pattern("("), - str("xyz"), - pattern("["), - }), - false - }, - }, - {} - }; - - auto result = expand_tokens(grammar); - - AssertThat(result.second, Equals(CompileError(TSCompileErrorTypeInvalidRegex, "unmatched open paren"))); + AssertThat( + expand_token(Seq::build({ + Pattern{"("}, + String{"xyz"}, + Pattern{"["}, + })).error, + Equals(CompileError( + TSCompileErrorTypeInvalidRegex, + "unmatched open paren" + )) + ); }); }); }); diff --git a/test/compiler/prepare_grammar/extract_choices_test.cc b/test/compiler/prepare_grammar/extract_choices_test.cc index f6891865..dbd8c5d0 100644 --- a/test/compiler/prepare_grammar/extract_choices_test.cc +++ b/test/compiler/prepare_grammar/extract_choices_test.cc @@ -1,104 +1,75 @@ #include "test_helper.h" +#include "helpers/stream_methods.h" #include "compiler/prepare_grammar/extract_choices.h" -#include "helpers/rule_helpers.h" START_TEST using namespace rules; using prepare_grammar::extract_choices; -class rule_vector : public vector { - public: - bool operator==(const vector &other) const { - if (this->size() != other.size()) return false; - for (size_t i = 0; i < this->size(); i++) { - auto rule = this->operator[](i); - auto other_rule = other[i]; - if (!rule->operator==(*rule)) - return false; - } - return true; - } - - rule_vector(const initializer_list 
&list) : - vector(list) {} -}; - describe("extract_choices", []() { it("expands rules containing choices into multiple rules", [&]() { - auto rule = seq({ - sym("a"), - choice({ sym("b"), sym("c"), sym("d") }), - sym("e") + auto rule = Seq::build({ + Symbol::terminal(1), + Choice::build({ + Symbol::terminal(2), + Symbol::terminal(3), + Symbol::terminal(4) + }), + Symbol::terminal(5) }); - AssertThat(extract_choices(rule), Equals(rule_vector({ - seq({ sym("a"), sym("b"), sym("e") }), - seq({ sym("a"), sym("c"), sym("e") }), - seq({ sym("a"), sym("d"), sym("e") }), + auto result = extract_choices(rule); + + AssertThat(result, Equals(vector({ + Seq::build({Symbol::terminal(1), Symbol::terminal(2), Symbol::terminal(5)}), + Seq::build({Symbol::terminal(1), Symbol::terminal(3), Symbol::terminal(5)}), + Seq::build({Symbol::terminal(1), Symbol::terminal(4), Symbol::terminal(5)}), }))); }); it("handles metadata rules", [&]() { - auto rule = prec(5, choice({ sym("b"), sym("c"), sym("d") })); + auto rule = Metadata::prec(5, Choice::build({ + Symbol::terminal(2), + Symbol::terminal(3), + Symbol::terminal(4) + })); - AssertThat(extract_choices(rule), Equals(rule_vector({ - prec(5, sym("b")), - prec(5, sym("c")), - prec(5, sym("d")), + AssertThat(extract_choices(rule), Equals(vector({ + Metadata::prec(5, Symbol::terminal(2)), + Metadata::prec(5, Symbol::terminal(3)), + Metadata::prec(5, Symbol::terminal(4)), }))); }); it("handles nested choices", [&]() { - auto rule = choice({ - seq({ choice({ sym("a"), sym("b") }), sym("c") }), - sym("d") + auto rule = Choice::build({ + Seq::build({ + Choice::build({ + Symbol::terminal(1), + Symbol::terminal(2) + }), + Symbol::terminal(3) + }), + Symbol::terminal(4) }); - AssertThat(extract_choices(rule), Equals(rule_vector({ - seq({ sym("a"), sym("c") }), - seq({ sym("b"), sym("c") }), - sym("d"), + AssertThat(extract_choices(rule), Equals(vector({ + Seq::build({Symbol::terminal(1), Symbol::terminal(3)}), + Seq::build({Symbol::terminal(2), 
Symbol::terminal(3)}), + Symbol::terminal(4), + }))); + }); + + it("handles single symbols", [&]() { + AssertThat(extract_choices(Symbol::terminal(2)), Equals(vector({ + Symbol::terminal(2) }))); }); it("handles blank rules", [&]() { - AssertThat(extract_choices(blank()), Equals(rule_vector({ - blank(), - }))); - }); - - it("does not move choices outside of repeats", [&]() { - auto rule = seq({ - choice({ sym("a"), sym("b") }), - repeat1(seq({ - sym("c"), - choice({ - sym("d"), - sym("e"), - }), - sym("f"), - })), - sym("g"), - }); - - AssertThat(extract_choices(rule), Equals(rule_vector({ - seq({ - sym("a"), - repeat1(choice({ - seq({ sym("c"), sym("d"), sym("f") }), - seq({ sym("c"), sym("e"), sym("f") }), - })), - sym("g"), - }), - seq({ - sym("b"), - repeat1(choice({ - seq({ sym("c"), sym("d"), sym("f") }), - seq({ sym("c"), sym("e"), sym("f") }), - })), - sym("g"), - }), + AssertThat(extract_choices(Blank{}), Equals(vector({ + Blank{}, }))); }); }); diff --git a/test/compiler/prepare_grammar/extract_tokens_test.cc b/test/compiler/prepare_grammar/extract_tokens_test.cc index 76ed50a0..005174bf 100644 --- a/test/compiler/prepare_grammar/extract_tokens_test.cc +++ b/test/compiler/prepare_grammar/extract_tokens_test.cc @@ -3,8 +3,6 @@ #include "compiler/prepare_grammar/interned_grammar.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/prepare_grammar/extract_tokens.h" -#include "helpers/rule_helpers.h" -#include "helpers/equals_pointer.h" #include "helpers/stream_methods.h" START_TEST @@ -13,23 +11,44 @@ using namespace rules; using prepare_grammar::extract_tokens; using prepare_grammar::InternedGrammar; using prepare_grammar::InitialSyntaxGrammar; +using InternedVariable = InternedGrammar::Variable; +using InitialSyntaxVariable = InitialSyntaxGrammar::Variable; describe("extract_tokens", []() { it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() { auto result = 
extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, repeat1(seq({ - str("ab"), - pattern("cd*"), - choice({ - i_sym(1), - i_sym(2), - token(repeat1(choice({ str("ef"), str("gh") }))), - }), - }))}, - Variable{"rule_B", VariableTypeNamed, pattern("ij+")}, - Variable{"rule_C", VariableTypeNamed, choice({ str("kl"), blank() })}, - Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(3))}, + InternedVariable{ + "rule_A", + VariableTypeNamed, + Repeat{Seq::build({ + String{"ab"}, + Pattern{"cd+"}, + Choice::build({ + Symbol::non_terminal(1), + Symbol::non_terminal(2), + Metadata::token(Repeat{Choice::build({ + String{"ef"}, + String{"g"} + })}), + }), + })} + }, + InternedVariable{ + "rule_B", + VariableTypeNamed, + Pattern{"h+"} + }, + InternedVariable{ + "rule_C", + VariableTypeNamed, + Choice::build({ String{"i"}, Blank{} }) + }, + InternedVariable{ + "rule_D", + VariableTypeNamed, + Repeat{Symbol::non_terminal(3)} + }, }, {}, {}, @@ -42,62 +61,104 @@ describe("extract_tokens", []() { AssertThat(error, Equals(CompileError::none())); - AssertThat(syntax_grammar.variables, Equals(vector{ - Variable{"rule_A", VariableTypeNamed, repeat1(seq({ + AssertThat(syntax_grammar.variables, Equals(vector{ + InitialSyntaxVariable{ + "rule_A", + VariableTypeNamed, + Repeat{Seq::build({ - // This string is now the first token in the lexical grammar. - i_token(0), + // This string is now the first token in the lexical grammar. + Symbol::terminal(0), - // This pattern is now the second rule in the lexical grammar. - i_token(1), + // This pattern is now the second rule in the lexical grammar. + Symbol::terminal(1), - choice({ - // Rule 1, which this symbol pointed to, has been moved to the - // lexical grammar. - i_token(3), + Choice::build({ + // Rule 1, which this symbol pointed to, has been moved to the + // lexical grammar. + Symbol::terminal(3), - // This symbol's index has been decremented, because a previous rule - // was moved to the lexical grammar. 
- i_sym(1), + // This symbol's index has been decremented, because a previous rule + // was moved to the lexical grammar. + Symbol::non_terminal(1), - // This token rule is now the third rule in the lexical grammar. - i_token(2), - }), - }))}, + // This token rule is now the third rule in the lexical grammar. + Symbol::terminal(2), + }), + })} + }, - Variable{"rule_C", VariableTypeNamed, choice({ i_token(4), blank() })}, - Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(2))}, + InitialSyntaxVariable{ + "rule_C", + VariableTypeNamed, + Choice::build({Symbol::terminal(4), Blank{}}) + }, + + InitialSyntaxVariable{ + "rule_D", + VariableTypeNamed, + Repeat{Symbol::non_terminal(2)} + }, })); AssertThat(lexical_grammar.variables, Equals(vector({ // Strings become anonymous rules. - LexicalVariable{"ab", VariableTypeAnonymous, str("ab"), true}, + LexicalVariable{ + "ab", + VariableTypeAnonymous, + Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}}, + true + }, // Patterns become hidden rules. - LexicalVariable{"/cd*/", VariableTypeAuxiliary, pattern("cd*"), false}, + LexicalVariable{ + "/cd+/", + VariableTypeAuxiliary, + Seq{CharacterSet{{'c'}}, Repeat{CharacterSet{{'d'}}}}, + false + }, // Rules marked as tokens become hidden rules. - LexicalVariable{"/(ef|gh)*/", VariableTypeAuxiliary, repeat1(choice({ - str("ef"), - str("gh") - })), false}, + LexicalVariable{ + "/(ef|g)+/", + VariableTypeAuxiliary, + Repeat{Choice::build({ + Seq{CharacterSet{{'e'}}, CharacterSet{{'f'}}}, + CharacterSet{{'g'}}, + })}, + false + }, // This named rule was moved wholesale to the lexical grammar. - LexicalVariable{"rule_B", VariableTypeNamed, pattern("ij+"), false}, + LexicalVariable{ + "rule_B", + VariableTypeNamed, + Repeat{CharacterSet{{'h'}}}, + false + }, // Strings become anonymous rules. 
- LexicalVariable{"kl", VariableTypeAnonymous, str("kl"), true}, + LexicalVariable{ + "i", + VariableTypeAnonymous, + CharacterSet{{'i'}}, + true + }, }))); }); it("does not create duplicate tokens in the lexical grammar", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, seq({ - str("ab"), - i_sym(0), - str("ab"), - })}, + { + "rule_A", + VariableTypeNamed, + Seq::build({ + String{"ab"}, + Symbol::non_terminal(1), + String{"ab"}, + }) + }, }, {}, {}, @@ -107,50 +168,114 @@ describe("extract_tokens", []() { InitialSyntaxGrammar &syntax_grammar = get<0>(result); LexicalGrammar &lexical_grammar = get<1>(result); - AssertThat(syntax_grammar.variables, Equals(vector { - Variable {"rule_A", VariableTypeNamed, seq({ i_token(0), i_sym(0), i_token(0) })}, + AssertThat(syntax_grammar.variables, Equals(vector { + InitialSyntaxVariable{ + "rule_A", + VariableTypeNamed, + Seq::build({ + Symbol::terminal(0), + Symbol::non_terminal(1), + Symbol::terminal(0) + }) + }, })); AssertThat(lexical_grammar.variables, Equals(vector { - LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true}, + LexicalVariable{ + "ab", + VariableTypeAnonymous, + Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}}, + true + }, })) }); it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() { auto result = extract_tokens(InternedGrammar{{ - Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })}, - Variable{"rule_B", VariableTypeNamed, str("cd")}, - Variable{"rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })}, + InternedVariable{ + "rule_A", + VariableTypeNamed, + Seq::build({ Symbol::non_terminal(1), String{"ab"} }) + }, + InternedVariable{ + "rule_B", + VariableTypeNamed, + String{"cd"} + }, + InternedVariable{ + "rule_C", + VariableTypeNamed, + Seq::build({ String{"ef"}, String{"cd"} }) + }, }, {}, {}, {}}); InitialSyntaxGrammar &syntax_grammar = get<0>(result); LexicalGrammar 
&lexical_grammar = get<1>(result); - AssertThat(syntax_grammar.variables, Equals(vector({ - Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })}, - Variable{"rule_B", VariableTypeNamed, i_token(1)}, - Variable{"rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })}, + AssertThat(syntax_grammar.variables, Equals(vector({ + InitialSyntaxVariable{ + "rule_A", + VariableTypeNamed, + Seq::build({ Symbol::non_terminal(1), Symbol::terminal(0) }) + }, + InitialSyntaxVariable{ + "rule_B", + VariableTypeNamed, + Symbol::terminal(1) + }, + InitialSyntaxVariable{ + "rule_C", + VariableTypeNamed, + Seq::build({ Symbol::terminal(2), Symbol::terminal(1) }) + }, }))); AssertThat(lexical_grammar.variables, Equals(vector { - LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true}, - LexicalVariable {"cd", VariableTypeAnonymous, str("cd"), true}, - LexicalVariable {"ef", VariableTypeAnonymous, str("ef"), true}, + LexicalVariable{ + "ab", + VariableTypeAnonymous, + Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}}, + true + }, + LexicalVariable{ + "cd", + VariableTypeAnonymous, + Seq{CharacterSet{{'c'}}, CharacterSet{{'d'}}}, + true + }, + LexicalVariable{ + "ef", + VariableTypeAnonymous, + Seq{CharacterSet{{'e'}}, CharacterSet{{'f'}}}, + true + }, })); }); it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, str("ok")}, - Variable{"rule_B", VariableTypeNamed, repeat(i_sym(0))}, - Variable{"rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))}, + InternedVariable{ + "rule_A", + VariableTypeNamed, + String{"ok"} + }, + InternedVariable{ + "rule_B", + VariableTypeNamed, + Repeat{Symbol::non_terminal(0)} + }, + InternedVariable{ + "rule_C", + VariableTypeNamed, + Repeat{Seq{Symbol::non_terminal(0), Symbol::non_terminal(0)}} + }, }, { - str(" ") + String{" "} }, { - { Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) } 
+ { Symbol::non_terminal(1), Symbol::non_terminal(2) } }, {} }); @@ -159,7 +284,7 @@ describe("extract_tokens", []() { AssertThat(syntax_grammar.variables.size(), Equals(2)); AssertThat(syntax_grammar.expected_conflicts, Equals(set>({ - { Symbol(0, Symbol::NonTerminal), Symbol(1, Symbol::NonTerminal) }, + { Symbol::non_terminal(0), Symbol::non_terminal(1) }, }))); }); @@ -167,11 +292,11 @@ describe("extract_tokens", []() { it("adds inline extra tokens to the lexical grammar's separators", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, str("x")}, + InternedVariable{"rule_A", VariableTypeNamed, String{"x"}}, }, { - str("y"), - pattern("\\s+"), + String{"y"}, + Pattern{" "}, }, {}, {} @@ -180,8 +305,8 @@ describe("extract_tokens", []() { AssertThat(get<2>(result), Equals(CompileError::none())); AssertThat(get<1>(result).separators.size(), Equals(2)); - AssertThat(get<1>(result).separators[0], EqualsPointer(str("y"))); - AssertThat(get<1>(result).separators[1], EqualsPointer(pattern("\\s+"))); + AssertThat(get<1>(result).separators[0], Equals(Rule(CharacterSet{{'y'}}))); + AssertThat(get<1>(result).separators[1], Equals(Rule(CharacterSet{{' '}}))); AssertThat(get<0>(result).extra_tokens, IsEmpty()); }); @@ -189,11 +314,11 @@ describe("extract_tokens", []() { it("handles inline extra tokens that match tokens in the grammar", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, str("x")}, - Variable{"rule_B", VariableTypeNamed, str("y")}, + InternedVariable{"rule_A", VariableTypeNamed, String{"x"}}, + InternedVariable{"rule_B", VariableTypeNamed, String{"y"}}, }, { - str("y"), + String{"y"}, }, {}, {} @@ -201,18 +326,30 @@ describe("extract_tokens", []() { AssertThat(get<2>(result), Equals(CompileError::none())); AssertThat(get<1>(result).separators.size(), Equals(0)); - AssertThat(get<0>(result).extra_tokens, Equals(set({ Symbol(1, Symbol::Terminal) }))); + 
AssertThat(get<0>(result).extra_tokens, Equals(set({ Symbol::terminal(1) }))); }); it("updates extra symbols according to the new symbol numbers", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })}, - Variable{"rule_B", VariableTypeNamed, str("y")}, - Variable{"rule_C", VariableTypeNamed, str("z")}, + InternedVariable{ + "rule_A", + VariableTypeNamed, + Seq::build({ String{"w"}, String{"x"}, Symbol::non_terminal(1) }) + }, + InternedVariable{ + "rule_B", + VariableTypeNamed, + String{"y"} + }, + InternedVariable{ + "rule_C", + VariableTypeNamed, + String{"z"} + }, }, { - i_sym(2), + Symbol::non_terminal(2), }, {}, {} @@ -221,34 +358,55 @@ describe("extract_tokens", []() { AssertThat(get<2>(result), Equals(CompileError::none())); AssertThat(get<0>(result).extra_tokens, Equals(set({ - { Symbol(3, Symbol::Terminal) }, + { Symbol::terminal(3) }, }))); AssertThat(get<1>(result).separators, IsEmpty()); }); it("returns an error if any extra tokens are non-token symbols", [&]() { - auto result = extract_tokens(InternedGrammar{{ - Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })}, - Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })}, - }, { i_sym(1) }, {}, {}}); + auto result = extract_tokens(InternedGrammar{ + { + InternedVariable{ + "rule_A", + VariableTypeNamed, + Seq::build({ String{"x"}, Symbol::non_terminal(1) }) + }, + InternedVariable{ + "rule_B", + VariableTypeNamed, + Seq::build({ String{"y"}, String{"z"} }) + }, + }, + { + Symbol::non_terminal(1) + }, + {}, + {} + }); - AssertThat(get<2>(result), !Equals(CompileError::none())); - AssertThat(get<2>(result), Equals( - CompileError(TSCompileErrorTypeInvalidExtraToken, - "Not a token: rule_B"))); + AssertThat(get<2>(result), Equals(CompileError( + TSCompileErrorTypeInvalidExtraToken, + "Non-token symbol rule_B can't be used as an extra token" + ))); }); it("returns an error if any extra tokens are 
non-token rules", [&]() { - auto result = extract_tokens(InternedGrammar{{ - Variable{"rule_A", VariableTypeNamed, str("x")}, - Variable{"rule_B", VariableTypeNamed, str("y")}, - }, { choice({ i_sym(1), blank() }) }, {}, {}}); + auto result = extract_tokens(InternedGrammar{ + { + {"rule_A", VariableTypeNamed, String{"x"}}, + {"rule_B", VariableTypeNamed, String{"y"}}, + }, + { + Choice::build({ Symbol::non_terminal(1), Blank{} }) + }, + {}, + {} + }); - AssertThat(get<2>(result), !Equals(CompileError::none())); AssertThat(get<2>(result), Equals(CompileError( TSCompileErrorTypeInvalidExtraToken, - "Not a token: (choice (non-terminal 1) (blank))" + "Non-token rule expression can't be used as an extra token" ))); }); }); @@ -256,13 +414,21 @@ describe("extract_tokens", []() { it("returns an error if an external token has the same name as a non-terminal rule", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })}, - Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })}, + { + "rule_A", + VariableTypeNamed, + Seq::build({ String{"x"}, Symbol::non_terminal(1) }) + }, + { + "rule_B", + VariableTypeNamed, + Seq::build({ String{"y"}, String{"z"} }) + }, }, {}, {}, { - ExternalToken {"rule_A", VariableTypeNamed, Symbol(0, Symbol::NonTerminal)} + ExternalToken {"rule_A", VariableTypeNamed, Symbol::non_terminal(0)} } }); diff --git a/test/compiler/prepare_grammar/flatten_grammar_test.cc b/test/compiler/prepare_grammar/flatten_grammar_test.cc index f935490c..15a53aa6 100644 --- a/test/compiler/prepare_grammar/flatten_grammar_test.cc +++ b/test/compiler/prepare_grammar/flatten_grammar_test.cc @@ -2,7 +2,6 @@ #include "compiler/prepare_grammar/flatten_grammar.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/syntax_grammar.h" -#include "helpers/rule_helpers.h" #include "helpers/stream_methods.h" START_TEST @@ -12,23 +11,23 @@ using prepare_grammar::flatten_rule; 
describe("flatten_grammar", []() { it("associates each symbol with the precedence and associativity binding it to its successor", [&]() { - SyntaxVariable result = flatten_rule(Variable{ + SyntaxVariable result = flatten_rule({ "test", VariableTypeNamed, - seq({ - i_sym(1), - prec_left(101, seq({ - i_sym(2), - choice({ - prec_right(102, seq({ - i_sym(3), - i_sym(4) + Seq::build({ + Symbol::non_terminal(1), + Metadata::prec_left(101, Seq::build({ + Symbol::non_terminal(2), + Choice::build({ + Metadata::prec_right(102, Seq::build({ + Symbol::non_terminal(3), + Symbol::non_terminal(4) })), - i_sym(5), + Symbol::non_terminal(5), }), - i_sym(6), + Symbol::non_terminal(6), })), - i_sym(7), + Symbol::non_terminal(7), }) }); @@ -36,51 +35,51 @@ describe("flatten_grammar", []() { AssertThat(result.type, Equals(VariableTypeNamed)); AssertThat(result.productions, Equals(vector({ Production({ - {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft}, - {Symbol(3, Symbol::NonTerminal), 102, AssociativityRight}, - {Symbol(4, Symbol::NonTerminal), 101, AssociativityLeft}, - {Symbol(6, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(7, Symbol::NonTerminal), 0, AssociativityNone}, + {Symbol::non_terminal(1), 0, AssociativityNone}, + {Symbol::non_terminal(2), 101, AssociativityLeft}, + {Symbol::non_terminal(3), 102, AssociativityRight}, + {Symbol::non_terminal(4), 101, AssociativityLeft}, + {Symbol::non_terminal(6), 0, AssociativityNone}, + {Symbol::non_terminal(7), 0, AssociativityNone}, }), Production({ - {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft}, - {Symbol(5, Symbol::NonTerminal), 101, AssociativityLeft}, - {Symbol(6, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(7, Symbol::NonTerminal), 0, AssociativityNone}, + {Symbol::non_terminal(1), 0, AssociativityNone}, + {Symbol::non_terminal(2), 101, AssociativityLeft}, + 
{Symbol::non_terminal(5), 101, AssociativityLeft}, + {Symbol::non_terminal(6), 0, AssociativityNone}, + {Symbol::non_terminal(7), 0, AssociativityNone}, }) }))) }); it("uses the last assigned precedence", [&]() { - SyntaxVariable result = flatten_rule(Variable{ + SyntaxVariable result = flatten_rule({ "test1", VariableTypeNamed, - prec_left(101, seq({ - i_sym(1), - i_sym(2), + Metadata::prec_left(101, Seq::build({ + Symbol::non_terminal(1), + Symbol::non_terminal(2), })) }); AssertThat(result.productions, Equals(vector({ Production({ - {Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft}, - {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft}, + {Symbol::non_terminal(1), 101, AssociativityLeft}, + {Symbol::non_terminal(2), 101, AssociativityLeft}, }) }))) - result = flatten_rule(Variable{ + result = flatten_rule({ "test2", VariableTypeNamed, - prec_left(101, seq({ - i_sym(1), + Metadata::prec_left(101, Seq::build({ + Symbol::non_terminal(1), })) }); AssertThat(result.productions, Equals(vector({ Production({ - {Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft}, + {Symbol::non_terminal(1), 101, AssociativityLeft}, }) }))) }); diff --git a/test/compiler/prepare_grammar/intern_symbols_test.cc b/test/compiler/prepare_grammar/intern_symbols_test.cc index 1950d638..9bfe7367 100644 --- a/test/compiler/prepare_grammar/intern_symbols_test.cc +++ b/test/compiler/prepare_grammar/intern_symbols_test.cc @@ -1,11 +1,7 @@ #include "test_helper.h" #include "compiler/prepare_grammar/intern_symbols.h" #include "compiler/grammar.h" -#include "compiler/rules/named_symbol.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/built_in_symbols.h" -#include "helpers/equals_pointer.h" -#include "helpers/rule_helpers.h" +#include "compiler/rule.h" #include "helpers/stream_methods.h" START_TEST @@ -15,29 +11,29 @@ using prepare_grammar::intern_symbols; describe("intern_symbols", []() { it("replaces named symbols with numerically-indexed symbols", [&]() { - Grammar 
grammar{ + InputGrammar grammar{ { - {"x", choice({ sym("y"), sym("_z") })}, - {"y", sym("_z")}, - {"_z", str("stuff")} + {"x", VariableTypeNamed, Choice::build({ NamedSymbol{"y"}, NamedSymbol{"_z"} })}, + {"y", VariableTypeNamed, NamedSymbol{"_z"}}, + {"_z", VariableTypeNamed, String{"stuff"}} }, {}, {}, {} }; auto result = intern_symbols(grammar); AssertThat(result.second, Equals(CompileError::none())); - AssertThat(result.first.variables, Equals(vector{ - Variable{"x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })}, - Variable{"y", VariableTypeNamed, i_sym(2)}, - Variable{"_z", VariableTypeHidden, str("stuff")}, + AssertThat(result.first.variables, Equals(vector{ + {"x", VariableTypeNamed, Choice::build({ Symbol::non_terminal(1), Symbol::non_terminal(2) })}, + {"y", VariableTypeNamed, Symbol::non_terminal(2)}, + {"_z", VariableTypeHidden, String{"stuff"}}, })); }); describe("when there are symbols that reference undefined rules", [&]() { it("returns an error", []() { - Grammar grammar{ + InputGrammar grammar{ { - {"x", sym("y")}, + {"x", VariableTypeNamed, NamedSymbol{"y"}}, }, {}, {}, {} }; @@ -49,14 +45,14 @@ describe("intern_symbols", []() { }); it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() { - Grammar grammar{ + InputGrammar grammar{ { - {"x", choice({ sym("y"), sym("z") })}, - {"y", sym("z")}, - {"z", str("stuff")} + {"x", VariableTypeNamed, Choice::build({ NamedSymbol{"y"}, NamedSymbol{"z"} })}, + {"y", VariableTypeNamed, NamedSymbol{"z"}}, + {"z", VariableTypeNamed, String{"stuff"}} }, { - sym("z") + NamedSymbol{"z"} }, {}, {} }; @@ -65,21 +61,29 @@ describe("intern_symbols", []() { AssertThat(result.second, Equals(CompileError::none())); AssertThat(result.first.extra_tokens.size(), Equals(1)); - AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2))); + AssertThat(result.first.extra_tokens, Equals(vector({ Symbol::non_terminal(2) }))); }); it("records any rule names that match external token names", 
[&]() { - Grammar grammar{ + InputGrammar grammar{ { - {"x", choice({ sym("y"), sym("z") })}, - {"y", sym("z")}, - {"z", str("stuff")}, + {"x", VariableTypeNamed, Choice::build({ NamedSymbol{"y"}, NamedSymbol{"z"} })}, + {"y", VariableTypeNamed, NamedSymbol{"z"}}, + {"z", VariableTypeNamed, String{"stuff"}}, }, {}, {}, { - "w", - "z" + ExternalToken{ + "w", + VariableTypeNamed, + NONE() + }, + ExternalToken{ + "z", + VariableTypeNamed, + NONE() + }, } }; @@ -94,7 +98,7 @@ describe("intern_symbols", []() { ExternalToken{ "z", VariableTypeNamed, - Symbol(2, Symbol::NonTerminal) + Symbol::non_terminal(2) }, })) }); diff --git a/test/compiler/prepare_grammar/parse_regex_test.cc b/test/compiler/prepare_grammar/parse_regex_test.cc index 72ca0a40..94285e39 100644 --- a/test/compiler/prepare_grammar/parse_regex_test.cc +++ b/test/compiler/prepare_grammar/parse_regex_test.cc @@ -1,7 +1,5 @@ #include "test_helper.h" #include "compiler/prepare_grammar/parse_regex.h" -#include "helpers/equals_pointer.h" -#include "helpers/rule_helpers.h" START_TEST @@ -12,178 +10,218 @@ describe("parse_regex", []() { struct ValidInputRow { string description; string pattern; - rule_ptr rule; + Rule rule; }; vector valid_inputs = { { "character sets", "[aAeE]", - character({ 'a', 'A', 'e', 'E' }) + CharacterSet{{ 'a', 'A', 'e', 'E' }} }, { "'.' 
characters as wildcards", ".", - character({ '\n' }, false) + CharacterSet().include_all().exclude('\n') }, { "character classes", "\\w-\\d-\\s-\\W-\\D-\\S", - seq({ - character({ + Seq::build({ + CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }), - character({ '-' }), - character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }), - character({ '-' }), - character({ ' ', '\t', '\r', '\n' }), - character({ '-' }), - character({ - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', - 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }, false), - character({ '-' }), - character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false), - character({ '-' }), - character({ ' ', '\t', '\r', '\n' }, false), + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }}, + CharacterSet{{ '-' }}, + CharacterSet{{ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }}, + CharacterSet{{ '-' }}, + CharacterSet{{ ' ', '\t', '\r', '\n' }}, + CharacterSet{{ '-' }}, + CharacterSet().include_all() + .exclude('a', 'z') + .exclude('A', 'Z') + .exclude('0', '9') + .exclude('_'), + CharacterSet{{ '-' }}, + CharacterSet().include_all().exclude('0', '9'), + CharacterSet{{ '-' }}, + CharacterSet().include_all() + .exclude(' ') + .exclude('\t') + .exclude('\r') + .exclude('\n') }) }, { "choices", "ab|cd|ef", - choice({ - seq({ - character({ 'a' }), - character({ 'b' }) }), - seq({ - character({ 'c' }), - character({ 'd' }) }), - seq({ - character({ 'e' }), - character({ 'f' }) }) }) + 
Choice::build({ + Seq{ + CharacterSet{{'a'}}, + CharacterSet{{'b'}} + }, + Seq{ + CharacterSet{{'c'}}, + CharacterSet{{'d'}} + }, + Seq{ + CharacterSet{{'e'}}, + CharacterSet{{'f'}} + } + }) }, { "simple sequences", "abc", - seq({ - character({ 'a' }), - character({ 'b' }), - character({ 'c' }) }) + Seq::build({ + CharacterSet{{'a'}}, + CharacterSet{{'b'}}, + CharacterSet{{'c'}} + }) }, { "character ranges", "[12a-dA-D3]", - character({ + CharacterSet{{ '1', '2', '3', 'a', 'b', 'c', 'd', - 'A', 'B', 'C', 'D' }) + 'A', 'B', 'C', 'D' + }} }, { "negated characters", "[^a\\d]", - character({ 'a', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false) + CharacterSet().include_all() + .exclude('a') + .exclude('0', '9') }, { "backslashes", "\\\\", - character({ '\\' }) + CharacterSet{{'\\'}} }, { "character groups in sequences", "x([^x]|\\\\x)*x", - seq({ - character({ 'x' }), - repeat(choice({ - character({ 'x' }, false), - seq({ character({ '\\' }), character({ 'x' }) }) })), - character({ 'x' }) }) + Seq::build({ + CharacterSet{{'x'}}, + Choice::build({ + Repeat{Choice::build({ + CharacterSet().include_all().exclude('x'), + Seq::build({ + CharacterSet{{'\\'}}, + CharacterSet{{'x'}} + }) + })}, + Blank{} + }), + CharacterSet{{'x'}} + }) }, { "choices in sequences", "(a|b)cd", - seq({ - choice({ - character({ 'a' }), - character({ 'b' }) }), - character({ 'c' }), - character({ 'd' }) }) + Seq::build({ + Choice::build({ + CharacterSet{{'a'}}, + CharacterSet{{'b'}} }), + CharacterSet{{'c'}}, + CharacterSet{{'d'}} }) }, { "escaped parentheses", "a\\(b", - seq({ - character({ 'a' }), - character({ '(' }), - character({ 'b' }) }) + Seq::build({ + CharacterSet{{'a'}}, + CharacterSet{{'('}}, + CharacterSet{{'b'}}, + }) }, { "escaped periods", "a\\.", - seq({ - character({ 'a' }), - character({ '.' 
}) }) + Seq::build({ + CharacterSet{{'a'}}, + CharacterSet{{'.'}}, + }) }, { "escaped characters", "\\t\\n\\r", - seq({ - character({ '\t' }), - character({ '\n' }), - character({ '\r' }) }) + Seq::build({ + CharacterSet{{'\t'}}, + CharacterSet{{'\n'}}, + CharacterSet{{'\r'}}, + }) }, { "plus repeats", "(ab)+(cd)+", - seq({ - repeat1(seq({ character({ 'a' }), character({ 'b' }) })), - repeat1(seq({ character({ 'c' }), character({ 'd' }) })) }) + Seq::build({ + Repeat{Seq::build({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })}, + Repeat{Seq::build({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })}, + }) }, { "asterix repeats", "(ab)*(cd)*", - seq({ - repeat(seq({ character({ 'a' }), character({ 'b' }) })), - repeat(seq({ character({ 'c' }), character({ 'd' }) })) }) + Seq::build({ + Choice::build({ + Repeat{Seq::build({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })}, + Blank{}, + }), + Choice::build({ + Repeat{Seq::build({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })}, + Blank{}, + }), + }) }, { "optional rules", "a(bc)?", - seq({ - character({ 'a' }), - choice({ - seq({ character({ 'b' }), character({ 'c' }) }), - blank() }) }) + Seq::build({ + CharacterSet{{'a'}}, + Choice::build({ + Seq::build({ + CharacterSet{{'b'}}, + CharacterSet{{'c'}}, + }), + Blank{} + }), + }) }, { "choices containing negated character classes", - "/([^/]|(\\\\/))*/", - seq({ - character({ '/' }), - repeat(choice({ - character({ '/' }, false), - seq({ character({ '\\' }), character({ '/' }) }) })), - character({ '/' }), }), + "/([^/]|(\\\\/))+/", + Seq::build({ + CharacterSet{{'/'}}, + Repeat{Choice::build({ + CharacterSet().include_all().exclude('/'), + Seq::build({ + CharacterSet{{'\\'}}, + CharacterSet{{'/'}}, + }), + })}, + CharacterSet{{'/'}}, + }), }, }; @@ -229,7 +267,7 @@ describe("parse_regex", []() { for (auto &row : valid_inputs) { it(("parses " + row.description).c_str(), [&]() { auto result = parse_regex(row.pattern); - AssertThat(result.first, EqualsPointer(row.rule)); + 
AssertThat(result.first, Equals(row.rule)); }); } diff --git a/test/compiler/rules/character_set_test.cc b/test/compiler/rules/character_set_test.cc index 4c8f415b..f7c2e632 100644 --- a/test/compiler/rules/character_set_test.cc +++ b/test/compiler/rules/character_set_test.cc @@ -1,5 +1,5 @@ #include "test_helper.h" -#include "compiler/rules/character_set.h" +#include "compiler/rule.h" using namespace rules; @@ -66,7 +66,7 @@ describe("CharacterSet", []() { .include('a', 'd') .include('f', 'm'); - AssertThat(set1.hash_code(), Equals(set2.hash_code())); + AssertThat(hash()(set1), Equals(hash()(set2))); }); it("returns different numbers for character sets that include different ranges", [&]() { @@ -78,8 +78,8 @@ describe("CharacterSet", []() { .include('a', 'c') .include('f', 'm'); - AssertThat(set1.hash_code(), !Equals(set2.hash_code())); - AssertThat(set2.hash_code(), !Equals(set1.hash_code())); + AssertThat(hash()(set1), !Equals(hash()(set2))); + AssertThat(hash()(set2), !Equals(hash()(set1))); }); it("returns different numbers for character sets that exclude different ranges", [&]() { @@ -93,16 +93,16 @@ describe("CharacterSet", []() { .exclude('a', 'c') .exclude('f', 'm'); - AssertThat(set1.hash_code(), !Equals(set2.hash_code())); - AssertThat(set2.hash_code(), !Equals(set1.hash_code())); + AssertThat(hash()(set1), !Equals(hash()(set2))); + AssertThat(hash()(set2), !Equals(hash()(set1))); }); it("returns different numbers for character sets with different sign", [&]() { CharacterSet set1 = CharacterSet().include_all(); CharacterSet set2 = CharacterSet(); - AssertThat(set1.hash_code(), !Equals(set2.hash_code())); - AssertThat(set2.hash_code(), !Equals(set1.hash_code())); + AssertThat(hash()(set1), !Equals(hash()(set2))); + AssertThat(hash()(set2), !Equals(hash()(set1))); }); }); @@ -312,7 +312,7 @@ describe("CharacterSet", []() { .include('z'); AssertThat(set1.included_ranges(), Equals(vector({ - CharacterRange('a', 'c'), + CharacterRange{'a', 'c'}, 
CharacterRange('g'), CharacterRange('z'), }))); diff --git a/test/compiler/rules/choice_test.cc b/test/compiler/rules/choice_test.cc index 59b52740..01ed71a1 100644 --- a/test/compiler/rules/choice_test.cc +++ b/test/compiler/rules/choice_test.cc @@ -1,7 +1,5 @@ #include "test_helper.h" -#include "compiler/rules/choice.h" -#include "helpers/rule_helpers.h" -#include "helpers/equals_pointer.h" +#include "compiler/rule.h" using namespace rules; @@ -10,42 +8,62 @@ START_TEST describe("Choice", []() { describe("constructing choices", [&]() { it("eliminates duplicate members", [&]() { - auto rule = Choice::build({ - seq({ sym("one"), sym("two") }), - sym("three"), - seq({ sym("one"), sym("two") }) + Rule rule = Choice::build({ + Seq::build({ NamedSymbol{"one"}, NamedSymbol{"two"} }), + NamedSymbol{"three"}, + Seq::build({ NamedSymbol{"one"}, NamedSymbol{"two"} }) }); - AssertThat(rule, EqualsPointer(choice({ - seq({ sym("one"), sym("two") }), - sym("three"), - }))); + AssertThat(rule, Equals(Rule(Choice{{ + Seq::build({ NamedSymbol{"one"}, NamedSymbol{"two"} }), + NamedSymbol{"three"}, + }}))); + + rule = Choice::build({ + Blank{}, + Blank{}, + Choice::build({ + Blank{}, + NamedSymbol{"four"} + }) + }); + + AssertThat(rule, Equals(*Choice::build({Blank{}, NamedSymbol{"four"}}))); }); it("eliminates duplicates within nested choices", [&]() { - auto rule = Choice::build({ - seq({ sym("one"), sym("two") }), + Rule rule = Choice::build({ + Seq::build({ + NamedSymbol{"one"}, + NamedSymbol{"two"} + }), Choice::build({ - sym("three"), - seq({ sym("one"), sym("two") }) + NamedSymbol{"three"}, + Seq::build({ + NamedSymbol{"one"}, + NamedSymbol{"two"} + }) }) }); - AssertThat(rule, EqualsPointer(choice({ - seq({ sym("one"), sym("two") }), - sym("three"), - }))); + AssertThat(rule, Equals(Rule(Choice{{ + Seq::build({ + NamedSymbol{"one"}, + NamedSymbol{"two"}, + }), + NamedSymbol{"three"}, + }}))); }); it("doesn't construct a choice if there's only one unique member", [&]() { - 
auto rule = Choice::build({ - sym("one"), + Rule rule = Choice::build({ + NamedSymbol{"one"}, Choice::build({ - sym("one"), + NamedSymbol{"one"}, }) }); - AssertThat(rule, EqualsPointer(sym("one"))); + AssertThat(rule, Equals(Rule(NamedSymbol{"one"}))); }); }); }); diff --git a/test/compiler/rules/repeat_test.cc b/test/compiler/rules/repeat_test.cc index 693b2d43..c3ecb566 100644 --- a/test/compiler/rules/repeat_test.cc +++ b/test/compiler/rules/repeat_test.cc @@ -1,6 +1,5 @@ #include "test_helper.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/symbol.h" +#include "compiler/rule.h" using namespace rules; @@ -9,11 +8,11 @@ START_TEST describe("Repeat", []() { describe("constructing repeats", [&]() { it("doesn't create redundant repeats", [&]() { - auto sym = make_shared(1, Symbol::NonTerminal); - auto repeat = Repeat::build(sym); - auto outer_repeat = Repeat::build(repeat); + Rule symbol = Symbol::non_terminal(1); + Rule repeat = Repeat::build(Rule(symbol)); + Rule outer_repeat = Repeat::build(Rule(repeat)); - AssertThat(repeat, !Equals(sym)); + AssertThat(repeat, !Equals(symbol)); AssertThat(outer_repeat, Equals(repeat)); }); }); diff --git a/test/helpers/equals_pointer.h b/test/helpers/equals_pointer.h deleted file mode 100644 index b78be66e..00000000 --- a/test/helpers/equals_pointer.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef HELPERS_EQUALS_POINTER_H_ -#define HELPERS_EQUALS_POINTER_H_ - -#include "bandit/bandit.h" -#include - -namespace snowhouse { - using namespace std; - - template - struct EqualsPointerConstraint : Expression> { - EqualsPointerConstraint(const ExpectedType& expected) : expected(expected) {} - - template - bool operator()(const ActualType& actual) const { - return *expected == *actual; - } - - ExpectedType expected; - }; - - template - struct Stringizer> { - static string ToString(const EqualsPointerConstraint& constraint) { - ostringstream builder; - builder << "pointer to " << snowhouse::Stringize(constraint.expected); - 
return builder.str(); - } - }; - - template - inline EqualsPointerConstraint EqualsPointer(const ExpectedType& expected) { - return EqualsPointerConstraint(expected); - } -} - -#endif // HELPERS_EQUALS_POINTER_H_ diff --git a/test/helpers/rule_helpers.cc b/test/helpers/rule_helpers.cc deleted file mode 100644 index 968d59ba..00000000 --- a/test/helpers/rule_helpers.cc +++ /dev/null @@ -1,62 +0,0 @@ -#include "rule_helpers.h" -#include -#include "compiler/rules/symbol.h" -#include "compiler/variable.h" -#include "compiler/lexical_grammar.h" - -namespace tree_sitter { - using std::make_shared; - using std::set; - using std::map; - using std::ostream; - using std::string; - using std::to_string; - using rules::Symbol; - - rule_ptr character(const set &ranges) { - return character(ranges, true); - } - - rule_ptr character(const set &chars, bool sign) { - rules::CharacterSet result; - if (sign) { - for (uint32_t c : chars) - result.include(c); - } else { - result.include_all(); - for (uint32_t c : chars) - result.exclude(c); - } - return result.copy(); - } - - rule_ptr i_sym(size_t index) { - return make_shared(index, Symbol::NonTerminal); - } - - rule_ptr i_token(size_t index) { - return make_shared(index, Symbol::Terminal); - } - - rule_ptr metadata(rule_ptr rule, rules::MetadataParams params) { - return rules::Metadata::build(rule, params); - } - - rule_ptr active_prec(int precedence, rule_ptr rule) { - rules::MetadataParams params; - params.precedence = precedence; - params.has_precedence = true; - params.is_active = true; - return rules::Metadata::build(rule, params); - } - - bool operator==(const Variable &left, const Variable &right) { - return left.name == right.name && left.rule->operator==(*right.rule) && - left.type == right.type; - } - - bool operator==(const LexicalVariable &left, const LexicalVariable &right) { - return left.name == right.name && left.rule->operator==(*right.rule) && - left.type == right.type && left.is_string == right.is_string; - } -} 
diff --git a/test/helpers/rule_helpers.h b/test/helpers/rule_helpers.h deleted file mode 100644 index 8ebe87e8..00000000 --- a/test/helpers/rule_helpers.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef HELPERS_RULE_HELPERS_H_ -#define HELPERS_RULE_HELPERS_H_ - -#include "tree_sitter/compiler.h" -#include "compiler/rules.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/metadata.h" -#include "compiler/variable.h" - -namespace tree_sitter { - rule_ptr metadata(rule_ptr, rules::MetadataParams params); - rule_ptr character(const std::set &); - rule_ptr character(const std::set &, bool sign); - rule_ptr i_sym(size_t index); - rule_ptr i_token(size_t index); - rule_ptr active_prec(int precedence, rule_ptr); - - struct Variable; - struct LexicalVariable; - - bool operator==(const Variable &left, const Variable &right); - bool operator==(const LexicalVariable &left, const LexicalVariable &right); -} - -#endif // HELPERS_RULE_HELPERS_H_ diff --git a/test/helpers/stream_methods.cc b/test/helpers/stream_methods.cc index 20338b47..56c85890 100644 --- a/test/helpers/stream_methods.cc +++ b/test/helpers/stream_methods.cc @@ -1,6 +1,7 @@ #include "helpers/stream_methods.h" #include "test_helper.h" #include "tree_sitter/compiler.h" +#include "compiler/util/string_helpers.h" #include "compiler/parse_table.h" #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" @@ -9,109 +10,177 @@ namespace tree_sitter { -ostream &operator<<(ostream &stream, const Grammar &grammar) { - stream << string("#"); +ostream &operator<<(ostream &stream, const InputGrammar &grammar) { + return stream << "(InputGrammar variables: " << grammar.variables << ")"; } ostream &operator<<(ostream &stream, const CompileError &error) { - if (error.type) - return stream << (string("#"); - else - return stream << string("#"); + if (error.type) { + return stream << "(CompileError " << error.message << ")"; + } else { + return stream << "(No CompileError)"; + } +} + +namespace rules { 
+ +ostream &operator<<(ostream &stream, Associativity associativity) { + switch (associativity) { + case AssociativityLeft: + return stream << "AssociativityLeft"; + case AssociativityRight: + return stream << "AssociativityRight"; + case AssociativityNone: + return stream << "AssociativityNone"; + } +} + +ostream &operator<<(ostream &stream, const Blank &) { + return stream << "(Blank)"; +} + +ostream &operator<<(ostream &stream, const CharacterRange &range) { + if (range.min == range.max) { + return stream << util::escape_char(range.min); + } else { + return stream << "(" + util::escape_char(range.min) << "-" << util::escape_char(range.max) << ")"; + } +} + +ostream &operator<<(ostream &stream, const CharacterSet &rule) { + stream << "(CharacterSet"; + if (rule.includes_all) { + if (rule.excluded_chars.empty()) { + stream << " all"; + } else { + stream << " exclude"; + for (const auto &range : rule.excluded_ranges()) { + stream << " " << range; + } + } + } else { + for (const auto &range : rule.included_ranges()) { + stream << " " << range; + } + } + return stream << ")"; +} + +ostream &operator<<(ostream &stream, const Symbol &rule) { + stream << "(Symbol "; + switch (rule.type) { + case Symbol::External: + stream << "external"; + break; + case Symbol::Terminal: + stream << "terminal"; + break; + case Symbol::NonTerminal: + stream << "non-terminal"; + break; + } + return stream << " " << rule.index << ")"; +} + +ostream &operator<<(ostream &stream, const NamedSymbol &rule) { + return stream << "(NamedSymbol " << rule.value << ")"; +} + +ostream &operator<<(ostream &stream, const String &rule) { + return stream << "(String " << rule.value << ")"; +} + +ostream &operator<<(ostream &stream, const Pattern &rule) { + return stream << "(Pattern " << rule.value << ")"; +} + +ostream &operator<<(ostream &stream, const Choice &rule) { + stream << "(Choice"; + for (const auto &element : rule.elements) { + stream << " " << element; + } + return stream << ")"; +} + +ostream 
&operator<<(ostream &stream, const Seq &rule) { + return stream << "(Seq " << *rule.left << " " << *rule.right << ")"; +} + +ostream &operator<<(ostream &stream, const Repeat &rule) { + return stream << "(Repeat " << *rule.rule << ")"; +} + +ostream &operator<<(ostream &stream, const Metadata &rule) { + return stream << "(Metadata " << *rule.rule << ")"; } ostream &operator<<(ostream &stream, const Rule &rule) { - return stream << rule.to_string(); -} - -ostream &operator<<(ostream &stream, const rule_ptr &rule) { - if (rule.get()) - stream << *rule; - else - stream << string("(null-rule)"); + rule.match( + [&stream](Blank r) { stream << r; }, + [&stream](NamedSymbol r) { stream << r; }, + [&stream](Symbol r) { stream << r; }, + [&stream](String r) { stream << r; }, + [&stream](Pattern r) { stream << r; }, + [&stream](CharacterSet r) { stream << r; }, + [&stream](Choice r) { stream << r; }, + [&stream](Seq r) { stream << r; }, + [&stream](Repeat r) { stream << r; }, + [&stream](Metadata r) { stream << r; } + ); return stream; } -ostream &operator<<(ostream &stream, const Variable &variable) { - return stream << string("{") << variable.name << string(", ") << variable.rule << string(", ") << to_string(variable.type) << string("}"); +} // namespace rules + +ostream &operator<<(ostream &stream, const InputGrammar::Variable &variable) { + return stream << "(Variable " << variable.name << " " << variable.rule << ")"; } ostream &operator<<(ostream &stream, const SyntaxVariable &variable) { - return stream << string("{") << variable.name << string(", ") << variable.productions << string(", ") << to_string(variable.type) << string("}"); + return stream << "(Variable " << variable.name << " " << variable.productions << + " " << to_string(variable.type) << "}"; } ostream &operator<<(ostream &stream, const LexicalVariable &variable) { - return stream << "{" << variable.name << ", " << variable.rule << ", " << - to_string(variable.type) << ", " << to_string(variable.is_string) 
<< "}"; -} - -std::ostream &operator<<(std::ostream &stream, const AdvanceAction &action) { - return stream << string("#"; -} - -std::ostream &operator<<(std::ostream &stream, const AcceptTokenAction &action) { - return stream << string("#"; -} - -ostream &operator<<(ostream &stream, const ParseAction &action) { - switch (action.type) { - case ParseActionTypeError: - return stream << string("#"); - case ParseActionTypeAccept: - return stream << string("#"); - case ParseActionTypeShift: - return stream << string("#"; - case ParseActionTypeReduce: - return stream << ("#"); - default: - return stream; - } -} - -ostream &operator<<(ostream &stream, const ParseTableEntry &entry) { - return stream << entry.actions; -} - -ostream &operator<<(ostream &stream, const ParseState &state) { - stream << string("#"); + return stream << "(Variable " << variable.name << " " << to_string(variable.type) << + " " << variable.rule << ")"; } ostream &operator<<(ostream &stream, const ExternalToken &external_token) { - return stream << "{" << external_token.name << ", " << external_token.type << - "," << external_token.corresponding_internal_token << "}"; + return stream << "(ExternalToken " << external_token.name << " " << + external_token.type << " " << external_token.corresponding_internal_token << ")"; } ostream &operator<<(ostream &stream, const ProductionStep &step) { - stream << "(symbol: " << step.symbol << ", precedence:" << to_string(step.precedence); - stream << ", associativity: "; - switch (step.associativity) { - case rules::AssociativityLeft: - return stream << "left)"; - case rules::AssociativityRight: - return stream << "right)"; - default: - return stream << "none)"; - } + return stream << "(ProductionStep " << step.symbol << " precedence:" << + to_string(step.precedence) << " associativity:" << step.associativity << ")"; } ostream &operator<<(ostream &stream, const PrecedenceRange &range) { - if (range.empty) - return stream << string("{empty}"); - else - return stream 
<< string("{") << to_string(range.min) << string(", ") << to_string(range.max) << string("}"); + if (range.empty) { + return stream << "(PrecedenceRange)"; + } else { + return stream << "(PrecedenceRange " << to_string(range.min) << " " << + to_string(range.max) << ")"; + } } +namespace prepare_grammar { + +ostream &operator<<(ostream &stream, const prepare_grammar::InternedGrammar::Variable &variable) { + return stream << "(Variable " << variable.name << " " << variable.rule << ")"; +} + +ostream &operator<<(ostream &stream, const prepare_grammar::InitialSyntaxGrammar::Variable &variable) { + return stream << "(Variable " << variable.name << " " << variable.rule << ")"; +} + +} // namespace prepare_grammar + namespace build_tables { ostream &operator<<(ostream &stream, const LexItem &item) { - return stream << string("(item ") << item.lhs << string(" ") << *item.rule - << string(")"); + return stream << "(LexItem " << item.lhs << " " << item.rule << ")"; } ostream &operator<<(ostream &stream, const LexItemSet &item_set) { @@ -119,26 +188,7 @@ ostream &operator<<(ostream &stream, const LexItemSet &item_set) { } ostream &operator<<(ostream &stream, const LexItemSet::Transition &transition) { - return stream << "{dest: " << transition.destination << ", prec: " << transition.precedence << "}"; -} - -ostream &operator<<(ostream &stream, const ParseItem &item) { - return stream << string("(item variable:") << to_string(item.variable_index) - << string(" production:") << to_string((size_t)item.production % 1000) - << string(" step:") << to_string(item.step_index) - << string(")"); -} - -std::ostream &operator<<(std::ostream &stream, const ParseItemSet &item_set) { - return stream << item_set.entries; -} - -std::ostream &operator<<(std::ostream &stream, const LookaheadSet &set) { - if (set.entries.get()) { - return stream << *set.entries; - } else { - return stream << "{}"; - } + return stream << "(Transition " << transition.destination << " prec:" << 
transition.precedence << ")"; } } // namespace build_tables diff --git a/test/helpers/stream_methods.h b/test/helpers/stream_methods.h index 149e43c5..58b7fd17 100644 --- a/test/helpers/stream_methods.h +++ b/test/helpers/stream_methods.h @@ -8,6 +8,11 @@ #include #include #include "compiler/grammar.h" +#include "compiler/prepare_grammar/interned_grammar.h" +#include "compiler/prepare_grammar/initial_syntax_grammar.h" +#include "compiler/lexical_grammar.h" +#include "compiler/syntax_grammar.h" +#include "compiler/rule.h" #include "compiler/compile_error.h" #include "compiler/build_tables/lex_item.h" @@ -91,9 +96,8 @@ namespace tree_sitter { using std::ostream; using std::string; using std::to_string; -struct Variable; -struct SyntaxVariable; -struct LexicalVariable; + +struct InputGrammar; struct AdvanceAction; struct AcceptTokenAction; struct ParseAction; @@ -102,20 +106,36 @@ struct ExternalToken; struct ProductionStep; struct PrecedenceRange; -ostream &operator<<(ostream &, const Grammar &); +ostream &operator<<(ostream &, const InputGrammar &); ostream &operator<<(ostream &, const CompileError &); -ostream &operator<<(ostream &, const Rule &); -ostream &operator<<(ostream &, const rule_ptr &); -ostream &operator<<(ostream &, const Variable &); -ostream &operator<<(ostream &, const SyntaxVariable &); -ostream &operator<<(ostream &, const LexicalVariable &); -ostream &operator<<(ostream &, const AdvanceAction &); -ostream &operator<<(ostream &, const AcceptTokenAction &); -ostream &operator<<(ostream &, const ParseAction &); -ostream &operator<<(ostream &, const ParseState &); ostream &operator<<(ostream &, const ExternalToken &); ostream &operator<<(ostream &, const ProductionStep &); ostream &operator<<(ostream &, const PrecedenceRange &); +ostream &operator<<(ostream &, const LexicalVariable &); + +namespace rules { + +ostream &operator<<(ostream &, const Blank &); +ostream &operator<<(ostream &, const CharacterRange &); +ostream &operator<<(ostream &, const 
CharacterSet &); +ostream &operator<<(ostream &, const Symbol &); +ostream &operator<<(ostream &, const NamedSymbol &); +ostream &operator<<(ostream &, const String &); +ostream &operator<<(ostream &, const Pattern &); +ostream &operator<<(ostream &stream, const Choice &rule); +ostream &operator<<(ostream &stream, const Seq &rule); +ostream &operator<<(ostream &stream, const Repeat &rule); +ostream &operator<<(ostream &stream, const Metadata &rule); +ostream &operator<<(ostream &stream, const Rule &rule); + +} // namespace rules + +namespace prepare_grammar { + +ostream &operator<<(ostream &, const InitialSyntaxGrammar::Variable &); +ostream &operator<<(ostream &, const InternedGrammar::Variable &); + +} // namespace prepare_grammar namespace build_tables { diff --git a/test/integration/test_grammars.cc b/test/integration/test_grammars.cc index f72043d9..37a1a949 100644 --- a/test/integration/test_grammars.cc +++ b/test/integration/test_grammars.cc @@ -25,10 +25,10 @@ for (auto &language_name : test_languages) { } string grammar_json = read_file(grammar_path); - TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str()); if (file_exists(expected_error_path)) { it("fails with the correct error message", [&]() { + TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str()); string expected_error = read_file(expected_error_path); AssertThat((void *)compile_result.error_message, !IsNull()); AssertThat(compile_result.error_message, Equals(expected_error)); @@ -41,6 +41,8 @@ for (auto &language_name : test_languages) { before_each([&]() { if (!language) { + TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str()); + language = load_test_language( language_name, compile_result, diff --git a/tests.gyp b/tests.gyp index 9a792820..770ec9d8 100644 --- a/tests.gyp +++ b/tests.gyp @@ -37,13 +37,13 @@ '-std=c99', ], 'cflags_cc': [ - '-std=c++0x', + '-std=c++14', ], 'ldflags': [ '-g', ], 'xcode_settings': { - 
'CLANG_CXX_LANGUAGE_STANDARD': 'c++11', + 'CLANG_CXX_LANGUAGE_STANDARD': 'c++14', 'OTHER_LDFLAGS': ['-g'], 'GCC_OPTIMIZATION_LEVEL': '0', 'ALWAYS_SEARCH_USER_PATHS': 'NO', From 79ffc20be19fa509cfe01bc720d80c66c9ff2ebc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Mar 2017 13:31:35 -0700 Subject: [PATCH 2/6] Remove RTTI flag in gyp file --- project.gyp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/project.gyp b/project.gyp index d4292f39..4ed51809 100644 --- a/project.gyp +++ b/project.gyp @@ -50,12 +50,8 @@ 'cflags_cc': [ '-std=c++14', ], - 'cflags_cc!': [ - '-fno-rtti' - ], 'xcode_settings': { 'CLANG_CXX_LANGUAGE_STANDARD': 'c++14', - 'GCC_ENABLE_CPP_RTTI': 'YES', 'GCC_ENABLE_CPP_EXCEPTIONS': 'NO', }, 'direct_dependent_settings': { From 90d21adf3b494936aa455d57a5d15a859921a468 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Mar 2017 13:37:26 -0700 Subject: [PATCH 3/6] Format make_visitor helper consistently w/ project --- src/compiler/rule.h | 4 ++-- src/compiler/util/make_visitor.h | 25 +++++++++---------------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/compiler/rule.h b/src/compiler/rule.h index 19c45564..9e0ada18 100644 --- a/src/compiler/rule.h +++ b/src/compiler/rule.h @@ -72,7 +72,7 @@ struct Rule { const RuleType & get_unchecked() const; template - auto accept(FunctionType function) const -> decltype(function(blank)) { + inline auto accept(FunctionType function) const -> decltype(function(blank)) { switch (type) { case BlankType: return function(blank); case CharacterSetType: return function(character_set); @@ -88,7 +88,7 @@ struct Rule { } template - auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward(functions)...))){ + inline auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward(functions)...))){ return accept(util::make_visitor(std::forward(functions)...)); } diff --git a/src/compiler/util/make_visitor.h 
b/src/compiler/util/make_visitor.h index f55d4a09..6de51dc4 100644 --- a/src/compiler/util/make_visitor.h +++ b/src/compiler/util/make_visitor.h @@ -8,28 +8,21 @@ template struct visitor; template -struct visitor : Fn -{ - using type = Fn; - using Fn::operator(); - - visitor(Fn fn) : Fn(fn) {} +struct visitor : Fn { + using Fn::operator(); + visitor(Fn fn) : Fn(fn) {} }; template -struct visitor : Fn, visitor -{ - using type = visitor; - using Fn::operator(); - using visitor::operator(); - - visitor(Fn fn, Fns... fns) : Fn(fn), visitor(fns...) {} +struct visitor : Fn, visitor { + using Fn::operator(); + using visitor::operator(); + visitor(Fn fn, Fns... fns) : Fn(fn), visitor(fns...) {} }; template -visitor make_visitor(Fns... fns) -{ - return visitor(fns...); +visitor make_visitor(Fns... fns) { + return visitor(fns...); } } // namespace util From 416cbb9defde64721d861ba7fb75c699a80b96a8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Mar 2017 13:49:56 -0700 Subject: [PATCH 4/6] Add missing cassert includes --- src/compiler/prepare_grammar/expand_repeats.cc | 16 ++++++---------- src/compiler/prepare_grammar/extract_tokens.cc | 6 +++--- src/compiler/prepare_grammar/flatten_grammar.cc | 1 + src/compiler/prepare_grammar/intern_symbols.cc | 3 ++- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/compiler/prepare_grammar/expand_repeats.cc b/src/compiler/prepare_grammar/expand_repeats.cc index d81c9cfc..785e7b7d 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cc +++ b/src/compiler/prepare_grammar/expand_repeats.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include "compiler/grammar.h" #include "compiler/rule.h" @@ -12,13 +13,8 @@ using std::string; using std::vector; using std::pair; using std::to_string; -using std::make_shared; -using rules::Blank; -using rules::Choice; -using rules::Repeat; -using rules::Seq; -using rules::Symbol; using rules::Rule; +using rules::Symbol; class ExpandRepeats { string rule_name; @@ -61,8 
+57,8 @@ class ExpandRepeats { aux_rules.push_back({ helper_rule_name, VariableTypeAuxiliary, - Choice{{ - Seq{repeat_symbol, inner_rule}, + rules::Choice{{ + rules::Seq{repeat_symbol, inner_rule}, inner_rule, }} }); @@ -74,8 +70,8 @@ class ExpandRepeats { }, [](auto) { - assert(false); - return Blank{}; + assert(!"Unexpected rule type"); + return rules::Blank{}; } ); } diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index a476d30c..d27175bd 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -1,7 +1,7 @@ #include "compiler/prepare_grammar/extract_tokens.h" #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ class SymbolReplacer { }, [](auto) { - assert(false); + assert(!"Unexpected rule type"); return rules::Blank{}; } ); @@ -152,7 +152,7 @@ class TokenExtractor { }, [](auto) { - assert(false); + assert(!"Unexpected rule type"); return rules::Blank{}; } ); diff --git a/src/compiler/prepare_grammar/flatten_grammar.cc b/src/compiler/prepare_grammar/flatten_grammar.cc index 81aeea75..ff5a3686 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.cc +++ b/src/compiler/prepare_grammar/flatten_grammar.cc @@ -1,5 +1,6 @@ #include "compiler/prepare_grammar/flatten_grammar.h" #include +#include #include #include "compiler/prepare_grammar/extract_choices.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc index 504e9b97..ee015a83 100644 --- a/src/compiler/prepare_grammar/intern_symbols.cc +++ b/src/compiler/prepare_grammar/intern_symbols.cc @@ -1,6 +1,7 @@ #include "compiler/prepare_grammar/intern_symbols.h" #include #include +#include #include #include "tree_sitter/compiler.h" #include "compiler/grammar.h" @@ -54,7 +55,7 @@ class SymbolInterner { }, [](auto) { - assert(false); + 
assert(!"Unexpected rule type"); return rules::Blank{}; } ); From d9fb863bea119127d54b03d89ccc73436a99ebbb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Mar 2017 13:58:51 -0700 Subject: [PATCH 5/6] Fix build errors w/ gcc --- src/compiler/build_tables/parse_item_set_builder.cc | 2 -- src/compiler/rule.cc | 4 ++-- src/compiler/rule.h | 2 +- src/compiler/rules/character_set.cc | 2 +- src/compiler/rules/character_set.h | 1 + 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc index b0531d37..36c3942f 100644 --- a/src/compiler/build_tables/parse_item_set_builder.cc +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -16,8 +16,6 @@ using std::get; using std::pair; using std::tuple; using std::make_tuple; -using std::shared_ptr; -using std::make_shared; using rules::Symbol; using rules::NONE; diff --git a/src/compiler/rule.cc b/src/compiler/rule.cc index 55508c77..a20d0baa 100644 --- a/src/compiler/rule.cc +++ b/src/compiler/rule.cc @@ -115,7 +115,6 @@ Rule::~Rule() noexcept { bool Rule::operator==(const Rule &other) const { if (type != other.type) return false; switch (type) { - case Rule::BlankType: return blank == other.blank; case Rule::CharacterSetType: return character_set == other.character_set; case Rule::StringType: return string == other.string; case Rule::PatternType: return pattern == other.pattern; @@ -125,6 +124,7 @@ bool Rule::operator==(const Rule &other) const { case Rule::MetadataType: return metadata == other.metadata; case Rule::RepeatType: return repeat == other.repeat; case Rule::SeqType: return seq == other.seq; + default: return blank == other.blank; } } @@ -219,7 +219,6 @@ size_t hash::operator()(const Metadata &metadata) const { size_t hash::operator()(const Rule &rule) const { size_t result = hash()(rule.type); switch (rule.type) { - case Rule::BlankType: return result ^ hash()(rule.blank); case 
Rule::CharacterSetType: return result ^ hash()(rule.character_set); case Rule::StringType: return result ^ hash()(rule.string); case Rule::PatternType: return result ^ hash()(rule.pattern); @@ -229,6 +228,7 @@ size_t hash::operator()(const Rule &rule) const { case Rule::MetadataType: return result ^ hash()(rule.metadata); case Rule::RepeatType: return result ^ hash()(rule.repeat); case Rule::SeqType: return result ^ hash()(rule.seq); + default: return result ^ hash()(rule.blank); } } diff --git a/src/compiler/rule.h b/src/compiler/rule.h index 9e0ada18..0dca6f38 100644 --- a/src/compiler/rule.h +++ b/src/compiler/rule.h @@ -74,7 +74,6 @@ struct Rule { template inline auto accept(FunctionType function) const -> decltype(function(blank)) { switch (type) { - case BlankType: return function(blank); case CharacterSetType: return function(character_set); case StringType: return function(string); case PatternType: return function(pattern); @@ -84,6 +83,7 @@ struct Rule { case MetadataType: return function(metadata); case RepeatType: return function(repeat); case SeqType: return function(seq); + default: return function(blank); } } diff --git a/src/compiler/rules/character_set.cc b/src/compiler/rules/character_set.cc index 089fce6a..5b0c3464 100644 --- a/src/compiler/rules/character_set.cc +++ b/src/compiler/rules/character_set.cc @@ -41,7 +41,7 @@ static set add_chars(set *left, const set &right) static vector consolidate_ranges(const set &chars) { vector result; for (uint32_t c : chars) { - size_t size = result.size(); + auto size = result.size(); if (size >= 2 && result[size - 2].max == (c - 2)) { result.pop_back(); result.back().max = c; diff --git a/src/compiler/rules/character_set.h b/src/compiler/rules/character_set.h index 7a8ae715..0c991c43 100644 --- a/src/compiler/rules/character_set.h +++ b/src/compiler/rules/character_set.h @@ -3,6 +3,7 @@ #include #include +#include namespace tree_sitter { namespace rules { From b3edd8f7497a2cf732fbeb2efe1420776d018880 Mon 
Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Mar 2017 14:28:13 -0700 Subject: [PATCH 6/6] Remove use of shared_ptr in choice, repeat, and seq factories --- .../build_tables/build_parse_table.cc | 1 - src/compiler/build_tables/lex_item.cc | 8 +- .../build_tables/lex_item_transitions.cc | 14 +- .../build_tables/lex_table_builder.cc | 8 +- .../build_tables/rule_can_be_blank.cc | 6 +- src/compiler/parse_grammar.cc | 11 +- .../prepare_grammar/expand_repeats.cc | 12 +- src/compiler/prepare_grammar/expand_tokens.cc | 12 +- .../prepare_grammar/extract_choices.cc | 8 +- .../prepare_grammar/extract_tokens.cc | 23 ++- .../prepare_grammar/flatten_grammar.cc | 15 +- .../prepare_grammar/intern_symbols.cc | 10 +- .../prepare_grammar/normalize_rules.cc | 2 +- src/compiler/prepare_grammar/parse_regex.cc | 16 +- .../prepare_grammar/token_description.cc | 6 +- src/compiler/rule.cc | 160 ++++++++++++------ src/compiler/rule.h | 71 ++++---- src/compiler/rules/choice.cc | 27 --- src/compiler/rules/choice.h | 1 - src/compiler/rules/repeat.cc | 6 - src/compiler/rules/repeat.h | 1 - src/compiler/rules/seq.cc | 22 --- src/compiler/rules/seq.h | 1 - test/compiler/build_tables/lex_item_test.cc | 54 +++--- .../build_tables/rule_can_be_blank_test.cc | 12 +- .../prepare_grammar/expand_repeats_test.cc | 60 +++---- .../prepare_grammar/expand_tokens_test.cc | 14 +- .../prepare_grammar/extract_choices_test.cc | 22 +-- .../prepare_grammar/extract_tokens_test.cc | 40 ++--- .../prepare_grammar/flatten_grammar_test.cc | 12 +- .../prepare_grammar/intern_symbols_test.cc | 8 +- .../prepare_grammar/parse_regex_test.cc | 52 +++--- test/compiler/rules/choice_test.cc | 28 +-- test/compiler/rules/repeat_test.cc | 4 +- 34 files changed, 366 insertions(+), 381 deletions(-) diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 20d02fa1..6ab35f76 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ 
b/src/compiler/build_tables/build_parse_table.cc @@ -24,7 +24,6 @@ using std::map; using std::string; using std::to_string; using std::unordered_map; -using std::make_shared; using rules::Associativity; using rules::Symbol; using rules::END_OF_INPUT; diff --git a/src/compiler/build_tables/lex_item.cc b/src/compiler/build_tables/lex_item.cc index 23d63b1b..59b59dc1 100644 --- a/src/compiler/build_tables/lex_item.cc +++ b/src/compiler/build_tables/lex_item.cc @@ -34,7 +34,7 @@ static CompletionStatus get_completion_status(const rules::Rule &rule) { }, [](rules::Metadata metadata) { - CompletionStatus result = get_completion_status(metadata.rule); + CompletionStatus result = get_completion_status(*metadata.rule); if (result.is_done && result.precedence.empty && metadata.params.has_precedence) { result.precedence.add(metadata.params.precedence); } @@ -42,13 +42,13 @@ static CompletionStatus get_completion_status(const rules::Rule &rule) { }, [](rules::Repeat repeat) { - return get_completion_status(repeat.rule); + return get_completion_status(*repeat.rule); }, [](rules::Seq sequence) { - CompletionStatus left_status = get_completion_status(sequence.left); + CompletionStatus left_status = get_completion_status(*sequence.left); if (left_status.is_done) { - return get_completion_status(sequence.right); + return get_completion_status(*sequence.right); } else { return CompletionStatus{false, PrecedenceRange()}; } diff --git a/src/compiler/build_tables/lex_item_transitions.cc b/src/compiler/build_tables/lex_item_transitions.cc index 60c89f9d..c4e05420 100644 --- a/src/compiler/build_tables/lex_item_transitions.cc +++ b/src/compiler/build_tables/lex_item_transitions.cc @@ -106,33 +106,33 @@ class TransitionBuilder { [this](const rules::Seq &sequence) { TransitionMap left_transitions; - TransitionBuilder(&left_transitions, this).apply(sequence.left); + TransitionBuilder(&left_transitions, this).apply(*sequence.left); for (const auto &pair : left_transitions) { add_transition( 
transitions, pair.first, transform_transition(pair.second, [&sequence](Rule rule) -> Rule { - return rules::Seq::build({ rule, sequence.right }); + return Rule::seq({rule, *sequence.right}); }) ); } - if (rule_can_be_blank(sequence.left)) { - apply(sequence.right); + if (rule_can_be_blank(*sequence.left)) { + apply(*sequence.right); } }, [this](const rules::Repeat &repeat) { TransitionMap content_transitions; - TransitionBuilder(&content_transitions, this).apply(repeat.rule); + TransitionBuilder(&content_transitions, this).apply(*repeat.rule); for (const auto &pair : content_transitions) { add_transition(transitions, pair.first, pair.second); add_transition( transitions, pair.first, transform_transition(pair.second, [&repeat](Rule item_rule) { - return rules::Seq::build({ item_rule, repeat }); + return Rule::seq({ item_rule, repeat }); }) ); } @@ -151,7 +151,7 @@ class TransitionBuilder { params.is_active = true; TransitionMap content_transitions; - TransitionBuilder(&content_transitions, this).apply(metadata.rule); + TransitionBuilder(&content_transitions, this).apply(*metadata.rule); for (const auto &pair : content_transitions) { add_transition( diff --git a/src/compiler/build_tables/lex_table_builder.cc b/src/compiler/build_tables/lex_table_builder.cc index 3f9e14bb..f9068d42 100644 --- a/src/compiler/build_tables/lex_table_builder.cc +++ b/src/compiler/build_tables/lex_table_builder.cc @@ -36,7 +36,7 @@ class StartingCharacterAggregator { void apply(const Rule &rule) { rule.match( [this](const Seq &sequence) { - apply(sequence.left); + apply(*sequence.left); }, [this](const rules::Choice &rule) { @@ -46,11 +46,11 @@ class StartingCharacterAggregator { }, [this](const rules::Repeat &rule) { - apply(rule.rule); + apply(*rule.rule); }, [this](const rules::Metadata &rule) { - apply(rule.rule); + apply(*rule.rule); }, [this](const rules::CharacterSet &rule) { @@ -299,7 +299,7 @@ class LexTableBuilderImpl : public LexTableBuilder { result.entries.insert(LexItem( 
symbol, Metadata::separator( - Seq::build({ + Rule::seq({ separator_rule, Metadata::main_token(rule) }) diff --git a/src/compiler/build_tables/rule_can_be_blank.cc b/src/compiler/build_tables/rule_can_be_blank.cc index a72fc1ff..97737fd3 100644 --- a/src/compiler/build_tables/rule_can_be_blank.cc +++ b/src/compiler/build_tables/rule_can_be_blank.cc @@ -15,11 +15,11 @@ bool rule_can_be_blank(const rules::Rule &rule) { }, [](rules::Repeat repeat) { - return rule_can_be_blank(repeat.rule); + return rule_can_be_blank(*repeat.rule); }, [](rules::Metadata metadata) { - return rule_can_be_blank(metadata.rule); + return rule_can_be_blank(*metadata.rule); }, [](rules::Choice choice) { @@ -32,7 +32,7 @@ bool rule_can_be_blank(const rules::Rule &rule) { }, [](rules::Seq seq) { - return rule_can_be_blank(seq.left) && rule_can_be_blank(seq.right); + return rule_can_be_blank(*seq.left) && rule_can_be_blank(*seq.right); }, [](auto) { return false; } diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_grammar.cc index 1a631c02..536672f4 100644 --- a/src/compiler/parse_grammar.cc +++ b/src/compiler/parse_grammar.cc @@ -14,9 +14,6 @@ using std::unordered_set; using std::pair; using rules::Rule; using rules::Blank; -using rules::Choice; -using rules::Repeat; -using rules::Seq; using rules::Metadata; using rules::Pattern; using rules::String; @@ -70,7 +67,7 @@ ParseRuleResult parse_rule(json_value *rule_json) { } members.push_back(result.rule); } - return Rule(Choice{members}); + return Rule::choice(members); } if (type == "SEQ") { @@ -88,7 +85,7 @@ ParseRuleResult parse_rule(json_value *rule_json) { } members.push_back(result.rule); } - return *Seq::build(members); + return Rule::seq(members); } if (type == "REPEAT") { @@ -97,7 +94,7 @@ ParseRuleResult parse_rule(json_value *rule_json) { if (!result.error_message.empty()) { return "Invalid repeat content: " + result.error_message; } - return Rule(Choice{{Repeat{result.rule}, Blank{}}}); + return 
Rule::choice({Rule::repeat(result.rule), Blank{}}); } if (type == "REPEAT1") { @@ -106,7 +103,7 @@ ParseRuleResult parse_rule(json_value *rule_json) { if (!result.error_message.empty()) { return "Invalid repeat content: " + result.error_message; } - return Rule(Repeat{result.rule}); + return Rule::repeat(result.rule); } if (type == "TOKEN") { diff --git a/src/compiler/prepare_grammar/expand_repeats.cc b/src/compiler/prepare_grammar/expand_repeats.cc index 785e7b7d..ec2ec19a 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cc +++ b/src/compiler/prepare_grammar/expand_repeats.cc @@ -28,17 +28,17 @@ class ExpandRepeats { [&](const rules::Symbol &symbol) { return symbol; }, [&](const rules::Choice &choice) { - vector elements; + vector elements; for (const auto &element : choice.elements) { elements.push_back(apply(element)); } - return rules::Choice::build(elements); + return Rule::choice(elements); }, [&](const rules::Seq &sequence) { return rules::Seq{ - apply(sequence.left), - apply(sequence.right) + apply(*sequence.left), + apply(*sequence.right) }; }, @@ -49,7 +49,7 @@ class ExpandRepeats { } } - Rule inner_rule = apply(repeat.rule); + Rule inner_rule = apply(*repeat.rule); size_t index = aux_rules.size(); string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count); Symbol repeat_symbol = Symbol::non_terminal(offset + index); @@ -66,7 +66,7 @@ class ExpandRepeats { }, [&](const rules::Metadata &metadata) { - return rules::Metadata{apply(metadata.rule), metadata.params}; + return rules::Metadata{apply(*metadata.rule), metadata.params}; }, [](auto) { diff --git a/src/compiler/prepare_grammar/expand_tokens.cc b/src/compiler/prepare_grammar/expand_tokens.cc index e384dfed..48396540 100644 --- a/src/compiler/prepare_grammar/expand_tokens.cc +++ b/src/compiler/prepare_grammar/expand_tokens.cc @@ -33,7 +33,7 @@ ExpandTokenResult expand_token(const rules::Rule &rule) { elements.push_back(rules::CharacterSet().include(el)); } - return 
*rules::Seq::build(elements); + return Rule::seq(elements); }, [](const rules::Pattern &pattern) -> ExpandTokenResult { @@ -43,21 +43,21 @@ ExpandTokenResult expand_token(const rules::Rule &rule) { }, [](const rules::Repeat &rule) -> ExpandTokenResult { - auto result = expand_token(rule.rule); + auto result = expand_token(*rule.rule); if (result.error) return result.error; - return *rules::Repeat::build(result.rule); + return Rule::repeat(result.rule); }, [](const rules::Metadata &rule) -> ExpandTokenResult { - auto result = expand_token(rule.rule); + auto result = expand_token(*rule.rule); if (result.error) return result.error; return Rule(rules::Metadata{result.rule, rule.params}); }, [](const rules::Seq &rule) -> ExpandTokenResult { - auto left_result = expand_token(rule.left); + auto left_result = expand_token(*rule.left); if (left_result.error) return left_result.error; - auto right_result = expand_token(rule.right); + auto right_result = expand_token(*rule.right); if (right_result.error) return right_result.error; return Rule(rules::Seq{left_result.rule, right_result.rule}); }, diff --git a/src/compiler/prepare_grammar/extract_choices.cc b/src/compiler/prepare_grammar/extract_choices.cc index 04225b6e..3b471538 100644 --- a/src/compiler/prepare_grammar/extract_choices.cc +++ b/src/compiler/prepare_grammar/extract_choices.cc @@ -13,9 +13,9 @@ vector extract_choices(const Rule &rule) { return rule.match( [](const rules::Seq &sequence) { vector result; - for (auto &left_entry : extract_choices(sequence.left)) { - for (auto &right_entry : extract_choices(sequence.right)) { - result.push_back(rules::Seq::build({left_entry, right_entry})); + for (auto &left_entry : extract_choices(*sequence.left)) { + for (auto &right_entry : extract_choices(*sequence.right)) { + result.push_back(rules::Rule::seq({left_entry, right_entry})); } } return result; @@ -23,7 +23,7 @@ vector extract_choices(const Rule &rule) { [](const rules::Metadata &rule) { vector result; - for (auto 
&entry : extract_choices(rule.rule)) { + for (auto &entry : extract_choices(*rule.rule)) { result.push_back(rules::Metadata{entry, rule.params}); } return result; diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index d27175bd..39f21698 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -43,22 +43,22 @@ class SymbolReplacer { for (const auto &element : choice.elements) { elements.push_back(apply(element)); } - return rules::Choice::build(elements); + return Rule::choice(elements); }, [this](const rules::Seq &sequence) { return rules::Seq{ - apply(sequence.left), - apply(sequence.right) + apply(*sequence.left), + apply(*sequence.right) }; }, [this](const rules::Repeat &repeat) { - return rules::Repeat{apply(repeat.rule)}; + return Rule::repeat(apply(*repeat.rule)); }, [this](const rules::Metadata &metadata) { - return rules::Metadata{apply(metadata.rule), metadata.params}; + return rules::Metadata{apply(*metadata.rule), metadata.params}; }, [](auto) { @@ -114,9 +114,9 @@ class TokenExtractor { [this](const rules::Metadata &rule) -> Rule { if (rule.params.is_token) { - return extract_token(rule.rule, VariableTypeAuxiliary); + return extract_token(*rule.rule, VariableTypeAuxiliary); } else { - return rules::Metadata{apply(rule.rule), rule.params}; + return rules::Metadata{apply(*rule.rule), rule.params}; } }, @@ -129,14 +129,11 @@ class TokenExtractor { }, [this](const rules::Repeat &rule) { - return rules::Repeat{apply(rule.rule)}; + return Rule::repeat(apply(*rule.rule)); }, [this](const rules::Seq &rule) { - return rules::Seq{ - apply(rule.left), - apply(rule.right) - }; + return Rule::seq({apply(*rule.left), apply(*rule.right)}); }, [this](const rules::Choice &rule) { @@ -144,7 +141,7 @@ class TokenExtractor { for (const auto &element : rule.elements) { elements.push_back(apply(element)); } - return rules::Choice::build(elements); + return 
Rule::choice(elements); }, [](const rules::Symbol &symbol) { diff --git a/src/compiler/prepare_grammar/flatten_grammar.cc b/src/compiler/prepare_grammar/flatten_grammar.cc index ff5a3686..846c361d 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.cc +++ b/src/compiler/prepare_grammar/flatten_grammar.cc @@ -34,12 +34,15 @@ class FlattenRule { }, [&](const rules::Metadata &metadata) { - if (metadata.params.has_precedence) + if (metadata.params.has_precedence) { precedence_stack.push_back(metadata.params.precedence); - if (metadata.params.has_associativity) - associativity_stack.push_back(metadata.params.associativity); + } - apply(metadata.rule); + if (metadata.params.has_associativity) { + associativity_stack.push_back(metadata.params.associativity); + } + + apply(*metadata.rule); if (metadata.params.has_precedence) { last_precedence = precedence_stack.back(); @@ -55,10 +58,10 @@ class FlattenRule { }, [&](const rules::Seq &sequence) { - apply(sequence.left); + apply(*sequence.left); last_precedence = 0; last_associativity = rules::AssociativityNone; - apply(sequence.right); + apply(*sequence.right); }, [&](const rules::Blank &blank) {}, diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc index ee015a83..d705f121 100644 --- a/src/compiler/prepare_grammar/intern_symbols.cc +++ b/src/compiler/prepare_grammar/intern_symbols.cc @@ -14,7 +14,6 @@ using std::string; using std::vector; using std::set; using std::pair; -using std::make_shared; using rules::Symbol; using rules::Rule; @@ -40,18 +39,15 @@ class SymbolInterner { }, [&](const rules::Seq &sequence) { - return rules::Seq{ - apply(sequence.left), - apply(sequence.right) - }; + return rules::Seq{apply(*sequence.left), apply(*sequence.right)}; }, [&](const rules::Repeat &repeat) { - return rules::Repeat{apply(repeat.rule)}; + return rules::Repeat{apply(*repeat.rule)}; }, [&](const rules::Metadata &metadata) { - return 
rules::Metadata{apply(metadata.rule), metadata.params}; + return rules::Metadata{apply(*metadata.rule), metadata.params}; }, [](auto) { diff --git a/src/compiler/prepare_grammar/normalize_rules.cc b/src/compiler/prepare_grammar/normalize_rules.cc index fd659a00..28602b2b 100644 --- a/src/compiler/prepare_grammar/normalize_rules.cc +++ b/src/compiler/prepare_grammar/normalize_rules.cc @@ -11,7 +11,7 @@ LexicalGrammar normalize_rules(const LexicalGrammar &input_grammar) { LexicalGrammar result(input_grammar); for (LexicalVariable &variable : result.variables) { - variable.rule = rules::Choice::build(extract_choices(variable.rule)); + variable.rule = Rule::choice(extract_choices(variable.rule)); } return result; diff --git a/src/compiler/prepare_grammar/parse_regex.cc b/src/compiler/prepare_grammar/parse_regex.cc index 7c9f6ce2..cab76443 100644 --- a/src/compiler/prepare_grammar/parse_regex.cc +++ b/src/compiler/prepare_grammar/parse_regex.cc @@ -12,13 +12,9 @@ namespace prepare_grammar { using std::string; using std::vector; using std::pair; -using std::make_shared; using rules::CharacterSet; using rules::Blank; using rules::Rule; -using rules::Choice; -using rules::Seq; -using rules::Repeat; class PatternParser { public: @@ -45,7 +41,7 @@ class PatternParser { } choices.push_back(pair.first); } while (has_more_input()); - return {Choice::build(choices), CompileError::none()}; + return {Rule::choice(choices), CompileError::none()}; } private: @@ -60,7 +56,7 @@ class PatternParser { if (pair.second) { return {Blank{}, pair.second}; } - result = Seq::build({result, pair.first}); + result = Rule::seq({result, pair.first}); } while (has_more_input()); return { result, CompileError::none() }; } @@ -76,18 +72,18 @@ class PatternParser { switch (peek()) { case '*': next(); - result = Choice::build({ - Repeat{result}, + result = Rule::choice({ + Rule::repeat(result), Blank{} }); break; case '+': next(); - result = Repeat{result}; + result = Rule::repeat(result); break; case 
'?': next(); - result = Choice::build({result, Blank{}}); + result = Rule::choice({result, Blank{}}); break; } } diff --git a/src/compiler/prepare_grammar/token_description.cc b/src/compiler/prepare_grammar/token_description.cc index 2aa43bd7..f10904ba 100644 --- a/src/compiler/prepare_grammar/token_description.cc +++ b/src/compiler/prepare_grammar/token_description.cc @@ -31,17 +31,17 @@ class TokenDescription { }, [&](const rules::Metadata &rule) { - return apply(rule.rule); + return apply(*rule.rule); }, [&](const rules::Seq &rule) { is_trivial = false; - return apply(rule.left) + apply(rule.right); + return apply(*rule.left) + apply(*rule.right); }, [&](const rules::Repeat &rule) { is_trivial = false; - return apply(rule.rule) + "+"; + return apply(*rule.rule) + "+"; }, [&](const rules::Choice &rule) { diff --git a/src/compiler/rule.cc b/src/compiler/rule.cc index a20d0baa..f802f3fa 100644 --- a/src/compiler/rule.cc +++ b/src/compiler/rule.cc @@ -5,28 +5,29 @@ namespace tree_sitter { namespace rules { using std::move; +using std::vector; using util::hash_combine; -Rule::Rule(const Rule &other) : blank(Blank{}), type(BlankType) { +Rule::Rule(const Rule &other) : blank_(Blank{}), type(BlankType) { *this = other; } -Rule::Rule(Rule &&other) noexcept : blank(Blank{}), type(BlankType) { +Rule::Rule(Rule &&other) noexcept : blank_(Blank{}), type(BlankType) { *this = move(other); } static void destroy_value(Rule *rule) { switch (rule->type) { - case Rule::BlankType: return rule->blank.~Blank(); - case Rule::CharacterSetType: return rule->character_set.~CharacterSet(); - case Rule::StringType: return rule->string .~String(); - case Rule::PatternType: return rule->pattern .~Pattern(); - case Rule::NamedSymbolType: return rule->named_symbol.~NamedSymbol(); - case Rule::SymbolType: return rule->symbol .~Symbol(); - case Rule::ChoiceType: return rule->choice .~Choice(); - case Rule::MetadataType: return rule->metadata .~Metadata(); - case Rule::RepeatType: return 
rule->repeat .~Repeat(); - case Rule::SeqType: return rule->seq .~Seq(); + case Rule::BlankType: return rule->blank_.~Blank(); + case Rule::CharacterSetType: return rule->character_set_.~CharacterSet(); + case Rule::StringType: return rule->string_ .~String(); + case Rule::PatternType: return rule->pattern_ .~Pattern(); + case Rule::NamedSymbolType: return rule->named_symbol_.~NamedSymbol(); + case Rule::SymbolType: return rule->symbol_ .~Symbol(); + case Rule::ChoiceType: return rule->choice_ .~Choice(); + case Rule::MetadataType: return rule->metadata_ .~Metadata(); + case Rule::RepeatType: return rule->repeat_ .~Repeat(); + case Rule::SeqType: return rule->seq_ .~Seq(); } } @@ -35,34 +36,34 @@ Rule &Rule::operator=(const Rule &other) { type = other.type; switch (type) { case BlankType: - new (&blank) Blank(other.blank); + new (&blank_) Blank(other.blank_); break; case CharacterSetType: - new (&character_set) CharacterSet(other.character_set); + new (&character_set_) CharacterSet(other.character_set_); break; case StringType: - new (&string) String(other.string); + new (&string_) String(other.string_); break; case PatternType: - new (&pattern) Pattern(other.pattern); + new (&pattern_) Pattern(other.pattern_); break; case NamedSymbolType: - new (&named_symbol) NamedSymbol(other.named_symbol); + new (&named_symbol_) NamedSymbol(other.named_symbol_); break; case SymbolType: - new (&symbol) Symbol(other.symbol); + new (&symbol_) Symbol(other.symbol_); break; case ChoiceType: - new (&choice) Choice(other.choice); + new (&choice_) Choice(other.choice_); break; case MetadataType: - new (&metadata) Metadata(other.metadata); + new (&metadata_) Metadata(other.metadata_); break; case RepeatType: - new (&repeat) Repeat(other.repeat); + new (&repeat_) Repeat(other.repeat_); break; case SeqType: - new (&seq) Seq(other.seq); + new (&seq_) Seq(other.seq_); break; } return *this; @@ -73,38 +74,38 @@ Rule &Rule::operator=(Rule &&other) noexcept { type = other.type; switch (type) { 
case BlankType: - new (&blank) Blank(move(other.blank)); + new (&blank_) Blank(move(other.blank_)); break; case CharacterSetType: - new (&character_set) CharacterSet(move(other.character_set)); + new (&character_set_) CharacterSet(move(other.character_set_)); break; case StringType: - new (&string) String(move(other.string)); + new (&string_) String(move(other.string_)); break; case PatternType: - new (&pattern) Pattern(move(other.pattern)); + new (&pattern_) Pattern(move(other.pattern_)); break; case NamedSymbolType: - new (&named_symbol) NamedSymbol(move(other.named_symbol)); + new (&named_symbol_) NamedSymbol(move(other.named_symbol_)); break; case SymbolType: - new (&symbol) Symbol(move(other.symbol)); + new (&symbol_) Symbol(move(other.symbol_)); break; case ChoiceType: - new (&choice) Choice(move(other.choice)); + new (&choice_) Choice(move(other.choice_)); break; case MetadataType: - new (&metadata) Metadata(move(other.metadata)); + new (&metadata_) Metadata(move(other.metadata_)); break; case RepeatType: - new (&repeat) Repeat(move(other.repeat)); + new (&repeat_) Repeat(move(other.repeat_)); break; case SeqType: - new (&seq) Seq(move(other.seq)); + new (&seq_) Seq(move(other.seq_)); break; } other.type = BlankType; - other.blank = Blank{}; + other.blank_ = Blank{}; return *this; } @@ -115,16 +116,16 @@ Rule::~Rule() noexcept { bool Rule::operator==(const Rule &other) const { if (type != other.type) return false; switch (type) { - case Rule::CharacterSetType: return character_set == other.character_set; - case Rule::StringType: return string == other.string; - case Rule::PatternType: return pattern == other.pattern; - case Rule::NamedSymbolType: return named_symbol == other.named_symbol; - case Rule::SymbolType: return symbol == other.symbol; - case Rule::ChoiceType: return choice == other.choice; - case Rule::MetadataType: return metadata == other.metadata; - case Rule::RepeatType: return repeat == other.repeat; - case Rule::SeqType: return seq == 
other.seq; - default: return blank == other.blank; + case Rule::CharacterSetType: return character_set_ == other.character_set_; + case Rule::StringType: return string_ == other.string_; + case Rule::PatternType: return pattern_ == other.pattern_; + case Rule::NamedSymbolType: return named_symbol_ == other.named_symbol_; + case Rule::SymbolType: return symbol_ == other.symbol_; + case Rule::ChoiceType: return choice_ == other.choice_; + case Rule::MetadataType: return metadata_ == other.metadata_; + case Rule::RepeatType: return repeat_ == other.repeat_; + case Rule::SeqType: return seq_ == other.seq_; + default: return blank_ == other.blank_; } } @@ -138,7 +139,58 @@ template <> bool Rule::is() const { return type == RepeatType; } template <> -const Symbol & Rule::get_unchecked() const { return symbol; } +const Symbol & Rule::get_unchecked() const { return symbol_; } + +static inline void add_choice_element(std::vector *elements, const Rule &new_rule) { + new_rule.match( + [elements](Choice choice) { + for (auto &element : choice.elements) { + add_choice_element(elements, element); + } + }, + + [elements](auto rule) { + for (auto &element : *elements) { + if (element == rule) return; + } + elements->push_back(rule); + } + ); +} + +Rule Rule::choice(const vector &rules) { + vector elements; + for (auto &element : rules) { + add_choice_element(&elements, element); + } + return (elements.size() == 1) ? elements.front() : Choice{elements}; +} + +Rule Rule::repeat(const Rule &rule) { + return rule.is() ? 
rule : Repeat{rule}; +} + +Rule Rule::seq(const vector &rules) { + Rule result; + for (const auto &rule : rules) { + rule.match( + [](Blank) {}, + [&](Metadata metadata) { + if (!metadata.rule->is()) { + result = Seq{result, rule}; + } + }, + [&](auto) { + if (result.is()) { + result = rule; + } else { + result = Seq{result, rule}; + } + } + ); + } + return result; +} } // namespace rules } // namespace tree_sitter @@ -219,16 +271,16 @@ size_t hash::operator()(const Metadata &metadata) const { size_t hash::operator()(const Rule &rule) const { size_t result = hash()(rule.type); switch (rule.type) { - case Rule::CharacterSetType: return result ^ hash()(rule.character_set); - case Rule::StringType: return result ^ hash()(rule.string); - case Rule::PatternType: return result ^ hash()(rule.pattern); - case Rule::NamedSymbolType: return result ^ hash()(rule.named_symbol); - case Rule::SymbolType: return result ^ hash()(rule.symbol); - case Rule::ChoiceType: return result ^ hash()(rule.choice); - case Rule::MetadataType: return result ^ hash()(rule.metadata); - case Rule::RepeatType: return result ^ hash()(rule.repeat); - case Rule::SeqType: return result ^ hash()(rule.seq); - default: return result ^ hash()(rule.blank); + case Rule::CharacterSetType: return result ^ hash()(rule.character_set_); + case Rule::StringType: return result ^ hash()(rule.string_); + case Rule::PatternType: return result ^ hash()(rule.pattern_); + case Rule::NamedSymbolType: return result ^ hash()(rule.named_symbol_); + case Rule::SymbolType: return result ^ hash()(rule.symbol_); + case Rule::ChoiceType: return result ^ hash()(rule.choice_); + case Rule::MetadataType: return result ^ hash()(rule.metadata_); + case Rule::RepeatType: return result ^ hash()(rule.repeat_); + case Rule::SeqType: return result ^ hash()(rule.seq_); + default: return result ^ hash()(rule.blank_); } } diff --git a/src/compiler/rule.h b/src/compiler/rule.h index 0dca6f38..5c4064e5 100644 --- a/src/compiler/rule.h +++ 
b/src/compiler/rule.h @@ -2,6 +2,7 @@ #define COMPILER_RULE_H_ #include +#include #include "compiler/util/make_visitor.h" #include "compiler/util/hash_combine.h" #include "compiler/rules/blank.h" @@ -20,16 +21,16 @@ namespace rules { struct Rule { union { - Blank blank; - CharacterSet character_set; - String string; - Pattern pattern; - NamedSymbol named_symbol; - Symbol symbol; - Choice choice; - Metadata metadata; - Repeat repeat; - Seq seq; + Blank blank_; + CharacterSet character_set_; + String string_; + Pattern pattern_; + NamedSymbol named_symbol_; + Symbol symbol_; + Choice choice_; + Metadata metadata_; + Repeat repeat_; + Seq seq_; }; enum { @@ -45,19 +46,17 @@ struct Rule { SeqType, } type; - Rule() : blank(Blank{}), type(BlankType) {}; - Rule(const Blank &value) : blank(value), type(BlankType) {}; - Rule(const CharacterSet &value) : character_set(value), type(CharacterSetType) {}; - Rule(const String &value) : string(value), type(StringType) {}; - Rule(const Pattern &value) : pattern(value), type(PatternType) {}; - Rule(const NamedSymbol &value) : named_symbol(value), type(NamedSymbolType) {}; - Rule(const Symbol &value) : symbol(value), type(SymbolType) {}; - Rule(const Choice &value) : choice(value), type(ChoiceType) {}; - Rule(const Metadata &value) : metadata(value), type(MetadataType) {}; - Rule(const Repeat &value) : repeat(value), type(RepeatType) {}; - Rule(const Seq &value) : seq(value), type(SeqType) {}; - - Rule(const std::shared_ptr &value) : Rule(*value) {} + Rule() : blank_(Blank{}), type(BlankType) {}; + Rule(const Blank &value) : blank_(value), type(BlankType) {}; + Rule(const CharacterSet &value) : character_set_(value), type(CharacterSetType) {}; + Rule(const String &value) : string_(value), type(StringType) {}; + Rule(const Pattern &value) : pattern_(value), type(PatternType) {}; + Rule(const NamedSymbol &value) : named_symbol_(value), type(NamedSymbolType) {}; + Rule(const Symbol &value) : symbol_(value), type(SymbolType) {}; + 
Rule(const Choice &value) : choice_(value), type(ChoiceType) {}; + Rule(const Metadata &value) : metadata_(value), type(MetadataType) {}; + Rule(const Repeat &value) : repeat_(value), type(RepeatType) {}; + Rule(const Seq &value) : seq_(value), type(SeqType) {}; Rule(const Rule &other); Rule(Rule &&other) noexcept; @@ -65,6 +64,10 @@ struct Rule { Rule &operator=(Rule &&other) noexcept; ~Rule() noexcept; + static Rule choice(const std::vector &rules); + static Rule seq(const std::vector &rules); + static Rule repeat(const Rule &rule); + template bool is() const; @@ -72,18 +75,18 @@ struct Rule { const RuleType & get_unchecked() const; template - inline auto accept(FunctionType function) const -> decltype(function(blank)) { + inline auto accept(FunctionType function) const -> decltype(function(blank_)) { switch (type) { - case CharacterSetType: return function(character_set); - case StringType: return function(string); - case PatternType: return function(pattern); - case NamedSymbolType: return function(named_symbol); - case SymbolType: return function(symbol); - case ChoiceType: return function(choice); - case MetadataType: return function(metadata); - case RepeatType: return function(repeat); - case SeqType: return function(seq); - default: return function(blank); + case CharacterSetType: return function(character_set_); + case StringType: return function(string_); + case PatternType: return function(pattern_); + case NamedSymbolType: return function(named_symbol_); + case SymbolType: return function(symbol_); + case ChoiceType: return function(choice_); + case MetadataType: return function(metadata_); + case RepeatType: return function(repeat_); + case SeqType: return function(seq_); + default: return function(blank_); } } diff --git a/src/compiler/rules/choice.cc b/src/compiler/rules/choice.cc index cba1f9f7..1b3be56c 100644 --- a/src/compiler/rules/choice.cc +++ b/src/compiler/rules/choice.cc @@ -4,33 +4,6 @@ namespace tree_sitter { namespace rules { -static 
inline void add_choice_element(std::vector *elements, const Rule &new_rule) { - new_rule.match( - [elements](Choice choice) { - for (auto &element : choice.elements) { - add_choice_element(elements, element); - } - }, - - [elements](auto rule) { - for (auto &element : *elements) { - if (element == rule) return; - } - elements->push_back(rule); - } - ); -} - -std::shared_ptr Choice::build(const std::vector &rules) { - std::vector elements; - for (auto &element : rules) { - add_choice_element(&elements, element); - } - return std::make_shared( - (elements.size() == 1) ? elements.front() : Choice{elements} - ); -} - bool Choice::operator==(const Choice &other) const { return elements == other.elements; } diff --git a/src/compiler/rules/choice.h b/src/compiler/rules/choice.h index 3a883a53..6365a565 100644 --- a/src/compiler/rules/choice.h +++ b/src/compiler/rules/choice.h @@ -12,7 +12,6 @@ struct Rule; struct Choice { std::vector elements; - static std::shared_ptr build(const std::vector &rules); bool operator==(const Choice &other) const; }; diff --git a/src/compiler/rules/repeat.cc b/src/compiler/rules/repeat.cc index 5bdbf185..87cc19cd 100644 --- a/src/compiler/rules/repeat.cc +++ b/src/compiler/rules/repeat.cc @@ -11,11 +11,5 @@ bool Repeat::operator==(const Repeat &other) const { return rule->operator==(*other.rule); } -std::shared_ptr Repeat::build(const Rule &rule) { - return std::make_shared( - rule.is() ? 
rule : Repeat{rule} - ); -} - } // namespace rules } // namespace tree_sitter diff --git a/src/compiler/rules/repeat.h b/src/compiler/rules/repeat.h index 2eb03001..b9770a51 100644 --- a/src/compiler/rules/repeat.h +++ b/src/compiler/rules/repeat.h @@ -12,7 +12,6 @@ struct Repeat { std::shared_ptr rule; explicit Repeat(const Rule &rule); - static std::shared_ptr build(const Rule &rule); bool operator==(const Repeat &other) const; }; diff --git a/src/compiler/rules/seq.cc b/src/compiler/rules/seq.cc index 4e3a8132..cf898e0e 100644 --- a/src/compiler/rules/seq.cc +++ b/src/compiler/rules/seq.cc @@ -8,28 +8,6 @@ Seq::Seq(const Rule &left, const Rule &right) : left(std::make_shared(left)), right(std::make_shared(right)) {} -std::shared_ptr Seq::build(const std::vector &rules) { - Rule result; - for (const auto &rule : rules) { - rule.match( - [](Blank) {}, - [&](Metadata metadata) { - if (!metadata.rule->is()) { - result = Seq{result, rule}; - } - }, - [&](auto) { - if (result.is()) { - result = rule; - } else { - result = Seq{result, rule}; - } - } - ); - } - return std::make_shared(result); -} - bool Seq::operator==(const Seq &other) const { return left->operator==(*other.left) && right->operator==(*other.right); } diff --git a/src/compiler/rules/seq.h b/src/compiler/rules/seq.h index 036db44b..6ddbb003 100644 --- a/src/compiler/rules/seq.h +++ b/src/compiler/rules/seq.h @@ -14,7 +14,6 @@ struct Seq { std::shared_ptr right; Seq(const Rule &left, const Rule &right); - static std::shared_ptr build(const std::vector &rules); bool operator==(const Seq &other) const; }; diff --git a/test/compiler/build_tables/lex_item_test.cc b/test/compiler/build_tables/lex_item_test.cc index 3ca271fc..7f69d3b9 100644 --- a/test/compiler/build_tables/lex_item_test.cc +++ b/test/compiler/build_tables/lex_item_test.cc @@ -86,7 +86,7 @@ describe("LexItemSet::transitions()", [&]() { it("handles sequences", [&]() { LexItemSet item_set({ - LexItem(Symbol::non_terminal(1), Seq::build({ + 
LexItem(Symbol::non_terminal(1), Rule::seq({ CharacterSet{{ 'w' }}, CharacterSet{{ 'x' }}, CharacterSet{{ 'y' }}, @@ -101,7 +101,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('w'), Transition{ LexItemSet({ - LexItem(Symbol::non_terminal(1), Seq::build({ + LexItem(Symbol::non_terminal(1), Rule::seq({ CharacterSet{{ 'x' }}, CharacterSet{{ 'y' }}, CharacterSet{{ 'z' }}, @@ -116,10 +116,10 @@ describe("LexItemSet::transitions()", [&]() { it("handles sequences with nested precedence", [&]() { LexItemSet item_set({ - LexItem(Symbol::non_terminal(1), Seq::build({ - Metadata::prec(3, Seq::build({ + LexItem(Symbol::non_terminal(1), Rule::seq({ + Metadata::prec(3, Rule::seq({ CharacterSet{{ 'v' }}, - Metadata::prec(4, Seq::build({ + Metadata::prec(4, Rule::seq({ CharacterSet{{ 'w' }}, CharacterSet{{ 'x' }} })), CharacterSet{{ 'y' }} })), @@ -138,9 +138,9 @@ describe("LexItemSet::transitions()", [&]() { // The outer precedence is now 'active', because we are within its // contained rule. 
LexItemSet({ - LexItem(Symbol::non_terminal(1), Seq::build({ - Metadata::active_prec(3, Seq::build({ - Metadata::prec(4, Seq::build({ + LexItem(Symbol::non_terminal(1), Rule::seq({ + Metadata::active_prec(3, Rule::seq({ + Metadata::prec(4, Rule::seq({ CharacterSet{{ 'w' }}, CharacterSet{{ 'x' }} })), @@ -168,8 +168,8 @@ describe("LexItemSet::transitions()", [&]() { Transition{ // The inner precedence is now 'active' LexItemSet({ - LexItem(Symbol::non_terminal(1), Seq::build({ - Metadata::active_prec(3, Seq::build({ + LexItem(Symbol::non_terminal(1), Rule::seq({ + Metadata::active_prec(3, Rule::seq({ Metadata::active_prec(4, CharacterSet{{'x'}}), CharacterSet{{'y'}} })), @@ -194,7 +194,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('x'), Transition{ LexItemSet({ - LexItem(Symbol::non_terminal(1), Seq::build({ + LexItem(Symbol::non_terminal(1), Rule::seq({ Metadata::active_prec(3, CharacterSet{{'y'}}), CharacterSet{{'z'}}, })), @@ -228,8 +228,8 @@ describe("LexItemSet::transitions()", [&]() { it("handles sequences where the left hand side can be blank", [&]() { LexItemSet item_set({ - LexItem(Symbol::non_terminal(1), Seq::build({ - Choice::build({ + LexItem(Symbol::non_terminal(1), Rule::seq({ + Rule::choice({ CharacterSet{{ 'x' }}, Blank{}, }), @@ -245,7 +245,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('x'), Transition{ LexItemSet({ - LexItem(Symbol::non_terminal(1), Seq::build({ + LexItem(Symbol::non_terminal(1), Rule::seq({ CharacterSet{{ 'y' }}, CharacterSet{{ 'z' }}, })), @@ -277,7 +277,7 @@ describe("LexItemSet::transitions()", [&]() { it("handles repeats", [&]() { LexItemSet item_set({ - LexItem(Symbol::non_terminal(1), Repeat{Seq::build({ + LexItem(Symbol::non_terminal(1), Repeat{Rule::seq({ CharacterSet{{ 'a' }}, CharacterSet{{ 'b' }}, })}), @@ -291,9 +291,9 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('a'), Transition{ LexItemSet({ - LexItem(Symbol::non_terminal(1), 
Seq::build({ + LexItem(Symbol::non_terminal(1), Rule::seq({ CharacterSet{{ 'b' }}, - Repeat{Seq::build({ + Repeat{Rule::seq({ CharacterSet{{ 'a' }}, CharacterSet{{ 'b' }}, })} @@ -342,12 +342,12 @@ describe("LexItemSet::transitions()", [&]() { it("handles choices between overlapping character sets", [&]() { LexItemSet item_set({ - LexItem(Symbol::non_terminal(1), Choice::build({ - Metadata::active_prec(2, Seq::build({ + LexItem(Symbol::non_terminal(1), Rule::choice({ + Metadata::active_prec(2, Rule::seq({ CharacterSet{{ 'a', 'b', 'c', 'd' }}, CharacterSet{{ 'x' }}, })), - Metadata::active_prec(3, Seq::build({ + Metadata::active_prec(3, Rule::seq({ CharacterSet{{ 'c', 'd', 'e', 'f' }}, CharacterSet{{ 'y' }}, })), @@ -393,12 +393,12 @@ describe("LexItemSet::transitions()", [&]() { it("handles choices between a subset and a superset of characters", [&]() { LexItemSet item_set({ - LexItem(Symbol::non_terminal(1), Choice::build({ - Seq::build({ + LexItem(Symbol::non_terminal(1), Rule::choice({ + Rule::seq({ CharacterSet{{ 'b', 'c', 'd' }}, CharacterSet{{ 'x' }}, }), - Seq::build({ + Rule::seq({ CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f' }}, CharacterSet{{ 'y' }}, }), @@ -434,10 +434,10 @@ describe("LexItemSet::transitions()", [&]() { it("handles choices between whitelisted and blacklisted character sets", [&]() { LexItemSet item_set({ - LexItem(Symbol::non_terminal(1), Seq::build({ - Choice::build({ + LexItem(Symbol::non_terminal(1), Rule::seq({ + Rule::choice({ CharacterSet().include_all().exclude('/'), - Seq::build({ + Rule::seq({ CharacterSet{{ '\\' }}, CharacterSet{{ '/' }}, }), @@ -464,7 +464,7 @@ describe("LexItemSet::transitions()", [&]() { Transition{ LexItemSet({ LexItem(Symbol::non_terminal(1), CharacterSet{{ '/' }}), - LexItem(Symbol::non_terminal(1), Seq::build({ CharacterSet{{ '/' }}, CharacterSet{{ '/' }} })), + LexItem(Symbol::non_terminal(1), Rule::seq({ CharacterSet{{ '/' }}, CharacterSet{{ '/' }} })), }), PrecedenceRange(), false diff --git 
a/test/compiler/build_tables/rule_can_be_blank_test.cc b/test/compiler/build_tables/rule_can_be_blank_test.cc index 0fe07350..92dffa01 100644 --- a/test/compiler/build_tables/rule_can_be_blank_test.cc +++ b/test/compiler/build_tables/rule_can_be_blank_test.cc @@ -24,24 +24,24 @@ describe("rule_can_be_blank", [&]() { }); it("returns true for choices iff one or more sides can be blank", [&]() { - rule = Choice::build({ CharacterSet{{'x'}}, Blank{} }); + rule = Rule::choice({ CharacterSet{{'x'}}, Blank{} }); AssertThat(rule_can_be_blank(rule), IsTrue()); - rule = Choice::build({ Blank{}, CharacterSet{{'x'}} }); + rule = Rule::choice({ Blank{}, CharacterSet{{'x'}} }); AssertThat(rule_can_be_blank(rule), IsTrue()); - rule = Choice::build({ CharacterSet{{'x'}}, CharacterSet{{'y'}} }); + rule = Rule::choice({ CharacterSet{{'x'}}, CharacterSet{{'y'}} }); AssertThat(rule_can_be_blank(rule), IsFalse()); }); it("returns true for sequences iff both sides can be blank", [&]() { - rule = Seq::build({ Blank{}, CharacterSet{{'x'}} }); + rule = Rule::seq({ Blank{}, CharacterSet{{'x'}} }); AssertThat(rule_can_be_blank(rule), IsFalse()); - rule = Seq::build({ CharacterSet{{'x'}}, Blank{} }); + rule = Rule::seq({ CharacterSet{{'x'}}, Blank{} }); AssertThat(rule_can_be_blank(rule), IsFalse()); - rule = Seq::build({ Blank{}, Choice::build({ CharacterSet{{'x'}}, Blank{} }) }); + rule = Rule::seq({ Blank{}, Rule::choice({ CharacterSet{{'x'}}, Blank{} }) }); AssertThat(rule_can_be_blank(rule), IsTrue()); }); diff --git a/test/compiler/prepare_grammar/expand_repeats_test.cc b/test/compiler/prepare_grammar/expand_repeats_test.cc index 2c70c961..c025a898 100644 --- a/test/compiler/prepare_grammar/expand_repeats_test.cc +++ b/test/compiler/prepare_grammar/expand_repeats_test.cc @@ -27,8 +27,8 @@ describe("expand_repeats", []() { AssertThat(result.variables, Equals(vector{ Variable{"rule0", VariableTypeNamed, Symbol::non_terminal(1)}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, 
Choice::build({ - Seq::build({ Symbol::non_terminal(1), Symbol::terminal(0) }), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(0) }), Symbol::terminal(0), })}, })); @@ -37,7 +37,7 @@ describe("expand_repeats", []() { it("replaces repeats inside of sequences", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, Seq::build({ + Variable{"rule0", VariableTypeNamed, Rule::seq({ Symbol::terminal(10), Repeat{Symbol::terminal(11)}, })}, @@ -48,12 +48,12 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, Seq::build({ + Variable{"rule0", VariableTypeNamed, Rule::seq({ Symbol::terminal(10), Symbol::non_terminal(1), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, Choice::build({ - Seq::build({ Symbol::non_terminal(1), Symbol::terminal(11) }), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(11) }), Symbol::terminal(11) })}, })); @@ -62,7 +62,7 @@ describe("expand_repeats", []() { it("replaces repeats inside of choices", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, Choice::build({ + Variable{"rule0", VariableTypeNamed, Rule::choice({ Symbol::terminal(10), Repeat{Symbol::terminal(11)} })}, @@ -73,12 +73,12 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, Choice::build({ + Variable{"rule0", VariableTypeNamed, Rule::choice({ Symbol::terminal(10), Symbol::non_terminal(1), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, Choice::build({ - Seq::build({ Symbol::non_terminal(1), Symbol::terminal(11) }), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(11) }), Symbol::terminal(11), 
})}, })); @@ -87,11 +87,11 @@ describe("expand_repeats", []() { it("does not create redundant auxiliary rules", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, Choice::build({ - Seq::build({ Symbol::terminal(1), Repeat{Symbol::terminal(4)} }), - Seq::build({ Symbol::terminal(2), Repeat{Symbol::terminal(4)} }), + Variable{"rule0", VariableTypeNamed, Rule::choice({ + Rule::seq({ Symbol::terminal(1), Repeat{Symbol::terminal(4)} }), + Rule::seq({ Symbol::terminal(2), Repeat{Symbol::terminal(4)} }), })}, - Variable{"rule1", VariableTypeNamed, Seq::build({ + Variable{"rule1", VariableTypeNamed, Rule::seq({ Symbol::terminal(3), Repeat{Symbol::terminal(4)} })}, @@ -102,16 +102,16 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, Choice::build({ - Seq::build({ Symbol::terminal(1), Symbol::non_terminal(2) }), - Seq::build({ Symbol::terminal(2), Symbol::non_terminal(2) }), + Variable{"rule0", VariableTypeNamed, Rule::choice({ + Rule::seq({ Symbol::terminal(1), Symbol::non_terminal(2) }), + Rule::seq({ Symbol::terminal(2), Symbol::non_terminal(2) }), })}, - Variable{"rule1", VariableTypeNamed, Seq::build({ + Variable{"rule1", VariableTypeNamed, Rule::seq({ Symbol::terminal(3), Symbol::non_terminal(2), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, Choice::build({ - Seq::build({ Symbol::non_terminal(2), Symbol::terminal(4) }), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(4) }), Symbol::terminal(4), })}, })); @@ -120,7 +120,7 @@ describe("expand_repeats", []() { it("can replace multiple repeats in the same rule", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, Seq::build({ + Variable{"rule0", VariableTypeNamed, Rule::seq({ Repeat{Symbol::terminal(10)}, Repeat{Symbol::terminal(11)}, })}, @@ -131,16 +131,16 @@ 
describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, Seq::build({ + Variable{"rule0", VariableTypeNamed, Rule::seq({ Symbol::non_terminal(1), Symbol::non_terminal(2), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, Choice::build({ - Seq::build({ Symbol::non_terminal(1), Symbol::terminal(10) }), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(10) }), Symbol::terminal(10), })}, - Variable{"rule0_repeat2", VariableTypeAuxiliary, Choice::build({ - Seq::build({ Symbol::non_terminal(2), Symbol::terminal(11) }), + Variable{"rule0_repeat2", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(11) }), Symbol::terminal(11), })}, })); @@ -160,12 +160,12 @@ describe("expand_repeats", []() { AssertThat(result.variables, Equals(vector{ Variable{"rule0", VariableTypeNamed, Symbol::non_terminal(2)}, Variable{"rule1", VariableTypeNamed, Symbol::non_terminal(3)}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, Choice::build({ - Seq::build({ Symbol::non_terminal(2), Symbol::terminal(10) }), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(10) }), Symbol::terminal(10), })}, - Variable{"rule1_repeat1", VariableTypeAuxiliary, Choice::build({ - Seq::build({ Symbol::non_terminal(3), Symbol::terminal(11) }), + Variable{"rule1_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(3), Symbol::terminal(11) }), Symbol::terminal(11), })}, })); diff --git a/test/compiler/prepare_grammar/expand_tokens_test.cc b/test/compiler/prepare_grammar/expand_tokens_test.cc index aa273042..5b2038f6 100644 --- a/test/compiler/prepare_grammar/expand_tokens_test.cc +++ b/test/compiler/prepare_grammar/expand_tokens_test.cc @@ -17,14 +17,14 @@ describe("expand_tokens", []() { 
describe("string rules", [&]() { it("replaces strings with sequences of character sets", [&]() { AssertThat( - expand_token(Seq::build({ + expand_token(Rule::seq({ String{"a"}, String{"bcd"}, String{"e"} })).rule, - Equals(*Seq::build({ + Equals(Rule::seq({ CharacterSet{{ 'a' }}, - Seq::build({ + Rule::seq({ CharacterSet{{ 'b' }}, CharacterSet{{ 'c' }}, CharacterSet{{ 'd' }}, @@ -36,7 +36,7 @@ describe("expand_tokens", []() { it("handles strings containing non-ASCII UTF8 characters", [&]() { AssertThat( expand_token(String{"\u03B1 \u03B2"}).rule, - Equals(*Seq::build({ + Equals(Rule::seq({ CharacterSet{{ 945 }}, CharacterSet{{ ' ' }}, CharacterSet{{ 946 }}, @@ -48,12 +48,12 @@ describe("expand_tokens", []() { describe("regexp rules", [&]() { it("replaces regexps with the equivalent rule tree", [&]() { AssertThat( - expand_token(Seq::build({ + expand_token(Rule::seq({ String{"a"}, Pattern{"x+"}, String{"b"}, })).rule, - Equals(*Seq::build({ + Equals(Rule::seq({ CharacterSet{{'a'}}, Repeat{CharacterSet{{ 'x' }}}, CharacterSet{{'b'}}, @@ -72,7 +72,7 @@ describe("expand_tokens", []() { it("returns an error when the grammar contains an invalid regex", [&]() { AssertThat( - expand_token(Seq::build({ + expand_token(Rule::seq({ Pattern{"("}, String{"xyz"}, Pattern{"["}, diff --git a/test/compiler/prepare_grammar/extract_choices_test.cc b/test/compiler/prepare_grammar/extract_choices_test.cc index dbd8c5d0..d1c859e7 100644 --- a/test/compiler/prepare_grammar/extract_choices_test.cc +++ b/test/compiler/prepare_grammar/extract_choices_test.cc @@ -9,9 +9,9 @@ using prepare_grammar::extract_choices; describe("extract_choices", []() { it("expands rules containing choices into multiple rules", [&]() { - auto rule = Seq::build({ + auto rule = Rule::seq({ Symbol::terminal(1), - Choice::build({ + Rule::choice({ Symbol::terminal(2), Symbol::terminal(3), Symbol::terminal(4) @@ -22,14 +22,14 @@ describe("extract_choices", []() { auto result = extract_choices(rule); AssertThat(result, 
Equals(vector({ - Seq::build({Symbol::terminal(1), Symbol::terminal(2), Symbol::terminal(5)}), - Seq::build({Symbol::terminal(1), Symbol::terminal(3), Symbol::terminal(5)}), - Seq::build({Symbol::terminal(1), Symbol::terminal(4), Symbol::terminal(5)}), + Rule::seq({Symbol::terminal(1), Symbol::terminal(2), Symbol::terminal(5)}), + Rule::seq({Symbol::terminal(1), Symbol::terminal(3), Symbol::terminal(5)}), + Rule::seq({Symbol::terminal(1), Symbol::terminal(4), Symbol::terminal(5)}), }))); }); it("handles metadata rules", [&]() { - auto rule = Metadata::prec(5, Choice::build({ + auto rule = Metadata::prec(5, Rule::choice({ Symbol::terminal(2), Symbol::terminal(3), Symbol::terminal(4) @@ -43,9 +43,9 @@ describe("extract_choices", []() { }); it("handles nested choices", [&]() { - auto rule = Choice::build({ - Seq::build({ - Choice::build({ + auto rule = Rule::choice({ + Rule::seq({ + Rule::choice({ Symbol::terminal(1), Symbol::terminal(2) }), @@ -55,8 +55,8 @@ describe("extract_choices", []() { }); AssertThat(extract_choices(rule), Equals(vector({ - Seq::build({Symbol::terminal(1), Symbol::terminal(3)}), - Seq::build({Symbol::terminal(2), Symbol::terminal(3)}), + Rule::seq({Symbol::terminal(1), Symbol::terminal(3)}), + Rule::seq({Symbol::terminal(2), Symbol::terminal(3)}), Symbol::terminal(4), }))); }); diff --git a/test/compiler/prepare_grammar/extract_tokens_test.cc b/test/compiler/prepare_grammar/extract_tokens_test.cc index 005174bf..dd9156a2 100644 --- a/test/compiler/prepare_grammar/extract_tokens_test.cc +++ b/test/compiler/prepare_grammar/extract_tokens_test.cc @@ -21,13 +21,13 @@ describe("extract_tokens", []() { InternedVariable{ "rule_A", VariableTypeNamed, - Repeat{Seq::build({ + Repeat{Rule::seq({ String{"ab"}, Pattern{"cd+"}, - Choice::build({ + Rule::choice({ Symbol::non_terminal(1), Symbol::non_terminal(2), - Metadata::token(Repeat{Choice::build({ + Metadata::token(Repeat{Rule::choice({ String{"ef"}, String{"g"} })}), @@ -42,7 +42,7 @@ 
describe("extract_tokens", []() { InternedVariable{ "rule_C", VariableTypeNamed, - Choice::build({ String{"i"}, Blank{} }) + Rule::choice({ String{"i"}, Blank{} }) }, InternedVariable{ "rule_D", @@ -65,7 +65,7 @@ describe("extract_tokens", []() { InitialSyntaxVariable{ "rule_A", VariableTypeNamed, - Repeat{Seq::build({ + Repeat{Rule::seq({ // This string is now the first token in the lexical grammar. Symbol::terminal(0), @@ -73,7 +73,7 @@ describe("extract_tokens", []() { // This pattern is now the second rule in the lexical grammar. Symbol::terminal(1), - Choice::build({ + Rule::choice({ // Rule 1, which this symbol pointed to, has been moved to the // lexical grammar. Symbol::terminal(3), @@ -91,7 +91,7 @@ describe("extract_tokens", []() { InitialSyntaxVariable{ "rule_C", VariableTypeNamed, - Choice::build({Symbol::terminal(4), Blank{}}) + Rule::choice({Symbol::terminal(4), Blank{}}) }, InitialSyntaxVariable{ @@ -122,7 +122,7 @@ describe("extract_tokens", []() { LexicalVariable{ "/(ef|g)+/", VariableTypeAuxiliary, - Repeat{Choice::build({ + Repeat{Rule::choice({ Seq{CharacterSet{{'e'}}, CharacterSet{{'f'}}}, CharacterSet{{'g'}}, })}, @@ -153,7 +153,7 @@ describe("extract_tokens", []() { { "rule_A", VariableTypeNamed, - Seq::build({ + Rule::seq({ String{"ab"}, Symbol::non_terminal(1), String{"ab"}, @@ -172,7 +172,7 @@ describe("extract_tokens", []() { InitialSyntaxVariable{ "rule_A", VariableTypeNamed, - Seq::build({ + Rule::seq({ Symbol::terminal(0), Symbol::non_terminal(1), Symbol::terminal(0) @@ -195,7 +195,7 @@ describe("extract_tokens", []() { InternedVariable{ "rule_A", VariableTypeNamed, - Seq::build({ Symbol::non_terminal(1), String{"ab"} }) + Rule::seq({ Symbol::non_terminal(1), String{"ab"} }) }, InternedVariable{ "rule_B", @@ -205,7 +205,7 @@ describe("extract_tokens", []() { InternedVariable{ "rule_C", VariableTypeNamed, - Seq::build({ String{"ef"}, String{"cd"} }) + Rule::seq({ String{"ef"}, String{"cd"} }) }, }, {}, {}, {}}); @@ -216,7 +216,7 @@ 
describe("extract_tokens", []() { InitialSyntaxVariable{ "rule_A", VariableTypeNamed, - Seq::build({ Symbol::non_terminal(1), Symbol::terminal(0) }) + Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(0) }) }, InitialSyntaxVariable{ "rule_B", @@ -226,7 +226,7 @@ describe("extract_tokens", []() { InitialSyntaxVariable{ "rule_C", VariableTypeNamed, - Seq::build({ Symbol::terminal(2), Symbol::terminal(1) }) + Rule::seq({ Symbol::terminal(2), Symbol::terminal(1) }) }, }))); @@ -335,7 +335,7 @@ describe("extract_tokens", []() { InternedVariable{ "rule_A", VariableTypeNamed, - Seq::build({ String{"w"}, String{"x"}, Symbol::non_terminal(1) }) + Rule::seq({ String{"w"}, String{"x"}, Symbol::non_terminal(1) }) }, InternedVariable{ "rule_B", @@ -370,12 +370,12 @@ describe("extract_tokens", []() { InternedVariable{ "rule_A", VariableTypeNamed, - Seq::build({ String{"x"}, Symbol::non_terminal(1) }) + Rule::seq({ String{"x"}, Symbol::non_terminal(1) }) }, InternedVariable{ "rule_B", VariableTypeNamed, - Seq::build({ String{"y"}, String{"z"} }) + Rule::seq({ String{"y"}, String{"z"} }) }, }, { @@ -398,7 +398,7 @@ describe("extract_tokens", []() { {"rule_B", VariableTypeNamed, String{"y"}}, }, { - Choice::build({ Symbol::non_terminal(1), Blank{} }) + Rule::choice({ Symbol::non_terminal(1), Blank{} }) }, {}, {} @@ -417,12 +417,12 @@ describe("extract_tokens", []() { { "rule_A", VariableTypeNamed, - Seq::build({ String{"x"}, Symbol::non_terminal(1) }) + Rule::seq({ String{"x"}, Symbol::non_terminal(1) }) }, { "rule_B", VariableTypeNamed, - Seq::build({ String{"y"}, String{"z"} }) + Rule::seq({ String{"y"}, String{"z"} }) }, }, {}, diff --git a/test/compiler/prepare_grammar/flatten_grammar_test.cc b/test/compiler/prepare_grammar/flatten_grammar_test.cc index 15a53aa6..50d48fb4 100644 --- a/test/compiler/prepare_grammar/flatten_grammar_test.cc +++ b/test/compiler/prepare_grammar/flatten_grammar_test.cc @@ -14,12 +14,12 @@ describe("flatten_grammar", []() { SyntaxVariable result = 
flatten_rule({ "test", VariableTypeNamed, - Seq::build({ + Rule::seq({ Symbol::non_terminal(1), - Metadata::prec_left(101, Seq::build({ + Metadata::prec_left(101, Rule::seq({ Symbol::non_terminal(2), - Choice::build({ - Metadata::prec_right(102, Seq::build({ + Rule::choice({ + Metadata::prec_right(102, Rule::seq({ Symbol::non_terminal(3), Symbol::non_terminal(4) })), @@ -56,7 +56,7 @@ describe("flatten_grammar", []() { SyntaxVariable result = flatten_rule({ "test1", VariableTypeNamed, - Metadata::prec_left(101, Seq::build({ + Metadata::prec_left(101, Rule::seq({ Symbol::non_terminal(1), Symbol::non_terminal(2), })) @@ -72,7 +72,7 @@ describe("flatten_grammar", []() { result = flatten_rule({ "test2", VariableTypeNamed, - Metadata::prec_left(101, Seq::build({ + Metadata::prec_left(101, Rule::seq({ Symbol::non_terminal(1), })) }); diff --git a/test/compiler/prepare_grammar/intern_symbols_test.cc b/test/compiler/prepare_grammar/intern_symbols_test.cc index 9bfe7367..dc488951 100644 --- a/test/compiler/prepare_grammar/intern_symbols_test.cc +++ b/test/compiler/prepare_grammar/intern_symbols_test.cc @@ -13,7 +13,7 @@ describe("intern_symbols", []() { it("replaces named symbols with numerically-indexed symbols", [&]() { InputGrammar grammar{ { - {"x", VariableTypeNamed, Choice::build({ NamedSymbol{"y"}, NamedSymbol{"_z"} })}, + {"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"_z"} })}, {"y", VariableTypeNamed, NamedSymbol{"_z"}}, {"_z", VariableTypeNamed, String{"stuff"}} }, {}, {}, {} @@ -23,7 +23,7 @@ describe("intern_symbols", []() { AssertThat(result.second, Equals(CompileError::none())); AssertThat(result.first.variables, Equals(vector{ - {"x", VariableTypeNamed, Choice::build({ Symbol::non_terminal(1), Symbol::non_terminal(2) })}, + {"x", VariableTypeNamed, Rule::choice({ Symbol::non_terminal(1), Symbol::non_terminal(2) })}, {"y", VariableTypeNamed, Symbol::non_terminal(2)}, {"_z", VariableTypeHidden, String{"stuff"}}, })); @@ -47,7 +47,7 @@ 
describe("intern_symbols", []() { it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() { InputGrammar grammar{ { - {"x", VariableTypeNamed, Choice::build({ NamedSymbol{"y"}, NamedSymbol{"z"} })}, + {"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"z"} })}, {"y", VariableTypeNamed, NamedSymbol{"z"}}, {"z", VariableTypeNamed, String{"stuff"}} }, @@ -67,7 +67,7 @@ describe("intern_symbols", []() { it("records any rule names that match external token names", [&]() { InputGrammar grammar{ { - {"x", VariableTypeNamed, Choice::build({ NamedSymbol{"y"}, NamedSymbol{"z"} })}, + {"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"z"} })}, {"y", VariableTypeNamed, NamedSymbol{"z"}}, {"z", VariableTypeNamed, String{"stuff"}}, }, diff --git a/test/compiler/prepare_grammar/parse_regex_test.cc b/test/compiler/prepare_grammar/parse_regex_test.cc index 94285e39..a252654b 100644 --- a/test/compiler/prepare_grammar/parse_regex_test.cc +++ b/test/compiler/prepare_grammar/parse_regex_test.cc @@ -29,7 +29,7 @@ describe("parse_regex", []() { { "character classes", "\\w-\\d-\\s-\\W-\\D-\\S", - Seq::build({ + Rule::seq({ CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', @@ -60,7 +60,7 @@ describe("parse_regex", []() { { "choices", "ab|cd|ef", - Choice::build({ + Rule::choice({ Seq{ CharacterSet{{'a'}}, CharacterSet{{'b'}} @@ -79,7 +79,7 @@ describe("parse_regex", []() { { "simple sequences", "abc", - Seq::build({ + Rule::seq({ CharacterSet{{'a'}}, CharacterSet{{'b'}}, CharacterSet{{'c'}} @@ -113,12 +113,12 @@ describe("parse_regex", []() { { "character groups in sequences", "x([^x]|\\\\x)*x", - Seq::build({ + Rule::seq({ CharacterSet{{'x'}}, - Choice::build({ - Repeat{Choice::build({ + Rule::choice({ + Repeat{Rule::choice({ CharacterSet().include_all().exclude('x'), - Seq::build({ + Rule::seq({ CharacterSet{{'\\'}}, 
CharacterSet{{'x'}} }) @@ -132,8 +132,8 @@ describe("parse_regex", []() { { "choices in sequences", "(a|b)cd", - Seq::build({ - Choice::build({ + Rule::seq({ + Rule::choice({ CharacterSet{{'a'}}, CharacterSet{{'b'}} }), CharacterSet{{'c'}}, @@ -143,7 +143,7 @@ describe("parse_regex", []() { { "escaped parentheses", "a\\(b", - Seq::build({ + Rule::seq({ CharacterSet{{'a'}}, CharacterSet{{'('}}, CharacterSet{{'b'}}, @@ -153,7 +153,7 @@ describe("parse_regex", []() { { "escaped periods", "a\\.", - Seq::build({ + Rule::seq({ CharacterSet{{'a'}}, CharacterSet{{'.'}}, }) @@ -162,7 +162,7 @@ describe("parse_regex", []() { { "escaped characters", "\\t\\n\\r", - Seq::build({ + Rule::seq({ CharacterSet{{'\t'}}, CharacterSet{{'\n'}}, CharacterSet{{'\r'}}, @@ -172,22 +172,22 @@ describe("parse_regex", []() { { "plus repeats", "(ab)+(cd)+", - Seq::build({ - Repeat{Seq::build({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })}, - Repeat{Seq::build({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })}, + Rule::seq({ + Repeat{Rule::seq({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })}, + Repeat{Rule::seq({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })}, }) }, { "asterix repeats", "(ab)*(cd)*", - Seq::build({ - Choice::build({ - Repeat{Seq::build({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })}, + Rule::seq({ + Rule::choice({ + Repeat{Rule::seq({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })}, Blank{}, }), - Choice::build({ - Repeat{Seq::build({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })}, + Rule::choice({ + Repeat{Rule::seq({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })}, Blank{}, }), }) @@ -196,10 +196,10 @@ describe("parse_regex", []() { { "optional rules", "a(bc)?", - Seq::build({ + Rule::seq({ CharacterSet{{'a'}}, - Choice::build({ - Seq::build({ + Rule::choice({ + Rule::seq({ CharacterSet{{'b'}}, CharacterSet{{'c'}}, }), @@ -211,11 +211,11 @@ describe("parse_regex", []() { { "choices containing negated character classes", "/([^/]|(\\\\/))+/", - Seq::build({ + Rule::seq({ CharacterSet{{'/'}}, - 
Repeat{Choice::build({ + Repeat{Rule::choice({ CharacterSet().include_all().exclude('/'), - Seq::build({ + Rule::seq({ CharacterSet{{'\\'}}, CharacterSet{{'/'}}, }), diff --git a/test/compiler/rules/choice_test.cc b/test/compiler/rules/choice_test.cc index 01ed71a1..fb2bbbaf 100644 --- a/test/compiler/rules/choice_test.cc +++ b/test/compiler/rules/choice_test.cc @@ -8,38 +8,38 @@ START_TEST describe("Choice", []() { describe("constructing choices", [&]() { it("eliminates duplicate members", [&]() { - Rule rule = Choice::build({ - Seq::build({ NamedSymbol{"one"}, NamedSymbol{"two"} }), + Rule rule = Rule::choice({ + Rule::seq({ NamedSymbol{"one"}, NamedSymbol{"two"} }), NamedSymbol{"three"}, - Seq::build({ NamedSymbol{"one"}, NamedSymbol{"two"} }) + Rule::seq({ NamedSymbol{"one"}, NamedSymbol{"two"} }) }); AssertThat(rule, Equals(Rule(Choice{{ - Seq::build({ NamedSymbol{"one"}, NamedSymbol{"two"} }), + Rule::seq({ NamedSymbol{"one"}, NamedSymbol{"two"} }), NamedSymbol{"three"}, }}))); - rule = Choice::build({ + rule = Rule::choice({ Blank{}, Blank{}, - Choice::build({ + Rule::choice({ Blank{}, NamedSymbol{"four"} }) }); - AssertThat(rule, Equals(*Choice::build({Blank{}, NamedSymbol{"four"}}))); + AssertThat(rule, Equals(Rule::choice({Blank{}, NamedSymbol{"four"}}))); }); it("eliminates duplicates within nested choices", [&]() { - Rule rule = Choice::build({ - Seq::build({ + Rule rule = Rule::choice({ + Rule::seq({ NamedSymbol{"one"}, NamedSymbol{"two"} }), - Choice::build({ + Rule::choice({ NamedSymbol{"three"}, - Seq::build({ + Rule::seq({ NamedSymbol{"one"}, NamedSymbol{"two"} }) @@ -47,7 +47,7 @@ describe("Choice", []() { }); AssertThat(rule, Equals(Rule(Choice{{ - Seq::build({ + Rule::seq({ NamedSymbol{"one"}, NamedSymbol{"two"}, }), @@ -56,9 +56,9 @@ describe("Choice", []() { }); it("doesn't construct a choice if there's only one unique member", [&]() { - Rule rule = Choice::build({ + Rule rule = Rule::choice({ NamedSymbol{"one"}, - Choice::build({ + 
Rule::choice({ NamedSymbol{"one"}, }) }); diff --git a/test/compiler/rules/repeat_test.cc b/test/compiler/rules/repeat_test.cc index c3ecb566..05b2f117 100644 --- a/test/compiler/rules/repeat_test.cc +++ b/test/compiler/rules/repeat_test.cc @@ -9,8 +9,8 @@ describe("Repeat", []() { describe("constructing repeats", [&]() { it("doesn't create redundant repeats", [&]() { Rule symbol = Symbol::non_terminal(1); - Rule repeat = Repeat::build(Rule(symbol)); - Rule outer_repeat = Repeat::build(Rule(repeat)); + Rule repeat = Rule::repeat(Rule(symbol)); + Rule outer_repeat = Rule::repeat(Rule(repeat)); AssertThat(repeat, !Equals(symbol)); AssertThat(outer_repeat, Equals(repeat));