diff --git a/.clang_complete b/.clang_complete index 11bc7510..12c483f7 100644 --- a/.clang_complete +++ b/.clang_complete @@ -1,3 +1,4 @@ +-std=c++14 -Isrc -Itest -Iinclude diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index 1c287fd5..eda0e51a 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -15,6 +15,7 @@ typedef enum { TSCompileErrorTypeLexConflict, TSCompileErrorTypeParseConflict, TSCompileErrorTypeEpsilonRule, + TSCompileErrorTypeInvalidTokenContents, } TSCompileErrorType; typedef struct { diff --git a/project.gyp b/project.gyp index 8871fc16..4ed51809 100644 --- a/project.gyp +++ b/project.gyp @@ -33,40 +33,25 @@ 'src/compiler/prepare_grammar/extract_tokens.cc', 'src/compiler/prepare_grammar/flatten_grammar.cc', 'src/compiler/prepare_grammar/intern_symbols.cc', - 'src/compiler/prepare_grammar/is_token.cc', 'src/compiler/prepare_grammar/normalize_rules.cc', 'src/compiler/prepare_grammar/parse_regex.cc', 'src/compiler/prepare_grammar/prepare_grammar.cc', 'src/compiler/prepare_grammar/token_description.cc', 'src/compiler/rule.cc', - 'src/compiler/syntax_grammar.cc', - 'src/compiler/rules/blank.cc', - 'src/compiler/rules/built_in_symbols.cc', - 'src/compiler/rules/character_range.cc', 'src/compiler/rules/character_set.cc', 'src/compiler/rules/choice.cc', 'src/compiler/rules/metadata.cc', - 'src/compiler/rules/named_symbol.cc', - 'src/compiler/rules/pattern.cc', 'src/compiler/rules/repeat.cc', - 'src/compiler/rules/rules.cc', 'src/compiler/rules/seq.cc', - 'src/compiler/rules/string.cc', - 'src/compiler/rules/symbol.cc', - 'src/compiler/rules/visitor.cc', 'src/compiler/util/string_helpers.cc', 'externals/utf8proc/utf8proc.c', 'externals/json-parser/json.c', ], 'cflags_cc': [ - '-std=c++0x', - ], - 'cflags_cc!': [ - '-fno-rtti' + '-std=c++14', ], 'xcode_settings': { - 'CLANG_CXX_LANGUAGE_STANDARD': 'c++11', - 'GCC_ENABLE_CPP_RTTI': 'YES', + 'CLANG_CXX_LANGUAGE_STANDARD': 'c++14', 'GCC_ENABLE_CPP_EXCEPTIONS': 'NO', }, 'direct_dependent_settings': { diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 1f8a6939..6ab35f76 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -10,8 +10,7 @@ #include "compiler/build_tables/parse_item_set_builder.h" #include "compiler/lexical_grammar.h" #include "compiler/syntax_grammar.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" #include "compiler/build_tables/lex_table_builder.h" namespace tree_sitter { @@ -25,7 +24,6 @@ using std::map; using std::string; using std::to_string; using std::unordered_map; -using std::make_shared; using rules::Associativity; using rules::Symbol; using rules::END_OF_INPUT; @@ -53,8 +51,8 @@ class ParseTableBuilder { pair build() { Symbol start_symbol = grammar.variables.empty() ? - Symbol(0, Symbol::Terminal) : - Symbol(0, Symbol::NonTerminal); + Symbol::terminal(0) : + Symbol::non_terminal(0); Production start_production{ ProductionStep{start_symbol, 0, rules::AssociativityNone}, @@ -121,7 +119,7 @@ class ParseTableBuilder { } if (!has_non_reciprocal_conflict) { - add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::Terminal)); + add_out_of_context_parse_state(&error_state, Symbol::terminal(i)); } } @@ -132,11 +130,11 @@ class ParseTableBuilder { } for (size_t i = 0; i < grammar.external_tokens.size(); i++) { - add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::External)); + add_out_of_context_parse_state(&error_state, Symbol::external(i)); } for (size_t i = 0; i < grammar.variables.size(); i++) { - add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::NonTerminal)); + add_out_of_context_parse_state(&error_state, Symbol::non_terminal(i)); } error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0)); @@ -253,7 +251,7 @@ class ParseTableBuilder { ParseStateId next_state = add_parse_state(next_item_set); parse_table.set_nonterminal_action(state_id, lookahead, next_state); if (!allow_any_conflict) - recovery_states[Symbol(lookahead, Symbol::NonTerminal)].add(next_item_set); + recovery_states[Symbol::non_terminal(lookahead)].add(next_item_set); } for (Symbol lookahead : lookaheads_with_conflicts) { @@ -428,7 +426,7 @@ class ParseTableBuilder { if (lookahead.is_external()) return false; if (!lookahead.is_built_in()) { for (Symbol::Index incompatible_index : incompatible_token_indices) { - Symbol incompatible_symbol(incompatible_index, Symbol::Terminal); + Symbol incompatible_symbol = Symbol::terminal(incompatible_index); if (other.terminal_entries.count(incompatible_symbol)) return false; } } @@ -452,7 +450,7 @@ class ParseTableBuilder { if (lookahead.is_external()) return false; if (!lookahead.is_built_in()) { for (Symbol::Index incompatible_index : incompatible_token_indices) { - Symbol incompatible_symbol(incompatible_index, Symbol::Terminal); + Symbol incompatible_symbol = Symbol::terminal(incompatible_index); if (state.terminal_entries.count(incompatible_symbol)) return false; } } diff --git a/src/compiler/build_tables/lex_conflict_manager.cc b/src/compiler/build_tables/lex_conflict_manager.cc index 0fbdf4d9..82b5efaf 100644 --- a/src/compiler/build_tables/lex_conflict_manager.cc +++ b/src/compiler/build_tables/lex_conflict_manager.cc @@ -1,7 +1,7 @@ #include "compiler/build_tables/lex_conflict_manager.h" #include #include "compiler/parse_table.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" #include "compiler/build_tables/lex_item.h" namespace tree_sitter { diff --git a/src/compiler/build_tables/lex_conflict_manager.h b/src/compiler/build_tables/lex_conflict_manager.h index 0d3177dd..d15c5b58 100644 --- a/src/compiler/build_tables/lex_conflict_manager.h +++ b/src/compiler/build_tables/lex_conflict_manager.h @@ -4,7 +4,7 @@ #include #include #include "compiler/lexical_grammar.h" -#include "compiler/rules/symbol.h" +#include "compiler/rule.h" namespace tree_sitter { diff --git a/src/compiler/build_tables/lex_item.cc b/src/compiler/build_tables/lex_item.cc index 4c9056df..59b59dc1 100644 --- a/src/compiler/build_tables/lex_item.cc +++ b/src/compiler/build_tables/lex_item.cc @@ -2,12 +2,7 @@ #include #include "compiler/build_tables/lex_item_transitions.h" #include "compiler/build_tables/rule_can_be_blank.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/visitor.h" +#include "compiler/rule.h" #include "compiler/util/hash_combine.h" namespace tree_sitter { @@ -19,51 +14,63 @@ using std::unordered_set; using rules::CharacterSet; using rules::Symbol; -LexItem::LexItem(const rules::Symbol &lhs, const rule_ptr rule) +LexItem::LexItem(const rules::Symbol &lhs, const rules::Rule &rule) : lhs(lhs), rule(rule) {} bool LexItem::operator==(const LexItem &other) const { - return (other.lhs == lhs) && other.rule->operator==(*rule); + return lhs == other.lhs && rule == other.rule; } -LexItem::CompletionStatus LexItem::completion_status() const { - class GetCompletionStatus : public rules::RuleFn { - protected: - CompletionStatus apply_to(const rules::Choice *rule) { - for (const auto &element : rule->elements) { - CompletionStatus status = apply(element); +using CompletionStatus = LexItem::CompletionStatus; + +static CompletionStatus get_completion_status(const rules::Rule &rule) { + return rule.match( + [](rules::Choice choice) { + for (const auto &element : choice.elements) { + auto status = get_completion_status(element); if (status.is_done) return status; } - return { false, PrecedenceRange() }; - } + return CompletionStatus{false, PrecedenceRange()}; + }, - CompletionStatus apply_to(const rules::Metadata *rule) { - CompletionStatus result = apply(rule->rule); - if (result.is_done && result.precedence.empty && rule->params.has_precedence) { - result.precedence.add(rule->params.precedence); + [](rules::Metadata metadata) { + CompletionStatus result = get_completion_status(*metadata.rule); + if (result.is_done && result.precedence.empty && metadata.params.has_precedence) { + result.precedence.add(metadata.params.precedence); } return result; - } + }, - CompletionStatus apply_to(const rules::Repeat *rule) { - return apply(rule->content); - } + [](rules::Repeat repeat) { + return get_completion_status(*repeat.rule); + }, - CompletionStatus apply_to(const rules::Blank *rule) { - return { true, PrecedenceRange() }; - } - - CompletionStatus apply_to(const rules::Seq *rule) { - CompletionStatus left_status = apply(rule->left); + [](rules::Seq sequence) { + CompletionStatus left_status = get_completion_status(*sequence.left); if (left_status.is_done) { - return apply(rule->right); + return get_completion_status(*sequence.right); } else { - return { false, PrecedenceRange() }; + return CompletionStatus{false, PrecedenceRange()}; } - } - }; + }, - return GetCompletionStatus().apply(rule); + [](rules::Blank blank) { + return CompletionStatus{true, PrecedenceRange()}; + }, + + [](rules::CharacterSet) { + return CompletionStatus{false, PrecedenceRange()}; + }, + + [](auto) { + return CompletionStatus{false, PrecedenceRange()}; + } + ); +} + + +LexItem::CompletionStatus LexItem::completion_status() const { + return get_completion_status(rule); } LexItemSet::LexItemSet() {} diff --git a/src/compiler/build_tables/lex_item.h b/src/compiler/build_tables/lex_item.h index b6b07de7..89a943e3 100644 --- a/src/compiler/build_tables/lex_item.h +++ b/src/compiler/build_tables/lex_item.h @@ -5,8 +5,7 @@ #include #include #include -#include "compiler/rules/character_set.h" -#include "compiler/rules/symbol.h" +#include "compiler/rule.h" #include "compiler/precedence_range.h" namespace tree_sitter { @@ -14,7 +13,7 @@ namespace build_tables { class LexItem { public: - LexItem(const rules::Symbol &, rule_ptr); + LexItem(const rules::Symbol &, const rules::Rule &); struct CompletionStatus { bool is_done; @@ -25,7 +24,7 @@ class LexItem { CompletionStatus completion_status() const; rules::Symbol lhs; - rule_ptr rule; + rules::Rule rule; }; } // namespace build_tables diff --git a/src/compiler/build_tables/lex_item_transitions.cc b/src/compiler/build_tables/lex_item_transitions.cc index 9dc4f762..c4e05420 100644 --- a/src/compiler/build_tables/lex_item_transitions.cc +++ b/src/compiler/build_tables/lex_item_transitions.cc @@ -4,47 +4,34 @@ #include #include #include "compiler/build_tables/rule_can_be_blank.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/visitor.h" +#include "compiler/rule.h" #include "compiler/build_tables/lex_item.h" namespace tree_sitter { namespace build_tables { using std::function; -using std::make_shared; using std::map; using std::pair; using std::vector; using rules::CharacterSet; -using rules::Symbol; -using rules::Blank; -using rules::Choice; -using rules::Seq; -using rules::Repeat; -using rules::Metadata; -typedef LexItemSet::Transition Transition; -typedef LexItemSet::TransitionMap TransitionMap; +using rules::Rule; +using Transition = LexItemSet::Transition; +using TransitionMap = LexItemSet::TransitionMap; -class TransitionBuilder : public rules::RuleFn { +class TransitionBuilder { TransitionMap *transitions; - const Symbol &item_lhs; + const rules::Symbol &item_lhs; vector *precedence_stack; bool in_main_token; - Transition transform_transition(const Transition &transition, - function callback) { + inline Transition transform_transition(const Transition &transition, + const function &callback) { LexItemSet destination; - for (const LexItem &item : transition.destination.entries) + for (const LexItem &item : transition.destination.entries) { destination.entries.insert(LexItem(item.lhs, callback(item.rule))); - return Transition{ destination, transition.precedence, - transition.in_main_token }; + } + return Transition{destination, transition.precedence, transition.in_main_token}; } void add_transition(TransitionMap *transitions, CharacterSet new_characters, @@ -89,82 +76,102 @@ class TransitionBuilder : public rules::RuleFn { transitions->insert({ new_characters, new_transition }); } - void apply_to(const CharacterSet *character_set) { - PrecedenceRange precedence; - if (!precedence_stack->empty()) - precedence.add(precedence_stack->back()); - - add_transition(transitions, *character_set, - Transition{ - LexItemSet({ LexItem(item_lhs, Blank::build()) }), - precedence, in_main_token, - }); - } - - void apply_to(const Choice *choice) { - for (const rule_ptr &element : choice->elements) - apply(element); - } - - void apply_to(const Seq *sequence) { - TransitionMap left_transitions; - TransitionBuilder(&left_transitions, this).apply(sequence->left); - - for (const auto &pair : left_transitions) { - add_transition( - transitions, pair.first, - transform_transition(pair.second, [&sequence](rule_ptr rule) { - return Seq::build({ rule, sequence->right }); - })); - } - - if (rule_can_be_blank(sequence->left)) - apply(sequence->right); - } - - void apply_to(const Repeat *repeat) { - TransitionMap content_transitions; - TransitionBuilder(&content_transitions, this).apply(repeat->content); - - for (const auto &pair : content_transitions) { - add_transition(transitions, pair.first, pair.second); - add_transition( - transitions, pair.first, - transform_transition(pair.second, [&repeat](rule_ptr item_rule) { - return Seq::build({ item_rule, repeat->copy() }); - })); - } - } - - void apply_to(const Metadata *metadata) { - bool has_active_precedence = metadata->params.is_active; - if (has_active_precedence) - precedence_stack->push_back(metadata->params.precedence); - - if (metadata->params.is_main_token) - in_main_token = true; - - rules::MetadataParams params = metadata->params; - if (params.has_precedence) - params.is_active = true; - - TransitionMap content_transitions; - TransitionBuilder(&content_transitions, this).apply(metadata->rule); - - for (const auto &pair : content_transitions) { - add_transition( - transitions, pair.first, - transform_transition(pair.second, [¶ms](rule_ptr rule) { - return Metadata::build(rule, params); - })); - } - - if (has_active_precedence) - precedence_stack->pop_back(); - } - public: - TransitionBuilder(TransitionMap *transitions, const Symbol &item_lhs, + void apply(const Rule &rule) { + rule.match( + [this](const rules::Blank &) {}, + + [this](const rules::CharacterSet &character_set) { + PrecedenceRange precedence; + if (!precedence_stack->empty()) { + precedence.add(precedence_stack->back()); + } + + add_transition( + transitions, + character_set, + Transition{ + LexItemSet({ LexItem(item_lhs, rules::Blank{}) }), + precedence, + in_main_token, + } + ); + }, + + [this](const rules::Choice &choice) { + for (const auto &element : choice.elements) { + apply(element); + } + }, + + [this](const rules::Seq &sequence) { + TransitionMap left_transitions; + TransitionBuilder(&left_transitions, this).apply(*sequence.left); + + for (const auto &pair : left_transitions) { + add_transition( + transitions, + pair.first, + transform_transition(pair.second, [&sequence](Rule rule) -> Rule { + return Rule::seq({rule, *sequence.right}); + }) + ); + } + + if (rule_can_be_blank(*sequence.left)) { + apply(*sequence.right); + } + }, + + [this](const rules::Repeat &repeat) { + TransitionMap content_transitions; + TransitionBuilder(&content_transitions, this).apply(*repeat.rule); + + for (const auto &pair : content_transitions) { + add_transition(transitions, pair.first, pair.second); + add_transition( + transitions, pair.first, + transform_transition(pair.second, [&repeat](Rule item_rule) { + return Rule::seq({ item_rule, repeat }); + }) + ); + } + }, + + [this](const rules::Metadata &metadata) { + bool has_active_precedence = metadata.params.is_active; + if (has_active_precedence) + precedence_stack->push_back(metadata.params.precedence); + + if (metadata.params.is_main_token) + in_main_token = true; + + auto params = metadata.params; + if (params.has_precedence) + params.is_active = true; + + TransitionMap content_transitions; + TransitionBuilder(&content_transitions, this).apply(*metadata.rule); + + for (const auto &pair : content_transitions) { + add_transition( + transitions, pair.first, + transform_transition(pair.second, [¶ms](Rule rule) { + return rules::Metadata{rule, params}; + }) + ); + } + + if (has_active_precedence) { + precedence_stack->pop_back(); + } + }, + + [](auto) {} + ); + } + + TransitionBuilder(TransitionMap *transitions, const rules::Symbol &item_lhs, vector *precedence_stack, bool in_main_token) : transitions(transitions), item_lhs(item_lhs), @@ -180,8 +187,7 @@ class TransitionBuilder : public rules::RuleFn { void lex_item_transitions(TransitionMap *transitions, const LexItem &item) { vector precedence_stack; - TransitionBuilder(transitions, item.lhs, &precedence_stack, false) - .apply(item.rule); + TransitionBuilder(transitions, item.lhs, &precedence_stack, false).apply(item.rule); } } // namespace build_tables diff --git a/src/compiler/build_tables/lex_item_transitions.h b/src/compiler/build_tables/lex_item_transitions.h index b9dc25d0..2cd10917 100644 --- a/src/compiler/build_tables/lex_item_transitions.h +++ b/src/compiler/build_tables/lex_item_transitions.h @@ -1,15 +1,12 @@ #ifndef COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_ #define COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_ -#include "compiler/rules/character_set.h" -#include "compiler/rules/symbol.h" #include "compiler/build_tables/lex_item.h" namespace tree_sitter { namespace build_tables { -void lex_item_transitions(LexItemSet::TransitionMap *transitions, - const LexItem &); +void lex_item_transitions(LexItemSet::TransitionMap *transitions, const LexItem &); } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/build_tables/lex_table_builder.cc b/src/compiler/build_tables/lex_table_builder.cc index e0a18914..f9068d42 100644 --- a/src/compiler/build_tables/lex_table_builder.cc +++ b/src/compiler/build_tables/lex_table_builder.cc @@ -10,13 +10,7 @@ #include "compiler/build_tables/lex_item.h" #include "compiler/parse_table.h" #include "compiler/lexical_grammar.h" -#include "compiler/rules/built_in_symbols.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/visitor.h" +#include "compiler/rule.h" namespace tree_sitter { namespace build_tables { @@ -28,6 +22,7 @@ using std::string; using std::vector; using std::unordered_map; using std::unique_ptr; +using rules::Rule; using rules::Blank; using rules::Choice; using rules::CharacterSet; @@ -36,35 +31,45 @@ using rules::Symbol; using rules::Metadata; using rules::Seq; -class StartingCharacterAggregator : public rules::RuleFn { - void apply_to(const rules::Seq *rule) { - apply(rule->left); - } - - void apply_to(const rules::Choice *rule) { - for (const rule_ptr &element : rule->elements) apply(element); - } - - void apply_to(const rules::Repeat *rule) { - apply(rule->content); - } - - void apply_to(const rules::Metadata *rule) { - apply(rule->rule); - } - - void apply_to(const rules::CharacterSet *rule) { - result.add_set(*rule); - } - +class StartingCharacterAggregator { public: + void apply(const Rule &rule) { + rule.match( + [this](const Seq &sequence) { + apply(*sequence.left); + }, + + [this](const rules::Choice &rule) { + for (const auto &element : rule.elements) { + apply(element); + } + }, + + [this](const rules::Repeat &rule) { + apply(*rule.rule); + }, + + [this](const rules::Metadata &rule) { + apply(*rule.rule); + }, + + [this](const rules::CharacterSet &rule) { + result.add_set(rule); + }, + + [this](const rules::Blank) {}, + + [](auto) {} + ); + } + CharacterSet result; }; class LexTableBuilderImpl : public LexTableBuilder { LexTable lex_table; const LexicalGrammar grammar; - vector separator_rules; + vector separator_rules; CharacterSet first_separator_characters; LexConflictManager conflict_manager; unordered_map lex_state_ids; @@ -74,11 +79,11 @@ class LexTableBuilderImpl : public LexTableBuilder { LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) { StartingCharacterAggregator starting_character_aggregator; - for (const rule_ptr &rule : grammar.separators) { - separator_rules.push_back(Repeat::build(rule)); + for (const auto &rule : grammar.separators) { + separator_rules.push_back(Repeat{rule}); starting_character_aggregator.apply(rule); } - separator_rules.push_back(Blank::build()); + separator_rules.push_back(Blank{}); first_separator_characters = starting_character_aggregator.result; shadowed_token_indices.resize(grammar.variables.size()); } @@ -98,8 +103,18 @@ class LexTableBuilderImpl : public LexTableBuilder { clear(); map terminals; - terminals[Symbol(left, Symbol::Terminal)]; - terminals[Symbol(right, Symbol::Terminal)]; + terminals[Symbol::terminal(left)]; + terminals[Symbol::terminal(right)]; + + if (grammar.variables[left].is_string && grammar.variables[right].is_string) { + StartingCharacterAggregator left_starting_characters; + left_starting_characters.apply(grammar.variables[left].rule); + StartingCharacterAggregator right_starting_characters; + right_starting_characters.apply(grammar.variables[right].rule); + if (!(left_starting_characters.result == right_starting_characters.result)) { + return false; + } + } add_lex_state(item_set_for_terminals(terminals)); @@ -183,11 +198,11 @@ class LexTableBuilderImpl : public LexTableBuilder { for (ParseState &state : parse_table->states) { for (auto &entry : state.terminal_entries) { Symbol symbol = entry.first; - if (symbol.is_token()) { + if (symbol.is_terminal()) { auto homonyms = conflict_manager.possible_homonyms.find(symbol.index); if (homonyms != conflict_manager.possible_homonyms.end()) for (Symbol::Index homonym : homonyms->second) - if (state.terminal_entries.count(Symbol(homonym, Symbol::Terminal))) { + if (state.terminal_entries.count(Symbol::terminal(homonym))) { entry.second.reusable = false; break; } @@ -198,7 +213,7 @@ class LexTableBuilderImpl : public LexTableBuilder { auto extensions = conflict_manager.possible_extensions.find(symbol.index); if (extensions != conflict_manager.possible_extensions.end()) for (Symbol::Index extension : extensions->second) - if (state.terminal_entries.count(Symbol(extension, Symbol::Terminal))) { + if (state.terminal_entries.count(Symbol::terminal(extension))) { entry.second.depends_on_lookahead = true; break; } @@ -278,15 +293,18 @@ class LexTableBuilderImpl : public LexTableBuilder { LexItemSet result; for (const auto &pair : terminals) { Symbol symbol = pair.first; - if (symbol.is_token()) { - for (const rule_ptr &rule : rules_for_symbol(symbol)) { - for (const rule_ptr &separator_rule : separator_rules) { + if (symbol.is_terminal()) { + for (const auto &rule : rules_for_symbol(symbol)) { + for (const auto &separator_rule : separator_rules) { result.entries.insert(LexItem( symbol, Metadata::separator( - Seq::build({ + Rule::seq({ separator_rule, - Metadata::main_token(rule) })))); + Metadata::main_token(rule) + }) + ) + )); } } } @@ -294,17 +312,20 @@ class LexTableBuilderImpl : public LexTableBuilder { return result; } - vector rules_for_symbol(const rules::Symbol &symbol) { - if (symbol == rules::END_OF_INPUT()) - return { CharacterSet().include(0).copy() }; + vector rules_for_symbol(const rules::Symbol &symbol) { + if (symbol == rules::END_OF_INPUT()) { + return { CharacterSet().include(0) }; + } - rule_ptr rule = grammar.variables[symbol.index].rule; + return grammar.variables[symbol.index].rule.match( + [](const Choice &choice) { + return choice.elements; + }, - auto choice = rule->as(); - if (choice) - return choice->elements; - else - return { rule }; + [](auto rule) { + return vector{ rule }; + } + ); } }; diff --git a/src/compiler/build_tables/lookahead_set.cc b/src/compiler/build_tables/lookahead_set.cc index 239bc029..b9604c24 100644 --- a/src/compiler/build_tables/lookahead_set.cc +++ b/src/compiler/build_tables/lookahead_set.cc @@ -1,7 +1,7 @@ #include "compiler/build_tables/lookahead_set.h" #include #include -#include "compiler/rules/symbol.h" +#include "compiler/rule.h" namespace tree_sitter { namespace build_tables { diff --git a/src/compiler/build_tables/lookahead_set.h b/src/compiler/build_tables/lookahead_set.h index e62ee34d..74cd63e2 100644 --- a/src/compiler/build_tables/lookahead_set.h +++ b/src/compiler/build_tables/lookahead_set.h @@ -3,7 +3,7 @@ #include #include -#include "compiler/rules/symbol.h" +#include "compiler/rule.h" namespace tree_sitter { namespace build_tables { diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index b9c3831b..baf10a00 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -1,7 +1,7 @@ #include "compiler/build_tables/parse_item.h" #include #include "compiler/syntax_grammar.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" #include "compiler/util/hash_combine.h" namespace tree_sitter { @@ -41,7 +41,7 @@ bool ParseItem::operator<(const ParseItem &other) const { } Symbol ParseItem::lhs() const { - return Symbol(variable_index, Symbol::NonTerminal); + return Symbol{variable_index, Symbol::NonTerminal}; } bool ParseItem::is_done() const { diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h index fc3f0129..020afc07 100644 --- a/src/compiler/build_tables/parse_item.h +++ b/src/compiler/build_tables/parse_item.h @@ -4,8 +4,7 @@ #include #include #include "compiler/build_tables/lookahead_set.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/metadata.h" +#include "compiler/rule.h" #include "compiler/syntax_grammar.h" #include "compiler/precedence_range.h" diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc index 0a2039d3..36c3942f 100644 --- a/src/compiler/build_tables/parse_item_set_builder.cc +++ b/src/compiler/build_tables/parse_item_set_builder.cc @@ -4,7 +4,7 @@ #include #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" namespace tree_sitter { namespace build_tables { @@ -16,8 +16,6 @@ using std::get; using std::pair; using std::tuple; using std::make_tuple; -using std::shared_ptr; -using std::make_shared; using rules::Symbol; using rules::NONE; @@ -27,17 +25,17 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, set processed_non_terminals; for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) { - Symbol symbol(i, Symbol::Terminal); + Symbol symbol = Symbol::terminal(i); first_sets.insert({symbol, LookaheadSet({ symbol })}); } for (size_t i = 0, n = grammar.external_tokens.size(); i < n; i++) { - Symbol symbol(i, Symbol::External); + Symbol symbol = Symbol::external(i); first_sets.insert({symbol, LookaheadSet({ symbol })}); } for (size_t i = 0, n = grammar.variables.size(); i < n; i++) { - Symbol symbol(i, Symbol::NonTerminal); + Symbol symbol = Symbol::non_terminal(i); LookaheadSet first_set; processed_non_terminals.clear(); @@ -64,7 +62,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar, vector components_to_process; for (size_t i = 0, n = grammar.variables.size(); i < n; i++) { - Symbol symbol(i, Symbol::NonTerminal); + Symbol symbol = Symbol::non_terminal(i); map> cache_entry; components_to_process.clear(); diff --git a/src/compiler/build_tables/parse_item_set_builder.h b/src/compiler/build_tables/parse_item_set_builder.h index 2a0de268..a319d698 100644 --- a/src/compiler/build_tables/parse_item_set_builder.h +++ b/src/compiler/build_tables/parse_item_set_builder.h @@ -2,7 +2,7 @@ #define COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_ #include "compiler/build_tables/parse_item.h" -#include "compiler/rules/symbol.h" +#include "compiler/rule.h" #include namespace tree_sitter { diff --git a/src/compiler/build_tables/rule_can_be_blank.cc b/src/compiler/build_tables/rule_can_be_blank.cc index 0ae95749..97737fd3 100644 --- a/src/compiler/build_tables/rule_can_be_blank.cc +++ b/src/compiler/build_tables/rule_can_be_blank.cc @@ -1,43 +1,42 @@ #include "compiler/build_tables/rule_can_be_blank.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/repeat.h" +#include "compiler/rule.h" namespace tree_sitter { namespace build_tables { -class CanBeBlank : public rules::RuleFn { - protected: - bool apply_to(const rules::Blank *) { - return true; - } +bool rule_can_be_blank(const rules::Rule &rule) { + return rule.match( + [](rules::Blank) { + return true; + }, - bool apply_to(const rules::Repeat *rule) { - return apply(rule->content); - } + [](rules::CharacterSet) { + return false; + }, - bool apply_to(const rules::Choice *rule) { - for (const auto &element : rule->elements) - if (apply(element)) - return true; - return false; - } + [](rules::Repeat repeat) { + return rule_can_be_blank(*repeat.rule); + }, - bool apply_to(const rules::Seq *rule) { - return apply(rule->left) && apply(rule->right); - } + [](rules::Metadata metadata) { + return rule_can_be_blank(*metadata.rule); + }, - bool apply_to(const rules::Metadata *rule) { - return apply(rule->rule); - } -}; + [](rules::Choice choice) { + for (const auto &element : choice.elements) { + if (rule_can_be_blank(element)) { + return true; + } + } + return false; + }, -bool rule_can_be_blank(const rule_ptr &rule) { - return CanBeBlank().apply(rule); + [](rules::Seq seq) { + return rule_can_be_blank(*seq.left) && rule_can_be_blank(*seq.right); + }, + + [](auto) { return false; } + ); } } // namespace build_tables diff --git a/src/compiler/build_tables/rule_can_be_blank.h b/src/compiler/build_tables/rule_can_be_blank.h index 96a4a279..768dc6df 100644 --- a/src/compiler/build_tables/rule_can_be_blank.h +++ b/src/compiler/build_tables/rule_can_be_blank.h @@ -6,7 +6,7 @@ namespace tree_sitter { namespace build_tables { -bool rule_can_be_blank(const rule_ptr &rule); +bool rule_can_be_blank(const rules::Rule &rule); } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc index cc9d8155..3edcf141 100644 --- a/src/compiler/compile.cc +++ b/src/compiler/compile.cc @@ -22,8 +22,7 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) { TSCompileErrorTypeInvalidGrammar }; } - auto prepare_grammar_result = - prepare_grammar::prepare_grammar(parse_result.grammar); + auto prepare_grammar_result = prepare_grammar::prepare_grammar(parse_result.grammar); const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result); const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result); CompileError error = get<2>(prepare_grammar_result); @@ -46,22 +45,20 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) { return { strdup(code.c_str()), nullptr, TSCompileErrorTypeNone }; } -pair compile(const Grammar &grammar, +pair compile(const InputGrammar &grammar, std::string name) { auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar); const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result); const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result); CompileError error = get<2>(prepare_grammar_result); - if (error.type) - return { "", error }; + if (error.type) return { "", error }; auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar); const ParseTable &parse_table = get<0>(table_build_result); const LexTable &lex_table = get<1>(table_build_result); error = get<2>(table_build_result); - if (error.type) - return { "", error }; + if (error.type) return { "", error }; string code = generate_code::c_code(name, parse_table, lex_table, syntax_grammar, lexical_grammar); diff --git a/src/compiler/compile.h b/src/compiler/compile.h index 649c38ea..5f182bc0 100644 --- a/src/compiler/compile.h +++ b/src/compiler/compile.h @@ -7,9 +7,9 @@ namespace tree_sitter { -struct Grammar; +struct InputGrammar; -std::pair compile(const Grammar &, std::string); +std::pair compile(const InputGrammar &, std::string); } // namespace tree_sitter diff --git a/src/compiler/compile_error.h b/src/compiler/compile_error.h index 7035d7a7..9797a459 100644 --- a/src/compiler/compile_error.h +++ b/src/compiler/compile_error.h @@ -8,6 +8,8 @@ namespace tree_sitter { class CompileError { public: + CompileError() : type(TSCompileErrorTypeNone) {} + CompileError(TSCompileErrorType type, std::string message) : type(type), message(message) {} @@ -15,6 +17,10 @@ class CompileError { return CompileError(TSCompileErrorTypeNone, ""); } + operator bool() const { + return type != TSCompileErrorTypeNone; + } + bool operator==(const CompileError &other) const { return type == other.type && message == other.message; } diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index d592966a..b51db626 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -9,7 +9,7 @@ #include "compiler/parse_table.h" #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" #include "compiler/util/string_helpers.h" #include "tree_sitter/runtime.h" @@ -129,7 +129,7 @@ class CCodeGenerator { size_t token_count = 0; for (const auto &entry : parse_table.symbols) { const Symbol &symbol = entry.first; - if (symbol.is_token()) { + if (symbol.is_terminal()) { token_count++; } else if (symbol.is_external()) { const ExternalToken &external_token = syntax_grammar.external_tokens[symbol.index]; @@ -256,7 +256,7 @@ class CCodeGenerator { if (symbol.is_external()) { needs_external_scanner = true; external_token_indices.insert(symbol.index); - } else if (symbol.is_token()) { + } else if (symbol.is_terminal()) { auto corresponding_external_token = external_tokens_by_corresponding_internal_token.find(symbol.index); if (corresponding_external_token != external_tokens_by_corresponding_internal_token.end()) { @@ -298,7 +298,7 @@ class CCodeGenerator { line("TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {"); indent([&]() { for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) { - line("[" + external_token_id(i) + "] = " + symbol_id(Symbol(i, Symbol::External)) + ","); + line("[" + external_token_id(i) + "] = " + symbol_id(Symbol::external(i)) + ","); } }); line("};"); @@ -339,7 +339,7 @@ class CCodeGenerator { line("[" + to_string(state_id++) + "] = {"); indent([&]() { for (const auto &entry : state.nonterminal_entries) { - line("[" + symbol_id(Symbol(entry.first, Symbol::NonTerminal)) + "] = STATE("); + line("[" + symbol_id(Symbol::non_terminal(entry.first)) + "] = STATE("); add(to_string(entry.second)); add("),"); } @@ -686,9 +686,13 @@ class CCodeGenerator { string c_code(string name, const ParseTable &parse_table, const LexTable &lex_table, const SyntaxGrammar &syntax_grammar, const LexicalGrammar &lexical_grammar) { - return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, - lexical_grammar) - .code(); + return CCodeGenerator( + name, + parse_table, + lex_table, + syntax_grammar, + lexical_grammar + ).code(); } } // namespace generate_code diff --git a/src/compiler/grammar.h b/src/compiler/grammar.h index 0a07280c..f24c0030 100644 --- a/src/compiler/grammar.h +++ b/src/compiler/grammar.h @@ -2,17 +2,43 @@ #define COMPILER_GRAMMAR_H_ #include +#include #include #include #include "compiler/rule.h" namespace tree_sitter { -struct Grammar { - std::vector> rules; - std::vector extra_tokens; - std::vector> expected_conflicts; - std::vector external_tokens; +enum VariableType { + VariableTypeHidden, + VariableTypeAuxiliary, + VariableTypeAnonymous, + VariableTypeNamed, +}; + +struct ExternalToken { + std::string name; + VariableType type; + rules::Symbol corresponding_internal_token; + + inline bool operator==(const ExternalToken &other) const { + return name == other.name && + type == other.type && + corresponding_internal_token == other.corresponding_internal_token; + } +}; + +struct InputGrammar { + struct Variable { + std::string name; + VariableType type; + rules::Rule rule; + }; + + std::vector variables; + std::vector extra_tokens; + std::vector> expected_conflicts; + std::vector external_tokens; }; } // namespace tree_sitter diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index ccca250d..daf4517a 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -1,6 +1,5 @@ #include "compiler/lex_table.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" namespace tree_sitter { diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index 7b87079a..9317c818 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -6,8 +6,7 @@ #include #include #include "compiler/precedence_range.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/character_set.h" +#include "compiler/rule.h" namespace tree_sitter { diff --git a/src/compiler/lexical_grammar.h b/src/compiler/lexical_grammar.h index 456e2089..78d3faa8 100644 --- a/src/compiler/lexical_grammar.h +++ b/src/compiler/lexical_grammar.h @@ -5,20 +5,25 @@ #include #include #include "compiler/rule.h" -#include "compiler/variable.h" +#include "compiler/grammar.h" namespace tree_sitter { struct LexicalVariable { std::string name; VariableType type; - rule_ptr rule; + rules::Rule rule; bool is_string; + + inline bool operator==(const LexicalVariable &other) const { + return other.name == name && other.type == type && other.rule == rule && + other.is_string == is_string; + } }; struct LexicalGrammar { std::vector variables; - std::vector separators; + std::vector separators; }; } // namespace tree_sitter diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_grammar.cc index 327c0f31..536672f4 100644 --- a/src/compiler/parse_grammar.cc +++ b/src/compiler/parse_grammar.cc @@ -1,20 +1,31 @@ #include "compiler/parse_grammar.h" #include #include +#include #include #include "json.h" #include "compiler/rule.h" -#include "compiler/rules.h" namespace tree_sitter { using std::string; using std::vector; +using std::unordered_set; using std::pair; +using rules::Rule; +using rules::Blank; +using rules::Metadata; +using rules::Pattern; +using rules::String; +using rules::NamedSymbol; struct ParseRuleResult { - rule_ptr rule; + Rule rule; string error_message; + + ParseRuleResult(const string &error_message) : error_message(error_message) {} + ParseRuleResult(const char *error_message) : error_message(error_message) {} + ParseRuleResult(Rule rule) : rule(rule) {} }; ParseRuleResult parse_rule(json_value *rule_json) { @@ -23,193 +34,163 @@ ParseRuleResult parse_rule(json_value *rule_json) { string type; if (!rule_json) { - error_message = "Rule cannot be null"; - goto error; + return "Rule cannot be null"; } if (rule_json->type != json_object) { - error_message = "Rule type must be an object"; - goto error; + return "Rule type must be an object"; } rule_type_json = rule_json->operator[]("type"); if (rule_type_json.type != json_string) { - error_message = "Rule type must be a string"; - goto error; + return "Rule type must be a string"; } type = rule_type_json.u.string.ptr; if (type == "BLANK") { - return { blank(), "" }; + return Rule(Blank{}); } if (type == "CHOICE") { json_value members_json = rule_json->operator[]("members"); if (members_json.type != json_array) { - error_message = "Choice members must be an array"; - goto error; + return "Choice members must be an array"; } - vector members; + vector members; for (size_t i = 0, length = members_json.u.array.length; i < length; i++) { json_value *member_json = members_json.u.array.values[i]; - ParseRuleResult member = parse_rule(member_json); - if (member.rule.get()) { - members.push_back(member.rule); - } else { - error_message = "Invalid choice member: " + member.error_message; - goto error; + auto result = parse_rule(member_json); + if (!result.error_message.empty()) { + return "Invalid choice member: " + result.error_message; } + members.push_back(result.rule); } - return { choice(members), "" }; + return Rule::choice(members); } if (type == "SEQ") { json_value members_json = rule_json->operator[]("members"); if (members_json.type != json_array) { - error_message = "Seq members must be an array"; - goto error; + return "Seq members must be an array"; } - vector members; + vector members; for (size_t i = 0, length = members_json.u.array.length; i < length; i++) { json_value *member_json = members_json.u.array.values[i]; - ParseRuleResult member = parse_rule(member_json); - if (member.rule.get()) { - members.push_back(member.rule); - } else { - error_message = "Invalid seq member: " + member.error_message; - goto error; + auto result = parse_rule(member_json); + if (!result.error_message.empty()) { + return "Invalid choice member: " + result.error_message; } + members.push_back(result.rule); } - return { seq(members), "" }; + return Rule::seq(members); } if (type == "REPEAT") { json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (content.rule.get()) { - return { repeat(content.rule), "" }; - } else { - error_message = "Invalid repeat content: " + content.error_message; - goto error; + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid repeat content: " + result.error_message; } + return Rule::choice({Rule::repeat(result.rule), Blank{}}); } if (type == "REPEAT1") { json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (content.rule.get()) { - return { repeat1(content.rule), "" }; - } else { - error_message = "Invalid repeat1 content: " + content.error_message; - goto error; + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid repeat content: " + result.error_message; } + return Rule::repeat(result.rule); } if (type == "TOKEN") { json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (content.rule.get()) { - return { token(content.rule), "" }; - } else { - error_message = "Invalid token content: " + content.error_message; - goto error; + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid token content: " + result.error_message; } + return Rule(Metadata::token(result.rule)); } if (type == "PATTERN") { json_value value_json = rule_json->operator[]("value"); if (value_json.type == json_string) { - return { pattern(value_json.u.string.ptr), "" }; + return Rule(Pattern{value_json.u.string.ptr}); } else { - error_message = "Pattern value must be a string"; - goto error; + return "Pattern value must be a string"; } } if (type == "STRING") { json_value value_json = rule_json->operator[]("value"); if (value_json.type == json_string) { - return { str(value_json.u.string.ptr), "" }; + return Rule(String{value_json.u.string.ptr}); } else { - error_message = "String rule value must be a string"; - goto error; + return "String rule value must be a string"; } } if (type == "SYMBOL") { json_value value_json = rule_json->operator[]("name"); if (value_json.type == json_string) { - return { sym(value_json.u.string.ptr), "" }; + return Rule(NamedSymbol{value_json.u.string.ptr}); } else { - error_message = "Symbol value must be a string"; - goto error; + return "Symbol value must be a string"; } } if (type == "PREC") { json_value precedence_json = rule_json->operator[]("value"); if (precedence_json.type != json_integer) { - error_message = "Precedence value must be an integer"; - goto error; + return "Precedence value must be an integer"; } json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (!content.rule.get()) { - error_message = "Invalid precedence content: " + content.error_message; - goto error; + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid precedence content: " + result.error_message; } - - return { prec(precedence_json.u.integer, content.rule), "" }; + return Rule(Metadata::prec(precedence_json.u.integer, result.rule)); } if (type == "PREC_LEFT") { json_value precedence_json = rule_json->operator[]("value"); if (precedence_json.type != json_integer) { - error_message = "Precedence value must be an integer"; - goto error; + return "Precedence value must be an integer"; } json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (!content.rule.get()) { - error_message = "Invalid precedence content: " + content.error_message; - goto error; + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid precedence content: " + result.error_message; } - - return { prec_left(precedence_json.u.integer, content.rule), "" }; + return Rule(Metadata::prec_left(precedence_json.u.integer, result.rule)); } if (type == "PREC_RIGHT") { json_value precedence_json = rule_json->operator[]("value"); if (precedence_json.type != json_integer) { - error_message = "Precedence value must be an integer"; - goto error; + return "Precedence value must be an integer"; } json_value content_json = rule_json->operator[]("content"); - ParseRuleResult content = parse_rule(&content_json); - if (!content.rule.get()) { - error_message = "Invalid precedence content: " + content.error_message; - goto error; + auto result = parse_rule(&content_json); + if (!result.error_message.empty()) { + return "Invalid precedence content: " + result.error_message; } - - return { prec_right(precedence_json.u.integer, content.rule), "" }; + return Rule(Metadata::prec_right(precedence_json.u.integer, result.rule)); } - error_message = "Unknown rule type " + type; - -error: - return { rule_ptr(), error_message }; + return "Unknown rule type: " + type; } ParseGrammarResult parse_grammar(const string &input) { string error_message; string name; - Grammar grammar; + InputGrammar grammar; json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json; json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 }; @@ -242,15 +223,16 @@ ParseGrammarResult parse_grammar(const string &input) { for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) { json_object_entry entry_json = rules_json.u.object.values[i]; - ParseRuleResult entry = parse_rule(entry_json.value); - - if (!entry.rule.get()) { - error_message = - string("Invalid rule '") + entry_json.name + "' " + entry.error_message; + auto result = parse_rule(entry_json.value); + if (!result.error_message.empty()) { + error_message = result.error_message; goto error; } - - grammar.rules.push_back({ string(entry_json.name), entry.rule }); + grammar.variables.push_back(InputGrammar::Variable{ + string(entry_json.name), + VariableTypeNamed, + result.rule + }); } extras_json = grammar_json->operator[]("extras"); @@ -262,13 +244,12 @@ ParseGrammarResult parse_grammar(const string &input) { for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) { json_value *extra_json = extras_json.u.array.values[i]; - ParseRuleResult extra = parse_rule(extra_json); - if (!extra.rule.get()) { - error_message = string("Invalid extra token: ") + extra.error_message; + auto result = parse_rule(extra_json); + if (!result.error_message.empty()) { + error_message = "Invalid extra token: " + result.error_message; goto error; } - - grammar.extra_tokens.push_back(extra.rule); + grammar.extra_tokens.push_back(result.rule); } } @@ -286,7 +267,7 @@ ParseGrammarResult parse_grammar(const string &input) { goto error; } - vector conflict; + unordered_set conflict; for (size_t j = 0, conflict_length = conflict_json->u.array.length; j < conflict_length; j++) { json_value *conflict_entry_json = conflict_json->u.array.values[j]; @@ -295,7 +276,9 @@ ParseGrammarResult parse_grammar(const string &input) { goto error; } - conflict.push_back(string(conflict_entry_json->u.string.ptr)); + conflict.insert(rules::NamedSymbol{ + string(conflict_entry_json->u.string.ptr) + }); } grammar.expected_conflicts.push_back(conflict); @@ -317,7 +300,11 @@ ParseGrammarResult parse_grammar(const string &input) { } string token_name = token_name_json->u.string.ptr; - grammar.external_tokens.push_back(token_name); + grammar.external_tokens.push_back({ + token_name, + VariableTypeNamed, + rules::NONE() + }); } } @@ -329,7 +316,7 @@ error: json_value_free(grammar_json); } - return { "", Grammar(), error_message }; + return { "", InputGrammar(), error_message }; } } // namespace tree_sitter diff --git a/src/compiler/parse_grammar.h b/src/compiler/parse_grammar.h index 284ca4b2..04e7672b 100644 --- a/src/compiler/parse_grammar.h +++ b/src/compiler/parse_grammar.h @@ -9,7 +9,7 @@ namespace tree_sitter { struct ParseGrammarResult { std::string name; - Grammar grammar; + InputGrammar grammar; std::string error_message; }; diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 57728e0f..37707ed0 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -1,7 +1,7 @@ #include "compiler/parse_table.h" #include #include "compiler/precedence_range.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" namespace tree_sitter { @@ -178,7 +178,7 @@ ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, void ParseTable::set_nonterminal_action(ParseStateId state_id, Symbol::Index lookahead, ParseStateId next_state_id) { - symbols[Symbol(lookahead, Symbol::NonTerminal)].structural = true; + symbols[Symbol::non_terminal(lookahead)].structural = true; states[state_id].nonterminal_entries[lookahead] = next_state_id; } diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 02501ebd..c00969d2 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -6,8 +6,7 @@ #include #include #include "compiler/lex_table.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/metadata.h" +#include "compiler/rule.h" #include "compiler/precedence_range.h" #include "compiler/syntax_grammar.h" diff --git a/src/compiler/prepare_grammar/expand_repeats.cc b/src/compiler/prepare_grammar/expand_repeats.cc index d01bb7a0..ec2ec19a 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cc +++ b/src/compiler/prepare_grammar/expand_repeats.cc @@ -2,14 +2,9 @@ #include #include #include -#include "compiler/prepare_grammar/initial_syntax_grammar.h" +#include +#include "compiler/grammar.h" #include "compiler/rule.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/repeat.h" namespace tree_sitter { namespace prepare_grammar { @@ -18,53 +13,79 @@ using std::string; using std::vector; using std::pair; using std::to_string; -using std::make_shared; -using rules::Blank; -using rules::Choice; -using rules::Repeat; -using rules::Seq; +using rules::Rule; using rules::Symbol; -class ExpandRepeats : public rules::IdentityRuleFn { +class ExpandRepeats { string rule_name; size_t offset; size_t repeat_count; - vector> existing_repeats; + vector> existing_repeats; - rule_ptr apply_to(const Repeat *rule) { - for (const auto pair : existing_repeats) - if (pair.first->operator==(*rule)) - return pair.second.copy(); + Rule apply(Rule rule) { + return rule.match( + [&](const rules::Blank &blank) -> Rule { return blank; }, + [&](const rules::Symbol &symbol) { return symbol; }, - rule_ptr inner_rule = apply(rule->content); - size_t index = aux_rules.size(); - string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count); - Symbol repeat_symbol(offset + index, Symbol::NonTerminal); - existing_repeats.push_back({ rule->copy(), repeat_symbol }); - aux_rules.push_back(Variable{ - helper_rule_name, - VariableTypeAuxiliary, - Choice::build({ - Seq::build({ - repeat_symbol.copy(), - inner_rule, - }), - inner_rule, - }) - }); - return repeat_symbol.copy(); + [&](const rules::Choice &choice) { + vector elements; + for (const auto &element : choice.elements) { + elements.push_back(apply(element)); + } + return Rule::choice(elements); + }, + + [&](const rules::Seq &sequence) { + return rules::Seq{ + apply(*sequence.left), + apply(*sequence.right) + }; + }, + + [&](const rules::Repeat &repeat) { + for (const auto pair : existing_repeats) { + if (pair.first == rule) { + return pair.second; + } + } + + Rule inner_rule = apply(*repeat.rule); + size_t index = aux_rules.size(); + string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count); + Symbol repeat_symbol = Symbol::non_terminal(offset + index); + existing_repeats.push_back({repeat, repeat_symbol}); + aux_rules.push_back({ + helper_rule_name, + VariableTypeAuxiliary, + rules::Choice{{ + rules::Seq{repeat_symbol, inner_rule}, + inner_rule, + }} + }); + return repeat_symbol; + }, + + [&](const rules::Metadata &metadata) { + return rules::Metadata{apply(*metadata.rule), metadata.params}; + }, + + [](auto) { + assert(!"Unexpected rule type"); + return rules::Blank{}; + } + ); } public: explicit ExpandRepeats(size_t offset) : offset(offset) {} - rule_ptr expand(const rule_ptr &rule, const string &name) { + Rule expand(const Rule &rule, const string &name) { rule_name = name; repeat_count = 0; return apply(rule); } - vector aux_rules; + vector aux_rules; }; InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) { @@ -75,11 +96,16 @@ InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) { result.external_tokens = grammar.external_tokens; ExpandRepeats expander(result.variables.size()); - for (auto &variable : result.variables) + for (auto &variable : result.variables) { variable.rule = expander.expand(variable.rule, variable.name); + } + + result.variables.insert( + result.variables.end(), + expander.aux_rules.begin(), + expander.aux_rules.end() + ); - result.variables.insert(result.variables.end(), expander.aux_rules.begin(), - expander.aux_rules.end()); return result; } diff --git a/src/compiler/prepare_grammar/expand_repeats.h b/src/compiler/prepare_grammar/expand_repeats.h index 3efabd9a..57e1474f 100644 --- a/src/compiler/prepare_grammar/expand_repeats.h +++ b/src/compiler/prepare_grammar/expand_repeats.h @@ -6,8 +6,6 @@ namespace tree_sitter { namespace prepare_grammar { -struct InitialSyntaxGrammar; - InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &); } // namespace prepare_grammar diff --git a/src/compiler/prepare_grammar/expand_tokens.cc b/src/compiler/prepare_grammar/expand_tokens.cc index ff268782..48396540 100644 --- a/src/compiler/prepare_grammar/expand_tokens.cc +++ b/src/compiler/prepare_grammar/expand_tokens.cc @@ -2,15 +2,8 @@ #include #include #include -#include #include "compiler/lexical_grammar.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/pattern.h" -#include "compiler/rules/string.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/character_set.h" +#include "compiler/rule.h" #include "compiler/prepare_grammar/parse_regex.h" #include "utf8proc.h" @@ -19,70 +12,69 @@ namespace prepare_grammar { using std::string; using std::vector; -using std::map; -using std::pair; -using std::make_shared; -using rules::String; -using rules::Pattern; -using rules::Metadata; +using rules::Rule; -class ExpandTokens : public rules::IdentityRuleFn { - using rules::IdentityRuleFn::apply_to; +ExpandTokenResult expand_token(const rules::Rule &rule) { + return rule.match( + [](const rules::Blank &blank) -> ExpandTokenResult { return Rule(blank); }, - rule_ptr apply_to(const String *rule) { - vector elements; - const uint8_t *iter = reinterpret_cast(rule->value.data()); - const uint8_t *end = iter + rule->value.size(); + [](const rules::String &string) { + vector elements; + const uint8_t *iter = reinterpret_cast(string.value.data()); + const uint8_t *end = iter + string.value.size(); - while (iter < end) { - int32_t el; - size_t size = utf8proc_iterate(iter, (end - iter), &el); - if (!size) - break; - iter += size; + while (iter < end) { + int32_t el; + size_t size = utf8proc_iterate(iter, (end - iter), &el); + if (!size) + break; + iter += size; - elements.push_back(rules::CharacterSet().include(el).copy()); - } + elements.push_back(rules::CharacterSet().include(el)); + } - rules::MetadataParams params; - params.is_token = true; - params.is_string = true; + return Rule::seq(elements); + }, - return rules::Metadata::build(rules::Seq::build(elements), params); - } + [](const rules::Pattern &pattern) -> ExpandTokenResult { + auto result = parse_regex(pattern.value); + if (result.second) return result.second; + return result.first; + }, - rule_ptr apply_to(const Pattern *rule) { - auto pair = parse_regex(rule->value); - if (!error.type) - error = pair.second; - return pair.first; - } + [](const rules::Repeat &rule) -> ExpandTokenResult { + auto result = expand_token(*rule.rule); + if (result.error) return result.error; + return Rule::repeat(result.rule); + }, - public: - CompileError error; - ExpandTokens() : error(CompileError::none()) {} + [](const rules::Metadata &rule) -> ExpandTokenResult { + auto result = expand_token(*rule.rule); + if (result.error) return result.error; + return Rule(rules::Metadata{result.rule, rule.params}); + }, + + [](const rules::Seq &rule) -> ExpandTokenResult { + auto left_result = expand_token(*rule.left); + if (left_result.error) return left_result.error; + auto right_result = expand_token(*rule.right); + if (right_result.error) return right_result.error; + return Rule(rules::Seq{left_result.rule, right_result.rule}); + }, + + [](const rules::Choice &rule) -> ExpandTokenResult { + std::vector elements; + for (const auto &element : rule.elements) { + auto result = expand_token(element); + if (result.error) return result.error; + elements.push_back(result.rule); + } + return Rule(rules::Choice{elements}); + }, + + [](auto) { return CompileError(TSCompileErrorTypeInvalidTokenContents, ""); } + ); }; -pair expand_tokens(const LexicalGrammar &grammar) { - LexicalGrammar result; - ExpandTokens expander; - - for (const LexicalVariable &variable : grammar.variables) { - auto rule = expander.apply(variable.rule); - if (expander.error.type) - return { result, expander.error }; - result.variables.push_back({variable.name, variable.type, rule, variable.is_string}); - } - - for (auto &sep : grammar.separators) { - auto rule = expander.apply(sep); - if (expander.error.type) - return { result, expander.error }; - result.separators.push_back(rule); - } - - return { result, CompileError::none() }; -} - } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/expand_tokens.h b/src/compiler/prepare_grammar/expand_tokens.h index 2e400090..d1545cca 100644 --- a/src/compiler/prepare_grammar/expand_tokens.h +++ b/src/compiler/prepare_grammar/expand_tokens.h @@ -2,15 +2,21 @@ #define COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_ #include +#include "compiler/rule.h" #include "compiler/compile_error.h" namespace tree_sitter { - -struct LexicalGrammar; - namespace prepare_grammar { -std::pair expand_tokens(const LexicalGrammar &); +struct ExpandTokenResult { + rules::Rule rule; + CompileError error; + + ExpandTokenResult(const rules::Rule &rule) : rule(rule) {} + ExpandTokenResult(const CompileError &error) : error(error) {} +}; + +ExpandTokenResult expand_token(const rules::Rule &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/extract_choices.cc b/src/compiler/prepare_grammar/extract_choices.cc index 6262001c..3b471538 100644 --- a/src/compiler/prepare_grammar/extract_choices.cc +++ b/src/compiler/prepare_grammar/extract_choices.cc @@ -1,54 +1,48 @@ #include "compiler/prepare_grammar/extract_choices.h" #include #include -#include "compiler/rules/visitor.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/repeat.h" +#include "compiler/rule.h" namespace tree_sitter { namespace prepare_grammar { using std::vector; +using rules::Rule; -class ExtractChoices : public rules::RuleFn> { - vector default_apply(const Rule *rule) { - return vector({ rule->copy() }); - } +vector extract_choices(const Rule &rule) { + return rule.match( + [](const rules::Seq &sequence) { + vector result; + for (auto &left_entry : extract_choices(*sequence.left)) { + for (auto &right_entry : extract_choices(*sequence.right)) { + result.push_back(rules::Rule::seq({left_entry, right_entry})); + } + } + return result; + }, - vector apply_to(const rules::Seq *rule) { - vector result; - for (auto left_entry : apply(rule->left)) - for (auto right_entry : apply(rule->right)) - result.push_back(rules::Seq::build({ left_entry, right_entry })); - return result; - } + [](const rules::Metadata &rule) { + vector result; + for (auto &entry : extract_choices(*rule.rule)) { + result.push_back(rules::Metadata{entry, rule.params}); + } + return result; + }, - vector apply_to(const rules::Metadata *rule) { - vector result; - for (auto entry : apply(rule->rule)) - result.push_back(rules::Metadata::build(entry, rule->params)); - return result; - } + [](const rules::Choice &choice) { + vector result; + for (auto &element : choice.elements) { + for (auto &entry : extract_choices(element)) { + result.push_back(entry); + } + } + return result; + }, - vector apply_to(const rules::Choice *rule) { - vector result; - for (auto element : rule->elements) - for (auto entry : apply(element)) - result.push_back(entry); - return result; - } - - vector apply_to(const rules::Repeat *rule) { - return vector({ - rules::Repeat::build(rules::Choice::build(apply(rule->content))), - }); - } -}; - -std::vector extract_choices(const rule_ptr &rule) { - return ExtractChoices().apply(rule); + [](const auto &rule) { + return vector({rule}); + } + ); } } // namespace prepare_grammar diff --git a/src/compiler/prepare_grammar/extract_choices.h b/src/compiler/prepare_grammar/extract_choices.h index 50b91467..3b0d12db 100644 --- a/src/compiler/prepare_grammar/extract_choices.h +++ b/src/compiler/prepare_grammar/extract_choices.h @@ -7,7 +7,7 @@ namespace tree_sitter { namespace prepare_grammar { -std::vector extract_choices(const rule_ptr &); +std::vector extract_choices(const rules::Rule &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index ec821ecc..39f21698 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -1,24 +1,19 @@ #include "compiler/prepare_grammar/extract_tokens.h" #include #include +#include #include #include #include #include "tree_sitter/compiler.h" #include "compiler/lexical_grammar.h" -#include "compiler/prepare_grammar/initial_syntax_grammar.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/string.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/pattern.h" +#include "compiler/rule.h" #include "compiler/prepare_grammar/token_description.h" -#include "compiler/prepare_grammar/is_token.h" +#include "compiler/prepare_grammar/expand_tokens.h" namespace tree_sitter { namespace prepare_grammar { -using std::make_shared; using std::make_tuple; using std::map; using std::set; @@ -26,74 +21,143 @@ using std::string; using std::tuple; using std::vector; using rules::Symbol; +using rules::Rule; +using rules::Rule; -class SymbolReplacer : public rules::IdentityRuleFn { - using rules::IdentityRuleFn::apply_to; - - rule_ptr apply_to(const Symbol *rule) { - return replace_symbol(*rule).copy(); - } - +class SymbolReplacer { public: map replacements; + Rule apply(const Rule &rule) { + return rule.match( + [this](const rules::Blank &blank) -> Rule { + return blank; + }, + + [this](const rules::Symbol &symbol) { + return replace_symbol(symbol); + }, + + [this](const rules::Choice &choice) { + vector elements; + for (const auto &element : choice.elements) { + elements.push_back(apply(element)); + } + return Rule::choice(elements); + }, + + [this](const rules::Seq &sequence) { + return rules::Seq{ + apply(*sequence.left), + apply(*sequence.right) + }; + }, + + [this](const rules::Repeat &repeat) { + return Rule::repeat(apply(*repeat.rule)); + }, + + [this](const rules::Metadata &metadata) { + return rules::Metadata{apply(*metadata.rule), metadata.params}; + }, + + [](auto) { + assert(!"Unexpected rule type"); + return rules::Blank{}; + } + ); + } + Symbol replace_symbol(const Symbol &symbol) { - if (!symbol.is_non_terminal()) - return symbol; + if (!symbol.is_non_terminal()) return symbol; auto replacement_pair = replacements.find(symbol); - if (replacement_pair != replacements.end()) + if (replacement_pair != replacements.end()) { return replacement_pair->second; + } int new_index = symbol.index; - for (const auto &pair : replacements) - if (pair.first.index < symbol.index) + for (const auto &pair : replacements) { + if (pair.first.index < symbol.index) { new_index--; - return Symbol(new_index, Symbol::NonTerminal); + } + } + + return Symbol::non_terminal(new_index); } }; -class TokenExtractor : public rules::IdentityRuleFn { - using rules::IdentityRuleFn::apply_to; - - rule_ptr apply_to_token(const Rule *input, VariableType entry_type, bool is_string) { - for (size_t i = 0; i < tokens.size(); i++) - if (tokens[i].rule->operator==(*input)) { +class TokenExtractor { + Symbol extract_token(const rules::Rule &input, VariableType entry_type) { + for (size_t i = 0; i < tokens.size(); i++) { + if (tokens[i].rule == input) { token_usage_counts[i]++; - return make_shared(i, Symbol::Terminal); + return Symbol::terminal(i); } - - rule_ptr rule = input->copy(); - size_t index = tokens.size(); - tokens.push_back({token_description(rule), entry_type, rule, is_string}); - token_usage_counts.push_back(1); - return make_shared(index, Symbol::Terminal); - } - - rule_ptr apply_to(const rules::String *rule) { - return apply_to_token(rule, VariableTypeAnonymous, true); - } - - rule_ptr apply_to(const rules::Pattern *rule) { - return apply_to_token(rule, VariableTypeAuxiliary, false); - } - - rule_ptr apply_to(const rules::Metadata *rule) { - if (rule->params.is_token) { - return apply_to_token(rule->rule.get(), VariableTypeAuxiliary, false); - } else { - return rules::IdentityRuleFn::apply_to(rule); } + + size_t index = tokens.size(); + tokens.push_back({ + token_description(input), + entry_type, + input + }); + token_usage_counts.push_back(1); + + return Symbol::terminal(index); } public: - vector token_usage_counts; - vector tokens; -}; + Rule apply(const rules::Rule &rule) { + return rule.match( + [this](const rules::Blank &blank) -> Rule { return blank; }, -static CompileError extra_token_error(const string &message) { - return CompileError(TSCompileErrorTypeInvalidExtraToken, "Not a token: " + message); -} + [this](const rules::Metadata &rule) -> Rule { + if (rule.params.is_token) { + return extract_token(*rule.rule, VariableTypeAuxiliary); + } else { + return rules::Metadata{apply(*rule.rule), rule.params}; + } + }, + + [this](const rules::String &rule) { + return extract_token(rule, VariableTypeAnonymous); + }, + + [this](const rules::Pattern &rule) { + return extract_token(rule, VariableTypeAuxiliary); + }, + + [this](const rules::Repeat &rule) { + return Rule::repeat(apply(*rule.rule)); + }, + + [this](const rules::Seq &rule) { + return Rule::seq({apply(*rule.left), apply(*rule.right)}); + }, + + [this](const rules::Choice &rule) { + std::vector elements; + for (const auto &element : rule.elements) { + elements.push_back(apply(element)); + } + return Rule::choice(elements); + }, + + [](const rules::Symbol &symbol) { + return symbol; + }, + + [](auto) { + assert(!"Unexpected rule type"); + return rules::Blank{}; + } + ); + } + + vector token_usage_counts; + vector tokens; +}; tuple extract_tokens( const InternedGrammar &grammar @@ -104,15 +168,29 @@ tuple extract_tokens( TokenExtractor extractor; // First, extract all of the grammar's tokens into the lexical grammar. - vector processed_variables; - for (const Variable &variable : grammar.variables) { - processed_variables.push_back(Variable{ + vector processed_variables; + for (const auto &variable : grammar.variables) { + processed_variables.push_back({ variable.name, variable.type, extractor.apply(variable.rule) }); } - lexical_grammar.variables = extractor.tokens; + + for (const auto &extracted_token : extractor.tokens) { + auto expansion = expand_token(extracted_token.rule); + if (expansion.error) return make_tuple( + syntax_grammar, + lexical_grammar, + expansion.error + ); + lexical_grammar.variables.push_back({ + extracted_token.name, + extracted_token.type, + expansion.rule, + extracted_token.type == VariableTypeAnonymous + }); + } // If a variable's entire rule was extracted as a token and that token didn't // appear within any other rule, then remove that variable from the syntax @@ -120,26 +198,28 @@ tuple extract_tokens( // that pointed to that variable will need to be updated to point to the // variable in the lexical grammar. Symbols that pointed to later variables // will need to have their indices decremented. - size_t i = 0; - for (const Variable &variable : processed_variables) { - auto symbol = variable.rule->as(); - if (symbol && symbol->is_token() && extractor.token_usage_counts[symbol->index] == 1) { - lexical_grammar.variables[symbol->index].type = variable.type; - lexical_grammar.variables[symbol->index].name = variable.name; - symbol_replacer.replacements.insert({ Symbol(i, Symbol::NonTerminal), *symbol }); - } else { - syntax_grammar.variables.push_back(variable); - } + size_t i = -1; + for (const auto &variable : processed_variables) { i++; + if (variable.rule.is()) { + auto symbol = variable.rule.get_unchecked(); + if (symbol.is_terminal() && extractor.token_usage_counts[symbol.index] == 1) { + lexical_grammar.variables[symbol.index].type = variable.type; + lexical_grammar.variables[symbol.index].name = variable.name; + symbol_replacer.replacements[Symbol::non_terminal(i)] = symbol; + continue; + } + } + syntax_grammar.variables.push_back(variable); } // Perform any replacements of symbols needed based on the previous step. - for (Variable &variable : syntax_grammar.variables) { + for (auto &variable : syntax_grammar.variables) { variable.rule = symbol_replacer.apply(variable.rule); } - for (const ConflictSet &conflict_set : grammar.expected_conflicts) { - ConflictSet new_conflict_set; + for (const auto &conflict_set : grammar.expected_conflicts) { + set new_conflict_set; for (const Symbol &symbol : conflict_set) { new_conflict_set.insert(symbol_replacer.replace_symbol(symbol)); } @@ -148,47 +228,51 @@ tuple extract_tokens( // The grammar's extra tokens can be either token rules or symbols // pointing to token rules. If they are symbols, then they'll be handled by - // the parser; add them to the syntax grammar's ubiqutous tokens. If they + // the parser; add them to the syntax grammar's extra tokens. If they // are anonymous rules, they can be handled by the lexer; add them to the // lexical grammar's separator rules. - for (const rule_ptr &rule : grammar.extra_tokens) { - int i = 0; - bool used_elsewhere_in_grammar = false; - for (const LexicalVariable &variable : lexical_grammar.variables) { - if (variable.rule->operator==(*rule)) { - syntax_grammar.extra_tokens.insert(Symbol(i, Symbol::Terminal)); - used_elsewhere_in_grammar = true; + for (const auto &rule : grammar.extra_tokens) { + CompileError error = rule.match( + [&](const Symbol &symbol) { + Symbol new_symbol = symbol_replacer.replace_symbol(symbol); + if (new_symbol.is_non_terminal()) { + return CompileError( + TSCompileErrorTypeInvalidExtraToken, + "Non-token symbol " + syntax_grammar.variables[new_symbol.index].name + " can't be used as an extra token" + ); + } else { + syntax_grammar.extra_tokens.insert(new_symbol); + return CompileError::none(); + } + }, + + [&](auto non_symbol) { + auto expansion = expand_token(non_symbol); + if (expansion.error) return CompileError( + TSCompileErrorTypeInvalidExtraToken, + "Non-token rule expression can't be used as an extra token" + ); + int i = 0; + for (const LexicalVariable &variable : lexical_grammar.variables) { + if (variable.rule == expansion.rule) { + syntax_grammar.extra_tokens.insert(Symbol::terminal(i)); + return CompileError::none(); + } + i++; + } + + lexical_grammar.separators.push_back(expansion.rule); + return CompileError::none(); } - i++; - } + ); - if (used_elsewhere_in_grammar) { - continue; - } - - if (is_token(rule)) { - lexical_grammar.separators.push_back(rule); - continue; - } - - auto symbol = rule->as(); - if (!symbol) { - return make_tuple(syntax_grammar, lexical_grammar, - extra_token_error(rule->to_string())); - } - - Symbol new_symbol = symbol_replacer.replace_symbol(*symbol); - if (new_symbol.is_non_terminal()) { - return make_tuple( - syntax_grammar, lexical_grammar, - extra_token_error(syntax_grammar.variables[new_symbol.index].name)); - } - - syntax_grammar.extra_tokens.insert(new_symbol); + if (error) return make_tuple(syntax_grammar, lexical_grammar, error); } for (const ExternalToken &external_token : grammar.external_tokens) { - Symbol internal_token = symbol_replacer.replace_symbol(external_token.corresponding_internal_token); + Symbol internal_token = symbol_replacer.replace_symbol( + external_token.corresponding_internal_token + ); if (internal_token.is_non_terminal()) { return make_tuple( diff --git a/src/compiler/prepare_grammar/extract_tokens.h b/src/compiler/prepare_grammar/extract_tokens.h index 733524cf..73da39fd 100644 --- a/src/compiler/prepare_grammar/extract_tokens.h +++ b/src/compiler/prepare_grammar/extract_tokens.h @@ -4,14 +4,15 @@ #include #include "compiler/compile_error.h" #include "compiler/lexical_grammar.h" -#include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/prepare_grammar/interned_grammar.h" +#include "compiler/prepare_grammar/initial_syntax_grammar.h" namespace tree_sitter { namespace prepare_grammar { std::tuple extract_tokens( - const InternedGrammar &); + const InternedGrammar & +); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/flatten_grammar.cc b/src/compiler/prepare_grammar/flatten_grammar.cc index fe49c7a3..846c361d 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.cc +++ b/src/compiler/prepare_grammar/flatten_grammar.cc @@ -1,13 +1,11 @@ #include "compiler/prepare_grammar/flatten_grammar.h" #include +#include #include #include "compiler/prepare_grammar/extract_choices.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/grammar.h" +#include "compiler/rule.h" namespace tree_sitter { namespace prepare_grammar { @@ -15,8 +13,9 @@ namespace prepare_grammar { using std::find; using std::pair; using std::vector; +using rules::Rule; -class FlattenRule : public rules::RuleFn { +class FlattenRule { private: vector precedence_stack; vector associativity_stack; @@ -24,40 +23,53 @@ class FlattenRule : public rules::RuleFn { rules::Associativity last_associativity; Production production; - void apply_to(const rules::Symbol *sym) { - production.push_back(ProductionStep{ - *sym, - precedence_stack.back(), - associativity_stack.back() - }); - } + void apply(const Rule &rule) { + rule.match( + [&](const rules::Symbol &symbol) { + production.push_back(ProductionStep{ + symbol, + precedence_stack.back(), + associativity_stack.back() + }); + }, - void apply_to(const rules::Metadata *metadata) { - if (metadata->params.has_precedence) - precedence_stack.push_back(metadata->params.precedence); - if (metadata->params.has_associativity) - associativity_stack.push_back(metadata->params.associativity); + [&](const rules::Metadata &metadata) { + if (metadata.params.has_precedence) { + precedence_stack.push_back(metadata.params.precedence); + } - apply(metadata->rule); + if (metadata.params.has_associativity) { + associativity_stack.push_back(metadata.params.associativity); + } - if (metadata->params.has_precedence) { - last_precedence = precedence_stack.back(); - precedence_stack.pop_back(); - production.back().precedence = precedence_stack.back(); - } + apply(*metadata.rule); - if (metadata->params.has_associativity) { - last_associativity = associativity_stack.back(); - associativity_stack.pop_back(); - production.back().associativity = associativity_stack.back(); - } - } + if (metadata.params.has_precedence) { + last_precedence = precedence_stack.back(); + precedence_stack.pop_back(); + production.back().precedence = precedence_stack.back(); + } - void apply_to(const rules::Seq *seq) { - apply(seq->left); - last_precedence = 0; - last_associativity = rules::AssociativityNone; - apply(seq->right); + if (metadata.params.has_associativity) { + last_associativity = associativity_stack.back(); + associativity_stack.pop_back(); + production.back().associativity = associativity_stack.back(); + } + }, + + [&](const rules::Seq &sequence) { + apply(*sequence.left); + last_precedence = 0; + last_associativity = rules::AssociativityNone; + apply(*sequence.right); + }, + + [&](const rules::Blank &blank) {}, + + [&](auto) { + assert(!"Unexpected rule type"); + } + ); } public: @@ -67,7 +79,7 @@ class FlattenRule : public rules::RuleFn { last_precedence(0), last_associativity(rules::AssociativityNone) {} - Production flatten(const rule_ptr &rule) { + Production flatten(const Rule &rule) { apply(rule); if (!production.empty()) { production.back().precedence = last_precedence; @@ -77,10 +89,10 @@ class FlattenRule : public rules::RuleFn { } }; -SyntaxVariable flatten_rule(const Variable &variable) { +SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable) { vector productions; - for (const rule_ptr &rule_component : extract_choices(variable.rule)) { + for (const Rule &rule_component : extract_choices(variable.rule)) { Production production = FlattenRule().flatten(rule_component); auto end = productions.end(); if (find(productions.begin(), end, production) == end) { @@ -93,12 +105,21 @@ SyntaxVariable flatten_rule(const Variable &variable) { pair flatten_grammar(const InitialSyntaxGrammar &grammar) { SyntaxGrammar result; - result.expected_conflicts = grammar.expected_conflicts; - result.extra_tokens = grammar.extra_tokens; result.external_tokens = grammar.external_tokens; + for (const auto &expected_conflict : grammar.expected_conflicts) { + result.expected_conflicts.insert({ + expected_conflict.begin(), + expected_conflict.end(), + }); + } + + for (const rules::Symbol &extra_token : grammar.extra_tokens) { + result.extra_tokens.insert(extra_token); + } + bool is_start = true; - for (const Variable &variable : grammar.variables) { + for (const auto &variable : grammar.variables) { SyntaxVariable syntax_variable = flatten_rule(variable); if (!is_start) { diff --git a/src/compiler/prepare_grammar/flatten_grammar.h b/src/compiler/prepare_grammar/flatten_grammar.h index b5501fb4..4efd9561 100644 --- a/src/compiler/prepare_grammar/flatten_grammar.h +++ b/src/compiler/prepare_grammar/flatten_grammar.h @@ -4,14 +4,14 @@ #include #include "tree_sitter/compiler.h" #include "compiler/compile_error.h" +#include "compiler/grammar.h" +#include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/syntax_grammar.h" namespace tree_sitter { namespace prepare_grammar { -struct InitialSyntaxGrammar; - -SyntaxVariable flatten_rule(const Variable &variable); +SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable); std::pair flatten_grammar(const InitialSyntaxGrammar &); } // namespace prepare_grammar diff --git a/src/compiler/prepare_grammar/initial_syntax_grammar.h b/src/compiler/prepare_grammar/initial_syntax_grammar.h index 1ac319cb..bc200483 100644 --- a/src/compiler/prepare_grammar/initial_syntax_grammar.h +++ b/src/compiler/prepare_grammar/initial_syntax_grammar.h @@ -4,17 +4,26 @@ #include #include #include "tree_sitter/compiler.h" -#include "compiler/rules/symbol.h" -#include "compiler/syntax_grammar.h" -#include "compiler/variable.h" +#include "compiler/grammar.h" +#include "compiler/rule.h" namespace tree_sitter { namespace prepare_grammar { struct InitialSyntaxGrammar { + struct Variable { + std::string name; + VariableType type; + rules::Rule rule; + + inline bool operator==(const Variable &other) const { + return name == other.name && type == other.type && rule == other.rule; + } + }; + std::vector variables; std::set extra_tokens; - std::set expected_conflicts; + std::set> expected_conflicts; std::vector external_tokens; }; diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc index 0786982b..d705f121 100644 --- a/src/compiler/prepare_grammar/intern_symbols.cc +++ b/src/compiler/prepare_grammar/intern_symbols.cc @@ -1,14 +1,11 @@ #include "compiler/prepare_grammar/intern_symbols.h" #include #include +#include #include #include "tree_sitter/compiler.h" #include "compiler/grammar.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/named_symbol.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" namespace tree_sitter { namespace prepare_grammar { @@ -17,34 +14,62 @@ using std::string; using std::vector; using std::set; using std::pair; -using std::make_shared; using rules::Symbol; +using rules::Rule; -class SymbolInterner : public rules::IdentityRuleFn { - using rules::IdentityRuleFn::apply_to; - - rule_ptr apply_to(const rules::NamedSymbol *rule) { - auto result = symbol_for_rule_name(rule->name); - if (!result.get()) { - missing_rule_name = rule->name; - return rules::Blank::build(); - } - return result; - } - +class SymbolInterner { public: - std::shared_ptr symbol_for_rule_name(string rule_name) { - for (size_t i = 0; i < grammar.rules.size(); i++) - if (grammar.rules[i].first == rule_name) - return make_shared(i, Symbol::NonTerminal); - for (size_t i = 0; i < grammar.external_tokens.size(); i++) - if (grammar.external_tokens[i] == rule_name) - return make_shared(i, Symbol::External); - return nullptr; + Rule apply(const Rule &rule) { + return rule.match( + [&](const rules::Blank &blank) -> Rule { return blank; }, + + [&](const rules::NamedSymbol &symbol) { + return intern_symbol(symbol); + }, + + [&](const rules::String &string) { return string; }, + [&](const rules::Pattern &pattern) { return pattern; }, + + [&](const rules::Choice &choice) { + vector elements; + for (const auto &element : choice.elements) { + elements.push_back(apply(element)); + } + return rules::Choice{elements}; + }, + + [&](const rules::Seq &sequence) { + return rules::Seq{apply(*sequence.left), apply(*sequence.right)}; + }, + + [&](const rules::Repeat &repeat) { + return rules::Repeat{apply(*repeat.rule)}; + }, + + [&](const rules::Metadata &metadata) { + return rules::Metadata{apply(*metadata.rule), metadata.params}; + }, + + [](auto) { + assert(!"Unexpected rule type"); + return rules::Blank{}; + } + ); } - explicit SymbolInterner(const Grammar &grammar) : grammar(grammar) {} - const Grammar grammar; + Symbol intern_symbol(rules::NamedSymbol named_symbol) { + for (size_t i = 0; i < grammar.variables.size(); i++) + if (grammar.variables[i].name == named_symbol.value) + return Symbol::non_terminal(i); + for (size_t i = 0; i < grammar.external_tokens.size(); i++) + if (grammar.external_tokens[i].name == named_symbol.value) + return Symbol::external(i); + missing_rule_name = named_symbol.value; + return rules::NONE(); + } + + explicit SymbolInterner(const InputGrammar &grammar) : grammar(grammar) {} + const InputGrammar &grammar; string missing_rule_name; }; @@ -53,52 +78,55 @@ CompileError missing_rule_error(string rule_name) { "Undefined rule '" + rule_name + "'"); } -pair intern_symbols(const Grammar &grammar) { +pair intern_symbols(const InputGrammar &grammar) { InternedGrammar result; - for (auto &external_token_name : grammar.external_tokens) { + for (auto &external_token : grammar.external_tokens) { Symbol corresponding_internal_token = rules::NONE(); - for (size_t i = 0, n = grammar.rules.size(); i < n; i++) { - if (grammar.rules[i].first == external_token_name) { - corresponding_internal_token = Symbol(i, Symbol::NonTerminal); + for (size_t i = 0, n = grammar.variables.size(); i < n; i++) { + if (grammar.variables[i].name == external_token.name) { + corresponding_internal_token = Symbol::non_terminal(i); break; } } result.external_tokens.push_back(ExternalToken{ - external_token_name, - external_token_name[0] == '_' ? VariableTypeHidden : VariableTypeNamed, + external_token.name, + external_token.name[0] == '_' ? VariableTypeHidden : VariableTypeNamed, corresponding_internal_token }); } SymbolInterner interner(grammar); - for (auto &pair : grammar.rules) { - auto new_rule = interner.apply(pair.second); - if (!interner.missing_rule_name.empty()) + for (auto &variable : grammar.variables) { + auto new_rule = interner.apply(variable.rule); + if (!interner.missing_rule_name.empty()) { return { result, missing_rule_error(interner.missing_rule_name) }; + } - result.variables.push_back(Variable{ - pair.first, - pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed, + result.variables.push_back(InternedGrammar::Variable{ + variable.name, + variable.name[0] == '_' ? VariableTypeHidden : VariableTypeNamed, new_rule }); } for (auto &rule : grammar.extra_tokens) { auto new_rule = interner.apply(rule); - if (!interner.missing_rule_name.empty()) + if (!interner.missing_rule_name.empty()) { return { result, missing_rule_error(interner.missing_rule_name) }; + } result.extra_tokens.push_back(new_rule); } - for (auto &names : grammar.expected_conflicts) { + for (auto &expected_conflict : grammar.expected_conflicts) { set entry; - for (auto &name : names) { - auto symbol = interner.symbol_for_rule_name(name); - if (symbol.get()) - entry.insert(*symbol); + for (auto &named_symbol : expected_conflict) { + auto symbol = interner.intern_symbol(named_symbol); + if (symbol != rules::NONE()) { + entry.insert(symbol); + } } result.expected_conflicts.insert(entry); } diff --git a/src/compiler/prepare_grammar/intern_symbols.h b/src/compiler/prepare_grammar/intern_symbols.h index f42369a6..8e8f2abe 100644 --- a/src/compiler/prepare_grammar/intern_symbols.h +++ b/src/compiler/prepare_grammar/intern_symbols.h @@ -8,11 +8,11 @@ namespace tree_sitter { -struct Grammar; +struct InputGrammar; namespace prepare_grammar { -std::pair intern_symbols(const Grammar &); +std::pair intern_symbols(const InputGrammar &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/interned_grammar.h b/src/compiler/prepare_grammar/interned_grammar.h index c8a14647..f7abdd4f 100644 --- a/src/compiler/prepare_grammar/interned_grammar.h +++ b/src/compiler/prepare_grammar/interned_grammar.h @@ -4,17 +4,26 @@ #include #include #include "tree_sitter/compiler.h" -#include "compiler/rules/symbol.h" -#include "compiler/syntax_grammar.h" -#include "compiler/variable.h" +#include "compiler/grammar.h" +#include "compiler/rule.h" namespace tree_sitter { namespace prepare_grammar { struct InternedGrammar { + struct Variable { + std::string name; + VariableType type; + rules::Rule rule; + + bool operator==(const Variable &other) const { + return name == other.name && type == other.type && rule == other.rule; + } + }; + std::vector variables; - std::vector extra_tokens; - std::set expected_conflicts; + std::vector extra_tokens; + std::set> expected_conflicts; std::vector external_tokens; }; diff --git a/src/compiler/prepare_grammar/is_token.cc b/src/compiler/prepare_grammar/is_token.cc deleted file mode 100644 index 4d209882..00000000 --- a/src/compiler/prepare_grammar/is_token.cc +++ /dev/null @@ -1,30 +0,0 @@ -#include "compiler/prepare_grammar/is_token.h" -#include "tree_sitter/compiler.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/string.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/pattern.h" - -namespace tree_sitter { -namespace prepare_grammar { - -class IsToken : public rules::RuleFn { - bool apply_to(const rules::String *rule) { - return true; - } - - bool apply_to(const rules::Pattern *rule) { - return true; - } - - bool apply_to(const rules::Metadata *rule) { - return rule->params.is_token; - } -}; - -bool is_token(const rule_ptr &rule) { - return IsToken().apply(rule); -} - -} // namespace prepare_grammar -} // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/is_token.h b/src/compiler/prepare_grammar/is_token.h deleted file mode 100644 index 55d1b62f..00000000 --- a/src/compiler/prepare_grammar/is_token.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_ -#define COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_ - -#include "compiler/rule.h" - -namespace tree_sitter { -namespace prepare_grammar { - -bool is_token(const rule_ptr &); - -} // namespace prepare_grammar -} // namespace tree_sitter - -#endif // COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_ diff --git a/src/compiler/prepare_grammar/normalize_rules.cc b/src/compiler/prepare_grammar/normalize_rules.cc index 2e4dd205..28602b2b 100644 --- a/src/compiler/prepare_grammar/normalize_rules.cc +++ b/src/compiler/prepare_grammar/normalize_rules.cc @@ -1,15 +1,17 @@ #include "compiler/prepare_grammar/normalize_rules.h" #include "compiler/prepare_grammar/extract_choices.h" -#include "compiler/rules/choice.h" namespace tree_sitter { namespace prepare_grammar { +using std::vector; +using rules::Rule; + LexicalGrammar normalize_rules(const LexicalGrammar &input_grammar) { LexicalGrammar result(input_grammar); for (LexicalVariable &variable : result.variables) { - variable.rule = rules::Choice::build(extract_choices(variable.rule)); + variable.rule = Rule::choice(extract_choices(variable.rule)); } return result; diff --git a/src/compiler/prepare_grammar/parse_regex.cc b/src/compiler/prepare_grammar/parse_regex.cc index 9fdab0d1..cab76443 100644 --- a/src/compiler/prepare_grammar/parse_regex.cc +++ b/src/compiler/prepare_grammar/parse_regex.cc @@ -2,11 +2,7 @@ #include #include #include -#include "compiler/rules/choice.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/blank.h" +#include "compiler/rule.h" #include "compiler/util/string_helpers.h" #include "utf8proc.h" @@ -16,12 +12,9 @@ namespace prepare_grammar { using std::string; using std::vector; using std::pair; -using std::make_shared; using rules::CharacterSet; -using rules::Seq; using rules::Blank; -using rules::Choice; -using rules::Repeat; +using rules::Rule; class PatternParser { public: @@ -32,103 +25,121 @@ class PatternParser { next(); } - pair rule(bool nested) { - vector choices = {}; + pair rule(bool nested) { + vector choices; do { if (!choices.empty()) { - if (peek() == '|') + if (peek() == '|') { next(); - else + } else { break; + } } auto pair = term(nested); - if (pair.second.type) - return { Blank::build(), pair.second }; + if (pair.second.type) { + return {Blank{}, pair.second }; + } choices.push_back(pair.first); } while (has_more_input()); - auto rule = - (choices.size() > 1) ? make_shared(choices) : choices.front(); - return { rule, CompileError::none() }; + return {Rule::choice(choices), CompileError::none()}; } private: - pair term(bool nested) { - rule_ptr result = Blank::build(); + pair term(bool nested) { + Rule result; do { if (peek() == '|') break; if (nested && peek() == ')') break; auto pair = factor(); - if (pair.second.type) - return { Blank::build(), pair.second }; - result = Seq::build({ result, pair.first }); + if (pair.second) { + return {Blank{}, pair.second}; + } + result = Rule::seq({result, pair.first}); } while (has_more_input()); return { result, CompileError::none() }; } - pair factor() { + pair factor() { auto pair = atom(); - if (pair.second.type) - return { Blank::build(), pair.second }; - rule_ptr result = pair.first; + if (pair.second.type) { + return {Blank{}, pair.second}; + } + + Rule result = pair.first; if (has_more_input()) { switch (peek()) { case '*': next(); - result = Choice::build({ Repeat::build(result), Blank::build() }); + result = Rule::choice({ + Rule::repeat(result), + Blank{} + }); break; case '+': next(); - result = Repeat::build(result); + result = Rule::repeat(result); break; case '?': next(); - result = Choice::build({ result, Blank::build() }); + result = Rule::choice({result, Blank{}}); break; } } - return { result, CompileError::none() }; + + return {result, CompileError::none()}; } - pair atom() { + pair atom() { switch (peek()) { case '(': { next(); auto pair = rule(true); - if (pair.second.type) - return { Blank::build(), pair.second }; - if (peek() != ')') + if (pair.second.type) { + return {Blank{}, pair.second}; + } + if (peek() != ')') { return error("unmatched open paren"); + } next(); - return { pair.first, CompileError::none() }; + return {pair.first, CompileError::none()}; } + case '[': { next(); auto pair = char_set(); - if (pair.second.type) - return { Blank::build(), pair.second }; - if (peek() != ']') + if (pair.second.type) { + return {Blank{}, pair.second}; + } + if (peek() != ']') { return error("unmatched open square bracket"); + } next(); - return { pair.first.copy(), CompileError::none() }; + return {pair.first, CompileError::none()}; } + case ')': { return error("unmatched close paren"); } + case ']': { return error("unmatched close square bracket"); } + case '.': { next(); - return { CharacterSet().include_all().exclude('\n').copy(), - CompileError::none() }; + return { + CharacterSet().include_all().exclude('\n'), + CompileError::none() + }; } + default: { auto pair = single_char(); if (pair.second.type) - return { Blank::build(), pair.second }; - return { pair.first.copy(), CompileError::none() }; + return { Blank{}, pair.second }; + return {pair.first, CompileError::none()}; } } } @@ -234,8 +245,8 @@ class PatternParser { return lookahead && iter <= end; } - pair error(string msg) { - return { Blank::build(), CompileError(TSCompileErrorTypeInvalidRegex, msg) }; + pair error(string msg) { + return { Blank{}, CompileError(TSCompileErrorTypeInvalidRegex, msg) }; } string input; @@ -244,7 +255,7 @@ class PatternParser { int32_t lookahead; }; -pair parse_regex(const std::string &input) { +pair parse_regex(const std::string &input) { return PatternParser(input.c_str()).rule(false); } diff --git a/src/compiler/prepare_grammar/parse_regex.h b/src/compiler/prepare_grammar/parse_regex.h index 7db65dce..b1c03f30 100644 --- a/src/compiler/prepare_grammar/parse_regex.h +++ b/src/compiler/prepare_grammar/parse_regex.h @@ -9,7 +9,7 @@ namespace tree_sitter { namespace prepare_grammar { -std::pair parse_regex(const std::string &); +std::pair parse_regex(const std::string &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/prepare_grammar.cc b/src/compiler/prepare_grammar/prepare_grammar.cc index 81750b58..ac573a28 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.cc +++ b/src/compiler/prepare_grammar/prepare_grammar.cc @@ -17,7 +17,7 @@ using std::get; using std::make_tuple; tuple prepare_grammar( - const Grammar &input_grammar) { + const InputGrammar &input_grammar) { /* * Convert all string-based `NamedSymbols` into numerical `Symbols` */ @@ -31,8 +31,9 @@ tuple prepare_grammar( */ auto extract_result = extract_tokens(intern_result.first); error = get<2>(extract_result); - if (error.type) + if (error.type) { return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); + } /* * Replace `Repeat` rules with pairs of recursive rules @@ -42,11 +43,12 @@ tuple prepare_grammar( /* * Expand `String` and `Pattern` rules into full rule trees */ - auto expand_tokens_result = expand_tokens(get<1>(extract_result)); - LexicalGrammar lex_grammar = expand_tokens_result.first; - error = expand_tokens_result.second; - if (error.type) - return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); + LexicalGrammar lex_grammar = get<1>(extract_result); + // auto expand_tokens_result = expand_tokens(get<1>(extract_result)); + // LexicalGrammar lex_grammar = expand_tokens_result.first; + // error = expand_tokens_result.second; + // if (error.type) + // return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); /* * Flatten syntax rules into lists of productions. diff --git a/src/compiler/prepare_grammar/prepare_grammar.h b/src/compiler/prepare_grammar/prepare_grammar.h index e9cb80ee..bed59a53 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.h +++ b/src/compiler/prepare_grammar/prepare_grammar.h @@ -2,18 +2,15 @@ #define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_ #include +#include "compiler/grammar.h" #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" #include "compiler/compile_error.h" namespace tree_sitter { - -struct Grammar; - namespace prepare_grammar { -std::tuple prepare_grammar( - const Grammar &); +std::tuple prepare_grammar(const InputGrammar &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/token_description.cc b/src/compiler/prepare_grammar/token_description.cc index 72127959..f10904ba 100644 --- a/src/compiler/prepare_grammar/token_description.cc +++ b/src/compiler/prepare_grammar/token_description.cc @@ -1,68 +1,82 @@ #include "compiler/prepare_grammar/token_description.h" -#include "compiler/rules/visitor.h" -#include "compiler/rules/pattern.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/string.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/metadata.h" +#include "compiler/rule.h" #include "compiler/util/string_helpers.h" namespace tree_sitter { namespace prepare_grammar { using std::string; +using rules::Rule; -class TokenDescription : public rules::RuleFn { - string apply_to(const rules::Pattern *rule) { - is_trivial = false; - return rule->value; - } +class TokenDescription { + bool is_trivial; - string apply_to(const rules::String *rule) { - return rule->value; - } + string apply(const Rule &rule) { + return rule.match( + [&](const rules::Blank) -> string { + return ""; + }, - string apply_to(const rules::Metadata *rule) { - return apply(rule->rule); - } + [&](const rules::Symbol) { + return ""; + }, - string apply_to(const rules::Seq *rule) { - is_trivial = false; - return apply(rule->left) + apply(rule->right); - } + [&](const rules::Pattern &rule) { + is_trivial = false; + return rule.value; + }, - string apply_to(const rules::Repeat *rule) { - is_trivial = false; - return apply(rule->content) + "*"; - } + [&](const rules::String &rule) { + return rule.value; + }, - string apply_to(const rules::Choice *rule) { - is_trivial = false; - string result = "("; - bool started = false; - for (auto &element : rule->elements) { - if (started) - result += "|"; - result += apply(element); - started = true; - } - return result + ")"; + [&](const rules::Metadata &rule) { + return apply(*rule.rule); + }, + + [&](const rules::Seq &rule) { + is_trivial = false; + return apply(*rule.left) + apply(*rule.right); + }, + + [&](const rules::Repeat &rule) { + is_trivial = false; + return apply(*rule.rule) + "+"; + }, + + [&](const rules::Choice &rule) { + is_trivial = false; + string result = "("; + bool started = false; + for (auto &element : rule.elements) { + if (started) result += "|"; + result += apply(element); + started = true; + } + return result + ")"; + }, + + [](auto) { + return ""; + } + ); } public: - bool is_trivial; + string describe(const Rule &rule) { + string result = apply(rule); + if (is_trivial) { + return result; + } else { + return "/" + result + "/"; + } + } TokenDescription() : is_trivial(true) {} }; -string token_description(const rule_ptr &rule) { - TokenDescription description; - string result = description.apply(rule); - if (description.is_trivial) - return result; - else - return "/" + result + "/"; +string token_description(const Rule &rule) { + return TokenDescription().describe(rule); } } // namespace prepare_grammar diff --git a/src/compiler/prepare_grammar/token_description.h b/src/compiler/prepare_grammar/token_description.h index 5e44141c..6d83f7a9 100644 --- a/src/compiler/prepare_grammar/token_description.h +++ b/src/compiler/prepare_grammar/token_description.h @@ -7,7 +7,7 @@ namespace tree_sitter { namespace prepare_grammar { -std::string token_description(const rule_ptr &); +std::string token_description(const rules::Rule &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/rule.cc b/src/compiler/rule.cc index 8cb8ce95..f802f3fa 100644 --- a/src/compiler/rule.cc +++ b/src/compiler/rule.cc @@ -1,15 +1,287 @@ #include "compiler/rule.h" -#include +#include "compiler/util/hash_combine.h" namespace tree_sitter { +namespace rules { -using std::ostream; -using std::string; +using std::move; +using std::vector; +using util::hash_combine; -bool Rule::operator!=(const Rule &other) const { - return !this->operator==(other); +Rule::Rule(const Rule &other) : blank_(Blank{}), type(BlankType) { + *this = other; } -Rule::~Rule() {} +Rule::Rule(Rule &&other) noexcept : blank_(Blank{}), type(BlankType) { + *this = move(other); +} +static void destroy_value(Rule *rule) { + switch (rule->type) { + case Rule::BlankType: return rule->blank_.~Blank(); + case Rule::CharacterSetType: return rule->character_set_.~CharacterSet(); + case Rule::StringType: return rule->string_ .~String(); + case Rule::PatternType: return rule->pattern_ .~Pattern(); + case Rule::NamedSymbolType: return rule->named_symbol_.~NamedSymbol(); + case Rule::SymbolType: return rule->symbol_ .~Symbol(); + case Rule::ChoiceType: return rule->choice_ .~Choice(); + case Rule::MetadataType: return rule->metadata_ .~Metadata(); + case Rule::RepeatType: return rule->repeat_ .~Repeat(); + case Rule::SeqType: return rule->seq_ .~Seq(); + } +} + +Rule &Rule::operator=(const Rule &other) { + destroy_value(this); + type = other.type; + switch (type) { + case BlankType: + new (&blank_) Blank(other.blank_); + break; + case CharacterSetType: + new (&character_set_) CharacterSet(other.character_set_); + break; + case StringType: + new (&string_) String(other.string_); + break; + case PatternType: + new (&pattern_) Pattern(other.pattern_); + break; + case NamedSymbolType: + new (&named_symbol_) NamedSymbol(other.named_symbol_); + break; + case SymbolType: + new (&symbol_) Symbol(other.symbol_); + break; + case ChoiceType: + new (&choice_) Choice(other.choice_); + break; + case MetadataType: + new (&metadata_) Metadata(other.metadata_); + break; + case RepeatType: + new (&repeat_) Repeat(other.repeat_); + break; + case SeqType: + new (&seq_) Seq(other.seq_); + break; + } + return *this; +} + +Rule &Rule::operator=(Rule &&other) noexcept { + destroy_value(this); + type = other.type; + switch (type) { + case BlankType: + new (&blank_) Blank(move(other.blank_)); + break; + case CharacterSetType: + new (&character_set_) CharacterSet(move(other.character_set_)); + break; + case StringType: + new (&string_) String(move(other.string_)); + break; + case PatternType: + new (&pattern_) Pattern(move(other.pattern_)); + break; + case NamedSymbolType: + new (&named_symbol_) NamedSymbol(move(other.named_symbol_)); + break; + case SymbolType: + new (&symbol_) Symbol(move(other.symbol_)); + break; + case ChoiceType: + new (&choice_) Choice(move(other.choice_)); + break; + case MetadataType: + new (&metadata_) Metadata(move(other.metadata_)); + break; + case RepeatType: + new (&repeat_) Repeat(move(other.repeat_)); + break; + case SeqType: + new (&seq_) Seq(move(other.seq_)); + break; + } + other.type = BlankType; + other.blank_ = Blank{}; + return *this; +} + +Rule::~Rule() noexcept { + destroy_value(this); +} + +bool Rule::operator==(const Rule &other) const { + if (type != other.type) return false; + switch (type) { + case Rule::CharacterSetType: return character_set_ == other.character_set_; + case Rule::StringType: return string_ == other.string_; + case Rule::PatternType: return pattern_ == other.pattern_; + case Rule::NamedSymbolType: return named_symbol_ == other.named_symbol_; + case Rule::SymbolType: return symbol_ == other.symbol_; + case Rule::ChoiceType: return choice_ == other.choice_; + case Rule::MetadataType: return metadata_ == other.metadata_; + case Rule::RepeatType: return repeat_ == other.repeat_; + case Rule::SeqType: return seq_ == other.seq_; + default: return blank_ == other.blank_; + } +} + +template <> +bool Rule::is() const { return type == BlankType; } + +template <> +bool Rule::is() const { return type == SymbolType; } + +template <> +bool Rule::is() const { return type == RepeatType; } + +template <> +const Symbol & Rule::get_unchecked() const { return symbol_; } + +static inline void add_choice_element(std::vector *elements, const Rule &new_rule) { + new_rule.match( + [elements](Choice choice) { + for (auto &element : choice.elements) { + add_choice_element(elements, element); + } + }, + + [elements](auto rule) { + for (auto &element : *elements) { + if (element == rule) return; + } + elements->push_back(rule); + } + ); +} + +Rule Rule::choice(const vector &rules) { + vector elements; + for (auto &element : rules) { + add_choice_element(&elements, element); + } + return (elements.size() == 1) ? elements.front() : Choice{elements}; +} + +Rule Rule::repeat(const Rule &rule) { + return rule.is() ? rule : Repeat{rule}; +} + +Rule Rule::seq(const vector &rules) { + Rule result; + for (const auto &rule : rules) { + rule.match( + [](Blank) {}, + [&](Metadata metadata) { + if (!metadata.rule->is()) { + result = Seq{result, rule}; + } + }, + [&](auto) { + if (result.is()) { + result = rule; + } else { + result = Seq{result, rule}; + } + } + ); + } + return result; +} + +} // namespace rules } // namespace tree_sitter + +namespace std { + +size_t hash::operator()(const Symbol &symbol) const { + auto result = hash()(symbol.index); + hash_combine(&result, hash()(symbol.type)); + return result; +} + +size_t hash::operator()(const NamedSymbol &symbol) const { + return hash()(symbol.value); +} + +size_t hash::operator()(const Pattern &symbol) const { + return hash()(symbol.value); +} + +size_t hash::operator()(const String &symbol) const { + return hash()(symbol.value); +} + +size_t hash::operator()(const CharacterSet &character_set) const { + size_t result = 0; + hash_combine(&result, character_set.includes_all); + hash_combine(&result, character_set.included_chars.size()); + for (uint32_t c : character_set.included_chars) { + hash_combine(&result, c); + } + hash_combine(&result, character_set.excluded_chars.size()); + for (uint32_t c : character_set.excluded_chars) { + hash_combine(&result, c); + } + return result; +} + +size_t hash::operator()(const Blank &blank) const { + return 0; +} + +size_t hash::operator()(const Choice &choice) const { + size_t result = 0; + for (const auto &element : choice.elements) { + symmetric_hash_combine(&result, element); + } + return result; +} + +size_t hash::operator()(const Repeat &repeat) const { + size_t result = 0; + hash_combine(&result, *repeat.rule); + return result; +} + +size_t hash::operator()(const Seq &seq) const { + size_t result = 0; + hash_combine(&result, *seq.left); + hash_combine(&result, *seq.right); + return result; +} + +size_t hash::operator()(const Metadata &metadata) const { + size_t result = 0; + hash_combine(&result, *metadata.rule); + hash_combine(&result, metadata.params.precedence); + hash_combine(&result, metadata.params.associativity); + hash_combine(&result, metadata.params.has_precedence); + hash_combine(&result, metadata.params.has_associativity); + hash_combine(&result, metadata.params.is_token); + hash_combine(&result, metadata.params.is_string); + hash_combine(&result, metadata.params.is_active); + hash_combine(&result, metadata.params.is_main_token); + return result; +} + +size_t hash::operator()(const Rule &rule) const { + size_t result = hash()(rule.type); + switch (rule.type) { + case Rule::CharacterSetType: return result ^ hash()(rule.character_set_); + case Rule::StringType: return result ^ hash()(rule.string_); + case Rule::PatternType: return result ^ hash()(rule.pattern_); + case Rule::NamedSymbolType: return result ^ hash()(rule.named_symbol_); + case Rule::SymbolType: return result ^ hash()(rule.symbol_); + case Rule::ChoiceType: return result ^ hash()(rule.choice_); + case Rule::MetadataType: return result ^ hash()(rule.metadata_); + case Rule::RepeatType: return result ^ hash()(rule.repeat_); + case Rule::SeqType: return result ^ hash()(rule.seq_); + default: return result ^ hash()(rule.blank_); + } +} + +} // namespace std \ No newline at end of file diff --git a/src/compiler/rule.h b/src/compiler/rule.h index b77e54a4..5c4064e5 100644 --- a/src/compiler/rule.h +++ b/src/compiler/rule.h @@ -1,44 +1,143 @@ #ifndef COMPILER_RULE_H_ #define COMPILER_RULE_H_ -#include #include +#include +#include "compiler/util/make_visitor.h" +#include "compiler/util/hash_combine.h" +#include "compiler/rules/blank.h" +#include "compiler/rules/character_set.h" +#include "compiler/rules/choice.h" +#include "compiler/rules/metadata.h" +#include "compiler/rules/named_symbol.h" +#include "compiler/rules/pattern.h" +#include "compiler/rules/repeat.h" +#include "compiler/rules/seq.h" +#include "compiler/rules/string.h" +#include "compiler/rules/symbol.h" namespace tree_sitter { - namespace rules { -class Visitor; -} // namespace rules -class Rule; -typedef std::shared_ptr rule_ptr; +struct Rule { + union { + Blank blank_; + CharacterSet character_set_; + String string_; + Pattern pattern_; + NamedSymbol named_symbol_; + Symbol symbol_; + Choice choice_; + Metadata metadata_; + Repeat repeat_; + Seq seq_; + }; -class Rule { - public: - virtual bool operator==(const Rule &other) const = 0; - bool operator!=(const Rule &other) const; - virtual size_t hash_code() const = 0; - virtual rule_ptr copy() const = 0; - virtual std::string to_string() const = 0; - virtual void accept(rules::Visitor *visitor) const = 0; - virtual ~Rule(); + enum { + BlankType, + CharacterSetType, + StringType, + PatternType, + NamedSymbolType, + SymbolType, + ChoiceType, + MetadataType, + RepeatType, + SeqType, + } type; - template - const T *as() const { - return dynamic_cast(this); + Rule() : blank_(Blank{}), type(BlankType) {}; + Rule(const Blank &value) : blank_(value), type(BlankType) {}; + Rule(const CharacterSet &value) : character_set_(value), type(CharacterSetType) {}; + Rule(const String &value) : string_(value), type(StringType) {}; + Rule(const Pattern &value) : pattern_(value), type(PatternType) {}; + Rule(const NamedSymbol &value) : named_symbol_(value), type(NamedSymbolType) {}; + Rule(const Symbol &value) : symbol_(value), type(SymbolType) {}; + Rule(const Choice &value) : choice_(value), type(ChoiceType) {}; + Rule(const Metadata &value) : metadata_(value), type(MetadataType) {}; + Rule(const Repeat &value) : repeat_(value), type(RepeatType) {}; + Rule(const Seq &value) : seq_(value), type(SeqType) {}; + + Rule(const Rule &other); + Rule(Rule &&other) noexcept; + Rule &operator=(const Rule &other); + Rule &operator=(Rule &&other) noexcept; + ~Rule() noexcept; + + static Rule choice(const std::vector &rules); + static Rule seq(const std::vector &rules); + static Rule repeat(const Rule &rule); + + template + bool is() const; + + template + const RuleType & get_unchecked() const; + + template + inline auto accept(FunctionType function) const -> decltype(function(blank_)) { + switch (type) { + case CharacterSetType: return function(character_set_); + case StringType: return function(string_); + case PatternType: return function(pattern_); + case NamedSymbolType: return function(named_symbol_); + case SymbolType: return function(symbol_); + case ChoiceType: return function(choice_); + case MetadataType: return function(metadata_); + case RepeatType: return function(repeat_); + case SeqType: return function(seq_); + default: return function(blank_); + } } + + template + inline auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward(functions)...))){ + return accept(util::make_visitor(std::forward(functions)...)); + } + + bool operator==(const Rule &other) const; }; +} // namespace rules } // namespace tree_sitter namespace std { +using namespace tree_sitter::rules; +using namespace tree_sitter::util; + template <> -struct hash { - size_t operator()(const tree_sitter::rule_ptr &rule) const { - return rule->hash_code(); - } -}; +struct hash { size_t operator()(const Symbol &) const; }; + +template <> +struct hash { size_t operator()(const NamedSymbol &) const; }; + +template <> +struct hash { size_t operator()(const Pattern &) const; }; + +template <> +struct hash { size_t operator()(const String &) const; }; + +template <> +struct hash { size_t operator()(const CharacterSet &) const; }; + +template <> +struct hash { size_t operator()(const Blank &) const; }; + +template <> +struct hash { size_t operator()(const Choice &) const; }; + +template <> +struct hash { size_t operator()(const Repeat &) const; }; + +template <> +struct hash { size_t operator()(const Seq &) const; }; + +template <> +struct hash { size_t operator()(const Metadata &) const; }; + +template <> +struct hash { size_t operator()(const Rule &) const; }; } // namespace std diff --git a/src/compiler/rules.h b/src/compiler/rules.h deleted file mode 100644 index d98a719a..00000000 --- a/src/compiler/rules.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef COMPILER_RULES_H_ -#define COMPILER_RULES_H_ - -#include -#include -#include -#include "compiler/rule.h" - -namespace tree_sitter { - -rule_ptr blank(); -rule_ptr choice(const std::vector &); -rule_ptr repeat(const rule_ptr &); -rule_ptr repeat1(const rule_ptr &); -rule_ptr seq(const std::vector &); -rule_ptr sym(const std::string &); -rule_ptr pattern(const std::string &); -rule_ptr str(const std::string &); -rule_ptr prec(int precedence, const rule_ptr &); -rule_ptr prec_left(const rule_ptr &); -rule_ptr prec_left(int precedence, const rule_ptr &); -rule_ptr prec_right(const rule_ptr &); -rule_ptr prec_right(int precedence, const rule_ptr &); -rule_ptr token(const rule_ptr &rule); - -} // namespace std - -#endif // COMPILER_RULES_H_ diff --git a/src/compiler/rules/blank.cc b/src/compiler/rules/blank.cc deleted file mode 100644 index 6348bf62..00000000 --- a/src/compiler/rules/blank.cc +++ /dev/null @@ -1,36 +0,0 @@ -#include "compiler/rules/blank.h" -#include -#include -#include "compiler/rules/visitor.h" - -namespace tree_sitter { -namespace rules { - -Blank::Blank() {} - -rule_ptr Blank::build() { - return std::make_shared(); -} - -bool Blank::operator==(const Rule &rule) const { - return rule.as() != nullptr; -} - -size_t Blank::hash_code() const { - return 0; -} - -rule_ptr Blank::copy() const { - return std::make_shared(); -} - -std::string Blank::to_string() const { - return "(blank)"; -} - -void Blank::accept(Visitor *visitor) const { - visitor->visit(this); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/blank.h b/src/compiler/rules/blank.h index e67d213d..aa7ed5c9 100644 --- a/src/compiler/rules/blank.h +++ b/src/compiler/rules/blank.h @@ -1,25 +1,16 @@ #ifndef COMPILER_RULES_BLANK_H_ #define COMPILER_RULES_BLANK_H_ -#include -#include "compiler/rule.h" - namespace tree_sitter { namespace rules { -class Blank : public Rule { - public: - Blank(); - static rule_ptr build(); - - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +struct Blank { + inline bool operator==(const Blank &other) const { + return true; + } }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_BLANK_H_ +#endif // COMPILER_RULES_BLANK_H_ \ No newline at end of file diff --git a/src/compiler/rules/built_in_symbols.cc b/src/compiler/rules/built_in_symbols.cc deleted file mode 100644 index 0fe45f68..00000000 --- a/src/compiler/rules/built_in_symbols.cc +++ /dev/null @@ -1,19 +0,0 @@ -#include "compiler/rules/built_in_symbols.h" - -namespace tree_sitter { -namespace rules { - -Symbol END_OF_INPUT() { - return Symbol(-1, Symbol::Terminal); -} - -Symbol START() { - return Symbol(-2, Symbol::NonTerminal); -} - -Symbol NONE() { - return Symbol(-3, Symbol::Type(-1)); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/built_in_symbols.h b/src/compiler/rules/built_in_symbols.h deleted file mode 100644 index bdaed01c..00000000 --- a/src/compiler/rules/built_in_symbols.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef COMPILER_RULES_BUILT_IN_SYMBOLS_H_ -#define COMPILER_RULES_BUILT_IN_SYMBOLS_H_ - -#include "compiler/rules/symbol.h" - -namespace tree_sitter { -namespace rules { - -Symbol END_OF_INPUT(); -Symbol START(); -Symbol NONE(); - -} // namespace rules -} // namespace tree_sitter - -#endif // COMPILER_RULES_BUILT_IN_SYMBOLS_H_ diff --git a/src/compiler/rules/character_range.cc b/src/compiler/rules/character_range.cc deleted file mode 100644 index 1f6292e3..00000000 --- a/src/compiler/rules/character_range.cc +++ /dev/null @@ -1,36 +0,0 @@ -#include "compiler/rules/character_range.h" -#include -#include "compiler/util/string_helpers.h" - -namespace tree_sitter { -namespace rules { - -using std::string; - -CharacterRange::CharacterRange(uint32_t value) : min(value), max(value) {} -CharacterRange::CharacterRange(uint32_t min, uint32_t max) - : min(min), max(max) {} - -bool CharacterRange::operator==(const CharacterRange &other) const { - return min == other.min && max == other.max; -} - -bool CharacterRange::operator<(const CharacterRange &other) const { - if (min < other.min) - return true; - if (min > other.min) - return false; - if (max < other.max) - return true; - return false; -} - -string CharacterRange::to_string() const { - if (min == max) - return util::escape_char(min); - else - return util::escape_char(min) + "-" + util::escape_char(max); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/character_range.h b/src/compiler/rules/character_range.h deleted file mode 100644 index ecb73eb0..00000000 --- a/src/compiler/rules/character_range.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef COMPILER_RULES_CHARACTER_RANGE_H_ -#define COMPILER_RULES_CHARACTER_RANGE_H_ - -#include -#include - -namespace tree_sitter { -namespace rules { - -struct CharacterRange { - uint32_t min; - uint32_t max; - - explicit CharacterRange(uint32_t value); - explicit CharacterRange(uint32_t min, uint32_t max); - - bool operator==(const CharacterRange &other) const; - bool operator<(const CharacterRange &others) const; - std::string to_string() const; -}; - -} // namespace rules -} // namespace tree_sitter - -#endif // COMPILER_RULES_CHARACTER_RANGE_H_ diff --git a/src/compiler/rules/character_set.cc b/src/compiler/rules/character_set.cc index cff3ab58..5b0c3464 100644 --- a/src/compiler/rules/character_set.cc +++ b/src/compiler/rules/character_set.cc @@ -1,59 +1,57 @@ #include "compiler/rules/character_set.h" -#include -#include -#include -#include "compiler/rules/visitor.h" -#include "compiler/util/hash_combine.h" + +using std::set; +using std::vector; namespace tree_sitter { namespace rules { -using std::string; -using std::set; -using std::vector; -using util::hash_combine; - static void add_range(set *characters, uint32_t min, uint32_t max) { - for (uint32_t c = min; c <= max; c++) + for (uint32_t c = min; c <= max; c++) { characters->insert(c); + } } static void remove_range(set *characters, uint32_t min, uint32_t max) { - for (uint32_t c = min; c <= max; c++) + for (uint32_t c = min; c <= max; c++) { characters->erase(c); + } } -static set remove_chars(set *left, - const set &right) { +static set remove_chars(set *left, const set &right) { set result; for (uint32_t c : right) { - if (left->erase(c)) + if (left->erase(c)) { result.insert(c); + } } return result; } static set add_chars(set *left, const set &right) { set result; - for (uint32_t c : right) - if (left->insert(c).second) + for (uint32_t c : right) { + if (left->insert(c).second) { result.insert(c); + } + } return result; } static vector consolidate_ranges(const set &chars) { vector result; for (uint32_t c : chars) { - size_t size = result.size(); + auto size = result.size(); if (size >= 2 && result[size - 2].max == (c - 2)) { result.pop_back(); result.back().max = c; } else if (size >= 1) { CharacterRange &last = result.back(); - if (last.min < last.max && last.max == (c - 1)) + if (last.min < last.max && last.max == (c - 1)) { last.max = c; - else + } else { result.push_back(CharacterRange(c)); + } } else { result.push_back(CharacterRange(c)); } @@ -61,14 +59,14 @@ static vector consolidate_ranges(const set &chars) { return result; } -CharacterSet::CharacterSet() - : includes_all(false), included_chars({}), excluded_chars({}) {} +CharacterSet::CharacterSet() : includes_all(false) {} -bool CharacterSet::operator==(const Rule &rule) const { - const CharacterSet *other = rule.as(); - return other && (includes_all == other->includes_all) && - (included_chars == other->included_chars) && - (excluded_chars == other->excluded_chars); +CharacterSet::CharacterSet(const set &chars) : included_chars(chars), includes_all(false) {} + +bool CharacterSet::operator==(const CharacterSet &other) const { + return includes_all == other.includes_all && + included_chars == other.included_chars && + excluded_chars == other.excluded_chars; } bool CharacterSet::operator<(const CharacterSet &other) const { @@ -83,41 +81,6 @@ bool CharacterSet::operator<(const CharacterSet &other) const { return excluded_chars < other.excluded_chars; } -size_t CharacterSet::hash_code() const { - size_t result = 0; - hash_combine(&result, includes_all); - hash_combine(&result, included_chars.size()); - for (uint32_t c : included_chars) - hash_combine(&result, c); - hash_combine(&result, excluded_chars.size()); - for (uint32_t c : excluded_chars) - hash_combine(&result, c); - return result; -} - -rule_ptr CharacterSet::copy() const { - return std::make_shared(*this); -} - -string CharacterSet::to_string() const { - string result("(char"); - if (includes_all) - result += " include_all"; - if (!included_chars.empty()) { - result += " (include"; - for (auto r : included_ranges()) - result += string(" ") + r.to_string(); - result += ")"; - } - if (!excluded_chars.empty()) { - result += " (exclude"; - for (auto r : excluded_ranges()) - result += string(" ") + r.to_string(); - result += ")"; - } - return result + ")"; -} - CharacterSet &CharacterSet::include_all() { includes_all = true; included_chars = {}; @@ -212,9 +175,5 @@ vector CharacterSet::excluded_ranges() const { return consolidate_ranges(excluded_chars); } -void CharacterSet::accept(Visitor *visitor) const { - visitor->visit(this); -} - } // namespace rules } // namespace tree_sitter diff --git a/src/compiler/rules/character_set.h b/src/compiler/rules/character_set.h index ced343b0..0c991c43 100644 --- a/src/compiler/rules/character_set.h +++ b/src/compiler/rules/character_set.h @@ -1,20 +1,28 @@ #ifndef COMPILER_RULES_CHARACTER_SET_H_ #define COMPILER_RULES_CHARACTER_SET_H_ -#include -#include -#include -#include #include -#include "compiler/rule.h" -#include "compiler/rules/character_range.h" +#include +#include namespace tree_sitter { namespace rules { -class CharacterSet : public Rule { - public: +struct CharacterRange { + uint32_t min; + uint32_t max; + + inline explicit CharacterRange(uint32_t value) : min{value}, max{value} {} + inline CharacterRange(uint32_t min, uint32_t max) : min{min}, max{max} {} + + inline bool operator==(const CharacterRange &other) const { + return min == other.min && max == other.max; + } +}; + +struct CharacterSet { CharacterSet(); + CharacterSet(const std::set &); CharacterSet &include_all(); CharacterSet &include(uint32_t c); @@ -22,12 +30,8 @@ class CharacterSet : public Rule { CharacterSet &exclude(uint32_t c); CharacterSet &exclude(uint32_t min, uint32_t max); - bool operator==(const Rule &other) const; + bool operator==(const CharacterSet &) const; bool operator<(const CharacterSet &) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; void add_set(const CharacterSet &other); CharacterSet remove_set(const CharacterSet &other); @@ -37,23 +41,12 @@ class CharacterSet : public Rule { std::vector included_ranges() const; std::vector excluded_ranges() const; - bool includes_all; std::set included_chars; std::set excluded_chars; + bool includes_all; }; } // namespace rules } // namespace tree_sitter -namespace std { - -template <> -struct hash { - size_t operator()(const tree_sitter::rules::CharacterSet &rule) const { - return rule.hash_code(); - } -}; - -} // namespace std - -#endif // COMPILER_RULES_CHARACTER_SET_H_ +#endif // COMPILER_RULES_CHARACTER_SET_H_ \ No newline at end of file diff --git a/src/compiler/rules/choice.cc b/src/compiler/rules/choice.cc index c793aed1..1b3be56c 100644 --- a/src/compiler/rules/choice.cc +++ b/src/compiler/rules/choice.cc @@ -1,77 +1,11 @@ #include "compiler/rules/choice.h" -#include -#include -#include "compiler/rules/visitor.h" -#include "compiler/util/hash_combine.h" +#include "compiler/rule.h" namespace tree_sitter { namespace rules { -using std::string; -using std::make_shared; -using std::vector; -using std::set; -using util::symmetric_hash_combine; - -Choice::Choice(const vector &elements) : elements(elements) {} - -void add_choice_element(vector *vec, const rule_ptr new_rule) { - auto choice = new_rule->as(); - if (choice) { - for (auto &child : choice->elements) - add_choice_element(vec, child); - } else { - for (auto &element : *vec) - if (element->operator==(*new_rule)) - return; - vec->push_back(new_rule); - } -} - -rule_ptr Choice::build(const vector &inputs) { - vector elements; - for (auto &el : inputs) - add_choice_element(&elements, el); - if (elements.size() == 1) - return elements.front(); - else - return make_shared(elements); -} - -bool Choice::operator==(const Rule &rule) const { - const Choice *other = rule.as(); - if (!other) - return false; - size_t size = elements.size(); - if (size != other->elements.size()) - return false; - for (size_t i = 0; i < size; i++) - if (!elements[i]->operator==(*other->elements[i])) - return false; - return true; -} - -size_t Choice::hash_code() const { - size_t result = 0; - symmetric_hash_combine(&result, elements.size()); - for (const auto &element : elements) - symmetric_hash_combine(&result, element); - return result; -} - -rule_ptr Choice::copy() const { - return std::make_shared(*this); -} - -string Choice::to_string() const { - string result = "(choice"; - for (const auto &element : elements) - result += " " + element->to_string(); - return result + ")"; -} - -void Choice::accept(Visitor *visitor) const { - visitor->visit(this); +bool Choice::operator==(const Choice &other) const { + return elements == other.elements; } } // namespace rules diff --git a/src/compiler/rules/choice.h b/src/compiler/rules/choice.h index 1139ae6a..6365a565 100644 --- a/src/compiler/rules/choice.h +++ b/src/compiler/rules/choice.h @@ -1,28 +1,21 @@ #ifndef COMPILER_RULES_CHOICE_H_ #define COMPILER_RULES_CHOICE_H_ -#include +#include #include -#include "compiler/rule.h" namespace tree_sitter { namespace rules { -class Choice : public Rule { - public: - explicit Choice(const std::vector &elements); - static rule_ptr build(const std::vector &rules); +struct Rule; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +struct Choice { + std::vector elements; - const std::vector elements; + bool operator==(const Choice &other) const; }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_CHOICE_H_ +#endif // COMPILER_RULES_CHOICE_H_ \ No newline at end of file diff --git a/src/compiler/rules/metadata.cc b/src/compiler/rules/metadata.cc index 5b9724d8..ff98a54b 100644 --- a/src/compiler/rules/metadata.cc +++ b/src/compiler/rules/metadata.cc @@ -1,97 +1,70 @@ #include "compiler/rules/metadata.h" -#include -#include #include -#include "compiler/rules/visitor.h" -#include "compiler/rules/blank.h" -#include "compiler/util/hash_combine.h" +#include "compiler/rule.h" namespace tree_sitter { namespace rules { -using std::make_shared; -using std::map; -using std::pair; -using util::hash_combine; +Metadata::Metadata(const Rule &rule, MetadataParams params) : + rule(std::make_shared(rule)), params(params) {} -MetadataParams::MetadataParams() : - precedence{0}, - associativity{AssociativityNone}, - has_precedence{false}, - has_associativity{false}, - is_token{false}, - is_string{false}, - is_active{false}, - is_main_token{false} {} - -bool MetadataParams::operator==(const MetadataParams &other) const { - return - precedence == other.precedence && - associativity == other.associativity && - has_precedence == other.has_precedence && - has_associativity == other.has_associativity && - is_token == other.is_token && - is_string == other.is_string && - is_active == other.is_active && - is_main_token == other.is_main_token; +bool Metadata::operator==(const Metadata &other) const { + return rule->operator==(*other.rule) && params == other.params; } -Metadata::Metadata(rule_ptr rule, MetadataParams params) - : rule(rule), params(params) {} - -rule_ptr Metadata::build(rule_ptr rule, MetadataParams params) { - return std::make_shared(rule, params); +Metadata Metadata::token(const Rule &rule) { + MetadataParams params; + params.is_token = true; + return Metadata{rule, params}; } -rule_ptr Metadata::main_token(rule_ptr rule) { +Metadata Metadata::active_prec(int precedence, const Rule &rule) { MetadataParams params; params.has_precedence = true; - params.precedence = 0; - params.is_main_token = true; - return Metadata::build(rule, params); + params.precedence = precedence; + params.is_active = true; + return Metadata{rule, params}; } -rule_ptr Metadata::separator(rule_ptr rule) { +Metadata Metadata::prec(int precedence, const Rule &rule) { + MetadataParams params; + params.has_precedence = true; + params.precedence = precedence; + return Metadata{rule, params}; +} + +Metadata Metadata::prec_left(int precedence, const Rule &rule) { + MetadataParams params; + params.has_precedence = true; + params.precedence = precedence; + params.has_associativity = true; + params.associativity = AssociativityLeft; + return Metadata{rule, params}; +} + +Metadata Metadata::prec_right(int precedence, const Rule &rule) { + MetadataParams params; + params.has_precedence = true; + params.precedence = precedence; + params.has_associativity = true; + params.associativity = AssociativityRight; + return Metadata{rule, params}; +} + +Metadata Metadata::separator(const Rule &rule) { MetadataParams params; params.has_precedence = true; params.precedence = INT_MIN; params.is_active = true; - return Metadata::build(rule, params); + return Metadata{rule, params}; } -bool Metadata::operator==(const Rule &rule) const { - auto other = rule.as(); - return other && other->params == params && other->rule->operator==(*this->rule); -} - -size_t Metadata::hash_code() const { - size_t result = 0; - hash_combine(&result, params.precedence); - hash_combine(&result, params.associativity); - hash_combine(&result, params.has_precedence); - hash_combine(&result, params.has_associativity); - hash_combine(&result, params.is_token); - hash_combine(&result, params.is_string); - hash_combine(&result, params.is_active); - hash_combine(&result, params.is_main_token); - return result; -} - -rule_ptr Metadata::copy() const { - return make_shared(rule->copy(), params); -} - -std::string Metadata::to_string() const { - if (params.has_precedence) { - return "(metadata prec:" + std::to_string(params.precedence) + " " + - rule->to_string() + ")"; - } else { - return "(metadata " + rule->to_string() + ")"; - } -} - -void Metadata::accept(Visitor *visitor) const { - visitor->visit(this); +Metadata Metadata::main_token(const Rule &rule) { + MetadataParams params; + params.has_precedence = true; + params.precedence = 0; + params.is_main_token = true; + return Metadata{rule, params}; } } // namespace rules diff --git a/src/compiler/rules/metadata.h b/src/compiler/rules/metadata.h index a9f43c2f..0d55dfd2 100644 --- a/src/compiler/rules/metadata.h +++ b/src/compiler/rules/metadata.h @@ -1,9 +1,7 @@ #ifndef COMPILER_RULES_METADATA_H_ #define COMPILER_RULES_METADATA_H_ -#include -#include -#include "compiler/rule.h" +#include namespace tree_sitter { namespace rules { @@ -24,28 +22,45 @@ struct MetadataParams { bool is_active; bool is_main_token; - MetadataParams(); - bool operator==(const MetadataParams &) const; + inline MetadataParams() : + precedence{0}, associativity{AssociativityNone}, has_precedence{false}, + has_associativity{false}, is_token{false}, is_string{false}, + is_active{false}, is_main_token{false} {} + + inline bool operator==(const MetadataParams &other) const { + return ( + precedence == other.precedence && + associativity == other.associativity && + has_precedence == other.has_precedence && + has_associativity == other.has_associativity && + is_token == other.is_token && + is_string == other.is_string && + is_active == other.is_active && + is_main_token == other.is_main_token + ); + } }; -class Metadata : public Rule { - public: - Metadata(rule_ptr rule, MetadataParams); - static rule_ptr build(rule_ptr rule, MetadataParams); - static rule_ptr main_token(rule_ptr rule); - static rule_ptr separator(rule_ptr rule); +struct Rule; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - - const rule_ptr rule; +struct Metadata { + std::shared_ptr rule; MetadataParams params; + + Metadata(const Rule &rule, MetadataParams params); + + static Metadata token(const Rule &rule); + static Metadata active_prec(int precedence, const Rule &rule); + static Metadata prec(int precedence, const Rule &rule); + static Metadata prec_left(int precedence, const Rule &rule); + static Metadata prec_right(int precedence, const Rule &rule); + static Metadata separator(const Rule &rule); + static Metadata main_token(const Rule &rule); + + bool operator==(const Metadata &other) const; }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_METADATA_H_ +#endif // COMPILER_RULES_METADATA_H_ \ No newline at end of file diff --git a/src/compiler/rules/named_symbol.cc b/src/compiler/rules/named_symbol.cc deleted file mode 100644 index d846580b..00000000 --- a/src/compiler/rules/named_symbol.cc +++ /dev/null @@ -1,35 +0,0 @@ -#include "compiler/rules/named_symbol.h" -#include -#include "compiler/rules/visitor.h" - -namespace tree_sitter { -namespace rules { - -using std::string; -using std::hash; - -NamedSymbol::NamedSymbol(const std::string &name) : name(name) {} - -bool NamedSymbol::operator==(const Rule &rule) const { - auto other = rule.as(); - return other && other->name == name; -} - -size_t NamedSymbol::hash_code() const { - return hash()(name); -} - -rule_ptr NamedSymbol::copy() const { - return std::make_shared(*this); -} - -string NamedSymbol::to_string() const { - return string("(sym '") + name + "')"; -} - -void NamedSymbol::accept(Visitor *visitor) const { - visitor->visit(this); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/named_symbol.h b/src/compiler/rules/named_symbol.h index 2d265d0f..dd668aea 100644 --- a/src/compiler/rules/named_symbol.h +++ b/src/compiler/rules/named_symbol.h @@ -2,25 +2,19 @@ #define COMPILER_RULES_NAMED_SYMBOL_H_ #include -#include "compiler/rule.h" namespace tree_sitter { namespace rules { -class NamedSymbol : public Rule { - public: - explicit NamedSymbol(const std::string &name); +struct NamedSymbol { + std::string value; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - - std::string name; + inline bool operator==(const NamedSymbol &other) const { + return value == other.value; + } }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_NAMED_SYMBOL_H_ +#endif // COMPILER_RULES_NAMED_SYMBOL_H_ \ No newline at end of file diff --git a/src/compiler/rules/pattern.cc b/src/compiler/rules/pattern.cc deleted file mode 100644 index 5ac8f97b..00000000 --- a/src/compiler/rules/pattern.cc +++ /dev/null @@ -1,36 +0,0 @@ -#include "compiler/rules/pattern.h" -#include -#include "compiler/rules/visitor.h" -#include "compiler/util/string_helpers.h" - -namespace tree_sitter { -namespace rules { - -using std::string; -using std::hash; - -Pattern::Pattern(const string &string) : value(string) {} - -bool Pattern::operator==(tree_sitter::Rule const &other) const { - auto pattern = other.as(); - return pattern && (pattern->value == value); -} - -size_t Pattern::hash_code() const { - return hash()(value); -} - -rule_ptr Pattern::copy() const { - return std::make_shared(*this); -} - -string Pattern::to_string() const { - return string("(pattern '") + util::escape_string(value) + "')"; -} - -void Pattern::accept(Visitor *visitor) const { - visitor->visit(this); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/pattern.h b/src/compiler/rules/pattern.h index 305f7024..60c773f6 100644 --- a/src/compiler/rules/pattern.h +++ b/src/compiler/rules/pattern.h @@ -2,25 +2,19 @@ #define COMPILER_RULES_PATTERN_H_ #include -#include "compiler/rule.h" namespace tree_sitter { namespace rules { -class Pattern : public Rule { - public: - explicit Pattern(const std::string &string); +struct Pattern { + std::string value; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - - const std::string value; + inline bool operator==(const Pattern &other) const { + return value == other.value; + } }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_PATTERN_H_ +#endif // COMPILER_RULES_PATTERN_H_ \ No newline at end of file diff --git a/src/compiler/rules/repeat.cc b/src/compiler/rules/repeat.cc index 64d793bb..87cc19cd 100644 --- a/src/compiler/rules/repeat.cc +++ b/src/compiler/rules/repeat.cc @@ -1,43 +1,14 @@ #include "compiler/rules/repeat.h" -#include -#include -#include "compiler/rules/visitor.h" +#include "compiler/rule.h" namespace tree_sitter { namespace rules { -using std::make_shared; -using std::string; +Repeat::Repeat(const Rule &rule) : + rule(std::make_shared(rule)) {} -Repeat::Repeat(const rule_ptr content) : content(content) {} - -rule_ptr Repeat::build(const rule_ptr &rule) { - auto inner_repeat = rule->as(); - if (inner_repeat) - return rule; - else - return make_shared(rule); -} - -bool Repeat::operator==(const Rule &rule) const { - auto other = rule.as(); - return other && (*other->content == *content); -} - -size_t Repeat::hash_code() const { - return content->hash_code(); -} - -rule_ptr Repeat::copy() const { - return make_shared(*this); -} - -string Repeat::to_string() const { - return string("(repeat ") + content->to_string() + ")"; -} - -void Repeat::accept(Visitor *visitor) const { - visitor->visit(this); +bool Repeat::operator==(const Repeat &other) const { + return rule->operator==(*other.rule); } } // namespace rules diff --git a/src/compiler/rules/repeat.h b/src/compiler/rules/repeat.h index 35e4531a..b9770a51 100644 --- a/src/compiler/rules/repeat.h +++ b/src/compiler/rules/repeat.h @@ -1,27 +1,21 @@ #ifndef COMPILER_RULES_REPEAT_H_ #define COMPILER_RULES_REPEAT_H_ -#include -#include "compiler/rule.h" +#include namespace tree_sitter { namespace rules { -class Repeat : public Rule { - public: - explicit Repeat(rule_ptr content); - static rule_ptr build(const rule_ptr &rule); +struct Rule; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +struct Repeat { + std::shared_ptr rule; - const rule_ptr content; + explicit Repeat(const Rule &rule); + bool operator==(const Repeat &other) const; }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_REPEAT_H_ +#endif // COMPILER_RULES_REPEAT_H_ \ No newline at end of file diff --git a/src/compiler/rules/rules.cc b/src/compiler/rules/rules.cc deleted file mode 100644 index fdb0ebdf..00000000 --- a/src/compiler/rules/rules.cc +++ /dev/null @@ -1,108 +0,0 @@ -#include -#include -#include -#include -#include "compiler/rule.h" -#include "compiler/rules.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/named_symbol.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/string.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/pattern.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/built_in_symbols.h" - -namespace tree_sitter { - -using std::make_shared; -using std::string; -using std::set; -using std::vector; -using std::map; -using rules::MetadataParams; - -static rule_ptr metadata(rule_ptr rule, MetadataParams params) { - return std::make_shared(rule, params); -} - -rule_ptr blank() { - return rules::Blank::build(); -} - -rule_ptr choice(const vector &rules) { - return rules::Choice::build(rules); -} - -rule_ptr repeat(const rule_ptr &content) { - return choice({ repeat1(content), blank() }); -} - -rule_ptr repeat1(const rule_ptr &content) { - return rules::Repeat::build(content); -} - -rule_ptr seq(const vector &rules) { - return rules::Seq::build(rules); -} - -rule_ptr sym(const string &name) { - return make_shared(name); -} - -rule_ptr pattern(const string &value) { - return make_shared(value); -} - -rule_ptr str(const string &value) { - return make_shared(value); -} - -rule_ptr prec_left(const rule_ptr &rule) { - MetadataParams params; - params.has_associativity = true; - params.associativity = rules::AssociativityLeft; - return metadata(rule, params); -} - -rule_ptr prec_left(int precedence, const rule_ptr &rule) { - MetadataParams params; - params.has_associativity = true; - params.associativity = rules::AssociativityLeft; - params.has_precedence = true; - params.precedence = precedence; - return metadata(rule, params); -} - -rule_ptr prec_right(const rule_ptr &rule) { - MetadataParams params; - params.has_associativity = true; - params.associativity = rules::AssociativityRight; - return metadata(rule, params); -} - -rule_ptr prec_right(int precedence, const rule_ptr &rule) { - MetadataParams params; - params.has_associativity = true; - params.associativity = rules::AssociativityRight; - params.has_precedence = true; - params.precedence = precedence; - return metadata(rule, params); -} - -rule_ptr prec(int precedence, const rule_ptr &rule) { - MetadataParams params; - params.has_precedence = true; - params.precedence = precedence; - return metadata(rule, params); -} - -rule_ptr token(const rule_ptr &rule) { - MetadataParams params; - params.is_token = true; - return metadata(rule, params); -} - -} // namespace tree_sitter diff --git a/src/compiler/rules/seq.cc b/src/compiler/rules/seq.cc index cc934a5c..cf898e0e 100644 --- a/src/compiler/rules/seq.cc +++ b/src/compiler/rules/seq.cc @@ -1,56 +1,15 @@ #include "compiler/rules/seq.h" -#include -#include "compiler/rules/visitor.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/metadata.h" +#include "compiler/rule.h" namespace tree_sitter { namespace rules { -using std::make_shared; -using std::string; -using std::vector; +Seq::Seq(const Rule &left, const Rule &right) : + left(std::make_shared(left)), + right(std::make_shared(right)) {} -Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {} - -rule_ptr Seq::build(const std::vector &rules) { - rule_ptr result = make_shared(); - for (auto &rule : rules) { - auto blank = rule->as(); - if (blank) - continue; - - auto metadata = rule->as(); - if (metadata && metadata->rule->as()) - continue; - - if (result->as()) - result = rule; - else - result = make_shared(result, rule); - } - return result; -} - -bool Seq::operator==(const Rule &rule) const { - const Seq *other = rule.as(); - return other && (*other->left == *left) && (*other->right == *right); -} - -size_t Seq::hash_code() const { - return left->hash_code() ^ right->hash_code(); -} - -rule_ptr Seq::copy() const { - return std::make_shared(*this); -} - -string Seq::to_string() const { - return string("(seq ") + left->to_string() + " " + right->to_string() + ")"; -} - -void Seq::accept(Visitor *visitor) const { - visitor->visit(this); +bool Seq::operator==(const Seq &other) const { + return left->operator==(*other.left) && right->operator==(*other.right); } } // namespace rules diff --git a/src/compiler/rules/seq.h b/src/compiler/rules/seq.h index b331cf77..6ddbb003 100644 --- a/src/compiler/rules/seq.h +++ b/src/compiler/rules/seq.h @@ -1,29 +1,23 @@ #ifndef COMPILER_RULES_SEQ_H_ #define COMPILER_RULES_SEQ_H_ -#include +#include #include -#include "compiler/rule.h" namespace tree_sitter { namespace rules { -class Seq : public Rule { - public: - Seq(rule_ptr left, rule_ptr right); - static rule_ptr build(const std::vector &rules); +struct Rule; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; +struct Seq { + std::shared_ptr left; + std::shared_ptr right; - const rule_ptr left; - const rule_ptr right; + Seq(const Rule &left, const Rule &right); + bool operator==(const Seq &other) const; }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_SEQ_H_ +#endif // COMPILER_RULES_SEQ_H_ \ No newline at end of file diff --git a/src/compiler/rules/string.cc b/src/compiler/rules/string.cc deleted file mode 100644 index 8a77b169..00000000 --- a/src/compiler/rules/string.cc +++ /dev/null @@ -1,35 +0,0 @@ -#include "compiler/rules/string.h" -#include -#include "compiler/rules/visitor.h" - -namespace tree_sitter { -namespace rules { - -using std::string; -using std::hash; - -String::String(string value) : value(value) {} - -bool String::operator==(const Rule &rule) const { - auto other = rule.as(); - return other && (other->value == value); -} - -size_t String::hash_code() const { - return hash()(value); -} - -rule_ptr String::copy() const { - return std::make_shared(*this); -} - -string String::to_string() const { - return string("(string '") + value + "')"; -} - -void String::accept(Visitor *visitor) const { - visitor->visit(this); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/string.h b/src/compiler/rules/string.h index 4398b560..9fbacd34 100644 --- a/src/compiler/rules/string.h +++ b/src/compiler/rules/string.h @@ -2,25 +2,19 @@ #define COMPILER_RULES_STRING_H_ #include -#include "compiler/rule.h" namespace tree_sitter { namespace rules { -class String : public Rule { - public: - explicit String(std::string value); +struct String { + std::string value; - bool operator==(const Rule &other) const; - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - - const std::string value; + inline bool operator==(const String &other) const { + return value == other.value; + } }; } // namespace rules } // namespace tree_sitter -#endif // COMPILER_RULES_STRING_H_ +#endif // COMPILER_RULES_STRING_H_ \ No newline at end of file diff --git a/src/compiler/rules/symbol.cc b/src/compiler/rules/symbol.cc deleted file mode 100644 index e826cb0f..00000000 --- a/src/compiler/rules/symbol.cc +++ /dev/null @@ -1,82 +0,0 @@ -#include "compiler/rules/symbol.h" -#include -#include -#include "compiler/rules/visitor.h" -#include "compiler/util/hash_combine.h" - -namespace tree_sitter { -namespace rules { - -using std::string; -using std::to_string; -using util::hash_combine; - -Symbol::Symbol(Symbol::Index index, Symbol::Type type) : index(index), type(type) {} - -bool Symbol::operator==(const Symbol &other) const { - return (other.index == index) && (other.type == type); -} - -bool Symbol::operator==(const Rule &rule) const { - auto other = rule.as(); - return other && this->operator==(*other); -} - -size_t Symbol::hash_code() const { - size_t result = 0; - hash_combine(&result, index); - hash_combine(&result, type); - return result; -} - -rule_ptr Symbol::copy() const { - return std::make_shared(*this); -} - -string Symbol::to_string() const { - switch (type) { - case Symbol::Terminal: - return "(terminal " + std::to_string(index) + ")"; - case Symbol::NonTerminal: - return "(non-terminal " + std::to_string(index) + ")"; - case Symbol::External: - return "(external " + std::to_string(index) + ")"; - default: - return "(none)"; - } -} - -bool Symbol::operator<(const Symbol &other) const { - if (type < other.type) - return true; - if (other.type < type) - return false; - return (index < other.index); -} - -bool Symbol::is_built_in(Symbol::Index index) { - return index < 0; -} - -bool Symbol::is_built_in() const { - return is_built_in(index); -} - -bool Symbol::is_token() const { - return type == Symbol::Terminal; -} - -bool Symbol::is_external() const { - return type == Symbol::External; -} - -bool Symbol::is_non_terminal() const { - return type == Symbol::NonTerminal; -} - -void Symbol::accept(Visitor *visitor) const { - visitor->visit(this); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/symbol.h b/src/compiler/rules/symbol.h index a963433c..9df0dc84 100644 --- a/src/compiler/rules/symbol.h +++ b/src/compiler/rules/symbol.h @@ -1,55 +1,76 @@ #ifndef COMPILER_RULES_SYMBOL_H_ #define COMPILER_RULES_SYMBOL_H_ -#include -#include "compiler/rule.h" - namespace tree_sitter { namespace rules { -class Symbol : public Rule { - public: - typedef int Index; - - typedef enum { +struct Symbol { + using Index = int; + enum Type { External, Terminal, NonTerminal, - } Type; + }; - Symbol(Index index, Type type); + inline bool operator==(const Symbol &other) const { + return index == other.index && type == other.type; + } - bool operator==(const Symbol &other) const; - bool operator==(const Rule &other) const; + inline bool operator!=(const Symbol &other) const { + return !operator==(other); + } - size_t hash_code() const; - rule_ptr copy() const; - std::string to_string() const; - void accept(Visitor *visitor) const; - - bool operator<(const Symbol &other) const; - static bool is_built_in(Index); - bool is_built_in() const; - bool is_token() const; - bool is_external() const; - bool is_non_terminal() const; + inline bool operator<(const Symbol &other) const { + if (type < other.type) return true; + if (type > other.type) return false; + return index < other.index; + } Index index; Type type; + + static Symbol terminal(Index index) { + return Symbol{index, Type::Terminal}; + } + + static Symbol external(Index index) { + return Symbol{index, Type::External}; + } + + static Symbol non_terminal(Index index) { + return Symbol{index, Type::NonTerminal}; + } + + bool is_non_terminal() const { + return type == Type::NonTerminal; + } + + bool is_terminal() const { + return type == Type::Terminal; + } + + bool is_external() const { + return type == Type::External; + } + + bool is_built_in() const { + return index < 0; + } }; +inline Symbol END_OF_INPUT() { + return Symbol{-1, Symbol::Terminal}; +} + +inline Symbol START() { + return Symbol{-2, Symbol::NonTerminal}; +} + +inline Symbol NONE() { + return Symbol{-3, Symbol::Type(-1)}; +} + } // namespace rules } // namespace tree_sitter -namespace std { - -template <> -struct hash { - size_t operator()(const tree_sitter::rules::Symbol &rule) const { - return rule.hash_code(); - } -}; - -} // std - -#endif // COMPILER_RULES_SYMBOL_H_ +#endif // COMPILER_RULES_SYMBOL_H_ \ No newline at end of file diff --git a/src/compiler/rules/visitor.cc b/src/compiler/rules/visitor.cc deleted file mode 100644 index e06f6c7f..00000000 --- a/src/compiler/rules/visitor.cc +++ /dev/null @@ -1,44 +0,0 @@ -#include "compiler/rules/visitor.h" -#include -#include "compiler/rule.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/string.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/pattern.h" -#include "compiler/rules/repeat.h" - -namespace tree_sitter { -namespace rules { - -using std::vector; - -Visitor::~Visitor() {} - -rule_ptr IdentityRuleFn::default_apply(const Rule *rule) { - return rule->copy(); -} - -rule_ptr IdentityRuleFn::apply_to(const Choice *rule) { - vector rules; - for (const auto &el : rule->elements) - rules.push_back(apply(el)); - return Choice::build(rules); -} - -rule_ptr IdentityRuleFn::apply_to(const Seq *rule) { - return Seq::build({ apply(rule->left), apply(rule->right) }); -} - -rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) { - return Repeat::build(apply(rule->content)); -} - -rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) { - return Metadata::build(apply(rule->rule), rule->params); -} - -} // namespace rules -} // namespace tree_sitter diff --git a/src/compiler/rules/visitor.h b/src/compiler/rules/visitor.h deleted file mode 100644 index c75e31dc..00000000 --- a/src/compiler/rules/visitor.h +++ /dev/null @@ -1,234 +0,0 @@ -#ifndef COMPILER_RULES_VISITOR_H_ -#define COMPILER_RULES_VISITOR_H_ - -#include "compiler/rule.h" - -namespace tree_sitter { -namespace rules { - -class Blank; -class NamedSymbol; -class CharacterSet; -class Choice; -class Repeat; -class Seq; -class String; -class Symbol; -class Pattern; -class Metadata; -class ExternalToken; - -class Visitor { - public: - virtual void visit(const Blank *rule) = 0; - virtual void visit(const CharacterSet *rule) = 0; - virtual void visit(const Choice *rule) = 0; - virtual void visit(const Metadata *rule) = 0; - virtual void visit(const Pattern *rule) = 0; - virtual void visit(const Repeat *rule) = 0; - virtual void visit(const Seq *rule) = 0; - virtual void visit(const String *rule) = 0; - virtual void visit(const NamedSymbol *rule) = 0; - virtual void visit(const Symbol *rule) = 0; - virtual void visit(const ExternalToken *rule) = 0; - virtual ~Visitor(); -}; - -template -class RuleFn : private Visitor { - public: - T apply(const rule_ptr &rule) { - value_ = T(); - rule->accept(this); - return value_; - } - - protected: - virtual T default_apply(const Rule *rule) { - return T(); - } - - virtual T apply_to(const Blank *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const CharacterSet *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Choice *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Metadata *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Pattern *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Repeat *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Seq *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const String *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const NamedSymbol *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const Symbol *rule) { - return default_apply((const Rule *)rule); - } - - virtual T apply_to(const ExternalToken *rule) { - return default_apply((const Rule *)rule); - } - - void visit(const Blank *rule) { - value_ = apply_to(rule); - } - - void visit(const CharacterSet *rule) { - value_ = apply_to(rule); - } - - void visit(const Choice *rule) { - value_ = apply_to(rule); - } - - void visit(const Metadata *rule) { - value_ = apply_to(rule); - } - - void visit(const Pattern *rule) { - value_ = apply_to(rule); - } - - void visit(const Repeat *rule) { - value_ = apply_to(rule); - } - - void visit(const Seq *rule) { - value_ = apply_to(rule); - } - - void visit(const String *rule) { - value_ = apply_to(rule); - } - - void visit(const NamedSymbol *rule) { - value_ = apply_to(rule); - } - - void visit(const Symbol *rule) { - value_ = apply_to(rule); - } - - void visit(const ExternalToken *rule) { - value_ = apply_to(rule); - } - - private: - T value_; -}; - -template <> -class RuleFn : private Visitor { - public: - void apply(const rule_ptr &rule) { - rule->accept(this); - } - - protected: - virtual void default_apply(const Rule *rule) {} - - virtual void apply_to(const Blank *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const CharacterSet *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Choice *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Metadata *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Pattern *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Repeat *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Seq *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const String *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const NamedSymbol *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const Symbol *rule) { - return default_apply((const Rule *)rule); - } - virtual void apply_to(const ExternalToken *rule) { - return default_apply((const Rule *)rule); - } - - void visit(const Blank *rule) { - apply_to(rule); - } - void visit(const CharacterSet *rule) { - apply_to(rule); - } - void visit(const Choice *rule) { - apply_to(rule); - } - void visit(const Metadata *rule) { - apply_to(rule); - } - void visit(const Pattern *rule) { - apply_to(rule); - } - void visit(const Repeat *rule) { - apply_to(rule); - } - void visit(const Seq *rule) { - apply_to(rule); - } - void visit(const String *rule) { - apply_to(rule); - } - void visit(const NamedSymbol *rule) { - apply_to(rule); - } - void visit(const Symbol *rule) { - apply_to(rule); - } - void visit(const ExternalToken *rule) { - apply_to(rule); - } -}; - -class IdentityRuleFn : public RuleFn { - protected: - virtual rule_ptr default_apply(const Rule *rule); - virtual rule_ptr apply_to(const Choice *rule); - virtual rule_ptr apply_to(const Metadata *rule); - virtual rule_ptr apply_to(const Seq *rule); - virtual rule_ptr apply_to(const Repeat *rule); -}; - -} // namespace rules -} // namespace tree_sitter - -#endif // COMPILER_RULES_VISITOR_H_ diff --git a/src/compiler/syntax_grammar.cc b/src/compiler/syntax_grammar.cc deleted file mode 100644 index 254e1a34..00000000 --- a/src/compiler/syntax_grammar.cc +++ /dev/null @@ -1,20 +0,0 @@ -#include "compiler/syntax_grammar.h" -#include -#include -#include -#include "compiler/rules/symbol.h" -#include "compiler/rules/built_in_symbols.h" - -namespace tree_sitter { - -bool ExternalToken::operator==(const ExternalToken &other) const { - return name == other.name && type == other.type && - corresponding_internal_token == other.corresponding_internal_token; -} - -bool ProductionStep::operator==(const ProductionStep &other) const { - return symbol == other.symbol && precedence == other.precedence && - associativity == other.associativity; -} - -} // namespace tree_sitter diff --git a/src/compiler/syntax_grammar.h b/src/compiler/syntax_grammar.h index 9d154884..4099de18 100644 --- a/src/compiler/syntax_grammar.h +++ b/src/compiler/syntax_grammar.h @@ -4,22 +4,16 @@ #include #include #include -#include "compiler/rules/symbol.h" -#include "compiler/rules/metadata.h" -#include "compiler/variable.h" +#include "compiler/rule.h" +#include "compiler/grammar.h" namespace tree_sitter { -struct ExternalToken { - bool operator==(const ExternalToken &) const; - - std::string name; - VariableType type; - rules::Symbol corresponding_internal_token; -}; - struct ProductionStep { - bool operator==(const ProductionStep &) const; + inline bool operator==(const ProductionStep &other) const { + return symbol == other.symbol && precedence == other.precedence && + associativity == other.associativity; + } rules::Symbol symbol; int precedence; @@ -34,7 +28,7 @@ struct SyntaxVariable { std::vector productions; }; -typedef std::set ConflictSet; +using ConflictSet = std::set; struct SyntaxGrammar { std::vector variables; diff --git a/src/compiler/util/make_visitor.h b/src/compiler/util/make_visitor.h new file mode 100644 index 00000000..6de51dc4 --- /dev/null +++ b/src/compiler/util/make_visitor.h @@ -0,0 +1,31 @@ +#ifndef COMPILER_UTIL_MAKE_VISITOR_H_ +#define COMPILER_UTIL_MAKE_VISITOR_H_ + +namespace tree_sitter { +namespace util { + +template +struct visitor; + +template +struct visitor : Fn { + using Fn::operator(); + visitor(Fn fn) : Fn(fn) {} +}; + +template +struct visitor : Fn, visitor { + using Fn::operator(); + using visitor::operator(); + visitor(Fn fn, Fns... fns) : Fn(fn), visitor(fns...) {} +}; + +template +visitor make_visitor(Fns... fns) { + return visitor(fns...); +} + +} // namespace util +} // namespace tree_sitter + +#endif // COMPILER_UTIL_MAKE_VISITOR_H_ diff --git a/src/compiler/variable.h b/src/compiler/variable.h deleted file mode 100644 index 823852ff..00000000 --- a/src/compiler/variable.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef COMPILER_VARIABLE_H_ -#define COMPILER_VARIABLE_H_ - -#include -#include "compiler/rule.h" -#include "compiler/rules/symbol.h" - -namespace tree_sitter { - -enum VariableType { - VariableTypeHidden, - VariableTypeAuxiliary, - VariableTypeAnonymous, - VariableTypeNamed, -}; - -struct Variable { - std::string name; - VariableType type; - rule_ptr rule; -}; - -} // namespace tree_sitter - -#endif // COMPILER_VARIABLE_H_ diff --git a/test/compiler/build_tables/lex_conflict_manager_test.cc b/test/compiler/build_tables/lex_conflict_manager_test.cc index ca05a32c..7273a3c2 100644 --- a/test/compiler/build_tables/lex_conflict_manager_test.cc +++ b/test/compiler/build_tables/lex_conflict_manager_test.cc @@ -1,7 +1,6 @@ #include "test_helper.h" -#include "helpers/rule_helpers.h" #include "helpers/stream_methods.h" -#include "compiler/rules/built_in_symbols.h" +#include "compiler/rule.h" #include "compiler/parse_table.h" #include "compiler/build_tables/lex_conflict_manager.h" #include "compiler/build_tables/lex_item.h" @@ -14,11 +13,11 @@ START_TEST describe("LexConflictManager::resolve(new_action, old_action)", []() { LexConflictManager conflict_manager; bool update; - Symbol sym1(0, Symbol::Terminal); - Symbol sym2(1, Symbol::Terminal); - Symbol sym3(2, Symbol::Terminal); - Symbol sym4(3, Symbol::Terminal); - LexItemSet item_set({ LexItem(sym4, blank() )}); + Symbol sym1 = Symbol::terminal(0); + Symbol sym2 = Symbol::terminal(1); + Symbol sym3 = Symbol::terminal(2); + Symbol sym4 = Symbol::terminal(3); + LexItemSet item_set({ LexItem(sym4, Blank{} )}); before_each([&]() { conflict_manager = LexConflictManager(); diff --git a/test/compiler/build_tables/lex_item_test.cc b/test/compiler/build_tables/lex_item_test.cc index 55413759..7f69d3b9 100644 --- a/test/compiler/build_tables/lex_item_test.cc +++ b/test/compiler/build_tables/lex_item_test.cc @@ -1,8 +1,6 @@ #include "test_helper.h" #include "compiler/build_tables/lex_item.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules.h" -#include "helpers/rule_helpers.h" +#include "compiler/rule.h" #include "helpers/stream_methods.h" using namespace rules; @@ -14,7 +12,7 @@ START_TEST describe("LexItem", []() { describe("completion_status()", [&]() { it("indicates whether the item is done and its precedence", [&]() { - LexItem item1(Symbol(0, Symbol::Terminal), character({ 'a', 'b', 'c' })); + LexItem item1(Symbol::terminal(0), CharacterSet({'a', 'b', 'c'})); AssertThat(item1.completion_status().is_done, IsFalse()); AssertThat(item1.completion_status().precedence, Equals(PrecedenceRange())); @@ -22,15 +20,18 @@ describe("LexItem", []() { params.precedence = 3; params.has_precedence = true; params.is_string = 1; - LexItem item2(Symbol(0, Symbol::Terminal), choice({ - metadata(blank(), params), - character({ 'a', 'b', 'c' }) - })); + LexItem item2(Symbol::terminal(0), Choice{{ + Metadata{Blank{}, params}, + CharacterSet{{ 'a', 'b', 'c' }} + }}); AssertThat(item2.completion_status().is_done, IsTrue()); AssertThat(item2.completion_status().precedence, Equals(PrecedenceRange(3))); - LexItem item3(Symbol(0, Symbol::Terminal), repeat(character({ ' ', '\t' }))); + LexItem item3(Symbol::terminal(0), Choice{{ + Blank{}, + Repeat{CharacterSet{{ ' ', '\t' }}}, + }}); AssertThat(item3.completion_status().is_done, IsTrue()); AssertThat(item3.completion_status().precedence, Equals(PrecedenceRange())); }); @@ -40,17 +41,17 @@ describe("LexItem", []() { describe("LexItemSet::transitions()", [&]() { it("handles single characters", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'x' }}), }); AssertThat( item_set.transitions(), Equals(LexItemSet::TransitionMap({ { - CharacterSet().include('x'), + CharacterSet{{'x'}}, Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(1), Blank{}), }), PrecedenceRange(), false @@ -64,7 +65,7 @@ describe("LexItemSet::transitions()", [&]() { params.is_main_token = true; LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), metadata(character({ 'x' }), params)), + LexItem(Symbol::non_terminal(1), Metadata{CharacterSet{{'x'}}, params}), }); AssertThat( @@ -74,7 +75,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('x'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), metadata(blank(), params)), + LexItem(Symbol::non_terminal(1), Metadata{Blank{}, params}), }), PrecedenceRange(), true @@ -85,11 +86,11 @@ describe("LexItemSet::transitions()", [&]() { it("handles sequences", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - character({ 'w' }), - character({ 'x' }), - character({ 'y' }), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Rule::seq({ + CharacterSet{{ 'w' }}, + CharacterSet{{ 'x' }}, + CharacterSet{{ 'y' }}, + CharacterSet{{ 'z' }}, })), }); @@ -100,10 +101,10 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('w'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - character({ 'x' }), - character({ 'y' }), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Rule::seq({ + CharacterSet{{ 'x' }}, + CharacterSet{{ 'y' }}, + CharacterSet{{ 'z' }}, })), }), PrecedenceRange(), @@ -115,14 +116,14 @@ describe("LexItemSet::transitions()", [&]() { it("handles sequences with nested precedence", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - prec(3, seq({ - character({ 'v' }), - prec(4, seq({ - character({ 'w' }), - character({ 'x' }) })), - character({ 'y' }) })), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Rule::seq({ + Metadata::prec(3, Rule::seq({ + CharacterSet{{ 'v' }}, + Metadata::prec(4, Rule::seq({ + CharacterSet{{ 'w' }}, + CharacterSet{{ 'x' }} })), + CharacterSet{{ 'y' }} })), + CharacterSet{{ 'z' }}, })), }); @@ -137,13 +138,15 @@ describe("LexItemSet::transitions()", [&]() { // The outer precedence is now 'active', because we are within its // contained rule. LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - active_prec(3, seq({ - prec(4, seq({ - character({ 'w' }), - character({ 'x' }) })), - character({ 'y' }) })), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Rule::seq({ + Metadata::active_prec(3, Rule::seq({ + Metadata::prec(4, Rule::seq({ + CharacterSet{{ 'w' }}, + CharacterSet{{ 'x' }} + })), + CharacterSet{{ 'y' }} + })), + CharacterSet{{ 'z' }}, })), }), @@ -165,11 +168,12 @@ describe("LexItemSet::transitions()", [&]() { Transition{ // The inner precedence is now 'active' LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - active_prec(3, seq({ - active_prec(4, character({ 'x' })), - character({ 'y' }) })), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Rule::seq({ + Metadata::active_prec(3, Rule::seq({ + Metadata::active_prec(4, CharacterSet{{'x'}}), + CharacterSet{{'y'}} + })), + CharacterSet{{'z'}}, })), }), @@ -190,9 +194,9 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('x'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - active_prec(3, character({ 'y' })), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Rule::seq({ + Metadata::active_prec(3, CharacterSet{{'y'}}), + CharacterSet{{'z'}}, })), }), @@ -213,7 +217,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('y'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'z' }}), }), PrecedenceRange(3), false @@ -224,13 +228,13 @@ describe("LexItemSet::transitions()", [&]() { it("handles sequences where the left hand side can be blank", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - choice({ - character({ 'x' }), - blank(), + LexItem(Symbol::non_terminal(1), Rule::seq({ + Rule::choice({ + CharacterSet{{ 'x' }}, + Blank{}, }), - character({ 'y' }), - character({ 'z' }), + CharacterSet{{ 'y' }}, + CharacterSet{{ 'z' }}, })), }); @@ -241,9 +245,9 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('x'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - character({ 'y' }), - character({ 'z' }), + LexItem(Symbol::non_terminal(1), Rule::seq({ + CharacterSet{{ 'y' }}, + CharacterSet{{ 'z' }}, })), }), PrecedenceRange(), @@ -254,7 +258,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('y'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'z' }}), }), PrecedenceRange(), false @@ -265,7 +269,7 @@ describe("LexItemSet::transitions()", [&]() { it("handles blanks", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(1), Blank{}), }); AssertThat(item_set.transitions(), IsEmpty()); @@ -273,11 +277,11 @@ describe("LexItemSet::transitions()", [&]() { it("handles repeats", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), repeat1(seq({ - character({ 'a' }), - character({ 'b' }), - }))), - LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))), + LexItem(Symbol::non_terminal(1), Repeat{Rule::seq({ + CharacterSet{{ 'a' }}, + CharacterSet{{ 'b' }}, + })}), + LexItem(Symbol::non_terminal(2), Repeat{CharacterSet{{'c'}}}), }); AssertThat( @@ -287,14 +291,14 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('a'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - character({ 'b' }), - repeat1(seq({ - character({ 'a' }), - character({ 'b' }), - })) + LexItem(Symbol::non_terminal(1), Rule::seq({ + CharacterSet{{ 'b' }}, + Repeat{Rule::seq({ + CharacterSet{{ 'a' }}, + CharacterSet{{ 'b' }}, + })} })), - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'b' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'b' }}), }), PrecedenceRange(), false @@ -304,8 +308,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('c'), Transition{ LexItemSet({ - LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))), - LexItem(Symbol(2, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(2), Repeat{CharacterSet{{ 'c' }}}), + LexItem(Symbol::non_terminal(2), Blank{}), }), PrecedenceRange(), false @@ -316,7 +320,7 @@ describe("LexItemSet::transitions()", [&]() { it("handles repeats with precedence", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' })))) + LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Repeat{CharacterSet{{ 'a' }}})) }); AssertThat( @@ -326,8 +330,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('a'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' })))), - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, blank())), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Repeat{CharacterSet{{ 'a' }}})), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Blank{})), }), PrecedenceRange(-1), false @@ -338,14 +342,14 @@ describe("LexItemSet::transitions()", [&]() { it("handles choices between overlapping character sets", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), choice({ - active_prec(2, seq({ - character({ 'a', 'b', 'c', 'd' }), - character({ 'x' }), + LexItem(Symbol::non_terminal(1), Rule::choice({ + Metadata::active_prec(2, Rule::seq({ + CharacterSet{{ 'a', 'b', 'c', 'd' }}, + CharacterSet{{ 'x' }}, })), - active_prec(3, seq({ - character({ 'c', 'd', 'e', 'f' }), - character({ 'y' }), + Metadata::active_prec(3, Rule::seq({ + CharacterSet{{ 'c', 'd', 'e', 'f' }}, + CharacterSet{{ 'y' }}, })), })) }); @@ -357,7 +361,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('a', 'b'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(2, CharacterSet{{ 'x' }})), }), PrecedenceRange(2), false @@ -367,8 +371,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('c', 'd'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))), - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(2, CharacterSet{{ 'x' }})), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(3, CharacterSet{{ 'y' }})), }), PrecedenceRange(2, 3), false @@ -378,7 +382,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('e', 'f'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))), + LexItem(Symbol::non_terminal(1), Metadata::active_prec(3, CharacterSet{{ 'y' }})), }), PrecedenceRange(3), false @@ -389,14 +393,14 @@ describe("LexItemSet::transitions()", [&]() { it("handles choices between a subset and a superset of characters", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), choice({ - seq({ - character({ 'b', 'c', 'd' }), - character({ 'x' }), + LexItem(Symbol::non_terminal(1), Rule::choice({ + Rule::seq({ + CharacterSet{{ 'b', 'c', 'd' }}, + CharacterSet{{ 'x' }}, }), - seq({ - character({ 'a', 'b', 'c', 'd', 'e', 'f' }), - character({ 'y' }), + Rule::seq({ + CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f' }}, + CharacterSet{{ 'y' }}, }), })), }); @@ -408,7 +412,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('a').include('e', 'f'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'y' }}), }), PrecedenceRange(), false @@ -418,8 +422,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('b', 'd'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })), - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'x' }}), + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'y' }}), }), PrecedenceRange(), false @@ -430,15 +434,15 @@ describe("LexItemSet::transitions()", [&]() { it("handles choices between whitelisted and blacklisted character sets", [&]() { LexItemSet item_set({ - LexItem(Symbol(1, Symbol::NonTerminal), seq({ - choice({ - character({ '/' }, false), - seq({ - character({ '\\' }), - character({ '/' }), + LexItem(Symbol::non_terminal(1), Rule::seq({ + Rule::choice({ + CharacterSet().include_all().exclude('/'), + Rule::seq({ + CharacterSet{{ '\\' }}, + CharacterSet{{ '/' }}, }), }), - character({ '/' }), + CharacterSet{{ '/' }}, })) }); @@ -449,7 +453,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include_all().exclude('/').exclude('\\'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ '/' }}), }), PrecedenceRange(), false @@ -459,8 +463,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('\\'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })), - LexItem(Symbol(1, Symbol::NonTerminal), seq({ character({ '/' }), character({ '/' }) })), + LexItem(Symbol::non_terminal(1), CharacterSet{{ '/' }}), + LexItem(Symbol::non_terminal(1), Rule::seq({ CharacterSet{{ '/' }}, CharacterSet{{ '/' }} })), }), PrecedenceRange(), false @@ -471,8 +475,8 @@ describe("LexItemSet::transitions()", [&]() { it("handles different items with overlapping character sets", [&]() { LexItemSet set1({ - LexItem(Symbol(1, Symbol::NonTerminal), character({ 'a', 'b', 'c', 'd', 'e', 'f' })), - LexItem(Symbol(2, Symbol::NonTerminal), character({ 'e', 'f', 'g', 'h', 'i' })) + LexItem(Symbol::non_terminal(1), CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f' }}), + LexItem(Symbol::non_terminal(2), CharacterSet{{ 'e', 'f', 'g', 'h', 'i' }}) }); AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({ @@ -480,7 +484,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('a', 'd'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(1), Blank{}), }), PrecedenceRange(), false @@ -490,8 +494,8 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('e', 'f'), Transition{ LexItemSet({ - LexItem(Symbol(1, Symbol::NonTerminal), blank()), - LexItem(Symbol(2, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(1), Blank{}), + LexItem(Symbol::non_terminal(2), Blank{}), }), PrecedenceRange(), false @@ -501,7 +505,7 @@ describe("LexItemSet::transitions()", [&]() { CharacterSet().include('g', 'i'), Transition{ LexItemSet({ - LexItem(Symbol(2, Symbol::NonTerminal), blank()), + LexItem(Symbol::non_terminal(2), Blank{}), }), PrecedenceRange(), false diff --git a/test/compiler/build_tables/parse_item_set_builder_test.cc b/test/compiler/build_tables/parse_item_set_builder_test.cc index 9bc4715b..8583c7b1 100644 --- a/test/compiler/build_tables/parse_item_set_builder_test.cc +++ b/test/compiler/build_tables/parse_item_set_builder_test.cc @@ -3,8 +3,6 @@ #include "compiler/lexical_grammar.h" #include "compiler/build_tables/parse_item_set_builder.h" #include "compiler/build_tables/lookahead_set.h" -#include "compiler/rules/built_in_symbols.h" -#include "helpers/rule_helpers.h" using namespace build_tables; using namespace rules; @@ -17,7 +15,7 @@ describe("ParseItemSetBuilder", []() { lexical_variables.push_back({ "token_" + to_string(i), VariableTypeNamed, - blank(), + Blank{}, false }); } @@ -28,23 +26,23 @@ describe("ParseItemSetBuilder", []() { SyntaxGrammar grammar{{ SyntaxVariable{"rule0", VariableTypeNamed, { Production({ - {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(11, Symbol::Terminal), 0, AssociativityNone}, + {Symbol::non_terminal(1), 0, AssociativityNone}, + {Symbol::terminal(11), 0, AssociativityNone}, }), }}, SyntaxVariable{"rule1", VariableTypeNamed, { Production({ - {Symbol(12, Symbol::Terminal), 0, AssociativityNone}, - {Symbol(13, Symbol::Terminal), 0, AssociativityNone}, + {Symbol::terminal(12), 0, AssociativityNone}, + {Symbol::terminal(13), 0, AssociativityNone}, }), Production({ - {Symbol(2, Symbol::NonTerminal), 0, AssociativityNone}, + {Symbol::non_terminal(2), 0, AssociativityNone}, }) }}, SyntaxVariable{"rule2", VariableTypeNamed, { Production({ - {Symbol(14, Symbol::Terminal), 0, AssociativityNone}, - {Symbol(15, Symbol::Terminal), 0, AssociativityNone}, + {Symbol::terminal(14), 0, AssociativityNone}, + {Symbol::terminal(15), 0, AssociativityNone}, }) }}, }, {}, {}, {}}; @@ -55,8 +53,8 @@ describe("ParseItemSetBuilder", []() { ParseItemSet item_set({ { - ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0), - LookaheadSet({ Symbol(10, Symbol::Terminal) }), + ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + LookaheadSet({ Symbol::terminal(10) }), } }); @@ -65,20 +63,20 @@ describe("ParseItemSetBuilder", []() { AssertThat(item_set, Equals(ParseItemSet({ { - ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0), - LookaheadSet({ Symbol(10, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + LookaheadSet({ Symbol::terminal(10) }) }, { - ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0), - LookaheadSet({ Symbol(11, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(1), production(1, 0), 0), + LookaheadSet({ Symbol::terminal(11) }) }, { - ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0), - LookaheadSet({ Symbol(11, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(1), production(1, 1), 0), + LookaheadSet({ Symbol::terminal(11) }) }, { - ParseItem(Symbol(2, Symbol::NonTerminal), production(2, 0), 0), - LookaheadSet({ Symbol(11, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(2), production(2, 0), 0), + LookaheadSet({ Symbol::terminal(11) }) }, }))); }); @@ -87,14 +85,14 @@ describe("ParseItemSetBuilder", []() { SyntaxGrammar grammar{{ SyntaxVariable{"rule0", VariableTypeNamed, { Production({ - {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(11, Symbol::Terminal), 0, AssociativityNone}, + {Symbol::non_terminal(1), 0, AssociativityNone}, + {Symbol::terminal(11), 0, AssociativityNone}, }), }}, SyntaxVariable{"rule1", VariableTypeNamed, { Production({ - {Symbol(12, Symbol::Terminal), 0, AssociativityNone}, - {Symbol(13, Symbol::Terminal), 0, AssociativityNone}, + {Symbol::terminal(12), 0, AssociativityNone}, + {Symbol::terminal(13), 0, AssociativityNone}, }), Production({}) }}, @@ -106,8 +104,8 @@ describe("ParseItemSetBuilder", []() { ParseItemSet item_set({ { - ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0), - LookaheadSet({ Symbol(10, Symbol::Terminal) }), + ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + LookaheadSet({ Symbol::terminal(10) }), } }); @@ -116,16 +114,16 @@ describe("ParseItemSetBuilder", []() { AssertThat(item_set, Equals(ParseItemSet({ { - ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0), - LookaheadSet({ Symbol(10, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + LookaheadSet({ Symbol::terminal(10) }) }, { - ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0), - LookaheadSet({ Symbol(11, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(1), production(1, 0), 0), + LookaheadSet({ Symbol::terminal(11) }) }, { - ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0), - LookaheadSet({ Symbol(11, Symbol::Terminal) }) + ParseItem(Symbol::non_terminal(1), production(1, 1), 0), + LookaheadSet({ Symbol::terminal(11) }) }, }))); }); diff --git a/test/compiler/build_tables/rule_can_be_blank_test.cc b/test/compiler/build_tables/rule_can_be_blank_test.cc index a3068f91..92dffa01 100644 --- a/test/compiler/build_tables/rule_can_be_blank_test.cc +++ b/test/compiler/build_tables/rule_can_be_blank_test.cc @@ -1,8 +1,6 @@ #include "test_helper.h" #include "compiler/build_tables/rule_can_be_blank.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules.h" -#include "helpers/rule_helpers.h" +#include "compiler/rule.h" using namespace rules; using build_tables::rule_can_be_blank; @@ -10,49 +8,48 @@ using build_tables::rule_can_be_blank; START_TEST describe("rule_can_be_blank", [&]() { - rule_ptr rule; + Rule rule; it("returns false for basic rules", [&]() { - AssertThat(rule_can_be_blank(i_sym(3)), IsFalse()); - AssertThat(rule_can_be_blank(str("x")), IsFalse()); - AssertThat(rule_can_be_blank(pattern("x")), IsFalse()); + AssertThat(rule_can_be_blank(CharacterSet{{'x'}}), IsFalse()); }); it("returns true for blanks", [&]() { - AssertThat(rule_can_be_blank(blank()), IsTrue()); + AssertThat(rule_can_be_blank(Blank{}), IsTrue()); }); - it("returns true for repeats", [&]() { - AssertThat(rule_can_be_blank(repeat(str("x"))), IsTrue()); + it("returns true for repeats iff the content can be blank", [&]() { + AssertThat(rule_can_be_blank(Repeat{CharacterSet{{'x'}}}), IsFalse()); + AssertThat(rule_can_be_blank(Repeat{Blank{}}), IsTrue()); }); it("returns true for choices iff one or more sides can be blank", [&]() { - rule = choice({ sym("x"), blank() }); + rule = Rule::choice({ CharacterSet{{'x'}}, Blank{} }); AssertThat(rule_can_be_blank(rule), IsTrue()); - rule = choice({ blank(), sym("x") }); + rule = Rule::choice({ Blank{}, CharacterSet{{'x'}} }); AssertThat(rule_can_be_blank(rule), IsTrue()); - rule = choice({ sym("x"), sym("y") }); + rule = Rule::choice({ CharacterSet{{'x'}}, CharacterSet{{'y'}} }); AssertThat(rule_can_be_blank(rule), IsFalse()); }); it("returns true for sequences iff both sides can be blank", [&]() { - rule = seq({ blank(), str("x") }); + rule = Rule::seq({ Blank{}, CharacterSet{{'x'}} }); AssertThat(rule_can_be_blank(rule), IsFalse()); - rule = seq({ str("x"), blank() }); + rule = Rule::seq({ CharacterSet{{'x'}}, Blank{} }); AssertThat(rule_can_be_blank(rule), IsFalse()); - rule = seq({ blank(), choice({ sym("x"), blank() }) }); + rule = Rule::seq({ Blank{}, Rule::choice({ CharacterSet{{'x'}}, Blank{} }) }); AssertThat(rule_can_be_blank(rule), IsTrue()); }); it("ignores metadata rules", [&]() { - rule = make_shared(blank(), MetadataParams()); + rule = Metadata::prec(1, Blank{}); AssertThat(rule_can_be_blank(rule), IsTrue()); - rule = make_shared(sym("one"), MetadataParams()); + rule = Metadata::prec(1, CharacterSet{{'x'}}); AssertThat(rule_can_be_blank(rule), IsFalse()); }); }); diff --git a/test/compiler/prepare_grammar/expand_repeats_test.cc b/test/compiler/prepare_grammar/expand_repeats_test.cc index 7d735a4a..c025a898 100644 --- a/test/compiler/prepare_grammar/expand_repeats_test.cc +++ b/test/compiler/prepare_grammar/expand_repeats_test.cc @@ -1,20 +1,24 @@ #include "test_helper.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/prepare_grammar/expand_repeats.h" -#include "helpers/rule_helpers.h" #include "helpers/stream_methods.h" -START_TEST - using namespace rules; using prepare_grammar::InitialSyntaxGrammar; using prepare_grammar::expand_repeats; +using Variable = InitialSyntaxGrammar::Variable; + +bool operator==(const Variable &left, const Variable &right) { + return left.name == right.name && left.rule == right.rule && left.type == right.type; +} + +START_TEST describe("expand_repeats", []() { it("replaces repeat rules with pairs of recursive rules", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, repeat1(i_token(0))}, + Variable{"rule0", VariableTypeNamed, Repeat{Symbol::terminal(0)}}, }, {}, {}, {} }; @@ -22,10 +26,10 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, i_sym(1)}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(1), i_token(0) }), - i_token(0), + Variable{"rule0", VariableTypeNamed, Symbol::non_terminal(1)}, + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(0) }), + Symbol::terminal(0), })}, })); }); @@ -33,9 +37,9 @@ describe("expand_repeats", []() { it("replaces repeats inside of sequences", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, seq({ - i_token(10), - repeat1(i_token(11)), + Variable{"rule0", VariableTypeNamed, Rule::seq({ + Symbol::terminal(10), + Repeat{Symbol::terminal(11)}, })}, }, {}, {}, {} @@ -44,13 +48,13 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, seq({ - i_token(10), - i_sym(1), + Variable{"rule0", VariableTypeNamed, Rule::seq({ + Symbol::terminal(10), + Symbol::non_terminal(1), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(1), i_token(11) }), - i_token(11) + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(11) }), + Symbol::terminal(11) })}, })); }); @@ -58,9 +62,9 @@ describe("expand_repeats", []() { it("replaces repeats inside of choices", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, choice({ - i_token(10), - repeat1(i_token(11)) + Variable{"rule0", VariableTypeNamed, Rule::choice({ + Symbol::terminal(10), + Repeat{Symbol::terminal(11)} })}, }, {}, {}, {} @@ -69,13 +73,13 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, choice({ - i_token(10), - i_sym(1), + Variable{"rule0", VariableTypeNamed, Rule::choice({ + Symbol::terminal(10), + Symbol::non_terminal(1), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(1), i_token(11) }), - i_token(11), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(11) }), + Symbol::terminal(11), })}, })); }); @@ -83,13 +87,13 @@ describe("expand_repeats", []() { it("does not create redundant auxiliary rules", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, choice({ - seq({ i_token(1), repeat1(i_token(4)) }), - seq({ i_token(2), repeat1(i_token(4)) }), + Variable{"rule0", VariableTypeNamed, Rule::choice({ + Rule::seq({ Symbol::terminal(1), Repeat{Symbol::terminal(4)} }), + Rule::seq({ Symbol::terminal(2), Repeat{Symbol::terminal(4)} }), })}, - Variable{"rule1", VariableTypeNamed, seq({ - i_token(3), - repeat1(i_token(4)) + Variable{"rule1", VariableTypeNamed, Rule::seq({ + Symbol::terminal(3), + Repeat{Symbol::terminal(4)} })}, }, {}, {}, {} @@ -98,17 +102,17 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, choice({ - seq({ i_token(1), i_sym(2) }), - seq({ i_token(2), i_sym(2) }), + Variable{"rule0", VariableTypeNamed, Rule::choice({ + Rule::seq({ Symbol::terminal(1), Symbol::non_terminal(2) }), + Rule::seq({ Symbol::terminal(2), Symbol::non_terminal(2) }), })}, - Variable{"rule1", VariableTypeNamed, seq({ - i_token(3), - i_sym(2), + Variable{"rule1", VariableTypeNamed, Rule::seq({ + Symbol::terminal(3), + Symbol::non_terminal(2), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(2), i_token(4) }), - i_token(4), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(4) }), + Symbol::terminal(4), })}, })); }); @@ -116,9 +120,9 @@ describe("expand_repeats", []() { it("can replace multiple repeats in the same rule", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, seq({ - repeat1(i_token(10)), - repeat1(i_token(11)), + Variable{"rule0", VariableTypeNamed, Rule::seq({ + Repeat{Symbol::terminal(10)}, + Repeat{Symbol::terminal(11)}, })}, }, {}, {}, {} @@ -127,17 +131,17 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, seq({ - i_sym(1), - i_sym(2), + Variable{"rule0", VariableTypeNamed, Rule::seq({ + Symbol::non_terminal(1), + Symbol::non_terminal(2), })}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(1), i_token(10) }), - i_token(10), + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(10) }), + Symbol::terminal(10), })}, - Variable{"rule0_repeat2", VariableTypeAuxiliary, choice({ - seq({ i_sym(2), i_token(11) }), - i_token(11), + Variable{"rule0_repeat2", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(11) }), + Symbol::terminal(11), })}, })); }); @@ -145,8 +149,8 @@ describe("expand_repeats", []() { it("can replace repeats in multiple rules", [&]() { InitialSyntaxGrammar grammar{ { - Variable{"rule0", VariableTypeNamed, repeat1(i_token(10))}, - Variable{"rule1", VariableTypeNamed, repeat1(i_token(11))}, + Variable{"rule0", VariableTypeNamed, Repeat{Symbol::terminal(10)}}, + Variable{"rule1", VariableTypeNamed, Repeat{Symbol::terminal(11)}}, }, {}, {}, {} }; @@ -154,15 +158,15 @@ describe("expand_repeats", []() { auto result = expand_repeats(grammar); AssertThat(result.variables, Equals(vector{ - Variable{"rule0", VariableTypeNamed, i_sym(2)}, - Variable{"rule1", VariableTypeNamed, i_sym(3)}, - Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(2), i_token(10) }), - i_token(10), + Variable{"rule0", VariableTypeNamed, Symbol::non_terminal(2)}, + Variable{"rule1", VariableTypeNamed, Symbol::non_terminal(3)}, + Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(10) }), + Symbol::terminal(10), })}, - Variable{"rule1_repeat1", VariableTypeAuxiliary, choice({ - seq({ i_sym(3), i_token(11) }), - i_token(11), + Variable{"rule1_repeat1", VariableTypeAuxiliary, Rule::choice({ + Rule::seq({ Symbol::non_terminal(3), Symbol::terminal(11) }), + Symbol::terminal(11), })}, })); }); diff --git a/test/compiler/prepare_grammar/expand_tokens_test.cc b/test/compiler/prepare_grammar/expand_tokens_test.cc index 0c1e6209..5b2038f6 100644 --- a/test/compiler/prepare_grammar/expand_tokens_test.cc +++ b/test/compiler/prepare_grammar/expand_tokens_test.cc @@ -1,12 +1,13 @@ #include "test_helper.h" #include "compiler/lexical_grammar.h" #include "compiler/prepare_grammar/expand_tokens.h" -#include "helpers/rule_helpers.h" +#include "helpers/stream_methods.h" START_TEST using namespace rules; -using prepare_grammar::expand_tokens; +using prepare_grammar::expand_token; +using prepare_grammar::ExpandTokenResult; describe("expand_tokens", []() { MetadataParams string_token_params; @@ -15,153 +16,72 @@ describe("expand_tokens", []() { describe("string rules", [&]() { it("replaces strings with sequences of character sets", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "rule_A", - VariableTypeNamed, - seq({ - i_sym(10), - str("xyz"), - i_sym(11), - }), - false - } - }, - {} - }; - - auto result = expand_tokens(grammar); - - AssertThat(result.second, Equals(CompileError::none())); - AssertThat(result.first.variables, Equals(vector{ - LexicalVariable{ - "rule_A", - VariableTypeNamed, - seq({ - i_sym(10), - metadata(seq({ - character({ 'x' }), - character({ 'y' }), - character({ 'z' }), - }), string_token_params), - i_sym(11), + AssertThat( + expand_token(Rule::seq({ + String{"a"}, + String{"bcd"}, + String{"e"} + })).rule, + Equals(Rule::seq({ + CharacterSet{{ 'a' }}, + Rule::seq({ + CharacterSet{{ 'b' }}, + CharacterSet{{ 'c' }}, + CharacterSet{{ 'd' }}, }), - false - } - })); + CharacterSet{{ 'e' }}, + }))); }); it("handles strings containing non-ASCII UTF8 characters", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "rule_A", - VariableTypeNamed, - str("\u03B1 \u03B2"), - false - }, - }, - {} - }; - - auto result = expand_tokens(grammar); - - AssertThat(result.first.variables, Equals(vector{ - LexicalVariable{ - "rule_A", - VariableTypeNamed, - metadata(seq({ - character({ 945 }), - character({ ' ' }), - character({ 946 }), - }), string_token_params), - false - } - })); + AssertThat( + expand_token(String{"\u03B1 \u03B2"}).rule, + Equals(Rule::seq({ + CharacterSet{{ 945 }}, + CharacterSet{{ ' ' }}, + CharacterSet{{ 946 }}, + })) + ); }); }); describe("regexp rules", [&]() { it("replaces regexps with the equivalent rule tree", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "rule_A", - VariableTypeNamed, - seq({ - i_sym(10), - pattern("x*"), - i_sym(11), - }), - false - } - }, - {} - }; - - auto result = expand_tokens(grammar); - - AssertThat(result.second, Equals(CompileError::none())); - AssertThat(result.first.variables, Equals(vector{ - LexicalVariable{ - "rule_A", - VariableTypeNamed, - seq({ - i_sym(10), - repeat(character({ 'x' })), - i_sym(11), - }), - false - } - })); + AssertThat( + expand_token(Rule::seq({ + String{"a"}, + Pattern{"x+"}, + String{"b"}, + })).rule, + Equals(Rule::seq({ + CharacterSet{{'a'}}, + Repeat{CharacterSet{{ 'x' }}}, + CharacterSet{{'b'}}, + })) + ); }); it("handles regexps containing non-ASCII UTF8 characters", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "rule_A", - VariableTypeNamed, - pattern("[^\u03B1-\u03B4]*"), - false - } - }, - {} - }; - - auto result = expand_tokens(grammar); - - AssertThat(result.first.variables, Equals(vector{ - LexicalVariable{ - "rule_A", - VariableTypeNamed, - repeat(character({ 945, 946, 947, 948 }, false)), - false - } - })); + AssertThat( + expand_token(Pattern{"[^\u03B1-\u03B4]+"}).rule, + Equals(Rule(Repeat{ + CharacterSet().include_all().exclude(945, 948) + })) + ); }); it("returns an error when the grammar contains an invalid regex", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "rule_A", - VariableTypeNamed, - seq({ - pattern("("), - str("xyz"), - pattern("["), - }), - false - }, - }, - {} - }; - - auto result = expand_tokens(grammar); - - AssertThat(result.second, Equals(CompileError(TSCompileErrorTypeInvalidRegex, "unmatched open paren"))); + AssertThat( + expand_token(Rule::seq({ + Pattern{"("}, + String{"xyz"}, + Pattern{"["}, + })).error, + Equals(CompileError( + TSCompileErrorTypeInvalidRegex, + "unmatched open paren" + )) + ); }); }); }); diff --git a/test/compiler/prepare_grammar/extract_choices_test.cc b/test/compiler/prepare_grammar/extract_choices_test.cc index f6891865..d1c859e7 100644 --- a/test/compiler/prepare_grammar/extract_choices_test.cc +++ b/test/compiler/prepare_grammar/extract_choices_test.cc @@ -1,104 +1,75 @@ #include "test_helper.h" +#include "helpers/stream_methods.h" #include "compiler/prepare_grammar/extract_choices.h" -#include "helpers/rule_helpers.h" START_TEST using namespace rules; using prepare_grammar::extract_choices; -class rule_vector : public vector { - public: - bool operator==(const vector &other) const { - if (this->size() != other.size()) return false; - for (size_t i = 0; i < this->size(); i++) { - auto rule = this->operator[](i); - auto other_rule = other[i]; - if (!rule->operator==(*rule)) - return false; - } - return true; - } - - rule_vector(const initializer_list &list) : - vector(list) {} -}; - describe("extract_choices", []() { it("expands rules containing choices into multiple rules", [&]() { - auto rule = seq({ - sym("a"), - choice({ sym("b"), sym("c"), sym("d") }), - sym("e") + auto rule = Rule::seq({ + Symbol::terminal(1), + Rule::choice({ + Symbol::terminal(2), + Symbol::terminal(3), + Symbol::terminal(4) + }), + Symbol::terminal(5) }); - AssertThat(extract_choices(rule), Equals(rule_vector({ - seq({ sym("a"), sym("b"), sym("e") }), - seq({ sym("a"), sym("c"), sym("e") }), - seq({ sym("a"), sym("d"), sym("e") }), + auto result = extract_choices(rule); + + AssertThat(result, Equals(vector({ + Rule::seq({Symbol::terminal(1), Symbol::terminal(2), Symbol::terminal(5)}), + Rule::seq({Symbol::terminal(1), Symbol::terminal(3), Symbol::terminal(5)}), + Rule::seq({Symbol::terminal(1), Symbol::terminal(4), Symbol::terminal(5)}), }))); }); it("handles metadata rules", [&]() { - auto rule = prec(5, choice({ sym("b"), sym("c"), sym("d") })); + auto rule = Metadata::prec(5, Rule::choice({ + Symbol::terminal(2), + Symbol::terminal(3), + Symbol::terminal(4) + })); - AssertThat(extract_choices(rule), Equals(rule_vector({ - prec(5, sym("b")), - prec(5, sym("c")), - prec(5, sym("d")), + AssertThat(extract_choices(rule), Equals(vector({ + Metadata::prec(5, Symbol::terminal(2)), + Metadata::prec(5, Symbol::terminal(3)), + Metadata::prec(5, Symbol::terminal(4)), }))); }); it("handles nested choices", [&]() { - auto rule = choice({ - seq({ choice({ sym("a"), sym("b") }), sym("c") }), - sym("d") + auto rule = Rule::choice({ + Rule::seq({ + Rule::choice({ + Symbol::terminal(1), + Symbol::terminal(2) + }), + Symbol::terminal(3) + }), + Symbol::terminal(4) }); - AssertThat(extract_choices(rule), Equals(rule_vector({ - seq({ sym("a"), sym("c") }), - seq({ sym("b"), sym("c") }), - sym("d"), + AssertThat(extract_choices(rule), Equals(vector({ + Rule::seq({Symbol::terminal(1), Symbol::terminal(3)}), + Rule::seq({Symbol::terminal(2), Symbol::terminal(3)}), + Symbol::terminal(4), + }))); + }); + + it("handles single symbols", [&]() { + AssertThat(extract_choices(Symbol::terminal(2)), Equals(vector({ + Symbol::terminal(2) }))); }); it("handles blank rules", [&]() { - AssertThat(extract_choices(blank()), Equals(rule_vector({ - blank(), - }))); - }); - - it("does not move choices outside of repeats", [&]() { - auto rule = seq({ - choice({ sym("a"), sym("b") }), - repeat1(seq({ - sym("c"), - choice({ - sym("d"), - sym("e"), - }), - sym("f"), - })), - sym("g"), - }); - - AssertThat(extract_choices(rule), Equals(rule_vector({ - seq({ - sym("a"), - repeat1(choice({ - seq({ sym("c"), sym("d"), sym("f") }), - seq({ sym("c"), sym("e"), sym("f") }), - })), - sym("g"), - }), - seq({ - sym("b"), - repeat1(choice({ - seq({ sym("c"), sym("d"), sym("f") }), - seq({ sym("c"), sym("e"), sym("f") }), - })), - sym("g"), - }), + AssertThat(extract_choices(Blank{}), Equals(vector({ + Blank{}, }))); }); }); diff --git a/test/compiler/prepare_grammar/extract_tokens_test.cc b/test/compiler/prepare_grammar/extract_tokens_test.cc index 76ed50a0..dd9156a2 100644 --- a/test/compiler/prepare_grammar/extract_tokens_test.cc +++ b/test/compiler/prepare_grammar/extract_tokens_test.cc @@ -3,8 +3,6 @@ #include "compiler/prepare_grammar/interned_grammar.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/prepare_grammar/extract_tokens.h" -#include "helpers/rule_helpers.h" -#include "helpers/equals_pointer.h" #include "helpers/stream_methods.h" START_TEST @@ -13,23 +11,44 @@ using namespace rules; using prepare_grammar::extract_tokens; using prepare_grammar::InternedGrammar; using prepare_grammar::InitialSyntaxGrammar; +using InternedVariable = InternedGrammar::Variable; +using InitialSyntaxVariable = InitialSyntaxGrammar::Variable; describe("extract_tokens", []() { it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, repeat1(seq({ - str("ab"), - pattern("cd*"), - choice({ - i_sym(1), - i_sym(2), - token(repeat1(choice({ str("ef"), str("gh") }))), - }), - }))}, - Variable{"rule_B", VariableTypeNamed, pattern("ij+")}, - Variable{"rule_C", VariableTypeNamed, choice({ str("kl"), blank() })}, - Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(3))}, + InternedVariable{ + "rule_A", + VariableTypeNamed, + Repeat{Rule::seq({ + String{"ab"}, + Pattern{"cd+"}, + Rule::choice({ + Symbol::non_terminal(1), + Symbol::non_terminal(2), + Metadata::token(Repeat{Rule::choice({ + String{"ef"}, + String{"g"} + })}), + }), + })} + }, + InternedVariable{ + "rule_B", + VariableTypeNamed, + Pattern{"h+"} + }, + InternedVariable{ + "rule_C", + VariableTypeNamed, + Rule::choice({ String{"i"}, Blank{} }) + }, + InternedVariable{ + "rule_D", + VariableTypeNamed, + Repeat{Symbol::non_terminal(3)} + }, }, {}, {}, @@ -42,62 +61,104 @@ describe("extract_tokens", []() { AssertThat(error, Equals(CompileError::none())); - AssertThat(syntax_grammar.variables, Equals(vector{ - Variable{"rule_A", VariableTypeNamed, repeat1(seq({ + AssertThat(syntax_grammar.variables, Equals(vector{ + InitialSyntaxVariable{ + "rule_A", + VariableTypeNamed, + Repeat{Rule::seq({ - // This string is now the first token in the lexical grammar. - i_token(0), + // This string is now the first token in the lexical grammar. + Symbol::terminal(0), - // This pattern is now the second rule in the lexical grammar. - i_token(1), + // This pattern is now the second rule in the lexical grammar. + Symbol::terminal(1), - choice({ - // Rule 1, which this symbol pointed to, has been moved to the - // lexical grammar. - i_token(3), + Rule::choice({ + // Rule 1, which this symbol pointed to, has been moved to the + // lexical grammar. + Symbol::terminal(3), - // This symbol's index has been decremented, because a previous rule - // was moved to the lexical grammar. - i_sym(1), + // This symbol's index has been decremented, because a previous rule + // was moved to the lexical grammar. + Symbol::non_terminal(1), - // This token rule is now the third rule in the lexical grammar. - i_token(2), - }), - }))}, + // This token rule is now the third rule in the lexical grammar. + Symbol::terminal(2), + }), + })} + }, - Variable{"rule_C", VariableTypeNamed, choice({ i_token(4), blank() })}, - Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(2))}, + InitialSyntaxVariable{ + "rule_C", + VariableTypeNamed, + Rule::choice({Symbol::terminal(4), Blank{}}) + }, + + InitialSyntaxVariable{ + "rule_D", + VariableTypeNamed, + Repeat{Symbol::non_terminal(2)} + }, })); AssertThat(lexical_grammar.variables, Equals(vector({ // Strings become anonymous rules. - LexicalVariable{"ab", VariableTypeAnonymous, str("ab"), true}, + LexicalVariable{ + "ab", + VariableTypeAnonymous, + Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}}, + true + }, // Patterns become hidden rules. - LexicalVariable{"/cd*/", VariableTypeAuxiliary, pattern("cd*"), false}, + LexicalVariable{ + "/cd+/", + VariableTypeAuxiliary, + Seq{CharacterSet{{'c'}}, Repeat{CharacterSet{{'d'}}}}, + false + }, // Rules marked as tokens become hidden rules. - LexicalVariable{"/(ef|gh)*/", VariableTypeAuxiliary, repeat1(choice({ - str("ef"), - str("gh") - })), false}, + LexicalVariable{ + "/(ef|g)+/", + VariableTypeAuxiliary, + Repeat{Rule::choice({ + Seq{CharacterSet{{'e'}}, CharacterSet{{'f'}}}, + CharacterSet{{'g'}}, + })}, + false + }, // This named rule was moved wholesale to the lexical grammar. - LexicalVariable{"rule_B", VariableTypeNamed, pattern("ij+"), false}, + LexicalVariable{ + "rule_B", + VariableTypeNamed, + Repeat{CharacterSet{{'h'}}}, + false + }, // Strings become anonymous rules. - LexicalVariable{"kl", VariableTypeAnonymous, str("kl"), true}, + LexicalVariable{ + "i", + VariableTypeAnonymous, + CharacterSet{{'i'}}, + true + }, }))); }); it("does not create duplicate tokens in the lexical grammar", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, seq({ - str("ab"), - i_sym(0), - str("ab"), - })}, + { + "rule_A", + VariableTypeNamed, + Rule::seq({ + String{"ab"}, + Symbol::non_terminal(1), + String{"ab"}, + }) + }, }, {}, {}, @@ -107,50 +168,114 @@ describe("extract_tokens", []() { InitialSyntaxGrammar &syntax_grammar = get<0>(result); LexicalGrammar &lexical_grammar = get<1>(result); - AssertThat(syntax_grammar.variables, Equals(vector { - Variable {"rule_A", VariableTypeNamed, seq({ i_token(0), i_sym(0), i_token(0) })}, + AssertThat(syntax_grammar.variables, Equals(vector { + InitialSyntaxVariable{ + "rule_A", + VariableTypeNamed, + Rule::seq({ + Symbol::terminal(0), + Symbol::non_terminal(1), + Symbol::terminal(0) + }) + }, })); AssertThat(lexical_grammar.variables, Equals(vector { - LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true}, + LexicalVariable{ + "ab", + VariableTypeAnonymous, + Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}}, + true + }, })) }); it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() { auto result = extract_tokens(InternedGrammar{{ - Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })}, - Variable{"rule_B", VariableTypeNamed, str("cd")}, - Variable{"rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })}, + InternedVariable{ + "rule_A", + VariableTypeNamed, + Rule::seq({ Symbol::non_terminal(1), String{"ab"} }) + }, + InternedVariable{ + "rule_B", + VariableTypeNamed, + String{"cd"} + }, + InternedVariable{ + "rule_C", + VariableTypeNamed, + Rule::seq({ String{"ef"}, String{"cd"} }) + }, }, {}, {}, {}}); InitialSyntaxGrammar &syntax_grammar = get<0>(result); LexicalGrammar &lexical_grammar = get<1>(result); - AssertThat(syntax_grammar.variables, Equals(vector({ - Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })}, - Variable{"rule_B", VariableTypeNamed, i_token(1)}, - Variable{"rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })}, + AssertThat(syntax_grammar.variables, Equals(vector({ + InitialSyntaxVariable{ + "rule_A", + VariableTypeNamed, + Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(0) }) + }, + InitialSyntaxVariable{ + "rule_B", + VariableTypeNamed, + Symbol::terminal(1) + }, + InitialSyntaxVariable{ + "rule_C", + VariableTypeNamed, + Rule::seq({ Symbol::terminal(2), Symbol::terminal(1) }) + }, }))); AssertThat(lexical_grammar.variables, Equals(vector { - LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true}, - LexicalVariable {"cd", VariableTypeAnonymous, str("cd"), true}, - LexicalVariable {"ef", VariableTypeAnonymous, str("ef"), true}, + LexicalVariable{ + "ab", + VariableTypeAnonymous, + Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}}, + true + }, + LexicalVariable{ + "cd", + VariableTypeAnonymous, + Seq{CharacterSet{{'c'}}, CharacterSet{{'d'}}}, + true + }, + LexicalVariable{ + "ef", + VariableTypeAnonymous, + Seq{CharacterSet{{'e'}}, CharacterSet{{'f'}}}, + true + }, })); }); it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, str("ok")}, - Variable{"rule_B", VariableTypeNamed, repeat(i_sym(0))}, - Variable{"rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))}, + InternedVariable{ + "rule_A", + VariableTypeNamed, + String{"ok"} + }, + InternedVariable{ + "rule_B", + VariableTypeNamed, + Repeat{Symbol::non_terminal(0)} + }, + InternedVariable{ + "rule_C", + VariableTypeNamed, + Repeat{Seq{Symbol::non_terminal(0), Symbol::non_terminal(0)}} + }, }, { - str(" ") + String{" "} }, { - { Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) } + { Symbol::non_terminal(1), Symbol::non_terminal(2) } }, {} }); @@ -159,7 +284,7 @@ describe("extract_tokens", []() { AssertThat(syntax_grammar.variables.size(), Equals(2)); AssertThat(syntax_grammar.expected_conflicts, Equals(set>({ - { Symbol(0, Symbol::NonTerminal), Symbol(1, Symbol::NonTerminal) }, + { Symbol::non_terminal(0), Symbol::non_terminal(1) }, }))); }); @@ -167,11 +292,11 @@ describe("extract_tokens", []() { it("adds inline extra tokens to the lexical grammar's separators", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, str("x")}, + InternedVariable{"rule_A", VariableTypeNamed, String{"x"}}, }, { - str("y"), - pattern("\\s+"), + String{"y"}, + Pattern{" "}, }, {}, {} @@ -180,8 +305,8 @@ describe("extract_tokens", []() { AssertThat(get<2>(result), Equals(CompileError::none())); AssertThat(get<1>(result).separators.size(), Equals(2)); - AssertThat(get<1>(result).separators[0], EqualsPointer(str("y"))); - AssertThat(get<1>(result).separators[1], EqualsPointer(pattern("\\s+"))); + AssertThat(get<1>(result).separators[0], Equals(Rule(CharacterSet{{'y'}}))); + AssertThat(get<1>(result).separators[1], Equals(Rule(CharacterSet{{' '}}))); AssertThat(get<0>(result).extra_tokens, IsEmpty()); }); @@ -189,11 +314,11 @@ describe("extract_tokens", []() { it("handles inline extra tokens that match tokens in the grammar", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, str("x")}, - Variable{"rule_B", VariableTypeNamed, str("y")}, + InternedVariable{"rule_A", VariableTypeNamed, String{"x"}}, + InternedVariable{"rule_B", VariableTypeNamed, String{"y"}}, }, { - str("y"), + String{"y"}, }, {}, {} @@ -201,18 +326,30 @@ describe("extract_tokens", []() { AssertThat(get<2>(result), Equals(CompileError::none())); AssertThat(get<1>(result).separators.size(), Equals(0)); - AssertThat(get<0>(result).extra_tokens, Equals(set({ Symbol(1, Symbol::Terminal) }))); + AssertThat(get<0>(result).extra_tokens, Equals(set({ Symbol::terminal(1) }))); }); it("updates extra symbols according to the new symbol numbers", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })}, - Variable{"rule_B", VariableTypeNamed, str("y")}, - Variable{"rule_C", VariableTypeNamed, str("z")}, + InternedVariable{ + "rule_A", + VariableTypeNamed, + Rule::seq({ String{"w"}, String{"x"}, Symbol::non_terminal(1) }) + }, + InternedVariable{ + "rule_B", + VariableTypeNamed, + String{"y"} + }, + InternedVariable{ + "rule_C", + VariableTypeNamed, + String{"z"} + }, }, { - i_sym(2), + Symbol::non_terminal(2), }, {}, {} @@ -221,34 +358,55 @@ describe("extract_tokens", []() { AssertThat(get<2>(result), Equals(CompileError::none())); AssertThat(get<0>(result).extra_tokens, Equals(set({ - { Symbol(3, Symbol::Terminal) }, + { Symbol::terminal(3) }, }))); AssertThat(get<1>(result).separators, IsEmpty()); }); it("returns an error if any extra tokens are non-token symbols", [&]() { - auto result = extract_tokens(InternedGrammar{{ - Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })}, - Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })}, - }, { i_sym(1) }, {}, {}}); + auto result = extract_tokens(InternedGrammar{ + { + InternedVariable{ + "rule_A", + VariableTypeNamed, + Rule::seq({ String{"x"}, Symbol::non_terminal(1) }) + }, + InternedVariable{ + "rule_B", + VariableTypeNamed, + Rule::seq({ String{"y"}, String{"z"} }) + }, + }, + { + Symbol::non_terminal(1) + }, + {}, + {} + }); - AssertThat(get<2>(result), !Equals(CompileError::none())); - AssertThat(get<2>(result), Equals( - CompileError(TSCompileErrorTypeInvalidExtraToken, - "Not a token: rule_B"))); + AssertThat(get<2>(result), Equals(CompileError( + TSCompileErrorTypeInvalidExtraToken, + "Non-token symbol rule_B can't be used as an extra token" + ))); }); it("returns an error if any extra tokens are non-token rules", [&]() { - auto result = extract_tokens(InternedGrammar{{ - Variable{"rule_A", VariableTypeNamed, str("x")}, - Variable{"rule_B", VariableTypeNamed, str("y")}, - }, { choice({ i_sym(1), blank() }) }, {}, {}}); + auto result = extract_tokens(InternedGrammar{ + { + {"rule_A", VariableTypeNamed, String{"x"}}, + {"rule_B", VariableTypeNamed, String{"y"}}, + }, + { + Rule::choice({ Symbol::non_terminal(1), Blank{} }) + }, + {}, + {} + }); - AssertThat(get<2>(result), !Equals(CompileError::none())); AssertThat(get<2>(result), Equals(CompileError( TSCompileErrorTypeInvalidExtraToken, - "Not a token: (choice (non-terminal 1) (blank))" + "Non-token rule expression can't be used as an extra token" ))); }); }); @@ -256,13 +414,21 @@ describe("extract_tokens", []() { it("returns an error if an external token has the same name as a non-terminal rule", [&]() { auto result = extract_tokens(InternedGrammar{ { - Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })}, - Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })}, + { + "rule_A", + VariableTypeNamed, + Rule::seq({ String{"x"}, Symbol::non_terminal(1) }) + }, + { + "rule_B", + VariableTypeNamed, + Rule::seq({ String{"y"}, String{"z"} }) + }, }, {}, {}, { - ExternalToken {"rule_A", VariableTypeNamed, Symbol(0, Symbol::NonTerminal)} + ExternalToken {"rule_A", VariableTypeNamed, Symbol::non_terminal(0)} } }); diff --git a/test/compiler/prepare_grammar/flatten_grammar_test.cc b/test/compiler/prepare_grammar/flatten_grammar_test.cc index f935490c..50d48fb4 100644 --- a/test/compiler/prepare_grammar/flatten_grammar_test.cc +++ b/test/compiler/prepare_grammar/flatten_grammar_test.cc @@ -2,7 +2,6 @@ #include "compiler/prepare_grammar/flatten_grammar.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" #include "compiler/syntax_grammar.h" -#include "helpers/rule_helpers.h" #include "helpers/stream_methods.h" START_TEST @@ -12,23 +11,23 @@ using prepare_grammar::flatten_rule; describe("flatten_grammar", []() { it("associates each symbol with the precedence and associativity binding it to its successor", [&]() { - SyntaxVariable result = flatten_rule(Variable{ + SyntaxVariable result = flatten_rule({ "test", VariableTypeNamed, - seq({ - i_sym(1), - prec_left(101, seq({ - i_sym(2), - choice({ - prec_right(102, seq({ - i_sym(3), - i_sym(4) + Rule::seq({ + Symbol::non_terminal(1), + Metadata::prec_left(101, Rule::seq({ + Symbol::non_terminal(2), + Rule::choice({ + Metadata::prec_right(102, Rule::seq({ + Symbol::non_terminal(3), + Symbol::non_terminal(4) })), - i_sym(5), + Symbol::non_terminal(5), }), - i_sym(6), + Symbol::non_terminal(6), })), - i_sym(7), + Symbol::non_terminal(7), }) }); @@ -36,51 +35,51 @@ describe("flatten_grammar", []() { AssertThat(result.type, Equals(VariableTypeNamed)); AssertThat(result.productions, Equals(vector({ Production({ - {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft}, - {Symbol(3, Symbol::NonTerminal), 102, AssociativityRight}, - {Symbol(4, Symbol::NonTerminal), 101, AssociativityLeft}, - {Symbol(6, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(7, Symbol::NonTerminal), 0, AssociativityNone}, + {Symbol::non_terminal(1), 0, AssociativityNone}, + {Symbol::non_terminal(2), 101, AssociativityLeft}, + {Symbol::non_terminal(3), 102, AssociativityRight}, + {Symbol::non_terminal(4), 101, AssociativityLeft}, + {Symbol::non_terminal(6), 0, AssociativityNone}, + {Symbol::non_terminal(7), 0, AssociativityNone}, }), Production({ - {Symbol(1, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft}, - {Symbol(5, Symbol::NonTerminal), 101, AssociativityLeft}, - {Symbol(6, Symbol::NonTerminal), 0, AssociativityNone}, - {Symbol(7, Symbol::NonTerminal), 0, AssociativityNone}, + {Symbol::non_terminal(1), 0, AssociativityNone}, + {Symbol::non_terminal(2), 101, AssociativityLeft}, + {Symbol::non_terminal(5), 101, AssociativityLeft}, + {Symbol::non_terminal(6), 0, AssociativityNone}, + {Symbol::non_terminal(7), 0, AssociativityNone}, }) }))) }); it("uses the last assigned precedence", [&]() { - SyntaxVariable result = flatten_rule(Variable{ + SyntaxVariable result = flatten_rule({ "test1", VariableTypeNamed, - prec_left(101, seq({ - i_sym(1), - i_sym(2), + Metadata::prec_left(101, Rule::seq({ + Symbol::non_terminal(1), + Symbol::non_terminal(2), })) }); AssertThat(result.productions, Equals(vector({ Production({ - {Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft}, - {Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft}, + {Symbol::non_terminal(1), 101, AssociativityLeft}, + {Symbol::non_terminal(2), 101, AssociativityLeft}, }) }))) - result = flatten_rule(Variable{ + result = flatten_rule({ "test2", VariableTypeNamed, - prec_left(101, seq({ - i_sym(1), + Metadata::prec_left(101, Rule::seq({ + Symbol::non_terminal(1), })) }); AssertThat(result.productions, Equals(vector({ Production({ - {Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft}, + {Symbol::non_terminal(1), 101, AssociativityLeft}, }) }))) }); diff --git a/test/compiler/prepare_grammar/intern_symbols_test.cc b/test/compiler/prepare_grammar/intern_symbols_test.cc index 1950d638..dc488951 100644 --- a/test/compiler/prepare_grammar/intern_symbols_test.cc +++ b/test/compiler/prepare_grammar/intern_symbols_test.cc @@ -1,11 +1,7 @@ #include "test_helper.h" #include "compiler/prepare_grammar/intern_symbols.h" #include "compiler/grammar.h" -#include "compiler/rules/named_symbol.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/built_in_symbols.h" -#include "helpers/equals_pointer.h" -#include "helpers/rule_helpers.h" +#include "compiler/rule.h" #include "helpers/stream_methods.h" START_TEST @@ -15,29 +11,29 @@ using prepare_grammar::intern_symbols; describe("intern_symbols", []() { it("replaces named symbols with numerically-indexed symbols", [&]() { - Grammar grammar{ + InputGrammar grammar{ { - {"x", choice({ sym("y"), sym("_z") })}, - {"y", sym("_z")}, - {"_z", str("stuff")} + {"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"_z"} })}, + {"y", VariableTypeNamed, NamedSymbol{"_z"}}, + {"_z", VariableTypeNamed, String{"stuff"}} }, {}, {}, {} }; auto result = intern_symbols(grammar); AssertThat(result.second, Equals(CompileError::none())); - AssertThat(result.first.variables, Equals(vector{ - Variable{"x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })}, - Variable{"y", VariableTypeNamed, i_sym(2)}, - Variable{"_z", VariableTypeHidden, str("stuff")}, + AssertThat(result.first.variables, Equals(vector{ + {"x", VariableTypeNamed, Rule::choice({ Symbol::non_terminal(1), Symbol::non_terminal(2) })}, + {"y", VariableTypeNamed, Symbol::non_terminal(2)}, + {"_z", VariableTypeHidden, String{"stuff"}}, })); }); describe("when there are symbols that reference undefined rules", [&]() { it("returns an error", []() { - Grammar grammar{ + InputGrammar grammar{ { - {"x", sym("y")}, + {"x", VariableTypeNamed, NamedSymbol{"y"}}, }, {}, {}, {} }; @@ -49,14 +45,14 @@ describe("intern_symbols", []() { }); it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() { - Grammar grammar{ + InputGrammar grammar{ { - {"x", choice({ sym("y"), sym("z") })}, - {"y", sym("z")}, - {"z", str("stuff")} + {"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"z"} })}, + {"y", VariableTypeNamed, NamedSymbol{"z"}}, + {"z", VariableTypeNamed, String{"stuff"}} }, { - sym("z") + NamedSymbol{"z"} }, {}, {} }; @@ -65,21 +61,29 @@ describe("intern_symbols", []() { AssertThat(result.second, Equals(CompileError::none())); AssertThat(result.first.extra_tokens.size(), Equals(1)); - AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2))); + AssertThat(result.first.extra_tokens, Equals(vector({ Symbol::non_terminal(2) }))); }); it("records any rule names that match external token names", [&]() { - Grammar grammar{ + InputGrammar grammar{ { - {"x", choice({ sym("y"), sym("z") })}, - {"y", sym("z")}, - {"z", str("stuff")}, + {"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"z"} })}, + {"y", VariableTypeNamed, NamedSymbol{"z"}}, + {"z", VariableTypeNamed, String{"stuff"}}, }, {}, {}, { - "w", - "z" + ExternalToken{ + "w", + VariableTypeNamed, + NONE() + }, + ExternalToken{ + "z", + VariableTypeNamed, + NONE() + }, } }; @@ -94,7 +98,7 @@ describe("intern_symbols", []() { ExternalToken{ "z", VariableTypeNamed, - Symbol(2, Symbol::NonTerminal) + Symbol::non_terminal(2) }, })) }); diff --git a/test/compiler/prepare_grammar/parse_regex_test.cc b/test/compiler/prepare_grammar/parse_regex_test.cc index 72ca0a40..a252654b 100644 --- a/test/compiler/prepare_grammar/parse_regex_test.cc +++ b/test/compiler/prepare_grammar/parse_regex_test.cc @@ -1,7 +1,5 @@ #include "test_helper.h" #include "compiler/prepare_grammar/parse_regex.h" -#include "helpers/equals_pointer.h" -#include "helpers/rule_helpers.h" START_TEST @@ -12,178 +10,218 @@ describe("parse_regex", []() { struct ValidInputRow { string description; string pattern; - rule_ptr rule; + Rule rule; }; vector valid_inputs = { { "character sets", "[aAeE]", - character({ 'a', 'A', 'e', 'E' }) + CharacterSet{{ 'a', 'A', 'e', 'E' }} }, { "'.' characters as wildcards", ".", - character({ '\n' }, false) + CharacterSet().include_all().exclude('\n') }, { "character classes", "\\w-\\d-\\s-\\W-\\D-\\S", - seq({ - character({ + Rule::seq({ + CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }), - character({ '-' }), - character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }), - character({ '-' }), - character({ ' ', '\t', '\r', '\n' }), - character({ '-' }), - character({ - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', - 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }, false), - character({ '-' }), - character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false), - character({ '-' }), - character({ ' ', '\t', '\r', '\n' }, false), + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }}, + CharacterSet{{ '-' }}, + CharacterSet{{ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }}, + CharacterSet{{ '-' }}, + CharacterSet{{ ' ', '\t', '\r', '\n' }}, + CharacterSet{{ '-' }}, + CharacterSet().include_all() + .exclude('a', 'z') + .exclude('A', 'Z') + .exclude('0', '9') + .exclude('_'), + CharacterSet{{ '-' }}, + CharacterSet().include_all().exclude('0', '9'), + CharacterSet{{ '-' }}, + CharacterSet().include_all() + .exclude(' ') + .exclude('\t') + .exclude('\r') + .exclude('\n') }) }, { "choices", "ab|cd|ef", - choice({ - seq({ - character({ 'a' }), - character({ 'b' }) }), - seq({ - character({ 'c' }), - character({ 'd' }) }), - seq({ - character({ 'e' }), - character({ 'f' }) }) }) + Rule::choice({ + Seq{ + CharacterSet{{'a'}}, + CharacterSet{{'b'}} + }, + Seq{ + CharacterSet{{'c'}}, + CharacterSet{{'d'}} + }, + Seq{ + CharacterSet{{'e'}}, + CharacterSet{{'f'}} + } + }) }, { "simple sequences", "abc", - seq({ - character({ 'a' }), - character({ 'b' }), - character({ 'c' }) }) + Rule::seq({ + CharacterSet{{'a'}}, + CharacterSet{{'b'}}, + CharacterSet{{'c'}} + }) }, { "character ranges", "[12a-dA-D3]", - character({ + CharacterSet{{ '1', '2', '3', 'a', 'b', 'c', 'd', - 'A', 'B', 'C', 'D' }) + 'A', 'B', 'C', 'D' + }} }, { "negated characters", "[^a\\d]", - character({ 'a', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false) + CharacterSet().include_all() + .exclude('a') + .exclude('0', '9') }, { "backslashes", "\\\\", - character({ '\\' }) + CharacterSet{{'\\'}} }, { "character groups in sequences", "x([^x]|\\\\x)*x", - seq({ - character({ 'x' }), - repeat(choice({ - character({ 'x' }, false), - seq({ character({ '\\' }), character({ 'x' }) }) })), - character({ 'x' }) }) + Rule::seq({ + CharacterSet{{'x'}}, + Rule::choice({ + Repeat{Rule::choice({ + CharacterSet().include_all().exclude('x'), + Rule::seq({ + CharacterSet{{'\\'}}, + CharacterSet{{'x'}} + }) + })}, + Blank{} + }), + CharacterSet{{'x'}} + }) }, { "choices in sequences", "(a|b)cd", - seq({ - choice({ - character({ 'a' }), - character({ 'b' }) }), - character({ 'c' }), - character({ 'd' }) }) + Rule::seq({ + Rule::choice({ + CharacterSet{{'a'}}, + CharacterSet{{'b'}} }), + CharacterSet{{'c'}}, + CharacterSet{{'d'}} }) }, { "escaped parentheses", "a\\(b", - seq({ - character({ 'a' }), - character({ '(' }), - character({ 'b' }) }) + Rule::seq({ + CharacterSet{{'a'}}, + CharacterSet{{'('}}, + CharacterSet{{'b'}}, + }) }, { "escaped periods", "a\\.", - seq({ - character({ 'a' }), - character({ '.' }) }) + Rule::seq({ + CharacterSet{{'a'}}, + CharacterSet{{'.'}}, + }) }, { "escaped characters", "\\t\\n\\r", - seq({ - character({ '\t' }), - character({ '\n' }), - character({ '\r' }) }) + Rule::seq({ + CharacterSet{{'\t'}}, + CharacterSet{{'\n'}}, + CharacterSet{{'\r'}}, + }) }, { "plus repeats", "(ab)+(cd)+", - seq({ - repeat1(seq({ character({ 'a' }), character({ 'b' }) })), - repeat1(seq({ character({ 'c' }), character({ 'd' }) })) }) + Rule::seq({ + Repeat{Rule::seq({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })}, + Repeat{Rule::seq({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })}, + }) }, { "asterix repeats", "(ab)*(cd)*", - seq({ - repeat(seq({ character({ 'a' }), character({ 'b' }) })), - repeat(seq({ character({ 'c' }), character({ 'd' }) })) }) + Rule::seq({ + Rule::choice({ + Repeat{Rule::seq({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })}, + Blank{}, + }), + Rule::choice({ + Repeat{Rule::seq({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })}, + Blank{}, + }), + }) }, { "optional rules", "a(bc)?", - seq({ - character({ 'a' }), - choice({ - seq({ character({ 'b' }), character({ 'c' }) }), - blank() }) }) + Rule::seq({ + CharacterSet{{'a'}}, + Rule::choice({ + Rule::seq({ + CharacterSet{{'b'}}, + CharacterSet{{'c'}}, + }), + Blank{} + }), + }) }, { "choices containing negated character classes", - "/([^/]|(\\\\/))*/", - seq({ - character({ '/' }), - repeat(choice({ - character({ '/' }, false), - seq({ character({ '\\' }), character({ '/' }) }) })), - character({ '/' }), }), + "/([^/]|(\\\\/))+/", + Rule::seq({ + CharacterSet{{'/'}}, + Repeat{Rule::choice({ + CharacterSet().include_all().exclude('/'), + Rule::seq({ + CharacterSet{{'\\'}}, + CharacterSet{{'/'}}, + }), + })}, + CharacterSet{{'/'}}, + }), }, }; @@ -229,7 +267,7 @@ describe("parse_regex", []() { for (auto &row : valid_inputs) { it(("parses " + row.description).c_str(), [&]() { auto result = parse_regex(row.pattern); - AssertThat(result.first, EqualsPointer(row.rule)); + AssertThat(result.first, Equals(row.rule)); }); } diff --git a/test/compiler/rules/character_set_test.cc b/test/compiler/rules/character_set_test.cc index 4c8f415b..f7c2e632 100644 --- a/test/compiler/rules/character_set_test.cc +++ b/test/compiler/rules/character_set_test.cc @@ -1,5 +1,5 @@ #include "test_helper.h" -#include "compiler/rules/character_set.h" +#include "compiler/rule.h" using namespace rules; @@ -66,7 +66,7 @@ describe("CharacterSet", []() { .include('a', 'd') .include('f', 'm'); - AssertThat(set1.hash_code(), Equals(set2.hash_code())); + AssertThat(hash()(set1), Equals(hash()(set2))); }); it("returns different numbers for character sets that include different ranges", [&]() { @@ -78,8 +78,8 @@ describe("CharacterSet", []() { .include('a', 'c') .include('f', 'm'); - AssertThat(set1.hash_code(), !Equals(set2.hash_code())); - AssertThat(set2.hash_code(), !Equals(set1.hash_code())); + AssertThat(hash()(set1), !Equals(hash()(set2))); + AssertThat(hash()(set2), !Equals(hash()(set1))); }); it("returns different numbers for character sets that exclude different ranges", [&]() { @@ -93,16 +93,16 @@ describe("CharacterSet", []() { .exclude('a', 'c') .exclude('f', 'm'); - AssertThat(set1.hash_code(), !Equals(set2.hash_code())); - AssertThat(set2.hash_code(), !Equals(set1.hash_code())); + AssertThat(hash()(set1), !Equals(hash()(set2))); + AssertThat(hash()(set2), !Equals(hash()(set1))); }); it("returns different numbers for character sets with different sign", [&]() { CharacterSet set1 = CharacterSet().include_all(); CharacterSet set2 = CharacterSet(); - AssertThat(set1.hash_code(), !Equals(set2.hash_code())); - AssertThat(set2.hash_code(), !Equals(set1.hash_code())); + AssertThat(hash()(set1), !Equals(hash()(set2))); + AssertThat(hash()(set2), !Equals(hash()(set1))); }); }); @@ -312,7 +312,7 @@ describe("CharacterSet", []() { .include('z'); AssertThat(set1.included_ranges(), Equals(vector({ - CharacterRange('a', 'c'), + CharacterRange{'a', 'c'}, CharacterRange('g'), CharacterRange('z'), }))); diff --git a/test/compiler/rules/choice_test.cc b/test/compiler/rules/choice_test.cc index 59b52740..fb2bbbaf 100644 --- a/test/compiler/rules/choice_test.cc +++ b/test/compiler/rules/choice_test.cc @@ -1,7 +1,5 @@ #include "test_helper.h" -#include "compiler/rules/choice.h" -#include "helpers/rule_helpers.h" -#include "helpers/equals_pointer.h" +#include "compiler/rule.h" using namespace rules; @@ -10,42 +8,62 @@ START_TEST describe("Choice", []() { describe("constructing choices", [&]() { it("eliminates duplicate members", [&]() { - auto rule = Choice::build({ - seq({ sym("one"), sym("two") }), - sym("three"), - seq({ sym("one"), sym("two") }) + Rule rule = Rule::choice({ + Rule::seq({ NamedSymbol{"one"}, NamedSymbol{"two"} }), + NamedSymbol{"three"}, + Rule::seq({ NamedSymbol{"one"}, NamedSymbol{"two"} }) }); - AssertThat(rule, EqualsPointer(choice({ - seq({ sym("one"), sym("two") }), - sym("three"), - }))); + AssertThat(rule, Equals(Rule(Choice{{ + Rule::seq({ NamedSymbol{"one"}, NamedSymbol{"two"} }), + NamedSymbol{"three"}, + }}))); + + rule = Rule::choice({ + Blank{}, + Blank{}, + Rule::choice({ + Blank{}, + NamedSymbol{"four"} + }) + }); + + AssertThat(rule, Equals(Rule::choice({Blank{}, NamedSymbol{"four"}}))); }); it("eliminates duplicates within nested choices", [&]() { - auto rule = Choice::build({ - seq({ sym("one"), sym("two") }), - Choice::build({ - sym("three"), - seq({ sym("one"), sym("two") }) + Rule rule = Rule::choice({ + Rule::seq({ + NamedSymbol{"one"}, + NamedSymbol{"two"} + }), + Rule::choice({ + NamedSymbol{"three"}, + Rule::seq({ + NamedSymbol{"one"}, + NamedSymbol{"two"} + }) }) }); - AssertThat(rule, EqualsPointer(choice({ - seq({ sym("one"), sym("two") }), - sym("three"), - }))); + AssertThat(rule, Equals(Rule(Choice{{ + Rule::seq({ + NamedSymbol{"one"}, + NamedSymbol{"two"}, + }), + NamedSymbol{"three"}, + }}))); }); it("doesn't construct a choice if there's only one unique member", [&]() { - auto rule = Choice::build({ - sym("one"), - Choice::build({ - sym("one"), + Rule rule = Rule::choice({ + NamedSymbol{"one"}, + Rule::choice({ + NamedSymbol{"one"}, }) }); - AssertThat(rule, EqualsPointer(sym("one"))); + AssertThat(rule, Equals(Rule(NamedSymbol{"one"}))); }); }); }); diff --git a/test/compiler/rules/repeat_test.cc b/test/compiler/rules/repeat_test.cc index 693b2d43..05b2f117 100644 --- a/test/compiler/rules/repeat_test.cc +++ b/test/compiler/rules/repeat_test.cc @@ -1,6 +1,5 @@ #include "test_helper.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/symbol.h" +#include "compiler/rule.h" using namespace rules; @@ -9,11 +8,11 @@ START_TEST describe("Repeat", []() { describe("constructing repeats", [&]() { it("doesn't create redundant repeats", [&]() { - auto sym = make_shared(1, Symbol::NonTerminal); - auto repeat = Repeat::build(sym); - auto outer_repeat = Repeat::build(repeat); + Rule symbol = Symbol::non_terminal(1); + Rule repeat = Rule::repeat(Rule(symbol)); + Rule outer_repeat = Rule::repeat(Rule(repeat)); - AssertThat(repeat, !Equals(sym)); + AssertThat(repeat, !Equals(symbol)); AssertThat(outer_repeat, Equals(repeat)); }); }); diff --git a/test/helpers/equals_pointer.h b/test/helpers/equals_pointer.h deleted file mode 100644 index b78be66e..00000000 --- a/test/helpers/equals_pointer.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef HELPERS_EQUALS_POINTER_H_ -#define HELPERS_EQUALS_POINTER_H_ - -#include "bandit/bandit.h" -#include - -namespace snowhouse { - using namespace std; - - template - struct EqualsPointerConstraint : Expression> { - EqualsPointerConstraint(const ExpectedType& expected) : expected(expected) {} - - template - bool operator()(const ActualType& actual) const { - return *expected == *actual; - } - - ExpectedType expected; - }; - - template - struct Stringizer> { - static string ToString(const EqualsPointerConstraint& constraint) { - ostringstream builder; - builder << "pointer to " << snowhouse::Stringize(constraint.expected); - return builder.str(); - } - }; - - template - inline EqualsPointerConstraint EqualsPointer(const ExpectedType& expected) { - return EqualsPointerConstraint(expected); - } -} - -#endif // HELPERS_EQUALS_POINTER_H_ diff --git a/test/helpers/rule_helpers.cc b/test/helpers/rule_helpers.cc deleted file mode 100644 index 968d59ba..00000000 --- a/test/helpers/rule_helpers.cc +++ /dev/null @@ -1,62 +0,0 @@ -#include "rule_helpers.h" -#include -#include "compiler/rules/symbol.h" -#include "compiler/variable.h" -#include "compiler/lexical_grammar.h" - -namespace tree_sitter { - using std::make_shared; - using std::set; - using std::map; - using std::ostream; - using std::string; - using std::to_string; - using rules::Symbol; - - rule_ptr character(const set &ranges) { - return character(ranges, true); - } - - rule_ptr character(const set &chars, bool sign) { - rules::CharacterSet result; - if (sign) { - for (uint32_t c : chars) - result.include(c); - } else { - result.include_all(); - for (uint32_t c : chars) - result.exclude(c); - } - return result.copy(); - } - - rule_ptr i_sym(size_t index) { - return make_shared(index, Symbol::NonTerminal); - } - - rule_ptr i_token(size_t index) { - return make_shared(index, Symbol::Terminal); - } - - rule_ptr metadata(rule_ptr rule, rules::MetadataParams params) { - return rules::Metadata::build(rule, params); - } - - rule_ptr active_prec(int precedence, rule_ptr rule) { - rules::MetadataParams params; - params.precedence = precedence; - params.has_precedence = true; - params.is_active = true; - return rules::Metadata::build(rule, params); - } - - bool operator==(const Variable &left, const Variable &right) { - return left.name == right.name && left.rule->operator==(*right.rule) && - left.type == right.type; - } - - bool operator==(const LexicalVariable &left, const LexicalVariable &right) { - return left.name == right.name && left.rule->operator==(*right.rule) && - left.type == right.type && left.is_string == right.is_string; - } -} diff --git a/test/helpers/rule_helpers.h b/test/helpers/rule_helpers.h deleted file mode 100644 index 8ebe87e8..00000000 --- a/test/helpers/rule_helpers.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef HELPERS_RULE_HELPERS_H_ -#define HELPERS_RULE_HELPERS_H_ - -#include "tree_sitter/compiler.h" -#include "compiler/rules.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/metadata.h" -#include "compiler/variable.h" - -namespace tree_sitter { - rule_ptr metadata(rule_ptr, rules::MetadataParams params); - rule_ptr character(const std::set &); - rule_ptr character(const std::set &, bool sign); - rule_ptr i_sym(size_t index); - rule_ptr i_token(size_t index); - rule_ptr active_prec(int precedence, rule_ptr); - - struct Variable; - struct LexicalVariable; - - bool operator==(const Variable &left, const Variable &right); - bool operator==(const LexicalVariable &left, const LexicalVariable &right); -} - -#endif // HELPERS_RULE_HELPERS_H_ diff --git a/test/helpers/stream_methods.cc b/test/helpers/stream_methods.cc index 20338b47..56c85890 100644 --- a/test/helpers/stream_methods.cc +++ b/test/helpers/stream_methods.cc @@ -1,6 +1,7 @@ #include "helpers/stream_methods.h" #include "test_helper.h" #include "tree_sitter/compiler.h" +#include "compiler/util/string_helpers.h" #include "compiler/parse_table.h" #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" @@ -9,109 +10,177 @@ namespace tree_sitter { -ostream &operator<<(ostream &stream, const Grammar &grammar) { - stream << string("#"); +ostream &operator<<(ostream &stream, const InputGrammar &grammar) { + return stream << "(InputGrammar variables: " << grammar.variables << ")"; } ostream &operator<<(ostream &stream, const CompileError &error) { - if (error.type) - return stream << (string("#"); - else - return stream << string("#"); + if (error.type) { + return stream << "(CompileError " << error.message << ")"; + } else { + return stream << "(No CompileError)"; + } +} + +namespace rules { + +ostream &operator<<(ostream &stream, Associativity associativity) { + switch (associativity) { + case AssociativityLeft: + return stream << "AssociativityLeft"; + case AssociativityRight: + return stream << "AssociativityRight"; + case AssociativityNone: + return stream << "AssociativityNone"; + } +} + +ostream &operator<<(ostream &stream, const Blank &) { + return stream << "(Blank)"; +} + +ostream &operator<<(ostream &stream, const CharacterRange &range) { + if (range.min == range.max) { + return stream << util::escape_char(range.min); + } else { + return stream << "(" + util::escape_char(range.min) << "-" << util::escape_char(range.max) << ")"; + } +} + +ostream &operator<<(ostream &stream, const CharacterSet &rule) { + stream << "(CharacterSet"; + if (rule.includes_all) { + if (rule.excluded_chars.empty()) { + stream << " all"; + } else { + stream << " exclude"; + for (const auto &range : rule.excluded_ranges()) { + stream << " " << range; + } + } + } else { + for (const auto &range : rule.included_ranges()) { + stream << " " << range; + } + } + return stream << ")"; +} + +ostream &operator<<(ostream &stream, const Symbol &rule) { + stream << "(Symbol "; + switch (rule.type) { + case Symbol::External: + stream << "external"; + break; + case Symbol::Terminal: + stream << "terminal"; + break; + case Symbol::NonTerminal: + stream << "non-terminal"; + break; + } + return stream << " " << rule.index << ")"; +} + +ostream &operator<<(ostream &stream, const NamedSymbol &rule) { + return stream << "(NamedSymbol " << rule.value << ")"; +} + +ostream &operator<<(ostream &stream, const String &rule) { + return stream << "(String " << rule.value << ")"; +} + +ostream &operator<<(ostream &stream, const Pattern &rule) { + return stream << "(Pattern " << rule.value << ")"; +} + +ostream &operator<<(ostream &stream, const Choice &rule) { + stream << "(Choice"; + for (const auto &element : rule.elements) { + stream << " " << element; + } + return stream << ")"; +} + +ostream &operator<<(ostream &stream, const Seq &rule) { + return stream << "(Seq " << *rule.left << " " << *rule.right << ")"; +} + +ostream &operator<<(ostream &stream, const Repeat &rule) { + return stream << "(Repeat " << *rule.rule << ")"; +} + +ostream &operator<<(ostream &stream, const Metadata &rule) { + return stream << "(Metadata " << *rule.rule << ")"; } ostream &operator<<(ostream &stream, const Rule &rule) { - return stream << rule.to_string(); -} - -ostream &operator<<(ostream &stream, const rule_ptr &rule) { - if (rule.get()) - stream << *rule; - else - stream << string("(null-rule)"); + rule.match( + [&stream](Blank r) { stream << r; }, + [&stream](NamedSymbol r) { stream << r; }, + [&stream](Symbol r) { stream << r; }, + [&stream](String r) { stream << r; }, + [&stream](Pattern r) { stream << r; }, + [&stream](CharacterSet r) { stream << r; }, + [&stream](Choice r) { stream << r; }, + [&stream](Seq r) { stream << r; }, + [&stream](Repeat r) { stream << r; }, + [&stream](Metadata r) { stream << r; } + ); return stream; } -ostream &operator<<(ostream &stream, const Variable &variable) { - return stream << string("{") << variable.name << string(", ") << variable.rule << string(", ") << to_string(variable.type) << string("}"); +} // namespace rules + +ostream &operator<<(ostream &stream, const InputGrammar::Variable &variable) { + return stream << "(Variable " << variable.name << " " << variable.rule << ")"; } ostream &operator<<(ostream &stream, const SyntaxVariable &variable) { - return stream << string("{") << variable.name << string(", ") << variable.productions << string(", ") << to_string(variable.type) << string("}"); + return stream << "(Variable " << variable.name << " " << variable.productions << + " " << to_string(variable.type) << "}"; } ostream &operator<<(ostream &stream, const LexicalVariable &variable) { - return stream << "{" << variable.name << ", " << variable.rule << ", " << - to_string(variable.type) << ", " << to_string(variable.is_string) << "}"; -} - -std::ostream &operator<<(std::ostream &stream, const AdvanceAction &action) { - return stream << string("#"; -} - -std::ostream &operator<<(std::ostream &stream, const AcceptTokenAction &action) { - return stream << string("#"; -} - -ostream &operator<<(ostream &stream, const ParseAction &action) { - switch (action.type) { - case ParseActionTypeError: - return stream << string("#"); - case ParseActionTypeAccept: - return stream << string("#"); - case ParseActionTypeShift: - return stream << string("#"; - case ParseActionTypeReduce: - return stream << ("#"); - default: - return stream; - } -} - -ostream &operator<<(ostream &stream, const ParseTableEntry &entry) { - return stream << entry.actions; -} - -ostream &operator<<(ostream &stream, const ParseState &state) { - stream << string("#"); + return stream << "(Variable " << variable.name << " " << to_string(variable.type) << + " " << variable.rule << ")"; } ostream &operator<<(ostream &stream, const ExternalToken &external_token) { - return stream << "{" << external_token.name << ", " << external_token.type << - "," << external_token.corresponding_internal_token << "}"; + return stream << "(ExternalToken " << external_token.name << " " << + external_token.type << " " << external_token.corresponding_internal_token << ")"; } ostream &operator<<(ostream &stream, const ProductionStep &step) { - stream << "(symbol: " << step.symbol << ", precedence:" << to_string(step.precedence); - stream << ", associativity: "; - switch (step.associativity) { - case rules::AssociativityLeft: - return stream << "left)"; - case rules::AssociativityRight: - return stream << "right)"; - default: - return stream << "none)"; - } + return stream << "(ProductionStep " << step.symbol << " precedence:" << + to_string(step.precedence) << " associativity:" << step.associativity << ")"; } ostream &operator<<(ostream &stream, const PrecedenceRange &range) { - if (range.empty) - return stream << string("{empty}"); - else - return stream << string("{") << to_string(range.min) << string(", ") << to_string(range.max) << string("}"); + if (range.empty) { + return stream << "(PrecedenceRange)"; + } else { + return stream << "(PrecedenceRange " << to_string(range.min) << " " << + to_string(range.max) << ")"; + } } +namespace prepare_grammar { + +ostream &operator<<(ostream &stream, const prepare_grammar::InternedGrammar::Variable &variable) { + return stream << "(Variable " << variable.name << " " << variable.rule << ")"; +} + +ostream &operator<<(ostream &stream, const prepare_grammar::InitialSyntaxGrammar::Variable &variable) { + return stream << "(Variable " << variable.name << " " << variable.rule << ")"; +} + +} // namespace prepare_grammar + namespace build_tables { ostream &operator<<(ostream &stream, const LexItem &item) { - return stream << string("(item ") << item.lhs << string(" ") << *item.rule - << string(")"); + return stream << "(LexItem " << item.lhs << " " << item.rule << ")"; } ostream &operator<<(ostream &stream, const LexItemSet &item_set) { @@ -119,26 +188,7 @@ ostream &operator<<(ostream &stream, const LexItemSet &item_set) { } ostream &operator<<(ostream &stream, const LexItemSet::Transition &transition) { - return stream << "{dest: " << transition.destination << ", prec: " << transition.precedence << "}"; -} - -ostream &operator<<(ostream &stream, const ParseItem &item) { - return stream << string("(item variable:") << to_string(item.variable_index) - << string(" production:") << to_string((size_t)item.production % 1000) - << string(" step:") << to_string(item.step_index) - << string(")"); -} - -std::ostream &operator<<(std::ostream &stream, const ParseItemSet &item_set) { - return stream << item_set.entries; -} - -std::ostream &operator<<(std::ostream &stream, const LookaheadSet &set) { - if (set.entries.get()) { - return stream << *set.entries; - } else { - return stream << "{}"; - } + return stream << "(Transition " << transition.destination << " prec:" << transition.precedence << ")"; } } // namespace build_tables diff --git a/test/helpers/stream_methods.h b/test/helpers/stream_methods.h index 149e43c5..58b7fd17 100644 --- a/test/helpers/stream_methods.h +++ b/test/helpers/stream_methods.h @@ -8,6 +8,11 @@ #include #include #include "compiler/grammar.h" +#include "compiler/prepare_grammar/interned_grammar.h" +#include "compiler/prepare_grammar/initial_syntax_grammar.h" +#include "compiler/lexical_grammar.h" +#include "compiler/syntax_grammar.h" +#include "compiler/rule.h" #include "compiler/compile_error.h" #include "compiler/build_tables/lex_item.h" @@ -91,9 +96,8 @@ namespace tree_sitter { using std::ostream; using std::string; using std::to_string; -struct Variable; -struct SyntaxVariable; -struct LexicalVariable; + +struct InputGrammar; struct AdvanceAction; struct AcceptTokenAction; struct ParseAction; @@ -102,20 +106,36 @@ struct ExternalToken; struct ProductionStep; struct PrecedenceRange; -ostream &operator<<(ostream &, const Grammar &); +ostream &operator<<(ostream &, const InputGrammar &); ostream &operator<<(ostream &, const CompileError &); -ostream &operator<<(ostream &, const Rule &); -ostream &operator<<(ostream &, const rule_ptr &); -ostream &operator<<(ostream &, const Variable &); -ostream &operator<<(ostream &, const SyntaxVariable &); -ostream &operator<<(ostream &, const LexicalVariable &); -ostream &operator<<(ostream &, const AdvanceAction &); -ostream &operator<<(ostream &, const AcceptTokenAction &); -ostream &operator<<(ostream &, const ParseAction &); -ostream &operator<<(ostream &, const ParseState &); ostream &operator<<(ostream &, const ExternalToken &); ostream &operator<<(ostream &, const ProductionStep &); ostream &operator<<(ostream &, const PrecedenceRange &); +ostream &operator<<(ostream &, const LexicalVariable &); + +namespace rules { + +ostream &operator<<(ostream &, const Blank &); +ostream &operator<<(ostream &, const CharacterRange &); +ostream &operator<<(ostream &, const CharacterSet &); +ostream &operator<<(ostream &, const Symbol &); +ostream &operator<<(ostream &, const NamedSymbol &); +ostream &operator<<(ostream &, const String &); +ostream &operator<<(ostream &, const Pattern &); +ostream &operator<<(ostream &stream, const Choice &rule); +ostream &operator<<(ostream &stream, const Seq &rule); +ostream &operator<<(ostream &stream, const Repeat &rule); +ostream &operator<<(ostream &stream, const Metadata &rule); +ostream &operator<<(ostream &stream, const Rule &rule); + +} // namespace rules + +namespace prepare_grammar { + +ostream &operator<<(ostream &, const InitialSyntaxGrammar::Variable &); +ostream &operator<<(ostream &, const InternedGrammar::Variable &); + +} // namespace prepare_grammar namespace build_tables { diff --git a/test/integration/test_grammars.cc b/test/integration/test_grammars.cc index f72043d9..37a1a949 100644 --- a/test/integration/test_grammars.cc +++ b/test/integration/test_grammars.cc @@ -25,10 +25,10 @@ for (auto &language_name : test_languages) { } string grammar_json = read_file(grammar_path); - TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str()); if (file_exists(expected_error_path)) { it("fails with the correct error message", [&]() { + TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str()); string expected_error = read_file(expected_error_path); AssertThat((void *)compile_result.error_message, !IsNull()); AssertThat(compile_result.error_message, Equals(expected_error)); @@ -41,6 +41,8 @@ for (auto &language_name : test_languages) { before_each([&]() { if (!language) { + TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str()); + language = load_test_language( language_name, compile_result, diff --git a/tests.gyp b/tests.gyp index 9a792820..770ec9d8 100644 --- a/tests.gyp +++ b/tests.gyp @@ -37,13 +37,13 @@ '-std=c99', ], 'cflags_cc': [ - '-std=c++0x', + '-std=c++14', ], 'ldflags': [ '-g', ], 'xcode_settings': { - 'CLANG_CXX_LANGUAGE_STANDARD': 'c++11', + 'CLANG_CXX_LANGUAGE_STANDARD': 'c++14', 'OTHER_LDFLAGS': ['-g'], 'GCC_OPTIMIZATION_LEVEL': '0', 'ALWAYS_SEARCH_USER_PATHS': 'NO',