Merge pull request #69 from tree-sitter/rules-variant
Implement Rule as a union type rather than an abstract base class
This commit is contained in:
commit
4d39f13eaf
109 changed files with 2795 additions and 3151 deletions
|
|
@ -1,3 +1,4 @@
|
|||
-std=c++14
|
||||
-Isrc
|
||||
-Itest
|
||||
-Iinclude
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ typedef enum {
|
|||
TSCompileErrorTypeLexConflict,
|
||||
TSCompileErrorTypeParseConflict,
|
||||
TSCompileErrorTypeEpsilonRule,
|
||||
TSCompileErrorTypeInvalidTokenContents,
|
||||
} TSCompileErrorType;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
|||
19
project.gyp
19
project.gyp
|
|
@ -33,40 +33,25 @@
|
|||
'src/compiler/prepare_grammar/extract_tokens.cc',
|
||||
'src/compiler/prepare_grammar/flatten_grammar.cc',
|
||||
'src/compiler/prepare_grammar/intern_symbols.cc',
|
||||
'src/compiler/prepare_grammar/is_token.cc',
|
||||
'src/compiler/prepare_grammar/normalize_rules.cc',
|
||||
'src/compiler/prepare_grammar/parse_regex.cc',
|
||||
'src/compiler/prepare_grammar/prepare_grammar.cc',
|
||||
'src/compiler/prepare_grammar/token_description.cc',
|
||||
'src/compiler/rule.cc',
|
||||
'src/compiler/syntax_grammar.cc',
|
||||
'src/compiler/rules/blank.cc',
|
||||
'src/compiler/rules/built_in_symbols.cc',
|
||||
'src/compiler/rules/character_range.cc',
|
||||
'src/compiler/rules/character_set.cc',
|
||||
'src/compiler/rules/choice.cc',
|
||||
'src/compiler/rules/metadata.cc',
|
||||
'src/compiler/rules/named_symbol.cc',
|
||||
'src/compiler/rules/pattern.cc',
|
||||
'src/compiler/rules/repeat.cc',
|
||||
'src/compiler/rules/rules.cc',
|
||||
'src/compiler/rules/seq.cc',
|
||||
'src/compiler/rules/string.cc',
|
||||
'src/compiler/rules/symbol.cc',
|
||||
'src/compiler/rules/visitor.cc',
|
||||
'src/compiler/util/string_helpers.cc',
|
||||
'externals/utf8proc/utf8proc.c',
|
||||
'externals/json-parser/json.c',
|
||||
],
|
||||
'cflags_cc': [
|
||||
'-std=c++0x',
|
||||
],
|
||||
'cflags_cc!': [
|
||||
'-fno-rtti'
|
||||
'-std=c++14',
|
||||
],
|
||||
'xcode_settings': {
|
||||
'CLANG_CXX_LANGUAGE_STANDARD': 'c++11',
|
||||
'GCC_ENABLE_CPP_RTTI': 'YES',
|
||||
'CLANG_CXX_LANGUAGE_STANDARD': 'c++14',
|
||||
'GCC_ENABLE_CPP_EXCEPTIONS': 'NO',
|
||||
},
|
||||
'direct_dependent_settings': {
|
||||
|
|
|
|||
|
|
@ -10,8 +10,7 @@
|
|||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/build_tables/lex_table_builder.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -25,7 +24,6 @@ using std::map;
|
|||
using std::string;
|
||||
using std::to_string;
|
||||
using std::unordered_map;
|
||||
using std::make_shared;
|
||||
using rules::Associativity;
|
||||
using rules::Symbol;
|
||||
using rules::END_OF_INPUT;
|
||||
|
|
@ -53,8 +51,8 @@ class ParseTableBuilder {
|
|||
|
||||
pair<ParseTable, CompileError> build() {
|
||||
Symbol start_symbol = grammar.variables.empty() ?
|
||||
Symbol(0, Symbol::Terminal) :
|
||||
Symbol(0, Symbol::NonTerminal);
|
||||
Symbol::terminal(0) :
|
||||
Symbol::non_terminal(0);
|
||||
|
||||
Production start_production{
|
||||
ProductionStep{start_symbol, 0, rules::AssociativityNone},
|
||||
|
|
@ -121,7 +119,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
if (!has_non_reciprocal_conflict) {
|
||||
add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::Terminal));
|
||||
add_out_of_context_parse_state(&error_state, Symbol::terminal(i));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -132,11 +130,11 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < grammar.external_tokens.size(); i++) {
|
||||
add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::External));
|
||||
add_out_of_context_parse_state(&error_state, Symbol::external(i));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++) {
|
||||
add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::NonTerminal));
|
||||
add_out_of_context_parse_state(&error_state, Symbol::non_terminal(i));
|
||||
}
|
||||
|
||||
error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0));
|
||||
|
|
@ -253,7 +251,7 @@ class ParseTableBuilder {
|
|||
ParseStateId next_state = add_parse_state(next_item_set);
|
||||
parse_table.set_nonterminal_action(state_id, lookahead, next_state);
|
||||
if (!allow_any_conflict)
|
||||
recovery_states[Symbol(lookahead, Symbol::NonTerminal)].add(next_item_set);
|
||||
recovery_states[Symbol::non_terminal(lookahead)].add(next_item_set);
|
||||
}
|
||||
|
||||
for (Symbol lookahead : lookaheads_with_conflicts) {
|
||||
|
|
@ -428,7 +426,7 @@ class ParseTableBuilder {
|
|||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
for (Symbol::Index incompatible_index : incompatible_token_indices) {
|
||||
Symbol incompatible_symbol(incompatible_index, Symbol::Terminal);
|
||||
Symbol incompatible_symbol = Symbol::terminal(incompatible_index);
|
||||
if (other.terminal_entries.count(incompatible_symbol)) return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -452,7 +450,7 @@ class ParseTableBuilder {
|
|||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
for (Symbol::Index incompatible_index : incompatible_token_indices) {
|
||||
Symbol incompatible_symbol(incompatible_index, Symbol::Terminal);
|
||||
Symbol incompatible_symbol = Symbol::terminal(incompatible_index);
|
||||
if (state.terminal_entries.count(incompatible_symbol)) return false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include <utility>
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include <map>
|
||||
#include <set>
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
|
|||
|
|
@ -2,12 +2,7 @@
|
|||
#include <unordered_set>
|
||||
#include "compiler/build_tables/lex_item_transitions.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/util/hash_combine.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -19,51 +14,63 @@ using std::unordered_set;
|
|||
using rules::CharacterSet;
|
||||
using rules::Symbol;
|
||||
|
||||
LexItem::LexItem(const rules::Symbol &lhs, const rule_ptr rule)
|
||||
LexItem::LexItem(const rules::Symbol &lhs, const rules::Rule &rule)
|
||||
: lhs(lhs), rule(rule) {}
|
||||
|
||||
bool LexItem::operator==(const LexItem &other) const {
|
||||
return (other.lhs == lhs) && other.rule->operator==(*rule);
|
||||
return lhs == other.lhs && rule == other.rule;
|
||||
}
|
||||
|
||||
LexItem::CompletionStatus LexItem::completion_status() const {
|
||||
class GetCompletionStatus : public rules::RuleFn<CompletionStatus> {
|
||||
protected:
|
||||
CompletionStatus apply_to(const rules::Choice *rule) {
|
||||
for (const auto &element : rule->elements) {
|
||||
CompletionStatus status = apply(element);
|
||||
using CompletionStatus = LexItem::CompletionStatus;
|
||||
|
||||
static CompletionStatus get_completion_status(const rules::Rule &rule) {
|
||||
return rule.match(
|
||||
[](rules::Choice choice) {
|
||||
for (const auto &element : choice.elements) {
|
||||
auto status = get_completion_status(element);
|
||||
if (status.is_done) return status;
|
||||
}
|
||||
return { false, PrecedenceRange() };
|
||||
}
|
||||
return CompletionStatus{false, PrecedenceRange()};
|
||||
},
|
||||
|
||||
CompletionStatus apply_to(const rules::Metadata *rule) {
|
||||
CompletionStatus result = apply(rule->rule);
|
||||
if (result.is_done && result.precedence.empty && rule->params.has_precedence) {
|
||||
result.precedence.add(rule->params.precedence);
|
||||
[](rules::Metadata metadata) {
|
||||
CompletionStatus result = get_completion_status(*metadata.rule);
|
||||
if (result.is_done && result.precedence.empty && metadata.params.has_precedence) {
|
||||
result.precedence.add(metadata.params.precedence);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
},
|
||||
|
||||
CompletionStatus apply_to(const rules::Repeat *rule) {
|
||||
return apply(rule->content);
|
||||
}
|
||||
[](rules::Repeat repeat) {
|
||||
return get_completion_status(*repeat.rule);
|
||||
},
|
||||
|
||||
CompletionStatus apply_to(const rules::Blank *rule) {
|
||||
return { true, PrecedenceRange() };
|
||||
}
|
||||
|
||||
CompletionStatus apply_to(const rules::Seq *rule) {
|
||||
CompletionStatus left_status = apply(rule->left);
|
||||
[](rules::Seq sequence) {
|
||||
CompletionStatus left_status = get_completion_status(*sequence.left);
|
||||
if (left_status.is_done) {
|
||||
return apply(rule->right);
|
||||
return get_completion_status(*sequence.right);
|
||||
} else {
|
||||
return { false, PrecedenceRange() };
|
||||
return CompletionStatus{false, PrecedenceRange()};
|
||||
}
|
||||
}
|
||||
};
|
||||
},
|
||||
|
||||
return GetCompletionStatus().apply(rule);
|
||||
[](rules::Blank blank) {
|
||||
return CompletionStatus{true, PrecedenceRange()};
|
||||
},
|
||||
|
||||
[](rules::CharacterSet) {
|
||||
return CompletionStatus{false, PrecedenceRange()};
|
||||
},
|
||||
|
||||
[](auto) {
|
||||
return CompletionStatus{false, PrecedenceRange()};
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
LexItem::CompletionStatus LexItem::completion_status() const {
|
||||
return get_completion_status(rule);
|
||||
}
|
||||
|
||||
LexItemSet::LexItemSet() {}
|
||||
|
|
|
|||
|
|
@ -5,8 +5,7 @@
|
|||
#include <map>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/precedence_range.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -14,7 +13,7 @@ namespace build_tables {
|
|||
|
||||
class LexItem {
|
||||
public:
|
||||
LexItem(const rules::Symbol &, rule_ptr);
|
||||
LexItem(const rules::Symbol &, const rules::Rule &);
|
||||
|
||||
struct CompletionStatus {
|
||||
bool is_done;
|
||||
|
|
@ -25,7 +24,7 @@ class LexItem {
|
|||
CompletionStatus completion_status() const;
|
||||
|
||||
rules::Symbol lhs;
|
||||
rule_ptr rule;
|
||||
rules::Rule rule;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -4,47 +4,34 @@
|
|||
#include <functional>
|
||||
#include <utility>
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::function;
|
||||
using std::make_shared;
|
||||
using std::map;
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
using rules::CharacterSet;
|
||||
using rules::Symbol;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::Seq;
|
||||
using rules::Repeat;
|
||||
using rules::Metadata;
|
||||
typedef LexItemSet::Transition Transition;
|
||||
typedef LexItemSet::TransitionMap TransitionMap;
|
||||
using rules::Rule;
|
||||
using Transition = LexItemSet::Transition;
|
||||
using TransitionMap = LexItemSet::TransitionMap;
|
||||
|
||||
class TransitionBuilder : public rules::RuleFn<void> {
|
||||
class TransitionBuilder {
|
||||
TransitionMap *transitions;
|
||||
const Symbol &item_lhs;
|
||||
const rules::Symbol &item_lhs;
|
||||
vector<int> *precedence_stack;
|
||||
bool in_main_token;
|
||||
|
||||
Transition transform_transition(const Transition &transition,
|
||||
function<rule_ptr(rule_ptr)> callback) {
|
||||
inline Transition transform_transition(const Transition &transition,
|
||||
const function<Rule(const Rule &)> &callback) {
|
||||
LexItemSet destination;
|
||||
for (const LexItem &item : transition.destination.entries)
|
||||
for (const LexItem &item : transition.destination.entries) {
|
||||
destination.entries.insert(LexItem(item.lhs, callback(item.rule)));
|
||||
return Transition{ destination, transition.precedence,
|
||||
transition.in_main_token };
|
||||
}
|
||||
return Transition{destination, transition.precedence, transition.in_main_token};
|
||||
}
|
||||
|
||||
void add_transition(TransitionMap *transitions, CharacterSet new_characters,
|
||||
|
|
@ -89,82 +76,102 @@ class TransitionBuilder : public rules::RuleFn<void> {
|
|||
transitions->insert({ new_characters, new_transition });
|
||||
}
|
||||
|
||||
void apply_to(const CharacterSet *character_set) {
|
||||
PrecedenceRange precedence;
|
||||
if (!precedence_stack->empty())
|
||||
precedence.add(precedence_stack->back());
|
||||
|
||||
add_transition(transitions, *character_set,
|
||||
Transition{
|
||||
LexItemSet({ LexItem(item_lhs, Blank::build()) }),
|
||||
precedence, in_main_token,
|
||||
});
|
||||
}
|
||||
|
||||
void apply_to(const Choice *choice) {
|
||||
for (const rule_ptr &element : choice->elements)
|
||||
apply(element);
|
||||
}
|
||||
|
||||
void apply_to(const Seq *sequence) {
|
||||
TransitionMap left_transitions;
|
||||
TransitionBuilder(&left_transitions, this).apply(sequence->left);
|
||||
|
||||
for (const auto &pair : left_transitions) {
|
||||
add_transition(
|
||||
transitions, pair.first,
|
||||
transform_transition(pair.second, [&sequence](rule_ptr rule) {
|
||||
return Seq::build({ rule, sequence->right });
|
||||
}));
|
||||
}
|
||||
|
||||
if (rule_can_be_blank(sequence->left))
|
||||
apply(sequence->right);
|
||||
}
|
||||
|
||||
void apply_to(const Repeat *repeat) {
|
||||
TransitionMap content_transitions;
|
||||
TransitionBuilder(&content_transitions, this).apply(repeat->content);
|
||||
|
||||
for (const auto &pair : content_transitions) {
|
||||
add_transition(transitions, pair.first, pair.second);
|
||||
add_transition(
|
||||
transitions, pair.first,
|
||||
transform_transition(pair.second, [&repeat](rule_ptr item_rule) {
|
||||
return Seq::build({ item_rule, repeat->copy() });
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
void apply_to(const Metadata *metadata) {
|
||||
bool has_active_precedence = metadata->params.is_active;
|
||||
if (has_active_precedence)
|
||||
precedence_stack->push_back(metadata->params.precedence);
|
||||
|
||||
if (metadata->params.is_main_token)
|
||||
in_main_token = true;
|
||||
|
||||
rules::MetadataParams params = metadata->params;
|
||||
if (params.has_precedence)
|
||||
params.is_active = true;
|
||||
|
||||
TransitionMap content_transitions;
|
||||
TransitionBuilder(&content_transitions, this).apply(metadata->rule);
|
||||
|
||||
for (const auto &pair : content_transitions) {
|
||||
add_transition(
|
||||
transitions, pair.first,
|
||||
transform_transition(pair.second, [&params](rule_ptr rule) {
|
||||
return Metadata::build(rule, params);
|
||||
}));
|
||||
}
|
||||
|
||||
if (has_active_precedence)
|
||||
precedence_stack->pop_back();
|
||||
}
|
||||
|
||||
public:
|
||||
TransitionBuilder(TransitionMap *transitions, const Symbol &item_lhs,
|
||||
void apply(const Rule &rule) {
|
||||
rule.match(
|
||||
[this](const rules::Blank &) {},
|
||||
|
||||
[this](const rules::CharacterSet &character_set) {
|
||||
PrecedenceRange precedence;
|
||||
if (!precedence_stack->empty()) {
|
||||
precedence.add(precedence_stack->back());
|
||||
}
|
||||
|
||||
add_transition(
|
||||
transitions,
|
||||
character_set,
|
||||
Transition{
|
||||
LexItemSet({ LexItem(item_lhs, rules::Blank{}) }),
|
||||
precedence,
|
||||
in_main_token,
|
||||
}
|
||||
);
|
||||
},
|
||||
|
||||
[this](const rules::Choice &choice) {
|
||||
for (const auto &element : choice.elements) {
|
||||
apply(element);
|
||||
}
|
||||
},
|
||||
|
||||
[this](const rules::Seq &sequence) {
|
||||
TransitionMap left_transitions;
|
||||
TransitionBuilder(&left_transitions, this).apply(*sequence.left);
|
||||
|
||||
for (const auto &pair : left_transitions) {
|
||||
add_transition(
|
||||
transitions,
|
||||
pair.first,
|
||||
transform_transition(pair.second, [&sequence](Rule rule) -> Rule {
|
||||
return Rule::seq({rule, *sequence.right});
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
if (rule_can_be_blank(*sequence.left)) {
|
||||
apply(*sequence.right);
|
||||
}
|
||||
},
|
||||
|
||||
[this](const rules::Repeat &repeat) {
|
||||
TransitionMap content_transitions;
|
||||
TransitionBuilder(&content_transitions, this).apply(*repeat.rule);
|
||||
|
||||
for (const auto &pair : content_transitions) {
|
||||
add_transition(transitions, pair.first, pair.second);
|
||||
add_transition(
|
||||
transitions, pair.first,
|
||||
transform_transition(pair.second, [&repeat](Rule item_rule) {
|
||||
return Rule::seq({ item_rule, repeat });
|
||||
})
|
||||
);
|
||||
}
|
||||
},
|
||||
|
||||
[this](const rules::Metadata &metadata) {
|
||||
bool has_active_precedence = metadata.params.is_active;
|
||||
if (has_active_precedence)
|
||||
precedence_stack->push_back(metadata.params.precedence);
|
||||
|
||||
if (metadata.params.is_main_token)
|
||||
in_main_token = true;
|
||||
|
||||
auto params = metadata.params;
|
||||
if (params.has_precedence)
|
||||
params.is_active = true;
|
||||
|
||||
TransitionMap content_transitions;
|
||||
TransitionBuilder(&content_transitions, this).apply(*metadata.rule);
|
||||
|
||||
for (const auto &pair : content_transitions) {
|
||||
add_transition(
|
||||
transitions, pair.first,
|
||||
transform_transition(pair.second, [&params](Rule rule) {
|
||||
return rules::Metadata{rule, params};
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
if (has_active_precedence) {
|
||||
precedence_stack->pop_back();
|
||||
}
|
||||
},
|
||||
|
||||
[](auto) {}
|
||||
);
|
||||
}
|
||||
|
||||
TransitionBuilder(TransitionMap *transitions, const rules::Symbol &item_lhs,
|
||||
vector<int> *precedence_stack, bool in_main_token)
|
||||
: transitions(transitions),
|
||||
item_lhs(item_lhs),
|
||||
|
|
@ -180,8 +187,7 @@ class TransitionBuilder : public rules::RuleFn<void> {
|
|||
|
||||
void lex_item_transitions(TransitionMap *transitions, const LexItem &item) {
|
||||
vector<int> precedence_stack;
|
||||
TransitionBuilder(transitions, item.lhs, &precedence_stack, false)
|
||||
.apply(item.rule);
|
||||
TransitionBuilder(transitions, item.lhs, &precedence_stack, false).apply(item.rule);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -1,15 +1,12 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_
|
||||
#define COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_
|
||||
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
void lex_item_transitions(LexItemSet::TransitionMap *transitions,
|
||||
const LexItem &);
|
||||
void lex_item_transitions(LexItemSet::TransitionMap *transitions, const LexItem &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -10,13 +10,7 @@
|
|||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -28,6 +22,7 @@ using std::string;
|
|||
using std::vector;
|
||||
using std::unordered_map;
|
||||
using std::unique_ptr;
|
||||
using rules::Rule;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::CharacterSet;
|
||||
|
|
@ -36,35 +31,45 @@ using rules::Symbol;
|
|||
using rules::Metadata;
|
||||
using rules::Seq;
|
||||
|
||||
class StartingCharacterAggregator : public rules::RuleFn<void> {
|
||||
void apply_to(const rules::Seq *rule) {
|
||||
apply(rule->left);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Choice *rule) {
|
||||
for (const rule_ptr &element : rule->elements) apply(element);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Repeat *rule) {
|
||||
apply(rule->content);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Metadata *rule) {
|
||||
apply(rule->rule);
|
||||
}
|
||||
|
||||
void apply_to(const rules::CharacterSet *rule) {
|
||||
result.add_set(*rule);
|
||||
}
|
||||
|
||||
class StartingCharacterAggregator {
|
||||
public:
|
||||
void apply(const Rule &rule) {
|
||||
rule.match(
|
||||
[this](const Seq &sequence) {
|
||||
apply(*sequence.left);
|
||||
},
|
||||
|
||||
[this](const rules::Choice &rule) {
|
||||
for (const auto &element : rule.elements) {
|
||||
apply(element);
|
||||
}
|
||||
},
|
||||
|
||||
[this](const rules::Repeat &rule) {
|
||||
apply(*rule.rule);
|
||||
},
|
||||
|
||||
[this](const rules::Metadata &rule) {
|
||||
apply(*rule.rule);
|
||||
},
|
||||
|
||||
[this](const rules::CharacterSet &rule) {
|
||||
result.add_set(rule);
|
||||
},
|
||||
|
||||
[this](const rules::Blank) {},
|
||||
|
||||
[](auto) {}
|
||||
);
|
||||
}
|
||||
|
||||
CharacterSet result;
|
||||
};
|
||||
|
||||
class LexTableBuilderImpl : public LexTableBuilder {
|
||||
LexTable lex_table;
|
||||
const LexicalGrammar grammar;
|
||||
vector<rule_ptr> separator_rules;
|
||||
vector<Rule> separator_rules;
|
||||
CharacterSet first_separator_characters;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<LexItemSet, LexStateId> lex_state_ids;
|
||||
|
|
@ -74,11 +79,11 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
|
||||
LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) {
|
||||
StartingCharacterAggregator starting_character_aggregator;
|
||||
for (const rule_ptr &rule : grammar.separators) {
|
||||
separator_rules.push_back(Repeat::build(rule));
|
||||
for (const auto &rule : grammar.separators) {
|
||||
separator_rules.push_back(Repeat{rule});
|
||||
starting_character_aggregator.apply(rule);
|
||||
}
|
||||
separator_rules.push_back(Blank::build());
|
||||
separator_rules.push_back(Blank{});
|
||||
first_separator_characters = starting_character_aggregator.result;
|
||||
shadowed_token_indices.resize(grammar.variables.size());
|
||||
}
|
||||
|
|
@ -98,8 +103,18 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
clear();
|
||||
|
||||
map<Symbol, ParseTableEntry> terminals;
|
||||
terminals[Symbol(left, Symbol::Terminal)];
|
||||
terminals[Symbol(right, Symbol::Terminal)];
|
||||
terminals[Symbol::terminal(left)];
|
||||
terminals[Symbol::terminal(right)];
|
||||
|
||||
if (grammar.variables[left].is_string && grammar.variables[right].is_string) {
|
||||
StartingCharacterAggregator left_starting_characters;
|
||||
left_starting_characters.apply(grammar.variables[left].rule);
|
||||
StartingCharacterAggregator right_starting_characters;
|
||||
right_starting_characters.apply(grammar.variables[right].rule);
|
||||
if (!(left_starting_characters.result == right_starting_characters.result)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
add_lex_state(item_set_for_terminals(terminals));
|
||||
|
||||
|
|
@ -183,11 +198,11 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
for (ParseState &state : parse_table->states) {
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol symbol = entry.first;
|
||||
if (symbol.is_token()) {
|
||||
if (symbol.is_terminal()) {
|
||||
auto homonyms = conflict_manager.possible_homonyms.find(symbol.index);
|
||||
if (homonyms != conflict_manager.possible_homonyms.end())
|
||||
for (Symbol::Index homonym : homonyms->second)
|
||||
if (state.terminal_entries.count(Symbol(homonym, Symbol::Terminal))) {
|
||||
if (state.terminal_entries.count(Symbol::terminal(homonym))) {
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
}
|
||||
|
|
@ -198,7 +213,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
auto extensions = conflict_manager.possible_extensions.find(symbol.index);
|
||||
if (extensions != conflict_manager.possible_extensions.end())
|
||||
for (Symbol::Index extension : extensions->second)
|
||||
if (state.terminal_entries.count(Symbol(extension, Symbol::Terminal))) {
|
||||
if (state.terminal_entries.count(Symbol::terminal(extension))) {
|
||||
entry.second.depends_on_lookahead = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -278,15 +293,18 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
LexItemSet result;
|
||||
for (const auto &pair : terminals) {
|
||||
Symbol symbol = pair.first;
|
||||
if (symbol.is_token()) {
|
||||
for (const rule_ptr &rule : rules_for_symbol(symbol)) {
|
||||
for (const rule_ptr &separator_rule : separator_rules) {
|
||||
if (symbol.is_terminal()) {
|
||||
for (const auto &rule : rules_for_symbol(symbol)) {
|
||||
for (const auto &separator_rule : separator_rules) {
|
||||
result.entries.insert(LexItem(
|
||||
symbol,
|
||||
Metadata::separator(
|
||||
Seq::build({
|
||||
Rule::seq({
|
||||
separator_rule,
|
||||
Metadata::main_token(rule) }))));
|
||||
Metadata::main_token(rule)
|
||||
})
|
||||
)
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -294,17 +312,20 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
return result;
|
||||
}
|
||||
|
||||
vector<rule_ptr> rules_for_symbol(const rules::Symbol &symbol) {
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
return { CharacterSet().include(0).copy() };
|
||||
vector<Rule> rules_for_symbol(const rules::Symbol &symbol) {
|
||||
if (symbol == rules::END_OF_INPUT()) {
|
||||
return { CharacterSet().include(0) };
|
||||
}
|
||||
|
||||
rule_ptr rule = grammar.variables[symbol.index].rule;
|
||||
return grammar.variables[symbol.index].rule.match(
|
||||
[](const Choice &choice) {
|
||||
return choice.elements;
|
||||
},
|
||||
|
||||
auto choice = rule->as<Choice>();
|
||||
if (choice)
|
||||
return choice->elements;
|
||||
else
|
||||
return { rule };
|
||||
[](auto rule) {
|
||||
return vector<Rule>{ rule };
|
||||
}
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include <set>
|
||||
#include <memory>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include <set>
|
||||
#include <memory>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
#include <string>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/util/hash_combine.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -41,7 +41,7 @@ bool ParseItem::operator<(const ParseItem &other) const {
|
|||
}
|
||||
|
||||
Symbol ParseItem::lhs() const {
|
||||
return Symbol(variable_index, Symbol::NonTerminal);
|
||||
return Symbol{variable_index, Symbol::NonTerminal};
|
||||
}
|
||||
|
||||
bool ParseItem::is_done() const {
|
||||
|
|
|
|||
|
|
@ -4,8 +4,7 @@
|
|||
#include <map>
|
||||
#include <utility>
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/precedence_range.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include <utility>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -16,8 +16,6 @@ using std::get;
|
|||
using std::pair;
|
||||
using std::tuple;
|
||||
using std::make_tuple;
|
||||
using std::shared_ptr;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::NONE;
|
||||
|
||||
|
|
@ -27,17 +25,17 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
set<Symbol::Index> processed_non_terminals;
|
||||
|
||||
for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol(i, Symbol::Terminal);
|
||||
Symbol symbol = Symbol::terminal(i);
|
||||
first_sets.insert({symbol, LookaheadSet({ symbol })});
|
||||
}
|
||||
|
||||
for (size_t i = 0, n = grammar.external_tokens.size(); i < n; i++) {
|
||||
Symbol symbol(i, Symbol::External);
|
||||
Symbol symbol = Symbol::external(i);
|
||||
first_sets.insert({symbol, LookaheadSet({ symbol })});
|
||||
}
|
||||
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol(i, Symbol::NonTerminal);
|
||||
Symbol symbol = Symbol::non_terminal(i);
|
||||
LookaheadSet first_set;
|
||||
|
||||
processed_non_terminals.clear();
|
||||
|
|
@ -64,7 +62,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
vector<ParseItemSetComponent> components_to_process;
|
||||
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol(i, Symbol::NonTerminal);
|
||||
Symbol symbol = Symbol::non_terminal(i);
|
||||
map<ParseItem, pair<LookaheadSet, bool>> cache_entry;
|
||||
|
||||
components_to_process.clear();
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_
|
||||
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rule.h"
|
||||
#include <map>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -1,43 +1,42 @@
|
|||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
class CanBeBlank : public rules::RuleFn<bool> {
|
||||
protected:
|
||||
bool apply_to(const rules::Blank *) {
|
||||
return true;
|
||||
}
|
||||
bool rule_can_be_blank(const rules::Rule &rule) {
|
||||
return rule.match(
|
||||
[](rules::Blank) {
|
||||
return true;
|
||||
},
|
||||
|
||||
bool apply_to(const rules::Repeat *rule) {
|
||||
return apply(rule->content);
|
||||
}
|
||||
[](rules::CharacterSet) {
|
||||
return false;
|
||||
},
|
||||
|
||||
bool apply_to(const rules::Choice *rule) {
|
||||
for (const auto &element : rule->elements)
|
||||
if (apply(element))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
[](rules::Repeat repeat) {
|
||||
return rule_can_be_blank(*repeat.rule);
|
||||
},
|
||||
|
||||
bool apply_to(const rules::Seq *rule) {
|
||||
return apply(rule->left) && apply(rule->right);
|
||||
}
|
||||
[](rules::Metadata metadata) {
|
||||
return rule_can_be_blank(*metadata.rule);
|
||||
},
|
||||
|
||||
bool apply_to(const rules::Metadata *rule) {
|
||||
return apply(rule->rule);
|
||||
}
|
||||
};
|
||||
[](rules::Choice choice) {
|
||||
for (const auto &element : choice.elements) {
|
||||
if (rule_can_be_blank(element)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
},
|
||||
|
||||
bool rule_can_be_blank(const rule_ptr &rule) {
|
||||
return CanBeBlank().apply(rule);
|
||||
[](rules::Seq seq) {
|
||||
return rule_can_be_blank(*seq.left) && rule_can_be_blank(*seq.right);
|
||||
},
|
||||
|
||||
[](auto) { return false; }
|
||||
);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
bool rule_can_be_blank(const rule_ptr &rule);
|
||||
bool rule_can_be_blank(const rules::Rule &rule);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -22,8 +22,7 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) {
|
|||
TSCompileErrorTypeInvalidGrammar };
|
||||
}
|
||||
|
||||
auto prepare_grammar_result =
|
||||
prepare_grammar::prepare_grammar(parse_result.grammar);
|
||||
auto prepare_grammar_result = prepare_grammar::prepare_grammar(parse_result.grammar);
|
||||
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
|
||||
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
|
||||
CompileError error = get<2>(prepare_grammar_result);
|
||||
|
|
@ -46,22 +45,20 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) {
|
|||
return { strdup(code.c_str()), nullptr, TSCompileErrorTypeNone };
|
||||
}
|
||||
|
||||
pair<string, const CompileError> compile(const Grammar &grammar,
|
||||
pair<string, const CompileError> compile(const InputGrammar &grammar,
|
||||
std::string name) {
|
||||
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
|
||||
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
|
||||
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
|
||||
CompileError error = get<2>(prepare_grammar_result);
|
||||
if (error.type)
|
||||
return { "", error };
|
||||
if (error.type) return { "", error };
|
||||
|
||||
auto table_build_result =
|
||||
build_tables::build_tables(syntax_grammar, lexical_grammar);
|
||||
const ParseTable &parse_table = get<0>(table_build_result);
|
||||
const LexTable &lex_table = get<1>(table_build_result);
|
||||
error = get<2>(table_build_result);
|
||||
if (error.type)
|
||||
return { "", error };
|
||||
if (error.type) return { "", error };
|
||||
|
||||
string code = generate_code::c_code(name, parse_table, lex_table,
|
||||
syntax_grammar, lexical_grammar);
|
||||
|
|
|
|||
|
|
@ -7,9 +7,9 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct Grammar;
|
||||
struct InputGrammar;
|
||||
|
||||
std::pair<std::string, CompileError> compile(const Grammar &, std::string);
|
||||
std::pair<std::string, CompileError> compile(const InputGrammar &, std::string);
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ namespace tree_sitter {
|
|||
|
||||
class CompileError {
|
||||
public:
|
||||
CompileError() : type(TSCompileErrorTypeNone) {}
|
||||
|
||||
CompileError(TSCompileErrorType type, std::string message)
|
||||
: type(type), message(message) {}
|
||||
|
||||
|
|
@ -15,6 +17,10 @@ class CompileError {
|
|||
return CompileError(TSCompileErrorTypeNone, "");
|
||||
}
|
||||
|
||||
operator bool() const {
|
||||
return type != TSCompileErrorTypeNone;
|
||||
}
|
||||
|
||||
bool operator==(const CompileError &other) const {
|
||||
return type == other.type && message == other.message;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
#include "compiler/parse_table.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
#include "tree_sitter/runtime.h"
|
||||
|
||||
|
|
@ -129,7 +129,7 @@ class CCodeGenerator {
|
|||
size_t token_count = 0;
|
||||
for (const auto &entry : parse_table.symbols) {
|
||||
const Symbol &symbol = entry.first;
|
||||
if (symbol.is_token()) {
|
||||
if (symbol.is_terminal()) {
|
||||
token_count++;
|
||||
} else if (symbol.is_external()) {
|
||||
const ExternalToken &external_token = syntax_grammar.external_tokens[symbol.index];
|
||||
|
|
@ -256,7 +256,7 @@ class CCodeGenerator {
|
|||
if (symbol.is_external()) {
|
||||
needs_external_scanner = true;
|
||||
external_token_indices.insert(symbol.index);
|
||||
} else if (symbol.is_token()) {
|
||||
} else if (symbol.is_terminal()) {
|
||||
auto corresponding_external_token =
|
||||
external_tokens_by_corresponding_internal_token.find(symbol.index);
|
||||
if (corresponding_external_token != external_tokens_by_corresponding_internal_token.end()) {
|
||||
|
|
@ -298,7 +298,7 @@ class CCodeGenerator {
|
|||
line("TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {");
|
||||
indent([&]() {
|
||||
for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) {
|
||||
line("[" + external_token_id(i) + "] = " + symbol_id(Symbol(i, Symbol::External)) + ",");
|
||||
line("[" + external_token_id(i) + "] = " + symbol_id(Symbol::external(i)) + ",");
|
||||
}
|
||||
});
|
||||
line("};");
|
||||
|
|
@ -339,7 +339,7 @@ class CCodeGenerator {
|
|||
line("[" + to_string(state_id++) + "] = {");
|
||||
indent([&]() {
|
||||
for (const auto &entry : state.nonterminal_entries) {
|
||||
line("[" + symbol_id(Symbol(entry.first, Symbol::NonTerminal)) + "] = STATE(");
|
||||
line("[" + symbol_id(Symbol::non_terminal(entry.first)) + "] = STATE(");
|
||||
add(to_string(entry.second));
|
||||
add("),");
|
||||
}
|
||||
|
|
@ -686,9 +686,13 @@ class CCodeGenerator {
|
|||
string c_code(string name, const ParseTable &parse_table,
|
||||
const LexTable &lex_table, const SyntaxGrammar &syntax_grammar,
|
||||
const LexicalGrammar &lexical_grammar) {
|
||||
return CCodeGenerator(name, parse_table, lex_table, syntax_grammar,
|
||||
lexical_grammar)
|
||||
.code();
|
||||
return CCodeGenerator(
|
||||
name,
|
||||
parse_table,
|
||||
lex_table,
|
||||
syntax_grammar,
|
||||
lexical_grammar
|
||||
).code();
|
||||
}
|
||||
|
||||
} // namespace generate_code
|
||||
|
|
|
|||
|
|
@ -2,17 +2,43 @@
|
|||
#define COMPILER_GRAMMAR_H_
|
||||
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct Grammar {
|
||||
std::vector<std::pair<std::string, rule_ptr>> rules;
|
||||
std::vector<rule_ptr> extra_tokens;
|
||||
std::vector<std::vector<std::string>> expected_conflicts;
|
||||
std::vector<std::string> external_tokens;
|
||||
enum VariableType {
|
||||
VariableTypeHidden,
|
||||
VariableTypeAuxiliary,
|
||||
VariableTypeAnonymous,
|
||||
VariableTypeNamed,
|
||||
};
|
||||
|
||||
struct ExternalToken {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Symbol corresponding_internal_token;
|
||||
|
||||
inline bool operator==(const ExternalToken &other) const {
|
||||
return name == other.name &&
|
||||
type == other.type &&
|
||||
corresponding_internal_token == other.corresponding_internal_token;
|
||||
}
|
||||
};
|
||||
|
||||
struct InputGrammar {
|
||||
struct Variable {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Rule rule;
|
||||
};
|
||||
|
||||
std::vector<Variable> variables;
|
||||
std::vector<rules::Rule> extra_tokens;
|
||||
std::vector<std::unordered_set<rules::NamedSymbol>> expected_conflicts;
|
||||
std::vector<ExternalToken> external_tokens;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
#include "compiler/lex_table.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
|
|||
|
|
@ -6,8 +6,7 @@
|
|||
#include <set>
|
||||
#include <string>
|
||||
#include "compiler/precedence_range.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
|
|||
|
|
@ -5,20 +5,25 @@
|
|||
#include <string>
|
||||
#include <set>
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/variable.h"
|
||||
#include "compiler/grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalVariable {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rule_ptr rule;
|
||||
rules::Rule rule;
|
||||
bool is_string;
|
||||
|
||||
inline bool operator==(const LexicalVariable &other) const {
|
||||
return other.name == name && other.type == type && other.rule == rule &&
|
||||
other.is_string == is_string;
|
||||
}
|
||||
};
|
||||
|
||||
struct LexicalGrammar {
|
||||
std::vector<LexicalVariable> variables;
|
||||
std::vector<rule_ptr> separators;
|
||||
std::vector<rules::Rule> separators;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,20 +1,31 @@
|
|||
#include "compiler/parse_grammar.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include "json.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::unordered_set;
|
||||
using std::pair;
|
||||
using rules::Rule;
|
||||
using rules::Blank;
|
||||
using rules::Metadata;
|
||||
using rules::Pattern;
|
||||
using rules::String;
|
||||
using rules::NamedSymbol;
|
||||
|
||||
struct ParseRuleResult {
|
||||
rule_ptr rule;
|
||||
Rule rule;
|
||||
string error_message;
|
||||
|
||||
ParseRuleResult(const string &error_message) : error_message(error_message) {}
|
||||
ParseRuleResult(const char *error_message) : error_message(error_message) {}
|
||||
ParseRuleResult(Rule rule) : rule(rule) {}
|
||||
};
|
||||
|
||||
ParseRuleResult parse_rule(json_value *rule_json) {
|
||||
|
|
@ -23,193 +34,163 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
string type;
|
||||
|
||||
if (!rule_json) {
|
||||
error_message = "Rule cannot be null";
|
||||
goto error;
|
||||
return "Rule cannot be null";
|
||||
}
|
||||
|
||||
if (rule_json->type != json_object) {
|
||||
error_message = "Rule type must be an object";
|
||||
goto error;
|
||||
return "Rule type must be an object";
|
||||
}
|
||||
|
||||
rule_type_json = rule_json->operator[]("type");
|
||||
if (rule_type_json.type != json_string) {
|
||||
error_message = "Rule type must be a string";
|
||||
goto error;
|
||||
return "Rule type must be a string";
|
||||
}
|
||||
|
||||
type = rule_type_json.u.string.ptr;
|
||||
|
||||
if (type == "BLANK") {
|
||||
return { blank(), "" };
|
||||
return Rule(Blank{});
|
||||
}
|
||||
|
||||
if (type == "CHOICE") {
|
||||
json_value members_json = rule_json->operator[]("members");
|
||||
if (members_json.type != json_array) {
|
||||
error_message = "Choice members must be an array";
|
||||
goto error;
|
||||
return "Choice members must be an array";
|
||||
}
|
||||
|
||||
vector<rule_ptr> members;
|
||||
vector<Rule> members;
|
||||
for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
|
||||
json_value *member_json = members_json.u.array.values[i];
|
||||
ParseRuleResult member = parse_rule(member_json);
|
||||
if (member.rule.get()) {
|
||||
members.push_back(member.rule);
|
||||
} else {
|
||||
error_message = "Invalid choice member: " + member.error_message;
|
||||
goto error;
|
||||
auto result = parse_rule(member_json);
|
||||
if (!result.error_message.empty()) {
|
||||
return "Invalid choice member: " + result.error_message;
|
||||
}
|
||||
members.push_back(result.rule);
|
||||
}
|
||||
return { choice(members), "" };
|
||||
return Rule::choice(members);
|
||||
}
|
||||
|
||||
if (type == "SEQ") {
|
||||
json_value members_json = rule_json->operator[]("members");
|
||||
if (members_json.type != json_array) {
|
||||
error_message = "Seq members must be an array";
|
||||
goto error;
|
||||
return "Seq members must be an array";
|
||||
}
|
||||
|
||||
vector<rule_ptr> members;
|
||||
vector<Rule> members;
|
||||
for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
|
||||
json_value *member_json = members_json.u.array.values[i];
|
||||
ParseRuleResult member = parse_rule(member_json);
|
||||
if (member.rule.get()) {
|
||||
members.push_back(member.rule);
|
||||
} else {
|
||||
error_message = "Invalid seq member: " + member.error_message;
|
||||
goto error;
|
||||
auto result = parse_rule(member_json);
|
||||
if (!result.error_message.empty()) {
|
||||
// This is the SEQ branch: name the construct that actually failed.
return "Invalid seq member: " + result.error_message;
|
||||
}
|
||||
members.push_back(result.rule);
|
||||
}
|
||||
return { seq(members), "" };
|
||||
return Rule::seq(members);
|
||||
}
|
||||
|
||||
if (type == "REPEAT") {
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (content.rule.get()) {
|
||||
return { repeat(content.rule), "" };
|
||||
} else {
|
||||
error_message = "Invalid repeat content: " + content.error_message;
|
||||
goto error;
|
||||
auto result = parse_rule(&content_json);
|
||||
if (!result.error_message.empty()) {
|
||||
return "Invalid repeat content: " + result.error_message;
|
||||
}
|
||||
return Rule::choice({Rule::repeat(result.rule), Blank{}});
|
||||
}
|
||||
|
||||
if (type == "REPEAT1") {
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (content.rule.get()) {
|
||||
return { repeat1(content.rule), "" };
|
||||
} else {
|
||||
error_message = "Invalid repeat1 content: " + content.error_message;
|
||||
goto error;
|
||||
auto result = parse_rule(&content_json);
|
||||
if (!result.error_message.empty()) {
|
||||
// This is the REPEAT1 branch: keep its message distinct from REPEAT's.
return "Invalid repeat1 content: " + result.error_message;
|
||||
}
|
||||
return Rule::repeat(result.rule);
|
||||
}
|
||||
|
||||
if (type == "TOKEN") {
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (content.rule.get()) {
|
||||
return { token(content.rule), "" };
|
||||
} else {
|
||||
error_message = "Invalid token content: " + content.error_message;
|
||||
goto error;
|
||||
auto result = parse_rule(&content_json);
|
||||
if (!result.error_message.empty()) {
|
||||
return "Invalid token content: " + result.error_message;
|
||||
}
|
||||
return Rule(Metadata::token(result.rule));
|
||||
}
|
||||
|
||||
if (type == "PATTERN") {
|
||||
json_value value_json = rule_json->operator[]("value");
|
||||
if (value_json.type == json_string) {
|
||||
return { pattern(value_json.u.string.ptr), "" };
|
||||
return Rule(Pattern{value_json.u.string.ptr});
|
||||
} else {
|
||||
error_message = "Pattern value must be a string";
|
||||
goto error;
|
||||
return "Pattern value must be a string";
|
||||
}
|
||||
}
|
||||
|
||||
if (type == "STRING") {
|
||||
json_value value_json = rule_json->operator[]("value");
|
||||
if (value_json.type == json_string) {
|
||||
return { str(value_json.u.string.ptr), "" };
|
||||
return Rule(String{value_json.u.string.ptr});
|
||||
} else {
|
||||
error_message = "String rule value must be a string";
|
||||
goto error;
|
||||
return "String rule value must be a string";
|
||||
}
|
||||
}
|
||||
|
||||
if (type == "SYMBOL") {
|
||||
json_value value_json = rule_json->operator[]("name");
|
||||
if (value_json.type == json_string) {
|
||||
return { sym(value_json.u.string.ptr), "" };
|
||||
return Rule(NamedSymbol{value_json.u.string.ptr});
|
||||
} else {
|
||||
error_message = "Symbol value must be a string";
|
||||
goto error;
|
||||
return "Symbol value must be a string";
|
||||
}
|
||||
}
|
||||
|
||||
if (type == "PREC") {
|
||||
json_value precedence_json = rule_json->operator[]("value");
|
||||
if (precedence_json.type != json_integer) {
|
||||
error_message = "Precedence value must be an integer";
|
||||
goto error;
|
||||
return "Precedence value must be an integer";
|
||||
}
|
||||
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (!content.rule.get()) {
|
||||
error_message = "Invalid precedence content: " + content.error_message;
|
||||
goto error;
|
||||
auto result = parse_rule(&content_json);
|
||||
if (!result.error_message.empty()) {
|
||||
return "Invalid precedence content: " + result.error_message;
|
||||
}
|
||||
|
||||
return { prec(precedence_json.u.integer, content.rule), "" };
|
||||
return Rule(Metadata::prec(precedence_json.u.integer, result.rule));
|
||||
}
|
||||
|
||||
if (type == "PREC_LEFT") {
|
||||
json_value precedence_json = rule_json->operator[]("value");
|
||||
if (precedence_json.type != json_integer) {
|
||||
error_message = "Precedence value must be an integer";
|
||||
goto error;
|
||||
return "Precedence value must be an integer";
|
||||
}
|
||||
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (!content.rule.get()) {
|
||||
error_message = "Invalid precedence content: " + content.error_message;
|
||||
goto error;
|
||||
auto result = parse_rule(&content_json);
|
||||
if (!result.error_message.empty()) {
|
||||
return "Invalid precedence content: " + result.error_message;
|
||||
}
|
||||
|
||||
return { prec_left(precedence_json.u.integer, content.rule), "" };
|
||||
return Rule(Metadata::prec_left(precedence_json.u.integer, result.rule));
|
||||
}
|
||||
|
||||
if (type == "PREC_RIGHT") {
|
||||
json_value precedence_json = rule_json->operator[]("value");
|
||||
if (precedence_json.type != json_integer) {
|
||||
error_message = "Precedence value must be an integer";
|
||||
goto error;
|
||||
return "Precedence value must be an integer";
|
||||
}
|
||||
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (!content.rule.get()) {
|
||||
error_message = "Invalid precedence content: " + content.error_message;
|
||||
goto error;
|
||||
auto result = parse_rule(&content_json);
|
||||
if (!result.error_message.empty()) {
|
||||
return "Invalid precedence content: " + result.error_message;
|
||||
}
|
||||
|
||||
return { prec_right(precedence_json.u.integer, content.rule), "" };
|
||||
return Rule(Metadata::prec_right(precedence_json.u.integer, result.rule));
|
||||
}
|
||||
|
||||
error_message = "Unknown rule type " + type;
|
||||
|
||||
error:
|
||||
return { rule_ptr(), error_message };
|
||||
return "Unknown rule type: " + type;
|
||||
}
|
||||
|
||||
ParseGrammarResult parse_grammar(const string &input) {
|
||||
string error_message;
|
||||
string name;
|
||||
Grammar grammar;
|
||||
InputGrammar grammar;
|
||||
json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json;
|
||||
|
||||
json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
|
||||
|
|
@ -242,15 +223,16 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
|
||||
for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) {
|
||||
json_object_entry entry_json = rules_json.u.object.values[i];
|
||||
ParseRuleResult entry = parse_rule(entry_json.value);
|
||||
|
||||
if (!entry.rule.get()) {
|
||||
error_message =
|
||||
string("Invalid rule '") + entry_json.name + "' " + entry.error_message;
|
||||
auto result = parse_rule(entry_json.value);
|
||||
if (!result.error_message.empty()) {
|
||||
// Prefix the failing rule's name so the user can locate the bad rule,
// matching how the extras loop prefixes "Invalid extra token: ".
error_message = string("Invalid rule '") + entry_json.name + "' " + result.error_message;
|
||||
goto error;
|
||||
}
|
||||
|
||||
grammar.rules.push_back({ string(entry_json.name), entry.rule });
|
||||
grammar.variables.push_back(InputGrammar::Variable{
|
||||
string(entry_json.name),
|
||||
VariableTypeNamed,
|
||||
result.rule
|
||||
});
|
||||
}
|
||||
|
||||
extras_json = grammar_json->operator[]("extras");
|
||||
|
|
@ -262,13 +244,12 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
|
||||
for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) {
|
||||
json_value *extra_json = extras_json.u.array.values[i];
|
||||
ParseRuleResult extra = parse_rule(extra_json);
|
||||
if (!extra.rule.get()) {
|
||||
error_message = string("Invalid extra token: ") + extra.error_message;
|
||||
auto result = parse_rule(extra_json);
|
||||
if (!result.error_message.empty()) {
|
||||
error_message = "Invalid extra token: " + result.error_message;
|
||||
goto error;
|
||||
}
|
||||
|
||||
grammar.extra_tokens.push_back(extra.rule);
|
||||
grammar.extra_tokens.push_back(result.rule);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -286,7 +267,7 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
goto error;
|
||||
}
|
||||
|
||||
vector<string> conflict;
|
||||
unordered_set<NamedSymbol> conflict;
|
||||
for (size_t j = 0, conflict_length = conflict_json->u.array.length;
|
||||
j < conflict_length; j++) {
|
||||
json_value *conflict_entry_json = conflict_json->u.array.values[j];
|
||||
|
|
@ -295,7 +276,9 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
goto error;
|
||||
}
|
||||
|
||||
conflict.push_back(string(conflict_entry_json->u.string.ptr));
|
||||
conflict.insert(rules::NamedSymbol{
|
||||
string(conflict_entry_json->u.string.ptr)
|
||||
});
|
||||
}
|
||||
|
||||
grammar.expected_conflicts.push_back(conflict);
|
||||
|
|
@ -317,7 +300,11 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
}
|
||||
|
||||
string token_name = token_name_json->u.string.ptr;
|
||||
grammar.external_tokens.push_back(token_name);
|
||||
grammar.external_tokens.push_back({
|
||||
token_name,
|
||||
VariableTypeNamed,
|
||||
rules::NONE()
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -329,7 +316,7 @@ error:
|
|||
json_value_free(grammar_json);
|
||||
}
|
||||
|
||||
return { "", Grammar(), error_message };
|
||||
return { "", InputGrammar(), error_message };
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ namespace tree_sitter {
|
|||
|
||||
struct ParseGrammarResult {
|
||||
std::string name;
|
||||
Grammar grammar;
|
||||
InputGrammar grammar;
|
||||
std::string error_message;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include "compiler/parse_table.h"
|
||||
#include <string>
|
||||
#include "compiler/precedence_range.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -178,7 +178,7 @@ ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
|
|||
void ParseTable::set_nonterminal_action(ParseStateId state_id,
|
||||
Symbol::Index lookahead,
|
||||
ParseStateId next_state_id) {
|
||||
symbols[Symbol(lookahead, Symbol::NonTerminal)].structural = true;
|
||||
symbols[Symbol::non_terminal(lookahead)].structural = true;
|
||||
states[state_id].nonterminal_entries[lookahead] = next_state_id;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,8 +6,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/lex_table.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/precedence_range.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -2,14 +2,9 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include <cassert>
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
@ -18,53 +13,79 @@ using std::string;
|
|||
using std::vector;
|
||||
using std::pair;
|
||||
using std::to_string;
|
||||
using std::make_shared;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::Repeat;
|
||||
using rules::Seq;
|
||||
using rules::Rule;
|
||||
using rules::Symbol;
|
||||
|
||||
class ExpandRepeats : public rules::IdentityRuleFn {
|
||||
class ExpandRepeats {
|
||||
string rule_name;
|
||||
size_t offset;
|
||||
size_t repeat_count;
|
||||
vector<pair<rule_ptr, Symbol>> existing_repeats;
|
||||
vector<pair<Rule, Symbol>> existing_repeats;
|
||||
|
||||
rule_ptr apply_to(const Repeat *rule) {
|
||||
for (const auto pair : existing_repeats)
|
||||
if (pair.first->operator==(*rule))
|
||||
return pair.second.copy();
|
||||
Rule apply(Rule rule) {
|
||||
return rule.match(
|
||||
[&](const rules::Blank &blank) -> Rule { return blank; },
|
||||
[&](const rules::Symbol &symbol) { return symbol; },
|
||||
|
||||
rule_ptr inner_rule = apply(rule->content);
|
||||
size_t index = aux_rules.size();
|
||||
string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
|
||||
Symbol repeat_symbol(offset + index, Symbol::NonTerminal);
|
||||
existing_repeats.push_back({ rule->copy(), repeat_symbol });
|
||||
aux_rules.push_back(Variable{
|
||||
helper_rule_name,
|
||||
VariableTypeAuxiliary,
|
||||
Choice::build({
|
||||
Seq::build({
|
||||
repeat_symbol.copy(),
|
||||
inner_rule,
|
||||
}),
|
||||
inner_rule,
|
||||
})
|
||||
});
|
||||
return repeat_symbol.copy();
|
||||
[&](const rules::Choice &choice) {
|
||||
vector<Rule> elements;
|
||||
for (const auto &element : choice.elements) {
|
||||
elements.push_back(apply(element));
|
||||
}
|
||||
return Rule::choice(elements);
|
||||
},
|
||||
|
||||
[&](const rules::Seq &sequence) {
|
||||
return rules::Seq{
|
||||
apply(*sequence.left),
|
||||
apply(*sequence.right)
|
||||
};
|
||||
},
|
||||
|
||||
[&](const rules::Repeat &repeat) {
|
||||
// Bind by const reference: each entry holds a Rule, so iterating by
// value would copy a whole rule tree per comparison.
for (const auto &pair : existing_repeats) {
|
||||
if (pair.first == rule) {
|
||||
return pair.second;
|
||||
}
|
||||
}
|
||||
|
||||
Rule inner_rule = apply(*repeat.rule);
|
||||
size_t index = aux_rules.size();
|
||||
string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
|
||||
Symbol repeat_symbol = Symbol::non_terminal(offset + index);
|
||||
existing_repeats.push_back({repeat, repeat_symbol});
|
||||
aux_rules.push_back({
|
||||
helper_rule_name,
|
||||
VariableTypeAuxiliary,
|
||||
rules::Choice{{
|
||||
rules::Seq{repeat_symbol, inner_rule},
|
||||
inner_rule,
|
||||
}}
|
||||
});
|
||||
return repeat_symbol;
|
||||
},
|
||||
|
||||
[&](const rules::Metadata &metadata) {
|
||||
return rules::Metadata{apply(*metadata.rule), metadata.params};
|
||||
},
|
||||
|
||||
[](auto) {
|
||||
assert(!"Unexpected rule type");
|
||||
return rules::Blank{};
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
public:
|
||||
explicit ExpandRepeats(size_t offset) : offset(offset) {}
|
||||
|
||||
rule_ptr expand(const rule_ptr &rule, const string &name) {
|
||||
Rule expand(const Rule &rule, const string &name) {
|
||||
rule_name = name;
|
||||
repeat_count = 0;
|
||||
return apply(rule);
|
||||
}
|
||||
|
||||
vector<Variable> aux_rules;
|
||||
vector<InitialSyntaxGrammar::Variable> aux_rules;
|
||||
};
|
||||
|
||||
InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
|
||||
|
|
@ -75,11 +96,16 @@ InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
|
|||
result.external_tokens = grammar.external_tokens;
|
||||
|
||||
ExpandRepeats expander(result.variables.size());
|
||||
for (auto &variable : result.variables)
|
||||
for (auto &variable : result.variables) {
|
||||
variable.rule = expander.expand(variable.rule, variable.name);
|
||||
}
|
||||
|
||||
result.variables.insert(
|
||||
result.variables.end(),
|
||||
expander.aux_rules.begin(),
|
||||
expander.aux_rules.end()
|
||||
);
|
||||
|
||||
result.variables.insert(result.variables.end(), expander.aux_rules.begin(),
|
||||
expander.aux_rules.end());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,8 +6,6 @@
|
|||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
struct InitialSyntaxGrammar;
|
||||
|
||||
InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -2,15 +2,8 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <map>
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/prepare_grammar/parse_regex.h"
|
||||
#include "utf8proc.h"
|
||||
|
||||
|
|
@ -19,70 +12,69 @@ namespace prepare_grammar {
|
|||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::map;
|
||||
using std::pair;
|
||||
using std::make_shared;
|
||||
using rules::String;
|
||||
using rules::Pattern;
|
||||
using rules::Metadata;
|
||||
using rules::Rule;
|
||||
|
||||
class ExpandTokens : public rules::IdentityRuleFn {
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
ExpandTokenResult expand_token(const rules::Rule &rule) {
|
||||
return rule.match(
|
||||
[](const rules::Blank &blank) -> ExpandTokenResult { return Rule(blank); },
|
||||
|
||||
rule_ptr apply_to(const String *rule) {
|
||||
vector<rule_ptr> elements;
|
||||
const uint8_t *iter = reinterpret_cast<const uint8_t *>(rule->value.data());
|
||||
const uint8_t *end = iter + rule->value.size();
|
||||
[](const rules::String &string) {
|
||||
vector<Rule> elements;
|
||||
const uint8_t *iter = reinterpret_cast<const uint8_t *>(string.value.data());
|
||||
const uint8_t *end = iter + string.value.size();
|
||||
|
||||
while (iter < end) {
|
||||
int32_t el;
|
||||
size_t size = utf8proc_iterate(iter, (end - iter), &el);
|
||||
if (!size)
|
||||
break;
|
||||
iter += size;
|
||||
while (iter < end) {
|
||||
int32_t el;
|
||||
size_t size = utf8proc_iterate(iter, (end - iter), &el);
|
||||
// utf8proc_iterate reports errors as negative values; stored in a size_t
// they wrap to a huge number. Stop on zero or on any size larger than the
// bytes remaining so invalid UTF-8 cannot advance `iter` out of the buffer.
if (!size || size > static_cast<size_t>(end - iter))
|
||||
break;
|
||||
iter += size;
|
||||
|
||||
elements.push_back(rules::CharacterSet().include(el).copy());
|
||||
}
|
||||
elements.push_back(rules::CharacterSet().include(el));
|
||||
}
|
||||
|
||||
rules::MetadataParams params;
|
||||
params.is_token = true;
|
||||
params.is_string = true;
|
||||
return Rule::seq(elements);
|
||||
},
|
||||
|
||||
return rules::Metadata::build(rules::Seq::build(elements), params);
|
||||
}
|
||||
[](const rules::Pattern &pattern) -> ExpandTokenResult {
|
||||
auto result = parse_regex(pattern.value);
|
||||
if (result.second) return result.second;
|
||||
return result.first;
|
||||
},
|
||||
|
||||
rule_ptr apply_to(const Pattern *rule) {
|
||||
auto pair = parse_regex(rule->value);
|
||||
if (!error.type)
|
||||
error = pair.second;
|
||||
return pair.first;
|
||||
}
|
||||
[](const rules::Repeat &rule) -> ExpandTokenResult {
|
||||
auto result = expand_token(*rule.rule);
|
||||
if (result.error) return result.error;
|
||||
return Rule::repeat(result.rule);
|
||||
},
|
||||
|
||||
public:
|
||||
CompileError error;
|
||||
ExpandTokens() : error(CompileError::none()) {}
|
||||
[](const rules::Metadata &rule) -> ExpandTokenResult {
|
||||
auto result = expand_token(*rule.rule);
|
||||
if (result.error) return result.error;
|
||||
return Rule(rules::Metadata{result.rule, rule.params});
|
||||
},
|
||||
|
||||
[](const rules::Seq &rule) -> ExpandTokenResult {
|
||||
auto left_result = expand_token(*rule.left);
|
||||
if (left_result.error) return left_result.error;
|
||||
auto right_result = expand_token(*rule.right);
|
||||
if (right_result.error) return right_result.error;
|
||||
return Rule(rules::Seq{left_result.rule, right_result.rule});
|
||||
},
|
||||
|
||||
[](const rules::Choice &rule) -> ExpandTokenResult {
|
||||
std::vector<Rule> elements;
|
||||
for (const auto &element : rule.elements) {
|
||||
auto result = expand_token(element);
|
||||
if (result.error) return result.error;
|
||||
elements.push_back(result.rule);
|
||||
}
|
||||
return Rule(rules::Choice{elements});
|
||||
},
|
||||
|
||||
[](auto) { return CompileError(TSCompileErrorTypeInvalidTokenContents, ""); }
|
||||
);
|
||||
};
|
||||
|
||||
// Runs every variable rule and every separator rule of `grammar` through an
// ExpandTokens visitor and collects the results into a new LexicalGrammar.
//
// Returns the new grammar paired with CompileError::none() when nothing went
// wrong. As soon as the visitor has an error recorded, returns the grammar
// built so far paired with that error.
pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &grammar) {
  ExpandTokens visitor;
  LexicalGrammar expanded;

  for (const LexicalVariable &token : grammar.variables) {
    auto expanded_rule = visitor.apply(token.rule);
    if (visitor.error.type) {
      return { expanded, visitor.error };
    }
    expanded.variables.push_back({token.name, token.type, expanded_rule, token.is_string});
  }

  for (auto &separator : grammar.separators) {
    auto expanded_rule = visitor.apply(separator);
    if (visitor.error.type) {
      return { expanded, visitor.error };
    }
    expanded.separators.push_back(expanded_rule);
  }

  return { expanded, CompileError::none() };
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -2,15 +2,21 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_
|
||||
|
||||
#include <utility>
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &);
|
||||
struct ExpandTokenResult {
|
||||
rules::Rule rule;
|
||||
CompileError error;
|
||||
|
||||
ExpandTokenResult(const rules::Rule &rule) : rule(rule) {}
|
||||
ExpandTokenResult(const CompileError &error) : error(error) {}
|
||||
};
|
||||
|
||||
ExpandTokenResult expand_token(const rules::Rule &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,54 +1,48 @@
|
|||
#include "compiler/prepare_grammar/extract_choices.h"
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
using std::vector;
|
||||
using rules::Rule;
|
||||
|
||||
class ExtractChoices : public rules::RuleFn<vector<rule_ptr>> {
|
||||
vector<rule_ptr> default_apply(const Rule *rule) {
|
||||
return vector<rule_ptr>({ rule->copy() });
|
||||
}
|
||||
vector<Rule> extract_choices(const Rule &rule) {
|
||||
return rule.match(
|
||||
[](const rules::Seq &sequence) {
|
||||
vector<Rule> result;
|
||||
for (auto &left_entry : extract_choices(*sequence.left)) {
|
||||
for (auto &right_entry : extract_choices(*sequence.right)) {
|
||||
result.push_back(rules::Rule::seq({left_entry, right_entry}));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
},
|
||||
|
||||
vector<rule_ptr> apply_to(const rules::Seq *rule) {
|
||||
vector<rule_ptr> result;
|
||||
for (auto left_entry : apply(rule->left))
|
||||
for (auto right_entry : apply(rule->right))
|
||||
result.push_back(rules::Seq::build({ left_entry, right_entry }));
|
||||
return result;
|
||||
}
|
||||
[](const rules::Metadata &rule) {
|
||||
vector<Rule> result;
|
||||
for (auto &entry : extract_choices(*rule.rule)) {
|
||||
result.push_back(rules::Metadata{entry, rule.params});
|
||||
}
|
||||
return result;
|
||||
},
|
||||
|
||||
vector<rule_ptr> apply_to(const rules::Metadata *rule) {
|
||||
vector<rule_ptr> result;
|
||||
for (auto entry : apply(rule->rule))
|
||||
result.push_back(rules::Metadata::build(entry, rule->params));
|
||||
return result;
|
||||
}
|
||||
[](const rules::Choice &choice) {
|
||||
vector<Rule> result;
|
||||
for (auto &element : choice.elements) {
|
||||
for (auto &entry : extract_choices(element)) {
|
||||
result.push_back(entry);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
},
|
||||
|
||||
vector<rule_ptr> apply_to(const rules::Choice *rule) {
|
||||
vector<rule_ptr> result;
|
||||
for (auto element : rule->elements)
|
||||
for (auto entry : apply(element))
|
||||
result.push_back(entry);
|
||||
return result;
|
||||
}
|
||||
|
||||
vector<rule_ptr> apply_to(const rules::Repeat *rule) {
|
||||
return vector<rule_ptr>({
|
||||
rules::Repeat::build(rules::Choice::build(apply(rule->content))),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<rule_ptr> extract_choices(const rule_ptr &rule) {
|
||||
return ExtractChoices().apply(rule);
|
||||
[](const auto &rule) {
|
||||
return vector<Rule>({rule});
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::vector<rule_ptr> extract_choices(const rule_ptr &);
|
||||
std::vector<rules::Rule> extract_choices(const rules::Rule &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,24 +1,19 @@
|
|||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/prepare_grammar/token_description.h"
|
||||
#include "compiler/prepare_grammar/is_token.h"
|
||||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
using std::make_shared;
|
||||
using std::make_tuple;
|
||||
using std::map;
|
||||
using std::set;
|
||||
|
|
@ -26,74 +21,143 @@ using std::string;
|
|||
using std::tuple;
|
||||
using std::vector;
|
||||
using rules::Symbol;
|
||||
using rules::Rule;
|
||||
using rules::Rule;
|
||||
|
||||
class SymbolReplacer : public rules::IdentityRuleFn {
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
rule_ptr apply_to(const Symbol *rule) {
|
||||
return replace_symbol(*rule).copy();
|
||||
}
|
||||
|
||||
class SymbolReplacer {
|
||||
public:
|
||||
map<Symbol, Symbol> replacements;
|
||||
|
||||
Rule apply(const Rule &rule) {
|
||||
return rule.match(
|
||||
[this](const rules::Blank &blank) -> Rule {
|
||||
return blank;
|
||||
},
|
||||
|
||||
[this](const rules::Symbol &symbol) {
|
||||
return replace_symbol(symbol);
|
||||
},
|
||||
|
||||
[this](const rules::Choice &choice) {
|
||||
vector<rules::Rule> elements;
|
||||
for (const auto &element : choice.elements) {
|
||||
elements.push_back(apply(element));
|
||||
}
|
||||
return Rule::choice(elements);
|
||||
},
|
||||
|
||||
[this](const rules::Seq &sequence) {
|
||||
return rules::Seq{
|
||||
apply(*sequence.left),
|
||||
apply(*sequence.right)
|
||||
};
|
||||
},
|
||||
|
||||
[this](const rules::Repeat &repeat) {
|
||||
return Rule::repeat(apply(*repeat.rule));
|
||||
},
|
||||
|
||||
[this](const rules::Metadata &metadata) {
|
||||
return rules::Metadata{apply(*metadata.rule), metadata.params};
|
||||
},
|
||||
|
||||
[](auto) {
|
||||
assert(!"Unexpected rule type");
|
||||
return rules::Blank{};
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
Symbol replace_symbol(const Symbol &symbol) {
|
||||
if (!symbol.is_non_terminal())
|
||||
return symbol;
|
||||
if (!symbol.is_non_terminal()) return symbol;
|
||||
|
||||
auto replacement_pair = replacements.find(symbol);
|
||||
if (replacement_pair != replacements.end())
|
||||
if (replacement_pair != replacements.end()) {
|
||||
return replacement_pair->second;
|
||||
}
|
||||
|
||||
int new_index = symbol.index;
|
||||
for (const auto &pair : replacements)
|
||||
if (pair.first.index < symbol.index)
|
||||
for (const auto &pair : replacements) {
|
||||
if (pair.first.index < symbol.index) {
|
||||
new_index--;
|
||||
return Symbol(new_index, Symbol::NonTerminal);
|
||||
}
|
||||
}
|
||||
|
||||
return Symbol::non_terminal(new_index);
|
||||
}
|
||||
};
|
||||
|
||||
class TokenExtractor : public rules::IdentityRuleFn {
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
rule_ptr apply_to_token(const Rule *input, VariableType entry_type, bool is_string) {
|
||||
for (size_t i = 0; i < tokens.size(); i++)
|
||||
if (tokens[i].rule->operator==(*input)) {
|
||||
class TokenExtractor {
|
||||
Symbol extract_token(const rules::Rule &input, VariableType entry_type) {
|
||||
for (size_t i = 0; i < tokens.size(); i++) {
|
||||
if (tokens[i].rule == input) {
|
||||
token_usage_counts[i]++;
|
||||
return make_shared<Symbol>(i, Symbol::Terminal);
|
||||
return Symbol::terminal(i);
|
||||
}
|
||||
|
||||
rule_ptr rule = input->copy();
|
||||
size_t index = tokens.size();
|
||||
tokens.push_back({token_description(rule), entry_type, rule, is_string});
|
||||
token_usage_counts.push_back(1);
|
||||
return make_shared<Symbol>(index, Symbol::Terminal);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::String *rule) {
|
||||
return apply_to_token(rule, VariableTypeAnonymous, true);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::Pattern *rule) {
|
||||
return apply_to_token(rule, VariableTypeAuxiliary, false);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::Metadata *rule) {
|
||||
if (rule->params.is_token) {
|
||||
return apply_to_token(rule->rule.get(), VariableTypeAuxiliary, false);
|
||||
} else {
|
||||
return rules::IdentityRuleFn::apply_to(rule);
|
||||
}
|
||||
|
||||
size_t index = tokens.size();
|
||||
tokens.push_back({
|
||||
token_description(input),
|
||||
entry_type,
|
||||
input
|
||||
});
|
||||
token_usage_counts.push_back(1);
|
||||
|
||||
return Symbol::terminal(index);
|
||||
}
|
||||
|
||||
public:
|
||||
vector<size_t> token_usage_counts;
|
||||
vector<LexicalVariable> tokens;
|
||||
};
|
||||
Rule apply(const rules::Rule &rule) {
|
||||
return rule.match(
|
||||
[this](const rules::Blank &blank) -> Rule { return blank; },
|
||||
|
||||
static CompileError extra_token_error(const string &message) {
|
||||
return CompileError(TSCompileErrorTypeInvalidExtraToken, "Not a token: " + message);
|
||||
}
|
||||
[this](const rules::Metadata &rule) -> Rule {
|
||||
if (rule.params.is_token) {
|
||||
return extract_token(*rule.rule, VariableTypeAuxiliary);
|
||||
} else {
|
||||
return rules::Metadata{apply(*rule.rule), rule.params};
|
||||
}
|
||||
},
|
||||
|
||||
[this](const rules::String &rule) {
|
||||
return extract_token(rule, VariableTypeAnonymous);
|
||||
},
|
||||
|
||||
[this](const rules::Pattern &rule) {
|
||||
return extract_token(rule, VariableTypeAuxiliary);
|
||||
},
|
||||
|
||||
[this](const rules::Repeat &rule) {
|
||||
return Rule::repeat(apply(*rule.rule));
|
||||
},
|
||||
|
||||
[this](const rules::Seq &rule) {
|
||||
return Rule::seq({apply(*rule.left), apply(*rule.right)});
|
||||
},
|
||||
|
||||
[this](const rules::Choice &rule) {
|
||||
std::vector<Rule> elements;
|
||||
for (const auto &element : rule.elements) {
|
||||
elements.push_back(apply(element));
|
||||
}
|
||||
return Rule::choice(elements);
|
||||
},
|
||||
|
||||
[](const rules::Symbol &symbol) {
|
||||
return symbol;
|
||||
},
|
||||
|
||||
[](auto) {
|
||||
assert(!"Unexpected rule type");
|
||||
return rules::Blank{};
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
vector<size_t> token_usage_counts;
|
||||
vector<InternedGrammar::Variable> tokens;
|
||||
};
|
||||
|
||||
tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
||||
const InternedGrammar &grammar
|
||||
|
|
@ -104,15 +168,29 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
TokenExtractor extractor;
|
||||
|
||||
// First, extract all of the grammar's tokens into the lexical grammar.
|
||||
vector<Variable> processed_variables;
|
||||
for (const Variable &variable : grammar.variables) {
|
||||
processed_variables.push_back(Variable{
|
||||
vector<InitialSyntaxGrammar::Variable> processed_variables;
|
||||
for (const auto &variable : grammar.variables) {
|
||||
processed_variables.push_back({
|
||||
variable.name,
|
||||
variable.type,
|
||||
extractor.apply(variable.rule)
|
||||
});
|
||||
}
|
||||
lexical_grammar.variables = extractor.tokens;
|
||||
|
||||
for (const auto &extracted_token : extractor.tokens) {
|
||||
auto expansion = expand_token(extracted_token.rule);
|
||||
if (expansion.error) return make_tuple(
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
expansion.error
|
||||
);
|
||||
lexical_grammar.variables.push_back({
|
||||
extracted_token.name,
|
||||
extracted_token.type,
|
||||
expansion.rule,
|
||||
extracted_token.type == VariableTypeAnonymous
|
||||
});
|
||||
}
|
||||
|
||||
// If a variable's entire rule was extracted as a token and that token didn't
|
||||
// appear within any other rule, then remove that variable from the syntax
|
||||
|
|
@ -120,26 +198,28 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
// that pointed to that variable will need to be updated to point to the
|
||||
// variable in the lexical grammar. Symbols that pointed to later variables
|
||||
// will need to have their indices decremented.
|
||||
size_t i = 0;
|
||||
for (const Variable &variable : processed_variables) {
|
||||
auto symbol = variable.rule->as<Symbol>();
|
||||
if (symbol && symbol->is_token() && extractor.token_usage_counts[symbol->index] == 1) {
|
||||
lexical_grammar.variables[symbol->index].type = variable.type;
|
||||
lexical_grammar.variables[symbol->index].name = variable.name;
|
||||
symbol_replacer.replacements.insert({ Symbol(i, Symbol::NonTerminal), *symbol });
|
||||
} else {
|
||||
syntax_grammar.variables.push_back(variable);
|
||||
}
|
||||
size_t i = -1;
|
||||
for (const auto &variable : processed_variables) {
|
||||
i++;
|
||||
if (variable.rule.is<Symbol>()) {
|
||||
auto symbol = variable.rule.get_unchecked<Symbol>();
|
||||
if (symbol.is_terminal() && extractor.token_usage_counts[symbol.index] == 1) {
|
||||
lexical_grammar.variables[symbol.index].type = variable.type;
|
||||
lexical_grammar.variables[symbol.index].name = variable.name;
|
||||
symbol_replacer.replacements[Symbol::non_terminal(i)] = symbol;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
syntax_grammar.variables.push_back(variable);
|
||||
}
|
||||
|
||||
// Perform any replacements of symbols needed based on the previous step.
|
||||
for (Variable &variable : syntax_grammar.variables) {
|
||||
for (auto &variable : syntax_grammar.variables) {
|
||||
variable.rule = symbol_replacer.apply(variable.rule);
|
||||
}
|
||||
|
||||
for (const ConflictSet &conflict_set : grammar.expected_conflicts) {
|
||||
ConflictSet new_conflict_set;
|
||||
for (const auto &conflict_set : grammar.expected_conflicts) {
|
||||
set<Symbol> new_conflict_set;
|
||||
for (const Symbol &symbol : conflict_set) {
|
||||
new_conflict_set.insert(symbol_replacer.replace_symbol(symbol));
|
||||
}
|
||||
|
|
@ -148,47 +228,51 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
|
||||
// The grammar's extra tokens can be either token rules or symbols
|
||||
// pointing to token rules. If they are symbols, then they'll be handled by
|
||||
// the parser; add them to the syntax grammar's ubiquitous tokens. If they
|
||||
// the parser; add them to the syntax grammar's extra tokens. If they
|
||||
// are anonymous rules, they can be handled by the lexer; add them to the
|
||||
// lexical grammar's separator rules.
|
||||
for (const rule_ptr &rule : grammar.extra_tokens) {
|
||||
int i = 0;
|
||||
bool used_elsewhere_in_grammar = false;
|
||||
for (const LexicalVariable &variable : lexical_grammar.variables) {
|
||||
if (variable.rule->operator==(*rule)) {
|
||||
syntax_grammar.extra_tokens.insert(Symbol(i, Symbol::Terminal));
|
||||
used_elsewhere_in_grammar = true;
|
||||
for (const auto &rule : grammar.extra_tokens) {
|
||||
CompileError error = rule.match(
|
||||
[&](const Symbol &symbol) {
|
||||
Symbol new_symbol = symbol_replacer.replace_symbol(symbol);
|
||||
if (new_symbol.is_non_terminal()) {
|
||||
return CompileError(
|
||||
TSCompileErrorTypeInvalidExtraToken,
|
||||
"Non-token symbol " + syntax_grammar.variables[new_symbol.index].name + " can't be used as an extra token"
|
||||
);
|
||||
} else {
|
||||
syntax_grammar.extra_tokens.insert(new_symbol);
|
||||
return CompileError::none();
|
||||
}
|
||||
},
|
||||
|
||||
[&](auto non_symbol) {
|
||||
auto expansion = expand_token(non_symbol);
|
||||
if (expansion.error) return CompileError(
|
||||
TSCompileErrorTypeInvalidExtraToken,
|
||||
"Non-token rule expression can't be used as an extra token"
|
||||
);
|
||||
int i = 0;
|
||||
for (const LexicalVariable &variable : lexical_grammar.variables) {
|
||||
if (variable.rule == expansion.rule) {
|
||||
syntax_grammar.extra_tokens.insert(Symbol::terminal(i));
|
||||
return CompileError::none();
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
lexical_grammar.separators.push_back(expansion.rule);
|
||||
return CompileError::none();
|
||||
}
|
||||
i++;
|
||||
}
|
||||
);
|
||||
|
||||
if (used_elsewhere_in_grammar) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_token(rule)) {
|
||||
lexical_grammar.separators.push_back(rule);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto symbol = rule->as<Symbol>();
|
||||
if (!symbol) {
|
||||
return make_tuple(syntax_grammar, lexical_grammar,
|
||||
extra_token_error(rule->to_string()));
|
||||
}
|
||||
|
||||
Symbol new_symbol = symbol_replacer.replace_symbol(*symbol);
|
||||
if (new_symbol.is_non_terminal()) {
|
||||
return make_tuple(
|
||||
syntax_grammar, lexical_grammar,
|
||||
extra_token_error(syntax_grammar.variables[new_symbol.index].name));
|
||||
}
|
||||
|
||||
syntax_grammar.extra_tokens.insert(new_symbol);
|
||||
if (error) return make_tuple(syntax_grammar, lexical_grammar, error);
|
||||
}
|
||||
|
||||
for (const ExternalToken &external_token : grammar.external_tokens) {
|
||||
Symbol internal_token = symbol_replacer.replace_symbol(external_token.corresponding_internal_token);
|
||||
Symbol internal_token = symbol_replacer.replace_symbol(
|
||||
external_token.corresponding_internal_token
|
||||
);
|
||||
|
||||
if (internal_token.is_non_terminal()) {
|
||||
return make_tuple(
|
||||
|
|
|
|||
|
|
@ -4,14 +4,15 @@
|
|||
#include <tuple>
|
||||
#include "compiler/compile_error.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
||||
const InternedGrammar &);
|
||||
const InternedGrammar &
|
||||
);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,13 +1,11 @@
|
|||
#include "compiler/prepare_grammar/flatten_grammar.h"
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include "compiler/prepare_grammar/extract_choices.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
@ -15,8 +13,9 @@ namespace prepare_grammar {
|
|||
using std::find;
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
using rules::Rule;
|
||||
|
||||
class FlattenRule : public rules::RuleFn<void> {
|
||||
class FlattenRule {
|
||||
private:
|
||||
vector<int> precedence_stack;
|
||||
vector<rules::Associativity> associativity_stack;
|
||||
|
|
@ -24,40 +23,53 @@ class FlattenRule : public rules::RuleFn<void> {
|
|||
rules::Associativity last_associativity;
|
||||
Production production;
|
||||
|
||||
void apply_to(const rules::Symbol *sym) {
|
||||
production.push_back(ProductionStep{
|
||||
*sym,
|
||||
precedence_stack.back(),
|
||||
associativity_stack.back()
|
||||
});
|
||||
}
|
||||
void apply(const Rule &rule) {
|
||||
rule.match(
|
||||
[&](const rules::Symbol &symbol) {
|
||||
production.push_back(ProductionStep{
|
||||
symbol,
|
||||
precedence_stack.back(),
|
||||
associativity_stack.back()
|
||||
});
|
||||
},
|
||||
|
||||
void apply_to(const rules::Metadata *metadata) {
|
||||
if (metadata->params.has_precedence)
|
||||
precedence_stack.push_back(metadata->params.precedence);
|
||||
if (metadata->params.has_associativity)
|
||||
associativity_stack.push_back(metadata->params.associativity);
|
||||
[&](const rules::Metadata &metadata) {
|
||||
if (metadata.params.has_precedence) {
|
||||
precedence_stack.push_back(metadata.params.precedence);
|
||||
}
|
||||
|
||||
apply(metadata->rule);
|
||||
if (metadata.params.has_associativity) {
|
||||
associativity_stack.push_back(metadata.params.associativity);
|
||||
}
|
||||
|
||||
if (metadata->params.has_precedence) {
|
||||
last_precedence = precedence_stack.back();
|
||||
precedence_stack.pop_back();
|
||||
production.back().precedence = precedence_stack.back();
|
||||
}
|
||||
apply(*metadata.rule);
|
||||
|
||||
if (metadata->params.has_associativity) {
|
||||
last_associativity = associativity_stack.back();
|
||||
associativity_stack.pop_back();
|
||||
production.back().associativity = associativity_stack.back();
|
||||
}
|
||||
}
|
||||
if (metadata.params.has_precedence) {
|
||||
last_precedence = precedence_stack.back();
|
||||
precedence_stack.pop_back();
|
||||
production.back().precedence = precedence_stack.back();
|
||||
}
|
||||
|
||||
void apply_to(const rules::Seq *seq) {
|
||||
apply(seq->left);
|
||||
last_precedence = 0;
|
||||
last_associativity = rules::AssociativityNone;
|
||||
apply(seq->right);
|
||||
if (metadata.params.has_associativity) {
|
||||
last_associativity = associativity_stack.back();
|
||||
associativity_stack.pop_back();
|
||||
production.back().associativity = associativity_stack.back();
|
||||
}
|
||||
},
|
||||
|
||||
[&](const rules::Seq &sequence) {
|
||||
apply(*sequence.left);
|
||||
last_precedence = 0;
|
||||
last_associativity = rules::AssociativityNone;
|
||||
apply(*sequence.right);
|
||||
},
|
||||
|
||||
[&](const rules::Blank &blank) {},
|
||||
|
||||
[&](auto) {
|
||||
assert(!"Unexpected rule type");
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
public:
|
||||
|
|
@ -67,7 +79,7 @@ class FlattenRule : public rules::RuleFn<void> {
|
|||
last_precedence(0),
|
||||
last_associativity(rules::AssociativityNone) {}
|
||||
|
||||
Production flatten(const rule_ptr &rule) {
|
||||
Production flatten(const Rule &rule) {
|
||||
apply(rule);
|
||||
if (!production.empty()) {
|
||||
production.back().precedence = last_precedence;
|
||||
|
|
@ -77,10 +89,10 @@ class FlattenRule : public rules::RuleFn<void> {
|
|||
}
|
||||
};
|
||||
|
||||
SyntaxVariable flatten_rule(const Variable &variable) {
|
||||
SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable) {
|
||||
vector<Production> productions;
|
||||
|
||||
for (const rule_ptr &rule_component : extract_choices(variable.rule)) {
|
||||
for (const Rule &rule_component : extract_choices(variable.rule)) {
|
||||
Production production = FlattenRule().flatten(rule_component);
|
||||
auto end = productions.end();
|
||||
if (find(productions.begin(), end, production) == end) {
|
||||
|
|
@ -93,12 +105,21 @@ SyntaxVariable flatten_rule(const Variable &variable) {
|
|||
|
||||
pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &grammar) {
|
||||
SyntaxGrammar result;
|
||||
result.expected_conflicts = grammar.expected_conflicts;
|
||||
result.extra_tokens = grammar.extra_tokens;
|
||||
result.external_tokens = grammar.external_tokens;
|
||||
|
||||
for (const auto &expected_conflict : grammar.expected_conflicts) {
|
||||
result.expected_conflicts.insert({
|
||||
expected_conflict.begin(),
|
||||
expected_conflict.end(),
|
||||
});
|
||||
}
|
||||
|
||||
for (const rules::Symbol &extra_token : grammar.extra_tokens) {
|
||||
result.extra_tokens.insert(extra_token);
|
||||
}
|
||||
|
||||
bool is_start = true;
|
||||
for (const Variable &variable : grammar.variables) {
|
||||
for (const auto &variable : grammar.variables) {
|
||||
SyntaxVariable syntax_variable = flatten_rule(variable);
|
||||
|
||||
if (!is_start) {
|
||||
|
|
|
|||
|
|
@ -4,14 +4,14 @@
|
|||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/compile_error.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
struct InitialSyntaxGrammar;
|
||||
|
||||
SyntaxVariable flatten_rule(const Variable &variable);
|
||||
SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable);
|
||||
std::pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -4,17 +4,26 @@
|
|||
#include <set>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/variable.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
struct InitialSyntaxGrammar {
|
||||
struct Variable {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Rule rule;
|
||||
|
||||
inline bool operator==(const Variable &other) const {
|
||||
return name == other.name && type == other.type && rule == other.rule;
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<Variable> variables;
|
||||
std::set<rules::Symbol> extra_tokens;
|
||||
std::set<ConflictSet> expected_conflicts;
|
||||
std::set<std::set<rules::Symbol>> expected_conflicts;
|
||||
std::vector<ExternalToken> external_tokens;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,11 @@
|
|||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
@ -17,34 +14,62 @@ using std::string;
|
|||
using std::vector;
|
||||
using std::set;
|
||||
using std::pair;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::Rule;
|
||||
|
||||
class SymbolInterner : public rules::IdentityRuleFn {
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
rule_ptr apply_to(const rules::NamedSymbol *rule) {
|
||||
auto result = symbol_for_rule_name(rule->name);
|
||||
if (!result.get()) {
|
||||
missing_rule_name = rule->name;
|
||||
return rules::Blank::build();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
class SymbolInterner {
|
||||
public:
|
||||
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
|
||||
for (size_t i = 0; i < grammar.rules.size(); i++)
|
||||
if (grammar.rules[i].first == rule_name)
|
||||
return make_shared<Symbol>(i, Symbol::NonTerminal);
|
||||
for (size_t i = 0; i < grammar.external_tokens.size(); i++)
|
||||
if (grammar.external_tokens[i] == rule_name)
|
||||
return make_shared<rules::Symbol>(i, Symbol::External);
|
||||
return nullptr;
|
||||
Rule apply(const Rule &rule) {
|
||||
return rule.match(
|
||||
[&](const rules::Blank &blank) -> Rule { return blank; },
|
||||
|
||||
[&](const rules::NamedSymbol &symbol) {
|
||||
return intern_symbol(symbol);
|
||||
},
|
||||
|
||||
[&](const rules::String &string) { return string; },
|
||||
[&](const rules::Pattern &pattern) { return pattern; },
|
||||
|
||||
[&](const rules::Choice &choice) {
|
||||
vector<rules::Rule> elements;
|
||||
for (const auto &element : choice.elements) {
|
||||
elements.push_back(apply(element));
|
||||
}
|
||||
return rules::Choice{elements};
|
||||
},
|
||||
|
||||
[&](const rules::Seq &sequence) {
|
||||
return rules::Seq{apply(*sequence.left), apply(*sequence.right)};
|
||||
},
|
||||
|
||||
[&](const rules::Repeat &repeat) {
|
||||
return rules::Repeat{apply(*repeat.rule)};
|
||||
},
|
||||
|
||||
[&](const rules::Metadata &metadata) {
|
||||
return rules::Metadata{apply(*metadata.rule), metadata.params};
|
||||
},
|
||||
|
||||
[](auto) {
|
||||
assert(!"Unexpected rule type");
|
||||
return rules::Blank{};
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
explicit SymbolInterner(const Grammar &grammar) : grammar(grammar) {}
|
||||
const Grammar grammar;
|
||||
Symbol intern_symbol(rules::NamedSymbol named_symbol) {
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++)
|
||||
if (grammar.variables[i].name == named_symbol.value)
|
||||
return Symbol::non_terminal(i);
|
||||
for (size_t i = 0; i < grammar.external_tokens.size(); i++)
|
||||
if (grammar.external_tokens[i].name == named_symbol.value)
|
||||
return Symbol::external(i);
|
||||
missing_rule_name = named_symbol.value;
|
||||
return rules::NONE();
|
||||
}
|
||||
|
||||
explicit SymbolInterner(const InputGrammar &grammar) : grammar(grammar) {}
|
||||
const InputGrammar &grammar;
|
||||
string missing_rule_name;
|
||||
};
|
||||
|
||||
|
|
@ -53,52 +78,55 @@ CompileError missing_rule_error(string rule_name) {
|
|||
"Undefined rule '" + rule_name + "'");
|
||||
}
|
||||
|
||||
pair<InternedGrammar, CompileError> intern_symbols(const Grammar &grammar) {
|
||||
pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &grammar) {
|
||||
InternedGrammar result;
|
||||
|
||||
for (auto &external_token_name : grammar.external_tokens) {
|
||||
for (auto &external_token : grammar.external_tokens) {
|
||||
Symbol corresponding_internal_token = rules::NONE();
|
||||
for (size_t i = 0, n = grammar.rules.size(); i < n; i++) {
|
||||
if (grammar.rules[i].first == external_token_name) {
|
||||
corresponding_internal_token = Symbol(i, Symbol::NonTerminal);
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
if (grammar.variables[i].name == external_token.name) {
|
||||
corresponding_internal_token = Symbol::non_terminal(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result.external_tokens.push_back(ExternalToken{
|
||||
external_token_name,
|
||||
external_token_name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
|
||||
external_token.name,
|
||||
external_token.name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
|
||||
corresponding_internal_token
|
||||
});
|
||||
}
|
||||
|
||||
SymbolInterner interner(grammar);
|
||||
|
||||
for (auto &pair : grammar.rules) {
|
||||
auto new_rule = interner.apply(pair.second);
|
||||
if (!interner.missing_rule_name.empty())
|
||||
for (auto &variable : grammar.variables) {
|
||||
auto new_rule = interner.apply(variable.rule);
|
||||
if (!interner.missing_rule_name.empty()) {
|
||||
return { result, missing_rule_error(interner.missing_rule_name) };
|
||||
}
|
||||
|
||||
result.variables.push_back(Variable{
|
||||
pair.first,
|
||||
pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
|
||||
result.variables.push_back(InternedGrammar::Variable{
|
||||
variable.name,
|
||||
variable.name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
|
||||
new_rule
|
||||
});
|
||||
}
|
||||
|
||||
for (auto &rule : grammar.extra_tokens) {
|
||||
auto new_rule = interner.apply(rule);
|
||||
if (!interner.missing_rule_name.empty())
|
||||
if (!interner.missing_rule_name.empty()) {
|
||||
return { result, missing_rule_error(interner.missing_rule_name) };
|
||||
}
|
||||
result.extra_tokens.push_back(new_rule);
|
||||
}
|
||||
|
||||
for (auto &names : grammar.expected_conflicts) {
|
||||
for (auto &expected_conflict : grammar.expected_conflicts) {
|
||||
set<rules::Symbol> entry;
|
||||
for (auto &name : names) {
|
||||
auto symbol = interner.symbol_for_rule_name(name);
|
||||
if (symbol.get())
|
||||
entry.insert(*symbol);
|
||||
for (auto &named_symbol : expected_conflict) {
|
||||
auto symbol = interner.intern_symbol(named_symbol);
|
||||
if (symbol != rules::NONE()) {
|
||||
entry.insert(symbol);
|
||||
}
|
||||
}
|
||||
result.expected_conflicts.insert(entry);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,11 +8,11 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct Grammar;
|
||||
struct InputGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<InternedGrammar, CompileError> intern_symbols(const Grammar &);
|
||||
std::pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,17 +4,26 @@
|
|||
#include <set>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/variable.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
struct InternedGrammar {
|
||||
struct Variable {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Rule rule;
|
||||
|
||||
bool operator==(const Variable &other) const {
|
||||
return name == other.name && type == other.type && rule == other.rule;
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<Variable> variables;
|
||||
std::vector<rule_ptr> extra_tokens;
|
||||
std::set<ConflictSet> expected_conflicts;
|
||||
std::vector<rules::Rule> extra_tokens;
|
||||
std::set<std::set<rules::Symbol>> expected_conflicts;
|
||||
std::vector<ExternalToken> external_tokens;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,30 +0,0 @@
|
|||
#include "compiler/prepare_grammar/is_token.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
// Rule function producing a bool: String and Pattern rules yield true, and
// a Metadata rule yields the value of its `params.is_token` field.
class IsToken : public rules::RuleFn<bool> {
  bool apply_to(const rules::String *) { return true; }

  bool apply_to(const rules::Pattern *) { return true; }

  bool apply_to(const rules::Metadata *metadata) {
    return metadata->params.is_token;
  }
};
|
||||
|
||||
bool is_token(const rule_ptr &rule) {
|
||||
return IsToken().apply(rule);
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
|
||||
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
bool is_token(const rule_ptr &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
|
||||
|
|
@ -1,15 +1,17 @@
|
|||
#include "compiler/prepare_grammar/normalize_rules.h"
|
||||
#include "compiler/prepare_grammar/extract_choices.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
using std::vector;
|
||||
using rules::Rule;
|
||||
|
||||
LexicalGrammar normalize_rules(const LexicalGrammar &input_grammar) {
|
||||
LexicalGrammar result(input_grammar);
|
||||
|
||||
for (LexicalVariable &variable : result.variables) {
|
||||
variable.rule = rules::Choice::build(extract_choices(variable.rule));
|
||||
variable.rule = Rule::choice(extract_choices(variable.rule));
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -2,11 +2,7 @@
|
|||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
#include "utf8proc.h"
|
||||
|
||||
|
|
@ -16,12 +12,9 @@ namespace prepare_grammar {
|
|||
using std::string;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::make_shared;
|
||||
using rules::CharacterSet;
|
||||
using rules::Seq;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::Repeat;
|
||||
using rules::Rule;
|
||||
|
||||
class PatternParser {
|
||||
public:
|
||||
|
|
@ -32,103 +25,121 @@ class PatternParser {
|
|||
next();
|
||||
}
|
||||
|
||||
pair<rule_ptr, CompileError> rule(bool nested) {
|
||||
vector<rule_ptr> choices = {};
|
||||
pair<Rule, CompileError> rule(bool nested) {
|
||||
vector<Rule> choices;
|
||||
do {
|
||||
if (!choices.empty()) {
|
||||
if (peek() == '|')
|
||||
if (peek() == '|') {
|
||||
next();
|
||||
else
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto pair = term(nested);
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
if (pair.second.type) {
|
||||
return {Blank{}, pair.second };
|
||||
}
|
||||
choices.push_back(pair.first);
|
||||
} while (has_more_input());
|
||||
auto rule =
|
||||
(choices.size() > 1) ? make_shared<Choice>(choices) : choices.front();
|
||||
return { rule, CompileError::none() };
|
||||
return {Rule::choice(choices), CompileError::none()};
|
||||
}
|
||||
|
||||
private:
|
||||
pair<rule_ptr, CompileError> term(bool nested) {
|
||||
rule_ptr result = Blank::build();
|
||||
pair<Rule, CompileError> term(bool nested) {
|
||||
Rule result;
|
||||
do {
|
||||
if (peek() == '|')
|
||||
break;
|
||||
if (nested && peek() == ')')
|
||||
break;
|
||||
auto pair = factor();
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
result = Seq::build({ result, pair.first });
|
||||
if (pair.second) {
|
||||
return {Blank{}, pair.second};
|
||||
}
|
||||
result = Rule::seq({result, pair.first});
|
||||
} while (has_more_input());
|
||||
return { result, CompileError::none() };
|
||||
}
|
||||
|
||||
pair<rule_ptr, CompileError> factor() {
|
||||
pair<Rule, CompileError> factor() {
|
||||
auto pair = atom();
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
rule_ptr result = pair.first;
|
||||
if (pair.second.type) {
|
||||
return {Blank{}, pair.second};
|
||||
}
|
||||
|
||||
Rule result = pair.first;
|
||||
if (has_more_input()) {
|
||||
switch (peek()) {
|
||||
case '*':
|
||||
next();
|
||||
result = Choice::build({ Repeat::build(result), Blank::build() });
|
||||
result = Rule::choice({
|
||||
Rule::repeat(result),
|
||||
Blank{}
|
||||
});
|
||||
break;
|
||||
case '+':
|
||||
next();
|
||||
result = Repeat::build(result);
|
||||
result = Rule::repeat(result);
|
||||
break;
|
||||
case '?':
|
||||
next();
|
||||
result = Choice::build({ result, Blank::build() });
|
||||
result = Rule::choice({result, Blank{}});
|
||||
break;
|
||||
}
|
||||
}
|
||||
return { result, CompileError::none() };
|
||||
|
||||
return {result, CompileError::none()};
|
||||
}
|
||||
|
||||
pair<rule_ptr, CompileError> atom() {
|
||||
pair<Rule, CompileError> atom() {
|
||||
switch (peek()) {
|
||||
case '(': {
|
||||
next();
|
||||
auto pair = rule(true);
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
if (peek() != ')')
|
||||
if (pair.second.type) {
|
||||
return {Blank{}, pair.second};
|
||||
}
|
||||
if (peek() != ')') {
|
||||
return error("unmatched open paren");
|
||||
}
|
||||
next();
|
||||
return { pair.first, CompileError::none() };
|
||||
return {pair.first, CompileError::none()};
|
||||
}
|
||||
|
||||
case '[': {
|
||||
next();
|
||||
auto pair = char_set();
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
if (peek() != ']')
|
||||
if (pair.second.type) {
|
||||
return {Blank{}, pair.second};
|
||||
}
|
||||
if (peek() != ']') {
|
||||
return error("unmatched open square bracket");
|
||||
}
|
||||
next();
|
||||
return { pair.first.copy(), CompileError::none() };
|
||||
return {pair.first, CompileError::none()};
|
||||
}
|
||||
|
||||
case ')': {
|
||||
return error("unmatched close paren");
|
||||
}
|
||||
|
||||
case ']': {
|
||||
return error("unmatched close square bracket");
|
||||
}
|
||||
|
||||
case '.': {
|
||||
next();
|
||||
return { CharacterSet().include_all().exclude('\n').copy(),
|
||||
CompileError::none() };
|
||||
return {
|
||||
CharacterSet().include_all().exclude('\n'),
|
||||
CompileError::none()
|
||||
};
|
||||
}
|
||||
|
||||
default: {
|
||||
auto pair = single_char();
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
return { pair.first.copy(), CompileError::none() };
|
||||
return { Blank{}, pair.second };
|
||||
return {pair.first, CompileError::none()};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -234,8 +245,8 @@ class PatternParser {
|
|||
return lookahead && iter <= end;
|
||||
}
|
||||
|
||||
pair<rule_ptr, CompileError> error(string msg) {
|
||||
return { Blank::build(), CompileError(TSCompileErrorTypeInvalidRegex, msg) };
|
||||
pair<Rule, CompileError> error(string msg) {
|
||||
return { Blank{}, CompileError(TSCompileErrorTypeInvalidRegex, msg) };
|
||||
}
|
||||
|
||||
string input;
|
||||
|
|
@ -244,7 +255,7 @@ class PatternParser {
|
|||
int32_t lookahead;
|
||||
};
|
||||
|
||||
pair<rule_ptr, CompileError> parse_regex(const std::string &input) {
|
||||
pair<Rule, CompileError> parse_regex(const std::string &input) {
|
||||
return PatternParser(input.c_str()).rule(false);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<rule_ptr, CompileError> parse_regex(const std::string &);
|
||||
std::pair<rules::Rule, CompileError> parse_regex(const std::string &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ using std::get;
|
|||
using std::make_tuple;
|
||||
|
||||
tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
|
||||
const Grammar &input_grammar) {
|
||||
const InputGrammar &input_grammar) {
|
||||
/*
|
||||
* Convert all string-based `NamedSymbols` into numerical `Symbols`
|
||||
*/
|
||||
|
|
@ -31,8 +31,9 @@ tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
|
|||
*/
|
||||
auto extract_result = extract_tokens(intern_result.first);
|
||||
error = get<2>(extract_result);
|
||||
if (error.type)
|
||||
if (error.type) {
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Replace `Repeat` rules with pairs of recursive rules
|
||||
|
|
@ -42,11 +43,12 @@ tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
|
|||
/*
|
||||
* Expand `String` and `Pattern` rules into full rule trees
|
||||
*/
|
||||
auto expand_tokens_result = expand_tokens(get<1>(extract_result));
|
||||
LexicalGrammar lex_grammar = expand_tokens_result.first;
|
||||
error = expand_tokens_result.second;
|
||||
if (error.type)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
LexicalGrammar lex_grammar = get<1>(extract_result);
|
||||
// auto expand_tokens_result = expand_tokens(get<1>(extract_result));
|
||||
// LexicalGrammar lex_grammar = expand_tokens_result.first;
|
||||
// error = expand_tokens_result.second;
|
||||
// if (error.type)
|
||||
// return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
/*
|
||||
* Flatten syntax rules into lists of productions.
|
||||
|
|
|
|||
|
|
@ -2,18 +2,15 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
|
||||
|
||||
#include <tuple>
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct Grammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
|
||||
const Grammar &);
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(const InputGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,68 +1,82 @@
|
|||
#include "compiler/prepare_grammar/token_description.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
using std::string;
|
||||
using rules::Rule;
|
||||
|
||||
class TokenDescription : public rules::RuleFn<string> {
|
||||
string apply_to(const rules::Pattern *rule) {
|
||||
is_trivial = false;
|
||||
return rule->value;
|
||||
}
|
||||
class TokenDescription {
|
||||
bool is_trivial;
|
||||
|
||||
string apply_to(const rules::String *rule) {
|
||||
return rule->value;
|
||||
}
|
||||
string apply(const Rule &rule) {
|
||||
return rule.match(
|
||||
[&](const rules::Blank) -> string {
|
||||
return "";
|
||||
},
|
||||
|
||||
string apply_to(const rules::Metadata *rule) {
|
||||
return apply(rule->rule);
|
||||
}
|
||||
[&](const rules::Symbol) {
|
||||
return "";
|
||||
},
|
||||
|
||||
string apply_to(const rules::Seq *rule) {
|
||||
is_trivial = false;
|
||||
return apply(rule->left) + apply(rule->right);
|
||||
}
|
||||
[&](const rules::Pattern &rule) {
|
||||
is_trivial = false;
|
||||
return rule.value;
|
||||
},
|
||||
|
||||
string apply_to(const rules::Repeat *rule) {
|
||||
is_trivial = false;
|
||||
return apply(rule->content) + "*";
|
||||
}
|
||||
[&](const rules::String &rule) {
|
||||
return rule.value;
|
||||
},
|
||||
|
||||
string apply_to(const rules::Choice *rule) {
|
||||
is_trivial = false;
|
||||
string result = "(";
|
||||
bool started = false;
|
||||
for (auto &element : rule->elements) {
|
||||
if (started)
|
||||
result += "|";
|
||||
result += apply(element);
|
||||
started = true;
|
||||
}
|
||||
return result + ")";
|
||||
[&](const rules::Metadata &rule) {
|
||||
return apply(*rule.rule);
|
||||
},
|
||||
|
||||
[&](const rules::Seq &rule) {
|
||||
is_trivial = false;
|
||||
return apply(*rule.left) + apply(*rule.right);
|
||||
},
|
||||
|
||||
[&](const rules::Repeat &rule) {
|
||||
is_trivial = false;
|
||||
return apply(*rule.rule) + "+";
|
||||
},
|
||||
|
||||
[&](const rules::Choice &rule) {
|
||||
is_trivial = false;
|
||||
string result = "(";
|
||||
bool started = false;
|
||||
for (auto &element : rule.elements) {
|
||||
if (started) result += "|";
|
||||
result += apply(element);
|
||||
started = true;
|
||||
}
|
||||
return result + ")";
|
||||
},
|
||||
|
||||
[](auto) {
|
||||
return "";
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
public:
|
||||
bool is_trivial;
|
||||
string describe(const Rule &rule) {
|
||||
string result = apply(rule);
|
||||
if (is_trivial) {
|
||||
return result;
|
||||
} else {
|
||||
return "/" + result + "/";
|
||||
}
|
||||
}
|
||||
|
||||
TokenDescription() : is_trivial(true) {}
|
||||
};
|
||||
|
||||
string token_description(const rule_ptr &rule) {
|
||||
TokenDescription description;
|
||||
string result = description.apply(rule);
|
||||
if (description.is_trivial)
|
||||
return result;
|
||||
else
|
||||
return "/" + result + "/";
|
||||
string token_description(const Rule &rule) {
|
||||
return TokenDescription().describe(rule);
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::string token_description(const rule_ptr &);
|
||||
std::string token_description(const rules::Rule &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,15 +1,287 @@
|
|||
#include "compiler/rule.h"
|
||||
#include <memory>
|
||||
#include "compiler/util/hash_combine.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::ostream;
|
||||
using std::string;
|
||||
using std::move;
|
||||
using std::vector;
|
||||
using util::hash_combine;
|
||||
|
||||
bool Rule::operator!=(const Rule &other) const {
|
||||
return !this->operator==(other);
|
||||
Rule::Rule(const Rule &other) : blank_(Blank{}), type(BlankType) {
|
||||
*this = other;
|
||||
}
|
||||
|
||||
Rule::~Rule() {}
|
||||
Rule::Rule(Rule &&other) noexcept : blank_(Blank{}), type(BlankType) {
|
||||
*this = move(other);
|
||||
}
|
||||
|
||||
// Runs the destructor of the union member selected by `rule->type`.
// The tag itself is left unchanged; callers are expected to construct a new
// member (and set the tag) before the Rule is used again.
static void destroy_value(Rule *rule) {
  switch (rule->type) {
    case Rule::BlankType:
      rule->blank_.~Blank();
      break;
    case Rule::CharacterSetType:
      rule->character_set_.~CharacterSet();
      break;
    case Rule::StringType:
      rule->string_.~String();
      break;
    case Rule::PatternType:
      rule->pattern_.~Pattern();
      break;
    case Rule::NamedSymbolType:
      rule->named_symbol_.~NamedSymbol();
      break;
    case Rule::SymbolType:
      rule->symbol_.~Symbol();
      break;
    case Rule::ChoiceType:
      rule->choice_.~Choice();
      break;
    case Rule::MetadataType:
      rule->metadata_.~Metadata();
      break;
    case Rule::RepeatType:
      rule->repeat_.~Repeat();
      break;
    case Rule::SeqType:
      rule->seq_.~Seq();
      break;
  }
}
|
||||
|
||||
// Copy assignment: destroys the currently active union member, adopts
// `other`'s type tag and copy-constructs the matching member in place.
//
// Self-assignment is a no-op. Without the guard the active member would be
// destroyed first and then copy-constructed from its own destroyed storage.
//
// NOTE(review): assigning from a Rule that is owned by this Rule's current
// value (e.g. a child of its Seq/Choice) is presumably still unsafe, because
// the current value is destroyed before `other` is read -- confirm against
// how the child rules are stored.
Rule &Rule::operator=(const Rule &other) {
  if (this == &other) {
    return *this;
  }

  destroy_value(this);
  type = other.type;
  switch (type) {
    case BlankType:
      new (&blank_) Blank(other.blank_);
      break;
    case CharacterSetType:
      new (&character_set_) CharacterSet(other.character_set_);
      break;
    case StringType:
      new (&string_) String(other.string_);
      break;
    case PatternType:
      new (&pattern_) Pattern(other.pattern_);
      break;
    case NamedSymbolType:
      new (&named_symbol_) NamedSymbol(other.named_symbol_);
      break;
    case SymbolType:
      new (&symbol_) Symbol(other.symbol_);
      break;
    case ChoiceType:
      new (&choice_) Choice(other.choice_);
      break;
    case MetadataType:
      new (&metadata_) Metadata(other.metadata_);
      break;
    case RepeatType:
      new (&repeat_) Repeat(other.repeat_);
      break;
    case SeqType:
      new (&seq_) Seq(other.seq_);
      break;
  }
  return *this;
}
|
||||
|
||||
// Move assignment: destroys the currently active union member, adopts
// `other`'s type tag and move-constructs the matching member in place.
// Afterwards `other` is reset to a Blank rule.
//
// Self-move is a no-op. Without the guard the active member would be
// destroyed and then move-constructed from its own destroyed storage.
Rule &Rule::operator=(Rule &&other) noexcept {
  if (this == &other) {
    return *this;
  }

  destroy_value(this);
  type = other.type;
  switch (type) {
    case BlankType:
      new (&blank_) Blank(move(other.blank_));
      break;
    case CharacterSetType:
      new (&character_set_) CharacterSet(move(other.character_set_));
      break;
    case StringType:
      new (&string_) String(move(other.string_));
      break;
    case PatternType:
      new (&pattern_) Pattern(move(other.pattern_));
      break;
    case NamedSymbolType:
      new (&named_symbol_) NamedSymbol(move(other.named_symbol_));
      break;
    case SymbolType:
      new (&symbol_) Symbol(move(other.symbol_));
      break;
    case ChoiceType:
      new (&choice_) Choice(move(other.choice_));
      break;
    case MetadataType:
      new (&metadata_) Metadata(move(other.metadata_));
      break;
    case RepeatType:
      new (&repeat_) Repeat(move(other.repeat_));
      break;
    case SeqType:
      new (&seq_) Seq(move(other.seq_));
      break;
  }

  // The moved-from member is still a live object: run its destructor before
  // re-tagging `other`, otherwise it would never be destroyed. Then begin the
  // lifetime of the Blank member with placement new instead of assigning to
  // storage that holds no Blank object.
  destroy_value(&other);
  other.type = BlankType;
  new (&other.blank_) Blank{};
  return *this;
}
|
||||
|
||||
// Releases the rule's value by delegating to destroy_value().
Rule::~Rule() noexcept { destroy_value(this); }
|
||||
|
||||
// Two rules are equal when they carry the same type tag and the members
// selected by that tag compare equal. Any tag without an explicit case is
// compared through the Blank member.
bool Rule::operator==(const Rule &other) const {
  if (type == other.type) {
    switch (type) {
      case Rule::CharacterSetType:
        return character_set_ == other.character_set_;
      case Rule::StringType:
        return string_ == other.string_;
      case Rule::PatternType:
        return pattern_ == other.pattern_;
      case Rule::NamedSymbolType:
        return named_symbol_ == other.named_symbol_;
      case Rule::SymbolType:
        return symbol_ == other.symbol_;
      case Rule::ChoiceType:
        return choice_ == other.choice_;
      case Rule::MetadataType:
        return metadata_ == other.metadata_;
      case Rule::RepeatType:
        return repeat_ == other.repeat_;
      case Rule::SeqType:
        return seq_ == other.seq_;
      default:
        return blank_ == other.blank_;
    }
  }
  return false;
}
|
||||
|
||||
template <>
|
||||
bool Rule::is<Blank>() const { return type == BlankType; }
|
||||
|
||||
template <>
|
||||
bool Rule::is<Symbol>() const { return type == SymbolType; }
|
||||
|
||||
template <>
|
||||
bool Rule::is<Repeat>() const { return type == RepeatType; }
|
||||
|
||||
template <>
|
||||
const Symbol & Rule::get_unchecked<Symbol>() const { return symbol_; }
|
||||
|
||||
static inline void add_choice_element(std::vector<Rule> *elements, const Rule &new_rule) {
|
||||
new_rule.match(
|
||||
[elements](Choice choice) {
|
||||
for (auto &element : choice.elements) {
|
||||
add_choice_element(elements, element);
|
||||
}
|
||||
},
|
||||
|
||||
[elements](auto rule) {
|
||||
for (auto &element : *elements) {
|
||||
if (element == rule) return;
|
||||
}
|
||||
elements->push_back(rule);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
// Builds a choice from `rules`, flattening nested choices and dropping
// duplicate elements. When exactly one element remains, that element is
// returned directly instead of being wrapped in a Choice.
Rule Rule::choice(const vector<Rule> &rules) {
  vector<Rule> flattened;
  for (auto &rule : rules) {
    add_choice_element(&flattened, rule);
  }

  if (flattened.size() == 1) {
    return flattened.front();
  }
  return Choice{flattened};
}
|
||||
|
||||
// Wraps `rule` in a Repeat, unless it already is a Repeat, in which case it
// is returned unchanged.
Rule Rule::repeat(const Rule &rule) {
  if (rule.is<Repeat>()) {
    return rule;
  }
  return Repeat{rule};
}
|
||||
|
||||
// Folds `rules` left-to-right into nested Seq rules, starting from a
// default-constructed (Blank) rule.
//  - Blank members are skipped.
//  - A Metadata member is skipped when the rule it wraps is Blank; otherwise
//    it is always appended as Seq{so_far, member}.
//  - Any other member replaces the accumulator while it is still Blank, and
//    is appended as Seq{so_far, member} afterwards.
Rule Rule::seq(const vector<Rule> &rules) {
  Rule sequence;
  for (const auto &member : rules) {
    const auto append_member = [&sequence, &member]() {
      sequence = Seq{sequence, member};
    };

    member.match(
      [](Blank) {},
      [&](Metadata metadata) {
        if (metadata.rule->is<Blank>()) return;
        append_member();
      },
      [&](auto) {
        if (sequence.is<Blank>()) {
          sequence = member;
        } else {
          append_member();
        }
      }
    );
  }
  return sequence;
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
|
||||
size_t hash<Symbol>::operator()(const Symbol &symbol) const {
|
||||
auto result = hash<int>()(symbol.index);
|
||||
hash_combine(&result, hash<int>()(symbol.type));
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t hash<NamedSymbol>::operator()(const NamedSymbol &symbol) const {
|
||||
return hash<string>()(symbol.value);
|
||||
}
|
||||
|
||||
size_t hash<Pattern>::operator()(const Pattern &symbol) const {
|
||||
return hash<string>()(symbol.value);
|
||||
}
|
||||
|
||||
size_t hash<String>::operator()(const String &symbol) const {
|
||||
return hash<string>()(symbol.value);
|
||||
}
|
||||
|
||||
size_t hash<CharacterSet>::operator()(const CharacterSet &character_set) const {
|
||||
size_t result = 0;
|
||||
hash_combine(&result, character_set.includes_all);
|
||||
hash_combine(&result, character_set.included_chars.size());
|
||||
for (uint32_t c : character_set.included_chars) {
|
||||
hash_combine(&result, c);
|
||||
}
|
||||
hash_combine(&result, character_set.excluded_chars.size());
|
||||
for (uint32_t c : character_set.excluded_chars) {
|
||||
hash_combine(&result, c);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t hash<Blank>::operator()(const Blank &blank) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t hash<Choice>::operator()(const Choice &choice) const {
|
||||
size_t result = 0;
|
||||
for (const auto &element : choice.elements) {
|
||||
symmetric_hash_combine(&result, element);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t hash<Repeat>::operator()(const Repeat &repeat) const {
|
||||
size_t result = 0;
|
||||
hash_combine(&result, *repeat.rule);
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t hash<Seq>::operator()(const Seq &seq) const {
|
||||
size_t result = 0;
|
||||
hash_combine(&result, *seq.left);
|
||||
hash_combine(&result, *seq.right);
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t hash<Metadata>::operator()(const Metadata &metadata) const {
|
||||
size_t result = 0;
|
||||
hash_combine(&result, *metadata.rule);
|
||||
hash_combine(&result, metadata.params.precedence);
|
||||
hash_combine<int>(&result, metadata.params.associativity);
|
||||
hash_combine(&result, metadata.params.has_precedence);
|
||||
hash_combine(&result, metadata.params.has_associativity);
|
||||
hash_combine(&result, metadata.params.is_token);
|
||||
hash_combine(&result, metadata.params.is_string);
|
||||
hash_combine(&result, metadata.params.is_active);
|
||||
hash_combine(&result, metadata.params.is_main_token);
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t hash<Rule>::operator()(const Rule &rule) const {
|
||||
size_t result = hash<int>()(rule.type);
|
||||
switch (rule.type) {
|
||||
case Rule::CharacterSetType: return result ^ hash<CharacterSet>()(rule.character_set_);
|
||||
case Rule::StringType: return result ^ hash<String>()(rule.string_);
|
||||
case Rule::PatternType: return result ^ hash<Pattern>()(rule.pattern_);
|
||||
case Rule::NamedSymbolType: return result ^ hash<NamedSymbol>()(rule.named_symbol_);
|
||||
case Rule::SymbolType: return result ^ hash<Symbol>()(rule.symbol_);
|
||||
case Rule::ChoiceType: return result ^ hash<Choice>()(rule.choice_);
|
||||
case Rule::MetadataType: return result ^ hash<Metadata>()(rule.metadata_);
|
||||
case Rule::RepeatType: return result ^ hash<Repeat>()(rule.repeat_);
|
||||
case Rule::SeqType: return result ^ hash<Seq>()(rule.seq_);
|
||||
default: return result ^ hash<Blank>()(rule.blank_);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace std
|
||||
|
|
@ -1,44 +1,143 @@
|
|||
#ifndef COMPILER_RULE_H_
|
||||
#define COMPILER_RULE_H_
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "compiler/util/make_visitor.h"
|
||||
#include "compiler/util/hash_combine.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
namespace rules {
|
||||
class Visitor;
|
||||
} // namespace rules
|
||||
|
||||
class Rule;
|
||||
typedef std::shared_ptr<Rule> rule_ptr;
|
||||
struct Rule {
|
||||
union {
|
||||
Blank blank_;
|
||||
CharacterSet character_set_;
|
||||
String string_;
|
||||
Pattern pattern_;
|
||||
NamedSymbol named_symbol_;
|
||||
Symbol symbol_;
|
||||
Choice choice_;
|
||||
Metadata metadata_;
|
||||
Repeat repeat_;
|
||||
Seq seq_;
|
||||
};
|
||||
|
||||
class Rule {
|
||||
public:
|
||||
virtual bool operator==(const Rule &other) const = 0;
|
||||
bool operator!=(const Rule &other) const;
|
||||
virtual size_t hash_code() const = 0;
|
||||
virtual rule_ptr copy() const = 0;
|
||||
virtual std::string to_string() const = 0;
|
||||
virtual void accept(rules::Visitor *visitor) const = 0;
|
||||
virtual ~Rule();
|
||||
enum {
|
||||
BlankType,
|
||||
CharacterSetType,
|
||||
StringType,
|
||||
PatternType,
|
||||
NamedSymbolType,
|
||||
SymbolType,
|
||||
ChoiceType,
|
||||
MetadataType,
|
||||
RepeatType,
|
||||
SeqType,
|
||||
} type;
|
||||
|
||||
template <typename T>
|
||||
const T *as() const {
|
||||
return dynamic_cast<const T *>(this);
|
||||
Rule() : blank_(Blank{}), type(BlankType) {};
|
||||
Rule(const Blank &value) : blank_(value), type(BlankType) {};
|
||||
Rule(const CharacterSet &value) : character_set_(value), type(CharacterSetType) {};
|
||||
Rule(const String &value) : string_(value), type(StringType) {};
|
||||
Rule(const Pattern &value) : pattern_(value), type(PatternType) {};
|
||||
Rule(const NamedSymbol &value) : named_symbol_(value), type(NamedSymbolType) {};
|
||||
Rule(const Symbol &value) : symbol_(value), type(SymbolType) {};
|
||||
Rule(const Choice &value) : choice_(value), type(ChoiceType) {};
|
||||
Rule(const Metadata &value) : metadata_(value), type(MetadataType) {};
|
||||
Rule(const Repeat &value) : repeat_(value), type(RepeatType) {};
|
||||
Rule(const Seq &value) : seq_(value), type(SeqType) {};
|
||||
|
||||
Rule(const Rule &other);
|
||||
Rule(Rule &&other) noexcept;
|
||||
Rule &operator=(const Rule &other);
|
||||
Rule &operator=(Rule &&other) noexcept;
|
||||
~Rule() noexcept;
|
||||
|
||||
static Rule choice(const std::vector<Rule> &rules);
|
||||
static Rule seq(const std::vector<Rule> &rules);
|
||||
static Rule repeat(const Rule &rule);
|
||||
|
||||
template <typename RuleType>
|
||||
bool is() const;
|
||||
|
||||
template <typename RuleType>
|
||||
const RuleType & get_unchecked() const;
|
||||
|
||||
template <typename FunctionType>
|
||||
inline auto accept(FunctionType function) const -> decltype(function(blank_)) {
|
||||
switch (type) {
|
||||
case CharacterSetType: return function(character_set_);
|
||||
case StringType: return function(string_);
|
||||
case PatternType: return function(pattern_);
|
||||
case NamedSymbolType: return function(named_symbol_);
|
||||
case SymbolType: return function(symbol_);
|
||||
case ChoiceType: return function(choice_);
|
||||
case MetadataType: return function(metadata_);
|
||||
case RepeatType: return function(repeat_);
|
||||
case SeqType: return function(seq_);
|
||||
default: return function(blank_);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ...FunctionTypes>
|
||||
inline auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward<FunctionTypes>(functions)...))){
|
||||
return accept(util::make_visitor(std::forward<FunctionTypes>(functions)...));
|
||||
}
|
||||
|
||||
bool operator==(const Rule &other) const;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
|
||||
using namespace tree_sitter::rules;
|
||||
using namespace tree_sitter::util;
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::rule_ptr> {
|
||||
size_t operator()(const tree_sitter::rule_ptr &rule) const {
|
||||
return rule->hash_code();
|
||||
}
|
||||
};
|
||||
struct hash<Symbol> { size_t operator()(const Symbol &) const; };
|
||||
|
||||
template <>
|
||||
struct hash<NamedSymbol> { size_t operator()(const NamedSymbol &) const; };
|
||||
|
||||
template <>
|
||||
struct hash<Pattern> { size_t operator()(const Pattern &) const; };
|
||||
|
||||
template <>
|
||||
struct hash<String> { size_t operator()(const String &) const; };
|
||||
|
||||
template <>
|
||||
struct hash<CharacterSet> { size_t operator()(const CharacterSet &) const; };
|
||||
|
||||
template <>
|
||||
struct hash<Blank> { size_t operator()(const Blank &) const; };
|
||||
|
||||
template <>
|
||||
struct hash<Choice> { size_t operator()(const Choice &) const; };
|
||||
|
||||
template <>
|
||||
struct hash<Repeat> { size_t operator()(const Repeat &) const; };
|
||||
|
||||
template <>
|
||||
struct hash<Seq> { size_t operator()(const Seq &) const; };
|
||||
|
||||
template <>
|
||||
struct hash<Metadata> { size_t operator()(const Metadata &) const; };
|
||||
|
||||
template <>
|
||||
struct hash<Rule> { size_t operator()(const Rule &) const; };
|
||||
|
||||
} // namespace std
|
||||
|
||||
|
|
|
|||
|
|
@ -1,28 +0,0 @@
|
|||
#ifndef COMPILER_RULES_H_
|
||||
#define COMPILER_RULES_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
// Free-function rule builders; every function declared here returns a rule_ptr.
namespace tree_sitter {
|
||||
|
||||
rule_ptr blank();
|
||||
rule_ptr choice(const std::vector<rule_ptr> &);
|
||||
rule_ptr repeat(const rule_ptr &);
|
||||
rule_ptr repeat1(const rule_ptr &);
|
||||
rule_ptr seq(const std::vector<rule_ptr> &);
|
||||
rule_ptr sym(const std::string &);
|
||||
rule_ptr pattern(const std::string &);
|
||||
rule_ptr str(const std::string &);
|
||||
rule_ptr prec(int precedence, const rule_ptr &);
|
||||
rule_ptr prec_left(const rule_ptr &);
|
||||
rule_ptr prec_left(int precedence, const rule_ptr &);
|
||||
rule_ptr prec_right(const rule_ptr &);
|
||||
rule_ptr prec_right(int precedence, const rule_ptr &);
|
||||
rule_ptr token(const rule_ptr &rule);
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_H_
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
#include "compiler/rules/blank.h"
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
Blank::Blank() {}
|
||||
|
||||
rule_ptr Blank::build() {
|
||||
return std::make_shared<Blank>();
|
||||
}
|
||||
|
||||
bool Blank::operator==(const Rule &rule) const {
|
||||
return rule.as<Blank>() != nullptr;
|
||||
}
|
||||
|
||||
size_t Blank::hash_code() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
rule_ptr Blank::copy() const {
|
||||
return std::make_shared<Blank>();
|
||||
}
|
||||
|
||||
std::string Blank::to_string() const {
|
||||
return "(blank)";
|
||||
}
|
||||
|
||||
void Blank::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,25 +1,16 @@
|
|||
#ifndef COMPILER_RULES_BLANK_H_
|
||||
#define COMPILER_RULES_BLANK_H_
|
||||
|
||||
#include <string>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class Blank : public Rule {
|
||||
public:
|
||||
Blank();
|
||||
static rule_ptr build();
|
||||
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
// Rule alternative with no data members; any two Blank values compare equal.
struct Blank {
  inline bool operator==(const Blank &) const { return true; }
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_BLANK_H_
|
||||
#endif // COMPILER_RULES_BLANK_H_
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
Symbol END_OF_INPUT() {
|
||||
return Symbol(-1, Symbol::Terminal);
|
||||
}
|
||||
|
||||
Symbol START() {
|
||||
return Symbol(-2, Symbol::NonTerminal);
|
||||
}
|
||||
|
||||
Symbol NONE() {
|
||||
return Symbol(-3, Symbol::Type(-1));
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
#ifndef COMPILER_RULES_BUILT_IN_SYMBOLS_H_
|
||||
#define COMPILER_RULES_BUILT_IN_SYMBOLS_H_
|
||||
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
Symbol END_OF_INPUT();
|
||||
Symbol START();
|
||||
Symbol NONE();
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_BUILT_IN_SYMBOLS_H_
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
#include "compiler/rules/character_range.h"
|
||||
#include <string>
|
||||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::string;
|
||||
|
||||
CharacterRange::CharacterRange(uint32_t value) : min(value), max(value) {}
|
||||
CharacterRange::CharacterRange(uint32_t min, uint32_t max)
|
||||
: min(min), max(max) {}
|
||||
|
||||
bool CharacterRange::operator==(const CharacterRange &other) const {
|
||||
return min == other.min && max == other.max;
|
||||
}
|
||||
|
||||
bool CharacterRange::operator<(const CharacterRange &other) const {
|
||||
if (min < other.min)
|
||||
return true;
|
||||
if (min > other.min)
|
||||
return false;
|
||||
if (max < other.max)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
string CharacterRange::to_string() const {
|
||||
if (min == max)
|
||||
return util::escape_char(min);
|
||||
else
|
||||
return util::escape_char(min) + "-" + util::escape_char(max);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
#ifndef COMPILER_RULES_CHARACTER_RANGE_H_
|
||||
#define COMPILER_RULES_CHARACTER_RANGE_H_
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
// A pair of uint32_t bounds, `min` and `max`, with comparison and printing helpers.
struct CharacterRange {
|
||||
uint32_t min;
|
||||
uint32_t max;
|
||||
|
||||
// Range built from one value.
explicit CharacterRange(uint32_t value);
|
||||
// Range built from two explicit bounds.
explicit CharacterRange(uint32_t min, uint32_t max);
|
||||
|
||||
bool operator==(const CharacterRange &other) const;
|
||||
bool operator<(const CharacterRange &others) const;
|
||||
// String rendering of the range.
std::string to_string() const;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_CHARACTER_RANGE_H_
|
||||
|
|
@ -1,59 +1,57 @@
|
|||
#include "compiler/rules/character_set.h"
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/util/hash_combine.h"
|
||||
|
||||
using std::set;
|
||||
using std::vector;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::string;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using util::hash_combine;
|
||||
|
||||
// Inserts every value in the inclusive range [min, max] into *characters.
// Does nothing when min > max.
static void add_range(set<uint32_t> *characters, uint32_t min, uint32_t max) {
  for (uint32_t c = min; c <= max; c++) {
    characters->insert(c);
    // Stop explicitly at the upper bound so that c++ cannot wrap around to 0
    // (and loop forever) when max is the largest uint32_t value.
    if (c == max) {
      break;
    }
  }
}
|
||||
|
||||
// Erases every value in the inclusive range [min, max] from *characters.
// Does nothing when min > max.
static void remove_range(set<uint32_t> *characters, uint32_t min, uint32_t max) {
  for (uint32_t c = min; c <= max; c++) {
    characters->erase(c);
    // Stop explicitly at the upper bound so that c++ cannot wrap around to 0
    // (and loop forever) when max is the largest uint32_t value.
    if (c == max) {
      break;
    }
  }
}
|
||||
|
||||
// Erases from *left every value that also appears in `right` and returns the
// set of values that were actually removed. Each value is erased once; a
// second erase of the same value would report nothing removed.
static set<uint32_t> remove_chars(set<uint32_t> *left, const set<uint32_t> &right) {
  set<uint32_t> result;
  for (uint32_t c : right) {
    if (left->erase(c)) {
      result.insert(c);
    }
  }
  return result;
}
|
||||
|
||||
// Inserts every value of `right` into *left and returns the set of values
// that were newly added (those not already present in *left).
static set<uint32_t> add_chars(set<uint32_t> *left, const set<uint32_t> &right) {
  set<uint32_t> result;
  for (uint32_t c : right) {
    if (left->insert(c).second) {
      result.insert(c);
    }
  }
  return result;
}
|
||||
|
||||
static vector<CharacterRange> consolidate_ranges(const set<uint32_t> &chars) {
|
||||
vector<CharacterRange> result;
|
||||
for (uint32_t c : chars) {
|
||||
size_t size = result.size();
|
||||
auto size = result.size();
|
||||
if (size >= 2 && result[size - 2].max == (c - 2)) {
|
||||
result.pop_back();
|
||||
result.back().max = c;
|
||||
} else if (size >= 1) {
|
||||
CharacterRange &last = result.back();
|
||||
if (last.min < last.max && last.max == (c - 1))
|
||||
if (last.min < last.max && last.max == (c - 1)) {
|
||||
last.max = c;
|
||||
else
|
||||
} else {
|
||||
result.push_back(CharacterRange(c));
|
||||
}
|
||||
} else {
|
||||
result.push_back(CharacterRange(c));
|
||||
}
|
||||
|
|
@ -61,14 +59,14 @@ static vector<CharacterRange> consolidate_ranges(const set<uint32_t> &chars) {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Default constructor: includes_all is set to false; included_chars and
// excluded_chars are not listed, so they are default-initialized. Defined
// once only, since a second definition of the same constructor is a
// redefinition error.
CharacterSet::CharacterSet() : includes_all(false) {}
|
||||
|
||||
bool CharacterSet::operator==(const Rule &rule) const {
|
||||
const CharacterSet *other = rule.as<CharacterSet>();
|
||||
return other && (includes_all == other->includes_all) &&
|
||||
(included_chars == other->included_chars) &&
|
||||
(excluded_chars == other->excluded_chars);
|
||||
CharacterSet::CharacterSet(const set<uint32_t> &chars) : included_chars(chars), includes_all(false) {}
|
||||
|
||||
bool CharacterSet::operator==(const CharacterSet &other) const {
|
||||
return includes_all == other.includes_all &&
|
||||
included_chars == other.included_chars &&
|
||||
excluded_chars == other.excluded_chars;
|
||||
}
|
||||
|
||||
bool CharacterSet::operator<(const CharacterSet &other) const {
|
||||
|
|
@ -83,41 +81,6 @@ bool CharacterSet::operator<(const CharacterSet &other) const {
|
|||
return excluded_chars < other.excluded_chars;
|
||||
}
|
||||
|
||||
// Folds includes_all, then the size and contents of included_chars, then the
// size and contents of excluded_chars, into a single hash value.
size_t CharacterSet::hash_code() const {
  size_t seed = 0;
  hash_combine(&seed, includes_all);

  hash_combine(&seed, included_chars.size());
  for (uint32_t included : included_chars) {
    hash_combine(&seed, included);
  }

  hash_combine(&seed, excluded_chars.size());
  for (uint32_t excluded : excluded_chars) {
    hash_combine(&seed, excluded);
  }

  return seed;
}
|
||||
|
||||
// Returns a newly allocated copy of this character set.
rule_ptr CharacterSet::copy() const {
  rule_ptr duplicate = std::make_shared<CharacterSet>(*this);
  return duplicate;
}
|
||||
|
||||
// Renders the set as "(char [include_all] [(include r...)] [(exclude r...)])",
// where each r is the to_string() of a consolidated range.
string CharacterSet::to_string() const {
  string text("(char");

  if (includes_all) {
    text += " include_all";
  }

  if (!included_chars.empty()) {
    text += " (include";
    for (const auto &range : included_ranges()) {
      text += string(" ") + range.to_string();
    }
    text += ")";
  }

  if (!excluded_chars.empty()) {
    text += " (exclude";
    for (const auto &range : excluded_ranges()) {
      text += string(" ") + range.to_string();
    }
    text += ")";
  }

  text += ")";
  return text;
}
|
||||
|
||||
CharacterSet &CharacterSet::include_all() {
|
||||
includes_all = true;
|
||||
included_chars = {};
|
||||
|
|
@ -212,9 +175,5 @@ vector<CharacterRange> CharacterSet::excluded_ranges() const {
|
|||
return consolidate_ranges(excluded_chars);
|
||||
}
|
||||
|
||||
// Passes this character set to the visitor's visit() method.
void CharacterSet::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,20 +1,28 @@
|
|||
#ifndef COMPILER_RULES_CHARACTER_SET_H_
|
||||
#define COMPILER_RULES_CHARACTER_SET_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/character_range.h"
|
||||
#include <set>
|
||||
#include <cstdint>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class CharacterSet : public Rule {
|
||||
public:
|
||||
// Inclusive pair of uint32_t bounds.
struct CharacterRange {
  uint32_t min;
  uint32_t max;

  // Single-value range: both bounds equal `value`.
  explicit CharacterRange(uint32_t value) : min(value), max(value) {}

  // Range with explicit bounds.
  CharacterRange(uint32_t min, uint32_t max) : min(min), max(max) {}

  // Ranges are equal when neither bound differs.
  bool operator==(const CharacterRange &other) const {
    return !(min != other.min || max != other.max);
  }
};
|
||||
|
||||
struct CharacterSet {
|
||||
CharacterSet();
|
||||
CharacterSet(const std::set<uint32_t> &);
|
||||
|
||||
CharacterSet &include_all();
|
||||
CharacterSet &include(uint32_t c);
|
||||
|
|
@ -22,12 +30,8 @@ class CharacterSet : public Rule {
|
|||
CharacterSet &exclude(uint32_t c);
|
||||
CharacterSet &exclude(uint32_t min, uint32_t max);
|
||||
|
||||
bool operator==(const Rule &other) const;
|
||||
bool operator==(const CharacterSet &) const;
|
||||
bool operator<(const CharacterSet &) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
void add_set(const CharacterSet &other);
|
||||
CharacterSet remove_set(const CharacterSet &other);
|
||||
|
|
@ -37,23 +41,12 @@ class CharacterSet : public Rule {
|
|||
std::vector<CharacterRange> included_ranges() const;
|
||||
std::vector<CharacterRange> excluded_ranges() const;
|
||||
|
||||
bool includes_all;
|
||||
std::set<uint32_t> included_chars;
|
||||
std::set<uint32_t> excluded_chars;
|
||||
bool includes_all;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::rules::CharacterSet> {
|
||||
size_t operator()(const tree_sitter::rules::CharacterSet &rule) const {
|
||||
return rule.hash_code();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
#endif // COMPILER_RULES_CHARACTER_SET_H_
|
||||
#endif // COMPILER_RULES_CHARACTER_SET_H_
|
||||
|
|
@ -1,77 +1,11 @@
|
|||
#include "compiler/rules/choice.h"
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/util/hash_combine.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::string;
|
||||
using std::make_shared;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using util::symmetric_hash_combine;
|
||||
|
||||
// Stores a copy of the given element list; no flattening or de-duplication happens here.
Choice::Choice(const vector<rule_ptr> &elements) : elements(elements) {}
|
||||
|
||||
// Appends new_rule to *vec. A nested Choice is flattened by adding each of its
// elements recursively; any other rule is appended only if no element already
// in *vec compares equal to it.
void add_choice_element(vector<rule_ptr> *vec, const rule_ptr new_rule) {
  auto nested = new_rule->as<Choice>();
  if (nested) {
    for (auto &child : nested->elements) {
      add_choice_element(vec, child);
    }
    return;
  }

  for (auto &existing : *vec) {
    if (existing->operator==(*new_rule)) {
      return;
    }
  }
  vec->push_back(new_rule);
}
|
||||
|
||||
// Flattens and de-duplicates the inputs via add_choice_element. When exactly
// one element remains it is returned directly; otherwise the elements are
// wrapped in a new Choice.
rule_ptr Choice::build(const vector<rule_ptr> &inputs) {
  vector<rule_ptr> flattened;
  for (auto &input : inputs) {
    add_choice_element(&flattened, input);
  }

  if (flattened.size() != 1) {
    return make_shared<Choice>(flattened);
  }
  return flattened.front();
}
|
||||
|
||||
// Equal only to another Choice with the same number of elements whose
// elements compare equal position by position.
bool Choice::operator==(const Rule &rule) const {
  const Choice *other = rule.as<Choice>();
  if (!other) {
    return false;
  }

  const size_t count = elements.size();
  if (count != other->elements.size()) {
    return false;
  }

  size_t index = 0;
  while (index < count) {
    if (!elements[index]->operator==(*other->elements[index])) {
      return false;
    }
    index++;
  }
  return true;
}
|
||||
|
||||
// Combines the element count and then each element into one hash value using
// symmetric_hash_combine.
size_t Choice::hash_code() const {
  size_t seed = 0;
  symmetric_hash_combine(&seed, elements.size());
  for (const auto &member : elements) {
    symmetric_hash_combine(&seed, member);
  }
  return seed;
}
|
||||
|
||||
// Returns a new Choice copy-constructed from this one.
rule_ptr Choice::copy() const {
  rule_ptr duplicate = std::make_shared<Choice>(*this);
  return duplicate;
}
|
||||
|
||||
// Renders "(choice e1 e2 ...)" using each element's to_string().
string Choice::to_string() const {
  string text = "(choice";
  for (const auto &member : elements) {
    text += " " + member->to_string();
  }
  text += ")";
  return text;
}
|
||||
|
||||
void Choice::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
bool Choice::operator==(const Choice &other) const {
|
||||
return elements == other.elements;
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
|
|
|
|||
|
|
@ -1,28 +1,21 @@
|
|||
#ifndef COMPILER_RULES_CHOICE_H_
|
||||
#define COMPILER_RULES_CHOICE_H_
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class Choice : public Rule {
|
||||
public:
|
||||
explicit Choice(const std::vector<rule_ptr> &elements);
|
||||
static rule_ptr build(const std::vector<rule_ptr> &rules);
|
||||
struct Rule;
|
||||
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
struct Choice {
|
||||
std::vector<Rule> elements;
|
||||
|
||||
const std::vector<rule_ptr> elements;
|
||||
bool operator==(const Choice &other) const;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_CHOICE_H_
|
||||
#endif // COMPILER_RULES_CHOICE_H_
|
||||
|
|
@ -1,97 +1,70 @@
|
|||
#include "compiler/rules/metadata.h"
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <climits>
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/util/hash_combine.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::make_shared;
|
||||
using std::map;
|
||||
using std::pair;
|
||||
using util::hash_combine;
|
||||
// Wraps a copy of `rule` in a newly allocated shared Rule and stores `params` alongside it.
Metadata::Metadata(const Rule &rule, MetadataParams params) :
|
||||
rule(std::make_shared<Rule>(rule)), params(params) {}
|
||||
|
||||
// Default parameters: precedence 0, AssociativityNone, and every flag false.
MetadataParams::MetadataParams()
    : precedence(0),
      associativity(AssociativityNone),
      has_precedence(false),
      has_associativity(false),
      is_token(false),
      is_string(false),
      is_active(false),
      is_main_token(false) {}
|
||||
|
||||
bool MetadataParams::operator==(const MetadataParams &other) const {
|
||||
return
|
||||
precedence == other.precedence &&
|
||||
associativity == other.associativity &&
|
||||
has_precedence == other.has_precedence &&
|
||||
has_associativity == other.has_associativity &&
|
||||
is_token == other.is_token &&
|
||||
is_string == other.is_string &&
|
||||
is_active == other.is_active &&
|
||||
is_main_token == other.is_main_token;
|
||||
bool Metadata::operator==(const Metadata &other) const {
|
||||
return rule->operator==(*other.rule) && params == other.params;
|
||||
}
|
||||
|
||||
// Stores the given rule pointer and parameters as-is.
Metadata::Metadata(rule_ptr rule, MetadataParams params)
|
||||
: rule(rule), params(params) {}
|
||||
|
||||
rule_ptr Metadata::build(rule_ptr rule, MetadataParams params) {
|
||||
return std::make_shared<Metadata>(rule, params);
|
||||
Metadata Metadata::token(const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.is_token = true;
|
||||
return Metadata{rule, params};
|
||||
}
|
||||
|
||||
rule_ptr Metadata::main_token(rule_ptr rule) {
|
||||
// Wraps `rule` with the given precedence and marks the metadata as active.
Metadata Metadata::active_prec(int precedence, const Rule &rule) {
  MetadataParams params;
  params.has_precedence = true;
  params.precedence = precedence;
  params.is_active = true;
  return Metadata{rule, params};
}
|
||||
|
||||
rule_ptr Metadata::separator(rule_ptr rule) {
|
||||
// Wraps `rule` with the given precedence and no other flags.
Metadata Metadata::prec(int precedence, const Rule &rule) {
  MetadataParams settings;
  settings.precedence = precedence;
  settings.has_precedence = true;
  return Metadata(rule, settings);
}
|
||||
|
||||
// Wraps `rule` with the given precedence and left associativity.
Metadata Metadata::prec_left(int precedence, const Rule &rule) {
  MetadataParams settings;
  settings.precedence = precedence;
  settings.has_precedence = true;
  settings.associativity = AssociativityLeft;
  settings.has_associativity = true;
  return Metadata(rule, settings);
}
|
||||
|
||||
// Wraps `rule` with the given precedence and right associativity.
Metadata Metadata::prec_right(int precedence, const Rule &rule) {
  MetadataParams settings;
  settings.precedence = precedence;
  settings.has_precedence = true;
  settings.associativity = AssociativityRight;
  settings.has_associativity = true;
  return Metadata(rule, settings);
}
|
||||
|
||||
// Wraps `rule` as active metadata with precedence INT_MIN.
Metadata Metadata::separator(const Rule &rule) {
  MetadataParams params;
  params.has_precedence = true;
  params.precedence = INT_MIN;
  params.is_active = true;
  return Metadata{rule, params};
}
|
||||
|
||||
// Equal only to another Metadata with matching params whose wrapped rule
// compares equal to this one's.
bool Metadata::operator==(const Rule &rule) const {
  auto other = rule.as<Metadata>();
  if (!other) {
    return false;
  }
  if (!(other->params == params)) {
    return false;
  }
  return other->rule->operator==(*this->rule);
}
|
||||
|
||||
// Hashes the parameter fields in a fixed order; the wrapped rule is not part
// of the hash.
size_t Metadata::hash_code() const {
  size_t seed = 0;

  // Precedence and associativity values.
  hash_combine(&seed, params.precedence);
  hash_combine<int>(&seed, params.associativity);

  // Presence flags.
  hash_combine(&seed, params.has_precedence);
  hash_combine(&seed, params.has_associativity);

  // Token and activity flags.
  hash_combine(&seed, params.is_token);
  hash_combine(&seed, params.is_string);
  hash_combine(&seed, params.is_active);
  hash_combine(&seed, params.is_main_token);

  return seed;
}
|
||||
|
||||
// Returns a new Metadata holding a copy of the wrapped rule and the same params.
rule_ptr Metadata::copy() const {
  rule_ptr duplicate = make_shared<Metadata>(rule->copy(), params);
  return duplicate;
}
|
||||
|
||||
std::string Metadata::to_string() const {
|
||||
if (params.has_precedence) {
|
||||
return "(metadata prec:" + std::to_string(params.precedence) + " " +
|
||||
rule->to_string() + ")";
|
||||
} else {
|
||||
return "(metadata " + rule->to_string() + ")";
|
||||
}
|
||||
}
|
||||
|
||||
void Metadata::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
Metadata Metadata::main_token(const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.has_precedence = true;
|
||||
params.precedence = 0;
|
||||
params.is_main_token = true;
|
||||
return Metadata{rule, params};
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
|
|
|
|||
|
|
@ -1,9 +1,7 @@
|
|||
#ifndef COMPILER_RULES_METADATA_H_
|
||||
#define COMPILER_RULES_METADATA_H_
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "compiler/rule.h"
|
||||
#include <memory>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
|
@ -24,28 +22,45 @@ struct MetadataParams {
|
|||
bool is_active;
|
||||
bool is_main_token;
|
||||
|
||||
MetadataParams();
|
||||
bool operator==(const MetadataParams &) const;
|
||||
inline MetadataParams() :
|
||||
precedence{0}, associativity{AssociativityNone}, has_precedence{false},
|
||||
has_associativity{false}, is_token{false}, is_string{false},
|
||||
is_active{false}, is_main_token{false} {}
|
||||
|
||||
// Field-by-field equality; returns false at the first field that differs.
inline bool operator==(const MetadataParams &other) const {
  if (precedence != other.precedence) return false;
  if (associativity != other.associativity) return false;
  if (has_precedence != other.has_precedence) return false;
  if (has_associativity != other.has_associativity) return false;
  if (is_token != other.is_token) return false;
  if (is_string != other.is_string) return false;
  if (is_active != other.is_active) return false;
  return is_main_token == other.is_main_token;
}
|
||||
};
|
||||
|
||||
class Metadata : public Rule {
|
||||
public:
|
||||
Metadata(rule_ptr rule, MetadataParams);
|
||||
static rule_ptr build(rule_ptr rule, MetadataParams);
|
||||
static rule_ptr main_token(rule_ptr rule);
|
||||
static rule_ptr separator(rule_ptr rule);
|
||||
struct Rule;
|
||||
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
const rule_ptr rule;
|
||||
struct Metadata {
|
||||
std::shared_ptr<Rule> rule;
|
||||
MetadataParams params;
|
||||
|
||||
Metadata(const Rule &rule, MetadataParams params);
|
||||
|
||||
static Metadata token(const Rule &rule);
|
||||
static Metadata active_prec(int precedence, const Rule &rule);
|
||||
static Metadata prec(int precedence, const Rule &rule);
|
||||
static Metadata prec_left(int precedence, const Rule &rule);
|
||||
static Metadata prec_right(int precedence, const Rule &rule);
|
||||
static Metadata separator(const Rule &rule);
|
||||
static Metadata main_token(const Rule &rule);
|
||||
|
||||
bool operator==(const Metadata &other) const;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_METADATA_H_
|
||||
#endif // COMPILER_RULES_METADATA_H_
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
#include "compiler/rules/named_symbol.h"
|
||||
#include <string>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
||||
NamedSymbol::NamedSymbol(const std::string &name) : name(name) {}
|
||||
|
||||
bool NamedSymbol::operator==(const Rule &rule) const {
|
||||
auto other = rule.as<NamedSymbol>();
|
||||
return other && other->name == name;
|
||||
}
|
||||
|
||||
size_t NamedSymbol::hash_code() const {
|
||||
return hash<string>()(name);
|
||||
}
|
||||
|
||||
rule_ptr NamedSymbol::copy() const {
|
||||
return std::make_shared<NamedSymbol>(*this);
|
||||
}
|
||||
|
||||
string NamedSymbol::to_string() const {
|
||||
return string("(sym '") + name + "')";
|
||||
}
|
||||
|
||||
void NamedSymbol::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -2,25 +2,19 @@
|
|||
#define COMPILER_RULES_NAMED_SYMBOL_H_
|
||||
|
||||
#include <string>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class NamedSymbol : public Rule {
|
||||
public:
|
||||
explicit NamedSymbol(const std::string &name);
|
||||
struct NamedSymbol {
|
||||
std::string value;
|
||||
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
std::string name;
|
||||
inline bool operator==(const NamedSymbol &other) const {
|
||||
return value == other.value;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_NAMED_SYMBOL_H_
|
||||
#endif // COMPILER_RULES_NAMED_SYMBOL_H_
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
#include "compiler/rules/pattern.h"
|
||||
#include <string>
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
||||
Pattern::Pattern(const string &string) : value(string) {}
|
||||
|
||||
bool Pattern::operator==(tree_sitter::Rule const &other) const {
|
||||
auto pattern = other.as<Pattern>();
|
||||
return pattern && (pattern->value == value);
|
||||
}
|
||||
|
||||
size_t Pattern::hash_code() const {
|
||||
return hash<string>()(value);
|
||||
}
|
||||
|
||||
rule_ptr Pattern::copy() const {
|
||||
return std::make_shared<Pattern>(*this);
|
||||
}
|
||||
|
||||
string Pattern::to_string() const {
|
||||
return string("(pattern '") + util::escape_string(value) + "')";
|
||||
}
|
||||
|
||||
void Pattern::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -2,25 +2,19 @@
|
|||
#define COMPILER_RULES_PATTERN_H_
|
||||
|
||||
#include <string>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class Pattern : public Rule {
|
||||
public:
|
||||
explicit Pattern(const std::string &string);
|
||||
struct Pattern {
|
||||
std::string value;
|
||||
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
const std::string value;
|
||||
inline bool operator==(const Pattern &other) const {
|
||||
return value == other.value;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_PATTERN_H_
|
||||
#endif // COMPILER_RULES_PATTERN_H_
|
||||
|
|
@ -1,43 +1,14 @@
|
|||
#include "compiler/rules/repeat.h"
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::make_shared;
|
||||
using std::string;
|
||||
// Wraps a copy of `rule` in a newly allocated shared Rule.
Repeat::Repeat(const Rule &rule) :
|
||||
rule(std::make_shared<Rule>(rule)) {}
|
||||
|
||||
// Stores the given rule pointer as the repeated content.
Repeat::Repeat(const rule_ptr content) : content(content) {}
|
||||
|
||||
// Returns `rule` unchanged when it is already a Repeat; otherwise wraps it in
// a new Repeat.
rule_ptr Repeat::build(const rule_ptr &rule) {
  if (rule->as<Repeat>() == nullptr) {
    return make_shared<Repeat>(rule);
  }
  return rule;
}
|
||||
|
||||
// Equal only to another Repeat whose content compares equal to this one's.
bool Repeat::operator==(const Rule &rule) const {
  auto other = rule.as<Repeat>();
  if (!other) {
    return false;
  }
  return (*other->content == *content);
}
|
||||
|
||||
size_t Repeat::hash_code() const {
|
||||
return content->hash_code();
|
||||
}
|
||||
|
||||
// Returns a new Repeat copy-constructed from this one.
rule_ptr Repeat::copy() const {
  rule_ptr duplicate = make_shared<Repeat>(*this);
  return duplicate;
}
|
||||
|
||||
// Renders "(repeat <content>)".
string Repeat::to_string() const {
  string text("(repeat ");
  text += content->to_string();
  text += ")";
  return text;
}
|
||||
|
||||
void Repeat::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
bool Repeat::operator==(const Repeat &other) const {
|
||||
return rule->operator==(*other.rule);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
|
|
|
|||
|
|
@ -1,27 +1,21 @@
|
|||
#ifndef COMPILER_RULES_REPEAT_H_
|
||||
#define COMPILER_RULES_REPEAT_H_
|
||||
|
||||
#include <string>
|
||||
#include "compiler/rule.h"
|
||||
#include <memory>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class Repeat : public Rule {
|
||||
public:
|
||||
explicit Repeat(rule_ptr content);
|
||||
static rule_ptr build(const rule_ptr &rule);
|
||||
struct Rule;
|
||||
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
struct Repeat {
|
||||
std::shared_ptr<Rule> rule;
|
||||
|
||||
const rule_ptr content;
|
||||
explicit Repeat(const Rule &rule);
|
||||
bool operator==(const Repeat &other) const;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_REPEAT_H_
|
||||
#endif // COMPILER_RULES_REPEAT_H_
|
||||
|
|
@ -1,108 +0,0 @@
|
|||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::make_shared;
|
||||
using std::string;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using std::map;
|
||||
using rules::MetadataParams;
|
||||
|
||||
static rule_ptr metadata(rule_ptr rule, MetadataParams params) {
|
||||
return std::make_shared<rules::Metadata>(rule, params);
|
||||
}
|
||||
|
||||
rule_ptr blank() {
|
||||
return rules::Blank::build();
|
||||
}
|
||||
|
||||
rule_ptr choice(const vector<rule_ptr> &rules) {
|
||||
return rules::Choice::build(rules);
|
||||
}
|
||||
|
||||
rule_ptr repeat(const rule_ptr &content) {
|
||||
return choice({ repeat1(content), blank() });
|
||||
}
|
||||
|
||||
rule_ptr repeat1(const rule_ptr &content) {
|
||||
return rules::Repeat::build(content);
|
||||
}
|
||||
|
||||
rule_ptr seq(const vector<rule_ptr> &rules) {
|
||||
return rules::Seq::build(rules);
|
||||
}
|
||||
|
||||
rule_ptr sym(const string &name) {
|
||||
return make_shared<rules::NamedSymbol>(name);
|
||||
}
|
||||
|
||||
rule_ptr pattern(const string &value) {
|
||||
return make_shared<rules::Pattern>(value);
|
||||
}
|
||||
|
||||
rule_ptr str(const string &value) {
|
||||
return make_shared<rules::String>(value);
|
||||
}
|
||||
|
||||
rule_ptr prec_left(const rule_ptr &rule) {
|
||||
MetadataParams params;
|
||||
params.has_associativity = true;
|
||||
params.associativity = rules::AssociativityLeft;
|
||||
return metadata(rule, params);
|
||||
}
|
||||
|
||||
rule_ptr prec_left(int precedence, const rule_ptr &rule) {
|
||||
MetadataParams params;
|
||||
params.has_associativity = true;
|
||||
params.associativity = rules::AssociativityLeft;
|
||||
params.has_precedence = true;
|
||||
params.precedence = precedence;
|
||||
return metadata(rule, params);
|
||||
}
|
||||
|
||||
rule_ptr prec_right(const rule_ptr &rule) {
|
||||
MetadataParams params;
|
||||
params.has_associativity = true;
|
||||
params.associativity = rules::AssociativityRight;
|
||||
return metadata(rule, params);
|
||||
}
|
||||
|
||||
rule_ptr prec_right(int precedence, const rule_ptr &rule) {
|
||||
MetadataParams params;
|
||||
params.has_associativity = true;
|
||||
params.associativity = rules::AssociativityRight;
|
||||
params.has_precedence = true;
|
||||
params.precedence = precedence;
|
||||
return metadata(rule, params);
|
||||
}
|
||||
|
||||
rule_ptr prec(int precedence, const rule_ptr &rule) {
|
||||
MetadataParams params;
|
||||
params.has_precedence = true;
|
||||
params.precedence = precedence;
|
||||
return metadata(rule, params);
|
||||
}
|
||||
|
||||
rule_ptr token(const rule_ptr &rule) {
|
||||
MetadataParams params;
|
||||
params.is_token = true;
|
||||
return metadata(rule, params);
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,56 +1,15 @@
|
|||
#include "compiler/rules/seq.h"
|
||||
#include <string>
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::make_shared;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
Seq::Seq(const Rule &left, const Rule &right) :
|
||||
left(std::make_shared<Rule>(left)),
|
||||
right(std::make_shared<Rule>(right)) {}
|
||||
|
||||
Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {}
|
||||
|
||||
rule_ptr Seq::build(const std::vector<rule_ptr> &rules) {
|
||||
rule_ptr result = make_shared<Blank>();
|
||||
for (auto &rule : rules) {
|
||||
auto blank = rule->as<Blank>();
|
||||
if (blank)
|
||||
continue;
|
||||
|
||||
auto metadata = rule->as<Metadata>();
|
||||
if (metadata && metadata->rule->as<Blank>())
|
||||
continue;
|
||||
|
||||
if (result->as<Blank>())
|
||||
result = rule;
|
||||
else
|
||||
result = make_shared<Seq>(result, rule);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
bool Seq::operator==(const Rule &rule) const {
|
||||
const Seq *other = rule.as<Seq>();
|
||||
return other && (*other->left == *left) && (*other->right == *right);
|
||||
}
|
||||
|
||||
size_t Seq::hash_code() const {
|
||||
return left->hash_code() ^ right->hash_code();
|
||||
}
|
||||
|
||||
rule_ptr Seq::copy() const {
|
||||
return std::make_shared<Seq>(*this);
|
||||
}
|
||||
|
||||
string Seq::to_string() const {
|
||||
return string("(seq ") + left->to_string() + " " + right->to_string() + ")";
|
||||
}
|
||||
|
||||
void Seq::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
bool Seq::operator==(const Seq &other) const {
|
||||
return left->operator==(*other.left) && right->operator==(*other.right);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
|
|
|
|||
|
|
@ -1,29 +1,23 @@
|
|||
#ifndef COMPILER_RULES_SEQ_H_
|
||||
#define COMPILER_RULES_SEQ_H_
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class Seq : public Rule {
|
||||
public:
|
||||
Seq(rule_ptr left, rule_ptr right);
|
||||
static rule_ptr build(const std::vector<rule_ptr> &rules);
|
||||
struct Rule;
|
||||
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
struct Seq {
|
||||
std::shared_ptr<Rule> left;
|
||||
std::shared_ptr<Rule> right;
|
||||
|
||||
const rule_ptr left;
|
||||
const rule_ptr right;
|
||||
Seq(const Rule &left, const Rule &right);
|
||||
bool operator==(const Seq &other) const;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_SEQ_H_
|
||||
#endif // COMPILER_RULES_SEQ_H_
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
#include "compiler/rules/string.h"
|
||||
#include <string>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
||||
String::String(string value) : value(value) {}
|
||||
|
||||
bool String::operator==(const Rule &rule) const {
|
||||
auto other = rule.as<String>();
|
||||
return other && (other->value == value);
|
||||
}
|
||||
|
||||
size_t String::hash_code() const {
|
||||
return hash<string>()(value);
|
||||
}
|
||||
|
||||
rule_ptr String::copy() const {
|
||||
return std::make_shared<String>(*this);
|
||||
}
|
||||
|
||||
string String::to_string() const {
|
||||
return string("(string '") + value + "')";
|
||||
}
|
||||
|
||||
void String::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -2,25 +2,19 @@
|
|||
#define COMPILER_RULES_STRING_H_
|
||||
|
||||
#include <string>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class String : public Rule {
|
||||
public:
|
||||
explicit String(std::string value);
|
||||
struct String {
|
||||
std::string value;
|
||||
|
||||
bool operator==(const Rule &other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
const std::string value;
|
||||
inline bool operator==(const String &other) const {
|
||||
return value == other.value;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_STRING_H_
|
||||
#endif // COMPILER_RULES_STRING_H_
|
||||
|
|
@ -1,82 +0,0 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/util/hash_combine.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using util::hash_combine;
|
||||
|
||||
Symbol::Symbol(Symbol::Index index, Symbol::Type type) : index(index), type(type) {}
|
||||
|
||||
bool Symbol::operator==(const Symbol &other) const {
|
||||
return (other.index == index) && (other.type == type);
|
||||
}
|
||||
|
||||
bool Symbol::operator==(const Rule &rule) const {
|
||||
auto other = rule.as<Symbol>();
|
||||
return other && this->operator==(*other);
|
||||
}
|
||||
|
||||
size_t Symbol::hash_code() const {
|
||||
size_t result = 0;
|
||||
hash_combine(&result, index);
|
||||
hash_combine<int>(&result, type);
|
||||
return result;
|
||||
}
|
||||
|
||||
rule_ptr Symbol::copy() const {
|
||||
return std::make_shared<Symbol>(*this);
|
||||
}
|
||||
|
||||
string Symbol::to_string() const {
|
||||
switch (type) {
|
||||
case Symbol::Terminal:
|
||||
return "(terminal " + std::to_string(index) + ")";
|
||||
case Symbol::NonTerminal:
|
||||
return "(non-terminal " + std::to_string(index) + ")";
|
||||
case Symbol::External:
|
||||
return "(external " + std::to_string(index) + ")";
|
||||
default:
|
||||
return "(none)";
|
||||
}
|
||||
}
|
||||
|
||||
bool Symbol::operator<(const Symbol &other) const {
|
||||
if (type < other.type)
|
||||
return true;
|
||||
if (other.type < type)
|
||||
return false;
|
||||
return (index < other.index);
|
||||
}
|
||||
|
||||
bool Symbol::is_built_in(Symbol::Index index) {
|
||||
return index < 0;
|
||||
}
|
||||
|
||||
bool Symbol::is_built_in() const {
|
||||
return is_built_in(index);
|
||||
}
|
||||
|
||||
bool Symbol::is_token() const {
|
||||
return type == Symbol::Terminal;
|
||||
}
|
||||
|
||||
bool Symbol::is_external() const {
|
||||
return type == Symbol::External;
|
||||
}
|
||||
|
||||
bool Symbol::is_non_terminal() const {
|
||||
return type == Symbol::NonTerminal;
|
||||
}
|
||||
|
||||
void Symbol::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,55 +1,76 @@
|
|||
#ifndef COMPILER_RULES_SYMBOL_H_
|
||||
#define COMPILER_RULES_SYMBOL_H_
|
||||
|
||||
#include <string>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class Symbol : public Rule {
|
||||
public:
|
||||
typedef int Index;
|
||||
|
||||
typedef enum {
|
||||
struct Symbol {
|
||||
using Index = int;
|
||||
enum Type {
|
||||
External,
|
||||
Terminal,
|
||||
NonTerminal,
|
||||
} Type;
|
||||
};
|
||||
|
||||
Symbol(Index index, Type type);
|
||||
inline bool operator==(const Symbol &other) const {
|
||||
return index == other.index && type == other.type;
|
||||
}
|
||||
|
||||
bool operator==(const Symbol &other) const;
|
||||
bool operator==(const Rule &other) const;
|
||||
inline bool operator!=(const Symbol &other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
bool operator<(const Symbol &other) const;
|
||||
static bool is_built_in(Index);
|
||||
bool is_built_in() const;
|
||||
bool is_token() const;
|
||||
bool is_external() const;
|
||||
bool is_non_terminal() const;
|
||||
inline bool operator<(const Symbol &other) const {
|
||||
if (type < other.type) return true;
|
||||
if (type > other.type) return false;
|
||||
return index < other.index;
|
||||
}
|
||||
|
||||
Index index;
|
||||
Type type;
|
||||
|
||||
static Symbol terminal(Index index) {
|
||||
return Symbol{index, Type::Terminal};
|
||||
}
|
||||
|
||||
static Symbol external(Index index) {
|
||||
return Symbol{index, Type::External};
|
||||
}
|
||||
|
||||
static Symbol non_terminal(Index index) {
|
||||
return Symbol{index, Type::NonTerminal};
|
||||
}
|
||||
|
||||
bool is_non_terminal() const {
|
||||
return type == Type::NonTerminal;
|
||||
}
|
||||
|
||||
bool is_terminal() const {
|
||||
return type == Type::Terminal;
|
||||
}
|
||||
|
||||
bool is_external() const {
|
||||
return type == Type::External;
|
||||
}
|
||||
|
||||
bool is_built_in() const {
|
||||
return index < 0;
|
||||
}
|
||||
};
|
||||
|
||||
inline Symbol END_OF_INPUT() {
|
||||
return Symbol{-1, Symbol::Terminal};
|
||||
}
|
||||
|
||||
inline Symbol START() {
|
||||
return Symbol{-2, Symbol::NonTerminal};
|
||||
}
|
||||
|
||||
inline Symbol NONE() {
|
||||
return Symbol{-3, Symbol::Type(-1)};
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::rules::Symbol> {
|
||||
size_t operator()(const tree_sitter::rules::Symbol &rule) const {
|
||||
return rule.hash_code();
|
||||
}
|
||||
};
|
||||
|
||||
} // std
|
||||
|
||||
#endif // COMPILER_RULES_SYMBOL_H_
|
||||
#endif // COMPILER_RULES_SYMBOL_H_
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
#include "compiler/rules/visitor.h"
|
||||
#include <vector>
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::vector;
|
||||
|
||||
Visitor::~Visitor() {}
|
||||
|
||||
rule_ptr IdentityRuleFn::default_apply(const Rule *rule) {
|
||||
return rule->copy();
|
||||
}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Choice *rule) {
|
||||
vector<rule_ptr> rules;
|
||||
for (const auto &el : rule->elements)
|
||||
rules.push_back(apply(el));
|
||||
return Choice::build(rules);
|
||||
}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Seq *rule) {
|
||||
return Seq::build({ apply(rule->left), apply(rule->right) });
|
||||
}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) {
|
||||
return Repeat::build(apply(rule->content));
|
||||
}
|
||||
|
||||
rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) {
|
||||
return Metadata::build(apply(rule->rule), rule->params);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,234 +0,0 @@
|
|||
#ifndef COMPILER_RULES_VISITOR_H_
|
||||
#define COMPILER_RULES_VISITOR_H_
|
||||
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
class Blank;
|
||||
class NamedSymbol;
|
||||
class CharacterSet;
|
||||
class Choice;
|
||||
class Repeat;
|
||||
class Seq;
|
||||
class String;
|
||||
class Symbol;
|
||||
class Pattern;
|
||||
class Metadata;
|
||||
class ExternalToken;
|
||||
|
||||
class Visitor {
|
||||
public:
|
||||
virtual void visit(const Blank *rule) = 0;
|
||||
virtual void visit(const CharacterSet *rule) = 0;
|
||||
virtual void visit(const Choice *rule) = 0;
|
||||
virtual void visit(const Metadata *rule) = 0;
|
||||
virtual void visit(const Pattern *rule) = 0;
|
||||
virtual void visit(const Repeat *rule) = 0;
|
||||
virtual void visit(const Seq *rule) = 0;
|
||||
virtual void visit(const String *rule) = 0;
|
||||
virtual void visit(const NamedSymbol *rule) = 0;
|
||||
virtual void visit(const Symbol *rule) = 0;
|
||||
virtual void visit(const ExternalToken *rule) = 0;
|
||||
virtual ~Visitor();
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class RuleFn : private Visitor {
|
||||
public:
|
||||
T apply(const rule_ptr &rule) {
|
||||
value_ = T();
|
||||
rule->accept(this);
|
||||
return value_;
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual T default_apply(const Rule *rule) {
|
||||
return T();
|
||||
}
|
||||
|
||||
virtual T apply_to(const Blank *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
virtual T apply_to(const CharacterSet *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
virtual T apply_to(const Choice *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
virtual T apply_to(const Metadata *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
virtual T apply_to(const Pattern *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
virtual T apply_to(const Repeat *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
virtual T apply_to(const Seq *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
virtual T apply_to(const String *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
virtual T apply_to(const NamedSymbol *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
virtual T apply_to(const Symbol *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
virtual T apply_to(const ExternalToken *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
void visit(const Blank *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
void visit(const CharacterSet *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
void visit(const Metadata *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
void visit(const Pattern *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
void visit(const Repeat *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
void visit(const String *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
void visit(const NamedSymbol *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
void visit(const Symbol *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
void visit(const ExternalToken *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
private:
|
||||
T value_;
|
||||
};
|
||||
|
||||
template <>
|
||||
class RuleFn<void> : private Visitor {
|
||||
public:
|
||||
void apply(const rule_ptr &rule) {
|
||||
rule->accept(this);
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void default_apply(const Rule *rule) {}
|
||||
|
||||
virtual void apply_to(const Blank *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const CharacterSet *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Choice *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Metadata *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Pattern *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Repeat *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Seq *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const String *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const NamedSymbol *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Symbol *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const ExternalToken *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
void visit(const Blank *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const CharacterSet *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Choice *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Metadata *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Pattern *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Repeat *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Seq *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const String *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const NamedSymbol *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Symbol *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const ExternalToken *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
};
|
||||
|
||||
class IdentityRuleFn : public RuleFn<rule_ptr> {
|
||||
protected:
|
||||
virtual rule_ptr default_apply(const Rule *rule);
|
||||
virtual rule_ptr apply_to(const Choice *rule);
|
||||
virtual rule_ptr apply_to(const Metadata *rule);
|
||||
virtual rule_ptr apply_to(const Seq *rule);
|
||||
virtual rule_ptr apply_to(const Repeat *rule);
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_VISITOR_H_
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
#include "compiler/syntax_grammar.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
bool ExternalToken::operator==(const ExternalToken &other) const {
|
||||
return name == other.name && type == other.type &&
|
||||
corresponding_internal_token == other.corresponding_internal_token;
|
||||
}
|
||||
|
||||
bool ProductionStep::operator==(const ProductionStep &other) const {
|
||||
return symbol == other.symbol && precedence == other.precedence &&
|
||||
associativity == other.associativity;
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -4,22 +4,16 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/variable.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct ExternalToken {
|
||||
bool operator==(const ExternalToken &) const;
|
||||
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Symbol corresponding_internal_token;
|
||||
};
|
||||
|
||||
struct ProductionStep {
|
||||
bool operator==(const ProductionStep &) const;
|
||||
inline bool operator==(const ProductionStep &other) const {
|
||||
return symbol == other.symbol && precedence == other.precedence &&
|
||||
associativity == other.associativity;
|
||||
}
|
||||
|
||||
rules::Symbol symbol;
|
||||
int precedence;
|
||||
|
|
@ -34,7 +28,7 @@ struct SyntaxVariable {
|
|||
std::vector<Production> productions;
|
||||
};
|
||||
|
||||
typedef std::set<rules::Symbol> ConflictSet;
|
||||
using ConflictSet = std::set<rules::Symbol>;
|
||||
|
||||
struct SyntaxGrammar {
|
||||
std::vector<SyntaxVariable> variables;
|
||||
|
|
|
|||
31
src/compiler/util/make_visitor.h
Normal file
31
src/compiler/util/make_visitor.h
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
#ifndef COMPILER_UTIL_MAKE_VISITOR_H_
#define COMPILER_UTIL_MAKE_VISITOR_H_

#include <utility>

namespace tree_sitter {
namespace util {

// Overload set assembled from several callables.
//
// `visitor<F1, F2, ...>` inherits from every callable type and re-exports
// each `operator()`, so a call on the visitor is resolved by ordinary
// overload resolution across all of the supplied callables.
template <typename... Fns>
struct visitor;

// Base case: a single callable.
template <typename Fn>
struct visitor<Fn> : Fn {
  using Fn::operator();

  // The callable is taken by value and moved into the base sub-object, so
  // callables that are movable but not copyable are accepted as well.
  visitor(Fn fn) : Fn(std::move(fn)) {}
};

// Recursive case: the first callable plus a visitor over the remaining ones.
template <typename Fn, typename... Fns>
struct visitor<Fn, Fns...> : Fn, visitor<Fns...> {
  using Fn::operator();
  using visitor<Fns...>::operator();

  // Each by-value argument is moved, never copied, into its base sub-object.
  visitor(Fn fn, Fns... fns)
    : Fn(std::move(fn)), visitor<Fns...>(std::move(fns)...) {}
};

// Builds a visitor from the given callables (typically lambdas).
//
// The arguments are taken by value and moved into the result, which lets
// closures with move-only captures be combined. At least one callable must
// be supplied; `visitor<>` is never defined.
template <typename... Fns>
visitor<Fns...> make_visitor(Fns... fns) {
  return visitor<Fns...>(std::move(fns)...);
}

}  // namespace util
}  // namespace tree_sitter

#endif  // COMPILER_UTIL_MAKE_VISITOR_H_
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
#ifndef COMPILER_VARIABLE_H_
|
||||
#define COMPILER_VARIABLE_H_
|
||||
|
||||
#include <string>
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
// Category tag stored in the `type` field of Variable (declared below).
// NOTE(review): the enumerator names suggest hidden / auxiliary / anonymous /
// named categories, but their exact meaning is defined by code outside this
// header -- confirm before relying on it. The enumerators carry no explicit
// values, so they are numbered 0..3 in declaration order.
enum VariableType {
|
||||
VariableTypeHidden,
|
||||
VariableTypeAuxiliary,
|
||||
VariableTypeAnonymous,
|
||||
VariableTypeNamed,
|
||||
};
|
||||
|
||||
// Plain aggregate that groups a name, a VariableType tag and a rule pointer.
// It declares no constructors, so the member order matters for
// brace-initialization.
struct Variable {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rule_ptr rule;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_VARIABLE_H_
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
#include "test_helper.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
|
@ -14,11 +13,11 @@ START_TEST
|
|||
describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
||||
LexConflictManager conflict_manager;
|
||||
bool update;
|
||||
Symbol sym1(0, Symbol::Terminal);
|
||||
Symbol sym2(1, Symbol::Terminal);
|
||||
Symbol sym3(2, Symbol::Terminal);
|
||||
Symbol sym4(3, Symbol::Terminal);
|
||||
LexItemSet item_set({ LexItem(sym4, blank() )});
|
||||
Symbol sym1 = Symbol::terminal(0);
|
||||
Symbol sym2 = Symbol::terminal(1);
|
||||
Symbol sym3 = Symbol::terminal(2);
|
||||
Symbol sym4 = Symbol::terminal(3);
|
||||
LexItemSet item_set({ LexItem(sym4, Blank{} )});
|
||||
|
||||
before_each([&]() {
|
||||
conflict_manager = LexConflictManager();
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
using namespace rules;
|
||||
|
|
@ -14,7 +12,7 @@ START_TEST
|
|||
describe("LexItem", []() {
|
||||
describe("completion_status()", [&]() {
|
||||
it("indicates whether the item is done and its precedence", [&]() {
|
||||
LexItem item1(Symbol(0, Symbol::Terminal), character({ 'a', 'b', 'c' }));
|
||||
LexItem item1(Symbol::terminal(0), CharacterSet({'a', 'b', 'c'}));
|
||||
AssertThat(item1.completion_status().is_done, IsFalse());
|
||||
AssertThat(item1.completion_status().precedence, Equals(PrecedenceRange()));
|
||||
|
||||
|
|
@ -22,15 +20,18 @@ describe("LexItem", []() {
|
|||
params.precedence = 3;
|
||||
params.has_precedence = true;
|
||||
params.is_string = 1;
|
||||
LexItem item2(Symbol(0, Symbol::Terminal), choice({
|
||||
metadata(blank(), params),
|
||||
character({ 'a', 'b', 'c' })
|
||||
}));
|
||||
LexItem item2(Symbol::terminal(0), Choice{{
|
||||
Metadata{Blank{}, params},
|
||||
CharacterSet{{ 'a', 'b', 'c' }}
|
||||
}});
|
||||
|
||||
AssertThat(item2.completion_status().is_done, IsTrue());
|
||||
AssertThat(item2.completion_status().precedence, Equals(PrecedenceRange(3)));
|
||||
|
||||
LexItem item3(Symbol(0, Symbol::Terminal), repeat(character({ ' ', '\t' })));
|
||||
LexItem item3(Symbol::terminal(0), Choice{{
|
||||
Blank{},
|
||||
Repeat{CharacterSet{{ ' ', '\t' }}},
|
||||
}});
|
||||
AssertThat(item3.completion_status().is_done, IsTrue());
|
||||
AssertThat(item3.completion_status().precedence, Equals(PrecedenceRange()));
|
||||
});
|
||||
|
|
@ -40,17 +41,17 @@ describe("LexItem", []() {
|
|||
describe("LexItemSet::transitions()", [&]() {
|
||||
it("handles single characters", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
|
||||
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'x' }}),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
item_set.transitions(),
|
||||
Equals(LexItemSet::TransitionMap({
|
||||
{
|
||||
CharacterSet().include('x'),
|
||||
CharacterSet{{'x'}},
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
LexItem(Symbol::non_terminal(1), Blank{}),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -64,7 +65,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
params.is_main_token = true;
|
||||
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), metadata(character({ 'x' }), params)),
|
||||
LexItem(Symbol::non_terminal(1), Metadata{CharacterSet{{'x'}}, params}),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -74,7 +75,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('x'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), metadata(blank(), params)),
|
||||
LexItem(Symbol::non_terminal(1), Metadata{Blank{}, params}),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
true
|
||||
|
|
@ -85,11 +86,11 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles sequences", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'w' }),
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
LexItem(Symbol::non_terminal(1), Rule::seq({
|
||||
CharacterSet{{ 'w' }},
|
||||
CharacterSet{{ 'x' }},
|
||||
CharacterSet{{ 'y' }},
|
||||
CharacterSet{{ 'z' }},
|
||||
})),
|
||||
});
|
||||
|
||||
|
|
@ -100,10 +101,10 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('w'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
LexItem(Symbol::non_terminal(1), Rule::seq({
|
||||
CharacterSet{{ 'x' }},
|
||||
CharacterSet{{ 'y' }},
|
||||
CharacterSet{{ 'z' }},
|
||||
})),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
|
|
@ -115,14 +116,14 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles sequences with nested precedence", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
prec(3, seq({
|
||||
character({ 'v' }),
|
||||
prec(4, seq({
|
||||
character({ 'w' }),
|
||||
character({ 'x' }) })),
|
||||
character({ 'y' }) })),
|
||||
character({ 'z' }),
|
||||
LexItem(Symbol::non_terminal(1), Rule::seq({
|
||||
Metadata::prec(3, Rule::seq({
|
||||
CharacterSet{{ 'v' }},
|
||||
Metadata::prec(4, Rule::seq({
|
||||
CharacterSet{{ 'w' }},
|
||||
CharacterSet{{ 'x' }} })),
|
||||
CharacterSet{{ 'y' }} })),
|
||||
CharacterSet{{ 'z' }},
|
||||
})),
|
||||
});
|
||||
|
||||
|
|
@ -137,13 +138,15 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
// The outer precedence is now 'active', because we are within its
|
||||
// contained rule.
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
active_prec(3, seq({
|
||||
prec(4, seq({
|
||||
character({ 'w' }),
|
||||
character({ 'x' }) })),
|
||||
character({ 'y' }) })),
|
||||
character({ 'z' }),
|
||||
LexItem(Symbol::non_terminal(1), Rule::seq({
|
||||
Metadata::active_prec(3, Rule::seq({
|
||||
Metadata::prec(4, Rule::seq({
|
||||
CharacterSet{{ 'w' }},
|
||||
CharacterSet{{ 'x' }}
|
||||
})),
|
||||
CharacterSet{{ 'y' }}
|
||||
})),
|
||||
CharacterSet{{ 'z' }},
|
||||
})),
|
||||
}),
|
||||
|
||||
|
|
@ -165,11 +168,12 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
Transition{
|
||||
// The inner precedence is now 'active'
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
active_prec(3, seq({
|
||||
active_prec(4, character({ 'x' })),
|
||||
character({ 'y' }) })),
|
||||
character({ 'z' }),
|
||||
LexItem(Symbol::non_terminal(1), Rule::seq({
|
||||
Metadata::active_prec(3, Rule::seq({
|
||||
Metadata::active_prec(4, CharacterSet{{'x'}}),
|
||||
CharacterSet{{'y'}}
|
||||
})),
|
||||
CharacterSet{{'z'}},
|
||||
})),
|
||||
}),
|
||||
|
||||
|
|
@ -190,9 +194,9 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('x'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
active_prec(3, character({ 'y' })),
|
||||
character({ 'z' }),
|
||||
LexItem(Symbol::non_terminal(1), Rule::seq({
|
||||
Metadata::active_prec(3, CharacterSet{{'y'}}),
|
||||
CharacterSet{{'z'}},
|
||||
})),
|
||||
}),
|
||||
|
||||
|
|
@ -213,7 +217,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('y'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
|
||||
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'z' }}),
|
||||
}),
|
||||
PrecedenceRange(3),
|
||||
false
|
||||
|
|
@ -224,13 +228,13 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles sequences where the left hand side can be blank", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
choice({
|
||||
character({ 'x' }),
|
||||
blank(),
|
||||
LexItem(Symbol::non_terminal(1), Rule::seq({
|
||||
Rule::choice({
|
||||
CharacterSet{{ 'x' }},
|
||||
Blank{},
|
||||
}),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
CharacterSet{{ 'y' }},
|
||||
CharacterSet{{ 'z' }},
|
||||
})),
|
||||
});
|
||||
|
||||
|
|
@ -241,9 +245,9 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('x'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
LexItem(Symbol::non_terminal(1), Rule::seq({
|
||||
CharacterSet{{ 'y' }},
|
||||
CharacterSet{{ 'z' }},
|
||||
})),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
|
|
@ -254,7 +258,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('y'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
|
||||
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'z' }}),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -265,7 +269,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles blanks", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
LexItem(Symbol::non_terminal(1), Blank{}),
|
||||
});
|
||||
|
||||
AssertThat(item_set.transitions(), IsEmpty());
|
||||
|
|
@ -273,11 +277,11 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles repeats", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), repeat1(seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }),
|
||||
}))),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
|
||||
LexItem(Symbol::non_terminal(1), Repeat{Rule::seq({
|
||||
CharacterSet{{ 'a' }},
|
||||
CharacterSet{{ 'b' }},
|
||||
})}),
|
||||
LexItem(Symbol::non_terminal(2), Repeat{CharacterSet{{'c'}}}),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -287,14 +291,14 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'b' }),
|
||||
repeat1(seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }),
|
||||
}))
|
||||
LexItem(Symbol::non_terminal(1), Rule::seq({
|
||||
CharacterSet{{ 'b' }},
|
||||
Repeat{Rule::seq({
|
||||
CharacterSet{{ 'a' }},
|
||||
CharacterSet{{ 'b' }},
|
||||
})}
|
||||
})),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'b' })),
|
||||
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'b' }}),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -304,8 +308,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('c'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
|
||||
LexItem(Symbol::non_terminal(2), Repeat{CharacterSet{{ 'c' }}}),
|
||||
LexItem(Symbol::non_terminal(2), Blank{}),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -316,7 +320,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles repeats with precedence", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' }))))
|
||||
LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Repeat{CharacterSet{{ 'a' }}}))
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -326,8 +330,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' })))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, blank())),
|
||||
LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Repeat{CharacterSet{{ 'a' }}})),
|
||||
LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Blank{})),
|
||||
}),
|
||||
PrecedenceRange(-1),
|
||||
false
|
||||
|
|
@ -338,14 +342,14 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles choices between overlapping character sets", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), choice({
|
||||
active_prec(2, seq({
|
||||
character({ 'a', 'b', 'c', 'd' }),
|
||||
character({ 'x' }),
|
||||
LexItem(Symbol::non_terminal(1), Rule::choice({
|
||||
Metadata::active_prec(2, Rule::seq({
|
||||
CharacterSet{{ 'a', 'b', 'c', 'd' }},
|
||||
CharacterSet{{ 'x' }},
|
||||
})),
|
||||
active_prec(3, seq({
|
||||
character({ 'c', 'd', 'e', 'f' }),
|
||||
character({ 'y' }),
|
||||
Metadata::active_prec(3, Rule::seq({
|
||||
CharacterSet{{ 'c', 'd', 'e', 'f' }},
|
||||
CharacterSet{{ 'y' }},
|
||||
})),
|
||||
}))
|
||||
});
|
||||
|
|
@ -357,7 +361,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a', 'b'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
|
||||
LexItem(Symbol::non_terminal(1), Metadata::active_prec(2, CharacterSet{{ 'x' }})),
|
||||
}),
|
||||
PrecedenceRange(2),
|
||||
false
|
||||
|
|
@ -367,8 +371,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('c', 'd'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
|
||||
LexItem(Symbol::non_terminal(1), Metadata::active_prec(2, CharacterSet{{ 'x' }})),
|
||||
LexItem(Symbol::non_terminal(1), Metadata::active_prec(3, CharacterSet{{ 'y' }})),
|
||||
}),
|
||||
PrecedenceRange(2, 3),
|
||||
false
|
||||
|
|
@ -378,7 +382,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('e', 'f'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
|
||||
LexItem(Symbol::non_terminal(1), Metadata::active_prec(3, CharacterSet{{ 'y' }})),
|
||||
}),
|
||||
PrecedenceRange(3),
|
||||
false
|
||||
|
|
@ -389,14 +393,14 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles choices between a subset and a superset of characters", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), choice({
|
||||
seq({
|
||||
character({ 'b', 'c', 'd' }),
|
||||
character({ 'x' }),
|
||||
LexItem(Symbol::non_terminal(1), Rule::choice({
|
||||
Rule::seq({
|
||||
CharacterSet{{ 'b', 'c', 'd' }},
|
||||
CharacterSet{{ 'x' }},
|
||||
}),
|
||||
seq({
|
||||
character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
|
||||
character({ 'y' }),
|
||||
Rule::seq({
|
||||
CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f' }},
|
||||
CharacterSet{{ 'y' }},
|
||||
}),
|
||||
})),
|
||||
});
|
||||
|
|
@ -408,7 +412,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a').include('e', 'f'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
|
||||
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'y' }}),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -418,8 +422,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('b', 'd'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
|
||||
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'x' }}),
|
||||
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'y' }}),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -430,15 +434,15 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles choices between whitelisted and blacklisted character sets", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
choice({
|
||||
character({ '/' }, false),
|
||||
seq({
|
||||
character({ '\\' }),
|
||||
character({ '/' }),
|
||||
LexItem(Symbol::non_terminal(1), Rule::seq({
|
||||
Rule::choice({
|
||||
CharacterSet().include_all().exclude('/'),
|
||||
Rule::seq({
|
||||
CharacterSet{{ '\\' }},
|
||||
CharacterSet{{ '/' }},
|
||||
}),
|
||||
}),
|
||||
character({ '/' }),
|
||||
CharacterSet{{ '/' }},
|
||||
}))
|
||||
});
|
||||
|
||||
|
|
@ -449,7 +453,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include_all().exclude('/').exclude('\\'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
|
||||
LexItem(Symbol::non_terminal(1), CharacterSet{{ '/' }}),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -459,8 +463,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('\\'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({ character({ '/' }), character({ '/' }) })),
|
||||
LexItem(Symbol::non_terminal(1), CharacterSet{{ '/' }}),
|
||||
LexItem(Symbol::non_terminal(1), Rule::seq({ CharacterSet{{ '/' }}, CharacterSet{{ '/' }} })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -471,8 +475,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles different items with overlapping character sets", [&]() {
|
||||
LexItemSet set1({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), character({ 'e', 'f', 'g', 'h', 'i' }))
|
||||
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f' }}),
|
||||
LexItem(Symbol::non_terminal(2), CharacterSet{{ 'e', 'f', 'g', 'h', 'i' }})
|
||||
});
|
||||
|
||||
AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({
|
||||
|
|
@ -480,7 +484,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a', 'd'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
LexItem(Symbol::non_terminal(1), Blank{}),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -490,8 +494,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('e', 'f'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
|
||||
LexItem(Symbol::non_terminal(1), Blank{}),
|
||||
LexItem(Symbol::non_terminal(2), Blank{}),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -501,7 +505,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('g', 'i'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
|
||||
LexItem(Symbol::non_terminal(2), Blank{}),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
|
|||
|
|
@ -3,8 +3,6 @@
|
|||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
|
||||
using namespace build_tables;
|
||||
using namespace rules;
|
||||
|
|
@ -17,7 +15,7 @@ describe("ParseItemSetBuilder", []() {
|
|||
lexical_variables.push_back({
|
||||
"token_" + to_string(i),
|
||||
VariableTypeNamed,
|
||||
blank(),
|
||||
Blank{},
|
||||
false
|
||||
});
|
||||
}
|
||||
|
|
@ -28,23 +26,23 @@ describe("ParseItemSetBuilder", []() {
|
|||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable{"rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::terminal(11), 0, AssociativityNone},
|
||||
}),
|
||||
}},
|
||||
SyntaxVariable{"rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol::terminal(12), 0, AssociativityNone},
|
||||
{Symbol::terminal(13), 0, AssociativityNone},
|
||||
}),
|
||||
Production({
|
||||
{Symbol(2, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(2), 0, AssociativityNone},
|
||||
})
|
||||
}},
|
||||
SyntaxVariable{"rule2", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(14, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(15, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol::terminal(14), 0, AssociativityNone},
|
||||
{Symbol::terminal(15), 0, AssociativityNone},
|
||||
})
|
||||
}},
|
||||
}, {}, {}, {}};
|
||||
|
|
@ -55,8 +53,8 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
|
||||
ParseItem(Symbol::non_terminal(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(10) }),
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -65,20 +63,20 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) })
|
||||
ParseItem(Symbol::non_terminal(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(10) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
ParseItem(Symbol::non_terminal(1), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(11) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
ParseItem(Symbol::non_terminal(1), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol::terminal(11) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(2, Symbol::NonTerminal), production(2, 0), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
ParseItem(Symbol::non_terminal(2), production(2, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(11) })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
@ -87,14 +85,14 @@ describe("ParseItemSetBuilder", []() {
|
|||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable{"rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::terminal(11), 0, AssociativityNone},
|
||||
}),
|
||||
}},
|
||||
SyntaxVariable{"rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol::terminal(12), 0, AssociativityNone},
|
||||
{Symbol::terminal(13), 0, AssociativityNone},
|
||||
}),
|
||||
Production({})
|
||||
}},
|
||||
|
|
@ -106,8 +104,8 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
|
||||
ParseItem(Symbol::non_terminal(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(10) }),
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -116,16 +114,16 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) })
|
||||
ParseItem(Symbol::non_terminal(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(10) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
ParseItem(Symbol::non_terminal(1), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(11) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
ParseItem(Symbol::non_terminal(1), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol::terminal(11) })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
using namespace rules;
|
||||
using build_tables::rule_can_be_blank;
|
||||
|
|
@ -10,49 +8,48 @@ using build_tables::rule_can_be_blank;
|
|||
START_TEST
|
||||
|
||||
describe("rule_can_be_blank", [&]() {
|
||||
rule_ptr rule;
|
||||
Rule rule;
|
||||
|
||||
it("returns false for basic rules", [&]() {
|
||||
AssertThat(rule_can_be_blank(i_sym(3)), IsFalse());
|
||||
AssertThat(rule_can_be_blank(str("x")), IsFalse());
|
||||
AssertThat(rule_can_be_blank(pattern("x")), IsFalse());
|
||||
AssertThat(rule_can_be_blank(CharacterSet{{'x'}}), IsFalse());
|
||||
});
|
||||
|
||||
it("returns true for blanks", [&]() {
|
||||
AssertThat(rule_can_be_blank(blank()), IsTrue());
|
||||
AssertThat(rule_can_be_blank(Blank{}), IsTrue());
|
||||
});
|
||||
|
||||
it("returns true for repeats", [&]() {
|
||||
AssertThat(rule_can_be_blank(repeat(str("x"))), IsTrue());
|
||||
it("returns true for repeats iff the content can be blank", [&]() {
|
||||
AssertThat(rule_can_be_blank(Repeat{CharacterSet{{'x'}}}), IsFalse());
|
||||
AssertThat(rule_can_be_blank(Repeat{Blank{}}), IsTrue());
|
||||
});
|
||||
|
||||
it("returns true for choices iff one or more sides can be blank", [&]() {
|
||||
rule = choice({ sym("x"), blank() });
|
||||
rule = Rule::choice({ CharacterSet{{'x'}}, Blank{} });
|
||||
AssertThat(rule_can_be_blank(rule), IsTrue());
|
||||
|
||||
rule = choice({ blank(), sym("x") });
|
||||
rule = Rule::choice({ Blank{}, CharacterSet{{'x'}} });
|
||||
AssertThat(rule_can_be_blank(rule), IsTrue());
|
||||
|
||||
rule = choice({ sym("x"), sym("y") });
|
||||
rule = Rule::choice({ CharacterSet{{'x'}}, CharacterSet{{'y'}} });
|
||||
AssertThat(rule_can_be_blank(rule), IsFalse());
|
||||
});
|
||||
|
||||
it("returns true for sequences iff both sides can be blank", [&]() {
|
||||
rule = seq({ blank(), str("x") });
|
||||
rule = Rule::seq({ Blank{}, CharacterSet{{'x'}} });
|
||||
AssertThat(rule_can_be_blank(rule), IsFalse());
|
||||
|
||||
rule = seq({ str("x"), blank() });
|
||||
rule = Rule::seq({ CharacterSet{{'x'}}, Blank{} });
|
||||
AssertThat(rule_can_be_blank(rule), IsFalse());
|
||||
|
||||
rule = seq({ blank(), choice({ sym("x"), blank() }) });
|
||||
rule = Rule::seq({ Blank{}, Rule::choice({ CharacterSet{{'x'}}, Blank{} }) });
|
||||
AssertThat(rule_can_be_blank(rule), IsTrue());
|
||||
});
|
||||
|
||||
it("ignores metadata rules", [&]() {
|
||||
rule = make_shared<rules::Metadata>(blank(), MetadataParams());
|
||||
rule = Metadata::prec(1, Blank{});
|
||||
AssertThat(rule_can_be_blank(rule), IsTrue());
|
||||
|
||||
rule = make_shared<rules::Metadata>(sym("one"), MetadataParams());
|
||||
rule = Metadata::prec(1, CharacterSet{{'x'}});
|
||||
AssertThat(rule_can_be_blank(rule), IsFalse());
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,20 +1,24 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
using prepare_grammar::expand_repeats;
|
||||
using Variable = InitialSyntaxGrammar::Variable;
|
||||
|
||||
bool operator==(const Variable &left, const Variable &right) {
|
||||
return left.name == right.name && left.rule == right.rule && left.type == right.type;
|
||||
}
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("expand_repeats", []() {
|
||||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, repeat1(i_token(0))},
|
||||
Variable{"rule0", VariableTypeNamed, Repeat{Symbol::terminal(0)}},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
|
@ -22,10 +26,10 @@ describe("expand_repeats", []() {
|
|||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, i_sym(1)},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(0) }),
|
||||
i_token(0),
|
||||
Variable{"rule0", VariableTypeNamed, Symbol::non_terminal(1)},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
|
||||
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(0) }),
|
||||
Symbol::terminal(0),
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
|
@ -33,9 +37,9 @@ describe("expand_repeats", []() {
|
|||
it("replaces repeats inside of sequences", [&]() {
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
i_token(10),
|
||||
repeat1(i_token(11)),
|
||||
Variable{"rule0", VariableTypeNamed, Rule::seq({
|
||||
Symbol::terminal(10),
|
||||
Repeat{Symbol::terminal(11)},
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
|
|
@ -44,13 +48,13 @@ describe("expand_repeats", []() {
|
|||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
i_token(10),
|
||||
i_sym(1),
|
||||
Variable{"rule0", VariableTypeNamed, Rule::seq({
|
||||
Symbol::terminal(10),
|
||||
Symbol::non_terminal(1),
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(11) }),
|
||||
i_token(11)
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
|
||||
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(11) }),
|
||||
Symbol::terminal(11)
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
|
@ -58,9 +62,9 @@ describe("expand_repeats", []() {
|
|||
it("replaces repeats inside of choices", [&]() {
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
i_token(10),
|
||||
repeat1(i_token(11))
|
||||
Variable{"rule0", VariableTypeNamed, Rule::choice({
|
||||
Symbol::terminal(10),
|
||||
Repeat{Symbol::terminal(11)}
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
|
|
@ -69,13 +73,13 @@ describe("expand_repeats", []() {
|
|||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
i_token(10),
|
||||
i_sym(1),
|
||||
Variable{"rule0", VariableTypeNamed, Rule::choice({
|
||||
Symbol::terminal(10),
|
||||
Symbol::non_terminal(1),
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(11) }),
|
||||
i_token(11),
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
|
||||
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(11) }),
|
||||
Symbol::terminal(11),
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
|
@ -83,13 +87,13 @@ describe("expand_repeats", []() {
|
|||
it("does not create redundant auxiliary rules", [&]() {
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
seq({ i_token(1), repeat1(i_token(4)) }),
|
||||
seq({ i_token(2), repeat1(i_token(4)) }),
|
||||
Variable{"rule0", VariableTypeNamed, Rule::choice({
|
||||
Rule::seq({ Symbol::terminal(1), Repeat{Symbol::terminal(4)} }),
|
||||
Rule::seq({ Symbol::terminal(2), Repeat{Symbol::terminal(4)} }),
|
||||
})},
|
||||
Variable{"rule1", VariableTypeNamed, seq({
|
||||
i_token(3),
|
||||
repeat1(i_token(4))
|
||||
Variable{"rule1", VariableTypeNamed, Rule::seq({
|
||||
Symbol::terminal(3),
|
||||
Repeat{Symbol::terminal(4)}
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
|
|
@ -98,17 +102,17 @@ describe("expand_repeats", []() {
|
|||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
seq({ i_token(1), i_sym(2) }),
|
||||
seq({ i_token(2), i_sym(2) }),
|
||||
Variable{"rule0", VariableTypeNamed, Rule::choice({
|
||||
Rule::seq({ Symbol::terminal(1), Symbol::non_terminal(2) }),
|
||||
Rule::seq({ Symbol::terminal(2), Symbol::non_terminal(2) }),
|
||||
})},
|
||||
Variable{"rule1", VariableTypeNamed, seq({
|
||||
i_token(3),
|
||||
i_sym(2),
|
||||
Variable{"rule1", VariableTypeNamed, Rule::seq({
|
||||
Symbol::terminal(3),
|
||||
Symbol::non_terminal(2),
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(2), i_token(4) }),
|
||||
i_token(4),
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
|
||||
Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(4) }),
|
||||
Symbol::terminal(4),
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
|
@ -116,9 +120,9 @@ describe("expand_repeats", []() {
|
|||
it("can replace multiple repeats in the same rule", [&]() {
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
repeat1(i_token(10)),
|
||||
repeat1(i_token(11)),
|
||||
Variable{"rule0", VariableTypeNamed, Rule::seq({
|
||||
Repeat{Symbol::terminal(10)},
|
||||
Repeat{Symbol::terminal(11)},
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
|
|
@ -127,17 +131,17 @@ describe("expand_repeats", []() {
|
|||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
Variable{"rule0", VariableTypeNamed, Rule::seq({
|
||||
Symbol::non_terminal(1),
|
||||
Symbol::non_terminal(2),
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(10) }),
|
||||
i_token(10),
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
|
||||
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(10) }),
|
||||
Symbol::terminal(10),
|
||||
})},
|
||||
Variable{"rule0_repeat2", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(2), i_token(11) }),
|
||||
i_token(11),
|
||||
Variable{"rule0_repeat2", VariableTypeAuxiliary, Rule::choice({
|
||||
Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(11) }),
|
||||
Symbol::terminal(11),
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
|
@ -145,8 +149,8 @@ describe("expand_repeats", []() {
|
|||
it("can replace repeats in multiple rules", [&]() {
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, repeat1(i_token(10))},
|
||||
Variable{"rule1", VariableTypeNamed, repeat1(i_token(11))},
|
||||
Variable{"rule0", VariableTypeNamed, Repeat{Symbol::terminal(10)}},
|
||||
Variable{"rule1", VariableTypeNamed, Repeat{Symbol::terminal(11)}},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
|
@ -154,15 +158,15 @@ describe("expand_repeats", []() {
|
|||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, i_sym(2)},
|
||||
Variable{"rule1", VariableTypeNamed, i_sym(3)},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(2), i_token(10) }),
|
||||
i_token(10),
|
||||
Variable{"rule0", VariableTypeNamed, Symbol::non_terminal(2)},
|
||||
Variable{"rule1", VariableTypeNamed, Symbol::non_terminal(3)},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
|
||||
Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(10) }),
|
||||
Symbol::terminal(10),
|
||||
})},
|
||||
Variable{"rule1_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(3), i_token(11) }),
|
||||
i_token(11),
|
||||
Variable{"rule1_repeat1", VariableTypeAuxiliary, Rule::choice({
|
||||
Rule::seq({ Symbol::non_terminal(3), Symbol::terminal(11) }),
|
||||
Symbol::terminal(11),
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,12 +1,13 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::expand_tokens;
|
||||
using prepare_grammar::expand_token;
|
||||
using prepare_grammar::ExpandTokenResult;
|
||||
|
||||
describe("expand_tokens", []() {
|
||||
MetadataParams string_token_params;
|
||||
|
|
@ -15,153 +16,72 @@ describe("expand_tokens", []() {
|
|||
|
||||
describe("string rules", [&]() {
|
||||
it("replaces strings with sequences of character sets", [&]() {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
i_sym(10),
|
||||
str("xyz"),
|
||||
i_sym(11),
|
||||
}),
|
||||
false
|
||||
}
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
i_sym(10),
|
||||
metadata(seq({
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
}), string_token_params),
|
||||
i_sym(11),
|
||||
AssertThat(
|
||||
expand_token(Rule::seq({
|
||||
String{"a"},
|
||||
String{"bcd"},
|
||||
String{"e"}
|
||||
})).rule,
|
||||
Equals(Rule::seq({
|
||||
CharacterSet{{ 'a' }},
|
||||
Rule::seq({
|
||||
CharacterSet{{ 'b' }},
|
||||
CharacterSet{{ 'c' }},
|
||||
CharacterSet{{ 'd' }},
|
||||
}),
|
||||
false
|
||||
}
|
||||
}));
|
||||
CharacterSet{{ 'e' }},
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles strings containing non-ASCII UTF8 characters", [&]() {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
str("\u03B1 \u03B2"),
|
||||
false
|
||||
},
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
metadata(seq({
|
||||
character({ 945 }),
|
||||
character({ ' ' }),
|
||||
character({ 946 }),
|
||||
}), string_token_params),
|
||||
false
|
||||
}
|
||||
}));
|
||||
AssertThat(
|
||||
expand_token(String{"\u03B1 \u03B2"}).rule,
|
||||
Equals(Rule::seq({
|
||||
CharacterSet{{ 945 }},
|
||||
CharacterSet{{ ' ' }},
|
||||
CharacterSet{{ 946 }},
|
||||
}))
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("regexp rules", [&]() {
|
||||
it("replaces regexps with the equivalent rule tree", [&]() {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
i_sym(10),
|
||||
pattern("x*"),
|
||||
i_sym(11),
|
||||
}),
|
||||
false
|
||||
}
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
i_sym(10),
|
||||
repeat(character({ 'x' })),
|
||||
i_sym(11),
|
||||
}),
|
||||
false
|
||||
}
|
||||
}));
|
||||
AssertThat(
|
||||
expand_token(Rule::seq({
|
||||
String{"a"},
|
||||
Pattern{"x+"},
|
||||
String{"b"},
|
||||
})).rule,
|
||||
Equals(Rule::seq({
|
||||
CharacterSet{{'a'}},
|
||||
Repeat{CharacterSet{{ 'x' }}},
|
||||
CharacterSet{{'b'}},
|
||||
}))
|
||||
);
|
||||
});
|
||||
|
||||
it("handles regexps containing non-ASCII UTF8 characters", [&]() {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
pattern("[^\u03B1-\u03B4]*"),
|
||||
false
|
||||
}
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
repeat(character({ 945, 946, 947, 948 }, false)),
|
||||
false
|
||||
}
|
||||
}));
|
||||
AssertThat(
|
||||
expand_token(Pattern{"[^\u03B1-\u03B4]+"}).rule,
|
||||
Equals(Rule(Repeat{
|
||||
CharacterSet().include_all().exclude(945, 948)
|
||||
}))
|
||||
);
|
||||
});
|
||||
|
||||
it("returns an error when the grammar contains an invalid regex", [&]() {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
pattern("("),
|
||||
str("xyz"),
|
||||
pattern("["),
|
||||
}),
|
||||
false
|
||||
},
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError(TSCompileErrorTypeInvalidRegex, "unmatched open paren")));
|
||||
AssertThat(
|
||||
expand_token(Rule::seq({
|
||||
Pattern{"("},
|
||||
String{"xyz"},
|
||||
Pattern{"["},
|
||||
})).error,
|
||||
Equals(CompileError(
|
||||
TSCompileErrorTypeInvalidRegex,
|
||||
"unmatched open paren"
|
||||
))
|
||||
);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,104 +1,75 @@
|
|||
#include "test_helper.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
#include "compiler/prepare_grammar/extract_choices.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::extract_choices;
|
||||
|
||||
class rule_vector : public vector<rule_ptr> {
|
||||
public:
|
||||
bool operator==(const vector<rule_ptr> &other) const {
|
||||
if (this->size() != other.size()) return false;
|
||||
for (size_t i = 0; i < this->size(); i++) {
|
||||
auto rule = this->operator[](i);
|
||||
auto other_rule = other[i];
|
||||
if (!rule->operator==(*rule))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
rule_vector(const initializer_list<rule_ptr> &list) :
|
||||
vector<rule_ptr>(list) {}
|
||||
};
|
||||
|
||||
describe("extract_choices", []() {
|
||||
it("expands rules containing choices into multiple rules", [&]() {
|
||||
auto rule = seq({
|
||||
sym("a"),
|
||||
choice({ sym("b"), sym("c"), sym("d") }),
|
||||
sym("e")
|
||||
auto rule = Rule::seq({
|
||||
Symbol::terminal(1),
|
||||
Rule::choice({
|
||||
Symbol::terminal(2),
|
||||
Symbol::terminal(3),
|
||||
Symbol::terminal(4)
|
||||
}),
|
||||
Symbol::terminal(5)
|
||||
});
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
seq({ sym("a"), sym("b"), sym("e") }),
|
||||
seq({ sym("a"), sym("c"), sym("e") }),
|
||||
seq({ sym("a"), sym("d"), sym("e") }),
|
||||
auto result = extract_choices(rule);
|
||||
|
||||
AssertThat(result, Equals(vector<Rule>({
|
||||
Rule::seq({Symbol::terminal(1), Symbol::terminal(2), Symbol::terminal(5)}),
|
||||
Rule::seq({Symbol::terminal(1), Symbol::terminal(3), Symbol::terminal(5)}),
|
||||
Rule::seq({Symbol::terminal(1), Symbol::terminal(4), Symbol::terminal(5)}),
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles metadata rules", [&]() {
|
||||
auto rule = prec(5, choice({ sym("b"), sym("c"), sym("d") }));
|
||||
auto rule = Metadata::prec(5, Rule::choice({
|
||||
Symbol::terminal(2),
|
||||
Symbol::terminal(3),
|
||||
Symbol::terminal(4)
|
||||
}));
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
prec(5, sym("b")),
|
||||
prec(5, sym("c")),
|
||||
prec(5, sym("d")),
|
||||
AssertThat(extract_choices(rule), Equals(vector<Rule>({
|
||||
Metadata::prec(5, Symbol::terminal(2)),
|
||||
Metadata::prec(5, Symbol::terminal(3)),
|
||||
Metadata::prec(5, Symbol::terminal(4)),
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles nested choices", [&]() {
|
||||
auto rule = choice({
|
||||
seq({ choice({ sym("a"), sym("b") }), sym("c") }),
|
||||
sym("d")
|
||||
auto rule = Rule::choice({
|
||||
Rule::seq({
|
||||
Rule::choice({
|
||||
Symbol::terminal(1),
|
||||
Symbol::terminal(2)
|
||||
}),
|
||||
Symbol::terminal(3)
|
||||
}),
|
||||
Symbol::terminal(4)
|
||||
});
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
seq({ sym("a"), sym("c") }),
|
||||
seq({ sym("b"), sym("c") }),
|
||||
sym("d"),
|
||||
AssertThat(extract_choices(rule), Equals(vector<Rule>({
|
||||
Rule::seq({Symbol::terminal(1), Symbol::terminal(3)}),
|
||||
Rule::seq({Symbol::terminal(2), Symbol::terminal(3)}),
|
||||
Symbol::terminal(4),
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles single symbols", [&]() {
|
||||
AssertThat(extract_choices(Symbol::terminal(2)), Equals(vector<Rule>({
|
||||
Symbol::terminal(2)
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles blank rules", [&]() {
|
||||
AssertThat(extract_choices(blank()), Equals(rule_vector({
|
||||
blank(),
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not move choices outside of repeats", [&]() {
|
||||
auto rule = seq({
|
||||
choice({ sym("a"), sym("b") }),
|
||||
repeat1(seq({
|
||||
sym("c"),
|
||||
choice({
|
||||
sym("d"),
|
||||
sym("e"),
|
||||
}),
|
||||
sym("f"),
|
||||
})),
|
||||
sym("g"),
|
||||
});
|
||||
|
||||
AssertThat(extract_choices(rule), Equals(rule_vector({
|
||||
seq({
|
||||
sym("a"),
|
||||
repeat1(choice({
|
||||
seq({ sym("c"), sym("d"), sym("f") }),
|
||||
seq({ sym("c"), sym("e"), sym("f") }),
|
||||
})),
|
||||
sym("g"),
|
||||
}),
|
||||
seq({
|
||||
sym("b"),
|
||||
repeat1(choice({
|
||||
seq({ sym("c"), sym("d"), sym("f") }),
|
||||
seq({ sym("c"), sym("e"), sym("f") }),
|
||||
})),
|
||||
sym("g"),
|
||||
}),
|
||||
AssertThat(extract_choices(Blank{}), Equals(vector<Rule>({
|
||||
Blank{},
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -3,8 +3,6 @@
|
|||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/equals_pointer.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
|
@ -13,23 +11,44 @@ using namespace rules;
|
|||
using prepare_grammar::extract_tokens;
|
||||
using prepare_grammar::InternedGrammar;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
using InternedVariable = InternedGrammar::Variable;
|
||||
using InitialSyntaxVariable = InitialSyntaxGrammar::Variable;
|
||||
|
||||
describe("extract_tokens", []() {
|
||||
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
|
||||
str("ab"),
|
||||
pattern("cd*"),
|
||||
choice({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
token(repeat1(choice({ str("ef"), str("gh") }))),
|
||||
}),
|
||||
}))},
|
||||
Variable{"rule_B", VariableTypeNamed, pattern("ij+")},
|
||||
Variable{"rule_C", VariableTypeNamed, choice({ str("kl"), blank() })},
|
||||
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(3))},
|
||||
InternedVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Repeat{Rule::seq({
|
||||
String{"ab"},
|
||||
Pattern{"cd+"},
|
||||
Rule::choice({
|
||||
Symbol::non_terminal(1),
|
||||
Symbol::non_terminal(2),
|
||||
Metadata::token(Repeat{Rule::choice({
|
||||
String{"ef"},
|
||||
String{"g"}
|
||||
})}),
|
||||
}),
|
||||
})}
|
||||
},
|
||||
InternedVariable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
Pattern{"h+"}
|
||||
},
|
||||
InternedVariable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
Rule::choice({ String{"i"}, Blank{} })
|
||||
},
|
||||
InternedVariable{
|
||||
"rule_D",
|
||||
VariableTypeNamed,
|
||||
Repeat{Symbol::non_terminal(3)}
|
||||
},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
|
|
@ -42,62 +61,104 @@ describe("extract_tokens", []() {
|
|||
|
||||
AssertThat(error, Equals(CompileError::none()));
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>{
|
||||
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable>{
|
||||
InitialSyntaxVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Repeat{Rule::seq({
|
||||
|
||||
// This string is now the first token in the lexical grammar.
|
||||
i_token(0),
|
||||
// This string is now the first token in the lexical grammar.
|
||||
Symbol::terminal(0),
|
||||
|
||||
// This pattern is now the second rule in the lexical grammar.
|
||||
i_token(1),
|
||||
// This pattern is now the second rule in the lexical grammar.
|
||||
Symbol::terminal(1),
|
||||
|
||||
choice({
|
||||
// Rule 1, which this symbol pointed to, has been moved to the
|
||||
// lexical grammar.
|
||||
i_token(3),
|
||||
Rule::choice({
|
||||
// Rule 1, which this symbol pointed to, has been moved to the
|
||||
// lexical grammar.
|
||||
Symbol::terminal(3),
|
||||
|
||||
// This symbol's index has been decremented, because a previous rule
|
||||
// was moved to the lexical grammar.
|
||||
i_sym(1),
|
||||
// This symbol's index has been decremented, because a previous rule
|
||||
// was moved to the lexical grammar.
|
||||
Symbol::non_terminal(1),
|
||||
|
||||
// This token rule is now the third rule in the lexical grammar.
|
||||
i_token(2),
|
||||
}),
|
||||
}))},
|
||||
// This token rule is now the third rule in the lexical grammar.
|
||||
Symbol::terminal(2),
|
||||
}),
|
||||
})}
|
||||
},
|
||||
|
||||
Variable{"rule_C", VariableTypeNamed, choice({ i_token(4), blank() })},
|
||||
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(2))},
|
||||
InitialSyntaxVariable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
Rule::choice({Symbol::terminal(4), Blank{}})
|
||||
},
|
||||
|
||||
InitialSyntaxVariable{
|
||||
"rule_D",
|
||||
VariableTypeNamed,
|
||||
Repeat{Symbol::non_terminal(2)}
|
||||
},
|
||||
}));
|
||||
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable>({
|
||||
// Strings become anonymous rules.
|
||||
LexicalVariable{"ab", VariableTypeAnonymous, str("ab"), true},
|
||||
LexicalVariable{
|
||||
"ab",
|
||||
VariableTypeAnonymous,
|
||||
Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}},
|
||||
true
|
||||
},
|
||||
|
||||
// Patterns become hidden rules.
|
||||
LexicalVariable{"/cd*/", VariableTypeAuxiliary, pattern("cd*"), false},
|
||||
LexicalVariable{
|
||||
"/cd+/",
|
||||
VariableTypeAuxiliary,
|
||||
Seq{CharacterSet{{'c'}}, Repeat{CharacterSet{{'d'}}}},
|
||||
false
|
||||
},
|
||||
|
||||
// Rules marked as tokens become hidden rules.
|
||||
LexicalVariable{"/(ef|gh)*/", VariableTypeAuxiliary, repeat1(choice({
|
||||
str("ef"),
|
||||
str("gh")
|
||||
})), false},
|
||||
LexicalVariable{
|
||||
"/(ef|g)+/",
|
||||
VariableTypeAuxiliary,
|
||||
Repeat{Rule::choice({
|
||||
Seq{CharacterSet{{'e'}}, CharacterSet{{'f'}}},
|
||||
CharacterSet{{'g'}},
|
||||
})},
|
||||
false
|
||||
},
|
||||
|
||||
// This named rule was moved wholesale to the lexical grammar.
|
||||
LexicalVariable{"rule_B", VariableTypeNamed, pattern("ij+"), false},
|
||||
LexicalVariable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
Repeat{CharacterSet{{'h'}}},
|
||||
false
|
||||
},
|
||||
|
||||
// Strings become anonymous rules.
|
||||
LexicalVariable{"kl", VariableTypeAnonymous, str("kl"), true},
|
||||
LexicalVariable{
|
||||
"i",
|
||||
VariableTypeAnonymous,
|
||||
CharacterSet{{'i'}},
|
||||
true
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not create duplicate tokens in the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({
|
||||
str("ab"),
|
||||
i_sym(0),
|
||||
str("ab"),
|
||||
})},
|
||||
{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
String{"ab"},
|
||||
Symbol::non_terminal(1),
|
||||
String{"ab"},
|
||||
})
|
||||
},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
|
|
@ -107,50 +168,114 @@ describe("extract_tokens", []() {
|
|||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable> {
|
||||
Variable {"rule_A", VariableTypeNamed, seq({ i_token(0), i_sym(0), i_token(0) })},
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable> {
|
||||
InitialSyntaxVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
Symbol::terminal(0),
|
||||
Symbol::non_terminal(1),
|
||||
Symbol::terminal(0)
|
||||
})
|
||||
},
|
||||
}));
|
||||
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable> {
|
||||
LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true},
|
||||
LexicalVariable{
|
||||
"ab",
|
||||
VariableTypeAnonymous,
|
||||
Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}},
|
||||
true
|
||||
},
|
||||
}))
|
||||
});
|
||||
|
||||
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })},
|
||||
Variable{"rule_B", VariableTypeNamed, str("cd")},
|
||||
Variable{"rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })},
|
||||
InternedVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ Symbol::non_terminal(1), String{"ab"} })
|
||||
},
|
||||
InternedVariable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
String{"cd"}
|
||||
},
|
||||
InternedVariable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ String{"ef"}, String{"cd"} })
|
||||
},
|
||||
}, {}, {}, {}});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })},
|
||||
Variable{"rule_B", VariableTypeNamed, i_token(1)},
|
||||
Variable{"rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })},
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable>({
|
||||
InitialSyntaxVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(0) })
|
||||
},
|
||||
InitialSyntaxVariable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
Symbol::terminal(1)
|
||||
},
|
||||
InitialSyntaxVariable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ Symbol::terminal(2), Symbol::terminal(1) })
|
||||
},
|
||||
})));
|
||||
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable> {
|
||||
LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true},
|
||||
LexicalVariable {"cd", VariableTypeAnonymous, str("cd"), true},
|
||||
LexicalVariable {"ef", VariableTypeAnonymous, str("ef"), true},
|
||||
LexicalVariable{
|
||||
"ab",
|
||||
VariableTypeAnonymous,
|
||||
Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}},
|
||||
true
|
||||
},
|
||||
LexicalVariable{
|
||||
"cd",
|
||||
VariableTypeAnonymous,
|
||||
Seq{CharacterSet{{'c'}}, CharacterSet{{'d'}}},
|
||||
true
|
||||
},
|
||||
LexicalVariable{
|
||||
"ef",
|
||||
VariableTypeAnonymous,
|
||||
Seq{CharacterSet{{'e'}}, CharacterSet{{'f'}}},
|
||||
true
|
||||
},
|
||||
}));
|
||||
});
|
||||
|
||||
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, str("ok")},
|
||||
Variable{"rule_B", VariableTypeNamed, repeat(i_sym(0))},
|
||||
Variable{"rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))},
|
||||
InternedVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
String{"ok"}
|
||||
},
|
||||
InternedVariable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
Repeat{Symbol::non_terminal(0)}
|
||||
},
|
||||
InternedVariable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
Repeat{Seq{Symbol::non_terminal(0), Symbol::non_terminal(0)}}
|
||||
},
|
||||
},
|
||||
{
|
||||
str(" ")
|
||||
String{" "}
|
||||
},
|
||||
{
|
||||
{ Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) }
|
||||
{ Symbol::non_terminal(1), Symbol::non_terminal(2) }
|
||||
},
|
||||
{}
|
||||
});
|
||||
|
|
@ -159,7 +284,7 @@ describe("extract_tokens", []() {
|
|||
|
||||
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(2));
|
||||
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
|
||||
{ Symbol(0, Symbol::NonTerminal), Symbol(1, Symbol::NonTerminal) },
|
||||
{ Symbol::non_terminal(0), Symbol::non_terminal(1) },
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
@ -167,11 +292,11 @@ describe("extract_tokens", []() {
|
|||
it("adds inline extra tokens to the lexical grammar's separators", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
InternedVariable{"rule_A", VariableTypeNamed, String{"x"}},
|
||||
},
|
||||
{
|
||||
str("y"),
|
||||
pattern("\\s+"),
|
||||
String{"y"},
|
||||
Pattern{" "},
|
||||
},
|
||||
{},
|
||||
{}
|
||||
|
|
@ -180,8 +305,8 @@ describe("extract_tokens", []() {
|
|||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(2));
|
||||
AssertThat(get<1>(result).separators[0], EqualsPointer(str("y")));
|
||||
AssertThat(get<1>(result).separators[1], EqualsPointer(pattern("\\s+")));
|
||||
AssertThat(get<1>(result).separators[0], Equals(Rule(CharacterSet{{'y'}})));
|
||||
AssertThat(get<1>(result).separators[1], Equals(Rule(CharacterSet{{' '}})));
|
||||
|
||||
AssertThat(get<0>(result).extra_tokens, IsEmpty());
|
||||
});
|
||||
|
|
@ -189,11 +314,11 @@ describe("extract_tokens", []() {
|
|||
it("handles inline extra tokens that match tokens in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
InternedVariable{"rule_A", VariableTypeNamed, String{"x"}},
|
||||
InternedVariable{"rule_B", VariableTypeNamed, String{"y"}},
|
||||
},
|
||||
{
|
||||
str("y"),
|
||||
String{"y"},
|
||||
},
|
||||
{},
|
||||
{}
|
||||
|
|
@ -201,18 +326,30 @@ describe("extract_tokens", []() {
|
|||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, Symbol::Terminal) })));
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol::terminal(1) })));
|
||||
});
|
||||
|
||||
it("updates extra symbols according to the new symbol numbers", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
Variable{"rule_C", VariableTypeNamed, str("z")},
|
||||
InternedVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ String{"w"}, String{"x"}, Symbol::non_terminal(1) })
|
||||
},
|
||||
InternedVariable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
String{"y"}
|
||||
},
|
||||
InternedVariable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
String{"z"}
|
||||
},
|
||||
},
|
||||
{
|
||||
i_sym(2),
|
||||
Symbol::non_terminal(2),
|
||||
},
|
||||
{},
|
||||
{}
|
||||
|
|
@ -221,34 +358,55 @@ describe("extract_tokens", []() {
|
|||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
|
||||
{ Symbol(3, Symbol::Terminal) },
|
||||
{ Symbol::terminal(3) },
|
||||
})));
|
||||
|
||||
AssertThat(get<1>(result).separators, IsEmpty());
|
||||
});
|
||||
|
||||
it("returns an error if any extra tokens are non-token symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
|
||||
}, { i_sym(1) }, {}, {}});
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
InternedVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ String{"x"}, Symbol::non_terminal(1) })
|
||||
},
|
||||
InternedVariable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ String{"y"}, String{"z"} })
|
||||
},
|
||||
},
|
||||
{
|
||||
Symbol::non_terminal(1)
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
AssertThat(get<2>(result), Equals(
|
||||
CompileError(TSCompileErrorTypeInvalidExtraToken,
|
||||
"Not a token: rule_B")));
|
||||
AssertThat(get<2>(result), Equals(CompileError(
|
||||
TSCompileErrorTypeInvalidExtraToken,
|
||||
"Non-token symbol rule_B can't be used as an extra token"
|
||||
)));
|
||||
});
|
||||
|
||||
it("returns an error if any extra tokens are non-token rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
}, { choice({ i_sym(1), blank() }) }, {}, {}});
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
{"rule_A", VariableTypeNamed, String{"x"}},
|
||||
{"rule_B", VariableTypeNamed, String{"y"}},
|
||||
},
|
||||
{
|
||||
Rule::choice({ Symbol::non_terminal(1), Blank{} })
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
AssertThat(get<2>(result), Equals(CompileError(
|
||||
TSCompileErrorTypeInvalidExtraToken,
|
||||
"Not a token: (choice (non-terminal 1) (blank))"
|
||||
"Non-token rule expression can't be used as an extra token"
|
||||
)));
|
||||
});
|
||||
});
|
||||
|
|
@ -256,13 +414,21 @@ describe("extract_tokens", []() {
|
|||
it("returns an error if an external token has the same name as a non-terminal rule", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
|
||||
{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ String{"x"}, Symbol::non_terminal(1) })
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ String{"y"}, String{"z"} })
|
||||
},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
{
|
||||
ExternalToken {"rule_A", VariableTypeNamed, Symbol(0, Symbol::NonTerminal)}
|
||||
ExternalToken {"rule_A", VariableTypeNamed, Symbol::non_terminal(0)}
|
||||
}
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#include "compiler/prepare_grammar/flatten_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
|
@ -12,23 +11,23 @@ using prepare_grammar::flatten_rule;
|
|||
|
||||
describe("flatten_grammar", []() {
|
||||
it("associates each symbol with the precedence and associativity binding it to its successor", [&]() {
|
||||
SyntaxVariable result = flatten_rule(Variable{
|
||||
SyntaxVariable result = flatten_rule({
|
||||
"test",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
i_sym(1),
|
||||
prec_left(101, seq({
|
||||
i_sym(2),
|
||||
choice({
|
||||
prec_right(102, seq({
|
||||
i_sym(3),
|
||||
i_sym(4)
|
||||
Rule::seq({
|
||||
Symbol::non_terminal(1),
|
||||
Metadata::prec_left(101, Rule::seq({
|
||||
Symbol::non_terminal(2),
|
||||
Rule::choice({
|
||||
Metadata::prec_right(102, Rule::seq({
|
||||
Symbol::non_terminal(3),
|
||||
Symbol::non_terminal(4)
|
||||
})),
|
||||
i_sym(5),
|
||||
Symbol::non_terminal(5),
|
||||
}),
|
||||
i_sym(6),
|
||||
Symbol::non_terminal(6),
|
||||
})),
|
||||
i_sym(7),
|
||||
Symbol::non_terminal(7),
|
||||
})
|
||||
});
|
||||
|
||||
|
|
@ -36,51 +35,51 @@ describe("flatten_grammar", []() {
|
|||
AssertThat(result.type, Equals(VariableTypeNamed));
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(3, Symbol::NonTerminal), 102, AssociativityRight},
|
||||
{Symbol(4, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(2), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(3), 102, AssociativityRight},
|
||||
{Symbol::non_terminal(4), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(6), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(7), 0, AssociativityNone},
|
||||
}),
|
||||
Production({
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(5, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(2), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(5), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(6), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(7), 0, AssociativityNone},
|
||||
})
|
||||
})))
|
||||
});
|
||||
|
||||
it("uses the last assigned precedence", [&]() {
|
||||
SyntaxVariable result = flatten_rule(Variable{
|
||||
SyntaxVariable result = flatten_rule({
|
||||
"test1",
|
||||
VariableTypeNamed,
|
||||
prec_left(101, seq({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
Metadata::prec_left(101, Rule::seq({
|
||||
Symbol::non_terminal(1),
|
||||
Symbol::non_terminal(2),
|
||||
}))
|
||||
});
|
||||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
{Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(1), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(2), 101, AssociativityLeft},
|
||||
})
|
||||
})))
|
||||
|
||||
result = flatten_rule(Variable{
|
||||
result = flatten_rule({
|
||||
"test2",
|
||||
VariableTypeNamed,
|
||||
prec_left(101, seq({
|
||||
i_sym(1),
|
||||
Metadata::prec_left(101, Rule::seq({
|
||||
Symbol::non_terminal(1),
|
||||
}))
|
||||
});
|
||||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
{Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(1), 101, AssociativityLeft},
|
||||
})
|
||||
})))
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,11 +1,7 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "helpers/equals_pointer.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
|
@ -15,29 +11,29 @@ using prepare_grammar::intern_symbols;
|
|||
|
||||
describe("intern_symbols", []() {
|
||||
it("replaces named symbols with numerically-indexed symbols", [&]() {
|
||||
Grammar grammar{
|
||||
InputGrammar grammar{
|
||||
{
|
||||
{"x", choice({ sym("y"), sym("_z") })},
|
||||
{"y", sym("_z")},
|
||||
{"_z", str("stuff")}
|
||||
{"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"_z"} })},
|
||||
{"y", VariableTypeNamed, NamedSymbol{"_z"}},
|
||||
{"_z", VariableTypeNamed, String{"stuff"}}
|
||||
}, {}, {}, {}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>{
|
||||
Variable{"x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })},
|
||||
Variable{"y", VariableTypeNamed, i_sym(2)},
|
||||
Variable{"_z", VariableTypeHidden, str("stuff")},
|
||||
AssertThat(result.first.variables, Equals(vector<prepare_grammar::InternedGrammar::Variable>{
|
||||
{"x", VariableTypeNamed, Rule::choice({ Symbol::non_terminal(1), Symbol::non_terminal(2) })},
|
||||
{"y", VariableTypeNamed, Symbol::non_terminal(2)},
|
||||
{"_z", VariableTypeHidden, String{"stuff"}},
|
||||
}));
|
||||
});
|
||||
|
||||
describe("when there are symbols that reference undefined rules", [&]() {
|
||||
it("returns an error", []() {
|
||||
Grammar grammar{
|
||||
InputGrammar grammar{
|
||||
{
|
||||
{"x", sym("y")},
|
||||
{"x", VariableTypeNamed, NamedSymbol{"y"}},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
|
@ -49,14 +45,14 @@ describe("intern_symbols", []() {
|
|||
});
|
||||
|
||||
it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() {
|
||||
Grammar grammar{
|
||||
InputGrammar grammar{
|
||||
{
|
||||
{"x", choice({ sym("y"), sym("z") })},
|
||||
{"y", sym("z")},
|
||||
{"z", str("stuff")}
|
||||
{"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"z"} })},
|
||||
{"y", VariableTypeNamed, NamedSymbol{"z"}},
|
||||
{"z", VariableTypeNamed, String{"stuff"}}
|
||||
},
|
||||
{
|
||||
sym("z")
|
||||
NamedSymbol{"z"}
|
||||
},
|
||||
{}, {}
|
||||
};
|
||||
|
|
@ -65,21 +61,29 @@ describe("intern_symbols", []() {
|
|||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.extra_tokens.size(), Equals<size_t>(1));
|
||||
AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
|
||||
AssertThat(result.first.extra_tokens, Equals(vector<Rule>({ Symbol::non_terminal(2) })));
|
||||
});
|
||||
|
||||
it("records any rule names that match external token names", [&]() {
|
||||
Grammar grammar{
|
||||
InputGrammar grammar{
|
||||
{
|
||||
{"x", choice({ sym("y"), sym("z") })},
|
||||
{"y", sym("z")},
|
||||
{"z", str("stuff")},
|
||||
{"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"z"} })},
|
||||
{"y", VariableTypeNamed, NamedSymbol{"z"}},
|
||||
{"z", VariableTypeNamed, String{"stuff"}},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
{
|
||||
"w",
|
||||
"z"
|
||||
ExternalToken{
|
||||
"w",
|
||||
VariableTypeNamed,
|
||||
NONE()
|
||||
},
|
||||
ExternalToken{
|
||||
"z",
|
||||
VariableTypeNamed,
|
||||
NONE()
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -94,7 +98,7 @@ describe("intern_symbols", []() {
|
|||
ExternalToken{
|
||||
"z",
|
||||
VariableTypeNamed,
|
||||
Symbol(2, Symbol::NonTerminal)
|
||||
Symbol::non_terminal(2)
|
||||
},
|
||||
}))
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/prepare_grammar/parse_regex.h"
|
||||
#include "helpers/equals_pointer.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -12,178 +10,218 @@ describe("parse_regex", []() {
|
|||
struct ValidInputRow {
|
||||
string description;
|
||||
string pattern;
|
||||
rule_ptr rule;
|
||||
Rule rule;
|
||||
};
|
||||
|
||||
vector<ValidInputRow> valid_inputs = {
|
||||
{
|
||||
"character sets",
|
||||
"[aAeE]",
|
||||
character({ 'a', 'A', 'e', 'E' })
|
||||
CharacterSet{{ 'a', 'A', 'e', 'E' }}
|
||||
},
|
||||
|
||||
{
|
||||
"'.' characters as wildcards",
|
||||
".",
|
||||
character({ '\n' }, false)
|
||||
CharacterSet().include_all().exclude('\n')
|
||||
},
|
||||
|
||||
{
|
||||
"character classes",
|
||||
"\\w-\\d-\\s-\\W-\\D-\\S",
|
||||
seq({
|
||||
character({
|
||||
Rule::seq({
|
||||
CharacterSet{{
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
|
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }),
|
||||
character({ '-' }),
|
||||
character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }),
|
||||
character({ '-' }),
|
||||
character({ ' ', '\t', '\r', '\n' }),
|
||||
character({ '-' }),
|
||||
character({
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
|
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }, false),
|
||||
character({ '-' }),
|
||||
character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false),
|
||||
character({ '-' }),
|
||||
character({ ' ', '\t', '\r', '\n' }, false),
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }},
|
||||
CharacterSet{{ '-' }},
|
||||
CharacterSet{{ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }},
|
||||
CharacterSet{{ '-' }},
|
||||
CharacterSet{{ ' ', '\t', '\r', '\n' }},
|
||||
CharacterSet{{ '-' }},
|
||||
CharacterSet().include_all()
|
||||
.exclude('a', 'z')
|
||||
.exclude('A', 'Z')
|
||||
.exclude('0', '9')
|
||||
.exclude('_'),
|
||||
CharacterSet{{ '-' }},
|
||||
CharacterSet().include_all().exclude('0', '9'),
|
||||
CharacterSet{{ '-' }},
|
||||
CharacterSet().include_all()
|
||||
.exclude(' ')
|
||||
.exclude('\t')
|
||||
.exclude('\r')
|
||||
.exclude('\n')
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
"choices",
|
||||
"ab|cd|ef",
|
||||
choice({
|
||||
seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }) }),
|
||||
seq({
|
||||
character({ 'c' }),
|
||||
character({ 'd' }) }),
|
||||
seq({
|
||||
character({ 'e' }),
|
||||
character({ 'f' }) }) })
|
||||
Rule::choice({
|
||||
Seq{
|
||||
CharacterSet{{'a'}},
|
||||
CharacterSet{{'b'}}
|
||||
},
|
||||
Seq{
|
||||
CharacterSet{{'c'}},
|
||||
CharacterSet{{'d'}}
|
||||
},
|
||||
Seq{
|
||||
CharacterSet{{'e'}},
|
||||
CharacterSet{{'f'}}
|
||||
}
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
"simple sequences",
|
||||
"abc",
|
||||
seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }),
|
||||
character({ 'c' }) })
|
||||
Rule::seq({
|
||||
CharacterSet{{'a'}},
|
||||
CharacterSet{{'b'}},
|
||||
CharacterSet{{'c'}}
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
"character ranges",
|
||||
"[12a-dA-D3]",
|
||||
character({
|
||||
CharacterSet{{
|
||||
'1', '2', '3',
|
||||
'a', 'b', 'c', 'd',
|
||||
'A', 'B', 'C', 'D' })
|
||||
'A', 'B', 'C', 'D'
|
||||
}}
|
||||
},
|
||||
|
||||
{
|
||||
"negated characters",
|
||||
"[^a\\d]",
|
||||
character({ 'a', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false)
|
||||
CharacterSet().include_all()
|
||||
.exclude('a')
|
||||
.exclude('0', '9')
|
||||
},
|
||||
|
||||
{
|
||||
"backslashes",
|
||||
"\\\\",
|
||||
character({ '\\' })
|
||||
CharacterSet{{'\\'}}
|
||||
},
|
||||
|
||||
{
|
||||
"character groups in sequences",
|
||||
"x([^x]|\\\\x)*x",
|
||||
seq({
|
||||
character({ 'x' }),
|
||||
repeat(choice({
|
||||
character({ 'x' }, false),
|
||||
seq({ character({ '\\' }), character({ 'x' }) }) })),
|
||||
character({ 'x' }) })
|
||||
Rule::seq({
|
||||
CharacterSet{{'x'}},
|
||||
Rule::choice({
|
||||
Repeat{Rule::choice({
|
||||
CharacterSet().include_all().exclude('x'),
|
||||
Rule::seq({
|
||||
CharacterSet{{'\\'}},
|
||||
CharacterSet{{'x'}}
|
||||
})
|
||||
})},
|
||||
Blank{}
|
||||
}),
|
||||
CharacterSet{{'x'}}
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
"choices in sequences",
|
||||
"(a|b)cd",
|
||||
seq({
|
||||
choice({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }) }),
|
||||
character({ 'c' }),
|
||||
character({ 'd' }) })
|
||||
Rule::seq({
|
||||
Rule::choice({
|
||||
CharacterSet{{'a'}},
|
||||
CharacterSet{{'b'}} }),
|
||||
CharacterSet{{'c'}},
|
||||
CharacterSet{{'d'}} })
|
||||
},
|
||||
|
||||
{
|
||||
"escaped parentheses",
|
||||
"a\\(b",
|
||||
seq({
|
||||
character({ 'a' }),
|
||||
character({ '(' }),
|
||||
character({ 'b' }) })
|
||||
Rule::seq({
|
||||
CharacterSet{{'a'}},
|
||||
CharacterSet{{'('}},
|
||||
CharacterSet{{'b'}},
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
"escaped periods",
|
||||
"a\\.",
|
||||
seq({
|
||||
character({ 'a' }),
|
||||
character({ '.' }) })
|
||||
Rule::seq({
|
||||
CharacterSet{{'a'}},
|
||||
CharacterSet{{'.'}},
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
"escaped characters",
|
||||
"\\t\\n\\r",
|
||||
seq({
|
||||
character({ '\t' }),
|
||||
character({ '\n' }),
|
||||
character({ '\r' }) })
|
||||
Rule::seq({
|
||||
CharacterSet{{'\t'}},
|
||||
CharacterSet{{'\n'}},
|
||||
CharacterSet{{'\r'}},
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
"plus repeats",
|
||||
"(ab)+(cd)+",
|
||||
seq({
|
||||
repeat1(seq({ character({ 'a' }), character({ 'b' }) })),
|
||||
repeat1(seq({ character({ 'c' }), character({ 'd' }) })) })
|
||||
Rule::seq({
|
||||
Repeat{Rule::seq({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })},
|
||||
Repeat{Rule::seq({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })},
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
"asterix repeats",
|
||||
"(ab)*(cd)*",
|
||||
seq({
|
||||
repeat(seq({ character({ 'a' }), character({ 'b' }) })),
|
||||
repeat(seq({ character({ 'c' }), character({ 'd' }) })) })
|
||||
Rule::seq({
|
||||
Rule::choice({
|
||||
Repeat{Rule::seq({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })},
|
||||
Blank{},
|
||||
}),
|
||||
Rule::choice({
|
||||
Repeat{Rule::seq({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })},
|
||||
Blank{},
|
||||
}),
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
"optional rules",
|
||||
"a(bc)?",
|
||||
seq({
|
||||
character({ 'a' }),
|
||||
choice({
|
||||
seq({ character({ 'b' }), character({ 'c' }) }),
|
||||
blank() }) })
|
||||
Rule::seq({
|
||||
CharacterSet{{'a'}},
|
||||
Rule::choice({
|
||||
Rule::seq({
|
||||
CharacterSet{{'b'}},
|
||||
CharacterSet{{'c'}},
|
||||
}),
|
||||
Blank{}
|
||||
}),
|
||||
})
|
||||
},
|
||||
|
||||
{
|
||||
"choices containing negated character classes",
|
||||
"/([^/]|(\\\\/))*/",
|
||||
seq({
|
||||
character({ '/' }),
|
||||
repeat(choice({
|
||||
character({ '/' }, false),
|
||||
seq({ character({ '\\' }), character({ '/' }) }) })),
|
||||
character({ '/' }), }),
|
||||
"/([^/]|(\\\\/))+/",
|
||||
Rule::seq({
|
||||
CharacterSet{{'/'}},
|
||||
Repeat{Rule::choice({
|
||||
CharacterSet().include_all().exclude('/'),
|
||||
Rule::seq({
|
||||
CharacterSet{{'\\'}},
|
||||
CharacterSet{{'/'}},
|
||||
}),
|
||||
})},
|
||||
CharacterSet{{'/'}},
|
||||
}),
|
||||
},
|
||||
};
|
||||
|
||||
|
|
@ -229,7 +267,7 @@ describe("parse_regex", []() {
|
|||
for (auto &row : valid_inputs) {
|
||||
it(("parses " + row.description).c_str(), [&]() {
|
||||
auto result = parse_regex(row.pattern);
|
||||
AssertThat(result.first, EqualsPointer(row.rule));
|
||||
AssertThat(result.first, Equals(row.rule));
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
using namespace rules;
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ describe("CharacterSet", []() {
|
|||
.include('a', 'd')
|
||||
.include('f', 'm');
|
||||
|
||||
AssertThat(set1.hash_code(), Equals(set2.hash_code()));
|
||||
AssertThat(hash<CharacterSet>()(set1), Equals(hash<CharacterSet>()(set2)));
|
||||
});
|
||||
|
||||
it("returns different numbers for character sets that include different ranges", [&]() {
|
||||
|
|
@ -78,8 +78,8 @@ describe("CharacterSet", []() {
|
|||
.include('a', 'c')
|
||||
.include('f', 'm');
|
||||
|
||||
AssertThat(set1.hash_code(), !Equals(set2.hash_code()));
|
||||
AssertThat(set2.hash_code(), !Equals(set1.hash_code()));
|
||||
AssertThat(hash<CharacterSet>()(set1), !Equals(hash<CharacterSet>()(set2)));
|
||||
AssertThat(hash<CharacterSet>()(set2), !Equals(hash<CharacterSet>()(set1)));
|
||||
});
|
||||
|
||||
it("returns different numbers for character sets that exclude different ranges", [&]() {
|
||||
|
|
@ -93,16 +93,16 @@ describe("CharacterSet", []() {
|
|||
.exclude('a', 'c')
|
||||
.exclude('f', 'm');
|
||||
|
||||
AssertThat(set1.hash_code(), !Equals(set2.hash_code()));
|
||||
AssertThat(set2.hash_code(), !Equals(set1.hash_code()));
|
||||
AssertThat(hash<CharacterSet>()(set1), !Equals(hash<CharacterSet>()(set2)));
|
||||
AssertThat(hash<CharacterSet>()(set2), !Equals(hash<CharacterSet>()(set1)));
|
||||
});
|
||||
|
||||
it("returns different numbers for character sets with different sign", [&]() {
|
||||
CharacterSet set1 = CharacterSet().include_all();
|
||||
CharacterSet set2 = CharacterSet();
|
||||
|
||||
AssertThat(set1.hash_code(), !Equals(set2.hash_code()));
|
||||
AssertThat(set2.hash_code(), !Equals(set1.hash_code()));
|
||||
AssertThat(hash<CharacterSet>()(set1), !Equals(hash<CharacterSet>()(set2)));
|
||||
AssertThat(hash<CharacterSet>()(set2), !Equals(hash<CharacterSet>()(set1)));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -312,7 +312,7 @@ describe("CharacterSet", []() {
|
|||
.include('z');
|
||||
|
||||
AssertThat(set1.included_ranges(), Equals(vector<CharacterRange>({
|
||||
CharacterRange('a', 'c'),
|
||||
CharacterRange{'a', 'c'},
|
||||
CharacterRange('g'),
|
||||
CharacterRange('z'),
|
||||
})));
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue