Merge pull request #69 from tree-sitter/rules-variant

Implement Rule as a union type rather than an abstract base class
This commit is contained in:
Max Brunsfeld 2017-03-17 14:35:42 -07:00 committed by GitHub
commit 4d39f13eaf
109 changed files with 2795 additions and 3151 deletions

View file

@ -1,3 +1,4 @@
-std=c++14
-Isrc
-Itest
-Iinclude

View file

@ -15,6 +15,7 @@ typedef enum {
TSCompileErrorTypeLexConflict,
TSCompileErrorTypeParseConflict,
TSCompileErrorTypeEpsilonRule,
TSCompileErrorTypeInvalidTokenContents,
} TSCompileErrorType;
typedef struct {

View file

@ -33,40 +33,25 @@
'src/compiler/prepare_grammar/extract_tokens.cc',
'src/compiler/prepare_grammar/flatten_grammar.cc',
'src/compiler/prepare_grammar/intern_symbols.cc',
'src/compiler/prepare_grammar/is_token.cc',
'src/compiler/prepare_grammar/normalize_rules.cc',
'src/compiler/prepare_grammar/parse_regex.cc',
'src/compiler/prepare_grammar/prepare_grammar.cc',
'src/compiler/prepare_grammar/token_description.cc',
'src/compiler/rule.cc',
'src/compiler/syntax_grammar.cc',
'src/compiler/rules/blank.cc',
'src/compiler/rules/built_in_symbols.cc',
'src/compiler/rules/character_range.cc',
'src/compiler/rules/character_set.cc',
'src/compiler/rules/choice.cc',
'src/compiler/rules/metadata.cc',
'src/compiler/rules/named_symbol.cc',
'src/compiler/rules/pattern.cc',
'src/compiler/rules/repeat.cc',
'src/compiler/rules/rules.cc',
'src/compiler/rules/seq.cc',
'src/compiler/rules/string.cc',
'src/compiler/rules/symbol.cc',
'src/compiler/rules/visitor.cc',
'src/compiler/util/string_helpers.cc',
'externals/utf8proc/utf8proc.c',
'externals/json-parser/json.c',
],
'cflags_cc': [
'-std=c++0x',
],
'cflags_cc!': [
'-fno-rtti'
'-std=c++14',
],
'xcode_settings': {
'CLANG_CXX_LANGUAGE_STANDARD': 'c++11',
'GCC_ENABLE_CPP_RTTI': 'YES',
'CLANG_CXX_LANGUAGE_STANDARD': 'c++14',
'GCC_ENABLE_CPP_EXCEPTIONS': 'NO',
},
'direct_dependent_settings': {

View file

@ -10,8 +10,7 @@
#include "compiler/build_tables/parse_item_set_builder.h"
#include "compiler/lexical_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rule.h"
#include "compiler/build_tables/lex_table_builder.h"
namespace tree_sitter {
@ -25,7 +24,6 @@ using std::map;
using std::string;
using std::to_string;
using std::unordered_map;
using std::make_shared;
using rules::Associativity;
using rules::Symbol;
using rules::END_OF_INPUT;
@ -53,8 +51,8 @@ class ParseTableBuilder {
pair<ParseTable, CompileError> build() {
Symbol start_symbol = grammar.variables.empty() ?
Symbol(0, Symbol::Terminal) :
Symbol(0, Symbol::NonTerminal);
Symbol::terminal(0) :
Symbol::non_terminal(0);
Production start_production{
ProductionStep{start_symbol, 0, rules::AssociativityNone},
@ -121,7 +119,7 @@ class ParseTableBuilder {
}
if (!has_non_reciprocal_conflict) {
add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::Terminal));
add_out_of_context_parse_state(&error_state, Symbol::terminal(i));
}
}
@ -132,11 +130,11 @@ class ParseTableBuilder {
}
for (size_t i = 0; i < grammar.external_tokens.size(); i++) {
add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::External));
add_out_of_context_parse_state(&error_state, Symbol::external(i));
}
for (size_t i = 0; i < grammar.variables.size(); i++) {
add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::NonTerminal));
add_out_of_context_parse_state(&error_state, Symbol::non_terminal(i));
}
error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0));
@ -253,7 +251,7 @@ class ParseTableBuilder {
ParseStateId next_state = add_parse_state(next_item_set);
parse_table.set_nonterminal_action(state_id, lookahead, next_state);
if (!allow_any_conflict)
recovery_states[Symbol(lookahead, Symbol::NonTerminal)].add(next_item_set);
recovery_states[Symbol::non_terminal(lookahead)].add(next_item_set);
}
for (Symbol lookahead : lookaheads_with_conflicts) {
@ -428,7 +426,7 @@ class ParseTableBuilder {
if (lookahead.is_external()) return false;
if (!lookahead.is_built_in()) {
for (Symbol::Index incompatible_index : incompatible_token_indices) {
Symbol incompatible_symbol(incompatible_index, Symbol::Terminal);
Symbol incompatible_symbol = Symbol::terminal(incompatible_index);
if (other.terminal_entries.count(incompatible_symbol)) return false;
}
}
@ -452,7 +450,7 @@ class ParseTableBuilder {
if (lookahead.is_external()) return false;
if (!lookahead.is_built_in()) {
for (Symbol::Index incompatible_index : incompatible_token_indices) {
Symbol incompatible_symbol(incompatible_index, Symbol::Terminal);
Symbol incompatible_symbol = Symbol::terminal(incompatible_index);
if (state.terminal_entries.count(incompatible_symbol)) return false;
}
}

View file

@ -1,7 +1,7 @@
#include "compiler/build_tables/lex_conflict_manager.h"
#include <utility>
#include "compiler/parse_table.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rule.h"
#include "compiler/build_tables/lex_item.h"
namespace tree_sitter {

View file

@ -4,7 +4,7 @@
#include <map>
#include <set>
#include "compiler/lexical_grammar.h"
#include "compiler/rules/symbol.h"
#include "compiler/rule.h"
namespace tree_sitter {

View file

@ -2,12 +2,7 @@
#include <unordered_set>
#include "compiler/build_tables/lex_item_transitions.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/visitor.h"
#include "compiler/rule.h"
#include "compiler/util/hash_combine.h"
namespace tree_sitter {
@ -19,51 +14,63 @@ using std::unordered_set;
using rules::CharacterSet;
using rules::Symbol;
LexItem::LexItem(const rules::Symbol &lhs, const rule_ptr rule)
LexItem::LexItem(const rules::Symbol &lhs, const rules::Rule &rule)
: lhs(lhs), rule(rule) {}
bool LexItem::operator==(const LexItem &other) const {
return (other.lhs == lhs) && other.rule->operator==(*rule);
return lhs == other.lhs && rule == other.rule;
}
LexItem::CompletionStatus LexItem::completion_status() const {
class GetCompletionStatus : public rules::RuleFn<CompletionStatus> {
protected:
CompletionStatus apply_to(const rules::Choice *rule) {
for (const auto &element : rule->elements) {
CompletionStatus status = apply(element);
using CompletionStatus = LexItem::CompletionStatus;
static CompletionStatus get_completion_status(const rules::Rule &rule) {
return rule.match(
[](rules::Choice choice) {
for (const auto &element : choice.elements) {
auto status = get_completion_status(element);
if (status.is_done) return status;
}
return { false, PrecedenceRange() };
}
return CompletionStatus{false, PrecedenceRange()};
},
CompletionStatus apply_to(const rules::Metadata *rule) {
CompletionStatus result = apply(rule->rule);
if (result.is_done && result.precedence.empty && rule->params.has_precedence) {
result.precedence.add(rule->params.precedence);
[](rules::Metadata metadata) {
CompletionStatus result = get_completion_status(*metadata.rule);
if (result.is_done && result.precedence.empty && metadata.params.has_precedence) {
result.precedence.add(metadata.params.precedence);
}
return result;
}
},
CompletionStatus apply_to(const rules::Repeat *rule) {
return apply(rule->content);
}
[](rules::Repeat repeat) {
return get_completion_status(*repeat.rule);
},
CompletionStatus apply_to(const rules::Blank *rule) {
return { true, PrecedenceRange() };
}
CompletionStatus apply_to(const rules::Seq *rule) {
CompletionStatus left_status = apply(rule->left);
[](rules::Seq sequence) {
CompletionStatus left_status = get_completion_status(*sequence.left);
if (left_status.is_done) {
return apply(rule->right);
return get_completion_status(*sequence.right);
} else {
return { false, PrecedenceRange() };
return CompletionStatus{false, PrecedenceRange()};
}
}
};
},
return GetCompletionStatus().apply(rule);
[](rules::Blank blank) {
return CompletionStatus{true, PrecedenceRange()};
},
[](rules::CharacterSet) {
return CompletionStatus{false, PrecedenceRange()};
},
[](auto) {
return CompletionStatus{false, PrecedenceRange()};
}
);
}
// Computes the completion status of this item by running
// get_completion_status over the item's own rule.
LexItem::CompletionStatus LexItem::completion_status() const {
  return get_completion_status(this->rule);
}
LexItemSet::LexItemSet() {}

View file

@ -5,8 +5,7 @@
#include <map>
#include <utility>
#include <string>
#include "compiler/rules/character_set.h"
#include "compiler/rules/symbol.h"
#include "compiler/rule.h"
#include "compiler/precedence_range.h"
namespace tree_sitter {
@ -14,7 +13,7 @@ namespace build_tables {
class LexItem {
public:
LexItem(const rules::Symbol &, rule_ptr);
LexItem(const rules::Symbol &, const rules::Rule &);
struct CompletionStatus {
bool is_done;
@ -25,7 +24,7 @@ class LexItem {
CompletionStatus completion_status() const;
rules::Symbol lhs;
rule_ptr rule;
rules::Rule rule;
};
} // namespace build_tables

View file

@ -4,47 +4,34 @@
#include <functional>
#include <utility>
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/visitor.h"
#include "compiler/rule.h"
#include "compiler/build_tables/lex_item.h"
namespace tree_sitter {
namespace build_tables {
using std::function;
using std::make_shared;
using std::map;
using std::pair;
using std::vector;
using rules::CharacterSet;
using rules::Symbol;
using rules::Blank;
using rules::Choice;
using rules::Seq;
using rules::Repeat;
using rules::Metadata;
typedef LexItemSet::Transition Transition;
typedef LexItemSet::TransitionMap TransitionMap;
using rules::Rule;
using Transition = LexItemSet::Transition;
using TransitionMap = LexItemSet::TransitionMap;
class TransitionBuilder : public rules::RuleFn<void> {
class TransitionBuilder {
TransitionMap *transitions;
const Symbol &item_lhs;
const rules::Symbol &item_lhs;
vector<int> *precedence_stack;
bool in_main_token;
Transition transform_transition(const Transition &transition,
function<rule_ptr(rule_ptr)> callback) {
inline Transition transform_transition(const Transition &transition,
const function<Rule(const Rule &)> &callback) {
LexItemSet destination;
for (const LexItem &item : transition.destination.entries)
for (const LexItem &item : transition.destination.entries) {
destination.entries.insert(LexItem(item.lhs, callback(item.rule)));
return Transition{ destination, transition.precedence,
transition.in_main_token };
}
return Transition{destination, transition.precedence, transition.in_main_token};
}
void add_transition(TransitionMap *transitions, CharacterSet new_characters,
@ -89,82 +76,102 @@ class TransitionBuilder : public rules::RuleFn<void> {
transitions->insert({ new_characters, new_transition });
}
void apply_to(const CharacterSet *character_set) {
PrecedenceRange precedence;
if (!precedence_stack->empty())
precedence.add(precedence_stack->back());
add_transition(transitions, *character_set,
Transition{
LexItemSet({ LexItem(item_lhs, Blank::build()) }),
precedence, in_main_token,
});
}
void apply_to(const Choice *choice) {
for (const rule_ptr &element : choice->elements)
apply(element);
}
void apply_to(const Seq *sequence) {
TransitionMap left_transitions;
TransitionBuilder(&left_transitions, this).apply(sequence->left);
for (const auto &pair : left_transitions) {
add_transition(
transitions, pair.first,
transform_transition(pair.second, [&sequence](rule_ptr rule) {
return Seq::build({ rule, sequence->right });
}));
}
if (rule_can_be_blank(sequence->left))
apply(sequence->right);
}
void apply_to(const Repeat *repeat) {
TransitionMap content_transitions;
TransitionBuilder(&content_transitions, this).apply(repeat->content);
for (const auto &pair : content_transitions) {
add_transition(transitions, pair.first, pair.second);
add_transition(
transitions, pair.first,
transform_transition(pair.second, [&repeat](rule_ptr item_rule) {
return Seq::build({ item_rule, repeat->copy() });
}));
}
}
void apply_to(const Metadata *metadata) {
bool has_active_precedence = metadata->params.is_active;
if (has_active_precedence)
precedence_stack->push_back(metadata->params.precedence);
if (metadata->params.is_main_token)
in_main_token = true;
rules::MetadataParams params = metadata->params;
if (params.has_precedence)
params.is_active = true;
TransitionMap content_transitions;
TransitionBuilder(&content_transitions, this).apply(metadata->rule);
for (const auto &pair : content_transitions) {
add_transition(
transitions, pair.first,
transform_transition(pair.second, [&params](rule_ptr rule) {
return Metadata::build(rule, params);
}));
}
if (has_active_precedence)
precedence_stack->pop_back();
}
public:
TransitionBuilder(TransitionMap *transitions, const Symbol &item_lhs,
void apply(const Rule &rule) {
rule.match(
[this](const rules::Blank &) {},
[this](const rules::CharacterSet &character_set) {
PrecedenceRange precedence;
if (!precedence_stack->empty()) {
precedence.add(precedence_stack->back());
}
add_transition(
transitions,
character_set,
Transition{
LexItemSet({ LexItem(item_lhs, rules::Blank{}) }),
precedence,
in_main_token,
}
);
},
[this](const rules::Choice &choice) {
for (const auto &element : choice.elements) {
apply(element);
}
},
[this](const rules::Seq &sequence) {
TransitionMap left_transitions;
TransitionBuilder(&left_transitions, this).apply(*sequence.left);
for (const auto &pair : left_transitions) {
add_transition(
transitions,
pair.first,
transform_transition(pair.second, [&sequence](Rule rule) -> Rule {
return Rule::seq({rule, *sequence.right});
})
);
}
if (rule_can_be_blank(*sequence.left)) {
apply(*sequence.right);
}
},
[this](const rules::Repeat &repeat) {
TransitionMap content_transitions;
TransitionBuilder(&content_transitions, this).apply(*repeat.rule);
for (const auto &pair : content_transitions) {
add_transition(transitions, pair.first, pair.second);
add_transition(
transitions, pair.first,
transform_transition(pair.second, [&repeat](Rule item_rule) {
return Rule::seq({ item_rule, repeat });
})
);
}
},
[this](const rules::Metadata &metadata) {
bool has_active_precedence = metadata.params.is_active;
if (has_active_precedence)
precedence_stack->push_back(metadata.params.precedence);
if (metadata.params.is_main_token)
in_main_token = true;
auto params = metadata.params;
if (params.has_precedence)
params.is_active = true;
TransitionMap content_transitions;
TransitionBuilder(&content_transitions, this).apply(*metadata.rule);
for (const auto &pair : content_transitions) {
add_transition(
transitions, pair.first,
transform_transition(pair.second, [&params](Rule rule) {
return rules::Metadata{rule, params};
})
);
}
if (has_active_precedence) {
precedence_stack->pop_back();
}
},
[](auto) {}
);
}
TransitionBuilder(TransitionMap *transitions, const rules::Symbol &item_lhs,
vector<int> *precedence_stack, bool in_main_token)
: transitions(transitions),
item_lhs(item_lhs),
@ -180,8 +187,7 @@ class TransitionBuilder : public rules::RuleFn<void> {
void lex_item_transitions(TransitionMap *transitions, const LexItem &item) {
vector<int> precedence_stack;
TransitionBuilder(transitions, item.lhs, &precedence_stack, false)
.apply(item.rule);
TransitionBuilder(transitions, item.lhs, &precedence_stack, false).apply(item.rule);
}
} // namespace build_tables

View file

@ -1,15 +1,12 @@
#ifndef COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_
#define COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_
#include "compiler/rules/character_set.h"
#include "compiler/rules/symbol.h"
#include "compiler/build_tables/lex_item.h"
namespace tree_sitter {
namespace build_tables {
void lex_item_transitions(LexItemSet::TransitionMap *transitions,
const LexItem &);
void lex_item_transitions(LexItemSet::TransitionMap *transitions, const LexItem &);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -10,13 +10,7 @@
#include "compiler/build_tables/lex_item.h"
#include "compiler/parse_table.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/visitor.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace build_tables {
@ -28,6 +22,7 @@ using std::string;
using std::vector;
using std::unordered_map;
using std::unique_ptr;
using rules::Rule;
using rules::Blank;
using rules::Choice;
using rules::CharacterSet;
@ -36,35 +31,45 @@ using rules::Symbol;
using rules::Metadata;
using rules::Seq;
class StartingCharacterAggregator : public rules::RuleFn<void> {
void apply_to(const rules::Seq *rule) {
apply(rule->left);
}
void apply_to(const rules::Choice *rule) {
for (const rule_ptr &element : rule->elements) apply(element);
}
void apply_to(const rules::Repeat *rule) {
apply(rule->content);
}
void apply_to(const rules::Metadata *rule) {
apply(rule->rule);
}
void apply_to(const rules::CharacterSet *rule) {
result.add_set(*rule);
}
class StartingCharacterAggregator {
public:
void apply(const Rule &rule) {
rule.match(
[this](const Seq &sequence) {
apply(*sequence.left);
},
[this](const rules::Choice &rule) {
for (const auto &element : rule.elements) {
apply(element);
}
},
[this](const rules::Repeat &rule) {
apply(*rule.rule);
},
[this](const rules::Metadata &rule) {
apply(*rule.rule);
},
[this](const rules::CharacterSet &rule) {
result.add_set(rule);
},
[this](const rules::Blank) {},
[](auto) {}
);
}
CharacterSet result;
};
class LexTableBuilderImpl : public LexTableBuilder {
LexTable lex_table;
const LexicalGrammar grammar;
vector<rule_ptr> separator_rules;
vector<Rule> separator_rules;
CharacterSet first_separator_characters;
LexConflictManager conflict_manager;
unordered_map<LexItemSet, LexStateId> lex_state_ids;
@ -74,11 +79,11 @@ class LexTableBuilderImpl : public LexTableBuilder {
LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) {
StartingCharacterAggregator starting_character_aggregator;
for (const rule_ptr &rule : grammar.separators) {
separator_rules.push_back(Repeat::build(rule));
for (const auto &rule : grammar.separators) {
separator_rules.push_back(Repeat{rule});
starting_character_aggregator.apply(rule);
}
separator_rules.push_back(Blank::build());
separator_rules.push_back(Blank{});
first_separator_characters = starting_character_aggregator.result;
shadowed_token_indices.resize(grammar.variables.size());
}
@ -98,8 +103,18 @@ class LexTableBuilderImpl : public LexTableBuilder {
clear();
map<Symbol, ParseTableEntry> terminals;
terminals[Symbol(left, Symbol::Terminal)];
terminals[Symbol(right, Symbol::Terminal)];
terminals[Symbol::terminal(left)];
terminals[Symbol::terminal(right)];
if (grammar.variables[left].is_string && grammar.variables[right].is_string) {
StartingCharacterAggregator left_starting_characters;
left_starting_characters.apply(grammar.variables[left].rule);
StartingCharacterAggregator right_starting_characters;
right_starting_characters.apply(grammar.variables[right].rule);
if (!(left_starting_characters.result == right_starting_characters.result)) {
return false;
}
}
add_lex_state(item_set_for_terminals(terminals));
@ -183,11 +198,11 @@ class LexTableBuilderImpl : public LexTableBuilder {
for (ParseState &state : parse_table->states) {
for (auto &entry : state.terminal_entries) {
Symbol symbol = entry.first;
if (symbol.is_token()) {
if (symbol.is_terminal()) {
auto homonyms = conflict_manager.possible_homonyms.find(symbol.index);
if (homonyms != conflict_manager.possible_homonyms.end())
for (Symbol::Index homonym : homonyms->second)
if (state.terminal_entries.count(Symbol(homonym, Symbol::Terminal))) {
if (state.terminal_entries.count(Symbol::terminal(homonym))) {
entry.second.reusable = false;
break;
}
@ -198,7 +213,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
auto extensions = conflict_manager.possible_extensions.find(symbol.index);
if (extensions != conflict_manager.possible_extensions.end())
for (Symbol::Index extension : extensions->second)
if (state.terminal_entries.count(Symbol(extension, Symbol::Terminal))) {
if (state.terminal_entries.count(Symbol::terminal(extension))) {
entry.second.depends_on_lookahead = true;
break;
}
@ -278,15 +293,18 @@ class LexTableBuilderImpl : public LexTableBuilder {
LexItemSet result;
for (const auto &pair : terminals) {
Symbol symbol = pair.first;
if (symbol.is_token()) {
for (const rule_ptr &rule : rules_for_symbol(symbol)) {
for (const rule_ptr &separator_rule : separator_rules) {
if (symbol.is_terminal()) {
for (const auto &rule : rules_for_symbol(symbol)) {
for (const auto &separator_rule : separator_rules) {
result.entries.insert(LexItem(
symbol,
Metadata::separator(
Seq::build({
Rule::seq({
separator_rule,
Metadata::main_token(rule) }))));
Metadata::main_token(rule)
})
)
));
}
}
}
@ -294,17 +312,20 @@ class LexTableBuilderImpl : public LexTableBuilder {
return result;
}
vector<rule_ptr> rules_for_symbol(const rules::Symbol &symbol) {
if (symbol == rules::END_OF_INPUT())
return { CharacterSet().include(0).copy() };
vector<Rule> rules_for_symbol(const rules::Symbol &symbol) {
if (symbol == rules::END_OF_INPUT()) {
return { CharacterSet().include(0) };
}
rule_ptr rule = grammar.variables[symbol.index].rule;
return grammar.variables[symbol.index].rule.match(
[](const Choice &choice) {
return choice.elements;
},
auto choice = rule->as<Choice>();
if (choice)
return choice->elements;
else
return { rule };
[](auto rule) {
return vector<Rule>{ rule };
}
);
}
};

View file

@ -1,7 +1,7 @@
#include "compiler/build_tables/lookahead_set.h"
#include <set>
#include <memory>
#include "compiler/rules/symbol.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace build_tables {

View file

@ -3,7 +3,7 @@
#include <set>
#include <memory>
#include "compiler/rules/symbol.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace build_tables {

View file

@ -1,7 +1,7 @@
#include "compiler/build_tables/parse_item.h"
#include <string>
#include "compiler/syntax_grammar.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rule.h"
#include "compiler/util/hash_combine.h"
namespace tree_sitter {
@ -41,7 +41,7 @@ bool ParseItem::operator<(const ParseItem &other) const {
}
Symbol ParseItem::lhs() const {
return Symbol(variable_index, Symbol::NonTerminal);
return Symbol{variable_index, Symbol::NonTerminal};
}
bool ParseItem::is_done() const {

View file

@ -4,8 +4,7 @@
#include <map>
#include <utility>
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/rule.h"
#include "compiler/syntax_grammar.h"
#include "compiler/precedence_range.h"

View file

@ -4,7 +4,7 @@
#include <utility>
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace build_tables {
@ -16,8 +16,6 @@ using std::get;
using std::pair;
using std::tuple;
using std::make_tuple;
using std::shared_ptr;
using std::make_shared;
using rules::Symbol;
using rules::NONE;
@ -27,17 +25,17 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
set<Symbol::Index> processed_non_terminals;
for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
Symbol symbol(i, Symbol::Terminal);
Symbol symbol = Symbol::terminal(i);
first_sets.insert({symbol, LookaheadSet({ symbol })});
}
for (size_t i = 0, n = grammar.external_tokens.size(); i < n; i++) {
Symbol symbol(i, Symbol::External);
Symbol symbol = Symbol::external(i);
first_sets.insert({symbol, LookaheadSet({ symbol })});
}
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol symbol(i, Symbol::NonTerminal);
Symbol symbol = Symbol::non_terminal(i);
LookaheadSet first_set;
processed_non_terminals.clear();
@ -64,7 +62,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
vector<ParseItemSetComponent> components_to_process;
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol symbol(i, Symbol::NonTerminal);
Symbol symbol = Symbol::non_terminal(i);
map<ParseItem, pair<LookaheadSet, bool>> cache_entry;
components_to_process.clear();

View file

@ -2,7 +2,7 @@
#define COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_
#include "compiler/build_tables/parse_item.h"
#include "compiler/rules/symbol.h"
#include "compiler/rule.h"
#include <map>
namespace tree_sitter {

View file

@ -1,43 +1,42 @@
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/repeat.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace build_tables {
class CanBeBlank : public rules::RuleFn<bool> {
protected:
bool apply_to(const rules::Blank *) {
return true;
}
bool rule_can_be_blank(const rules::Rule &rule) {
return rule.match(
[](rules::Blank) {
return true;
},
bool apply_to(const rules::Repeat *rule) {
return apply(rule->content);
}
[](rules::CharacterSet) {
return false;
},
bool apply_to(const rules::Choice *rule) {
for (const auto &element : rule->elements)
if (apply(element))
return true;
return false;
}
[](rules::Repeat repeat) {
return rule_can_be_blank(*repeat.rule);
},
bool apply_to(const rules::Seq *rule) {
return apply(rule->left) && apply(rule->right);
}
[](rules::Metadata metadata) {
return rule_can_be_blank(*metadata.rule);
},
bool apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
};
[](rules::Choice choice) {
for (const auto &element : choice.elements) {
if (rule_can_be_blank(element)) {
return true;
}
}
return false;
},
bool rule_can_be_blank(const rule_ptr &rule) {
return CanBeBlank().apply(rule);
[](rules::Seq seq) {
return rule_can_be_blank(*seq.left) && rule_can_be_blank(*seq.right);
},
[](auto) { return false; }
);
}
} // namespace build_tables

View file

@ -6,7 +6,7 @@
namespace tree_sitter {
namespace build_tables {
bool rule_can_be_blank(const rule_ptr &rule);
bool rule_can_be_blank(const rules::Rule &rule);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -22,8 +22,7 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) {
TSCompileErrorTypeInvalidGrammar };
}
auto prepare_grammar_result =
prepare_grammar::prepare_grammar(parse_result.grammar);
auto prepare_grammar_result = prepare_grammar::prepare_grammar(parse_result.grammar);
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
CompileError error = get<2>(prepare_grammar_result);
@ -46,22 +45,20 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) {
return { strdup(code.c_str()), nullptr, TSCompileErrorTypeNone };
}
pair<string, const CompileError> compile(const Grammar &grammar,
pair<string, const CompileError> compile(const InputGrammar &grammar,
std::string name) {
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
CompileError error = get<2>(prepare_grammar_result);
if (error.type)
return { "", error };
if (error.type) return { "", error };
auto table_build_result =
build_tables::build_tables(syntax_grammar, lexical_grammar);
const ParseTable &parse_table = get<0>(table_build_result);
const LexTable &lex_table = get<1>(table_build_result);
error = get<2>(table_build_result);
if (error.type)
return { "", error };
if (error.type) return { "", error };
string code = generate_code::c_code(name, parse_table, lex_table,
syntax_grammar, lexical_grammar);

View file

@ -7,9 +7,9 @@
namespace tree_sitter {
struct Grammar;
struct InputGrammar;
std::pair<std::string, CompileError> compile(const Grammar &, std::string);
std::pair<std::string, CompileError> compile(const InputGrammar &, std::string);
} // namespace tree_sitter

View file

@ -8,6 +8,8 @@ namespace tree_sitter {
class CompileError {
public:
// Default constructor: sets `type` to TSCompileErrorTypeNone and leaves
// `message` default-initialized.
CompileError() : type(TSCompileErrorTypeNone) {}
// Builds an error from an explicit type and message; each argument
// initializes the member of the same name.
CompileError(TSCompileErrorType type, std::string message)
: type(type), message(message) {}
@ -15,6 +17,10 @@ class CompileError {
return CompileError(TSCompileErrorTypeNone, "");
}
// Converts to true when this object carries an error, i.e. when `type` is
// anything other than TSCompileErrorTypeNone.
operator bool() const {
  const bool is_none = (type == TSCompileErrorTypeNone);
  return !is_none;
}
// Two errors are equal when both their type and their message match.
bool operator==(const CompileError &other) const {
  // Compare the type first; the message is only compared when types agree.
  if (!(type == other.type)) return false;
  return message == other.message;
}

View file

@ -9,7 +9,7 @@
#include "compiler/parse_table.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rule.h"
#include "compiler/util/string_helpers.h"
#include "tree_sitter/runtime.h"
@ -129,7 +129,7 @@ class CCodeGenerator {
size_t token_count = 0;
for (const auto &entry : parse_table.symbols) {
const Symbol &symbol = entry.first;
if (symbol.is_token()) {
if (symbol.is_terminal()) {
token_count++;
} else if (symbol.is_external()) {
const ExternalToken &external_token = syntax_grammar.external_tokens[symbol.index];
@ -256,7 +256,7 @@ class CCodeGenerator {
if (symbol.is_external()) {
needs_external_scanner = true;
external_token_indices.insert(symbol.index);
} else if (symbol.is_token()) {
} else if (symbol.is_terminal()) {
auto corresponding_external_token =
external_tokens_by_corresponding_internal_token.find(symbol.index);
if (corresponding_external_token != external_tokens_by_corresponding_internal_token.end()) {
@ -298,7 +298,7 @@ class CCodeGenerator {
line("TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {");
indent([&]() {
for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) {
line("[" + external_token_id(i) + "] = " + symbol_id(Symbol(i, Symbol::External)) + ",");
line("[" + external_token_id(i) + "] = " + symbol_id(Symbol::external(i)) + ",");
}
});
line("};");
@ -339,7 +339,7 @@ class CCodeGenerator {
line("[" + to_string(state_id++) + "] = {");
indent([&]() {
for (const auto &entry : state.nonterminal_entries) {
line("[" + symbol_id(Symbol(entry.first, Symbol::NonTerminal)) + "] = STATE(");
line("[" + symbol_id(Symbol::non_terminal(entry.first)) + "] = STATE(");
add(to_string(entry.second));
add("),");
}
@ -686,9 +686,13 @@ class CCodeGenerator {
string c_code(string name, const ParseTable &parse_table,
const LexTable &lex_table, const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar) {
return CCodeGenerator(name, parse_table, lex_table, syntax_grammar,
lexical_grammar)
.code();
return CCodeGenerator(
name,
parse_table,
lex_table,
syntax_grammar,
lexical_grammar
).code();
}
} // namespace generate_code

View file

@ -2,17 +2,43 @@
#define COMPILER_GRAMMAR_H_
#include <vector>
#include <unordered_set>
#include <string>
#include <utility>
#include "compiler/rule.h"
namespace tree_sitter {
struct Grammar {
std::vector<std::pair<std::string, rule_ptr>> rules;
std::vector<rule_ptr> extra_tokens;
std::vector<std::vector<std::string>> expected_conflicts;
std::vector<std::string> external_tokens;
// Classification attached to grammar variables and external tokens (it is the
// type of the `type` field in ExternalToken and InputGrammar::Variable).
// Enumerators take the implicit values 0..3 in declaration order, so the order
// must not change if those values are relied on elsewhere.
// NOTE(review): the precise meaning of each enumerator is not visible in this
// header — confirm against the code that assigns them before documenting.
enum VariableType {
VariableTypeHidden,
VariableTypeAuxiliary,
VariableTypeAnonymous,
VariableTypeNamed,
};
// Record describing one external token: its name, its variable type, and the
// symbol of the internal token it corresponds to.
struct ExternalToken {
  std::string name;
  VariableType type;
  rules::Symbol corresponding_internal_token;

  // Member-wise equality: all three fields must match. Fields are compared in
  // declaration order and comparison stops at the first mismatch.
  inline bool operator==(const ExternalToken &other) const {
    if (!(name == other.name)) return false;
    if (!(type == other.type)) return false;
    return corresponding_internal_token == other.corresponding_internal_token;
  }
};
// Plain aggregate holding a grammar's variables, extra tokens, expected
// conflicts and external tokens. Member order is significant for aggregate
// initialization, so fields should only be appended, not reordered.
struct InputGrammar {
// One grammar variable: its name, its VariableType, and the rule it holds.
struct Variable {
std::string name;
VariableType type;
rules::Rule rule;
};
// All variables of the grammar, in the order they were supplied.
std::vector<Variable> variables;
// Rules listed as extra tokens.
std::vector<rules::Rule> extra_tokens;
// Each entry is one expected conflict, stored as an unordered set of the
// named symbols involved.
std::vector<std::unordered_set<rules::NamedSymbol>> expected_conflicts;
// External tokens declared by the grammar.
std::vector<ExternalToken> external_tokens;
};
} // namespace tree_sitter

View file

@ -1,6 +1,5 @@
#include "compiler/lex_table.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rule.h"
namespace tree_sitter {

View file

@ -6,8 +6,7 @@
#include <set>
#include <string>
#include "compiler/precedence_range.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/character_set.h"
#include "compiler/rule.h"
namespace tree_sitter {

View file

@ -5,20 +5,25 @@
#include <string>
#include <set>
#include "compiler/rule.h"
#include "compiler/variable.h"
#include "compiler/grammar.h"
namespace tree_sitter {
// A single token definition in the lexical grammar.
struct LexicalVariable {
std::string name;
VariableType type;
// Earlier declaration: the rule held through a rule_ptr.
rule_ptr rule;
// Current declaration: the rule held by value.
rules::Rule rule;
// extract_tokens sets this to true for tokens whose type is
// VariableTypeAnonymous.
bool is_string;
// Member-wise equality over name, type, rule and is_string.
inline bool operator==(const LexicalVariable &other) const {
return other.name == name && other.type == type && other.rule == rule &&
other.is_string == is_string;
}
};
// The token-level half of a prepared grammar.
struct LexicalGrammar {
// Token definitions; terminal symbols index into this vector.
std::vector<LexicalVariable> variables;
// Earlier declaration: separators held through rule_ptr.
std::vector<rule_ptr> separators;
// Current declaration: separators held by value. extract_tokens appends
// expanded extra-token rules here when they match no entry of `variables`.
std::vector<rules::Rule> separators;
};
} // namespace tree_sitter

View file

@ -1,20 +1,31 @@
#include "compiler/parse_grammar.h"
#include <string>
#include <vector>
#include <unordered_set>
#include <utility>
#include "json.h"
#include "compiler/rule.h"
#include "compiler/rules.h"
namespace tree_sitter {
using std::string;
using std::vector;
using std::unordered_set;
using std::pair;
using rules::Rule;
using rules::Blank;
using rules::Metadata;
using rules::Pattern;
using rules::String;
using rules::NamedSymbol;
// Outcome of parse_rule: either a rule or an error message. Callers treat a
// non-empty error_message as failure.
struct ParseRuleResult {
// Earlier declaration of the parsed rule (pointer-based).
rule_ptr rule;
// Current declaration of the parsed rule (value-based).
Rule rule;
string error_message;
// The converting constructors are intentionally non-explicit so parse_rule
// can write `return "message";` or `return Rule(...);` directly.
ParseRuleResult(const string &error_message) : error_message(error_message) {}
ParseRuleResult(const char *error_message) : error_message(error_message) {}
ParseRuleResult(Rule rule) : rule(rule) {}
};
ParseRuleResult parse_rule(json_value *rule_json) {
@ -23,193 +34,163 @@ ParseRuleResult parse_rule(json_value *rule_json) {
string type;
if (!rule_json) {
error_message = "Rule cannot be null";
goto error;
return "Rule cannot be null";
}
if (rule_json->type != json_object) {
error_message = "Rule type must be an object";
goto error;
return "Rule type must be an object";
}
rule_type_json = rule_json->operator[]("type");
if (rule_type_json.type != json_string) {
error_message = "Rule type must be a string";
goto error;
return "Rule type must be a string";
}
type = rule_type_json.u.string.ptr;
if (type == "BLANK") {
return { blank(), "" };
return Rule(Blank{});
}
if (type == "CHOICE") {
json_value members_json = rule_json->operator[]("members");
if (members_json.type != json_array) {
error_message = "Choice members must be an array";
goto error;
return "Choice members must be an array";
}
vector<rule_ptr> members;
vector<Rule> members;
for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
json_value *member_json = members_json.u.array.values[i];
ParseRuleResult member = parse_rule(member_json);
if (member.rule.get()) {
members.push_back(member.rule);
} else {
error_message = "Invalid choice member: " + member.error_message;
goto error;
auto result = parse_rule(member_json);
if (!result.error_message.empty()) {
return "Invalid choice member: " + result.error_message;
}
members.push_back(result.rule);
}
return { choice(members), "" };
return Rule::choice(members);
}
// SEQ: "members" must be an array; each entry is parsed recursively and the
// results are combined, in order, into a sequence rule. The first member that
// fails to parse aborts the whole rule with a prefixed error message.
if (type == "SEQ") {
json_value members_json = rule_json->operator[]("members");
if (members_json.type != json_array) {
error_message = "Seq members must be an array";
goto error;
return "Seq members must be an array";
}
vector<rule_ptr> members;
vector<Rule> members;
for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
json_value *member_json = members_json.u.array.values[i];
ParseRuleResult member = parse_rule(member_json);
if (member.rule.get()) {
members.push_back(member.rule);
} else {
error_message = "Invalid seq member: " + member.error_message;
goto error;
auto result = parse_rule(member_json);
if (!result.error_message.empty()) {
// Report the SEQ context. This message used to read "Invalid choice
// member: ", copied from the CHOICE branch, which pointed users at the
// wrong construct.
return "Invalid seq member: " + result.error_message;
}
members.push_back(result.rule);
}
return { seq(members), "" };
return Rule::seq(members);
}
if (type == "REPEAT") {
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (content.rule.get()) {
return { repeat(content.rule), "" };
} else {
error_message = "Invalid repeat content: " + content.error_message;
goto error;
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid repeat content: " + result.error_message;
}
return Rule::choice({Rule::repeat(result.rule), Blank{}});
}
// REPEAT1: parses "content" recursively and wraps it in a repeat rule (one or
// more occurrences; the REPEAT branch additionally allows a blank).
if (type == "REPEAT1") {
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (content.rule.get()) {
return { repeat1(content.rule), "" };
} else {
error_message = "Invalid repeat1 content: " + content.error_message;
goto error;
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
// Say "repeat1" so the message can be told apart from the one produced by
// the REPEAT branch; it used to read "Invalid repeat content: ".
return "Invalid repeat1 content: " + result.error_message;
}
return Rule::repeat(result.rule);
}
if (type == "TOKEN") {
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (content.rule.get()) {
return { token(content.rule), "" };
} else {
error_message = "Invalid token content: " + content.error_message;
goto error;
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid token content: " + result.error_message;
}
return Rule(Metadata::token(result.rule));
}
if (type == "PATTERN") {
json_value value_json = rule_json->operator[]("value");
if (value_json.type == json_string) {
return { pattern(value_json.u.string.ptr), "" };
return Rule(Pattern{value_json.u.string.ptr});
} else {
error_message = "Pattern value must be a string";
goto error;
return "Pattern value must be a string";
}
}
if (type == "STRING") {
json_value value_json = rule_json->operator[]("value");
if (value_json.type == json_string) {
return { str(value_json.u.string.ptr), "" };
return Rule(String{value_json.u.string.ptr});
} else {
error_message = "String rule value must be a string";
goto error;
return "String rule value must be a string";
}
}
if (type == "SYMBOL") {
json_value value_json = rule_json->operator[]("name");
if (value_json.type == json_string) {
return { sym(value_json.u.string.ptr), "" };
return Rule(NamedSymbol{value_json.u.string.ptr});
} else {
error_message = "Symbol value must be a string";
goto error;
return "Symbol value must be a string";
}
}
if (type == "PREC") {
json_value precedence_json = rule_json->operator[]("value");
if (precedence_json.type != json_integer) {
error_message = "Precedence value must be an integer";
goto error;
return "Precedence value must be an integer";
}
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (!content.rule.get()) {
error_message = "Invalid precedence content: " + content.error_message;
goto error;
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid precedence content: " + result.error_message;
}
return { prec(precedence_json.u.integer, content.rule), "" };
return Rule(Metadata::prec(precedence_json.u.integer, result.rule));
}
if (type == "PREC_LEFT") {
json_value precedence_json = rule_json->operator[]("value");
if (precedence_json.type != json_integer) {
error_message = "Precedence value must be an integer";
goto error;
return "Precedence value must be an integer";
}
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (!content.rule.get()) {
error_message = "Invalid precedence content: " + content.error_message;
goto error;
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid precedence content: " + result.error_message;
}
return { prec_left(precedence_json.u.integer, content.rule), "" };
return Rule(Metadata::prec_left(precedence_json.u.integer, result.rule));
}
if (type == "PREC_RIGHT") {
json_value precedence_json = rule_json->operator[]("value");
if (precedence_json.type != json_integer) {
error_message = "Precedence value must be an integer";
goto error;
return "Precedence value must be an integer";
}
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (!content.rule.get()) {
error_message = "Invalid precedence content: " + content.error_message;
goto error;
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid precedence content: " + result.error_message;
}
return { prec_right(precedence_json.u.integer, content.rule), "" };
return Rule(Metadata::prec_right(precedence_json.u.integer, result.rule));
}
error_message = "Unknown rule type " + type;
error:
return { rule_ptr(), error_message };
return "Unknown rule type: " + type;
}
ParseGrammarResult parse_grammar(const string &input) {
string error_message;
string name;
Grammar grammar;
InputGrammar grammar;
json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json;
json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
@ -242,15 +223,16 @@ ParseGrammarResult parse_grammar(const string &input) {
for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) {
json_object_entry entry_json = rules_json.u.object.values[i];
ParseRuleResult entry = parse_rule(entry_json.value);
if (!entry.rule.get()) {
error_message =
string("Invalid rule '") + entry_json.name + "' " + entry.error_message;
auto result = parse_rule(entry_json.value);
if (!result.error_message.empty()) {
error_message = result.error_message;
goto error;
}
grammar.rules.push_back({ string(entry_json.name), entry.rule });
grammar.variables.push_back(InputGrammar::Variable{
string(entry_json.name),
VariableTypeNamed,
result.rule
});
}
extras_json = grammar_json->operator[]("extras");
@ -262,13 +244,12 @@ ParseGrammarResult parse_grammar(const string &input) {
for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) {
json_value *extra_json = extras_json.u.array.values[i];
ParseRuleResult extra = parse_rule(extra_json);
if (!extra.rule.get()) {
error_message = string("Invalid extra token: ") + extra.error_message;
auto result = parse_rule(extra_json);
if (!result.error_message.empty()) {
error_message = "Invalid extra token: " + result.error_message;
goto error;
}
grammar.extra_tokens.push_back(extra.rule);
grammar.extra_tokens.push_back(result.rule);
}
}
@ -286,7 +267,7 @@ ParseGrammarResult parse_grammar(const string &input) {
goto error;
}
vector<string> conflict;
unordered_set<NamedSymbol> conflict;
for (size_t j = 0, conflict_length = conflict_json->u.array.length;
j < conflict_length; j++) {
json_value *conflict_entry_json = conflict_json->u.array.values[j];
@ -295,7 +276,9 @@ ParseGrammarResult parse_grammar(const string &input) {
goto error;
}
conflict.push_back(string(conflict_entry_json->u.string.ptr));
conflict.insert(rules::NamedSymbol{
string(conflict_entry_json->u.string.ptr)
});
}
grammar.expected_conflicts.push_back(conflict);
@ -317,7 +300,11 @@ ParseGrammarResult parse_grammar(const string &input) {
}
string token_name = token_name_json->u.string.ptr;
grammar.external_tokens.push_back(token_name);
grammar.external_tokens.push_back({
token_name,
VariableTypeNamed,
rules::NONE()
});
}
}
@ -329,7 +316,7 @@ error:
json_value_free(grammar_json);
}
return { "", Grammar(), error_message };
return { "", InputGrammar(), error_message };
}
} // namespace tree_sitter

View file

@ -9,7 +9,7 @@ namespace tree_sitter {
struct ParseGrammarResult {
std::string name;
Grammar grammar;
InputGrammar grammar;
std::string error_message;
};

View file

@ -1,7 +1,7 @@
#include "compiler/parse_table.h"
#include <string>
#include "compiler/precedence_range.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rule.h"
namespace tree_sitter {
@ -178,7 +178,7 @@ ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
// Records that, in state `state_id`, the non-terminal with index `lookahead`
// leads to `next_state_id`, and flags that non-terminal's entry in `symbols`
// as structural.
void ParseTable::set_nonterminal_action(ParseStateId state_id,
Symbol::Index lookahead,
ParseStateId next_state_id) {
// Earlier spelling: explicit Symbol constructor with a type tag.
symbols[Symbol(lookahead, Symbol::NonTerminal)].structural = true;
// Current spelling: the Symbol::non_terminal factory.
symbols[Symbol::non_terminal(lookahead)].structural = true;
states[state_id].nonterminal_entries[lookahead] = next_state_id;
}

View file

@ -6,8 +6,7 @@
#include <utility>
#include <vector>
#include "compiler/lex_table.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/rule.h"
#include "compiler/precedence_range.h"
#include "compiler/syntax_grammar.h"

View file

@ -2,14 +2,9 @@
#include <vector>
#include <string>
#include <utility>
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include <cassert>
#include "compiler/grammar.h"
#include "compiler/rule.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/repeat.h"
namespace tree_sitter {
namespace prepare_grammar {
@ -18,53 +13,79 @@ using std::string;
using std::vector;
using std::pair;
using std::to_string;
using std::make_shared;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::Seq;
using rules::Rule;
using rules::Symbol;
// Replaces every Repeat inside a rule with a reference to an auxiliary
// non-terminal. For a repeat of `inner`, the auxiliary rule is
// choice(seq(self, inner), inner). The auxiliary rules accumulate in
// `aux_rules`; `offset` is the index the first of them will occupy once they
// are appended after the grammar's existing variables. Identical repeats share
// one auxiliary rule via `existing_repeats`.
class ExpandRepeats : public rules::IdentityRuleFn {
class ExpandRepeats {
string rule_name;
size_t offset;
size_t repeat_count;
vector<pair<rule_ptr, Symbol>> existing_repeats;
vector<pair<Rule, Symbol>> existing_repeats;
rule_ptr apply_to(const Repeat *rule) {
for (const auto pair : existing_repeats)
if (pair.first->operator==(*rule))
return pair.second.copy();
// Recursively rewrites `rule`; only Blank, Symbol, Choice, Seq, Repeat and
// Metadata are expected here, anything else trips the assertion.
Rule apply(Rule rule) {
return rule.match(
[&](const rules::Blank &blank) -> Rule { return blank; },
[&](const rules::Symbol &symbol) { return symbol; },
rule_ptr inner_rule = apply(rule->content);
size_t index = aux_rules.size();
string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
Symbol repeat_symbol(offset + index, Symbol::NonTerminal);
existing_repeats.push_back({ rule->copy(), repeat_symbol });
aux_rules.push_back(Variable{
helper_rule_name,
VariableTypeAuxiliary,
Choice::build({
Seq::build({
repeat_symbol.copy(),
inner_rule,
}),
inner_rule,
})
});
return repeat_symbol.copy();
[&](const rules::Choice &choice) {
vector<Rule> elements;
for (const auto &element : choice.elements) {
elements.push_back(apply(element));
}
return Rule::choice(elements);
},
[&](const rules::Seq &sequence) {
return rules::Seq{
apply(*sequence.left),
apply(*sequence.right)
};
},
[&](const rules::Repeat &repeat) {
// Reuse the helper created earlier for an identical repeat. Iterate by
// const reference: each element holds a whole Rule, so copying it per
// iteration is wasted work.
for (const auto &pair : existing_repeats) {
if (pair.first == rule) {
return pair.second;
}
}
Rule inner_rule = apply(*repeat.rule);
size_t index = aux_rules.size();
// Helper names are "<rule>_repeat<N>", numbered per top-level rule.
string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
Symbol repeat_symbol = Symbol::non_terminal(offset + index);
existing_repeats.push_back({repeat, repeat_symbol});
aux_rules.push_back({
helper_rule_name,
VariableTypeAuxiliary,
rules::Choice{{
rules::Seq{repeat_symbol, inner_rule},
inner_rule,
}}
});
return repeat_symbol;
},
[&](const rules::Metadata &metadata) {
return rules::Metadata{apply(*metadata.rule), metadata.params};
},
[](auto) {
assert(!"Unexpected rule type");
return rules::Blank{};
}
);
}
public:
explicit ExpandRepeats(size_t offset) : offset(offset) {}
rule_ptr expand(const rule_ptr &rule, const string &name) {
// Rewrites one top-level rule. `name` seeds the helper rule names and the
// per-rule repeat counter restarts at zero.
Rule expand(const Rule &rule, const string &name) {
rule_name = name;
repeat_count = 0;
return apply(rule);
}
vector<Variable> aux_rules;
vector<InitialSyntaxGrammar::Variable> aux_rules;
};
InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
@ -75,11 +96,16 @@ InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
result.external_tokens = grammar.external_tokens;
ExpandRepeats expander(result.variables.size());
for (auto &variable : result.variables)
for (auto &variable : result.variables) {
variable.rule = expander.expand(variable.rule, variable.name);
}
result.variables.insert(
result.variables.end(),
expander.aux_rules.begin(),
expander.aux_rules.end()
);
result.variables.insert(result.variables.end(), expander.aux_rules.begin(),
expander.aux_rules.end());
return result;
}

View file

@ -6,8 +6,6 @@
namespace tree_sitter {
namespace prepare_grammar {
struct InitialSyntaxGrammar;
InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &);
} // namespace prepare_grammar

View file

@ -2,15 +2,8 @@
#include <vector>
#include <string>
#include <utility>
#include <map>
#include "compiler/lexical_grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/string.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/character_set.h"
#include "compiler/rule.h"
#include "compiler/prepare_grammar/parse_regex.h"
#include "utf8proc.h"
@ -19,70 +12,69 @@ namespace prepare_grammar {
using std::string;
using std::vector;
using std::map;
using std::pair;
using std::make_shared;
using rules::String;
using rules::Pattern;
using rules::Metadata;
using rules::Rule;
// Expands a token rule into character-level rules.
//   String   -> sequence of single-code-point character sets
//   Pattern  -> result of parse_regex (or its error)
//   Repeat / Metadata / Seq / Choice -> expanded recursively; the first error
//               encountered is returned unchanged
//   anything else -> TSCompileErrorTypeInvalidTokenContents
class ExpandTokens : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
ExpandTokenResult expand_token(const rules::Rule &rule) {
return rule.match(
[](const rules::Blank &blank) -> ExpandTokenResult { return Rule(blank); },
rule_ptr apply_to(const String *rule) {
vector<rule_ptr> elements;
const uint8_t *iter = reinterpret_cast<const uint8_t *>(rule->value.data());
const uint8_t *end = iter + rule->value.size();
[](const rules::String &string) {
vector<Rule> elements;
const uint8_t *iter = reinterpret_cast<const uint8_t *>(string.value.data());
const uint8_t *end = iter + string.value.size();
while (iter < end) {
int32_t el;
size_t size = utf8proc_iterate(iter, (end - iter), &el);
if (!size)
break;
iter += size;
while (iter < end) {
int32_t el;
// utf8proc_iterate returns a signed byte count that is negative for
// malformed input. Keep it signed so that case ends the loop as well,
// instead of becoming a huge unsigned advance past `end` and adding a
// bogus code point to the sequence.
auto size = utf8proc_iterate(iter, (end - iter), &el);
if (size <= 0)
break;
iter += size;
elements.push_back(rules::CharacterSet().include(el).copy());
}
elements.push_back(rules::CharacterSet().include(el));
}
rules::MetadataParams params;
params.is_token = true;
params.is_string = true;
return Rule::seq(elements);
},
return rules::Metadata::build(rules::Seq::build(elements), params);
}
[](const rules::Pattern &pattern) -> ExpandTokenResult {
auto result = parse_regex(pattern.value);
if (result.second) return result.second;
return result.first;
},
rule_ptr apply_to(const Pattern *rule) {
auto pair = parse_regex(rule->value);
if (!error.type)
error = pair.second;
return pair.first;
}
[](const rules::Repeat &rule) -> ExpandTokenResult {
auto result = expand_token(*rule.rule);
if (result.error) return result.error;
return Rule::repeat(result.rule);
},
public:
CompileError error;
ExpandTokens() : error(CompileError::none()) {}
[](const rules::Metadata &rule) -> ExpandTokenResult {
auto result = expand_token(*rule.rule);
if (result.error) return result.error;
return Rule(rules::Metadata{result.rule, rule.params});
},
[](const rules::Seq &rule) -> ExpandTokenResult {
auto left_result = expand_token(*rule.left);
if (left_result.error) return left_result.error;
auto right_result = expand_token(*rule.right);
if (right_result.error) return right_result.error;
return Rule(rules::Seq{left_result.rule, right_result.rule});
},
[](const rules::Choice &rule) -> ExpandTokenResult {
std::vector<Rule> elements;
for (const auto &element : rule.elements) {
auto result = expand_token(element);
if (result.error) return result.error;
elements.push_back(result.rule);
}
return Rule(rules::Choice{elements});
},
[](auto) { return CompileError(TSCompileErrorTypeInvalidTokenContents, ""); }
);
};
pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &grammar) {
LexicalGrammar result;
ExpandTokens expander;
for (const LexicalVariable &variable : grammar.variables) {
auto rule = expander.apply(variable.rule);
if (expander.error.type)
return { result, expander.error };
result.variables.push_back({variable.name, variable.type, rule, variable.is_string});
}
for (auto &sep : grammar.separators) {
auto rule = expander.apply(sep);
if (expander.error.type)
return { result, expander.error };
result.separators.push_back(rule);
}
return { result, CompileError::none() };
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -2,15 +2,21 @@
#define COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_
#include <utility>
#include "compiler/rule.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
struct LexicalGrammar;
namespace prepare_grammar {
std::pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &);
struct ExpandTokenResult {
rules::Rule rule;
CompileError error;
ExpandTokenResult(const rules::Rule &rule) : rule(rule) {}
ExpandTokenResult(const CompileError &error) : error(error) {}
};
ExpandTokenResult expand_token(const rules::Rule &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -1,54 +1,48 @@
#include "compiler/prepare_grammar/extract_choices.h"
#include <vector>
#include <memory>
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/repeat.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {
using std::vector;
using rules::Rule;
// Flattens the choices inside a rule: returns the list of choice-free
// alternatives the rule can stand for. Seq yields every left/right
// combination, Metadata re-wraps each alternative of its content with the same
// params, Choice concatenates the alternatives of its elements, and any other
// rule is returned as a single-element list.
class ExtractChoices : public rules::RuleFn<vector<rule_ptr>> {
vector<rule_ptr> default_apply(const Rule *rule) {
return vector<rule_ptr>({ rule->copy() });
}
vector<Rule> extract_choices(const Rule &rule) {
return rule.match(
[](const rules::Seq &sequence) {
vector<Rule> result;
// Cartesian product of the left and right alternatives.
// NOTE(review): extract_choices(*sequence.right) is re-evaluated for every
// left entry; it could be computed once before the loops.
for (auto &left_entry : extract_choices(*sequence.left)) {
for (auto &right_entry : extract_choices(*sequence.right)) {
result.push_back(rules::Rule::seq({left_entry, right_entry}));
}
}
return result;
},
vector<rule_ptr> apply_to(const rules::Seq *rule) {
vector<rule_ptr> result;
for (auto left_entry : apply(rule->left))
for (auto right_entry : apply(rule->right))
result.push_back(rules::Seq::build({ left_entry, right_entry }));
return result;
}
[](const rules::Metadata &rule) {
vector<Rule> result;
for (auto &entry : extract_choices(*rule.rule)) {
result.push_back(rules::Metadata{entry, rule.params});
}
return result;
},
vector<rule_ptr> apply_to(const rules::Metadata *rule) {
vector<rule_ptr> result;
for (auto entry : apply(rule->rule))
result.push_back(rules::Metadata::build(entry, rule->params));
return result;
}
[](const rules::Choice &choice) {
vector<Rule> result;
for (auto &element : choice.elements) {
for (auto &entry : extract_choices(element)) {
result.push_back(entry);
}
}
return result;
},
vector<rule_ptr> apply_to(const rules::Choice *rule) {
vector<rule_ptr> result;
for (auto element : rule->elements)
for (auto entry : apply(element))
result.push_back(entry);
return result;
}
// Only the earlier, class-based version has a dedicated Repeat case.
vector<rule_ptr> apply_to(const rules::Repeat *rule) {
return vector<rule_ptr>({
rules::Repeat::build(rules::Choice::build(apply(rule->content))),
});
}
};
std::vector<rule_ptr> extract_choices(const rule_ptr &rule) {
return ExtractChoices().apply(rule);
// Fallback for every other rule kind: the rule is its own only alternative.
[](const auto &rule) {
return vector<Rule>({rule});
}
);
}
} // namespace prepare_grammar

View file

@ -7,7 +7,7 @@
namespace tree_sitter {
namespace prepare_grammar {
std::vector<rule_ptr> extract_choices(const rule_ptr &);
std::vector<rules::Rule> extract_choices(const rules::Rule &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -1,24 +1,19 @@
#include "compiler/prepare_grammar/extract_tokens.h"
#include <map>
#include <vector>
#include <cassert>
#include <set>
#include <string>
#include <tuple>
#include "tree_sitter/compiler.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/string.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/pattern.h"
#include "compiler/rule.h"
#include "compiler/prepare_grammar/token_description.h"
#include "compiler/prepare_grammar/is_token.h"
#include "compiler/prepare_grammar/expand_tokens.h"
namespace tree_sitter {
namespace prepare_grammar {
using std::make_shared;
using std::make_tuple;
using std::map;
using std::set;
@ -26,74 +21,143 @@ using std::string;
using std::tuple;
using std::vector;
using rules::Symbol;
using rules::Rule;
using rules::Rule;
// Rewrites the symbols of a rule after some syntax variables have been turned
// into tokens. `replacements` maps each removed non-terminal to the symbol
// that now stands for it.
class SymbolReplacer : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
rule_ptr apply_to(const Symbol *rule) {
return replace_symbol(*rule).copy();
}
class SymbolReplacer {
public:
map<Symbol, Symbol> replacements;
// Returns a copy of `rule` in which every Symbol has been passed through
// replace_symbol; Blank, Choice, Seq, Repeat and Metadata are rebuilt around
// their rewritten children. Any other rule kind trips the assertion.
Rule apply(const Rule &rule) {
return rule.match(
[this](const rules::Blank &blank) -> Rule {
return blank;
},
[this](const rules::Symbol &symbol) {
return replace_symbol(symbol);
},
[this](const rules::Choice &choice) {
vector<rules::Rule> elements;
for (const auto &element : choice.elements) {
elements.push_back(apply(element));
}
return Rule::choice(elements);
},
[this](const rules::Seq &sequence) {
return rules::Seq{
apply(*sequence.left),
apply(*sequence.right)
};
},
[this](const rules::Repeat &repeat) {
return Rule::repeat(apply(*repeat.rule));
},
[this](const rules::Metadata &metadata) {
return rules::Metadata{apply(*metadata.rule), metadata.params};
},
[](auto) {
assert(!"Unexpected rule type");
return rules::Blank{};
}
);
}
// Maps one symbol. Symbols that are not non-terminals are returned unchanged;
// a non-terminal listed in `replacements` becomes its replacement; any other
// non-terminal has its index lowered by the number of replaced non-terminals
// with a smaller index.
Symbol replace_symbol(const Symbol &symbol) {
if (!symbol.is_non_terminal())
return symbol;
if (!symbol.is_non_terminal()) return symbol;
auto replacement_pair = replacements.find(symbol);
if (replacement_pair != replacements.end())
if (replacement_pair != replacements.end()) {
return replacement_pair->second;
}
int new_index = symbol.index;
for (const auto &pair : replacements)
if (pair.first.index < symbol.index)
for (const auto &pair : replacements) {
if (pair.first.index < symbol.index) {
new_index--;
return Symbol(new_index, Symbol::NonTerminal);
}
}
return Symbol::non_terminal(new_index);
}
};
// Pulls token rules out of syntax rules. Each distinct token rule is stored
// once in `tokens`; `token_usage_counts[i]` counts how many times token i was
// encountered. Occurrences in the syntax rule are replaced by terminal symbols
// that index into `tokens`.
class TokenExtractor : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
rule_ptr apply_to_token(const Rule *input, VariableType entry_type, bool is_string) {
for (size_t i = 0; i < tokens.size(); i++)
if (tokens[i].rule->operator==(*input)) {
class TokenExtractor {
// Returns the terminal symbol for `input`: an existing token with an equal
// rule is reused (and its usage count incremented); otherwise a new token
// named by token_description(input) is appended with a usage count of 1.
Symbol extract_token(const rules::Rule &input, VariableType entry_type) {
for (size_t i = 0; i < tokens.size(); i++) {
if (tokens[i].rule == input) {
token_usage_counts[i]++;
return make_shared<Symbol>(i, Symbol::Terminal);
return Symbol::terminal(i);
}
rule_ptr rule = input->copy();
size_t index = tokens.size();
tokens.push_back({token_description(rule), entry_type, rule, is_string});
token_usage_counts.push_back(1);
return make_shared<Symbol>(index, Symbol::Terminal);
}
rule_ptr apply_to(const rules::String *rule) {
return apply_to_token(rule, VariableTypeAnonymous, true);
}
rule_ptr apply_to(const rules::Pattern *rule) {
return apply_to_token(rule, VariableTypeAuxiliary, false);
}
rule_ptr apply_to(const rules::Metadata *rule) {
if (rule->params.is_token) {
return apply_to_token(rule->rule.get(), VariableTypeAuxiliary, false);
} else {
return rules::IdentityRuleFn::apply_to(rule);
}
}
size_t index = tokens.size();
tokens.push_back({
token_description(input),
entry_type,
input
});
token_usage_counts.push_back(1);
return Symbol::terminal(index);
}
public:
vector<size_t> token_usage_counts;
vector<LexicalVariable> tokens;
};
// Rewrites `rule`, replacing token content with terminal symbols:
//   Metadata flagged is_token -> its content becomes an Auxiliary token
//   String                    -> Anonymous token
//   Pattern                   -> Auxiliary token
//   other Metadata, Repeat, Seq, Choice -> rebuilt around rewritten children
//   Blank, Symbol             -> returned unchanged
// Any other rule kind trips the assertion.
Rule apply(const rules::Rule &rule) {
return rule.match(
[this](const rules::Blank &blank) -> Rule { return blank; },
static CompileError extra_token_error(const string &message) {
return CompileError(TSCompileErrorTypeInvalidExtraToken, "Not a token: " + message);
}
[this](const rules::Metadata &rule) -> Rule {
if (rule.params.is_token) {
return extract_token(*rule.rule, VariableTypeAuxiliary);
} else {
return rules::Metadata{apply(*rule.rule), rule.params};
}
},
[this](const rules::String &rule) {
return extract_token(rule, VariableTypeAnonymous);
},
[this](const rules::Pattern &rule) {
return extract_token(rule, VariableTypeAuxiliary);
},
[this](const rules::Repeat &rule) {
return Rule::repeat(apply(*rule.rule));
},
[this](const rules::Seq &rule) {
return Rule::seq({apply(*rule.left), apply(*rule.right)});
},
[this](const rules::Choice &rule) {
std::vector<Rule> elements;
for (const auto &element : rule.elements) {
elements.push_back(apply(element));
}
return Rule::choice(elements);
},
[](const rules::Symbol &symbol) {
return symbol;
},
[](auto) {
assert(!"Unexpected rule type");
return rules::Blank{};
}
);
}
vector<size_t> token_usage_counts;
vector<InternedGrammar::Variable> tokens;
};
tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
const InternedGrammar &grammar
@ -104,15 +168,29 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
TokenExtractor extractor;
// First, extract all of the grammar's tokens into the lexical grammar.
vector<Variable> processed_variables;
for (const Variable &variable : grammar.variables) {
processed_variables.push_back(Variable{
vector<InitialSyntaxGrammar::Variable> processed_variables;
for (const auto &variable : grammar.variables) {
processed_variables.push_back({
variable.name,
variable.type,
extractor.apply(variable.rule)
});
}
lexical_grammar.variables = extractor.tokens;
for (const auto &extracted_token : extractor.tokens) {
auto expansion = expand_token(extracted_token.rule);
if (expansion.error) return make_tuple(
syntax_grammar,
lexical_grammar,
expansion.error
);
lexical_grammar.variables.push_back({
extracted_token.name,
extracted_token.type,
expansion.rule,
extracted_token.type == VariableTypeAnonymous
});
}
// If a variable's entire rule was extracted as a token and that token didn't
// appear within any other rule, then remove that variable from the syntax
@ -120,26 +198,28 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
// that pointed to that variable will need to be updated to point to the
// variable in the lexical grammar. Symbols that pointed to later variables
// will need to have their indices decremented.
size_t i = 0;
for (const Variable &variable : processed_variables) {
auto symbol = variable.rule->as<Symbol>();
if (symbol && symbol->is_token() && extractor.token_usage_counts[symbol->index] == 1) {
lexical_grammar.variables[symbol->index].type = variable.type;
lexical_grammar.variables[symbol->index].name = variable.name;
symbol_replacer.replacements.insert({ Symbol(i, Symbol::NonTerminal), *symbol });
} else {
syntax_grammar.variables.push_back(variable);
}
size_t i = -1;
for (const auto &variable : processed_variables) {
i++;
if (variable.rule.is<Symbol>()) {
auto symbol = variable.rule.get_unchecked<Symbol>();
if (symbol.is_terminal() && extractor.token_usage_counts[symbol.index] == 1) {
lexical_grammar.variables[symbol.index].type = variable.type;
lexical_grammar.variables[symbol.index].name = variable.name;
symbol_replacer.replacements[Symbol::non_terminal(i)] = symbol;
continue;
}
}
syntax_grammar.variables.push_back(variable);
}
// Perform any replacements of symbols needed based on the previous step.
for (Variable &variable : syntax_grammar.variables) {
for (auto &variable : syntax_grammar.variables) {
variable.rule = symbol_replacer.apply(variable.rule);
}
for (const ConflictSet &conflict_set : grammar.expected_conflicts) {
ConflictSet new_conflict_set;
for (const auto &conflict_set : grammar.expected_conflicts) {
set<Symbol> new_conflict_set;
for (const Symbol &symbol : conflict_set) {
new_conflict_set.insert(symbol_replacer.replace_symbol(symbol));
}
@ -148,47 +228,51 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
// The grammar's extra tokens can be either token rules or symbols
// pointing to token rules. If they are symbols, then they'll be handled by
// the parser; add them to the syntax grammar's ubiqutous tokens. If they
// the parser; add them to the syntax grammar's extra tokens. If they
// are anonymous rules, they can be handled by the lexer; add them to the
// lexical grammar's separator rules.
for (const rule_ptr &rule : grammar.extra_tokens) {
int i = 0;
bool used_elsewhere_in_grammar = false;
for (const LexicalVariable &variable : lexical_grammar.variables) {
if (variable.rule->operator==(*rule)) {
syntax_grammar.extra_tokens.insert(Symbol(i, Symbol::Terminal));
used_elsewhere_in_grammar = true;
for (const auto &rule : grammar.extra_tokens) {
CompileError error = rule.match(
[&](const Symbol &symbol) {
Symbol new_symbol = symbol_replacer.replace_symbol(symbol);
if (new_symbol.is_non_terminal()) {
return CompileError(
TSCompileErrorTypeInvalidExtraToken,
"Non-token symbol " + syntax_grammar.variables[new_symbol.index].name + " can't be used as an extra token"
);
} else {
syntax_grammar.extra_tokens.insert(new_symbol);
return CompileError::none();
}
},
[&](auto non_symbol) {
auto expansion = expand_token(non_symbol);
if (expansion.error) return CompileError(
TSCompileErrorTypeInvalidExtraToken,
"Non-token rule expression can't be used as an extra token"
);
int i = 0;
for (const LexicalVariable &variable : lexical_grammar.variables) {
if (variable.rule == expansion.rule) {
syntax_grammar.extra_tokens.insert(Symbol::terminal(i));
return CompileError::none();
}
i++;
}
lexical_grammar.separators.push_back(expansion.rule);
return CompileError::none();
}
i++;
}
);
if (used_elsewhere_in_grammar) {
continue;
}
if (is_token(rule)) {
lexical_grammar.separators.push_back(rule);
continue;
}
auto symbol = rule->as<Symbol>();
if (!symbol) {
return make_tuple(syntax_grammar, lexical_grammar,
extra_token_error(rule->to_string()));
}
Symbol new_symbol = symbol_replacer.replace_symbol(*symbol);
if (new_symbol.is_non_terminal()) {
return make_tuple(
syntax_grammar, lexical_grammar,
extra_token_error(syntax_grammar.variables[new_symbol.index].name));
}
syntax_grammar.extra_tokens.insert(new_symbol);
if (error) return make_tuple(syntax_grammar, lexical_grammar, error);
}
for (const ExternalToken &external_token : grammar.external_tokens) {
Symbol internal_token = symbol_replacer.replace_symbol(external_token.corresponding_internal_token);
Symbol internal_token = symbol_replacer.replace_symbol(
external_token.corresponding_internal_token
);
if (internal_token.is_non_terminal()) {
return make_tuple(

View file

@ -4,14 +4,15 @@
#include <tuple>
#include "compiler/compile_error.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
namespace tree_sitter {
namespace prepare_grammar {
std::tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
const InternedGrammar &);
const InternedGrammar &
);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -1,13 +1,11 @@
#include "compiler/prepare_grammar/flatten_grammar.h"
#include <vector>
#include <cassert>
#include <algorithm>
#include "compiler/prepare_grammar/extract_choices.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/grammar.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {
@ -15,8 +13,9 @@ namespace prepare_grammar {
using std::find;
using std::pair;
using std::vector;
using rules::Rule;
class FlattenRule : public rules::RuleFn<void> {
class FlattenRule {
private:
vector<int> precedence_stack;
vector<rules::Associativity> associativity_stack;
@ -24,40 +23,53 @@ class FlattenRule : public rules::RuleFn<void> {
rules::Associativity last_associativity;
Production production;
void apply_to(const rules::Symbol *sym) {
production.push_back(ProductionStep{
*sym,
precedence_stack.back(),
associativity_stack.back()
});
}
void apply(const Rule &rule) {
rule.match(
[&](const rules::Symbol &symbol) {
production.push_back(ProductionStep{
symbol,
precedence_stack.back(),
associativity_stack.back()
});
},
void apply_to(const rules::Metadata *metadata) {
if (metadata->params.has_precedence)
precedence_stack.push_back(metadata->params.precedence);
if (metadata->params.has_associativity)
associativity_stack.push_back(metadata->params.associativity);
[&](const rules::Metadata &metadata) {
if (metadata.params.has_precedence) {
precedence_stack.push_back(metadata.params.precedence);
}
apply(metadata->rule);
if (metadata.params.has_associativity) {
associativity_stack.push_back(metadata.params.associativity);
}
if (metadata->params.has_precedence) {
last_precedence = precedence_stack.back();
precedence_stack.pop_back();
production.back().precedence = precedence_stack.back();
}
apply(*metadata.rule);
if (metadata->params.has_associativity) {
last_associativity = associativity_stack.back();
associativity_stack.pop_back();
production.back().associativity = associativity_stack.back();
}
}
if (metadata.params.has_precedence) {
last_precedence = precedence_stack.back();
precedence_stack.pop_back();
production.back().precedence = precedence_stack.back();
}
void apply_to(const rules::Seq *seq) {
apply(seq->left);
last_precedence = 0;
last_associativity = rules::AssociativityNone;
apply(seq->right);
if (metadata.params.has_associativity) {
last_associativity = associativity_stack.back();
associativity_stack.pop_back();
production.back().associativity = associativity_stack.back();
}
},
[&](const rules::Seq &sequence) {
apply(*sequence.left);
last_precedence = 0;
last_associativity = rules::AssociativityNone;
apply(*sequence.right);
},
[&](const rules::Blank &blank) {},
[&](auto) {
assert(!"Unexpected rule type");
}
);
}
public:
@ -67,7 +79,7 @@ class FlattenRule : public rules::RuleFn<void> {
last_precedence(0),
last_associativity(rules::AssociativityNone) {}
Production flatten(const rule_ptr &rule) {
Production flatten(const Rule &rule) {
apply(rule);
if (!production.empty()) {
production.back().precedence = last_precedence;
@ -77,10 +89,10 @@ class FlattenRule : public rules::RuleFn<void> {
}
};
SyntaxVariable flatten_rule(const Variable &variable) {
SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable) {
vector<Production> productions;
for (const rule_ptr &rule_component : extract_choices(variable.rule)) {
for (const Rule &rule_component : extract_choices(variable.rule)) {
Production production = FlattenRule().flatten(rule_component);
auto end = productions.end();
if (find(productions.begin(), end, production) == end) {
@ -93,12 +105,21 @@ SyntaxVariable flatten_rule(const Variable &variable) {
pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &grammar) {
SyntaxGrammar result;
result.expected_conflicts = grammar.expected_conflicts;
result.extra_tokens = grammar.extra_tokens;
result.external_tokens = grammar.external_tokens;
for (const auto &expected_conflict : grammar.expected_conflicts) {
result.expected_conflicts.insert({
expected_conflict.begin(),
expected_conflict.end(),
});
}
for (const rules::Symbol &extra_token : grammar.extra_tokens) {
result.extra_tokens.insert(extra_token);
}
bool is_start = true;
for (const Variable &variable : grammar.variables) {
for (const auto &variable : grammar.variables) {
SyntaxVariable syntax_variable = flatten_rule(variable);
if (!is_start) {

View file

@ -4,14 +4,14 @@
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/compile_error.h"
#include "compiler/grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {
namespace prepare_grammar {
struct InitialSyntaxGrammar;
SyntaxVariable flatten_rule(const Variable &variable);
SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable);
std::pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &);
} // namespace prepare_grammar

View file

@ -4,17 +4,26 @@
#include <set>
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/syntax_grammar.h"
#include "compiler/variable.h"
#include "compiler/grammar.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {
struct InitialSyntaxGrammar {
struct Variable {
std::string name;
VariableType type;
rules::Rule rule;
inline bool operator==(const Variable &other) const {
return name == other.name && type == other.type && rule == other.rule;
}
};
std::vector<Variable> variables;
std::set<rules::Symbol> extra_tokens;
std::set<ConflictSet> expected_conflicts;
std::set<std::set<rules::Symbol>> expected_conflicts;
std::vector<ExternalToken> external_tokens;
};

View file

@ -1,14 +1,11 @@
#include "compiler/prepare_grammar/intern_symbols.h"
#include <memory>
#include <vector>
#include <cassert>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {
@ -17,34 +14,62 @@ using std::string;
using std::vector;
using std::set;
using std::pair;
using std::make_shared;
using rules::Symbol;
using rules::Rule;
class SymbolInterner : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
rule_ptr apply_to(const rules::NamedSymbol *rule) {
auto result = symbol_for_rule_name(rule->name);
if (!result.get()) {
missing_rule_name = rule->name;
return rules::Blank::build();
}
return result;
}
class SymbolInterner {
public:
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
for (size_t i = 0; i < grammar.rules.size(); i++)
if (grammar.rules[i].first == rule_name)
return make_shared<Symbol>(i, Symbol::NonTerminal);
for (size_t i = 0; i < grammar.external_tokens.size(); i++)
if (grammar.external_tokens[i] == rule_name)
return make_shared<rules::Symbol>(i, Symbol::External);
return nullptr;
Rule apply(const Rule &rule) {
return rule.match(
[&](const rules::Blank &blank) -> Rule { return blank; },
[&](const rules::NamedSymbol &symbol) {
return intern_symbol(symbol);
},
[&](const rules::String &string) { return string; },
[&](const rules::Pattern &pattern) { return pattern; },
[&](const rules::Choice &choice) {
vector<rules::Rule> elements;
for (const auto &element : choice.elements) {
elements.push_back(apply(element));
}
return rules::Choice{elements};
},
[&](const rules::Seq &sequence) {
return rules::Seq{apply(*sequence.left), apply(*sequence.right)};
},
[&](const rules::Repeat &repeat) {
return rules::Repeat{apply(*repeat.rule)};
},
[&](const rules::Metadata &metadata) {
return rules::Metadata{apply(*metadata.rule), metadata.params};
},
[](auto) {
assert(!"Unexpected rule type");
return rules::Blank{};
}
);
}
explicit SymbolInterner(const Grammar &grammar) : grammar(grammar) {}
const Grammar grammar;
Symbol intern_symbol(rules::NamedSymbol named_symbol) {
for (size_t i = 0; i < grammar.variables.size(); i++)
if (grammar.variables[i].name == named_symbol.value)
return Symbol::non_terminal(i);
for (size_t i = 0; i < grammar.external_tokens.size(); i++)
if (grammar.external_tokens[i].name == named_symbol.value)
return Symbol::external(i);
missing_rule_name = named_symbol.value;
return rules::NONE();
}
explicit SymbolInterner(const InputGrammar &grammar) : grammar(grammar) {}
const InputGrammar &grammar;
string missing_rule_name;
};
@ -53,52 +78,55 @@ CompileError missing_rule_error(string rule_name) {
"Undefined rule '" + rule_name + "'");
}
pair<InternedGrammar, CompileError> intern_symbols(const Grammar &grammar) {
pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &grammar) {
InternedGrammar result;
for (auto &external_token_name : grammar.external_tokens) {
for (auto &external_token : grammar.external_tokens) {
Symbol corresponding_internal_token = rules::NONE();
for (size_t i = 0, n = grammar.rules.size(); i < n; i++) {
if (grammar.rules[i].first == external_token_name) {
corresponding_internal_token = Symbol(i, Symbol::NonTerminal);
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
if (grammar.variables[i].name == external_token.name) {
corresponding_internal_token = Symbol::non_terminal(i);
break;
}
}
result.external_tokens.push_back(ExternalToken{
external_token_name,
external_token_name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
external_token.name,
external_token.name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
corresponding_internal_token
});
}
SymbolInterner interner(grammar);
for (auto &pair : grammar.rules) {
auto new_rule = interner.apply(pair.second);
if (!interner.missing_rule_name.empty())
for (auto &variable : grammar.variables) {
auto new_rule = interner.apply(variable.rule);
if (!interner.missing_rule_name.empty()) {
return { result, missing_rule_error(interner.missing_rule_name) };
}
result.variables.push_back(Variable{
pair.first,
pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
result.variables.push_back(InternedGrammar::Variable{
variable.name,
variable.name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
new_rule
});
}
for (auto &rule : grammar.extra_tokens) {
auto new_rule = interner.apply(rule);
if (!interner.missing_rule_name.empty())
if (!interner.missing_rule_name.empty()) {
return { result, missing_rule_error(interner.missing_rule_name) };
}
result.extra_tokens.push_back(new_rule);
}
for (auto &names : grammar.expected_conflicts) {
for (auto &expected_conflict : grammar.expected_conflicts) {
set<rules::Symbol> entry;
for (auto &name : names) {
auto symbol = interner.symbol_for_rule_name(name);
if (symbol.get())
entry.insert(*symbol);
for (auto &named_symbol : expected_conflict) {
auto symbol = interner.intern_symbol(named_symbol);
if (symbol != rules::NONE()) {
entry.insert(symbol);
}
}
result.expected_conflicts.insert(entry);
}

View file

@ -8,11 +8,11 @@
namespace tree_sitter {
struct Grammar;
struct InputGrammar;
namespace prepare_grammar {
std::pair<InternedGrammar, CompileError> intern_symbols(const Grammar &);
std::pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -4,17 +4,26 @@
#include <set>
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/syntax_grammar.h"
#include "compiler/variable.h"
#include "compiler/grammar.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {
struct InternedGrammar {
struct Variable {
std::string name;
VariableType type;
rules::Rule rule;
bool operator==(const Variable &other) const {
return name == other.name && type == other.type && rule == other.rule;
}
};
std::vector<Variable> variables;
std::vector<rule_ptr> extra_tokens;
std::set<ConflictSet> expected_conflicts;
std::vector<rules::Rule> extra_tokens;
std::set<std::set<rules::Symbol>> expected_conflicts;
std::vector<ExternalToken> external_tokens;
};

View file

@ -1,30 +0,0 @@
#include "compiler/prepare_grammar/is_token.h"
#include "tree_sitter/compiler.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/string.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/pattern.h"
namespace tree_sitter {
namespace prepare_grammar {
// Rule visitor answering "is this rule a token?" for the rule kinds it
// overrides. The answer for any other rule kind comes from RuleFn<bool>,
// which is not shown in this file.
class IsToken : public rules::RuleFn<bool> {
  // A string literal is always a token.
  bool apply_to(const rules::String *) { return true; }

  // A regex pattern is always a token.
  bool apply_to(const rules::Pattern *) { return true; }

  // A metadata wrapper is a token only when its params flag it as one.
  bool apply_to(const rules::Metadata *metadata) {
    return metadata->params.is_token;
  }
};
bool is_token(const rule_ptr &rule) {
return IsToken().apply(rule);
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -1,14 +0,0 @@
#ifndef COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
#define COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {
bool is_token(const rule_ptr &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_

View file

@ -1,15 +1,17 @@
#include "compiler/prepare_grammar/normalize_rules.h"
#include "compiler/prepare_grammar/extract_choices.h"
#include "compiler/rules/choice.h"
namespace tree_sitter {
namespace prepare_grammar {
using std::vector;
using rules::Rule;
LexicalGrammar normalize_rules(const LexicalGrammar &input_grammar) {
LexicalGrammar result(input_grammar);
for (LexicalVariable &variable : result.variables) {
variable.rule = rules::Choice::build(extract_choices(variable.rule));
variable.rule = Rule::choice(extract_choices(variable.rule));
}
return result;

View file

@ -2,11 +2,7 @@
#include <string>
#include <utility>
#include <vector>
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/blank.h"
#include "compiler/rule.h"
#include "compiler/util/string_helpers.h"
#include "utf8proc.h"
@ -16,12 +12,9 @@ namespace prepare_grammar {
using std::string;
using std::vector;
using std::pair;
using std::make_shared;
using rules::CharacterSet;
using rules::Seq;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::Rule;
class PatternParser {
public:
@ -32,103 +25,121 @@ class PatternParser {
next();
}
pair<rule_ptr, CompileError> rule(bool nested) {
vector<rule_ptr> choices = {};
pair<Rule, CompileError> rule(bool nested) {
vector<Rule> choices;
do {
if (!choices.empty()) {
if (peek() == '|')
if (peek() == '|') {
next();
else
} else {
break;
}
}
auto pair = term(nested);
if (pair.second.type)
return { Blank::build(), pair.second };
if (pair.second.type) {
return {Blank{}, pair.second };
}
choices.push_back(pair.first);
} while (has_more_input());
auto rule =
(choices.size() > 1) ? make_shared<Choice>(choices) : choices.front();
return { rule, CompileError::none() };
return {Rule::choice(choices), CompileError::none()};
}
private:
pair<rule_ptr, CompileError> term(bool nested) {
rule_ptr result = Blank::build();
pair<Rule, CompileError> term(bool nested) {
Rule result;
do {
if (peek() == '|')
break;
if (nested && peek() == ')')
break;
auto pair = factor();
if (pair.second.type)
return { Blank::build(), pair.second };
result = Seq::build({ result, pair.first });
if (pair.second) {
return {Blank{}, pair.second};
}
result = Rule::seq({result, pair.first});
} while (has_more_input());
return { result, CompileError::none() };
}
pair<rule_ptr, CompileError> factor() {
pair<Rule, CompileError> factor() {
auto pair = atom();
if (pair.second.type)
return { Blank::build(), pair.second };
rule_ptr result = pair.first;
if (pair.second.type) {
return {Blank{}, pair.second};
}
Rule result = pair.first;
if (has_more_input()) {
switch (peek()) {
case '*':
next();
result = Choice::build({ Repeat::build(result), Blank::build() });
result = Rule::choice({
Rule::repeat(result),
Blank{}
});
break;
case '+':
next();
result = Repeat::build(result);
result = Rule::repeat(result);
break;
case '?':
next();
result = Choice::build({ result, Blank::build() });
result = Rule::choice({result, Blank{}});
break;
}
}
return { result, CompileError::none() };
return {result, CompileError::none()};
}
pair<rule_ptr, CompileError> atom() {
pair<Rule, CompileError> atom() {
switch (peek()) {
case '(': {
next();
auto pair = rule(true);
if (pair.second.type)
return { Blank::build(), pair.second };
if (peek() != ')')
if (pair.second.type) {
return {Blank{}, pair.second};
}
if (peek() != ')') {
return error("unmatched open paren");
}
next();
return { pair.first, CompileError::none() };
return {pair.first, CompileError::none()};
}
case '[': {
next();
auto pair = char_set();
if (pair.second.type)
return { Blank::build(), pair.second };
if (peek() != ']')
if (pair.second.type) {
return {Blank{}, pair.second};
}
if (peek() != ']') {
return error("unmatched open square bracket");
}
next();
return { pair.first.copy(), CompileError::none() };
return {pair.first, CompileError::none()};
}
case ')': {
return error("unmatched close paren");
}
case ']': {
return error("unmatched close square bracket");
}
case '.': {
next();
return { CharacterSet().include_all().exclude('\n').copy(),
CompileError::none() };
return {
CharacterSet().include_all().exclude('\n'),
CompileError::none()
};
}
default: {
auto pair = single_char();
if (pair.second.type)
return { Blank::build(), pair.second };
return { pair.first.copy(), CompileError::none() };
return { Blank{}, pair.second };
return {pair.first, CompileError::none()};
}
}
}
@ -234,8 +245,8 @@ class PatternParser {
return lookahead && iter <= end;
}
pair<rule_ptr, CompileError> error(string msg) {
return { Blank::build(), CompileError(TSCompileErrorTypeInvalidRegex, msg) };
pair<Rule, CompileError> error(string msg) {
return { Blank{}, CompileError(TSCompileErrorTypeInvalidRegex, msg) };
}
string input;
@ -244,7 +255,7 @@ class PatternParser {
int32_t lookahead;
};
pair<rule_ptr, CompileError> parse_regex(const std::string &input) {
pair<Rule, CompileError> parse_regex(const std::string &input) {
return PatternParser(input.c_str()).rule(false);
}

View file

@ -9,7 +9,7 @@
namespace tree_sitter {
namespace prepare_grammar {
std::pair<rule_ptr, CompileError> parse_regex(const std::string &);
std::pair<rules::Rule, CompileError> parse_regex(const std::string &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -17,7 +17,7 @@ using std::get;
using std::make_tuple;
tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
const Grammar &input_grammar) {
const InputGrammar &input_grammar) {
/*
* Convert all string-based `NamedSymbols` into numerical `Symbols`
*/
@ -31,8 +31,9 @@ tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
*/
auto extract_result = extract_tokens(intern_result.first);
error = get<2>(extract_result);
if (error.type)
if (error.type) {
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
}
/*
* Replace `Repeat` rules with pairs of recursive rules
@ -42,11 +43,12 @@ tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
/*
* Expand `String` and `Pattern` rules into full rule trees
*/
auto expand_tokens_result = expand_tokens(get<1>(extract_result));
LexicalGrammar lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;
if (error.type)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
LexicalGrammar lex_grammar = get<1>(extract_result);
// auto expand_tokens_result = expand_tokens(get<1>(extract_result));
// LexicalGrammar lex_grammar = expand_tokens_result.first;
// error = expand_tokens_result.second;
// if (error.type)
// return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
/*
* Flatten syntax rules into lists of productions.

View file

@ -2,18 +2,15 @@
#define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
#include <tuple>
#include "compiler/grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
struct Grammar;
namespace prepare_grammar {
std::tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
const Grammar &);
std::tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(const InputGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -1,68 +1,82 @@
#include "compiler/prepare_grammar/token_description.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/string.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/metadata.h"
#include "compiler/rule.h"
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
namespace prepare_grammar {
using std::string;
using rules::Rule;
class TokenDescription : public rules::RuleFn<string> {
string apply_to(const rules::Pattern *rule) {
is_trivial = false;
return rule->value;
}
class TokenDescription {
bool is_trivial;
string apply_to(const rules::String *rule) {
return rule->value;
}
string apply(const Rule &rule) {
return rule.match(
[&](const rules::Blank) -> string {
return "";
},
string apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
[&](const rules::Symbol) {
return "";
},
string apply_to(const rules::Seq *rule) {
is_trivial = false;
return apply(rule->left) + apply(rule->right);
}
[&](const rules::Pattern &rule) {
is_trivial = false;
return rule.value;
},
string apply_to(const rules::Repeat *rule) {
is_trivial = false;
return apply(rule->content) + "*";
}
[&](const rules::String &rule) {
return rule.value;
},
string apply_to(const rules::Choice *rule) {
is_trivial = false;
string result = "(";
bool started = false;
for (auto &element : rule->elements) {
if (started)
result += "|";
result += apply(element);
started = true;
}
return result + ")";
[&](const rules::Metadata &rule) {
return apply(*rule.rule);
},
[&](const rules::Seq &rule) {
is_trivial = false;
return apply(*rule.left) + apply(*rule.right);
},
[&](const rules::Repeat &rule) {
is_trivial = false;
return apply(*rule.rule) + "+";
},
[&](const rules::Choice &rule) {
is_trivial = false;
string result = "(";
bool started = false;
for (auto &element : rule.elements) {
if (started) result += "|";
result += apply(element);
started = true;
}
return result + ")";
},
[](auto) {
return "";
}
);
}
public:
bool is_trivial;
string describe(const Rule &rule) {
string result = apply(rule);
if (is_trivial) {
return result;
} else {
return "/" + result + "/";
}
}
TokenDescription() : is_trivial(true) {}
};
string token_description(const rule_ptr &rule) {
TokenDescription description;
string result = description.apply(rule);
if (description.is_trivial)
return result;
else
return "/" + result + "/";
string token_description(const Rule &rule) {
return TokenDescription().describe(rule);
}
} // namespace prepare_grammar

View file

@ -7,7 +7,7 @@
namespace tree_sitter {
namespace prepare_grammar {
std::string token_description(const rule_ptr &);
std::string token_description(const rules::Rule &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -1,15 +1,287 @@
#include "compiler/rule.h"
#include <memory>
#include "compiler/util/hash_combine.h"
namespace tree_sitter {
namespace rules {
using std::ostream;
using std::string;
using std::move;
using std::vector;
using util::hash_combine;
bool Rule::operator!=(const Rule &other) const {
return !this->operator==(other);
// Copy constructor. The object is first initialised as a Blank so that the
// copy-assignment operator, which destroys the current value before copying,
// has a valid value to tear down.
Rule::Rule(const Rule &other) : blank_(Blank{}), type(BlankType) {
  operator=(other);
}
Rule::~Rule() {}
// Move constructor. Starts out as a Blank and then hands over to the
// move-assignment operator, which takes the value out of `other`.
Rule::Rule(Rule &&other) noexcept : blank_(Blank{}), type(BlankType) {
  operator=(move(other));
}
// Runs the destructor of the member of `rule` selected by `rule->type`.
// The `type` tag itself is not modified.
static void destroy_value(Rule *rule) {
  switch (rule->type) {
    case Rule::BlankType:
      rule->blank_.~Blank();
      break;
    case Rule::CharacterSetType:
      rule->character_set_.~CharacterSet();
      break;
    case Rule::StringType:
      rule->string_.~String();
      break;
    case Rule::PatternType:
      rule->pattern_.~Pattern();
      break;
    case Rule::NamedSymbolType:
      rule->named_symbol_.~NamedSymbol();
      break;
    case Rule::SymbolType:
      rule->symbol_.~Symbol();
      break;
    case Rule::ChoiceType:
      rule->choice_.~Choice();
      break;
    case Rule::MetadataType:
      rule->metadata_.~Metadata();
      break;
    case Rule::RepeatType:
      rule->repeat_.~Repeat();
      break;
    case Rule::SeqType:
      rule->seq_.~Seq();
      break;
  }
}
// Copy assignment: destroys the value currently held, adopts `other`'s type
// tag, and copy-constructs the matching member in place.
//
// NOTE(review): `other` must not be owned by this rule's current value (for
// example a child reached through one of its pointer members), because that
// value is destroyed before `other` is read. Confirm no caller does this.
Rule &Rule::operator=(const Rule &other) {
  // Self-assignment must be a no-op: otherwise the value would be destroyed
  // and then copy-constructed from its own destroyed storage.
  if (this == &other) return *this;

  destroy_value(this);
  type = other.type;
  switch (type) {
    case BlankType:
      new (&blank_) Blank(other.blank_);
      break;
    case CharacterSetType:
      new (&character_set_) CharacterSet(other.character_set_);
      break;
    case StringType:
      new (&string_) String(other.string_);
      break;
    case PatternType:
      new (&pattern_) Pattern(other.pattern_);
      break;
    case NamedSymbolType:
      new (&named_symbol_) NamedSymbol(other.named_symbol_);
      break;
    case SymbolType:
      new (&symbol_) Symbol(other.symbol_);
      break;
    case ChoiceType:
      new (&choice_) Choice(other.choice_);
      break;
    case MetadataType:
      new (&metadata_) Metadata(other.metadata_);
      break;
    case RepeatType:
      new (&repeat_) Repeat(other.repeat_);
      break;
    case SeqType:
      new (&seq_) Seq(other.seq_);
      break;
  }
  return *this;
}
// Move assignment: destroys the value currently held, adopts `other`'s type
// tag, and move-constructs the matching member in place. Afterwards `other`
// is reset to a Blank rule.
Rule &Rule::operator=(Rule &&other) noexcept {
  // Self-move must be a no-op: otherwise the value would be destroyed and
  // then move-constructed from its own destroyed storage.
  if (this == &other) return *this;

  destroy_value(this);
  type = other.type;
  switch (type) {
    case BlankType:
      new (&blank_) Blank(move(other.blank_));
      break;
    case CharacterSetType:
      new (&character_set_) CharacterSet(move(other.character_set_));
      break;
    case StringType:
      new (&string_) String(move(other.string_));
      break;
    case PatternType:
      new (&pattern_) Pattern(move(other.pattern_));
      break;
    case NamedSymbolType:
      new (&named_symbol_) NamedSymbol(move(other.named_symbol_));
      break;
    case SymbolType:
      new (&symbol_) Symbol(move(other.symbol_));
      break;
    case ChoiceType:
      new (&choice_) Choice(move(other.choice_));
      break;
    case MetadataType:
      new (&metadata_) Metadata(move(other.metadata_));
      break;
    case RepeatType:
      new (&repeat_) Repeat(move(other.repeat_));
      break;
    case SeqType:
      new (&seq_) Seq(move(other.seq_));
      break;
  }

  // The moved-from member of `other` is still a live object, so run its
  // destructor (while other.type still names it) before re-tagging `other`
  // as Blank. Otherwise that destructor would never run.
  destroy_value(&other);
  other.type = BlankType;
  new (&other.blank_) Blank{};
  return *this;
}
// Destructor: releases whichever member is selected by `type`.
Rule::~Rule() noexcept { destroy_value(this); }
// Equality: two rules are equal when their type tags match and the members
// selected by that tag compare equal.
bool Rule::operator==(const Rule &other) const {
  if (type == other.type) {
    switch (type) {
      case Rule::SeqType:
        return seq_ == other.seq_;
      case Rule::RepeatType:
        return repeat_ == other.repeat_;
      case Rule::MetadataType:
        return metadata_ == other.metadata_;
      case Rule::ChoiceType:
        return choice_ == other.choice_;
      case Rule::SymbolType:
        return symbol_ == other.symbol_;
      case Rule::NamedSymbolType:
        return named_symbol_ == other.named_symbol_;
      case Rule::PatternType:
        return pattern_ == other.pattern_;
      case Rule::StringType:
        return string_ == other.string_;
      case Rule::CharacterSetType:
        return character_set_ == other.character_set_;
      default:
        break;
    }
    // Every tag not listed above is compared through the blank member.
    return blank_ == other.blank_;
  }
  return false;
}
// Type queries: each specialization compares the `type` tag against the tag
// of the requested rule kind.
template <>
bool Rule::is<Blank>() const {
  return type == BlankType;
}

template <>
bool Rule::is<Symbol>() const {
  return type == SymbolType;
}

template <>
bool Rule::is<Repeat>() const {
  return type == RepeatType;
}

// Returns the symbol member without checking `type` first.
template <>
const Symbol &Rule::get_unchecked<Symbol>() const {
  return symbol_;
}
static inline void add_choice_element(std::vector<Rule> *elements, const Rule &new_rule) {
new_rule.match(
[elements](Choice choice) {
for (auto &element : choice.elements) {
add_choice_element(elements, element);
}
},
[elements](auto rule) {
for (auto &element : *elements) {
if (element == rule) return;
}
elements->push_back(rule);
}
);
}
// Builds a choice from `rules`. Each rule is fed through add_choice_element.
// If exactly one element results, it is returned directly instead of being
// wrapped in a Choice.
Rule Rule::choice(const vector<Rule> &rules) {
  vector<Rule> alternatives;
  for (const auto &candidate : rules) {
    add_choice_element(&alternatives, candidate);
  }

  if (alternatives.size() == 1) {
    return alternatives.front();
  }
  return Choice{alternatives};
}
// Wraps `rule` in a Repeat, unless it already is one, in which case it is
// returned unchanged.
Rule Rule::repeat(const Rule &rule) {
  if (rule.is<Repeat>()) {
    return rule;
  }
  return Repeat{rule};
}
// Folds `rules` left-to-right into nested Seq rules.
//  - Blank members are skipped.
//  - A Metadata member is skipped when it wraps a Blank; otherwise it is
//    always appended as Seq{accumulated, member}, even while the accumulator
//    is still Blank.
//  - Any other member replaces a Blank accumulator, or is appended to a
//    non-Blank one.
Rule Rule::seq(const vector<Rule> &rules) {
  Rule sequence;
  for (const auto &member : rules) {
    member.match(
      [](Blank) {},
      [&](Metadata metadata) {
        if (metadata.rule->is<Blank>()) return;
        sequence = Seq{sequence, member};
      },
      [&](auto) {
        if (!sequence.is<Blank>()) {
          sequence = Seq{sequence, member};
        } else {
          sequence = member;
        }
      }
    );
  }
  return sequence;
}
} // namespace rules
} // namespace tree_sitter
namespace std {
// Hash of a Symbol: the hash of its index combined with the hash of its type.
size_t hash<Symbol>::operator()(const Symbol &symbol) const {
  size_t seed = hash<int>()(symbol.index);
  const size_t type_hash = hash<int>()(symbol.type);
  hash_combine(&seed, type_hash);
  return seed;
}
size_t hash<NamedSymbol>::operator()(const NamedSymbol &symbol) const {
return hash<string>()(symbol.value);
}
size_t hash<Pattern>::operator()(const Pattern &symbol) const {
return hash<string>()(symbol.value);
}
size_t hash<String>::operator()(const String &symbol) const {
return hash<string>()(symbol.value);
}
size_t hash<CharacterSet>::operator()(const CharacterSet &character_set) const {
size_t result = 0;
hash_combine(&result, character_set.includes_all);
hash_combine(&result, character_set.included_chars.size());
for (uint32_t c : character_set.included_chars) {
hash_combine(&result, c);
}
hash_combine(&result, character_set.excluded_chars.size());
for (uint32_t c : character_set.excluded_chars) {
hash_combine(&result, c);
}
return result;
}
size_t hash<Blank>::operator()(const Blank &blank) const {
return 0;
}
size_t hash<Choice>::operator()(const Choice &choice) const {
size_t result = 0;
for (const auto &element : choice.elements) {
symmetric_hash_combine(&result, element);
}
return result;
}
size_t hash<Repeat>::operator()(const Repeat &repeat) const {
size_t result = 0;
hash_combine(&result, *repeat.rule);
return result;
}
size_t hash<Seq>::operator()(const Seq &seq) const {
size_t result = 0;
hash_combine(&result, *seq.left);
hash_combine(&result, *seq.right);
return result;
}
size_t hash<Metadata>::operator()(const Metadata &metadata) const {
size_t result = 0;
hash_combine(&result, *metadata.rule);
hash_combine(&result, metadata.params.precedence);
hash_combine<int>(&result, metadata.params.associativity);
hash_combine(&result, metadata.params.has_precedence);
hash_combine(&result, metadata.params.has_associativity);
hash_combine(&result, metadata.params.is_token);
hash_combine(&result, metadata.params.is_string);
hash_combine(&result, metadata.params.is_active);
hash_combine(&result, metadata.params.is_main_token);
return result;
}
size_t hash<Rule>::operator()(const Rule &rule) const {
size_t result = hash<int>()(rule.type);
switch (rule.type) {
case Rule::CharacterSetType: return result ^ hash<CharacterSet>()(rule.character_set_);
case Rule::StringType: return result ^ hash<String>()(rule.string_);
case Rule::PatternType: return result ^ hash<Pattern>()(rule.pattern_);
case Rule::NamedSymbolType: return result ^ hash<NamedSymbol>()(rule.named_symbol_);
case Rule::SymbolType: return result ^ hash<Symbol>()(rule.symbol_);
case Rule::ChoiceType: return result ^ hash<Choice>()(rule.choice_);
case Rule::MetadataType: return result ^ hash<Metadata>()(rule.metadata_);
case Rule::RepeatType: return result ^ hash<Repeat>()(rule.repeat_);
case Rule::SeqType: return result ^ hash<Seq>()(rule.seq_);
default: return result ^ hash<Blank>()(rule.blank_);
}
}
} // namespace std

View file

@ -1,44 +1,143 @@
#ifndef COMPILER_RULE_H_
#define COMPILER_RULE_H_
#include <string>
#include <memory>
#include <vector>
#include "compiler/util/make_visitor.h"
#include "compiler/util/hash_combine.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/string.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace rules {
class Visitor;
} // namespace rules
class Rule;
typedef std::shared_ptr<Rule> rule_ptr;
struct Rule {
union {
Blank blank_;
CharacterSet character_set_;
String string_;
Pattern pattern_;
NamedSymbol named_symbol_;
Symbol symbol_;
Choice choice_;
Metadata metadata_;
Repeat repeat_;
Seq seq_;
};
class Rule {
public:
virtual bool operator==(const Rule &other) const = 0;
bool operator!=(const Rule &other) const;
virtual size_t hash_code() const = 0;
virtual rule_ptr copy() const = 0;
virtual std::string to_string() const = 0;
virtual void accept(rules::Visitor *visitor) const = 0;
virtual ~Rule();
enum {
BlankType,
CharacterSetType,
StringType,
PatternType,
NamedSymbolType,
SymbolType,
ChoiceType,
MetadataType,
RepeatType,
SeqType,
} type;
template <typename T>
const T *as() const {
return dynamic_cast<const T *>(this);
Rule() : blank_(Blank{}), type(BlankType) {};
Rule(const Blank &value) : blank_(value), type(BlankType) {};
Rule(const CharacterSet &value) : character_set_(value), type(CharacterSetType) {};
Rule(const String &value) : string_(value), type(StringType) {};
Rule(const Pattern &value) : pattern_(value), type(PatternType) {};
Rule(const NamedSymbol &value) : named_symbol_(value), type(NamedSymbolType) {};
Rule(const Symbol &value) : symbol_(value), type(SymbolType) {};
Rule(const Choice &value) : choice_(value), type(ChoiceType) {};
Rule(const Metadata &value) : metadata_(value), type(MetadataType) {};
Rule(const Repeat &value) : repeat_(value), type(RepeatType) {};
Rule(const Seq &value) : seq_(value), type(SeqType) {};
Rule(const Rule &other);
Rule(Rule &&other) noexcept;
Rule &operator=(const Rule &other);
Rule &operator=(Rule &&other) noexcept;
~Rule() noexcept;
static Rule choice(const std::vector<Rule> &rules);
static Rule seq(const std::vector<Rule> &rules);
static Rule repeat(const Rule &rule);
template <typename RuleType>
bool is() const;
template <typename RuleType>
const RuleType & get_unchecked() const;
template <typename FunctionType>
inline auto accept(FunctionType function) const -> decltype(function(blank_)) {
switch (type) {
case CharacterSetType: return function(character_set_);
case StringType: return function(string_);
case PatternType: return function(pattern_);
case NamedSymbolType: return function(named_symbol_);
case SymbolType: return function(symbol_);
case ChoiceType: return function(choice_);
case MetadataType: return function(metadata_);
case RepeatType: return function(repeat_);
case SeqType: return function(seq_);
default: return function(blank_);
}
}
template <typename ...FunctionTypes>
inline auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward<FunctionTypes>(functions)...))){
return accept(util::make_visitor(std::forward<FunctionTypes>(functions)...));
}
bool operator==(const Rule &other) const;
};
} // namespace rules
} // namespace tree_sitter
namespace std {
using namespace tree_sitter::rules;
using namespace tree_sitter::util;
template <>
struct hash<tree_sitter::rule_ptr> {
size_t operator()(const tree_sitter::rule_ptr &rule) const {
return rule->hash_code();
}
};
struct hash<Symbol> { size_t operator()(const Symbol &) const; };
template <>
struct hash<NamedSymbol> { size_t operator()(const NamedSymbol &) const; };
template <>
struct hash<Pattern> { size_t operator()(const Pattern &) const; };
template <>
struct hash<String> { size_t operator()(const String &) const; };
template <>
struct hash<CharacterSet> { size_t operator()(const CharacterSet &) const; };
template <>
struct hash<Blank> { size_t operator()(const Blank &) const; };
template <>
struct hash<Choice> { size_t operator()(const Choice &) const; };
template <>
struct hash<Repeat> { size_t operator()(const Repeat &) const; };
template <>
struct hash<Seq> { size_t operator()(const Seq &) const; };
template <>
struct hash<Metadata> { size_t operator()(const Metadata &) const; };
template <>
struct hash<Rule> { size_t operator()(const Rule &) const; };
} // namespace std

View file

@ -1,28 +0,0 @@
#ifndef COMPILER_RULES_H_
#define COMPILER_RULES_H_
#include <string>
#include <vector>
#include <memory>
#include "compiler/rule.h"
// Free-function builders that return rule trees as shared pointers (rule_ptr).
namespace tree_sitter {
// Builds a blank rule.
rule_ptr blank();
// Builds a choice between the given rules.
rule_ptr choice(const std::vector<rule_ptr> &);
// Builds a choice between repeat1 of the given rule and a blank rule.
rule_ptr repeat(const rule_ptr &);
// Builds a Repeat of the given rule.
rule_ptr repeat1(const rule_ptr &);
// Builds a sequence of the given rules.
rule_ptr seq(const std::vector<rule_ptr> &);
// Builds a named-symbol rule referring to the given name.
rule_ptr sym(const std::string &);
// Builds a Pattern rule from the given string.
rule_ptr pattern(const std::string &);
// Builds a String rule from the given string.
rule_ptr str(const std::string &);
// Wraps the rule in metadata carrying the given precedence.
rule_ptr prec(int precedence, const rule_ptr &);
// Wraps the rule in metadata marking it left-associative (no precedence).
rule_ptr prec_left(const rule_ptr &);
// Wraps the rule in metadata marking it left-associative with a precedence.
rule_ptr prec_left(int precedence, const rule_ptr &);
// Wraps the rule in metadata marking it right-associative (no precedence).
rule_ptr prec_right(const rule_ptr &);
// Wraps the rule in metadata marking it right-associative with a precedence.
rule_ptr prec_right(int precedence, const rule_ptr &);
// Wraps the rule in metadata with the is_token flag set.
rule_ptr token(const rule_ptr &rule);
} // namespace tree_sitter
#endif // COMPILER_RULES_H_

View file

@ -1,36 +0,0 @@
#include "compiler/rules/blank.h"
#include <string>
#include <memory>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
namespace rules {

// A Blank has no members, so there is nothing to initialize.
Blank::Blank() {}

// Allocates a new shared Blank.
rule_ptr Blank::build() {
  return std::make_shared<Blank>();
}

// A Blank equals any rule that is itself a Blank.
bool Blank::operator==(const Rule &rule) const {
  const Blank *other = rule.as<Blank>();
  return other != nullptr;
}

// Every Blank hashes to zero.
size_t Blank::hash_code() const {
  return 0;
}

// A copy is just another freshly allocated Blank.
rule_ptr Blank::copy() const {
  return Blank::build();
}

std::string Blank::to_string() const {
  return std::string("(blank)");
}

// Dispatches to the visitor's overload for Blank.
void Blank::accept(Visitor *visitor) const {
  visitor->visit(this);
}

}  // namespace rules
}  // namespace tree_sitter

View file

@ -1,25 +1,16 @@
#ifndef COMPILER_RULES_BLANK_H_
#define COMPILER_RULES_BLANK_H_
#include <string>
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
class Blank : public Rule {
public:
Blank();
static rule_ptr build();
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
struct Blank {
inline bool operator==(const Blank &other) const {
return true;
}
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_BLANK_H_
#endif // COMPILER_RULES_BLANK_H_

View file

@ -1,19 +0,0 @@
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {
namespace rules {

// Built-in symbols use negative indices.

// Terminal symbol with index -1.
Symbol END_OF_INPUT() {
  return Symbol(-1, Symbol::Terminal);
}

// Non-terminal symbol with index -2.
Symbol START() {
  return Symbol(-2, Symbol::NonTerminal);
}

// Symbol with index -3, whose type is the value -1 converted to Symbol::Type.
Symbol NONE() {
  return Symbol(-3, static_cast<Symbol::Type>(-1));
}

}  // namespace rules
}  // namespace tree_sitter

View file

@ -1,16 +0,0 @@
#ifndef COMPILER_RULES_BUILT_IN_SYMBOLS_H_
#define COMPILER_RULES_BUILT_IN_SYMBOLS_H_
#include "compiler/rules/symbol.h"
// Accessors for the built-in symbols. Each call returns the symbol by value.
namespace tree_sitter {
namespace rules {
// Terminal symbol with index -1.
Symbol END_OF_INPUT();
// Non-terminal symbol with index -2.
Symbol START();
// Symbol with index -3 and a type value of -1.
Symbol NONE();
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_BUILT_IN_SYMBOLS_H_

View file

@ -1,36 +0,0 @@
#include "compiler/rules/character_range.h"
#include <string>
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
namespace rules {

using std::string;

// Single-character range: both bounds are `value`.
CharacterRange::CharacterRange(uint32_t value) : min(value), max(value) {}

CharacterRange::CharacterRange(uint32_t min, uint32_t max)
    : min(min), max(max) {}

// Two ranges are equal when both bounds match.
bool CharacterRange::operator==(const CharacterRange &other) const {
  if (min != other.min) return false;
  return max == other.max;
}

// Orders ranges by lower bound, breaking ties on the upper bound.
bool CharacterRange::operator<(const CharacterRange &other) const {
  if (min != other.min) return min < other.min;
  return max < other.max;
}

// Renders a single-character range as the escaped character, and any other
// range as the escaped bounds joined by "-".
string CharacterRange::to_string() const {
  string text = util::escape_char(min);
  if (min != max) {
    text += "-";
    text += util::escape_char(max);
  }
  return text;
}

}  // namespace rules
}  // namespace tree_sitter

View file

@ -1,25 +0,0 @@
#ifndef COMPILER_RULES_CHARACTER_RANGE_H_
#define COMPILER_RULES_CHARACTER_RANGE_H_
#include <map>
#include <string>
namespace tree_sitter {
namespace rules {
// A range of character codes described by a lower and an upper bound.
// NOTE(review): presumably both bounds are inclusive — confirm against callers.
struct CharacterRange {
// Lower bound of the range.
uint32_t min;
// Upper bound of the range.
uint32_t max;
// Builds a range whose bounds are both `value`.
explicit CharacterRange(uint32_t value);
// Builds a range with the given bounds.
explicit CharacterRange(uint32_t min, uint32_t max);
// True when both bounds are equal.
bool operator==(const CharacterRange &other) const;
// Orders by `min` first, then by `max`.
bool operator<(const CharacterRange &others) const;
// Escaped `min` alone when min == max, otherwise escaped "min-max".
std::string to_string() const;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_CHARACTER_RANGE_H_

View file

@ -1,59 +1,57 @@
#include "compiler/rules/character_set.h"
#include <string>
#include <utility>
#include <vector>
#include "compiler/rules/visitor.h"
#include "compiler/util/hash_combine.h"
using std::set;
using std::vector;
namespace tree_sitter {
namespace rules {
using std::string;
using std::set;
using std::vector;
using util::hash_combine;
static void add_range(set<uint32_t> *characters, uint32_t min, uint32_t max) {
for (uint32_t c = min; c <= max; c++)
for (uint32_t c = min; c <= max; c++) {
characters->insert(c);
}
}
static void remove_range(set<uint32_t> *characters, uint32_t min, uint32_t max) {
for (uint32_t c = min; c <= max; c++)
for (uint32_t c = min; c <= max; c++) {
characters->erase(c);
}
}
static set<uint32_t> remove_chars(set<uint32_t> *left,
const set<uint32_t> &right) {
static set<uint32_t> remove_chars(set<uint32_t> *left, const set<uint32_t> &right) {
set<uint32_t> result;
for (uint32_t c : right) {
if (left->erase(c))
if (left->erase(c)) {
result.insert(c);
}
}
return result;
}
static set<uint32_t> add_chars(set<uint32_t> *left, const set<uint32_t> &right) {
set<uint32_t> result;
for (uint32_t c : right)
if (left->insert(c).second)
for (uint32_t c : right) {
if (left->insert(c).second) {
result.insert(c);
}
}
return result;
}
static vector<CharacterRange> consolidate_ranges(const set<uint32_t> &chars) {
vector<CharacterRange> result;
for (uint32_t c : chars) {
size_t size = result.size();
auto size = result.size();
if (size >= 2 && result[size - 2].max == (c - 2)) {
result.pop_back();
result.back().max = c;
} else if (size >= 1) {
CharacterRange &last = result.back();
if (last.min < last.max && last.max == (c - 1))
if (last.min < last.max && last.max == (c - 1)) {
last.max = c;
else
} else {
result.push_back(CharacterRange(c));
}
} else {
result.push_back(CharacterRange(c));
}
@ -61,14 +59,14 @@ static vector<CharacterRange> consolidate_ranges(const set<uint32_t> &chars) {
return result;
}
CharacterSet::CharacterSet()
: includes_all(false), included_chars({}), excluded_chars({}) {}
CharacterSet::CharacterSet() : includes_all(false) {}
bool CharacterSet::operator==(const Rule &rule) const {
const CharacterSet *other = rule.as<CharacterSet>();
return other && (includes_all == other->includes_all) &&
(included_chars == other->included_chars) &&
(excluded_chars == other->excluded_chars);
CharacterSet::CharacterSet(const set<uint32_t> &chars) : included_chars(chars), includes_all(false) {}
bool CharacterSet::operator==(const CharacterSet &other) const {
return includes_all == other.includes_all &&
included_chars == other.included_chars &&
excluded_chars == other.excluded_chars;
}
bool CharacterSet::operator<(const CharacterSet &other) const {
@ -83,41 +81,6 @@ bool CharacterSet::operator<(const CharacterSet &other) const {
return excluded_chars < other.excluded_chars;
}
size_t CharacterSet::hash_code() const {
size_t result = 0;
hash_combine(&result, includes_all);
hash_combine(&result, included_chars.size());
for (uint32_t c : included_chars)
hash_combine(&result, c);
hash_combine(&result, excluded_chars.size());
for (uint32_t c : excluded_chars)
hash_combine(&result, c);
return result;
}
rule_ptr CharacterSet::copy() const {
return std::make_shared<CharacterSet>(*this);
}
string CharacterSet::to_string() const {
string result("(char");
if (includes_all)
result += " include_all";
if (!included_chars.empty()) {
result += " (include";
for (auto r : included_ranges())
result += string(" ") + r.to_string();
result += ")";
}
if (!excluded_chars.empty()) {
result += " (exclude";
for (auto r : excluded_ranges())
result += string(" ") + r.to_string();
result += ")";
}
return result + ")";
}
CharacterSet &CharacterSet::include_all() {
includes_all = true;
included_chars = {};
@ -212,9 +175,5 @@ vector<CharacterRange> CharacterSet::excluded_ranges() const {
return consolidate_ranges(excluded_chars);
}
void CharacterSet::accept(Visitor *visitor) const {
visitor->visit(this);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -1,20 +1,28 @@
#ifndef COMPILER_RULES_CHARACTER_SET_H_
#define COMPILER_RULES_CHARACTER_SET_H_
#include <stdint.h>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "compiler/rule.h"
#include "compiler/rules/character_range.h"
#include <set>
#include <cstdint>
namespace tree_sitter {
namespace rules {
class CharacterSet : public Rule {
public:
struct CharacterRange {
uint32_t min;
uint32_t max;
inline explicit CharacterRange(uint32_t value) : min{value}, max{value} {}
inline CharacterRange(uint32_t min, uint32_t max) : min{min}, max{max} {}
inline bool operator==(const CharacterRange &other) const {
return min == other.min && max == other.max;
}
};
struct CharacterSet {
CharacterSet();
CharacterSet(const std::set<uint32_t> &);
CharacterSet &include_all();
CharacterSet &include(uint32_t c);
@ -22,12 +30,8 @@ class CharacterSet : public Rule {
CharacterSet &exclude(uint32_t c);
CharacterSet &exclude(uint32_t min, uint32_t max);
bool operator==(const Rule &other) const;
bool operator==(const CharacterSet &) const;
bool operator<(const CharacterSet &) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
void add_set(const CharacterSet &other);
CharacterSet remove_set(const CharacterSet &other);
@ -37,23 +41,12 @@ class CharacterSet : public Rule {
std::vector<CharacterRange> included_ranges() const;
std::vector<CharacterRange> excluded_ranges() const;
bool includes_all;
std::set<uint32_t> included_chars;
std::set<uint32_t> excluded_chars;
bool includes_all;
};
} // namespace rules
} // namespace tree_sitter
namespace std {
template <>
struct hash<tree_sitter::rules::CharacterSet> {
size_t operator()(const tree_sitter::rules::CharacterSet &rule) const {
return rule.hash_code();
}
};
} // namespace std
#endif // COMPILER_RULES_CHARACTER_SET_H_
#endif // COMPILER_RULES_CHARACTER_SET_H_

View file

@ -1,77 +1,11 @@
#include "compiler/rules/choice.h"
#include <string>
#include <set>
#include "compiler/rules/visitor.h"
#include "compiler/util/hash_combine.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
using std::string;
using std::make_shared;
using std::vector;
using std::set;
using util::symmetric_hash_combine;
Choice::Choice(const vector<rule_ptr> &elements) : elements(elements) {}
void add_choice_element(vector<rule_ptr> *vec, const rule_ptr new_rule) {
auto choice = new_rule->as<Choice>();
if (choice) {
for (auto &child : choice->elements)
add_choice_element(vec, child);
} else {
for (auto &element : *vec)
if (element->operator==(*new_rule))
return;
vec->push_back(new_rule);
}
}
rule_ptr Choice::build(const vector<rule_ptr> &inputs) {
vector<rule_ptr> elements;
for (auto &el : inputs)
add_choice_element(&elements, el);
if (elements.size() == 1)
return elements.front();
else
return make_shared<Choice>(elements);
}
bool Choice::operator==(const Rule &rule) const {
const Choice *other = rule.as<Choice>();
if (!other)
return false;
size_t size = elements.size();
if (size != other->elements.size())
return false;
for (size_t i = 0; i < size; i++)
if (!elements[i]->operator==(*other->elements[i]))
return false;
return true;
}
size_t Choice::hash_code() const {
size_t result = 0;
symmetric_hash_combine(&result, elements.size());
for (const auto &element : elements)
symmetric_hash_combine(&result, element);
return result;
}
rule_ptr Choice::copy() const {
return std::make_shared<Choice>(*this);
}
string Choice::to_string() const {
string result = "(choice";
for (const auto &element : elements)
result += " " + element->to_string();
return result + ")";
}
void Choice::accept(Visitor *visitor) const {
visitor->visit(this);
bool Choice::operator==(const Choice &other) const {
return elements == other.elements;
}
} // namespace rules

View file

@ -1,28 +1,21 @@
#ifndef COMPILER_RULES_CHOICE_H_
#define COMPILER_RULES_CHOICE_H_
#include <string>
#include <memory>
#include <vector>
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
class Choice : public Rule {
public:
explicit Choice(const std::vector<rule_ptr> &elements);
static rule_ptr build(const std::vector<rule_ptr> &rules);
struct Rule;
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
struct Choice {
std::vector<Rule> elements;
const std::vector<rule_ptr> elements;
bool operator==(const Choice &other) const;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_CHOICE_H_
#endif // COMPILER_RULES_CHOICE_H_

View file

@ -1,97 +1,70 @@
#include "compiler/rules/metadata.h"
#include <string>
#include <map>
#include <climits>
#include "compiler/rules/visitor.h"
#include "compiler/rules/blank.h"
#include "compiler/util/hash_combine.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
using std::make_shared;
using std::map;
using std::pair;
using util::hash_combine;
Metadata::Metadata(const Rule &rule, MetadataParams params) :
rule(std::make_shared<Rule>(rule)), params(params) {}
MetadataParams::MetadataParams() :
precedence{0},
associativity{AssociativityNone},
has_precedence{false},
has_associativity{false},
is_token{false},
is_string{false},
is_active{false},
is_main_token{false} {}
bool MetadataParams::operator==(const MetadataParams &other) const {
return
precedence == other.precedence &&
associativity == other.associativity &&
has_precedence == other.has_precedence &&
has_associativity == other.has_associativity &&
is_token == other.is_token &&
is_string == other.is_string &&
is_active == other.is_active &&
is_main_token == other.is_main_token;
bool Metadata::operator==(const Metadata &other) const {
return rule->operator==(*other.rule) && params == other.params;
}
Metadata::Metadata(rule_ptr rule, MetadataParams params)
: rule(rule), params(params) {}
rule_ptr Metadata::build(rule_ptr rule, MetadataParams params) {
return std::make_shared<Metadata>(rule, params);
Metadata Metadata::token(const Rule &rule) {
MetadataParams params;
params.is_token = true;
return Metadata{rule, params};
}
rule_ptr Metadata::main_token(rule_ptr rule) {
Metadata Metadata::active_prec(int precedence, const Rule &rule) {
MetadataParams params;
params.has_precedence = true;
params.precedence = 0;
params.is_main_token = true;
return Metadata::build(rule, params);
params.precedence = precedence;
params.is_active = true;
return Metadata{rule, params};
}
rule_ptr Metadata::separator(rule_ptr rule) {
Metadata Metadata::prec(int precedence, const Rule &rule) {
MetadataParams params;
params.has_precedence = true;
params.precedence = precedence;
return Metadata{rule, params};
}
Metadata Metadata::prec_left(int precedence, const Rule &rule) {
MetadataParams params;
params.has_precedence = true;
params.precedence = precedence;
params.has_associativity = true;
params.associativity = AssociativityLeft;
return Metadata{rule, params};
}
Metadata Metadata::prec_right(int precedence, const Rule &rule) {
MetadataParams params;
params.has_precedence = true;
params.precedence = precedence;
params.has_associativity = true;
params.associativity = AssociativityRight;
return Metadata{rule, params};
}
Metadata Metadata::separator(const Rule &rule) {
MetadataParams params;
params.has_precedence = true;
params.precedence = INT_MIN;
params.is_active = true;
return Metadata::build(rule, params);
return Metadata{rule, params};
}
bool Metadata::operator==(const Rule &rule) const {
auto other = rule.as<Metadata>();
return other && other->params == params && other->rule->operator==(*this->rule);
}
size_t Metadata::hash_code() const {
size_t result = 0;
hash_combine(&result, params.precedence);
hash_combine<int>(&result, params.associativity);
hash_combine(&result, params.has_precedence);
hash_combine(&result, params.has_associativity);
hash_combine(&result, params.is_token);
hash_combine(&result, params.is_string);
hash_combine(&result, params.is_active);
hash_combine(&result, params.is_main_token);
return result;
}
rule_ptr Metadata::copy() const {
return make_shared<Metadata>(rule->copy(), params);
}
std::string Metadata::to_string() const {
if (params.has_precedence) {
return "(metadata prec:" + std::to_string(params.precedence) + " " +
rule->to_string() + ")";
} else {
return "(metadata " + rule->to_string() + ")";
}
}
void Metadata::accept(Visitor *visitor) const {
visitor->visit(this);
Metadata Metadata::main_token(const Rule &rule) {
MetadataParams params;
params.has_precedence = true;
params.precedence = 0;
params.is_main_token = true;
return Metadata{rule, params};
}
} // namespace rules

View file

@ -1,9 +1,7 @@
#ifndef COMPILER_RULES_METADATA_H_
#define COMPILER_RULES_METADATA_H_
#include <string>
#include <map>
#include "compiler/rule.h"
#include <memory>
namespace tree_sitter {
namespace rules {
@ -24,28 +22,45 @@ struct MetadataParams {
bool is_active;
bool is_main_token;
MetadataParams();
bool operator==(const MetadataParams &) const;
inline MetadataParams() :
precedence{0}, associativity{AssociativityNone}, has_precedence{false},
has_associativity{false}, is_token{false}, is_string{false},
is_active{false}, is_main_token{false} {}
inline bool operator==(const MetadataParams &other) const {
return (
precedence == other.precedence &&
associativity == other.associativity &&
has_precedence == other.has_precedence &&
has_associativity == other.has_associativity &&
is_token == other.is_token &&
is_string == other.is_string &&
is_active == other.is_active &&
is_main_token == other.is_main_token
);
}
};
class Metadata : public Rule {
public:
Metadata(rule_ptr rule, MetadataParams);
static rule_ptr build(rule_ptr rule, MetadataParams);
static rule_ptr main_token(rule_ptr rule);
static rule_ptr separator(rule_ptr rule);
struct Rule;
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const rule_ptr rule;
struct Metadata {
std::shared_ptr<Rule> rule;
MetadataParams params;
Metadata(const Rule &rule, MetadataParams params);
static Metadata token(const Rule &rule);
static Metadata active_prec(int precedence, const Rule &rule);
static Metadata prec(int precedence, const Rule &rule);
static Metadata prec_left(int precedence, const Rule &rule);
static Metadata prec_right(int precedence, const Rule &rule);
static Metadata separator(const Rule &rule);
static Metadata main_token(const Rule &rule);
bool operator==(const Metadata &other) const;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_METADATA_H_
#endif // COMPILER_RULES_METADATA_H_

View file

@ -1,35 +0,0 @@
#include "compiler/rules/named_symbol.h"
#include <string>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
namespace rules {

using std::string;
using std::hash;

NamedSymbol::NamedSymbol(const std::string &name) : name(name) {}

// Equal only to another NamedSymbol with the same name.
bool NamedSymbol::operator==(const Rule &rule) const {
  const NamedSymbol *other = rule.as<NamedSymbol>();
  if (other == nullptr) {
    return false;
  }
  return other->name == name;
}

// The hash of a named symbol is the hash of its name.
size_t NamedSymbol::hash_code() const {
  hash<string> string_hasher;
  return string_hasher(name);
}

rule_ptr NamedSymbol::copy() const {
  return std::make_shared<NamedSymbol>(*this);
}

// Renders as (sym '<name>').
string NamedSymbol::to_string() const {
  string text("(sym '");
  text += name;
  text += "')";
  return text;
}

// Dispatches to the visitor's overload for NamedSymbol.
void NamedSymbol::accept(Visitor *visitor) const {
  visitor->visit(this);
}

}  // namespace rules
}  // namespace tree_sitter

View file

@ -2,25 +2,19 @@
#define COMPILER_RULES_NAMED_SYMBOL_H_
#include <string>
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
class NamedSymbol : public Rule {
public:
explicit NamedSymbol(const std::string &name);
struct NamedSymbol {
std::string value;
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
std::string name;
inline bool operator==(const NamedSymbol &other) const {
return value == other.value;
}
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_NAMED_SYMBOL_H_
#endif // COMPILER_RULES_NAMED_SYMBOL_H_

View file

@ -1,36 +0,0 @@
#include "compiler/rules/pattern.h"
#include <string>
#include "compiler/rules/visitor.h"
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
namespace rules {

using std::string;
using std::hash;

Pattern::Pattern(const string &string) : value(string) {}

// Equal only to another Pattern with the same value.
bool Pattern::operator==(tree_sitter::Rule const &other) const {
  const Pattern *pattern = other.as<Pattern>();
  if (pattern == nullptr) {
    return false;
  }
  return pattern->value == value;
}

// The hash of a pattern is the hash of its value.
size_t Pattern::hash_code() const {
  hash<string> string_hasher;
  return string_hasher(value);
}

rule_ptr Pattern::copy() const {
  return std::make_shared<Pattern>(*this);
}

// Renders as (pattern '<escaped value>').
string Pattern::to_string() const {
  string text("(pattern '");
  text += util::escape_string(value);
  text += "')";
  return text;
}

// Dispatches to the visitor's overload for Pattern.
void Pattern::accept(Visitor *visitor) const {
  visitor->visit(this);
}

}  // namespace rules
}  // namespace tree_sitter

View file

@ -2,25 +2,19 @@
#define COMPILER_RULES_PATTERN_H_
#include <string>
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
class Pattern : public Rule {
public:
explicit Pattern(const std::string &string);
struct Pattern {
std::string value;
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const std::string value;
inline bool operator==(const Pattern &other) const {
return value == other.value;
}
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_PATTERN_H_
#endif // COMPILER_RULES_PATTERN_H_

View file

@ -1,43 +1,14 @@
#include "compiler/rules/repeat.h"
#include <memory>
#include <string>
#include "compiler/rules/visitor.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
using std::make_shared;
using std::string;
Repeat::Repeat(const Rule &rule) :
rule(std::make_shared<Rule>(rule)) {}
Repeat::Repeat(const rule_ptr content) : content(content) {}
rule_ptr Repeat::build(const rule_ptr &rule) {
auto inner_repeat = rule->as<Repeat>();
if (inner_repeat)
return rule;
else
return make_shared<Repeat>(rule);
}
bool Repeat::operator==(const Rule &rule) const {
auto other = rule.as<Repeat>();
return other && (*other->content == *content);
}
size_t Repeat::hash_code() const {
return content->hash_code();
}
rule_ptr Repeat::copy() const {
return make_shared<Repeat>(*this);
}
string Repeat::to_string() const {
return string("(repeat ") + content->to_string() + ")";
}
void Repeat::accept(Visitor *visitor) const {
visitor->visit(this);
bool Repeat::operator==(const Repeat &other) const {
return rule->operator==(*other.rule);
}
} // namespace rules

View file

@ -1,27 +1,21 @@
#ifndef COMPILER_RULES_REPEAT_H_
#define COMPILER_RULES_REPEAT_H_
#include <string>
#include "compiler/rule.h"
#include <memory>
namespace tree_sitter {
namespace rules {
class Repeat : public Rule {
public:
explicit Repeat(rule_ptr content);
static rule_ptr build(const rule_ptr &rule);
struct Rule;
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
struct Repeat {
std::shared_ptr<Rule> rule;
const rule_ptr content;
explicit Repeat(const Rule &rule);
bool operator==(const Repeat &other) const;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_REPEAT_H_
#endif // COMPILER_RULES_REPEAT_H_

View file

@ -1,108 +0,0 @@
#include <vector>
#include <map>
#include <set>
#include <string>
#include "compiler/rule.h"
#include "compiler/rules.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/string.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {

using std::make_shared;
using std::string;
using std::set;
using std::vector;
using std::map;
using rules::MetadataParams;

// Wraps `rule` in a Metadata node carrying `params`.
static rule_ptr metadata(rule_ptr rule, MetadataParams params) {
  return make_shared<rules::Metadata>(rule, params);
}

rule_ptr blank() {
  return rules::Blank::build();
}

rule_ptr choice(const vector<rule_ptr> &rules) {
  return rules::Choice::build(rules);
}

// A choice between one-or-more repetitions of `content` and blank.
rule_ptr repeat(const rule_ptr &content) {
  return choice({ repeat1(content), blank() });
}

rule_ptr repeat1(const rule_ptr &content) {
  return rules::Repeat::build(content);
}

rule_ptr seq(const vector<rule_ptr> &rules) {
  return rules::Seq::build(rules);
}

rule_ptr sym(const string &name) {
  return make_shared<rules::NamedSymbol>(name);
}

rule_ptr pattern(const string &value) {
  return make_shared<rules::Pattern>(value);
}

rule_ptr str(const string &value) {
  return make_shared<rules::String>(value);
}

// Left associativity, no precedence.
rule_ptr prec_left(const rule_ptr &rule) {
  MetadataParams left_params;
  left_params.associativity = rules::AssociativityLeft;
  left_params.has_associativity = true;
  return metadata(rule, left_params);
}

// Left associativity with an explicit precedence.
rule_ptr prec_left(int precedence, const rule_ptr &rule) {
  MetadataParams left_params;
  left_params.precedence = precedence;
  left_params.has_precedence = true;
  left_params.associativity = rules::AssociativityLeft;
  left_params.has_associativity = true;
  return metadata(rule, left_params);
}

// Right associativity, no precedence.
rule_ptr prec_right(const rule_ptr &rule) {
  MetadataParams right_params;
  right_params.associativity = rules::AssociativityRight;
  right_params.has_associativity = true;
  return metadata(rule, right_params);
}

// Right associativity with an explicit precedence.
rule_ptr prec_right(int precedence, const rule_ptr &rule) {
  MetadataParams right_params;
  right_params.precedence = precedence;
  right_params.has_precedence = true;
  right_params.associativity = rules::AssociativityRight;
  right_params.has_associativity = true;
  return metadata(rule, right_params);
}

// Precedence only, no associativity.
rule_ptr prec(int precedence, const rule_ptr &rule) {
  MetadataParams prec_params;
  prec_params.precedence = precedence;
  prec_params.has_precedence = true;
  return metadata(rule, prec_params);
}

// Marks `rule` with the is_token flag.
rule_ptr token(const rule_ptr &rule) {
  MetadataParams token_params;
  token_params.is_token = true;
  return metadata(rule, token_params);
}

}  // namespace tree_sitter

View file

@ -1,56 +1,15 @@
#include "compiler/rules/seq.h"
#include <string>
#include "compiler/rules/visitor.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/metadata.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
using std::make_shared;
using std::string;
using std::vector;
Seq::Seq(const Rule &left, const Rule &right) :
left(std::make_shared<Rule>(left)),
right(std::make_shared<Rule>(right)) {}
// Builds a sequence that shares ownership of the two given rules.
Seq::Seq(rule_ptr left, rule_ptr right)
  : left(left),
    right(right) {}
// Folds `rules` into a left-nested chain of Seq nodes.
//
// Elements that are Blank, or Metadata wrapping a Blank, are dropped. If
// nothing remains the result is a Blank; if exactly one element remains it
// is returned as-is rather than wrapped in a Seq.
rule_ptr Seq::build(const std::vector<rule_ptr> &rules) {
  rule_ptr sequence = make_shared<Blank>();
  for (auto &element : rules) {
    // Decide whether this element contributes nothing to the sequence.
    bool is_empty = element->as<Blank>() != nullptr;
    if (!is_empty) {
      auto wrapper = element->as<Metadata>();
      is_empty = wrapper && wrapper->rule->as<Blank>();
    }
    if (is_empty) {
      continue;
    }

    // The first real element replaces the Blank placeholder; later ones
    // are appended on the right.
    if (sequence->as<Blank>()) {
      sequence = element;
    } else {
      sequence = make_shared<Seq>(sequence, element);
    }
  }
  return sequence;
}
// Equality: `rule` must be a Seq whose left and right operands both compare
// equal to ours (left is checked first).
bool Seq::operator==(const Rule &rule) const {
  const Seq *that = rule.as<Seq>();
  if (!that) return false;
  if (!(*that->left == *left)) return false;
  return *that->right == *right;
}
// Hash is the XOR of the two operand hashes, so it does not depend on
// operand order.
size_t Seq::hash_code() const {
  const size_t left_hash = left->hash_code();
  const size_t right_hash = right->hash_code();
  return left_hash ^ right_hash;
}
// Returns a newly allocated Seq copy-constructed from this one.
rule_ptr Seq::copy() const {
  rule_ptr duplicate = std::make_shared<Seq>(*this);
  return duplicate;
}
// Renders the rule as "(seq <left> <right>)".
string Seq::to_string() const {
  string text("(seq ");
  text += left->to_string();
  text += " ";
  text += right->to_string();
  text += ")";
  return text;
}
void Seq::accept(Visitor *visitor) const {
visitor->visit(this);
bool Seq::operator==(const Seq &other) const {
return left->operator==(*other.left) && right->operator==(*other.right);
}
} // namespace rules

View file

@ -1,29 +1,23 @@
#ifndef COMPILER_RULES_SEQ_H_
#define COMPILER_RULES_SEQ_H_
#include <string>
#include <memory>
#include <vector>
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
class Seq : public Rule {
public:
Seq(rule_ptr left, rule_ptr right);
static rule_ptr build(const std::vector<rule_ptr> &rules);
struct Rule;
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
struct Seq {
std::shared_ptr<Rule> left;
std::shared_ptr<Rule> right;
const rule_ptr left;
const rule_ptr right;
Seq(const Rule &left, const Rule &right);
bool operator==(const Seq &other) const;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_SEQ_H_
#endif // COMPILER_RULES_SEQ_H_

View file

@ -1,35 +0,0 @@
#include "compiler/rules/string.h"
#include <string>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
namespace rules {

using std::string;
using std::hash;

// Stores a copy of `value` as the string held by this rule.
String::String(string value)
  : value(value) {}

// Equality: `rule` must be a String holding the same text.
bool String::operator==(const Rule &rule) const {
  const auto that = rule.as<String>();
  if (!that) return false;
  return that->value == value;
}

// Hash of the stored text, computed with std::hash<std::string>.
size_t String::hash_code() const {
  hash<string> hasher;
  return hasher(value);
}

// Returns a newly allocated String copy-constructed from this one.
rule_ptr String::copy() const {
  rule_ptr duplicate = std::make_shared<String>(*this);
  return duplicate;
}

// Renders the rule as "(string '<value>')".
string String::to_string() const {
  string text("(string '");
  text += value;
  text += "')";
  return text;
}

// Visitor dispatch: hands this rule to the visitor's String overload.
void String::accept(Visitor *visitor) const {
  visitor->visit(this);
}

}  // namespace rules
}  // namespace tree_sitter

View file

@ -2,25 +2,19 @@
#define COMPILER_RULES_STRING_H_
#include <string>
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
class String : public Rule {
public:
explicit String(std::string value);
struct String {
std::string value;
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const std::string value;
inline bool operator==(const String &other) const {
return value == other.value;
}
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_STRING_H_
#endif // COMPILER_RULES_STRING_H_

View file

@ -1,82 +0,0 @@
#include "compiler/rules/symbol.h"
#include <string>
#include <map>
#include "compiler/rules/visitor.h"
#include "compiler/util/hash_combine.h"
namespace tree_sitter {
namespace rules {
using std::string;
using std::to_string;
using util::hash_combine;
Symbol::Symbol(Symbol::Index index, Symbol::Type type) : index(index), type(type) {}
bool Symbol::operator==(const Symbol &other) const {
return (other.index == index) && (other.type == type);
}
bool Symbol::operator==(const Rule &rule) const {
auto other = rule.as<Symbol>();
return other && this->operator==(*other);
}
size_t Symbol::hash_code() const {
size_t result = 0;
hash_combine(&result, index);
hash_combine<int>(&result, type);
return result;
}
rule_ptr Symbol::copy() const {
return std::make_shared<Symbol>(*this);
}
string Symbol::to_string() const {
switch (type) {
case Symbol::Terminal:
return "(terminal " + std::to_string(index) + ")";
case Symbol::NonTerminal:
return "(non-terminal " + std::to_string(index) + ")";
case Symbol::External:
return "(external " + std::to_string(index) + ")";
default:
return "(none)";
}
}
bool Symbol::operator<(const Symbol &other) const {
if (type < other.type)
return true;
if (other.type < type)
return false;
return (index < other.index);
}
bool Symbol::is_built_in(Symbol::Index index) {
return index < 0;
}
bool Symbol::is_built_in() const {
return is_built_in(index);
}
bool Symbol::is_token() const {
return type == Symbol::Terminal;
}
bool Symbol::is_external() const {
return type == Symbol::External;
}
bool Symbol::is_non_terminal() const {
return type == Symbol::NonTerminal;
}
void Symbol::accept(Visitor *visitor) const {
visitor->visit(this);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -1,55 +1,76 @@
#ifndef COMPILER_RULES_SYMBOL_H_
#define COMPILER_RULES_SYMBOL_H_
#include <string>
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
class Symbol : public Rule {
public:
typedef int Index;
typedef enum {
struct Symbol {
using Index = int;
enum Type {
External,
Terminal,
NonTerminal,
} Type;
};
Symbol(Index index, Type type);
inline bool operator==(const Symbol &other) const {
return index == other.index && type == other.type;
}
bool operator==(const Symbol &other) const;
bool operator==(const Rule &other) const;
inline bool operator!=(const Symbol &other) const {
return !operator==(other);
}
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
bool operator<(const Symbol &other) const;
static bool is_built_in(Index);
bool is_built_in() const;
bool is_token() const;
bool is_external() const;
bool is_non_terminal() const;
inline bool operator<(const Symbol &other) const {
if (type < other.type) return true;
if (type > other.type) return false;
return index < other.index;
}
Index index;
Type type;
static Symbol terminal(Index index) {
return Symbol{index, Type::Terminal};
}
static Symbol external(Index index) {
return Symbol{index, Type::External};
}
static Symbol non_terminal(Index index) {
return Symbol{index, Type::NonTerminal};
}
bool is_non_terminal() const {
return type == Type::NonTerminal;
}
bool is_terminal() const {
return type == Type::Terminal;
}
bool is_external() const {
return type == Type::External;
}
bool is_built_in() const {
return index < 0;
}
};
// Built-in symbol: the terminal with index -1.
inline Symbol END_OF_INPUT() {
  const Symbol::Index index = -1;
  return Symbol{index, Symbol::Terminal};
}
// Built-in symbol: the non-terminal with index -2.
inline Symbol START() {
  const Symbol::Index index = -2;
  return Symbol{index, Symbol::NonTerminal};
}
// Built-in symbol with index -3 and a type value of -1, which is none of
// the declared enumerators (External, Terminal, NonTerminal).
// NOTE(review): no fixed underlying type is visible on Symbol::Type here, so
// converting -1 to it may fall outside the enum's value range (undefined
// behavior since C++17) — confirm, and consider a dedicated enumerator or a
// fixed underlying type.
inline Symbol NONE() {
return Symbol{-3, Symbol::Type(-1)};
}
} // namespace rules
} // namespace tree_sitter
namespace std {

// Lets Symbol be used as a key in unordered containers by delegating to
// Symbol::hash_code().
template <>
struct hash<tree_sitter::rules::Symbol> {
  size_t operator()(const tree_sitter::rules::Symbol &symbol) const {
    return symbol.hash_code();
  }
};

}  // namespace std
#endif // COMPILER_RULES_SYMBOL_H_
#endif // COMPILER_RULES_SYMBOL_H_

View file

@ -1,44 +0,0 @@
#include "compiler/rules/visitor.h"
#include <vector>
#include "compiler/rule.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/string.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/repeat.h"
namespace tree_sitter {
namespace rules {

using std::vector;

// Defined out of line, in this translation unit.
Visitor::~Visitor() = default;

// Rules without a dedicated overload below are simply copied.
rule_ptr IdentityRuleFn::default_apply(const Rule *rule) {
  return rule->copy();
}

// Rebuilds a choice from its transformed alternatives, in order.
rule_ptr IdentityRuleFn::apply_to(const Choice *rule) {
  vector<rule_ptr> transformed;
  for (const auto &element : rule->elements) {
    transformed.push_back(apply(element));
  }
  return Choice::build(transformed);
}

// Rebuilds a sequence from its transformed operands (left first).
rule_ptr IdentityRuleFn::apply_to(const Seq *rule) {
  rule_ptr new_left = apply(rule->left);
  rule_ptr new_right = apply(rule->right);
  return Seq::build({ new_left, new_right });
}

// Rebuilds a repeat around its transformed content.
rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) {
  rule_ptr new_content = apply(rule->content);
  return Repeat::build(new_content);
}

// Rebuilds a metadata node around its transformed rule, keeping its params.
rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) {
  rule_ptr new_inner = apply(rule->rule);
  return Metadata::build(new_inner, rule->params);
}

}  // namespace rules
}  // namespace tree_sitter

View file

@ -1,234 +0,0 @@
#ifndef COMPILER_RULES_VISITOR_H_
#define COMPILER_RULES_VISITOR_H_
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
class Blank;
class NamedSymbol;
class CharacterSet;
class Choice;
class Repeat;
class Seq;
class String;
class Symbol;
class Pattern;
class Metadata;
class ExternalToken;
// Abstract visitor over the rule classes forward-declared in this header:
// one pure-virtual `visit` overload per rule class. A rule's `accept` calls
// the overload matching its own type (e.g. `visitor->visit(this)`).
class Visitor {
public:
virtual void visit(const Blank *rule) = 0;
virtual void visit(const CharacterSet *rule) = 0;
virtual void visit(const Choice *rule) = 0;
virtual void visit(const Metadata *rule) = 0;
virtual void visit(const Pattern *rule) = 0;
virtual void visit(const Repeat *rule) = 0;
virtual void visit(const Seq *rule) = 0;
virtual void visit(const String *rule) = 0;
virtual void visit(const NamedSymbol *rule) = 0;
virtual void visit(const Symbol *rule) = 0;
virtual void visit(const ExternalToken *rule) = 0;
// Virtual destructor, declared here only; its definition is not in this class body.
virtual ~Visitor();
};
// Base class for functions over rules that produce a value of type T.
//
// `apply` hands the rule to the Visitor machinery; the matching `visit`
// overload forwards to the corresponding `apply_to` overload and stores its
// result. Subclasses override the `apply_to` overloads they care about; all
// others fall back to `default_apply`, which yields a default-constructed T
// unless overridden.
template <typename T>
class RuleFn : private Visitor {
 public:
  // Runs the function on `rule` and returns the computed value.
  T apply(const rule_ptr &rule) {
    value_ = T();
    rule->accept(this);
    return value_;
  }

 protected:
  // Fallback for every rule type without a specific override.
  virtual T default_apply(const Rule *rule) {
    return T();
  }

  // Per-type hooks. Each defaults to `default_apply`.
  // NOTE(review): the rule classes are only forward-declared in this header
  // (unless compiler/rule.h defines them), so the C-style casts below cannot
  // be written as static_cast here — confirm before modernizing them.
  virtual T apply_to(const Blank *rule) {
    return default_apply((const Rule *)rule);
  }

  virtual T apply_to(const CharacterSet *rule) {
    return default_apply((const Rule *)rule);
  }

  virtual T apply_to(const Choice *rule) {
    return default_apply((const Rule *)rule);
  }

  virtual T apply_to(const Metadata *rule) {
    return default_apply((const Rule *)rule);
  }

  virtual T apply_to(const Pattern *rule) {
    return default_apply((const Rule *)rule);
  }

  virtual T apply_to(const Repeat *rule) {
    return default_apply((const Rule *)rule);
  }

  virtual T apply_to(const Seq *rule) {
    return default_apply((const Rule *)rule);
  }

  virtual T apply_to(const String *rule) {
    return default_apply((const Rule *)rule);
  }

  virtual T apply_to(const NamedSymbol *rule) {
    return default_apply((const Rule *)rule);
  }

  virtual T apply_to(const Symbol *rule) {
    return default_apply((const Rule *)rule);
  }

  virtual T apply_to(const ExternalToken *rule) {
    return default_apply((const Rule *)rule);
  }

  // Visitor overrides: record the result of the matching apply_to hook.
  void visit(const Blank *rule) override {
    value_ = apply_to(rule);
  }

  void visit(const CharacterSet *rule) override {
    value_ = apply_to(rule);
  }

  void visit(const Choice *rule) override {
    value_ = apply_to(rule);
  }

  void visit(const Metadata *rule) override {
    value_ = apply_to(rule);
  }

  void visit(const Pattern *rule) override {
    value_ = apply_to(rule);
  }

  void visit(const Repeat *rule) override {
    value_ = apply_to(rule);
  }

  void visit(const Seq *rule) override {
    value_ = apply_to(rule);
  }

  void visit(const String *rule) override {
    value_ = apply_to(rule);
  }

  void visit(const NamedSymbol *rule) override {
    value_ = apply_to(rule);
  }

  void visit(const Symbol *rule) override {
    value_ = apply_to(rule);
  }

  void visit(const ExternalToken *rule) override {
    value_ = apply_to(rule);
  }

 private:
  // Result of the most recent visit; reset at the start of every apply().
  T value_;
};
// Specialization for functions that return nothing: same dispatch scheme as
// the primary template, but no result value is stored.
template <>
class RuleFn<void> : private Visitor {
 public:
  // Runs the function on `rule` for its side effects only.
  void apply(const rule_ptr &rule) {
    rule->accept(this);
  }

 protected:
  // Fallback for every rule type without a specific override; does nothing.
  virtual void default_apply(const Rule *rule) {}

  // Per-type hooks. Each defaults to `default_apply`.
  // NOTE(review): the rule classes are only forward-declared in this header
  // (unless compiler/rule.h defines them), so the C-style casts below cannot
  // be written as static_cast here — confirm before modernizing them.
  virtual void apply_to(const Blank *rule) {
    default_apply((const Rule *)rule);
  }

  virtual void apply_to(const CharacterSet *rule) {
    default_apply((const Rule *)rule);
  }

  virtual void apply_to(const Choice *rule) {
    default_apply((const Rule *)rule);
  }

  virtual void apply_to(const Metadata *rule) {
    default_apply((const Rule *)rule);
  }

  virtual void apply_to(const Pattern *rule) {
    default_apply((const Rule *)rule);
  }

  virtual void apply_to(const Repeat *rule) {
    default_apply((const Rule *)rule);
  }

  virtual void apply_to(const Seq *rule) {
    default_apply((const Rule *)rule);
  }

  virtual void apply_to(const String *rule) {
    default_apply((const Rule *)rule);
  }

  virtual void apply_to(const NamedSymbol *rule) {
    default_apply((const Rule *)rule);
  }

  virtual void apply_to(const Symbol *rule) {
    default_apply((const Rule *)rule);
  }

  virtual void apply_to(const ExternalToken *rule) {
    default_apply((const Rule *)rule);
  }

  // Visitor overrides: forward to the matching apply_to hook.
  void visit(const Blank *rule) override {
    apply_to(rule);
  }

  void visit(const CharacterSet *rule) override {
    apply_to(rule);
  }

  void visit(const Choice *rule) override {
    apply_to(rule);
  }

  void visit(const Metadata *rule) override {
    apply_to(rule);
  }

  void visit(const Pattern *rule) override {
    apply_to(rule);
  }

  void visit(const Repeat *rule) override {
    apply_to(rule);
  }

  void visit(const Seq *rule) override {
    apply_to(rule);
  }

  void visit(const String *rule) override {
    apply_to(rule);
  }

  void visit(const NamedSymbol *rule) override {
    apply_to(rule);
  }

  void visit(const Symbol *rule) override {
    apply_to(rule);
  }

  void visit(const ExternalToken *rule) override {
    apply_to(rule);
  }
};
// A RuleFn returning rule_ptr that overrides the fallback and the hooks for
// the four rule types that contain other rules (Choice, Metadata, Seq,
// Repeat). Only declarations live here; the behavior is defined elsewhere.
class IdentityRuleFn : public RuleFn<rule_ptr> {
 protected:
  rule_ptr default_apply(const Rule *rule) override;
  rule_ptr apply_to(const Choice *rule) override;
  rule_ptr apply_to(const Metadata *rule) override;
  rule_ptr apply_to(const Seq *rule) override;
  rule_ptr apply_to(const Repeat *rule) override;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_VISITOR_H_

View file

@ -1,20 +0,0 @@
#include "compiler/syntax_grammar.h"
#include <vector>
#include <string>
#include <utility>
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {

// External tokens are equal when name, type and corresponding internal
// token all match (compared in that order).
bool ExternalToken::operator==(const ExternalToken &other) const {
  if (!(name == other.name)) return false;
  if (!(type == other.type)) return false;
  return corresponding_internal_token == other.corresponding_internal_token;
}

// Production steps are equal when symbol, precedence and associativity all
// match (compared in that order).
bool ProductionStep::operator==(const ProductionStep &other) const {
  if (!(symbol == other.symbol)) return false;
  if (!(precedence == other.precedence)) return false;
  return associativity == other.associativity;
}

}  // namespace tree_sitter

View file

@ -4,22 +4,16 @@
#include <vector>
#include <string>
#include <set>
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/variable.h"
#include "compiler/rule.h"
#include "compiler/grammar.h"
namespace tree_sitter {
// Describes an external token: its name, its variable type, and the symbol
// stored as its corresponding internal token.
struct ExternalToken {
// Declared here; the definition lives outside this struct body.
bool operator==(const ExternalToken &) const;
std::string name;
VariableType type;
rules::Symbol corresponding_internal_token;
};
struct ProductionStep {
bool operator==(const ProductionStep &) const;
inline bool operator==(const ProductionStep &other) const {
return symbol == other.symbol && precedence == other.precedence &&
associativity == other.associativity;
}
rules::Symbol symbol;
int precedence;
@ -34,7 +28,7 @@ struct SyntaxVariable {
std::vector<Production> productions;
};
typedef std::set<rules::Symbol> ConflictSet;
using ConflictSet = std::set<rules::Symbol>;
struct SyntaxGrammar {
std::vector<SyntaxVariable> variables;

View file

@ -0,0 +1,31 @@
#ifndef COMPILER_UTIL_MAKE_VISITOR_H_
#define COMPILER_UTIL_MAKE_VISITOR_H_
namespace tree_sitter {
namespace util {

// `visitor<Fns...>` inherits from every callable in `Fns` and re-exports all
// of their call operators, producing a single overload set. It is meant to
// be built with `make_visitor` from a list of lambdas, one per alternative.
//
// Callables are taken by value and then *moved* into their base subobjects,
// so move-only callables (e.g. lambdas that capture a std::unique_ptr) are
// supported and copyable ones are not copied again at each nesting level.
// `static_cast<Fn &&>` is used in place of std::move so that this header
// stays free of includes.
template <typename... Fns>
struct visitor;

// Base case: a single callable.
template <typename Fn>
struct visitor<Fn> : Fn {
  using Fn::operator();

  visitor(Fn fn) : Fn(static_cast<Fn &&>(fn)) {}
};

// Recursive case: the first callable plus a visitor over the rest.
template <typename Fn, typename... Fns>
struct visitor<Fn, Fns...> : Fn, visitor<Fns...> {
  using Fn::operator();
  using visitor<Fns...>::operator();

  visitor(Fn fn, Fns... fns)
    : Fn(static_cast<Fn &&>(fn)),
      visitor<Fns...>(static_cast<Fns &&>(fns)...) {}
};

// Combines the given callables into one object whose operator() overloads
// are the union of theirs. At least one callable must be supplied.
template <typename... Fns>
visitor<Fns...> make_visitor(Fns... fns) {
  return visitor<Fns...>(static_cast<Fns &&>(fns)...);
}

}  // namespace util
}  // namespace tree_sitter
#endif // COMPILER_UTIL_MAKE_VISITOR_H_

View file

@ -1,25 +0,0 @@
#ifndef COMPILER_VARIABLE_H_
#define COMPILER_VARIABLE_H_
#include <string>
#include "compiler/rule.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
// Classification attached to a variable. Enumerators take the default
// values 0..3 in declaration order; their precise meaning is not defined in
// this header.
enum VariableType {
VariableTypeHidden,
VariableTypeAuxiliary,
VariableTypeAnonymous,
VariableTypeNamed,
};
// A named grammar variable: its name, its classification, and the rule it
// holds.
struct Variable {
std::string name;
VariableType type;
rule_ptr rule;
};
} // namespace tree_sitter
#endif // COMPILER_VARIABLE_H_

View file

@ -1,7 +1,6 @@
#include "test_helper.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rule.h"
#include "compiler/parse_table.h"
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/lex_item.h"
@ -14,11 +13,11 @@ START_TEST
describe("LexConflictManager::resolve(new_action, old_action)", []() {
LexConflictManager conflict_manager;
bool update;
Symbol sym1(0, Symbol::Terminal);
Symbol sym2(1, Symbol::Terminal);
Symbol sym3(2, Symbol::Terminal);
Symbol sym4(3, Symbol::Terminal);
LexItemSet item_set({ LexItem(sym4, blank() )});
Symbol sym1 = Symbol::terminal(0);
Symbol sym2 = Symbol::terminal(1);
Symbol sym3 = Symbol::terminal(2);
Symbol sym4 = Symbol::terminal(3);
LexItemSet item_set({ LexItem(sym4, Blank{} )});
before_each([&]() {
conflict_manager = LexConflictManager();

View file

@ -1,8 +1,6 @@
#include "test_helper.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules.h"
#include "helpers/rule_helpers.h"
#include "compiler/rule.h"
#include "helpers/stream_methods.h"
using namespace rules;
@ -14,7 +12,7 @@ START_TEST
describe("LexItem", []() {
describe("completion_status()", [&]() {
it("indicates whether the item is done and its precedence", [&]() {
LexItem item1(Symbol(0, Symbol::Terminal), character({ 'a', 'b', 'c' }));
LexItem item1(Symbol::terminal(0), CharacterSet({'a', 'b', 'c'}));
AssertThat(item1.completion_status().is_done, IsFalse());
AssertThat(item1.completion_status().precedence, Equals(PrecedenceRange()));
@ -22,15 +20,18 @@ describe("LexItem", []() {
params.precedence = 3;
params.has_precedence = true;
params.is_string = 1;
LexItem item2(Symbol(0, Symbol::Terminal), choice({
metadata(blank(), params),
character({ 'a', 'b', 'c' })
}));
LexItem item2(Symbol::terminal(0), Choice{{
Metadata{Blank{}, params},
CharacterSet{{ 'a', 'b', 'c' }}
}});
AssertThat(item2.completion_status().is_done, IsTrue());
AssertThat(item2.completion_status().precedence, Equals(PrecedenceRange(3)));
LexItem item3(Symbol(0, Symbol::Terminal), repeat(character({ ' ', '\t' })));
LexItem item3(Symbol::terminal(0), Choice{{
Blank{},
Repeat{CharacterSet{{ ' ', '\t' }}},
}});
AssertThat(item3.completion_status().is_done, IsTrue());
AssertThat(item3.completion_status().precedence, Equals(PrecedenceRange()));
});
@ -40,17 +41,17 @@ describe("LexItem", []() {
describe("LexItemSet::transitions()", [&]() {
it("handles single characters", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'x' }}),
});
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('x'),
CharacterSet{{'x'}},
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
LexItem(Symbol::non_terminal(1), Blank{}),
}),
PrecedenceRange(),
false
@ -64,7 +65,7 @@ describe("LexItemSet::transitions()", [&]() {
params.is_main_token = true;
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), metadata(character({ 'x' }), params)),
LexItem(Symbol::non_terminal(1), Metadata{CharacterSet{{'x'}}, params}),
});
AssertThat(
@ -74,7 +75,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('x'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), metadata(blank(), params)),
LexItem(Symbol::non_terminal(1), Metadata{Blank{}, params}),
}),
PrecedenceRange(),
true
@ -85,11 +86,11 @@ describe("LexItemSet::transitions()", [&]() {
it("handles sequences", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'w' }),
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
LexItem(Symbol::non_terminal(1), Rule::seq({
CharacterSet{{ 'w' }},
CharacterSet{{ 'x' }},
CharacterSet{{ 'y' }},
CharacterSet{{ 'z' }},
})),
});
@ -100,10 +101,10 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('w'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
LexItem(Symbol::non_terminal(1), Rule::seq({
CharacterSet{{ 'x' }},
CharacterSet{{ 'y' }},
CharacterSet{{ 'z' }},
})),
}),
PrecedenceRange(),
@ -115,14 +116,14 @@ describe("LexItemSet::transitions()", [&]() {
it("handles sequences with nested precedence", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
prec(3, seq({
character({ 'v' }),
prec(4, seq({
character({ 'w' }),
character({ 'x' }) })),
character({ 'y' }) })),
character({ 'z' }),
LexItem(Symbol::non_terminal(1), Rule::seq({
Metadata::prec(3, Rule::seq({
CharacterSet{{ 'v' }},
Metadata::prec(4, Rule::seq({
CharacterSet{{ 'w' }},
CharacterSet{{ 'x' }} })),
CharacterSet{{ 'y' }} })),
CharacterSet{{ 'z' }},
})),
});
@ -137,13 +138,15 @@ describe("LexItemSet::transitions()", [&]() {
// The outer precedence is now 'active', because we are within its
// contained rule.
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
active_prec(3, seq({
prec(4, seq({
character({ 'w' }),
character({ 'x' }) })),
character({ 'y' }) })),
character({ 'z' }),
LexItem(Symbol::non_terminal(1), Rule::seq({
Metadata::active_prec(3, Rule::seq({
Metadata::prec(4, Rule::seq({
CharacterSet{{ 'w' }},
CharacterSet{{ 'x' }}
})),
CharacterSet{{ 'y' }}
})),
CharacterSet{{ 'z' }},
})),
}),
@ -165,11 +168,12 @@ describe("LexItemSet::transitions()", [&]() {
Transition{
// The inner precedence is now 'active'
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
active_prec(3, seq({
active_prec(4, character({ 'x' })),
character({ 'y' }) })),
character({ 'z' }),
LexItem(Symbol::non_terminal(1), Rule::seq({
Metadata::active_prec(3, Rule::seq({
Metadata::active_prec(4, CharacterSet{{'x'}}),
CharacterSet{{'y'}}
})),
CharacterSet{{'z'}},
})),
}),
@ -190,9 +194,9 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('x'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
active_prec(3, character({ 'y' })),
character({ 'z' }),
LexItem(Symbol::non_terminal(1), Rule::seq({
Metadata::active_prec(3, CharacterSet{{'y'}}),
CharacterSet{{'z'}},
})),
}),
@ -213,7 +217,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('y'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'z' }}),
}),
PrecedenceRange(3),
false
@ -224,13 +228,13 @@ describe("LexItemSet::transitions()", [&]() {
it("handles sequences where the left hand side can be blank", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
choice({
character({ 'x' }),
blank(),
LexItem(Symbol::non_terminal(1), Rule::seq({
Rule::choice({
CharacterSet{{ 'x' }},
Blank{},
}),
character({ 'y' }),
character({ 'z' }),
CharacterSet{{ 'y' }},
CharacterSet{{ 'z' }},
})),
});
@ -241,9 +245,9 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('x'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'y' }),
character({ 'z' }),
LexItem(Symbol::non_terminal(1), Rule::seq({
CharacterSet{{ 'y' }},
CharacterSet{{ 'z' }},
})),
}),
PrecedenceRange(),
@ -254,7 +258,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('y'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'z' }}),
}),
PrecedenceRange(),
false
@ -265,7 +269,7 @@ describe("LexItemSet::transitions()", [&]() {
it("handles blanks", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
LexItem(Symbol::non_terminal(1), Blank{}),
});
AssertThat(item_set.transitions(), IsEmpty());
@ -273,11 +277,11 @@ describe("LexItemSet::transitions()", [&]() {
it("handles repeats", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), repeat1(seq({
character({ 'a' }),
character({ 'b' }),
}))),
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
LexItem(Symbol::non_terminal(1), Repeat{Rule::seq({
CharacterSet{{ 'a' }},
CharacterSet{{ 'b' }},
})}),
LexItem(Symbol::non_terminal(2), Repeat{CharacterSet{{'c'}}}),
});
AssertThat(
@ -287,14 +291,14 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('a'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'b' }),
repeat1(seq({
character({ 'a' }),
character({ 'b' }),
}))
LexItem(Symbol::non_terminal(1), Rule::seq({
CharacterSet{{ 'b' }},
Repeat{Rule::seq({
CharacterSet{{ 'a' }},
CharacterSet{{ 'b' }},
})}
})),
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'b' })),
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'b' }}),
}),
PrecedenceRange(),
false
@ -304,8 +308,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('c'),
Transition{
LexItemSet({
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
LexItem(Symbol::non_terminal(2), Repeat{CharacterSet{{ 'c' }}}),
LexItem(Symbol::non_terminal(2), Blank{}),
}),
PrecedenceRange(),
false
@ -316,7 +320,7 @@ describe("LexItemSet::transitions()", [&]() {
it("handles repeats with precedence", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' }))))
LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Repeat{CharacterSet{{ 'a' }}}))
});
AssertThat(
@ -326,8 +330,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('a'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' })))),
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, blank())),
LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Repeat{CharacterSet{{ 'a' }}})),
LexItem(Symbol::non_terminal(1), Metadata::active_prec(-1, Blank{})),
}),
PrecedenceRange(-1),
false
@ -338,14 +342,14 @@ describe("LexItemSet::transitions()", [&]() {
it("handles choices between overlapping character sets", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), choice({
active_prec(2, seq({
character({ 'a', 'b', 'c', 'd' }),
character({ 'x' }),
LexItem(Symbol::non_terminal(1), Rule::choice({
Metadata::active_prec(2, Rule::seq({
CharacterSet{{ 'a', 'b', 'c', 'd' }},
CharacterSet{{ 'x' }},
})),
active_prec(3, seq({
character({ 'c', 'd', 'e', 'f' }),
character({ 'y' }),
Metadata::active_prec(3, Rule::seq({
CharacterSet{{ 'c', 'd', 'e', 'f' }},
CharacterSet{{ 'y' }},
})),
}))
});
@ -357,7 +361,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('a', 'b'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
LexItem(Symbol::non_terminal(1), Metadata::active_prec(2, CharacterSet{{ 'x' }})),
}),
PrecedenceRange(2),
false
@ -367,8 +371,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('c', 'd'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
LexItem(Symbol::non_terminal(1), Metadata::active_prec(2, CharacterSet{{ 'x' }})),
LexItem(Symbol::non_terminal(1), Metadata::active_prec(3, CharacterSet{{ 'y' }})),
}),
PrecedenceRange(2, 3),
false
@ -378,7 +382,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('e', 'f'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
LexItem(Symbol::non_terminal(1), Metadata::active_prec(3, CharacterSet{{ 'y' }})),
}),
PrecedenceRange(3),
false
@ -389,14 +393,14 @@ describe("LexItemSet::transitions()", [&]() {
it("handles choices between a subset and a superset of characters", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), choice({
seq({
character({ 'b', 'c', 'd' }),
character({ 'x' }),
LexItem(Symbol::non_terminal(1), Rule::choice({
Rule::seq({
CharacterSet{{ 'b', 'c', 'd' }},
CharacterSet{{ 'x' }},
}),
seq({
character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
character({ 'y' }),
Rule::seq({
CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f' }},
CharacterSet{{ 'y' }},
}),
})),
});
@ -408,7 +412,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('a').include('e', 'f'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'y' }}),
}),
PrecedenceRange(),
false
@ -418,8 +422,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('b', 'd'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'x' }}),
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'y' }}),
}),
PrecedenceRange(),
false
@ -430,15 +434,15 @@ describe("LexItemSet::transitions()", [&]() {
it("handles choices between whitelisted and blacklisted character sets", [&]() {
LexItemSet item_set({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
choice({
character({ '/' }, false),
seq({
character({ '\\' }),
character({ '/' }),
LexItem(Symbol::non_terminal(1), Rule::seq({
Rule::choice({
CharacterSet().include_all().exclude('/'),
Rule::seq({
CharacterSet{{ '\\' }},
CharacterSet{{ '/' }},
}),
}),
character({ '/' }),
CharacterSet{{ '/' }},
}))
});
@ -449,7 +453,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include_all().exclude('/').exclude('\\'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
LexItem(Symbol::non_terminal(1), CharacterSet{{ '/' }}),
}),
PrecedenceRange(),
false
@ -459,8 +463,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('\\'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
LexItem(Symbol(1, Symbol::NonTerminal), seq({ character({ '/' }), character({ '/' }) })),
LexItem(Symbol::non_terminal(1), CharacterSet{{ '/' }}),
LexItem(Symbol::non_terminal(1), Rule::seq({ CharacterSet{{ '/' }}, CharacterSet{{ '/' }} })),
}),
PrecedenceRange(),
false
@ -471,8 +475,8 @@ describe("LexItemSet::transitions()", [&]() {
it("handles different items with overlapping character sets", [&]() {
LexItemSet set1({
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
LexItem(Symbol(2, Symbol::NonTerminal), character({ 'e', 'f', 'g', 'h', 'i' }))
LexItem(Symbol::non_terminal(1), CharacterSet{{ 'a', 'b', 'c', 'd', 'e', 'f' }}),
LexItem(Symbol::non_terminal(2), CharacterSet{{ 'e', 'f', 'g', 'h', 'i' }})
});
AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({
@ -480,7 +484,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('a', 'd'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
LexItem(Symbol::non_terminal(1), Blank{}),
}),
PrecedenceRange(),
false
@ -490,8 +494,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('e', 'f'),
Transition{
LexItemSet({
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
LexItem(Symbol::non_terminal(1), Blank{}),
LexItem(Symbol::non_terminal(2), Blank{}),
}),
PrecedenceRange(),
false
@ -501,7 +505,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('g', 'i'),
Transition{
LexItemSet({
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
LexItem(Symbol::non_terminal(2), Blank{}),
}),
PrecedenceRange(),
false

View file

@ -3,8 +3,6 @@
#include "compiler/lexical_grammar.h"
#include "compiler/build_tables/parse_item_set_builder.h"
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/rules/built_in_symbols.h"
#include "helpers/rule_helpers.h"
using namespace build_tables;
using namespace rules;
@ -17,7 +15,7 @@ describe("ParseItemSetBuilder", []() {
lexical_variables.push_back({
"token_" + to_string(i),
VariableTypeNamed,
blank(),
Blank{},
false
});
}
@ -28,23 +26,23 @@ describe("ParseItemSetBuilder", []() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production({
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::terminal(11), 0, AssociativityNone},
}),
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production({
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
{Symbol::terminal(12), 0, AssociativityNone},
{Symbol::terminal(13), 0, AssociativityNone},
}),
Production({
{Symbol(2, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol::non_terminal(2), 0, AssociativityNone},
})
}},
SyntaxVariable{"rule2", VariableTypeNamed, {
Production({
{Symbol(14, Symbol::Terminal), 0, AssociativityNone},
{Symbol(15, Symbol::Terminal), 0, AssociativityNone},
{Symbol::terminal(14), 0, AssociativityNone},
{Symbol::terminal(15), 0, AssociativityNone},
})
}},
}, {}, {}, {}};
@ -55,8 +53,8 @@ describe("ParseItemSetBuilder", []() {
ParseItemSet item_set({
{
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
ParseItem(Symbol::non_terminal(0), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) }),
}
});
@ -65,20 +63,20 @@ describe("ParseItemSetBuilder", []() {
AssertThat(item_set, Equals(ParseItemSet({
{
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) })
ParseItem(Symbol::non_terminal(0), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) })
},
{
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
ParseItem(Symbol::non_terminal(1), production(1, 0), 0),
LookaheadSet({ Symbol::terminal(11) })
},
{
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
ParseItem(Symbol::non_terminal(1), production(1, 1), 0),
LookaheadSet({ Symbol::terminal(11) })
},
{
ParseItem(Symbol(2, Symbol::NonTerminal), production(2, 0), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
ParseItem(Symbol::non_terminal(2), production(2, 0), 0),
LookaheadSet({ Symbol::terminal(11) })
},
})));
});
@ -87,14 +85,14 @@ describe("ParseItemSetBuilder", []() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production({
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::terminal(11), 0, AssociativityNone},
}),
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production({
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
{Symbol::terminal(12), 0, AssociativityNone},
{Symbol::terminal(13), 0, AssociativityNone},
}),
Production({})
}},
@ -106,8 +104,8 @@ describe("ParseItemSetBuilder", []() {
ParseItemSet item_set({
{
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
ParseItem(Symbol::non_terminal(0), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) }),
}
});
@ -116,16 +114,16 @@ describe("ParseItemSetBuilder", []() {
AssertThat(item_set, Equals(ParseItemSet({
{
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) })
ParseItem(Symbol::non_terminal(0), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) })
},
{
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
ParseItem(Symbol::non_terminal(1), production(1, 0), 0),
LookaheadSet({ Symbol::terminal(11) })
},
{
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
ParseItem(Symbol::non_terminal(1), production(1, 1), 0),
LookaheadSet({ Symbol::terminal(11) })
},
})));
});

View file

@ -1,8 +1,6 @@
#include "test_helper.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules.h"
#include "helpers/rule_helpers.h"
#include "compiler/rule.h"
using namespace rules;
using build_tables::rule_can_be_blank;
@ -10,49 +8,48 @@ using build_tables::rule_can_be_blank;
START_TEST
describe("rule_can_be_blank", [&]() {
rule_ptr rule;
Rule rule;
it("returns false for basic rules", [&]() {
AssertThat(rule_can_be_blank(i_sym(3)), IsFalse());
AssertThat(rule_can_be_blank(str("x")), IsFalse());
AssertThat(rule_can_be_blank(pattern("x")), IsFalse());
AssertThat(rule_can_be_blank(CharacterSet{{'x'}}), IsFalse());
});
it("returns true for blanks", [&]() {
AssertThat(rule_can_be_blank(blank()), IsTrue());
AssertThat(rule_can_be_blank(Blank{}), IsTrue());
});
it("returns true for repeats", [&]() {
AssertThat(rule_can_be_blank(repeat(str("x"))), IsTrue());
it("returns true for repeats iff the content can be blank", [&]() {
AssertThat(rule_can_be_blank(Repeat{CharacterSet{{'x'}}}), IsFalse());
AssertThat(rule_can_be_blank(Repeat{Blank{}}), IsTrue());
});
it("returns true for choices iff one or more sides can be blank", [&]() {
rule = choice({ sym("x"), blank() });
rule = Rule::choice({ CharacterSet{{'x'}}, Blank{} });
AssertThat(rule_can_be_blank(rule), IsTrue());
rule = choice({ blank(), sym("x") });
rule = Rule::choice({ Blank{}, CharacterSet{{'x'}} });
AssertThat(rule_can_be_blank(rule), IsTrue());
rule = choice({ sym("x"), sym("y") });
rule = Rule::choice({ CharacterSet{{'x'}}, CharacterSet{{'y'}} });
AssertThat(rule_can_be_blank(rule), IsFalse());
});
it("returns true for sequences iff both sides can be blank", [&]() {
rule = seq({ blank(), str("x") });
rule = Rule::seq({ Blank{}, CharacterSet{{'x'}} });
AssertThat(rule_can_be_blank(rule), IsFalse());
rule = seq({ str("x"), blank() });
rule = Rule::seq({ CharacterSet{{'x'}}, Blank{} });
AssertThat(rule_can_be_blank(rule), IsFalse());
rule = seq({ blank(), choice({ sym("x"), blank() }) });
rule = Rule::seq({ Blank{}, Rule::choice({ CharacterSet{{'x'}}, Blank{} }) });
AssertThat(rule_can_be_blank(rule), IsTrue());
});
it("ignores metadata rules", [&]() {
rule = make_shared<rules::Metadata>(blank(), MetadataParams());
rule = Metadata::prec(1, Blank{});
AssertThat(rule_can_be_blank(rule), IsTrue());
rule = make_shared<rules::Metadata>(sym("one"), MetadataParams());
rule = Metadata::prec(1, CharacterSet{{'x'}});
AssertThat(rule_can_be_blank(rule), IsFalse());
});
});

View file

@ -1,20 +1,24 @@
#include "test_helper.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/prepare_grammar/expand_repeats.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
START_TEST
using namespace rules;
using prepare_grammar::InitialSyntaxGrammar;
using prepare_grammar::expand_repeats;
using Variable = InitialSyntaxGrammar::Variable;
// Equality for grammar variables in these specs: two variables are equal
// when their name, rule and type all compare equal (checked in that order).
bool operator==(const Variable &left, const Variable &right) {
  if (!(left.name == right.name)) return false;
  if (!(left.rule == right.rule)) return false;
  return left.type == right.type;
}
START_TEST
describe("expand_repeats", []() {
it("replaces repeat rules with pairs of recursive rules", [&]() {
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, repeat1(i_token(0))},
Variable{"rule0", VariableTypeNamed, Repeat{Symbol::terminal(0)}},
},
{}, {}, {}
};
@ -22,10 +26,10 @@ describe("expand_repeats", []() {
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, i_sym(1)},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(1), i_token(0) }),
i_token(0),
Variable{"rule0", VariableTypeNamed, Symbol::non_terminal(1)},
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(0) }),
Symbol::terminal(0),
})},
}));
});
@ -33,9 +37,9 @@ describe("expand_repeats", []() {
it("replaces repeats inside of sequences", [&]() {
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, seq({
i_token(10),
repeat1(i_token(11)),
Variable{"rule0", VariableTypeNamed, Rule::seq({
Symbol::terminal(10),
Repeat{Symbol::terminal(11)},
})},
},
{}, {}, {}
@ -44,13 +48,13 @@ describe("expand_repeats", []() {
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, seq({
i_token(10),
i_sym(1),
Variable{"rule0", VariableTypeNamed, Rule::seq({
Symbol::terminal(10),
Symbol::non_terminal(1),
})},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(1), i_token(11) }),
i_token(11)
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(11) }),
Symbol::terminal(11)
})},
}));
});
@ -58,9 +62,9 @@ describe("expand_repeats", []() {
it("replaces repeats inside of choices", [&]() {
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, choice({
i_token(10),
repeat1(i_token(11))
Variable{"rule0", VariableTypeNamed, Rule::choice({
Symbol::terminal(10),
Repeat{Symbol::terminal(11)}
})},
},
{}, {}, {}
@ -69,13 +73,13 @@ describe("expand_repeats", []() {
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, choice({
i_token(10),
i_sym(1),
Variable{"rule0", VariableTypeNamed, Rule::choice({
Symbol::terminal(10),
Symbol::non_terminal(1),
})},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(1), i_token(11) }),
i_token(11),
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(11) }),
Symbol::terminal(11),
})},
}));
});
@ -83,13 +87,13 @@ describe("expand_repeats", []() {
it("does not create redundant auxiliary rules", [&]() {
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, choice({
seq({ i_token(1), repeat1(i_token(4)) }),
seq({ i_token(2), repeat1(i_token(4)) }),
Variable{"rule0", VariableTypeNamed, Rule::choice({
Rule::seq({ Symbol::terminal(1), Repeat{Symbol::terminal(4)} }),
Rule::seq({ Symbol::terminal(2), Repeat{Symbol::terminal(4)} }),
})},
Variable{"rule1", VariableTypeNamed, seq({
i_token(3),
repeat1(i_token(4))
Variable{"rule1", VariableTypeNamed, Rule::seq({
Symbol::terminal(3),
Repeat{Symbol::terminal(4)}
})},
},
{}, {}, {}
@ -98,17 +102,17 @@ describe("expand_repeats", []() {
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, choice({
seq({ i_token(1), i_sym(2) }),
seq({ i_token(2), i_sym(2) }),
Variable{"rule0", VariableTypeNamed, Rule::choice({
Rule::seq({ Symbol::terminal(1), Symbol::non_terminal(2) }),
Rule::seq({ Symbol::terminal(2), Symbol::non_terminal(2) }),
})},
Variable{"rule1", VariableTypeNamed, seq({
i_token(3),
i_sym(2),
Variable{"rule1", VariableTypeNamed, Rule::seq({
Symbol::terminal(3),
Symbol::non_terminal(2),
})},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(2), i_token(4) }),
i_token(4),
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(4) }),
Symbol::terminal(4),
})},
}));
});
@ -116,9 +120,9 @@ describe("expand_repeats", []() {
it("can replace multiple repeats in the same rule", [&]() {
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, seq({
repeat1(i_token(10)),
repeat1(i_token(11)),
Variable{"rule0", VariableTypeNamed, Rule::seq({
Repeat{Symbol::terminal(10)},
Repeat{Symbol::terminal(11)},
})},
},
{}, {}, {}
@ -127,17 +131,17 @@ describe("expand_repeats", []() {
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, seq({
i_sym(1),
i_sym(2),
Variable{"rule0", VariableTypeNamed, Rule::seq({
Symbol::non_terminal(1),
Symbol::non_terminal(2),
})},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(1), i_token(10) }),
i_token(10),
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(10) }),
Symbol::terminal(10),
})},
Variable{"rule0_repeat2", VariableTypeAuxiliary, choice({
seq({ i_sym(2), i_token(11) }),
i_token(11),
Variable{"rule0_repeat2", VariableTypeAuxiliary, Rule::choice({
Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(11) }),
Symbol::terminal(11),
})},
}));
});
@ -145,8 +149,8 @@ describe("expand_repeats", []() {
it("can replace repeats in multiple rules", [&]() {
InitialSyntaxGrammar grammar{
{
Variable{"rule0", VariableTypeNamed, repeat1(i_token(10))},
Variable{"rule1", VariableTypeNamed, repeat1(i_token(11))},
Variable{"rule0", VariableTypeNamed, Repeat{Symbol::terminal(10)}},
Variable{"rule1", VariableTypeNamed, Repeat{Symbol::terminal(11)}},
},
{}, {}, {}
};
@ -154,15 +158,15 @@ describe("expand_repeats", []() {
auto result = expand_repeats(grammar);
AssertThat(result.variables, Equals(vector<Variable>{
Variable{"rule0", VariableTypeNamed, i_sym(2)},
Variable{"rule1", VariableTypeNamed, i_sym(3)},
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(2), i_token(10) }),
i_token(10),
Variable{"rule0", VariableTypeNamed, Symbol::non_terminal(2)},
Variable{"rule1", VariableTypeNamed, Symbol::non_terminal(3)},
Variable{"rule0_repeat1", VariableTypeAuxiliary, Rule::choice({
Rule::seq({ Symbol::non_terminal(2), Symbol::terminal(10) }),
Symbol::terminal(10),
})},
Variable{"rule1_repeat1", VariableTypeAuxiliary, choice({
seq({ i_sym(3), i_token(11) }),
i_token(11),
Variable{"rule1_repeat1", VariableTypeAuxiliary, Rule::choice({
Rule::seq({ Symbol::non_terminal(3), Symbol::terminal(11) }),
Symbol::terminal(11),
})},
}));
});

View file

@ -1,12 +1,13 @@
#include "test_helper.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/expand_tokens.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
START_TEST
using namespace rules;
using prepare_grammar::expand_tokens;
using prepare_grammar::expand_token;
using prepare_grammar::ExpandTokenResult;
describe("expand_tokens", []() {
MetadataParams string_token_params;
@ -15,153 +16,72 @@ describe("expand_tokens", []() {
describe("string rules", [&]() {
it("replaces strings with sequences of character sets", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"rule_A",
VariableTypeNamed,
seq({
i_sym(10),
str("xyz"),
i_sym(11),
}),
false
}
},
{}
};
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
LexicalVariable{
"rule_A",
VariableTypeNamed,
seq({
i_sym(10),
metadata(seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
}), string_token_params),
i_sym(11),
AssertThat(
expand_token(Rule::seq({
String{"a"},
String{"bcd"},
String{"e"}
})).rule,
Equals(Rule::seq({
CharacterSet{{ 'a' }},
Rule::seq({
CharacterSet{{ 'b' }},
CharacterSet{{ 'c' }},
CharacterSet{{ 'd' }},
}),
false
}
}));
CharacterSet{{ 'e' }},
})));
});
it("handles strings containing non-ASCII UTF8 characters", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"rule_A",
VariableTypeNamed,
str("\u03B1 \u03B2"),
false
},
},
{}
};
auto result = expand_tokens(grammar);
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
LexicalVariable{
"rule_A",
VariableTypeNamed,
metadata(seq({
character({ 945 }),
character({ ' ' }),
character({ 946 }),
}), string_token_params),
false
}
}));
AssertThat(
expand_token(String{"\u03B1 \u03B2"}).rule,
Equals(Rule::seq({
CharacterSet{{ 945 }},
CharacterSet{{ ' ' }},
CharacterSet{{ 946 }},
}))
);
});
});
describe("regexp rules", [&]() {
it("replaces regexps with the equivalent rule tree", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"rule_A",
VariableTypeNamed,
seq({
i_sym(10),
pattern("x*"),
i_sym(11),
}),
false
}
},
{}
};
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
LexicalVariable{
"rule_A",
VariableTypeNamed,
seq({
i_sym(10),
repeat(character({ 'x' })),
i_sym(11),
}),
false
}
}));
AssertThat(
expand_token(Rule::seq({
String{"a"},
Pattern{"x+"},
String{"b"},
})).rule,
Equals(Rule::seq({
CharacterSet{{'a'}},
Repeat{CharacterSet{{ 'x' }}},
CharacterSet{{'b'}},
}))
);
});
it("handles regexps containing non-ASCII UTF8 characters", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"rule_A",
VariableTypeNamed,
pattern("[^\u03B1-\u03B4]*"),
false
}
},
{}
};
auto result = expand_tokens(grammar);
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
LexicalVariable{
"rule_A",
VariableTypeNamed,
repeat(character({ 945, 946, 947, 948 }, false)),
false
}
}));
AssertThat(
expand_token(Pattern{"[^\u03B1-\u03B4]+"}).rule,
Equals(Rule(Repeat{
CharacterSet().include_all().exclude(945, 948)
}))
);
});
it("returns an error when the grammar contains an invalid regex", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"rule_A",
VariableTypeNamed,
seq({
pattern("("),
str("xyz"),
pattern("["),
}),
false
},
},
{}
};
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals(CompileError(TSCompileErrorTypeInvalidRegex, "unmatched open paren")));
AssertThat(
expand_token(Rule::seq({
Pattern{"("},
String{"xyz"},
Pattern{"["},
})).error,
Equals(CompileError(
TSCompileErrorTypeInvalidRegex,
"unmatched open paren"
))
);
});
});
});

View file

@ -1,104 +1,75 @@
#include "test_helper.h"
#include "helpers/stream_methods.h"
#include "compiler/prepare_grammar/extract_choices.h"
#include "helpers/rule_helpers.h"
START_TEST
using namespace rules;
using prepare_grammar::extract_choices;
// Spec helper: a vector of rule pointers whose equality compares the
// pointed-to rules element by element, rather than the pointers themselves.
class rule_vector : public vector<rule_ptr> {
 public:
  // Returns true when both vectors have the same length and every pair of
  // corresponding elements compares equal through the rule's own operator==.
  bool operator==(const vector<rule_ptr> &other) const {
    if (this->size() != other.size()) return false;
    for (size_t i = 0; i < this->size(); i++) {
      const auto &rule = this->operator[](i);
      const auto &other_rule = other[i];
      // Compare against the element taken from `other`. Comparing `rule`
      // with itself made any two vectors of equal length look equal.
      if (!rule->operator==(*other_rule))
        return false;
    }
    return true;
  }

  // Builds the vector from a braced list of rule pointers.
  rule_vector(const initializer_list<rule_ptr> &list) :
    vector<rule_ptr>(list) {}
};
describe("extract_choices", []() {
it("expands rules containing choices into multiple rules", [&]() {
auto rule = seq({
sym("a"),
choice({ sym("b"), sym("c"), sym("d") }),
sym("e")
auto rule = Rule::seq({
Symbol::terminal(1),
Rule::choice({
Symbol::terminal(2),
Symbol::terminal(3),
Symbol::terminal(4)
}),
Symbol::terminal(5)
});
AssertThat(extract_choices(rule), Equals(rule_vector({
seq({ sym("a"), sym("b"), sym("e") }),
seq({ sym("a"), sym("c"), sym("e") }),
seq({ sym("a"), sym("d"), sym("e") }),
auto result = extract_choices(rule);
AssertThat(result, Equals(vector<Rule>({
Rule::seq({Symbol::terminal(1), Symbol::terminal(2), Symbol::terminal(5)}),
Rule::seq({Symbol::terminal(1), Symbol::terminal(3), Symbol::terminal(5)}),
Rule::seq({Symbol::terminal(1), Symbol::terminal(4), Symbol::terminal(5)}),
})));
});
it("handles metadata rules", [&]() {
auto rule = prec(5, choice({ sym("b"), sym("c"), sym("d") }));
auto rule = Metadata::prec(5, Rule::choice({
Symbol::terminal(2),
Symbol::terminal(3),
Symbol::terminal(4)
}));
AssertThat(extract_choices(rule), Equals(rule_vector({
prec(5, sym("b")),
prec(5, sym("c")),
prec(5, sym("d")),
AssertThat(extract_choices(rule), Equals(vector<Rule>({
Metadata::prec(5, Symbol::terminal(2)),
Metadata::prec(5, Symbol::terminal(3)),
Metadata::prec(5, Symbol::terminal(4)),
})));
});
it("handles nested choices", [&]() {
auto rule = choice({
seq({ choice({ sym("a"), sym("b") }), sym("c") }),
sym("d")
auto rule = Rule::choice({
Rule::seq({
Rule::choice({
Symbol::terminal(1),
Symbol::terminal(2)
}),
Symbol::terminal(3)
}),
Symbol::terminal(4)
});
AssertThat(extract_choices(rule), Equals(rule_vector({
seq({ sym("a"), sym("c") }),
seq({ sym("b"), sym("c") }),
sym("d"),
AssertThat(extract_choices(rule), Equals(vector<Rule>({
Rule::seq({Symbol::terminal(1), Symbol::terminal(3)}),
Rule::seq({Symbol::terminal(2), Symbol::terminal(3)}),
Symbol::terminal(4),
})));
});
it("handles single symbols", [&]() {
AssertThat(extract_choices(Symbol::terminal(2)), Equals(vector<Rule>({
Symbol::terminal(2)
})));
});
it("handles blank rules", [&]() {
AssertThat(extract_choices(blank()), Equals(rule_vector({
blank(),
})));
});
it("does not move choices outside of repeats", [&]() {
auto rule = seq({
choice({ sym("a"), sym("b") }),
repeat1(seq({
sym("c"),
choice({
sym("d"),
sym("e"),
}),
sym("f"),
})),
sym("g"),
});
AssertThat(extract_choices(rule), Equals(rule_vector({
seq({
sym("a"),
repeat1(choice({
seq({ sym("c"), sym("d"), sym("f") }),
seq({ sym("c"), sym("e"), sym("f") }),
})),
sym("g"),
}),
seq({
sym("b"),
repeat1(choice({
seq({ sym("c"), sym("d"), sym("f") }),
seq({ sym("c"), sym("e"), sym("f") }),
})),
sym("g"),
}),
AssertThat(extract_choices(Blank{}), Equals(vector<Rule>({
Blank{},
})));
});
});

View file

@ -3,8 +3,6 @@
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/prepare_grammar/extract_tokens.h"
#include "helpers/rule_helpers.h"
#include "helpers/equals_pointer.h"
#include "helpers/stream_methods.h"
START_TEST
@ -13,23 +11,44 @@ using namespace rules;
using prepare_grammar::extract_tokens;
using prepare_grammar::InternedGrammar;
using prepare_grammar::InitialSyntaxGrammar;
using InternedVariable = InternedGrammar::Variable;
using InitialSyntaxVariable = InitialSyntaxGrammar::Variable;
describe("extract_tokens", []() {
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
str("ab"),
pattern("cd*"),
choice({
i_sym(1),
i_sym(2),
token(repeat1(choice({ str("ef"), str("gh") }))),
}),
}))},
Variable{"rule_B", VariableTypeNamed, pattern("ij+")},
Variable{"rule_C", VariableTypeNamed, choice({ str("kl"), blank() })},
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(3))},
InternedVariable{
"rule_A",
VariableTypeNamed,
Repeat{Rule::seq({
String{"ab"},
Pattern{"cd+"},
Rule::choice({
Symbol::non_terminal(1),
Symbol::non_terminal(2),
Metadata::token(Repeat{Rule::choice({
String{"ef"},
String{"g"}
})}),
}),
})}
},
InternedVariable{
"rule_B",
VariableTypeNamed,
Pattern{"h+"}
},
InternedVariable{
"rule_C",
VariableTypeNamed,
Rule::choice({ String{"i"}, Blank{} })
},
InternedVariable{
"rule_D",
VariableTypeNamed,
Repeat{Symbol::non_terminal(3)}
},
},
{},
{},
@ -42,62 +61,104 @@ describe("extract_tokens", []() {
AssertThat(error, Equals(CompileError::none()));
AssertThat(syntax_grammar.variables, Equals(vector<Variable>{
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable>{
InitialSyntaxVariable{
"rule_A",
VariableTypeNamed,
Repeat{Rule::seq({
// This string is now the first token in the lexical grammar.
i_token(0),
// This string is now the first token in the lexical grammar.
Symbol::terminal(0),
// This pattern is now the second rule in the lexical grammar.
i_token(1),
// This pattern is now the second rule in the lexical grammar.
Symbol::terminal(1),
choice({
// Rule 1, which this symbol pointed to, has been moved to the
// lexical grammar.
i_token(3),
Rule::choice({
// Rule 1, which this symbol pointed to, has been moved to the
// lexical grammar.
Symbol::terminal(3),
// This symbol's index has been decremented, because a previous rule
// was moved to the lexical grammar.
i_sym(1),
// This symbol's index has been decremented, because a previous rule
// was moved to the lexical grammar.
Symbol::non_terminal(1),
// This token rule is now the third rule in the lexical grammar.
i_token(2),
}),
}))},
// This token rule is now the third rule in the lexical grammar.
Symbol::terminal(2),
}),
})}
},
Variable{"rule_C", VariableTypeNamed, choice({ i_token(4), blank() })},
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(2))},
InitialSyntaxVariable{
"rule_C",
VariableTypeNamed,
Rule::choice({Symbol::terminal(4), Blank{}})
},
InitialSyntaxVariable{
"rule_D",
VariableTypeNamed,
Repeat{Symbol::non_terminal(2)}
},
}));
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable>({
// Strings become anonymous rules.
LexicalVariable{"ab", VariableTypeAnonymous, str("ab"), true},
LexicalVariable{
"ab",
VariableTypeAnonymous,
Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}},
true
},
// Patterns become hidden rules.
LexicalVariable{"/cd*/", VariableTypeAuxiliary, pattern("cd*"), false},
LexicalVariable{
"/cd+/",
VariableTypeAuxiliary,
Seq{CharacterSet{{'c'}}, Repeat{CharacterSet{{'d'}}}},
false
},
// Rules marked as tokens become hidden rules.
LexicalVariable{"/(ef|gh)*/", VariableTypeAuxiliary, repeat1(choice({
str("ef"),
str("gh")
})), false},
LexicalVariable{
"/(ef|g)+/",
VariableTypeAuxiliary,
Repeat{Rule::choice({
Seq{CharacterSet{{'e'}}, CharacterSet{{'f'}}},
CharacterSet{{'g'}},
})},
false
},
// This named rule was moved wholesale to the lexical grammar.
LexicalVariable{"rule_B", VariableTypeNamed, pattern("ij+"), false},
LexicalVariable{
"rule_B",
VariableTypeNamed,
Repeat{CharacterSet{{'h'}}},
false
},
// Strings become anonymous rules.
LexicalVariable{"kl", VariableTypeAnonymous, str("kl"), true},
LexicalVariable{
"i",
VariableTypeAnonymous,
CharacterSet{{'i'}},
true
},
})));
});
it("does not create duplicate tokens in the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{"rule_A", VariableTypeNamed, seq({
str("ab"),
i_sym(0),
str("ab"),
})},
{
"rule_A",
VariableTypeNamed,
Rule::seq({
String{"ab"},
Symbol::non_terminal(1),
String{"ab"},
})
},
},
{},
{},
@ -107,50 +168,114 @@ describe("extract_tokens", []() {
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
AssertThat(syntax_grammar.variables, Equals(vector<Variable> {
Variable {"rule_A", VariableTypeNamed, seq({ i_token(0), i_sym(0), i_token(0) })},
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable> {
InitialSyntaxVariable{
"rule_A",
VariableTypeNamed,
Rule::seq({
Symbol::terminal(0),
Symbol::non_terminal(1),
Symbol::terminal(0)
})
},
}));
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable> {
LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true},
LexicalVariable{
"ab",
VariableTypeAnonymous,
Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}},
true
},
}))
});
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })},
Variable{"rule_B", VariableTypeNamed, str("cd")},
Variable{"rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })},
InternedVariable{
"rule_A",
VariableTypeNamed,
Rule::seq({ Symbol::non_terminal(1), String{"ab"} })
},
InternedVariable{
"rule_B",
VariableTypeNamed,
String{"cd"}
},
InternedVariable{
"rule_C",
VariableTypeNamed,
Rule::seq({ String{"ef"}, String{"cd"} })
},
}, {}, {}, {}});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })},
Variable{"rule_B", VariableTypeNamed, i_token(1)},
Variable{"rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })},
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable>({
InitialSyntaxVariable{
"rule_A",
VariableTypeNamed,
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(0) })
},
InitialSyntaxVariable{
"rule_B",
VariableTypeNamed,
Symbol::terminal(1)
},
InitialSyntaxVariable{
"rule_C",
VariableTypeNamed,
Rule::seq({ Symbol::terminal(2), Symbol::terminal(1) })
},
})));
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable> {
LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true},
LexicalVariable {"cd", VariableTypeAnonymous, str("cd"), true},
LexicalVariable {"ef", VariableTypeAnonymous, str("ef"), true},
LexicalVariable{
"ab",
VariableTypeAnonymous,
Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}},
true
},
LexicalVariable{
"cd",
VariableTypeAnonymous,
Seq{CharacterSet{{'c'}}, CharacterSet{{'d'}}},
true
},
LexicalVariable{
"ef",
VariableTypeAnonymous,
Seq{CharacterSet{{'e'}}, CharacterSet{{'f'}}},
true
},
}));
});
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{"rule_A", VariableTypeNamed, str("ok")},
Variable{"rule_B", VariableTypeNamed, repeat(i_sym(0))},
Variable{"rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))},
InternedVariable{
"rule_A",
VariableTypeNamed,
String{"ok"}
},
InternedVariable{
"rule_B",
VariableTypeNamed,
Repeat{Symbol::non_terminal(0)}
},
InternedVariable{
"rule_C",
VariableTypeNamed,
Repeat{Seq{Symbol::non_terminal(0), Symbol::non_terminal(0)}}
},
},
{
str(" ")
String{" "}
},
{
{ Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) }
{ Symbol::non_terminal(1), Symbol::non_terminal(2) }
},
{}
});
@ -159,7 +284,7 @@ describe("extract_tokens", []() {
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(2));
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
{ Symbol(0, Symbol::NonTerminal), Symbol(1, Symbol::NonTerminal) },
{ Symbol::non_terminal(0), Symbol::non_terminal(1) },
})));
});
@ -167,11 +292,11 @@ describe("extract_tokens", []() {
it("adds inline extra tokens to the lexical grammar's separators", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{"rule_A", VariableTypeNamed, str("x")},
InternedVariable{"rule_A", VariableTypeNamed, String{"x"}},
},
{
str("y"),
pattern("\\s+"),
String{"y"},
Pattern{" "},
},
{},
{}
@ -180,8 +305,8 @@ describe("extract_tokens", []() {
AssertThat(get<2>(result), Equals(CompileError::none()));
AssertThat(get<1>(result).separators.size(), Equals<size_t>(2));
AssertThat(get<1>(result).separators[0], EqualsPointer(str("y")));
AssertThat(get<1>(result).separators[1], EqualsPointer(pattern("\\s+")));
AssertThat(get<1>(result).separators[0], Equals(Rule(CharacterSet{{'y'}})));
AssertThat(get<1>(result).separators[1], Equals(Rule(CharacterSet{{' '}})));
AssertThat(get<0>(result).extra_tokens, IsEmpty());
});
@ -189,11 +314,11 @@ describe("extract_tokens", []() {
it("handles inline extra tokens that match tokens in the grammar", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{"rule_A", VariableTypeNamed, str("x")},
Variable{"rule_B", VariableTypeNamed, str("y")},
InternedVariable{"rule_A", VariableTypeNamed, String{"x"}},
InternedVariable{"rule_B", VariableTypeNamed, String{"y"}},
},
{
str("y"),
String{"y"},
},
{},
{}
@ -201,18 +326,30 @@ describe("extract_tokens", []() {
AssertThat(get<2>(result), Equals(CompileError::none()));
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, Symbol::Terminal) })));
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol::terminal(1) })));
});
it("updates extra symbols according to the new symbol numbers", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{"rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })},
Variable{"rule_B", VariableTypeNamed, str("y")},
Variable{"rule_C", VariableTypeNamed, str("z")},
InternedVariable{
"rule_A",
VariableTypeNamed,
Rule::seq({ String{"w"}, String{"x"}, Symbol::non_terminal(1) })
},
InternedVariable{
"rule_B",
VariableTypeNamed,
String{"y"}
},
InternedVariable{
"rule_C",
VariableTypeNamed,
String{"z"}
},
},
{
i_sym(2),
Symbol::non_terminal(2),
},
{},
{}
@ -221,34 +358,55 @@ describe("extract_tokens", []() {
AssertThat(get<2>(result), Equals(CompileError::none()));
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
{ Symbol(3, Symbol::Terminal) },
{ Symbol::terminal(3) },
})));
AssertThat(get<1>(result).separators, IsEmpty());
});
// Spec: when the extra-token list names a symbol that is not a token,
// extract_tokens must report TSCompileErrorTypeInvalidExtraToken.
// NOTE(review): this text appears to be a rendered diff with the +/- markers
// stripped, so the pre-change and post-change spelling of each changed
// statement sit back to back (e.g. the two `auto result = ...` forms below).
it("returns an error if any extra tokens are non-token symbols", [&]() {
auto result = extract_tokens(InternedGrammar{{
Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
}, { i_sym(1) }, {}, {}});
auto result = extract_tokens(InternedGrammar{
{
InternedVariable{
"rule_A",
VariableTypeNamed,
Rule::seq({ String{"x"}, Symbol::non_terminal(1) })
},
InternedVariable{
"rule_B",
VariableTypeNamed,
Rule::seq({ String{"y"}, String{"z"} })
},
},
{
// The offending extra token: non-terminal 1, which the expected error
// message below reports as rule_B (the second variable in the list).
Symbol::non_terminal(1)
},
{},
{}
});
// The third element of the returned tuple is compared against CompileError
// values: first that it is not "none", then that it matches exactly.
AssertThat(get<2>(result), !Equals(CompileError::none()));
AssertThat(get<2>(result), Equals(
CompileError(TSCompileErrorTypeInvalidExtraToken,
"Not a token: rule_B")));
AssertThat(get<2>(result), Equals(CompileError(
TSCompileErrorTypeInvalidExtraToken,
"Non-token symbol rule_B can't be used as an extra token"
)));
});
// Spec: when the extra-token list contains a rule expression that is not a
// token (here a choice between a non-terminal and blank), extract_tokens must
// report TSCompileErrorTypeInvalidExtraToken.
// NOTE(review): this text appears to be a rendered diff with the +/- markers
// stripped; the old and new spelling of each changed statement are adjacent.
it("returns an error if any extra tokens are non-token rules", [&]() {
auto result = extract_tokens(InternedGrammar{{
Variable{"rule_A", VariableTypeNamed, str("x")},
Variable{"rule_B", VariableTypeNamed, str("y")},
}, { choice({ i_sym(1), blank() }) }, {}, {}});
auto result = extract_tokens(InternedGrammar{
{
{"rule_A", VariableTypeNamed, String{"x"}},
{"rule_B", VariableTypeNamed, String{"y"}},
},
{
// The offending extra token is an inline expression, not a bare symbol.
Rule::choice({ Symbol::non_terminal(1), Blank{} })
},
{},
{}
});
AssertThat(get<2>(result), !Equals(CompileError::none()));
AssertThat(get<2>(result), Equals(CompileError(
TSCompileErrorTypeInvalidExtraToken,
// NOTE(review): the two literals below look like the old and the new expected
// message from the diff, not one intentionally concatenated string — confirm
// against the real file before relying on either.
"Not a token: (choice (non-terminal 1) (blank))"
"Non-token rule expression can't be used as an extra token"
)));
});
});
@ -256,13 +414,21 @@ describe("extract_tokens", []() {
it("returns an error if an external token has the same name as a non-terminal rule", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
{
"rule_A",
VariableTypeNamed,
Rule::seq({ String{"x"}, Symbol::non_terminal(1) })
},
{
"rule_B",
VariableTypeNamed,
Rule::seq({ String{"y"}, String{"z"} })
},
},
{},
{},
{
ExternalToken {"rule_A", VariableTypeNamed, Symbol(0, Symbol::NonTerminal)}
ExternalToken {"rule_A", VariableTypeNamed, Symbol::non_terminal(0)}
}
});

View file

@ -2,7 +2,6 @@
#include "compiler/prepare_grammar/flatten_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/syntax_grammar.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
START_TEST
@ -12,23 +11,23 @@ using prepare_grammar::flatten_rule;
describe("flatten_grammar", []() {
it("associates each symbol with the precedence and associativity binding it to its successor", [&]() {
SyntaxVariable result = flatten_rule(Variable{
SyntaxVariable result = flatten_rule({
"test",
VariableTypeNamed,
seq({
i_sym(1),
prec_left(101, seq({
i_sym(2),
choice({
prec_right(102, seq({
i_sym(3),
i_sym(4)
Rule::seq({
Symbol::non_terminal(1),
Metadata::prec_left(101, Rule::seq({
Symbol::non_terminal(2),
Rule::choice({
Metadata::prec_right(102, Rule::seq({
Symbol::non_terminal(3),
Symbol::non_terminal(4)
})),
i_sym(5),
Symbol::non_terminal(5),
}),
i_sym(6),
Symbol::non_terminal(6),
})),
i_sym(7),
Symbol::non_terminal(7),
})
});
@ -36,51 +35,51 @@ describe("flatten_grammar", []() {
AssertThat(result.type, Equals(VariableTypeNamed));
AssertThat(result.productions, Equals(vector<Production>({
Production({
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol(3, Symbol::NonTerminal), 102, AssociativityRight},
{Symbol(4, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 101, AssociativityLeft},
{Symbol::non_terminal(3), 102, AssociativityRight},
{Symbol::non_terminal(4), 101, AssociativityLeft},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
}),
Production({
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol(5, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 101, AssociativityLeft},
{Symbol::non_terminal(5), 101, AssociativityLeft},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
})
})))
});
// Spec: a prec_left(101, ...) wrapper around a sequence is expected to stamp
// precedence 101 / AssociativityLeft on every step of the flattened
// production, including the final step.
// NOTE(review): this text appears to be a rendered diff with the +/- markers
// stripped; the old helper-based spelling (i_sym, seq, prec_left) and the new
// spelling (Symbol::non_terminal, Rule::seq, Metadata::prec_left) are adjacent.
it("uses the last assigned precedence", [&]() {
SyntaxVariable result = flatten_rule(Variable{
SyntaxVariable result = flatten_rule({
"test1",
VariableTypeNamed,
prec_left(101, seq({
i_sym(1),
i_sym(2),
Metadata::prec_left(101, Rule::seq({
Symbol::non_terminal(1),
Symbol::non_terminal(2),
}))
});
// Two-symbol case: both steps carry the wrapper's precedence and associativity.
AssertThat(result.productions, Equals(vector<Production>({
Production({
{Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol::non_terminal(1), 101, AssociativityLeft},
{Symbol::non_terminal(2), 101, AssociativityLeft},
})
})))
// Single-symbol case: the only step still carries the wrapper's values.
result = flatten_rule(Variable{
result = flatten_rule({
"test2",
VariableTypeNamed,
prec_left(101, seq({
i_sym(1),
Metadata::prec_left(101, Rule::seq({
Symbol::non_terminal(1),
}))
});
AssertThat(result.productions, Equals(vector<Production>({
Production({
{Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol::non_terminal(1), 101, AssociativityLeft},
})
})))
});

View file

@ -1,11 +1,7 @@
#include "test_helper.h"
#include "compiler/prepare_grammar/intern_symbols.h"
#include "compiler/grammar.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
#include "helpers/equals_pointer.h"
#include "helpers/rule_helpers.h"
#include "compiler/rule.h"
#include "helpers/stream_methods.h"
START_TEST
@ -15,29 +11,29 @@ using prepare_grammar::intern_symbols;
describe("intern_symbols", []() {
// Spec: intern_symbols rewrites name-based symbol references into
// index-based non-terminal symbols and reports no error.
// NOTE(review): this text appears to be a rendered diff with the +/- markers
// stripped; the old (Grammar / sym / str) and new (InputGrammar / NamedSymbol /
// String) spelling of each changed line are adjacent.
it("replaces named symbols with numerically-indexed symbols", [&]() {
Grammar grammar{
InputGrammar grammar{
{
{"x", choice({ sym("y"), sym("_z") })},
{"y", sym("_z")},
{"_z", str("stuff")}
{"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"_z"} })},
{"y", VariableTypeNamed, NamedSymbol{"_z"}},
{"_z", VariableTypeNamed, String{"stuff"}}
}, {}, {}, {}
};
auto result = intern_symbols(grammar);
// result.second is the error slot; result.first is the interned grammar.
AssertThat(result.second, Equals(CompileError::none()));
// Expected indices follow declaration order: "y" -> 1, "_z" -> 2.
// Note that "_z" is declared VariableTypeNamed in the new-style input above
// but is expected to come back as VariableTypeHidden.
AssertThat(result.first.variables, Equals(vector<Variable>{
Variable{"x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })},
Variable{"y", VariableTypeNamed, i_sym(2)},
Variable{"_z", VariableTypeHidden, str("stuff")},
AssertThat(result.first.variables, Equals(vector<prepare_grammar::InternedGrammar::Variable>{
{"x", VariableTypeNamed, Rule::choice({ Symbol::non_terminal(1), Symbol::non_terminal(2) })},
{"y", VariableTypeNamed, Symbol::non_terminal(2)},
{"_z", VariableTypeHidden, String{"stuff"}},
}));
});
describe("when there are symbols that reference undefined rules", [&]() {
it("returns an error", []() {
Grammar grammar{
InputGrammar grammar{
{
{"x", sym("y")},
{"x", VariableTypeNamed, NamedSymbol{"y"}},
},
{}, {}, {}
};
@ -49,14 +45,14 @@ describe("intern_symbols", []() {
});
it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() {
Grammar grammar{
InputGrammar grammar{
{
{"x", choice({ sym("y"), sym("z") })},
{"y", sym("z")},
{"z", str("stuff")}
{"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"z"} })},
{"y", VariableTypeNamed, NamedSymbol{"z"}},
{"z", VariableTypeNamed, String{"stuff"}}
},
{
sym("z")
NamedSymbol{"z"}
},
{}, {}
};
@ -65,21 +61,29 @@ describe("intern_symbols", []() {
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.extra_tokens.size(), Equals<size_t>(1));
AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
AssertThat(result.first.extra_tokens, Equals(vector<Rule>({ Symbol::non_terminal(2) })));
});
it("records any rule names that match external token names", [&]() {
Grammar grammar{
InputGrammar grammar{
{
{"x", choice({ sym("y"), sym("z") })},
{"y", sym("z")},
{"z", str("stuff")},
{"x", VariableTypeNamed, Rule::choice({ NamedSymbol{"y"}, NamedSymbol{"z"} })},
{"y", VariableTypeNamed, NamedSymbol{"z"}},
{"z", VariableTypeNamed, String{"stuff"}},
},
{},
{},
{
"w",
"z"
ExternalToken{
"w",
VariableTypeNamed,
NONE()
},
ExternalToken{
"z",
VariableTypeNamed,
NONE()
},
}
};
@ -94,7 +98,7 @@ describe("intern_symbols", []() {
ExternalToken{
"z",
VariableTypeNamed,
Symbol(2, Symbol::NonTerminal)
Symbol::non_terminal(2)
},
}))
});

View file

@ -1,7 +1,5 @@
#include "test_helper.h"
#include "compiler/prepare_grammar/parse_regex.h"
#include "helpers/equals_pointer.h"
#include "helpers/rule_helpers.h"
START_TEST
@ -12,178 +10,218 @@ describe("parse_regex", []() {
// One row of the table-driven parse_regex specs.
struct ValidInputRow {
// Human-readable case name; the spec loop prefixes it with "parses ".
string description;
// Regex source text handed to parse_regex.
string pattern;
// Expected rule, compared against the first element of parse_regex's result.
// NOTE(review): the two declarations below look like the old (rule_ptr) and
// new (Rule) type of the same member from a rendered diff, not two members.
rule_ptr rule;
Rule rule;
};
// Table of regex patterns and the rule tree each is expected to parse into.
// Each row is {description, pattern, expected rule}.
// NOTE(review): this text appears to be a rendered diff with the +/- markers
// stripped. Within each row the old expectation (character / seq / choice /
// repeat helpers) is followed by the new one (CharacterSet / Rule::seq /
// Rule::choice / Repeat); the row is not valid C++ as shown.
vector<ValidInputRow> valid_inputs = {
{
"character sets",
"[aAeE]",
character({ 'a', 'A', 'e', 'E' })
CharacterSet{{ 'a', 'A', 'e', 'E' }}
},
{
"'.' characters as wildcards",
".",
// New form: everything included, then '\n' excluded.
character({ '\n' }, false)
CharacterSet().include_all().exclude('\n')
},
{
"character classes",
"\\w-\\d-\\s-\\W-\\D-\\S",
seq({
character({
Rule::seq({
CharacterSet{{
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }),
character({ '-' }),
character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }),
character({ '-' }),
character({ ' ', '\t', '\r', '\n' }),
character({ '-' }),
character({
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }, false),
character({ '-' }),
character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false),
character({ '-' }),
character({ ' ', '\t', '\r', '\n' }, false),
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_' }},
CharacterSet{{ '-' }},
CharacterSet{{ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }},
CharacterSet{{ '-' }},
CharacterSet{{ ' ', '\t', '\r', '\n' }},
CharacterSet{{ '-' }},
// Negated classes (\W, \D, \S) are written as include_all() minus ranges.
CharacterSet().include_all()
.exclude('a', 'z')
.exclude('A', 'Z')
.exclude('0', '9')
.exclude('_'),
CharacterSet{{ '-' }},
CharacterSet().include_all().exclude('0', '9'),
CharacterSet{{ '-' }},
CharacterSet().include_all()
.exclude(' ')
.exclude('\t')
.exclude('\r')
.exclude('\n')
})
},
{
"choices",
"ab|cd|ef",
choice({
seq({
character({ 'a' }),
character({ 'b' }) }),
seq({
character({ 'c' }),
character({ 'd' }) }),
seq({
character({ 'e' }),
character({ 'f' }) }) })
Rule::choice({
Seq{
CharacterSet{{'a'}},
CharacterSet{{'b'}}
},
Seq{
CharacterSet{{'c'}},
CharacterSet{{'d'}}
},
Seq{
CharacterSet{{'e'}},
CharacterSet{{'f'}}
}
})
},
{
"simple sequences",
"abc",
seq({
character({ 'a' }),
character({ 'b' }),
character({ 'c' }) })
Rule::seq({
CharacterSet{{'a'}},
CharacterSet{{'b'}},
CharacterSet{{'c'}}
})
},
{
"character ranges",
"[12a-dA-D3]",
character({
CharacterSet{{
'1', '2', '3',
'a', 'b', 'c', 'd',
'A', 'B', 'C', 'D' })
'A', 'B', 'C', 'D'
}}
},
{
"negated characters",
"[^a\\d]",
character({ 'a', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false)
CharacterSet().include_all()
.exclude('a')
.exclude('0', '9')
},
{
"backslashes",
"\\\\",
character({ '\\' })
CharacterSet{{'\\'}}
},
{
"character groups in sequences",
"x([^x]|\\\\x)*x",
seq({
character({ 'x' }),
repeat(choice({
character({ 'x' }, false),
seq({ character({ '\\' }), character({ 'x' }) }) })),
character({ 'x' }) })
Rule::seq({
CharacterSet{{'x'}},
// New form spells `*` as a choice between a Repeat and Blank.
Rule::choice({
Repeat{Rule::choice({
CharacterSet().include_all().exclude('x'),
Rule::seq({
CharacterSet{{'\\'}},
CharacterSet{{'x'}}
})
})},
Blank{}
}),
CharacterSet{{'x'}}
})
},
{
"choices in sequences",
"(a|b)cd",
seq({
choice({
character({ 'a' }),
character({ 'b' }) }),
character({ 'c' }),
character({ 'd' }) })
Rule::seq({
Rule::choice({
CharacterSet{{'a'}},
CharacterSet{{'b'}} }),
CharacterSet{{'c'}},
CharacterSet{{'d'}} })
},
{
"escaped parentheses",
"a\\(b",
seq({
character({ 'a' }),
character({ '(' }),
character({ 'b' }) })
Rule::seq({
CharacterSet{{'a'}},
CharacterSet{{'('}},
CharacterSet{{'b'}},
})
},
{
"escaped periods",
"a\\.",
seq({
character({ 'a' }),
character({ '.' }) })
Rule::seq({
CharacterSet{{'a'}},
CharacterSet{{'.'}},
})
},
{
"escaped characters",
"\\t\\n\\r",
seq({
character({ '\t' }),
character({ '\n' }),
character({ '\r' }) })
Rule::seq({
CharacterSet{{'\t'}},
CharacterSet{{'\n'}},
CharacterSet{{'\r'}},
})
},
{
"plus repeats",
"(ab)+(cd)+",
// New form spells `+` as a bare Repeat (no Blank alternative).
seq({
repeat1(seq({ character({ 'a' }), character({ 'b' }) })),
repeat1(seq({ character({ 'c' }), character({ 'd' }) })) })
Rule::seq({
Repeat{Rule::seq({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })},
Repeat{Rule::seq({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })},
})
},
{
"asterix repeats",
"(ab)*(cd)*",
seq({
repeat(seq({ character({ 'a' }), character({ 'b' }) })),
repeat(seq({ character({ 'c' }), character({ 'd' }) })) })
Rule::seq({
Rule::choice({
Repeat{Rule::seq({ CharacterSet{{'a'}}, CharacterSet{{'b'}} })},
Blank{},
}),
Rule::choice({
Repeat{Rule::seq({ CharacterSet{{'c'}}, CharacterSet{{'d'}} })},
Blank{},
}),
})
},
{
"optional rules",
"a(bc)?",
seq({
character({ 'a' }),
choice({
seq({ character({ 'b' }), character({ 'c' }) }),
blank() }) })
Rule::seq({
CharacterSet{{'a'}},
Rule::choice({
Rule::seq({
CharacterSet{{'b'}},
CharacterSet{{'c'}},
}),
Blank{}
}),
})
},
{
"choices containing negated character classes",
// NOTE(review): two pattern literals follow. The first (with `*`) pairs with
// the old helper-based expectation; the second (with `+`) pairs with the new
// expectation, which uses a bare Repeat.
"/([^/]|(\\\\/))*/",
seq({
character({ '/' }),
repeat(choice({
character({ '/' }, false),
seq({ character({ '\\' }), character({ '/' }) }) })),
character({ '/' }), }),
"/([^/]|(\\\\/))+/",
Rule::seq({
CharacterSet{{'/'}},
Repeat{Rule::choice({
CharacterSet().include_all().exclude('/'),
Rule::seq({
CharacterSet{{'\\'}},
CharacterSet{{'/'}},
}),
})},
CharacterSet{{'/'}},
}),
},
};
@ -229,7 +267,7 @@ describe("parse_regex", []() {
// Registers one spec per table row: parse the row's pattern and compare the
// first element of the result with the row's expected rule.
for (auto &row : valid_inputs) {
it(("parses " + row.description).c_str(), [&]() {
auto result = parse_regex(row.pattern);
// NOTE(review): the two assertions below look like the old (pointer-based
// EqualsPointer) and new (value-based Equals) form of the same check from a
// rendered diff.
AssertThat(result.first, EqualsPointer(row.rule));
AssertThat(result.first, Equals(row.rule));
});
}

View file

@ -1,5 +1,5 @@
#include "test_helper.h"
#include "compiler/rules/character_set.h"
#include "compiler/rule.h"
using namespace rules;
@ -66,7 +66,7 @@ describe("CharacterSet", []() {
.include('a', 'd')
.include('f', 'm');
AssertThat(set1.hash_code(), Equals(set2.hash_code()));
AssertThat(hash<CharacterSet>()(set1), Equals(hash<CharacterSet>()(set2)));
});
it("returns different numbers for character sets that include different ranges", [&]() {
@ -78,8 +78,8 @@ describe("CharacterSet", []() {
.include('a', 'c')
.include('f', 'm');
AssertThat(set1.hash_code(), !Equals(set2.hash_code()));
AssertThat(set2.hash_code(), !Equals(set1.hash_code()));
AssertThat(hash<CharacterSet>()(set1), !Equals(hash<CharacterSet>()(set2)));
AssertThat(hash<CharacterSet>()(set2), !Equals(hash<CharacterSet>()(set1)));
});
it("returns different numbers for character sets that exclude different ranges", [&]() {
@ -93,16 +93,16 @@ describe("CharacterSet", []() {
.exclude('a', 'c')
.exclude('f', 'm');
AssertThat(set1.hash_code(), !Equals(set2.hash_code()));
AssertThat(set2.hash_code(), !Equals(set1.hash_code()));
AssertThat(hash<CharacterSet>()(set1), !Equals(hash<CharacterSet>()(set2)));
AssertThat(hash<CharacterSet>()(set2), !Equals(hash<CharacterSet>()(set1)));
});
// Spec: a set built with include_all() and a default-constructed set must
// produce different hash values; the inequality is asserted in both orders.
// NOTE(review): this text appears to be a rendered diff with the +/- markers
// stripped; the hash_code() assertions are the old form and the
// hash<CharacterSet>() assertions are the new form of the same two checks.
it("returns different numbers for character sets with different sign", [&]() {
CharacterSet set1 = CharacterSet().include_all();
CharacterSet set2 = CharacterSet();
AssertThat(set1.hash_code(), !Equals(set2.hash_code()));
AssertThat(set2.hash_code(), !Equals(set1.hash_code()));
AssertThat(hash<CharacterSet>()(set1), !Equals(hash<CharacterSet>()(set2)));
AssertThat(hash<CharacterSet>()(set2), !Equals(hash<CharacterSet>()(set1)));
});
});
@ -312,7 +312,7 @@ describe("CharacterSet", []() {
.include('z');
AssertThat(set1.included_ranges(), Equals(vector<CharacterRange>({
CharacterRange('a', 'c'),
CharacterRange{'a', 'c'},
CharacterRange('g'),
CharacterRange('z'),
})));

Some files were not shown because too many files have changed in this diff.