Extract public compiler API into its own header file
This commit is contained in:
parent
0b4e1c8d0d
commit
9e2dc14182
53 changed files with 466 additions and 409 deletions
|
|
@ -1,7 +1,9 @@
|
|||
#include "first_set.h"
|
||||
#include "compiler.h"
|
||||
#include "rule_can_be_blank.h"
|
||||
#include "grammar.h"
|
||||
|
||||
#include "rules/visitor.h"
|
||||
#include "rules/seq.h"
|
||||
#include "rules/choice.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::set;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#include "first_set.h"
|
||||
#include "rule_transitions.h"
|
||||
#include "rule_can_be_blank.h"
|
||||
#include "grammar.h"
|
||||
#include "compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::set;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
#include "item.h"
|
||||
#include "grammar.h"
|
||||
#include "compiler.h"
|
||||
#include "rule_can_be_blank.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
#ifndef __TreeSitter__item__
|
||||
#define __TreeSitter__item__
|
||||
#ifndef __tree_sitter__item__
|
||||
#define __tree_sitter__item__
|
||||
|
||||
#include <string>
|
||||
#include "rule.h"
|
||||
#include <set>
|
||||
#include "symbol.h"
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -77,5 +76,4 @@ namespace std {
|
|||
};
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "item_set_closure.h"
|
||||
#include "compiler.h"
|
||||
#include "./follow_sets.h"
|
||||
#include "grammar.h"
|
||||
#include "item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -1,12 +1,15 @@
|
|||
#ifndef __tree_sitter__item_set_transitions__
|
||||
#define __tree_sitter__item_set_transitions__
|
||||
|
||||
#include "character_set.h"
|
||||
#include "symbol.h"
|
||||
#include "item.h"
|
||||
#include <map>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class CharacterSet;
|
||||
class Symbol;
|
||||
}
|
||||
|
||||
namespace build_tables {
|
||||
std::map<rules::CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar);
|
||||
std::map<rules::Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const Grammar &grammar);
|
||||
|
|
|
|||
|
|
@ -2,8 +2,7 @@
|
|||
#include "item.h"
|
||||
#include "item_set_closure.h"
|
||||
#include "item_set_transitions.h"
|
||||
#include "rules.h"
|
||||
#include "grammar.h"
|
||||
#include "compiler.h"
|
||||
|
||||
#include "stream_methods.h"
|
||||
|
||||
|
|
@ -11,6 +10,7 @@ namespace tree_sitter {
|
|||
using std::pair;
|
||||
using std::string;
|
||||
using std::map;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::CharacterSet;
|
||||
|
||||
|
|
@ -79,7 +79,7 @@ namespace tree_sitter {
|
|||
LexItemSet item_set;
|
||||
for (auto &symbol : state.expected_inputs()) {
|
||||
if (symbol == END_OF_INPUT)
|
||||
item_set.insert(LexItem(symbol, rules::character('\0')));
|
||||
item_set.insert(LexItem(symbol, make_shared<CharacterSet>(std::set<rules::CharacterRange>{ '\0' })));
|
||||
if (lex_grammar.has_definition(symbol))
|
||||
item_set.insert(LexItem(symbol, lex_grammar.rule(symbol)));
|
||||
}
|
||||
|
|
@ -134,7 +134,7 @@ namespace tree_sitter {
|
|||
lex_grammar(lex_grammar) {};
|
||||
|
||||
pair<ParseTable, LexTable> build() {
|
||||
auto item = ParseItem(START, rules::sym(grammar.start_rule_name), {}, END_OF_INPUT);
|
||||
auto item = ParseItem(START, make_shared<Symbol>(grammar.start_rule_name), {}, END_OF_INPUT);
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
|
||||
add_parse_state(item_set);
|
||||
return pair<ParseTable, LexTable>(parse_table, lex_table);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,10 @@
|
|||
#include "rule_can_be_blank.h"
|
||||
#include "grammar.h"
|
||||
#include "rules.h"
|
||||
#include "compiler.h"
|
||||
#include "rules/symbol.h"
|
||||
#include "rules/visitor.h"
|
||||
#include "rules/seq.h"
|
||||
#include "rules/choice.h"
|
||||
#include "rules/blank.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using namespace rules;
|
||||
|
|
|
|||
|
|
@ -1,11 +1,9 @@
|
|||
#ifndef __tree_sitter__rule_can_be_blank__
|
||||
#define __tree_sitter__rule_can_be_blank__
|
||||
|
||||
#include "rule.h"
|
||||
#include "compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
||||
namespace build_tables {
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule);
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const Grammar &grammar);
|
||||
|
|
|
|||
|
|
@ -1,10 +1,19 @@
|
|||
#include "rules.h"
|
||||
#include "rule_transitions.h"
|
||||
#include "rule_can_be_blank.h"
|
||||
#include "merge_transitions.h"
|
||||
#include "rules/blank.h"
|
||||
#include "rules/choice.h"
|
||||
#include "rules/seq.h"
|
||||
#include "rules/string.h"
|
||||
#include "rules/repeat.h"
|
||||
#include "rules/pattern.h"
|
||||
#include "rules/visitor.h"
|
||||
#include "rules/character_set.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::make_shared;
|
||||
using namespace rules;
|
||||
|
||||
namespace build_tables {
|
||||
|
|
@ -18,7 +27,7 @@ namespace tree_sitter {
|
|||
template<>
|
||||
map<CharacterSet, rule_ptr> merge_transitions(const map<CharacterSet, rule_ptr> &left, const map<CharacterSet, rule_ptr> &right) {
|
||||
auto transitions = merge_char_transitions<rule_ptr>(left, right, [](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
return make_shared<Choice>(left, right);
|
||||
});
|
||||
return *static_cast<map<CharacterSet, rule_ptr> *>(&transitions);
|
||||
}
|
||||
|
|
@ -26,7 +35,7 @@ namespace tree_sitter {
|
|||
template<>
|
||||
map<Symbol, rule_ptr> merge_transitions(const map<Symbol, rule_ptr> &left, const map<Symbol, rule_ptr> &right) {
|
||||
auto transitions = merge_sym_transitions<rule_ptr>(left, right, [](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
return make_shared<Choice>(left, right);
|
||||
});
|
||||
return *static_cast<map<Symbol, rule_ptr> *>(&transitions);
|
||||
}
|
||||
|
|
@ -54,7 +63,7 @@ namespace tree_sitter {
|
|||
auto atom = dynamic_cast<const T *>(rule);
|
||||
if (atom) {
|
||||
value = map<T, rule_ptr>();
|
||||
value.insert({ *atom, blank() });
|
||||
value.insert({ *atom, make_shared<Blank>() });
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -72,11 +81,8 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
value = map_transitions(transitions(rule->left), [&](const rule_ptr left_rule) -> rule_ptr {
|
||||
if (is_blank(left_rule))
|
||||
return rule->right;
|
||||
else
|
||||
return seq({ left_rule, rule->right });
|
||||
value = map_transitions(transitions(rule->left), [&](const rule_ptr left_rule) {
|
||||
return Seq::Build({ left_rule, rule->right });
|
||||
});
|
||||
if (rule_can_be_blank(rule->left)) {
|
||||
value = merge_transitions<T>(value, transitions(rule->right));
|
||||
|
|
@ -84,15 +90,17 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void visit(const Repeat *rule) {
|
||||
value = map_transitions(transitions(rule->content), [&](const rule_ptr &value) -> rule_ptr {
|
||||
return seq({ value, choice({ rule->copy(), blank() }) });
|
||||
value = map_transitions(transitions(rule->content), [&](const rule_ptr &value) {
|
||||
return Seq::Build({
|
||||
value,
|
||||
make_shared<Choice>(rule->copy(), make_shared<Blank>()) });
|
||||
});
|
||||
}
|
||||
|
||||
void visit(const String *rule) {
|
||||
rule_ptr result = character(rule->value[0]);
|
||||
for (int i = 1; i < rule->value.length(); i++)
|
||||
result = seq({ result, character(rule->value[i]) });
|
||||
rule_ptr result = make_shared<Blank>();
|
||||
for (char val : rule->value)
|
||||
result = Seq::Build({ result, make_shared<CharacterSet>(set<CharacterRange>({ val })) });
|
||||
value = transitions(result);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,4 @@
|
|||
#include "compile.h"
|
||||
|
||||
#include "grammar.h"
|
||||
#include "compiler.h"
|
||||
#include "prepare_grammar/perform.h"
|
||||
#include "build_tables/perform.h"
|
||||
#include "generate_code/c_code.h"
|
||||
|
|
|
|||
|
|
@ -1,12 +0,0 @@
|
|||
#ifndef __tree_sitter__compile__
|
||||
#define __tree_sitter__compile__
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
||||
std::string compile(const Grammar &grammar, std::string name);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
#ifndef __tree_sitter__code_generator__
|
||||
#define __tree_sitter__code_generator__
|
||||
|
||||
#include "grammar.h"
|
||||
#include "parse_table.h"
|
||||
#include "lex_table.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,40 +1,35 @@
|
|||
#include "grammar.h"
|
||||
#include "compiler.h"
|
||||
#include "symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::vector;
|
||||
using std::string;
|
||||
using std::pair;
|
||||
using std::initializer_list;
|
||||
using std::map;
|
||||
using std::ostream;
|
||||
using rules::rule_ptr;
|
||||
using rules::Symbol;
|
||||
|
||||
Grammar::Grammar(const initializer_list<pair<const string, const rules::rule_ptr>> &rules) :
|
||||
rules(rules),
|
||||
start_rule_name(rules.begin()->first) {}
|
||||
|
||||
Grammar::Grammar(std::string start_rule_name, const rule_map &rules) :
|
||||
Grammar::Grammar(std::string start_rule_name,
|
||||
const std::map<const std::string, const rule_ptr> &rules) :
|
||||
rules(rules),
|
||||
start_rule_name(start_rule_name) {}
|
||||
|
||||
Grammar::Grammar(std::string start_rule_name, const rule_map &rules, const rule_map &aux_rules) :
|
||||
Grammar::Grammar(std::string start_rule_name,
|
||||
const map<const string, const rule_ptr> &rules,
|
||||
const map<const string, const rule_ptr> &aux_rules) :
|
||||
rules(rules),
|
||||
aux_rules(aux_rules),
|
||||
start_rule_name(start_rule_name) {}
|
||||
|
||||
const rules::rule_ptr Grammar::rule(const rules::Symbol &symbol) const {
|
||||
const rule_ptr Grammar::rule(const Symbol &symbol) const {
|
||||
auto map = symbol.is_auxiliary ? aux_rules : rules;
|
||||
auto iter = map.find(symbol.name);
|
||||
if (iter != map.end())
|
||||
return iter->second;
|
||||
else
|
||||
return rules::rule_ptr();
|
||||
}
|
||||
|
||||
vector<string> Grammar::rule_names() const {
|
||||
vector<string> result;
|
||||
for (auto pair : rules) {
|
||||
result.push_back(pair.first);
|
||||
}
|
||||
return result;
|
||||
return rule_ptr();
|
||||
}
|
||||
|
||||
bool Grammar::operator==(const Grammar &other) const {
|
||||
|
|
@ -56,7 +51,7 @@ namespace tree_sitter {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Grammar::has_definition(const rules::Symbol &symbol) const {
|
||||
bool Grammar::has_definition(const Symbol &symbol) const {
|
||||
return rule(symbol).get() != nullptr;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,31 +0,0 @@
|
|||
#ifndef __TreeSitter__grammar__
|
||||
#define __TreeSitter__grammar__
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "rules.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
// A named set of rules plus a designated start rule. Holds two rule maps:
// `rules` and `aux_rules`, both keyed by rule name.
class Grammar {
|
||||
typedef std::initializer_list<std::pair<const std::string, const rules::rule_ptr>> rule_map_init_list;
|
||||
typedef std::map<const std::string, const rules::rule_ptr> rule_map;
|
||||
|
||||
public:
|
||||
// Builds a grammar from a list of (name, rule) pairs; the first pair's name
// becomes the start rule. The definition reads `rules.begin()->first`, so an
// empty list is presumably not supported — confirm with callers.
Grammar(const rule_map_init_list &rules);
|
||||
// Builds a grammar with an explicit start rule name and no auxiliary rules.
Grammar(std::string start_rule_name, const rule_map &rules);
|
||||
// Builds a grammar with an explicit start rule name and both rule maps.
Grammar(std::string start_rule_name, const rule_map &rules, const rule_map &aux_rules);
|
||||
|
||||
// Name of the rule parsing starts from.
const std::string start_rule_name;
|
||||
// Returns the keys of `rules` (auxiliary rule names are not included).
std::vector<std::string> rule_names() const;
|
||||
bool operator==(const Grammar &other) const;
|
||||
// True when rule(symbol) yields a non-null pointer.
bool has_definition(const rules::Symbol &symbol) const;
|
||||
// Looks up `symbol.name` in `aux_rules` when `symbol.is_auxiliary` is set,
// otherwise in `rules`; returns an empty rule_ptr when the name is absent.
const rules::rule_ptr rule(const rules::Symbol &symbol) const;
|
||||
|
||||
rule_map rules;
|
||||
rule_map aux_rules;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,10 +1,17 @@
|
|||
#include "expand_repeats.h"
|
||||
#include <map>
|
||||
#include "rules/visitor.h"
|
||||
#include "rules/seq.h"
|
||||
#include "rules/symbol.h"
|
||||
#include "rules/choice.h"
|
||||
#include "rules/blank.h"
|
||||
#include "rules/repeat.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::make_shared;
|
||||
using namespace rules;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
|
@ -19,28 +26,24 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
rule_ptr make_repeat_helper(string name, const rule_ptr &rule) {
|
||||
return choice({
|
||||
seq({
|
||||
rule,
|
||||
aux_sym(name),
|
||||
}),
|
||||
blank(),
|
||||
});
|
||||
return Choice::Build({
|
||||
Seq::Build({ rule, make_shared<Symbol>(name, true) }),
|
||||
make_shared<Blank>() });
|
||||
}
|
||||
|
||||
void visit(const Repeat *rule) {
|
||||
rule_ptr inner_rule = apply(rule->content);
|
||||
string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1);
|
||||
aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) });
|
||||
value = aux_sym(helper_rule_name);
|
||||
value = make_shared<Symbol>(helper_rule_name, true);
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
value = seq({ apply(rule->left), apply(rule->right) });
|
||||
value = Seq::Build({ apply(rule->left), apply(rule->right) });
|
||||
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
value = choice({ apply(rule->left), apply(rule->right) });
|
||||
value = Choice::Build({ apply(rule->left), apply(rule->right) });
|
||||
}
|
||||
|
||||
void default_visit(const Rule *rule) {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef __tree_sitter__expand_repeats__
|
||||
#define __tree_sitter__expand_repeats__
|
||||
|
||||
#include "grammar.h"
|
||||
#include "compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
|
|||
|
|
@ -1,12 +1,20 @@
|
|||
#include "extract_tokens.h"
|
||||
#include "search_for_symbols.h"
|
||||
#include <map>
|
||||
#include "compiler.h"
|
||||
#include "rules/visitor.h"
|
||||
#include "rules/seq.h"
|
||||
#include "rules/choice.h"
|
||||
#include "rules/repeat.h"
|
||||
#include "rules/blank.h"
|
||||
#include "rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::make_shared;
|
||||
using namespace rules;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
|
@ -29,7 +37,7 @@ namespace tree_sitter {
|
|||
return value;
|
||||
} else {
|
||||
string token_name = add_token(rule);
|
||||
return aux_sym(token_name);
|
||||
return make_shared<Symbol>(token_name, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -47,15 +55,15 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
value = choice({ apply(rule->left), apply(rule->right) });
|
||||
value = Choice::Build({ apply(rule->left), apply(rule->right) });
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
value = seq({ apply(rule->left), apply(rule->right) });
|
||||
value = Seq::Build({ apply(rule->left), apply(rule->right) });
|
||||
}
|
||||
|
||||
void visit(const Repeat *rule) {
|
||||
value = repeat(apply(rule->content));
|
||||
value = make_shared<Repeat>(apply(rule->content));
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
#ifndef __tree_sitter__extract_tokens__
|
||||
#define __tree_sitter__extract_tokens__
|
||||
|
||||
#include "grammar.h"
|
||||
#include <utility>
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<Grammar, Grammar> extract_tokens(const Grammar &);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
#ifndef __tree_sitter__prepare_grammar__
|
||||
#define __tree_sitter__prepare_grammar__
|
||||
|
||||
#include "grammar.h"
|
||||
#include <utility>
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<Grammar, Grammar> perform(const Grammar &);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,14 @@
|
|||
#include "search_for_symbols.h"
|
||||
#include "rules/visitor.h"
|
||||
#include "choice.h"
|
||||
#include "seq.h"
|
||||
#include "repeat.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using namespace rules;
|
||||
|
||||
namespace prepare_grammar {
|
||||
class SymbolSearcher : Visitor {
|
||||
class SymbolSearcher : rules::Visitor {
|
||||
public:
|
||||
bool value;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef __tree_sitter__search_for_symbols__
|
||||
#define __tree_sitter__search_for_symbols__
|
||||
|
||||
#include "rules.h"
|
||||
#include "rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "rules.h"
|
||||
#include "blank.h"
|
||||
#include "visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
|
|
|||
44
src/compiler/rules/character_range.cpp
Normal file
44
src/compiler/rules/character_range.cpp
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
#include "compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
|
||||
namespace rules {
|
||||
static const char MAX_CHAR = '\xff';
|
||||
|
||||
// Single-character range: both endpoints are set to `value`.
CharacterRange::CharacterRange(char value) : min(value), max(value) {}
|
||||
// Range with explicit endpoints, stored exactly as given; no check that
// `min` does not exceed `max` is performed here.
CharacterRange::CharacterRange(char min, char max) : min(min), max(max) {}
|
||||
|
||||
// Two ranges are equal when both their `min` and `max` endpoints match.
bool CharacterRange::operator==(const CharacterRange &other) const {
|
||||
return min == other.min && max == other.max;
|
||||
}
|
||||
|
||||
// Orders ranges by `min` first and by `max` as the tie-breaker, i.e.
// lexicographically on the (min, max) pair.
// NOTE(review): the endpoints are compared as plain `char`, whose signedness
// is implementation-defined, so the relative order of values above 0x7f may
// differ between platforms — confirm this is acceptable.
bool CharacterRange::operator<(const CharacterRange &other) const {
|
||||
if (min < other.min) return true;
|
||||
if (min > other.min) return false;
|
||||
// `min` is equal on both sides: decide on `max`.
if (max < other.max) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns a display form of `input`: "<EOF>" for '\0', "<MAX>" for MAX_CHAR,
// and otherwise a one-character string holding `input` unchanged.
string escape_character(char input) {
|
||||
switch (input) {
|
||||
case '\0':
|
||||
return "<EOF>";
|
||||
case MAX_CHAR:
|
||||
return "<MAX>";
|
||||
default:
|
||||
// Any other character is emitted as-is, with no further escaping.
return string() + input;
|
||||
}
|
||||
}
|
||||
|
||||
// Formats the range for display:
//   - "<ANY>" when it runs from 0 to MAX_CHAR,
//   - the escaped single character when `min == max`,
//   - otherwise "<min>-<max>", with both endpoints passed through
//     escape_character.
string CharacterRange::to_string() const {
|
||||
if (min == 0 && max == MAX_CHAR)
|
||||
return "<ANY>";
|
||||
if (min == max)
|
||||
return escape_character(min);
|
||||
else
|
||||
return string() + escape_character(min) + "-" + escape_character(max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,42 +1,16 @@
|
|||
#include "rules.h"
|
||||
#include "character_set.h"
|
||||
#include "visitor.h"
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
using std::set;
|
||||
using std::pair;
|
||||
using std::initializer_list;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
const char MAX_CHAR = '\xff';
|
||||
|
||||
CharacterRange::CharacterRange(char value) : min(value), max(value) {}
|
||||
CharacterRange::CharacterRange(char min, char max) :
|
||||
min(min),
|
||||
max(max)
|
||||
{}
|
||||
|
||||
bool CharacterRange::operator==(const CharacterRange &other) const {
|
||||
return min == other.min && max == other.max;
|
||||
}
|
||||
static const char MAX_CHAR = '\xff';
|
||||
|
||||
bool CharacterRange::operator<(const CharacterRange &other) const {
|
||||
if (min < other.min) return true;
|
||||
if (min > other.min) return false;
|
||||
if (max < other.max) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
string escape_character(char input) {
|
||||
switch (input) {
|
||||
case '\0':
|
||||
return "<EOF>";
|
||||
case MAX_CHAR:
|
||||
return "<MAX>";
|
||||
default:
|
||||
return string() + input;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the upper endpoint of `range` as an int, mapping MAX_CHAR to 255
// explicitly instead of casting it.
// NOTE(review): presumably this is because '\xff' cast to int is negative
// where `char` is signed — confirm.
int max_int(const CharacterRange &range) {
|
||||
return range.max == MAX_CHAR ? 255 : (int)range.max;
|
||||
}
|
||||
|
|
@ -45,19 +19,9 @@ namespace tree_sitter {
|
|||
return (int)range.min;
|
||||
}
|
||||
|
||||
string CharacterRange::to_string() const {
|
||||
if (min == 0 && max == MAX_CHAR)
|
||||
return "<ANY>";
|
||||
if (min == max)
|
||||
return escape_character(min);
|
||||
else
|
||||
return string() + escape_character(min) + "-" + escape_character(max);
|
||||
}
|
||||
|
||||
CharacterSet::CharacterSet() : ranges({}) {}
|
||||
CharacterSet::CharacterSet(const set<CharacterRange> &ranges) : ranges(ranges) {}
|
||||
CharacterSet::CharacterSet(const set<CharacterRange> &ranges, bool sign) :
|
||||
ranges(sign ? ranges : CharacterSet(ranges).complement().ranges) {}
|
||||
CharacterSet::CharacterSet(const initializer_list<CharacterRange> &ranges) : ranges(ranges) {}
|
||||
|
||||
bool CharacterSet::operator==(const Rule &rule) const {
|
||||
const CharacterSet *other = dynamic_cast<const CharacterSet *>(&rule);
|
||||
|
|
@ -89,7 +53,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
CharacterSet CharacterSet::complement() const {
|
||||
CharacterSet result({ {0, MAX_CHAR} }, true);
|
||||
CharacterSet result({ {0, MAX_CHAR} });
|
||||
result.remove_set(*this);
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,31 +1,10 @@
|
|||
#ifndef __tree_sitter__character_set__
|
||||
#define __tree_sitter__character_set__
|
||||
|
||||
#include "compiler.h"
|
||||
#include "rule.h"
|
||||
#include <set>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
struct CharacterRange {
|
||||
char min;
|
||||
char max;
|
||||
CharacterRange(char);
|
||||
CharacterRange(char, char);
|
||||
bool operator==(const CharacterRange &) const;
|
||||
bool operator<(const CharacterRange &) const;
|
||||
std::string to_string() const;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::rules::CharacterRange> {
|
||||
size_t operator()(const tree_sitter::rules::CharacterRange &range) const {
|
||||
return (hash<char>()(range.min) ^ hash<char>()(range.max));
|
||||
}
|
||||
};
|
||||
}
|
||||
#include <initializer_list>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
|
@ -33,15 +12,7 @@ namespace tree_sitter {
|
|||
public:
|
||||
CharacterSet();
|
||||
CharacterSet(const std::set<CharacterRange> &ranges);
|
||||
CharacterSet(const std::set<CharacterRange> &ranges, bool);
|
||||
|
||||
CharacterSet complement() const;
|
||||
CharacterSet intersect(const CharacterSet &) const;
|
||||
std::pair<CharacterSet, bool> most_compact_representation() const;
|
||||
bool is_empty() const;
|
||||
|
||||
void add_set(const CharacterSet &other);
|
||||
CharacterSet remove_set(const CharacterSet &other);
|
||||
CharacterSet(const std::initializer_list<CharacterRange> &ranges);
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
bool operator<(const CharacterSet &) const;
|
||||
|
|
@ -50,6 +21,13 @@ namespace tree_sitter {
|
|||
std::string to_string() const;
|
||||
void accept(Visitor &visitor) const;
|
||||
|
||||
void add_set(const CharacterSet &other);
|
||||
CharacterSet remove_set(const CharacterSet &other);
|
||||
CharacterSet complement() const;
|
||||
CharacterSet intersect(const CharacterSet &) const;
|
||||
std::pair<CharacterSet, bool> most_compact_representation() const;
|
||||
bool is_empty() const;
|
||||
|
||||
std::set<CharacterRange> ranges;
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,21 @@
|
|||
#include "rules.h"
|
||||
|
||||
using std::string;
|
||||
#include "choice.h"
|
||||
#include "visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::make_shared;
|
||||
using std::vector;
|
||||
|
||||
namespace rules {
|
||||
Choice::Choice(rule_ptr left, rule_ptr right) : left(left), right(right) {};
|
||||
|
||||
// Folds `rules` left to right into nested Choice nodes, so {a, b, c} becomes
// Choice(Choice(a, b), c). A one-element vector returns that element itself,
// and an empty vector returns a null rule_ptr.
rule_ptr Choice::Build(const vector<rule_ptr> &rules) {
|
||||
rule_ptr result;
|
||||
// NOTE(review): `auto rule` copies each rule_ptr per iteration, whereas
// Seq::Build iterates with `auto &rule` — consider aligning the two.
for (auto rule : rules)
|
||||
// While `result` is still null the rule is taken as-is; afterwards each
// rule is wrapped together with the accumulated result in a new Choice.
result = result.get() ? make_shared<Choice>(result, rule) : rule;
|
||||
return result;
|
||||
}
|
||||
|
||||
bool Choice::operator==(const Rule &rule) const {
|
||||
const Choice *other = dynamic_cast<const Choice *>(&rule);
|
||||
return other && (*other->left == *left) && (*other->right == *right);
|
||||
|
|
|
|||
|
|
@ -2,12 +2,14 @@
|
|||
#define __tree_sitter__choice__
|
||||
|
||||
#include "rule.h"
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Choice : public Rule {
|
||||
public:
|
||||
Choice(rule_ptr left, rule_ptr right);
|
||||
static rule_ptr Build(const std::vector<rule_ptr> &rules);
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
|
|
|
|||
|
|
@ -1,11 +1,18 @@
|
|||
#include "rules.h"
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
using std::set;
|
||||
#include "pattern.h"
|
||||
#include "visitor.h"
|
||||
#include "choice.h"
|
||||
#include "seq.h"
|
||||
#include "repeat.h"
|
||||
#include "character_set.h"
|
||||
#include <set>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
using std::string;
|
||||
using std::hash;
|
||||
using std::make_shared;
|
||||
using std::set;
|
||||
|
||||
class PatternParser {
|
||||
public:
|
||||
PatternParser(const string &input) :
|
||||
|
|
@ -17,7 +24,7 @@ namespace tree_sitter {
|
|||
auto result = term();
|
||||
while (has_more_input() && peek() == '|') {
|
||||
next();
|
||||
result = choice({ result, term() });
|
||||
result = make_shared<Choice>(result, term());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
@ -26,7 +33,7 @@ namespace tree_sitter {
|
|||
rule_ptr term() {
|
||||
rule_ptr result = factor();
|
||||
while (has_more_input() && (peek() != '|') && (peek() != ')'))
|
||||
result = seq({ result, factor() });
|
||||
result = Seq::Build({ result, factor() });
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -34,7 +41,7 @@ namespace tree_sitter {
|
|||
rule_ptr result = atom();
|
||||
if (has_more_input() && (peek() == '+')) {
|
||||
next();
|
||||
result = repeat(result);
|
||||
result = make_shared<Repeat>(result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
@ -92,7 +99,7 @@ namespace tree_sitter {
|
|||
next();
|
||||
if (peek() == '-') {
|
||||
next();
|
||||
value = CharacterSet({ {first_char, peek()} }, true);
|
||||
value = CharacterSet({ CharacterRange(first_char, peek()) });
|
||||
next();
|
||||
} else {
|
||||
value = CharacterSet({ first_char });
|
||||
|
|
@ -108,9 +115,9 @@ namespace tree_sitter {
|
|||
case ')':
|
||||
return CharacterSet({ value });
|
||||
case 'w':
|
||||
return CharacterSet({{'a', 'z'}, {'A', 'Z'}}, true);
|
||||
return CharacterSet({{'a', 'z'}, {'A', 'Z'}});
|
||||
case 'd':
|
||||
return CharacterSet({{'0', '9'}}, true);
|
||||
return CharacterSet({CharacterRange('0', '9')});
|
||||
default:
|
||||
error("unrecognized escape sequence");
|
||||
return CharacterSet();
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "rules.h"
|
||||
#include "repeat.h"
|
||||
#include "visitor.h"
|
||||
|
||||
using std::string;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,34 +1,38 @@
|
|||
#include "rules.h"
|
||||
|
||||
using std::make_shared;
|
||||
using std::string;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
#include "compiler.h"
|
||||
#include "rule.h"
|
||||
#include "blank.h"
|
||||
#include "symbol.h"
|
||||
#include "choice.h"
|
||||
#include "seq.h"
|
||||
#include "string.h"
|
||||
#include "pattern.h"
|
||||
#include "character_set.h"
|
||||
#include "repeat.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::make_shared;
|
||||
using std::string;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
|
||||
namespace rules {
|
||||
// Convenience factory: returns a newly allocated Blank rule.
rule_ptr blank() {
|
||||
return make_shared<Blank>();
|
||||
}
|
||||
|
||||
// Convenience factory: returns a CharacterSet rule whose only range is the
// single character `value`.
rule_ptr character(char value) {
|
||||
// `value` is converted to a one-character CharacterRange here.
set<CharacterRange> ranges = { value };
|
||||
return make_shared<CharacterSet>(ranges);
|
||||
}
|
||||
|
||||
// Convenience factory: returns a CharacterSet rule built from the given
// set of ranges.
rule_ptr character(const set<CharacterRange> &ranges) {
|
||||
return make_shared<CharacterSet>(ranges);
|
||||
}
|
||||
|
||||
rule_ptr character(const set<CharacterRange> &ranges, bool sign) {
|
||||
return make_shared<CharacterSet>(ranges, sign);
|
||||
if (sign)
|
||||
return character(ranges);
|
||||
else
|
||||
return CharacterSet(ranges).complement().copy();
|
||||
}
|
||||
|
||||
rule_ptr choice(const vector<rule_ptr> &rules) {
|
||||
rule_ptr result;
|
||||
for (auto rule : rules)
|
||||
result = result.get() ? make_shared<Choice>(result, rule) : rule;
|
||||
return result;
|
||||
return Choice::Build(rules);
|
||||
}
|
||||
|
||||
rule_ptr pattern(const string &value) {
|
||||
|
|
@ -40,12 +44,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
rule_ptr seq(const vector<rule_ptr> &rules) {
|
||||
rule_ptr result = blank();
|
||||
for (auto rule : rules)
|
||||
result = (typeid(*result) != typeid(Blank)) ?
|
||||
make_shared<Seq>(result, rule) :
|
||||
rule;
|
||||
return result;
|
||||
return Seq::Build(rules);
|
||||
}
|
||||
|
||||
rule_ptr str(const string &value) {
|
||||
|
|
|
|||
|
|
@ -1,33 +0,0 @@
|
|||
#ifndef __TreeSitter__rules__
|
||||
#define __TreeSitter__rules__
|
||||
|
||||
#include "rule.h"
|
||||
#include "blank.h"
|
||||
#include "symbol.h"
|
||||
#include "choice.h"
|
||||
#include "seq.h"
|
||||
#include "string.h"
|
||||
#include "pattern.h"
|
||||
#include "character_set.h"
|
||||
#include "repeat.h"
|
||||
#include "visitor.h"
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
rule_ptr blank();
|
||||
rule_ptr character(char value);
|
||||
rule_ptr character(const std::set<CharacterRange> &matches);
|
||||
rule_ptr character(const std::set<CharacterRange> &matches, bool);
|
||||
|
||||
rule_ptr choice(const std::vector<rule_ptr> &rules);
|
||||
rule_ptr pattern(const std::string &value);
|
||||
rule_ptr repeat(const rule_ptr content);
|
||||
rule_ptr seq(const std::vector<rule_ptr> &rules);
|
||||
rule_ptr str(const std::string &value);
|
||||
rule_ptr sym(const std::string &name);
|
||||
rule_ptr aux_sym(const std::string &name);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,11 +1,22 @@
|
|||
#include "rules.h"
|
||||
|
||||
using std::string;
|
||||
#include "seq.h"
|
||||
#include "visitor.h"
|
||||
#include "blank.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::make_shared;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
namespace rules {
|
||||
Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {};
|
||||
|
||||
// Folds `rules` left to right into nested Seq nodes, starting from a Blank.
// While the accumulated result is a Blank it is replaced by the next rule
// instead of being wrapped, so {a, b, c} becomes Seq(Seq(a, b), c), leading
// Blank entries are dropped, and an empty vector returns a Blank.
rule_ptr Seq::Build(const std::vector<rule_ptr> &rules) {
|
||||
rule_ptr result = make_shared<Blank>();
|
||||
for (auto &rule : rules)
|
||||
// NOTE(review): `*result` is dereferenced on every iteration, so this
// appears to assume no entry of `rules` is null — confirm with callers.
result = (typeid(*result) != typeid(Blank)) ? make_shared<Seq>(result, rule) : rule;
|
||||
return result;
|
||||
}
|
||||
|
||||
bool Seq::operator==(const Rule &rule) const {
|
||||
const Seq *other = dynamic_cast<const Seq *>(&rule);
|
||||
return other && (*other->left == *left) && (*other->right == *right);
|
||||
|
|
|
|||
|
|
@ -2,12 +2,14 @@
|
|||
#define __tree_sitter__seq__
|
||||
|
||||
#include "rule.h"
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Seq : public Rule {
|
||||
public:
|
||||
Seq(rule_ptr left, rule_ptr right);
|
||||
static rule_ptr Build(const std::vector<rule_ptr> &rules);
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "rules.h"
|
||||
#include "string.h"
|
||||
#include "visitor.h"
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "rules.h"
|
||||
#include "symbol.h"
|
||||
#include "visitor.h"
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,13 @@
|
|||
#include "visitor.h"
|
||||
#include "rule.h"
|
||||
#include "blank.h"
|
||||
#include "symbol.h"
|
||||
#include "choice.h"
|
||||
#include "seq.h"
|
||||
#include "string.h"
|
||||
#include "pattern.h"
|
||||
#include "character_set.h"
|
||||
#include "repeat.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
|
|
|||
|
|
@ -1,10 +1,18 @@
|
|||
#ifndef __tree_sitter__rule_visitor__
|
||||
#define __tree_sitter__rule_visitor__
|
||||
|
||||
#include "rules.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Rule;
|
||||
class Blank;
|
||||
class Symbol;
|
||||
class CharacterSet;
|
||||
class Choice;
|
||||
class Repeat;
|
||||
class Seq;
|
||||
class String;
|
||||
class Pattern;
|
||||
|
||||
class Visitor {
|
||||
public:
|
||||
virtual void default_visit(const Rule *rule);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue