Reorganize compiler directory
This commit is contained in:
parent
023a0c4f70
commit
92cec5758f
51 changed files with 630 additions and 624 deletions
|
|
@ -1,6 +1,6 @@
|
|||
#include "item.h"
|
||||
#include "grammar.h"
|
||||
#include "transitions.h"
|
||||
#include "rule_transitions.h"
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
|
@ -9,7 +9,7 @@ using std::make_shared;
|
|||
using std::ostream;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
namespace build_tables {
|
||||
Item::Item(const string &rule_name, const rules::rule_ptr rule, int consumed_sym_count) :
|
||||
rule_name(rule_name),
|
||||
rule(rule),
|
||||
|
|
@ -24,7 +24,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
transition_map<rules::Rule, Item> Item::transitions() const {
|
||||
return lr::transitions(rule).map<Item>([&](rules::rule_ptr to_rule) -> item_ptr {
|
||||
return rule_transitions(rule).map<Item>([&](rules::rule_ptr to_rule) -> item_ptr {
|
||||
int next_sym_count = (consumed_sym_count == -1) ? -1 : (consumed_sym_count + 1);
|
||||
return make_shared<Item>(rule_name, to_rule, next_sym_count);
|
||||
});
|
||||
|
|
@ -32,7 +32,7 @@ namespace tree_sitter {
|
|||
|
||||
vector<rules::Symbol> Item::next_symbols() const {
|
||||
vector<rules::Symbol> result;
|
||||
for (auto pair : lr::transitions(rule)) {
|
||||
for (auto pair : rule_transitions(rule)) {
|
||||
auto sym = dynamic_pointer_cast<const rules::Symbol>(pair.first);
|
||||
if (sym) result.push_back(*sym);
|
||||
}
|
||||
|
|
@ -9,7 +9,7 @@
|
|||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
||||
namespace lr {
|
||||
namespace build_tables {
|
||||
class Item;
|
||||
typedef std::shared_ptr<const Item> item_ptr;
|
||||
|
||||
|
|
@ -37,8 +37,8 @@ namespace tree_sitter {
|
|||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::lr::Item> {
|
||||
size_t operator()(const tree_sitter::lr::Item &item) {
|
||||
struct hash<tree_sitter::build_tables::Item> {
|
||||
size_t operator()(const tree_sitter::build_tables::Item &item) {
|
||||
return
|
||||
hash<std::string>()(item.rule_name) ^
|
||||
hash<tree_sitter::rules::Rule>()(*item.rule) ^
|
||||
|
|
@ -9,11 +9,11 @@ using std::string;
|
|||
using std::make_shared;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
namespace build_tables {
|
||||
// Builds an item set whose `contents` are initialized from the given vector.
ItemSet::ItemSet(const vector<Item> &items) : contents(items) {}
|
||||
// Builds an item set whose `contents` are initialized from a braced list of items.
ItemSet::ItemSet(const initializer_list<Item> &items) : contents(items) {}
|
||||
|
||||
static bool vector_contains(vector<Item> items, lr::Item item) {
|
||||
static bool vector_contains(vector<Item> items, build_tables::Item item) {
|
||||
return (std::find(items.begin(), items.end(), item) != items.end());
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ namespace tree_sitter {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Two item sets are equal exactly when their `contents` compare equal.
bool ItemSet::operator==(const tree_sitter::build_tables::ItemSet &other) const {
    const bool same_contents = (this->contents == other.contents);
    return same_contents;
}
|
||||
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
#include <set>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
namespace build_tables {
|
||||
class ItemSet;
|
||||
typedef std::shared_ptr<const ItemSet> item_set_ptr;
|
||||
|
||||
|
|
@ -37,11 +37,11 @@ namespace tree_sitter {
|
|||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<const tree_sitter::lr::ItemSet> {
|
||||
size_t operator()(const tree_sitter::lr::ItemSet &item_set) const {
|
||||
struct hash<const tree_sitter::build_tables::ItemSet> {
|
||||
size_t operator()(const tree_sitter::build_tables::ItemSet &item_set) const {
|
||||
size_t result = hash<size_t>()(item_set.size());
|
||||
for (auto item : item_set)
|
||||
result ^= hash<tree_sitter::lr::Item>()(item);
|
||||
result ^= hash<tree_sitter::build_tables::Item>()(item);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#include "table_builder.h"
|
||||
#include "item_set.h"
|
||||
#include "./perform.h"
|
||||
#include "./item_set.h"
|
||||
#include "rules.h"
|
||||
#include "grammar.h"
|
||||
|
||||
|
|
@ -7,7 +7,7 @@ using std::pair;
|
|||
using std::vector;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
namespace build_tables {
|
||||
static int NOT_FOUND = -1;
|
||||
|
||||
class TableBuilder {
|
||||
|
|
@ -113,7 +113,7 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
// Builds the parse table and lex table for the given pair of grammars by
// running a TableBuilder over them.
pair<ParseTable, LexTable> perform(const Grammar &grammar, const Grammar &lex_grammar) {
    TableBuilder builder(grammar, lex_grammar);
    return builder.build();
}
|
||||
}
|
||||
|
|
@ -7,8 +7,8 @@
|
|||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
||||
namespace lr {
|
||||
std::pair<ParseTable, LexTable> build_tables(const Grammar &grammar, const Grammar &lex_grammar);
|
||||
namespace build_tables {
|
||||
std::pair<ParseTable, LexTable> perform(const Grammar &grammar, const Grammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
#include "transitions.h"
|
||||
#include "rule_transitions.h"
|
||||
#include "rules.h"
|
||||
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
namespace build_tables {
|
||||
class TransitionsVisitor : public rules::Visitor {
|
||||
public:
|
||||
transition_map<Rule, Rule> value;
|
||||
|
|
@ -22,14 +22,14 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
value = transitions(rule->left);
|
||||
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
value = rule_transitions(rule->left);
|
||||
value.merge(rule_transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
});
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
value = transitions(rule->left).map<Rule>([&](const rule_ptr left_rule) -> rule_ptr {
|
||||
value = rule_transitions(rule->left).map<Rule>([&](const rule_ptr left_rule) -> rule_ptr {
|
||||
if (typeid(*left_rule) == typeid(Blank))
|
||||
return rule->right;
|
||||
else
|
||||
|
|
@ -38,7 +38,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void visit(const Repeat *rule) {
|
||||
value = transitions(rule->content).map<Rule>([&](const rule_ptr &value) -> rule_ptr {
|
||||
value = rule_transitions(rule->content).map<Rule>([&](const rule_ptr &value) -> rule_ptr {
|
||||
return seq({ value, choice({ rule->copy(), blank() }) });
|
||||
});
|
||||
}
|
||||
|
|
@ -47,15 +47,15 @@ namespace tree_sitter {
|
|||
rule_ptr result = character(rule->value[0]);
|
||||
for (int i = 1; i < rule->value.length(); i++)
|
||||
result = seq({ result, character(rule->value[i]) });
|
||||
value = transitions(result);
|
||||
value = rule_transitions(result);
|
||||
}
|
||||
|
||||
void visit(const Pattern *rule) {
|
||||
value = transitions(rule->to_rule_tree());
|
||||
value = rule_transitions(rule->to_rule_tree());
|
||||
}
|
||||
};
|
||||
|
||||
transition_map<Rule, Rule> transitions(const rule_ptr &rule) {
|
||||
transition_map<Rule, Rule> rule_transitions(const rule_ptr &rule) {
|
||||
TransitionsVisitor visitor;
|
||||
rule->accept(visitor);
|
||||
return visitor.value;
|
||||
|
|
@ -5,8 +5,8 @@
|
|||
#include "transition_map.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
transition_map<rules::Rule, rules::Rule> transitions(const rules::rule_ptr &rule);
|
||||
namespace build_tables {
|
||||
transition_map<rules::Rule, rules::Rule> rule_transitions(const rules::rule_ptr &rule);
|
||||
}
|
||||
}
|
||||
|
||||
17
src/compiler/compile.cpp
Normal file
17
src/compiler/compile.cpp
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
#include "compile.h"
|
||||
|
||||
#include "grammar.h"
|
||||
#include "prepare_grammar/perform.h"
|
||||
#include "build_tables/perform.h"
|
||||
#include "generate_code/c_code.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
std::string compile(const Grammar &grammar) {
|
||||
auto grammars = prepare_grammar::perform(grammar);
|
||||
auto tables = build_tables::perform(grammars.first, grammars.second);
|
||||
auto rule_names = grammars.first.rule_names();
|
||||
auto token_names = grammars.second.rule_names();
|
||||
rule_names.insert(rule_names.end(), token_names.begin(), token_names.end());
|
||||
return generate_code::c_code(rule_names, tables.first, tables.second);
|
||||
}
|
||||
}
|
||||
12
src/compiler/compile.h
Normal file
12
src/compiler/compile.h
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
#ifndef __tree_sitter__compile__
|
||||
#define __tree_sitter__compile__
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
||||
// Compiles `grammar` and returns the generated code as a string.
std::string compile(const Grammar &grammar);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -8,10 +8,9 @@ using std::to_string;
|
|||
using std::unordered_map;
|
||||
using std::unordered_set;
|
||||
using std::vector;
|
||||
using namespace tree_sitter::lr;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace code_gen {
|
||||
namespace generate_code {
|
||||
static void str_replace(string &input, const string &search, const string &replace) {
|
||||
size_t pos = 0;
|
||||
while (1) {
|
||||
|
|
@ -6,8 +6,8 @@
|
|||
#include "lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace code_gen {
|
||||
std::string c_code(std::vector<std::string> rule_names, const lr::ParseTable &parse_table, const lr::LexTable &lex_table);
|
||||
namespace generate_code {
|
||||
std::string c_code(std::vector<std::string> rule_names, const ParseTable &parse_table, const LexTable &lex_table);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,78 +0,0 @@
|
|||
#include "extract_tokens.h"
|
||||
#include "search_for_symbols.h"
|
||||
#include <unordered_map>
|
||||
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::unordered_map;
|
||||
|
||||
namespace tree_sitter {
|
||||
class TokenExtractor : rules::Visitor {
|
||||
public:
|
||||
rules::rule_ptr value;
|
||||
size_t anonymous_token_count = 0;
|
||||
unordered_map<string, const rules::rule_ptr> tokens;
|
||||
|
||||
rules::rule_ptr initial_apply(string name, const rules::rule_ptr rule) {
|
||||
auto result = apply(rule);
|
||||
auto symbol = std::dynamic_pointer_cast<const rules::Symbol>(result);
|
||||
if (symbol && *symbol != *rule) {
|
||||
tokens.insert({ name, tokens[symbol->name] });
|
||||
tokens.erase(symbol->name);
|
||||
anonymous_token_count--;
|
||||
return rules::rule_ptr();
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
rules::rule_ptr apply(const rules::rule_ptr rule) {
|
||||
if (search_for_symbols(rule)) {
|
||||
rule->accept(*this);
|
||||
return value;
|
||||
} else {
|
||||
string token_name = add_token(rule);
|
||||
return rules::sym(token_name);
|
||||
}
|
||||
}
|
||||
|
||||
string add_token(const rules::rule_ptr &rule) {
|
||||
for (auto pair : tokens)
|
||||
if (*pair.second == *rule)
|
||||
return pair.first;
|
||||
string name = to_string(++anonymous_token_count);
|
||||
tokens.insert({ name, rule });
|
||||
return name;
|
||||
}
|
||||
|
||||
void default_visit(const rules::Rule *rule) {
|
||||
value = rule->copy();
|
||||
}
|
||||
|
||||
void visit(const rules::Choice *choice) {
|
||||
value = rules::choice({ apply(choice->left), apply(choice->right) });
|
||||
}
|
||||
|
||||
void visit(const rules::Seq *seq) {
|
||||
value = rules::seq({ apply(seq->left), apply(seq->right) });
|
||||
}
|
||||
};
|
||||
|
||||
// Splits a grammar in two: the first result holds the rules that remain
// after extraction (under the original start rule name), the second holds
// the extracted tokens.
pair<Grammar, Grammar> extract_tokens(const Grammar &input_grammar) {
    TokenExtractor extractor;
    unordered_map<string, const rules::rule_ptr> kept_rules;

    for (const auto &entry : input_grammar.rules) {
        string rule_name = entry.first;
        auto rewritten = extractor.initial_apply(rule_name, entry.second);
        // A null result means the rule was moved into the token set.
        if (rewritten != nullptr)
            kept_rules.insert({ rule_name, rewritten });
    }

    return {
        Grammar(input_grammar.start_rule_name, kept_rules),
        Grammar("", extractor.tokens)
    };
}
|
||||
}
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
#include "prepare_grammar.h"
|
||||
#include "extract_tokens.h"
|
||||
|
||||
using std::pair;
|
||||
|
||||
namespace tree_sitter {
|
||||
// Prepares a grammar by delegating entirely to extract_tokens.
pair<Grammar, Grammar> prepare_grammar(const Grammar &input_grammar) {
    pair<Grammar, Grammar> extracted = extract_tokens(input_grammar);
    return extracted;
}
|
||||
}
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
#include "search_for_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class SymbolSearcher : rules::Visitor {
|
||||
public:
|
||||
bool value;
|
||||
|
||||
bool apply(const rules::rule_ptr rule) {
|
||||
rule->accept(*this);
|
||||
return value;
|
||||
}
|
||||
|
||||
void default_visit(const rules::Rule *rule) {
|
||||
value = false;
|
||||
}
|
||||
|
||||
void visit(const rules::Symbol *symbol) {
|
||||
value = true;
|
||||
}
|
||||
|
||||
void visit(const rules::Choice *choice) {
|
||||
value = apply(choice->left) || apply(choice->right);
|
||||
}
|
||||
|
||||
void visit(const rules::Seq *seq) {
|
||||
value = apply(seq->left) || apply(seq->right);
|
||||
}
|
||||
};
|
||||
|
||||
// Returns whether `rule` contains a symbol, using a fresh SymbolSearcher.
bool search_for_symbols(const rules::rule_ptr &rule) {
    // Value-initialized, exactly like the temporary it replaces.
    SymbolSearcher searcher = SymbolSearcher();
    return searcher.apply(rule);
}
|
||||
}
|
||||
67
src/compiler/lex_table.cpp
Normal file
67
src/compiler/lex_table.cpp
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
#include "lex_table.h"
|
||||
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::unordered_map;
|
||||
using std::unordered_set;
|
||||
using std::vector;
|
||||
|
||||
namespace tree_sitter {
|
||||
// Action
|
||||
// Builds an action from its three fields. Initializers are listed in the
// order the members are declared in the class (type, symbol_name,
// state_index), which is the order in which they are actually initialized.
LexAction::LexAction(LexActionType type, size_t state_index, std::string symbol_name) :
    type(type),
    symbol_name(symbol_name),
    state_index(state_index) {}
|
||||
|
||||
// Error action: empty symbol name, state index -1 converted to size_t.
LexAction LexAction::Error() {
    const size_t no_state = -1;
    return LexAction(LexActionTypeError, no_state, "");
}
|
||||
|
||||
// Advance action targeting `state_index`; it carries an empty symbol name.
LexAction LexAction::Advance(size_t state_index) {
    LexAction action(LexActionTypeAdvance, state_index, "");
    return action;
}
|
||||
|
||||
// Accept action for `symbol_name`; the state index is -1 converted to size_t.
LexAction LexAction::Accept(std::string symbol_name) {
    const size_t no_state = -1;
    return LexAction(LexActionTypeAccept, no_state, symbol_name);
}
|
||||
|
||||
// Actions are equal when type, state index and symbol name all match.
bool LexAction::operator==(const LexAction &other) const {
    if (!(type == other.type))
        return false;
    if (!(state_index == other.state_index))
        return false;
    return symbol_name == other.symbol_name;
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const LexAction &action) {
|
||||
switch (action.type) {
|
||||
case LexActionTypeError:
|
||||
return stream << string("#<error>");
|
||||
case LexActionTypeAccept:
|
||||
return stream << string("#<accept ") + action.symbol_name + ">";
|
||||
case LexActionTypeAdvance:
|
||||
return stream << string("#<advance ") + to_string(action.state_index) + ">";
|
||||
}
|
||||
}
|
||||
|
||||
// State
|
||||
// Returns the set of keys of `actions`, i.e. every CharMatch this state has
// at least one action registered for.
unordered_set<CharMatch> LexState::expected_inputs() const {
    unordered_set<CharMatch> result;
    // Bind by const reference: copying an entry would copy its whole action set.
    for (const auto &entry : actions)
        result.insert(entry.first);
    return result;
}
|
||||
|
||||
// Table
|
||||
// Appends an empty state and returns its index.
size_t LexTable::add_state() {
    const size_t new_index = states.size();
    states.push_back(LexState());
    return new_index;
}
|
||||
|
||||
void LexTable::add_action(size_t state_index, CharMatch match, LexAction action) {
|
||||
states[state_index].actions[match].insert(action);
|
||||
}
|
||||
|
||||
void LexTable::add_default_action(size_t state_index, LexAction action) {
|
||||
states[state_index].default_actions.insert(action);
|
||||
}
|
||||
}
|
||||
65
src/compiler/lex_table.h
Normal file
65
src/compiler/lex_table.h
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
#ifndef __TreeSitter__lex_table__
|
||||
#define __TreeSitter__lex_table__
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include "char_match.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
// Kinds of lexer action. Kept as an unscoped enum because the values are
// hashed through std::hash<int>.
enum LexActionType {
    LexActionTypeAccept,
    LexActionTypeError,
    LexActionTypeAdvance
};
|
||||
|
||||
class LexAction {
|
||||
LexAction(LexActionType type, size_t state_index, std::string symbol_name);
|
||||
public:
|
||||
static LexAction Accept(std::string symbol_name);
|
||||
static LexAction Error();
|
||||
static LexAction Advance(size_t state_index);
|
||||
bool operator==(const LexAction &action) const;
|
||||
|
||||
LexActionType type;
|
||||
std::string symbol_name;
|
||||
size_t state_index;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const LexAction &item);
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::LexAction> {
|
||||
size_t operator()(const tree_sitter::LexAction &action) const {
|
||||
return (
|
||||
hash<int>()(action.type) ^
|
||||
hash<string>()(action.symbol_name) ^
|
||||
hash<size_t>()(action.state_index));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace tree_sitter {
|
||||
class LexState {
|
||||
public:
|
||||
std::unordered_map<CharMatch, std::unordered_set<LexAction>> actions;
|
||||
std::unordered_set<LexAction> default_actions;
|
||||
std::unordered_set<CharMatch> expected_inputs() const;
|
||||
};
|
||||
|
||||
class LexTable {
|
||||
public:
|
||||
size_t add_state();
|
||||
void add_action(size_t state_index, CharMatch match, LexAction action);
|
||||
void add_default_action(size_t state_index, LexAction action);
|
||||
|
||||
static const std::string START;
|
||||
static const std::string END_OF_INPUT;
|
||||
std::vector<LexState> states;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
#include "lex_table.h"
|
||||
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::unordered_map;
|
||||
using std::unordered_set;
|
||||
using std::vector;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
|
||||
// Action
|
||||
// Builds an action from its three fields. Initializers are listed in the
// order the members are declared in the class (type, symbol_name,
// state_index), which is the order in which they are actually initialized.
LexAction::LexAction(LexActionType type, size_t state_index, std::string symbol_name) :
    type(type),
    symbol_name(symbol_name),
    state_index(state_index) {}
|
||||
|
||||
// Error action: empty symbol name, state index -1 converted to size_t.
LexAction LexAction::Error() {
    const size_t no_state = -1;
    return LexAction(LexActionTypeError, no_state, "");
}
|
||||
|
||||
// Advance action targeting `state_index`; it carries an empty symbol name.
LexAction LexAction::Advance(size_t state_index) {
    LexAction action(LexActionTypeAdvance, state_index, "");
    return action;
}
|
||||
|
||||
// Accept action for `symbol_name`; the state index is -1 converted to size_t.
LexAction LexAction::Accept(std::string symbol_name) {
    const size_t no_state = -1;
    return LexAction(LexActionTypeAccept, no_state, symbol_name);
}
|
||||
|
||||
// Actions are equal when type, state index and symbol name all match.
bool LexAction::operator==(const LexAction &other) const {
    if (!(type == other.type))
        return false;
    if (!(state_index == other.state_index))
        return false;
    return symbol_name == other.symbol_name;
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const LexAction &action) {
|
||||
switch (action.type) {
|
||||
case LexActionTypeError:
|
||||
return stream << string("#<error>");
|
||||
case LexActionTypeAccept:
|
||||
return stream << string("#<accept ") + action.symbol_name + ">";
|
||||
case LexActionTypeAdvance:
|
||||
return stream << string("#<advance ") + to_string(action.state_index) + ">";
|
||||
}
|
||||
}
|
||||
|
||||
// State
|
||||
// Returns the set of keys of `actions`, i.e. every CharMatch this state has
// at least one action registered for.
unordered_set<CharMatch> LexState::expected_inputs() const {
    unordered_set<CharMatch> result;
    // Bind by const reference: copying an entry would copy its whole action set.
    for (const auto &entry : actions)
        result.insert(entry.first);
    return result;
}
|
||||
|
||||
// Table
|
||||
// Appends an empty state and returns its index.
size_t LexTable::add_state() {
    const size_t new_index = states.size();
    states.push_back(LexState());
    return new_index;
}
|
||||
|
||||
void LexTable::add_action(size_t state_index, CharMatch match, LexAction action) {
|
||||
states[state_index].actions[match].insert(action);
|
||||
}
|
||||
|
||||
void LexTable::add_default_action(size_t state_index, LexAction action) {
|
||||
states[state_index].default_actions.insert(action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,69 +0,0 @@
|
|||
#ifndef __TreeSitter__lex_table__
|
||||
#define __TreeSitter__lex_table__
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include "char_match.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
// Kinds of lexer action. Kept as an unscoped enum because the values are
// hashed through std::hash<int>.
enum LexActionType {
    LexActionTypeAccept,
    LexActionTypeError,
    LexActionTypeAdvance
};
|
||||
|
||||
class LexAction {
|
||||
LexAction(LexActionType type, size_t state_index, std::string symbol_name);
|
||||
public:
|
||||
static LexAction Accept(std::string symbol_name);
|
||||
static LexAction Error();
|
||||
static LexAction Advance(size_t state_index);
|
||||
bool operator==(const LexAction &action) const;
|
||||
|
||||
LexActionType type;
|
||||
std::string symbol_name;
|
||||
size_t state_index;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::lr::LexAction> {
|
||||
size_t operator()(const tree_sitter::lr::LexAction &action) const {
|
||||
return (
|
||||
hash<int>()(action.type) ^
|
||||
hash<string>()(action.symbol_name) ^
|
||||
hash<size_t>()(action.state_index));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
std::ostream& operator<<(std::ostream &stream, const LexAction &item);
|
||||
|
||||
class LexState {
|
||||
public:
|
||||
std::unordered_map<CharMatch, std::unordered_set<LexAction>> actions;
|
||||
std::unordered_set<LexAction> default_actions;
|
||||
std::unordered_set<CharMatch> expected_inputs() const;
|
||||
};
|
||||
|
||||
class LexTable {
|
||||
public:
|
||||
size_t add_state();
|
||||
void add_action(size_t state_index, CharMatch match, LexAction action);
|
||||
void add_default_action(size_t state_index, LexAction action);
|
||||
|
||||
static const std::string START;
|
||||
static const std::string END_OF_INPUT;
|
||||
std::vector<LexState> states;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
#include "parse_table.h"
|
||||
|
||||
using std::string;
|
||||
using std::ostream;
|
||||
using std::to_string;
|
||||
using std::unordered_set;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
// Action
|
||||
// Builds an action from its four fields. Initializers follow the member
// declaration order (type, child_symbol_count, symbol_name, state_index),
// which is the order in which they are actually initialized.
ParseAction::ParseAction(ParseActionType type, size_t state_index, string symbol_name, size_t child_symbol_count) :
    type(type),
    child_symbol_count(child_symbol_count),
    symbol_name(symbol_name),
    state_index(state_index) {}
|
||||
|
||||
// Error action: empty symbol name; state index and child count are both -1
// converted to size_t.
ParseAction ParseAction::Error() {
    const size_t unset = -1;
    return ParseAction(ParseActionTypeError, unset, "", unset);
}
|
||||
|
||||
// Accept action: empty symbol name; state index and child count are both -1
// converted to size_t.
ParseAction ParseAction::Accept() {
    const size_t unset = -1;
    return ParseAction(ParseActionTypeAccept, unset, "", unset);
}
|
||||
|
||||
// Shift action targeting `state_index`; no symbol name, child count unset (-1).
ParseAction ParseAction::Shift(size_t state_index) {
    const size_t no_children = -1;
    return ParseAction(ParseActionTypeShift, state_index, "", no_children);
}
|
||||
|
||||
// Reduce action producing `symbol_name` from `child_symbol_count` children;
// the state index is unset (-1).
ParseAction ParseAction::Reduce(std::string symbol_name, size_t child_symbol_count) {
    const size_t no_state = -1;
    return ParseAction(ParseActionTypeReduce, no_state, symbol_name, child_symbol_count);
}
|
||||
|
||||
// Two actions are equal when every field matches. The symbol name takes part
// in the comparison so that equality agrees with std::hash<ParseAction>,
// which hashes it too; without it, reductions to different symbols with the
// same child count would compare equal.
bool ParseAction::operator==(const ParseAction &other) const {
    return
        (type == other.type) &&
        (state_index == other.state_index) &&
        (symbol_name == other.symbol_name) &&
        (child_symbol_count == other.child_symbol_count);
}
|
||||
|
||||
// Writes a short description of the action: "#<error>", "#<accept>",
// "#<shift INDEX>" or "#<reduce NAME>".
ostream& operator<<(ostream &stream, const ParseAction &action) {
    switch (action.type) {
        case ParseActionTypeError:
            return stream << string("#<error>");
        case ParseActionTypeAccept:
            return stream << string("#<accept>");
        case ParseActionTypeShift:
            return stream << (string("#<shift ") + to_string(action.state_index) + ">");
        case ParseActionTypeReduce:
            return stream << (string("#<reduce ") + action.symbol_name + ">");
    }
    // Only reached if `type` holds a value outside the enum; return the
    // stream untouched instead of flowing off the end of the function.
    return stream;
}
|
||||
|
||||
// State
|
||||
// A new state starts with lex_state_index set to -1 (converted to size_t).
ParseState::ParseState() : lex_state_index(-1) {}
|
||||
|
||||
// Returns the set of keys of `actions`, i.e. every symbol name this state
// has at least one action registered for.
unordered_set<string> ParseState::expected_inputs() const {
    unordered_set<string> result;
    // Bind by const reference: copying an entry would copy its whole action set.
    for (const auto &entry : actions)
        result.insert(entry.first);
    return result;
}
|
||||
|
||||
// Table
|
||||
// Appends a default-constructed state and returns its index.
size_t ParseTable::add_state() {
    const size_t new_index = states.size();
    states.push_back(ParseState());
    return new_index;
}
|
||||
|
||||
void ParseTable::add_action(size_t state_index, string sym_name, ParseAction action) {
|
||||
states[state_index].actions[sym_name].insert(action);
|
||||
}
|
||||
|
||||
void ParseTable::add_default_action(size_t state_index, ParseAction action) {
|
||||
states[state_index].default_actions.insert(action);
|
||||
}
|
||||
|
||||
// Definitions of ParseTable's two static string constants.
const string ParseTable::START("__START__");
const string ParseTable::END_OF_INPUT("__END__");
|
||||
}
|
||||
}
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
#ifndef __TreeSitter__parse_table__
|
||||
#define __TreeSitter__parse_table__
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
#include "rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
// Kinds of parser action. Kept as an unscoped enum because the values are
// hashed through std::hash<int>.
enum ParseActionType {
    ParseActionTypeAccept,
    ParseActionTypeError,
    ParseActionTypeShift,
    ParseActionTypeReduce
};
|
||||
|
||||
class ParseAction {
|
||||
ParseAction(ParseActionType type, size_t state_index, std::string symbol_name, size_t child_symbol_count);
|
||||
public:
|
||||
static ParseAction Accept();
|
||||
static ParseAction Error();
|
||||
static ParseAction Shift(size_t state_index);
|
||||
static ParseAction Reduce(std::string symbol_name, size_t child_symbol_count);
|
||||
bool operator==(const ParseAction &action) const;
|
||||
|
||||
ParseActionType type;
|
||||
size_t child_symbol_count;
|
||||
std::string symbol_name;
|
||||
size_t state_index;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::lr::ParseAction> {
|
||||
size_t operator()(const tree_sitter::lr::ParseAction &action) const {
|
||||
return (
|
||||
hash<int>()(action.type) ^
|
||||
hash<string>()(action.symbol_name) ^
|
||||
hash<size_t>()(action.state_index) ^
|
||||
hash<size_t>()(action.child_symbol_count));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
std::ostream& operator<<(std::ostream &stream, const ParseAction &item);
|
||||
|
||||
class ParseState {
|
||||
public:
|
||||
ParseState();
|
||||
std::unordered_map<std::string, std::unordered_set<ParseAction>> actions;
|
||||
std::unordered_set<ParseAction> default_actions;
|
||||
std::unordered_set<std::string> expected_inputs() const;
|
||||
size_t lex_state_index;
|
||||
};
|
||||
|
||||
class ParseTable {
|
||||
public:
|
||||
size_t add_state();
|
||||
void add_action(size_t state_index, std::string symbol_name, ParseAction action);
|
||||
void add_default_action(size_t state_index, ParseAction action);
|
||||
|
||||
static const std::string START;
|
||||
static const std::string END_OF_INPUT;
|
||||
std::vector<ParseState> states;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
78
src/compiler/parse_table.cpp
Normal file
78
src/compiler/parse_table.cpp
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
#include "parse_table.h"
|
||||
|
||||
using std::string;
|
||||
using std::ostream;
|
||||
using std::to_string;
|
||||
using std::unordered_set;
|
||||
|
||||
namespace tree_sitter {
|
||||
// Action
|
||||
// Builds an action from its four fields. Initializers follow the member
// declaration order (type, child_symbol_count, symbol_name, state_index),
// which is the order in which they are actually initialized.
ParseAction::ParseAction(ParseActionType type, size_t state_index, string symbol_name, size_t child_symbol_count) :
    type(type),
    child_symbol_count(child_symbol_count),
    symbol_name(symbol_name),
    state_index(state_index) {}
|
||||
|
||||
// Error action: empty symbol name; state index and child count are both -1
// converted to size_t.
ParseAction ParseAction::Error() {
    const size_t unset = -1;
    return ParseAction(ParseActionTypeError, unset, "", unset);
}
|
||||
|
||||
// Accept action: empty symbol name; state index and child count are both -1
// converted to size_t.
ParseAction ParseAction::Accept() {
    const size_t unset = -1;
    return ParseAction(ParseActionTypeAccept, unset, "", unset);
}
|
||||
|
||||
// Shift action targeting `state_index`; no symbol name, child count unset (-1).
ParseAction ParseAction::Shift(size_t state_index) {
    const size_t no_children = -1;
    return ParseAction(ParseActionTypeShift, state_index, "", no_children);
}
|
||||
|
||||
// Reduce action producing `symbol_name` from `child_symbol_count` children;
// the state index is unset (-1).
ParseAction ParseAction::Reduce(std::string symbol_name, size_t child_symbol_count) {
    const size_t no_state = -1;
    return ParseAction(ParseActionTypeReduce, no_state, symbol_name, child_symbol_count);
}
|
||||
|
||||
// Two actions are equal when every field matches. The symbol name takes part
// in the comparison so that equality agrees with std::hash<ParseAction>,
// which hashes it too; without it, reductions to different symbols with the
// same child count would compare equal.
bool ParseAction::operator==(const ParseAction &other) const {
    return
        (type == other.type) &&
        (state_index == other.state_index) &&
        (symbol_name == other.symbol_name) &&
        (child_symbol_count == other.child_symbol_count);
}
|
||||
|
||||
// Writes a short description of the action: "#<error>", "#<accept>",
// "#<shift INDEX>" or "#<reduce NAME>".
ostream& operator<<(ostream &stream, const ParseAction &action) {
    switch (action.type) {
        case ParseActionTypeError:
            return stream << string("#<error>");
        case ParseActionTypeAccept:
            return stream << string("#<accept>");
        case ParseActionTypeShift:
            return stream << (string("#<shift ") + to_string(action.state_index) + ">");
        case ParseActionTypeReduce:
            return stream << (string("#<reduce ") + action.symbol_name + ">");
    }
    // Only reached if `type` holds a value outside the enum; return the
    // stream untouched instead of flowing off the end of the function.
    return stream;
}
|
||||
|
||||
// State
|
||||
// A new state starts with lex_state_index set to -1 (converted to size_t).
ParseState::ParseState() : lex_state_index(-1) {}
|
||||
|
||||
// Returns the set of keys of `actions`, i.e. every symbol name this state
// has at least one action registered for.
unordered_set<string> ParseState::expected_inputs() const {
    unordered_set<string> result;
    // Bind by const reference: copying an entry would copy its whole action set.
    for (const auto &entry : actions)
        result.insert(entry.first);
    return result;
}
|
||||
|
||||
// Table
|
||||
// Appends a default-constructed state and returns its index.
size_t ParseTable::add_state() {
    const size_t new_index = states.size();
    states.push_back(ParseState());
    return new_index;
}
|
||||
|
||||
void ParseTable::add_action(size_t state_index, string sym_name, ParseAction action) {
|
||||
states[state_index].actions[sym_name].insert(action);
|
||||
}
|
||||
|
||||
void ParseTable::add_default_action(size_t state_index, ParseAction action) {
|
||||
states[state_index].default_actions.insert(action);
|
||||
}
|
||||
|
||||
// Definitions of ParseTable's two static string constants.
const string ParseTable::START("__START__");
const string ParseTable::END_OF_INPUT("__END__");
|
||||
}
|
||||
70
src/compiler/parse_table.h
Normal file
70
src/compiler/parse_table.h
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
#ifndef __TreeSitter__parse_table__
|
||||
#define __TreeSitter__parse_table__
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
#include "rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
// Kinds of parser action. Kept as an unscoped enum because the values are
// hashed through std::hash<int>.
enum ParseActionType {
    ParseActionTypeAccept,
    ParseActionTypeError,
    ParseActionTypeShift,
    ParseActionTypeReduce
};
|
||||
|
||||
class ParseAction {
|
||||
ParseAction(ParseActionType type, size_t state_index, std::string symbol_name, size_t child_symbol_count);
|
||||
public:
|
||||
static ParseAction Accept();
|
||||
static ParseAction Error();
|
||||
static ParseAction Shift(size_t state_index);
|
||||
static ParseAction Reduce(std::string symbol_name, size_t child_symbol_count);
|
||||
bool operator==(const ParseAction &action) const;
|
||||
|
||||
ParseActionType type;
|
||||
size_t child_symbol_count;
|
||||
std::string symbol_name;
|
||||
size_t state_index;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const ParseAction &item);
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::ParseAction> {
|
||||
size_t operator()(const tree_sitter::ParseAction &action) const {
|
||||
return (
|
||||
hash<int>()(action.type) ^
|
||||
hash<string>()(action.symbol_name) ^
|
||||
hash<size_t>()(action.state_index) ^
|
||||
hash<size_t>()(action.child_symbol_count));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace tree_sitter {
|
||||
class ParseState {
|
||||
public:
|
||||
ParseState();
|
||||
std::unordered_map<std::string, std::unordered_set<ParseAction>> actions;
|
||||
std::unordered_set<ParseAction> default_actions;
|
||||
std::unordered_set<std::string> expected_inputs() const;
|
||||
size_t lex_state_index;
|
||||
};
|
||||
|
||||
class ParseTable {
|
||||
public:
|
||||
size_t add_state();
|
||||
void add_action(size_t state_index, std::string symbol_name, ParseAction action);
|
||||
void add_default_action(size_t state_index, ParseAction action);
|
||||
|
||||
static const std::string START;
|
||||
static const std::string END_OF_INPUT;
|
||||
std::vector<ParseState> states;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
80
src/compiler/prepare_grammar/extract_tokens.cpp
Normal file
80
src/compiler/prepare_grammar/extract_tokens.cpp
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
#include "extract_tokens.h"
|
||||
#include "search_for_symbols.h"
|
||||
#include <unordered_map>
|
||||
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::unordered_map;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
class TokenExtractor : rules::Visitor {
|
||||
public:
|
||||
rules::rule_ptr value;
|
||||
size_t anonymous_token_count = 0;
|
||||
unordered_map<string, const rules::rule_ptr> tokens;
|
||||
|
||||
rules::rule_ptr initial_apply(string name, const rules::rule_ptr rule) {
|
||||
auto result = apply(rule);
|
||||
auto symbol = std::dynamic_pointer_cast<const rules::Symbol>(result);
|
||||
if (symbol && *symbol != *rule) {
|
||||
tokens.insert({ name, tokens[symbol->name] });
|
||||
tokens.erase(symbol->name);
|
||||
anonymous_token_count--;
|
||||
return rules::rule_ptr();
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
rules::rule_ptr apply(const rules::rule_ptr rule) {
|
||||
if (search_for_symbols(rule)) {
|
||||
rule->accept(*this);
|
||||
return value;
|
||||
} else {
|
||||
string token_name = add_token(rule);
|
||||
return rules::sym(token_name);
|
||||
}
|
||||
}
|
||||
|
||||
string add_token(const rules::rule_ptr &rule) {
|
||||
for (auto pair : tokens)
|
||||
if (*pair.second == *rule)
|
||||
return pair.first;
|
||||
string name = to_string(++anonymous_token_count);
|
||||
tokens.insert({ name, rule });
|
||||
return name;
|
||||
}
|
||||
|
||||
void default_visit(const rules::Rule *rule) {
|
||||
value = rule->copy();
|
||||
}
|
||||
|
||||
void visit(const rules::Choice *choice) {
|
||||
value = rules::choice({ apply(choice->left), apply(choice->right) });
|
||||
}
|
||||
|
||||
void visit(const rules::Seq *seq) {
|
||||
value = rules::seq({ apply(seq->left), apply(seq->right) });
|
||||
}
|
||||
};
|
||||
|
||||
// Splits `input_grammar` into two grammars: the first holds the rules that
// remain after token extraction (under the original start rule name), the
// second holds the extracted tokens (with an empty start rule name).
pair<Grammar, Grammar> extract_tokens(const Grammar &input_grammar) {
    TokenExtractor extractor;
    unordered_map<string, const rules::rule_ptr> remaining_rules;

    // Iterate by const reference so each entry's name and rule pointer are
    // not copied on every iteration.
    for (const auto &entry : input_grammar.rules) {
        const string &name = entry.first;
        auto new_rule = extractor.initial_apply(name, entry.second);
        // A null result means the rule was moved into the token set.
        if (new_rule)
            remaining_rules.insert({ name, new_rule });
    }

    return {
        Grammar(input_grammar.start_rule_name, remaining_rules),
        Grammar("", extractor.tokens)
    };
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,7 +4,9 @@
|
|||
#include "grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
std::pair<Grammar, Grammar> extract_tokens(const Grammar &);
|
||||
namespace prepare_grammar {
|
||||
std::pair<Grammar, Grammar> extract_tokens(const Grammar &);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
12
src/compiler/prepare_grammar/perform.cpp
Normal file
12
src/compiler/prepare_grammar/perform.cpp
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
#include "./perform.h"
|
||||
#include "extract_tokens.h"
|
||||
|
||||
using std::pair;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
// Entry point of grammar preparation; currently this consists solely of
// token extraction.
pair<Grammar, Grammar> perform(const Grammar &input_grammar) {
    pair<Grammar, Grammar> prepared = prepare_grammar::extract_tokens(input_grammar);
    return prepared;
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,7 +4,9 @@
|
|||
#include "grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
std::pair<Grammar, Grammar> prepare_grammar(const Grammar &);
|
||||
namespace prepare_grammar {
|
||||
std::pair<Grammar, Grammar> perform(const Grammar &);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
35
src/compiler/prepare_grammar/search_for_symbols.cpp
Normal file
35
src/compiler/prepare_grammar/search_for_symbols.cpp
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
#include "search_for_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
class SymbolSearcher : rules::Visitor {
|
||||
public:
|
||||
bool value;
|
||||
|
||||
bool apply(const rules::rule_ptr rule) {
|
||||
rule->accept(*this);
|
||||
return value;
|
||||
}
|
||||
|
||||
void default_visit(const rules::Rule *rule) {
|
||||
value = false;
|
||||
}
|
||||
|
||||
void visit(const rules::Symbol *symbol) {
|
||||
value = true;
|
||||
}
|
||||
|
||||
void visit(const rules::Choice *choice) {
|
||||
value = apply(choice->left) || apply(choice->right);
|
||||
}
|
||||
|
||||
void visit(const rules::Seq *seq) {
|
||||
value = apply(seq->left) || apply(seq->right);
|
||||
}
|
||||
};
|
||||
|
||||
// Returns whether SymbolSearcher finds a symbol anywhere in `rule`.
bool search_for_symbols(const rules::rule_ptr &rule) {
    SymbolSearcher searcher;
    return searcher.apply(rule);
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,7 +4,9 @@
|
|||
#include "rules.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
bool search_for_symbols(const rules::rule_ptr &);
|
||||
namespace prepare_grammar {
|
||||
bool search_for_symbols(const rules::rule_ptr &);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
#include "rules.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#include "rules.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#include "rules.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
using std::string;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#include "rules.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#include "rules.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
using std::string;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#include "rules.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
using std::string;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#include "rules.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#include "rules.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue