Reorganize compiler directory

This commit is contained in:
Max Brunsfeld 2014-01-11 15:14:17 -08:00
parent 023a0c4f70
commit 92cec5758f
51 changed files with 630 additions and 624 deletions

View file

@ -1,6 +1,6 @@
#include "item.h"
#include "grammar.h"
#include "transitions.h"
#include "rule_transitions.h"
using std::string;
using std::vector;
@ -9,7 +9,7 @@ using std::make_shared;
using std::ostream;
namespace tree_sitter {
namespace lr {
namespace build_tables {
Item::Item(const string &rule_name, const rules::rule_ptr rule, int consumed_sym_count) :
rule_name(rule_name),
rule(rule),
@ -24,7 +24,7 @@ namespace tree_sitter {
}
transition_map<rules::Rule, Item> Item::transitions() const {
return lr::transitions(rule).map<Item>([&](rules::rule_ptr to_rule) -> item_ptr {
return rule_transitions(rule).map<Item>([&](rules::rule_ptr to_rule) -> item_ptr {
int next_sym_count = (consumed_sym_count == -1) ? -1 : (consumed_sym_count + 1);
return make_shared<Item>(rule_name, to_rule, next_sym_count);
});
@ -32,7 +32,7 @@ namespace tree_sitter {
vector<rules::Symbol> Item::next_symbols() const {
vector<rules::Symbol> result;
for (auto pair : lr::transitions(rule)) {
for (auto pair : rule_transitions(rule)) {
auto sym = dynamic_pointer_cast<const rules::Symbol>(pair.first);
if (sym) result.push_back(*sym);
}

View file

@ -9,7 +9,7 @@
namespace tree_sitter {
class Grammar;
namespace lr {
namespace build_tables {
class Item;
typedef std::shared_ptr<const Item> item_ptr;
@ -37,8 +37,8 @@ namespace tree_sitter {
namespace std {
template<>
struct hash<tree_sitter::lr::Item> {
size_t operator()(const tree_sitter::lr::Item &item) {
struct hash<tree_sitter::build_tables::Item> {
size_t operator()(const tree_sitter::build_tables::Item &item) {
return
hash<std::string>()(item.rule_name) ^
hash<tree_sitter::rules::Rule>()(*item.rule) ^

View file

@ -9,11 +9,11 @@ using std::string;
using std::make_shared;
namespace tree_sitter {
namespace lr {
namespace build_tables {
ItemSet::ItemSet(const vector<Item> &items) : contents(items) {}
ItemSet::ItemSet(const initializer_list<Item> &items) : contents(items) {}
static bool vector_contains(vector<Item> items, lr::Item item) {
static bool vector_contains(vector<Item> items, build_tables::Item item) {
return (std::find(items.begin(), items.end(), item) != items.end());
}
@ -66,7 +66,7 @@ namespace tree_sitter {
return result;
}
bool ItemSet::operator==(const tree_sitter::lr::ItemSet &other) const {
bool ItemSet::operator==(const tree_sitter::build_tables::ItemSet &other) const {
return contents == other.contents;
}

View file

@ -6,7 +6,7 @@
#include <set>
namespace tree_sitter {
namespace lr {
namespace build_tables {
class ItemSet;
typedef std::shared_ptr<const ItemSet> item_set_ptr;
@ -37,11 +37,11 @@ namespace tree_sitter {
namespace std {
template<>
struct hash<const tree_sitter::lr::ItemSet> {
size_t operator()(const tree_sitter::lr::ItemSet &item_set) const {
struct hash<const tree_sitter::build_tables::ItemSet> {
size_t operator()(const tree_sitter::build_tables::ItemSet &item_set) const {
size_t result = hash<size_t>()(item_set.size());
for (auto item : item_set)
result ^= hash<tree_sitter::lr::Item>()(item);
result ^= hash<tree_sitter::build_tables::Item>()(item);
return result;
}
};

View file

@ -1,5 +1,5 @@
#include "table_builder.h"
#include "item_set.h"
#include "./perform.h"
#include "./item_set.h"
#include "rules.h"
#include "grammar.h"
@ -7,7 +7,7 @@ using std::pair;
using std::vector;
namespace tree_sitter {
namespace lr {
namespace build_tables {
static int NOT_FOUND = -1;
class TableBuilder {
@ -113,7 +113,7 @@ namespace tree_sitter {
}
};
pair<ParseTable, LexTable> build_tables(const Grammar &grammar, const Grammar &lex_grammar) {
pair<ParseTable, LexTable> perform(const Grammar &grammar, const Grammar &lex_grammar) {
return TableBuilder(grammar, lex_grammar).build();
}
}

View file

@ -7,8 +7,8 @@
namespace tree_sitter {
class Grammar;
namespace lr {
std::pair<ParseTable, LexTable> build_tables(const Grammar &grammar, const Grammar &lex_grammar);
namespace build_tables {
std::pair<ParseTable, LexTable> perform(const Grammar &grammar, const Grammar &lex_grammar);
}
}

View file

@ -1,10 +1,10 @@
#include "transitions.h"
#include "rule_transitions.h"
#include "rules.h"
using namespace tree_sitter::rules;
namespace tree_sitter {
namespace lr {
namespace build_tables {
class TransitionsVisitor : public rules::Visitor {
public:
transition_map<Rule, Rule> value;
@ -22,14 +22,14 @@ namespace tree_sitter {
}
void visit(const Choice *rule) {
value = transitions(rule->left);
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
value = rule_transitions(rule->left);
value.merge(rule_transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
return choice({ left, right });
});
}
void visit(const Seq *rule) {
value = transitions(rule->left).map<Rule>([&](const rule_ptr left_rule) -> rule_ptr {
value = rule_transitions(rule->left).map<Rule>([&](const rule_ptr left_rule) -> rule_ptr {
if (typeid(*left_rule) == typeid(Blank))
return rule->right;
else
@ -38,7 +38,7 @@ namespace tree_sitter {
}
void visit(const Repeat *rule) {
value = transitions(rule->content).map<Rule>([&](const rule_ptr &value) -> rule_ptr {
value = rule_transitions(rule->content).map<Rule>([&](const rule_ptr &value) -> rule_ptr {
return seq({ value, choice({ rule->copy(), blank() }) });
});
}
@ -47,15 +47,15 @@ namespace tree_sitter {
rule_ptr result = character(rule->value[0]);
for (int i = 1; i < rule->value.length(); i++)
result = seq({ result, character(rule->value[i]) });
value = transitions(result);
value = rule_transitions(result);
}
void visit(const Pattern *rule) {
value = transitions(rule->to_rule_tree());
value = rule_transitions(rule->to_rule_tree());
}
};
transition_map<Rule, Rule> transitions(const rule_ptr &rule) {
transition_map<Rule, Rule> rule_transitions(const rule_ptr &rule) {
TransitionsVisitor visitor;
rule->accept(visitor);
return visitor.value;

View file

@ -5,8 +5,8 @@
#include "transition_map.h"
namespace tree_sitter {
namespace lr {
transition_map<rules::Rule, rules::Rule> transitions(const rules::rule_ptr &rule);
namespace build_tables {
transition_map<rules::Rule, rules::Rule> rule_transitions(const rules::rule_ptr &rule);
}
}

17
src/compiler/compile.cpp Normal file
View file

@ -0,0 +1,17 @@
#include "compile.h"
#include "grammar.h"
#include "prepare_grammar/perform.h"
#include "build_tables/perform.h"
#include "generate_code/c_code.h"
namespace tree_sitter {
// Drives the whole pipeline for `grammar`:
//   1. prepare_grammar::perform produces a pair of grammars,
//   2. build_tables::perform turns that pair into a pair of tables
//      (the second grammar is passed as the lex grammar),
//   3. generate_code::c_code renders the tables, given the rule names of the
//      first grammar followed by the rule names of the second.
std::string compile(const Grammar &grammar) {
    auto prepared = prepare_grammar::perform(grammar);
    auto built = build_tables::perform(prepared.first, prepared.second);

    auto all_names = prepared.first.rule_names();
    auto lexical_names = prepared.second.rule_names();
    all_names.insert(all_names.end(), lexical_names.begin(), lexical_names.end());

    return generate_code::c_code(all_names, built.first, built.second);
}
}

12
src/compiler/compile.h Normal file
View file

@ -0,0 +1,12 @@
#ifndef __tree_sitter__compile__
#define __tree_sitter__compile__
#include <string>
namespace tree_sitter {
// Forward declaration; callers of compile() must include the Grammar definition themselves.
class Grammar;
// Takes a grammar and returns a string produced by the compilation pipeline.
std::string compile(const Grammar &grammar);
}
#endif

View file

@ -8,10 +8,9 @@ using std::to_string;
using std::unordered_map;
using std::unordered_set;
using std::vector;
using namespace tree_sitter::lr;
namespace tree_sitter {
namespace code_gen {
namespace generate_code {
static void str_replace(string &input, const string &search, const string &replace) {
size_t pos = 0;
while (1) {

View file

@ -6,8 +6,8 @@
#include "lex_table.h"
namespace tree_sitter {
namespace code_gen {
std::string c_code(std::vector<std::string> rule_names, const lr::ParseTable &parse_table, const lr::LexTable &lex_table);
namespace generate_code {
std::string c_code(std::vector<std::string> rule_names, const ParseTable &parse_table, const LexTable &lex_table);
}
}

View file

@ -1,78 +0,0 @@
#include "extract_tokens.h"
#include "search_for_symbols.h"
#include <unordered_map>
using std::pair;
using std::string;
using std::to_string;
using std::unordered_map;
namespace tree_sitter {
class TokenExtractor : rules::Visitor {
public:
rules::rule_ptr value;
size_t anonymous_token_count = 0;
unordered_map<string, const rules::rule_ptr> tokens;
rules::rule_ptr initial_apply(string name, const rules::rule_ptr rule) {
auto result = apply(rule);
auto symbol = std::dynamic_pointer_cast<const rules::Symbol>(result);
if (symbol && *symbol != *rule) {
tokens.insert({ name, tokens[symbol->name] });
tokens.erase(symbol->name);
anonymous_token_count--;
return rules::rule_ptr();
} else {
return result;
}
}
rules::rule_ptr apply(const rules::rule_ptr rule) {
if (search_for_symbols(rule)) {
rule->accept(*this);
return value;
} else {
string token_name = add_token(rule);
return rules::sym(token_name);
}
}
string add_token(const rules::rule_ptr &rule) {
for (auto pair : tokens)
if (*pair.second == *rule)
return pair.first;
string name = to_string(++anonymous_token_count);
tokens.insert({ name, rule });
return name;
}
void default_visit(const rules::Rule *rule) {
value = rule->copy();
}
void visit(const rules::Choice *choice) {
value = rules::choice({ apply(choice->left), apply(choice->right) });
}
void visit(const rules::Seq *seq) {
value = rules::seq({ apply(seq->left), apply(seq->right) });
}
};
pair<Grammar, Grammar> extract_tokens(const Grammar &input_grammar) {
TokenExtractor extractor;
unordered_map<string, const rules::rule_ptr> rules;
for (auto pair : input_grammar.rules) {
string name = pair.first;
auto new_rule = extractor.initial_apply(name, pair.second);
if (new_rule.get())
rules.insert({ name, new_rule });
}
return {
Grammar(input_grammar.start_rule_name, rules),
Grammar("", extractor.tokens)
};
}
}

View file

@ -1,10 +0,0 @@
#include "prepare_grammar.h"
#include "extract_tokens.h"
using std::pair;
namespace tree_sitter {
pair<Grammar, Grammar> prepare_grammar(const Grammar &input_grammar) {
return extract_tokens(input_grammar);
}
}

View file

@ -1,33 +0,0 @@
#include "search_for_symbols.h"
namespace tree_sitter {
class SymbolSearcher : rules::Visitor {
public:
bool value;
bool apply(const rules::rule_ptr rule) {
rule->accept(*this);
return value;
}
void default_visit(const rules::Rule *rule) {
value = false;
}
void visit(const rules::Symbol *symbol) {
value = true;
}
void visit(const rules::Choice *choice) {
value = apply(choice->left) || apply(choice->right);
}
void visit(const rules::Seq *seq) {
value = apply(seq->left) || apply(seq->right);
}
};
bool search_for_symbols(const rules::rule_ptr &rule) {
return SymbolSearcher().apply(rule);
}
}

View file

@ -0,0 +1,67 @@
#include "lex_table.h"
using std::string;
using std::to_string;
using std::unordered_map;
using std::unordered_set;
using std::vector;
namespace tree_sitter {
// Action
// Private constructor used by the Error/Advance/Accept factories.
// The initializers are listed in member declaration order
// (type, symbol_name, state_index): members are always initialized in
// declaration order, so listing them differently is misleading and
// triggers -Wreorder.
LexAction::LexAction(LexActionType type, size_t state_index, std::string symbol_name) :
    type(type),
    symbol_name(symbol_name),
    state_index(state_index) {}
// Builds an error action: state_index is -1 converted to size_t and
// symbol_name is empty.
LexAction LexAction::Error() {
    const size_t no_state = static_cast<size_t>(-1);
    return LexAction(LexActionTypeError, no_state, "");
}
// Builds an advance action that carries `state_index`; symbol_name is empty.
LexAction LexAction::Advance(size_t state_index) {
    LexAction action(LexActionTypeAdvance, state_index, "");
    return action;
}
// Builds an accept action for `symbol_name`; state_index is -1 converted
// to size_t.
LexAction LexAction::Accept(std::string symbol_name) {
    const size_t no_state = static_cast<size_t>(-1);
    return LexAction(LexActionTypeAccept, no_state, symbol_name);
}
// Two actions are equal when type, state_index and symbol_name all match.
bool LexAction::operator==(const LexAction &other) const {
    if (type != other.type)
        return false;
    if (state_index != other.state_index)
        return false;
    return symbol_name == other.symbol_name;
}
// Writes a short textual form of `action` to `stream`:
//   "#<error>", "#<accept NAME>" or "#<advance INDEX>".
// Returns `stream` so calls can be chained.
std::ostream& operator<<(std::ostream &stream, const LexAction &action) {
    switch (action.type) {
        case LexActionTypeError:
            return stream << string("#<error>");
        case LexActionTypeAccept:
            return stream << string("#<accept ") + action.symbol_name + ">";
        case LexActionTypeAdvance:
            return stream << string("#<advance ") + to_string(action.state_index) + ">";
    }
    // Reached only if `type` holds a value outside the enumerators above.
    // Flowing off the end of a non-void function is undefined behaviour,
    // so hand the stream back untouched instead.
    return stream;
}
// State
// Returns the set of keys of `actions`, i.e. every CharMatch for which this
// state has at least one entry. default_actions is not consulted.
unordered_set<CharMatch> LexState::expected_inputs() const {
    unordered_set<CharMatch> result;
    // Bind by const reference: only the key is read, so copying each entry
    // (including its unordered_set<LexAction>) would be wasted work.
    for (const auto &entry : actions)
        result.insert(entry.first);
    return result;
}
// Table
// Appends a default-constructed LexState and returns its index in `states`.
size_t LexTable::add_state() {
    const size_t new_index = states.size();
    states.push_back(LexState());
    return new_index;
}
void LexTable::add_action(size_t state_index, CharMatch match, LexAction action) {
states[state_index].actions[match].insert(action);
}
void LexTable::add_default_action(size_t state_index, LexAction action) {
states[state_index].default_actions.insert(action);
}
}

65
src/compiler/lex_table.h Normal file
View file

@ -0,0 +1,65 @@
#ifndef __TreeSitter__lex_table__
#define __TreeSitter__lex_table__
#include <unordered_map>
#include <vector>
#include <string>
#include <unordered_set>
#include "char_match.h"
namespace tree_sitter {
// Tag stored in LexAction::type; one enumerator per LexAction factory
// (Accept, Error, Advance).
typedef enum {
LexActionTypeAccept,
LexActionTypeError,
LexActionTypeAdvance
} LexActionType;
// Value type describing one lexer action. The constructor is private;
// instances are obtained through the static Accept/Error/Advance factories.
class LexAction {
LexAction(LexActionType type, size_t state_index, std::string symbol_name);
public:
// Factory functions, one per LexActionType enumerator.
static LexAction Accept(std::string symbol_name);
static LexAction Error();
static LexAction Advance(size_t state_index);
bool operator==(const LexAction &action) const;
// Data members are public. Note their declaration order (type, symbol_name,
// state_index) is the order in which they are initialized.
LexActionType type;
std::string symbol_name;
size_t state_index;
};
std::ostream& operator<<(std::ostream &stream, const LexAction &item);
}
namespace std {
    // Hash support so LexAction can be stored in unordered containers.
    // The result is the XOR of the hashes of type (as int), symbol_name
    // and state_index.
    template<>
    struct hash<tree_sitter::LexAction> {
        size_t operator()(const tree_sitter::LexAction &action) const {
            const size_t type_hash = hash<int>()(action.type);
            const size_t name_hash = hash<string>()(action.symbol_name);
            const size_t index_hash = hash<size_t>()(action.state_index);
            return type_hash ^ name_hash ^ index_hash;
        }
    };
}
namespace tree_sitter {
// One state of the lex table.
class LexState {
public:
// Actions keyed by the CharMatch that selects them; each key maps to a set of actions.
std::unordered_map<CharMatch, std::unordered_set<LexAction>> actions;
// Actions stored separately from any CharMatch key.
std::unordered_set<LexAction> default_actions;
// Returns a set of CharMatch values for this state.
std::unordered_set<CharMatch> expected_inputs() const;
};
// Table of lexer states, addressed by index into `states`.
class LexTable {
public:
// Adds a state and returns a size_t identifying it.
size_t add_state();
// Register an action (keyed by `match`, or as a default) on the state at `state_index`.
void add_action(size_t state_index, CharMatch match, LexAction action);
void add_default_action(size_t state_index, LexAction action);
// NOTE(review): these two statics are only declared here; no definition is
// visible in the accompanying lex_table.cpp — confirm one exists before
// odr-using them, otherwise linking will fail.
static const std::string START;
static const std::string END_OF_INPUT;
std::vector<LexState> states;
};
}
#endif

View file

@ -1,70 +0,0 @@
#include "lex_table.h"
using std::string;
using std::to_string;
using std::unordered_map;
using std::unordered_set;
using std::vector;
namespace tree_sitter {
namespace lr {
// Action
LexAction::LexAction(LexActionType type, size_t state_index, std::string symbol_name) :
type(type),
state_index(state_index),
symbol_name(symbol_name) {}
LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, "");
}
LexAction LexAction::Advance(size_t state_index) {
return LexAction(LexActionTypeAdvance, state_index, "");
}
LexAction LexAction::Accept(std::string symbol_name) {
return LexAction(LexActionTypeAccept, -1, symbol_name);
}
bool LexAction::operator==(const LexAction &other) const {
return
(type == other.type) &&
(state_index == other.state_index) &&
(symbol_name == other.symbol_name);
}
std::ostream& operator<<(std::ostream &stream, const LexAction &action) {
switch (action.type) {
case LexActionTypeError:
return stream << string("#<error>");
case LexActionTypeAccept:
return stream << string("#<accept ") + action.symbol_name + ">";
case LexActionTypeAdvance:
return stream << string("#<advance ") + to_string(action.state_index) + ">";
}
}
// State
unordered_set<CharMatch> LexState::expected_inputs() const {
unordered_set<CharMatch> result;
for (auto pair : actions)
result.insert(pair.first);
return result;
}
// Table
size_t LexTable::add_state() {
states.push_back(LexState());
return states.size() - 1;
}
void LexTable::add_action(size_t state_index, CharMatch match, LexAction action) {
states[state_index].actions[match].insert(action);
}
void LexTable::add_default_action(size_t state_index, LexAction action) {
states[state_index].default_actions.insert(action);
}
}
}

View file

@ -1,69 +0,0 @@
#ifndef __TreeSitter__lex_table__
#define __TreeSitter__lex_table__
#include <unordered_map>
#include <vector>
#include <string>
#include <unordered_set>
#include "char_match.h"
namespace tree_sitter {
namespace lr {
typedef enum {
LexActionTypeAccept,
LexActionTypeError,
LexActionTypeAdvance
} LexActionType;
class LexAction {
LexAction(LexActionType type, size_t state_index, std::string symbol_name);
public:
static LexAction Accept(std::string symbol_name);
static LexAction Error();
static LexAction Advance(size_t state_index);
bool operator==(const LexAction &action) const;
LexActionType type;
std::string symbol_name;
size_t state_index;
};
}
}
namespace std {
template<>
struct hash<tree_sitter::lr::LexAction> {
size_t operator()(const tree_sitter::lr::LexAction &action) const {
return (
hash<int>()(action.type) ^
hash<string>()(action.symbol_name) ^
hash<size_t>()(action.state_index));
}
};
}
namespace tree_sitter {
namespace lr {
std::ostream& operator<<(std::ostream &stream, const LexAction &item);
class LexState {
public:
std::unordered_map<CharMatch, std::unordered_set<LexAction>> actions;
std::unordered_set<LexAction> default_actions;
std::unordered_set<CharMatch> expected_inputs() const;
};
class LexTable {
public:
size_t add_state();
void add_action(size_t state_index, CharMatch match, LexAction action);
void add_default_action(size_t state_index, LexAction action);
static const std::string START;
static const std::string END_OF_INPUT;
std::vector<LexState> states;
};
}
}
#endif

View file

@ -1,80 +0,0 @@
#include "parse_table.h"
using std::string;
using std::ostream;
using std::to_string;
using std::unordered_set;
namespace tree_sitter {
namespace lr {
// Action
ParseAction::ParseAction(ParseActionType type, size_t state_index, string symbol_name, size_t child_symbol_count) :
type(type),
state_index(state_index),
symbol_name(symbol_name),
child_symbol_count(child_symbol_count) {};
ParseAction ParseAction::Error() {
return ParseAction(ParseActionTypeError, -1, "", -1);
}
ParseAction ParseAction::Accept() {
return ParseAction(ParseActionTypeAccept, -1, "", -1);
}
ParseAction ParseAction::Shift(size_t state_index) {
return ParseAction(ParseActionTypeShift, state_index, "", -1);
}
ParseAction ParseAction::Reduce(std::string symbol_name, size_t child_symbol_count) {
return ParseAction(ParseActionTypeReduce, -1, symbol_name, child_symbol_count);
}
bool ParseAction::operator==(const ParseAction &other) const {
bool types_eq = type == other.type;
bool state_indices_eq = state_index == other.state_index;
bool child_symbol_counts_eq = child_symbol_count == other.child_symbol_count;
return types_eq && state_indices_eq && child_symbol_counts_eq;
}
ostream& operator<<(ostream &stream, const ParseAction &action) {
switch (action.type) {
case ParseActionTypeError:
return stream << string("#<error>");
case ParseActionTypeAccept:
return stream << string("#<accept>");
case ParseActionTypeShift:
return stream << (string("#<shift ") + to_string(action.state_index) + ">");
case ParseActionTypeReduce:
return stream << (string("#<reduce ") + action.symbol_name + ">");
}
}
// State
ParseState::ParseState() : lex_state_index(-1) {}
unordered_set<string> ParseState::expected_inputs() const {
unordered_set<string> result;
for (auto pair : actions)
result.insert(pair.first);
return result;
}
// Table
size_t ParseTable::add_state() {
states.push_back(ParseState());
return states.size() - 1;
}
void ParseTable::add_action(size_t state_index, string sym_name, ParseAction action) {
states[state_index].actions[sym_name].insert(action);
}
void ParseTable::add_default_action(size_t state_index, ParseAction action) {
states[state_index].default_actions.insert(action);
}
const string ParseTable::START = "__START__";
const string ParseTable::END_OF_INPUT = "__END__";
}
}

View file

@ -1,74 +0,0 @@
#ifndef __TreeSitter__parse_table__
#define __TreeSitter__parse_table__
#include <unordered_map>
#include <vector>
#include <unordered_set>
#include "rule.h"
namespace tree_sitter {
namespace lr {
typedef enum {
ParseActionTypeAccept,
ParseActionTypeError,
ParseActionTypeShift,
ParseActionTypeReduce,
} ParseActionType;
class ParseAction {
ParseAction(ParseActionType type, size_t state_index, std::string symbol_name, size_t child_symbol_count);
public:
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(size_t state_index);
static ParseAction Reduce(std::string symbol_name, size_t child_symbol_count);
bool operator==(const ParseAction &action) const;
ParseActionType type;
size_t child_symbol_count;
std::string symbol_name;
size_t state_index;
};
}
}
namespace std {
template<>
struct hash<tree_sitter::lr::ParseAction> {
size_t operator()(const tree_sitter::lr::ParseAction &action) const {
return (
hash<int>()(action.type) ^
hash<string>()(action.symbol_name) ^
hash<size_t>()(action.state_index) ^
hash<size_t>()(action.child_symbol_count));
}
};
}
namespace tree_sitter {
namespace lr {
std::ostream& operator<<(std::ostream &stream, const ParseAction &item);
class ParseState {
public:
ParseState();
std::unordered_map<std::string, std::unordered_set<ParseAction>> actions;
std::unordered_set<ParseAction> default_actions;
std::unordered_set<std::string> expected_inputs() const;
size_t lex_state_index;
};
class ParseTable {
public:
size_t add_state();
void add_action(size_t state_index, std::string symbol_name, ParseAction action);
void add_default_action(size_t state_index, ParseAction action);
static const std::string START;
static const std::string END_OF_INPUT;
std::vector<ParseState> states;
};
}
}
#endif

View file

@ -0,0 +1,78 @@
#include "parse_table.h"
using std::string;
using std::ostream;
using std::to_string;
using std::unordered_set;
namespace tree_sitter {
// Action
// Private constructor used by the Error/Accept/Shift/Reduce factories.
// The initializers follow the member declaration order
// (type, child_symbol_count, symbol_name, state_index), which is the order
// in which members are actually initialized; this avoids -Wreorder.
// The stray ';' that followed the body has been removed.
ParseAction::ParseAction(ParseActionType type, size_t state_index, string symbol_name, size_t child_symbol_count) :
    type(type),
    child_symbol_count(child_symbol_count),
    symbol_name(symbol_name),
    state_index(state_index) {}
// Builds an error action: state_index and child_symbol_count are both -1
// converted to size_t, and symbol_name is empty.
ParseAction ParseAction::Error() {
    const size_t unset = static_cast<size_t>(-1);
    return ParseAction(ParseActionTypeError, unset, "", unset);
}
// Builds an accept action: state_index and child_symbol_count are both -1
// converted to size_t, and symbol_name is empty.
ParseAction ParseAction::Accept() {
    const size_t unset = static_cast<size_t>(-1);
    return ParseAction(ParseActionTypeAccept, unset, "", unset);
}
// Builds a shift action carrying `state_index`; symbol_name is empty and
// child_symbol_count is -1 converted to size_t.
ParseAction ParseAction::Shift(size_t state_index) {
    const size_t no_children = static_cast<size_t>(-1);
    return ParseAction(ParseActionTypeShift, state_index, "", no_children);
}
// Builds a reduce action for `symbol_name` with `child_symbol_count`;
// state_index is -1 converted to size_t.
ParseAction ParseAction::Reduce(std::string symbol_name, size_t child_symbol_count) {
    const size_t no_state = static_cast<size_t>(-1);
    return ParseAction(ParseActionTypeReduce, no_state, symbol_name, child_symbol_count);
}
// Two actions are equal when every field matches: type, state_index,
// symbol_name and child_symbol_count.
//
// symbol_name must take part in the comparison: without it, two Reduce
// actions for different symbols with the same child count compare equal,
// even though std::hash<ParseAction> hashes symbol_name. Unordered
// containers require that objects comparing equal also hash equal.
bool ParseAction::operator==(const ParseAction &other) const {
    bool types_eq = type == other.type;
    bool state_indices_eq = state_index == other.state_index;
    bool symbol_names_eq = symbol_name == other.symbol_name;
    bool child_symbol_counts_eq = child_symbol_count == other.child_symbol_count;
    return types_eq && state_indices_eq && symbol_names_eq && child_symbol_counts_eq;
}
// Writes a short textual form of `action` to `stream`:
//   "#<error>", "#<accept>", "#<shift INDEX>" or "#<reduce NAME>".
// Returns `stream` so calls can be chained.
ostream& operator<<(ostream &stream, const ParseAction &action) {
    switch (action.type) {
        case ParseActionTypeError:
            return stream << string("#<error>");
        case ParseActionTypeAccept:
            return stream << string("#<accept>");
        case ParseActionTypeShift:
            return stream << (string("#<shift ") + to_string(action.state_index) + ">");
        case ParseActionTypeReduce:
            return stream << (string("#<reduce ") + action.symbol_name + ">");
    }
    // Reached only if `type` holds a value outside the enumerators above.
    // Flowing off the end of a non-void function is undefined behaviour,
    // so hand the stream back untouched instead.
    return stream;
}
// State
// A new state starts with lex_state_index set to -1 converted to size_t.
ParseState::ParseState() :
    lex_state_index(static_cast<size_t>(-1)) {}
// Returns the set of keys of `actions`, i.e. every symbol name for which
// this state has at least one entry. default_actions is not consulted.
unordered_set<string> ParseState::expected_inputs() const {
    unordered_set<string> result;
    // Bind by const reference: only the key is read, so copying each entry
    // (including its unordered_set<ParseAction>) would be wasted work.
    for (const auto &entry : actions)
        result.insert(entry.first);
    return result;
}
// Table
// Appends a default-constructed ParseState and returns its index in `states`.
size_t ParseTable::add_state() {
    const size_t new_index = states.size();
    states.push_back(ParseState());
    return new_index;
}
void ParseTable::add_action(size_t state_index, string sym_name, ParseAction action) {
states[state_index].actions[sym_name].insert(action);
}
void ParseTable::add_default_action(size_t state_index, ParseAction action) {
states[state_index].default_actions.insert(action);
}
const string ParseTable::START = "__START__";
const string ParseTable::END_OF_INPUT = "__END__";
}

View file

@ -0,0 +1,70 @@
#ifndef __TreeSitter__parse_table__
#define __TreeSitter__parse_table__
#include <unordered_map>
#include <vector>
#include <unordered_set>
#include "rule.h"
namespace tree_sitter {
// Tag stored in ParseAction::type; one enumerator per ParseAction factory
// (Accept, Error, Shift, Reduce).
typedef enum {
ParseActionTypeAccept,
ParseActionTypeError,
ParseActionTypeShift,
ParseActionTypeReduce,
} ParseActionType;
// Value type describing one parser action. The constructor is private;
// instances are obtained through the static Accept/Error/Shift/Reduce factories.
class ParseAction {
ParseAction(ParseActionType type, size_t state_index, std::string symbol_name, size_t child_symbol_count);
public:
// Factory functions, one per ParseActionType enumerator.
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(size_t state_index);
static ParseAction Reduce(std::string symbol_name, size_t child_symbol_count);
bool operator==(const ParseAction &action) const;
// Data members are public. Note their declaration order (type,
// child_symbol_count, symbol_name, state_index) is the order in which they
// are initialized.
ParseActionType type;
size_t child_symbol_count;
std::string symbol_name;
size_t state_index;
};
std::ostream& operator<<(std::ostream &stream, const ParseAction &item);
}
namespace std {
    // Hash support so ParseAction can be stored in unordered containers.
    // The result is the XOR of the hashes of type (as int), symbol_name,
    // state_index and child_symbol_count.
    template<>
    struct hash<tree_sitter::ParseAction> {
        size_t operator()(const tree_sitter::ParseAction &action) const {
            const size_t type_hash = hash<int>()(action.type);
            const size_t name_hash = hash<string>()(action.symbol_name);
            const size_t index_hash = hash<size_t>()(action.state_index);
            const size_t count_hash = hash<size_t>()(action.child_symbol_count);
            return type_hash ^ name_hash ^ index_hash ^ count_hash;
        }
    };
}
namespace tree_sitter {
// One state of the parse table.
class ParseState {
public:
ParseState();
// Actions keyed by symbol name; each key maps to a set of actions.
std::unordered_map<std::string, std::unordered_set<ParseAction>> actions;
// Actions stored separately from any symbol-name key.
std::unordered_set<ParseAction> default_actions;
// Returns a set of symbol names for this state.
std::unordered_set<std::string> expected_inputs() const;
// Index associating this parse state with a lex state.
// NOTE(review): presumably an index into LexTable::states — confirm against the table builder.
size_t lex_state_index;
};
// Table of parser states, addressed by index into `states`.
class ParseTable {
public:
// Adds a state and returns a size_t identifying it.
size_t add_state();
// Register an action (keyed by `symbol_name`, or as a default) on the state at `state_index`.
void add_action(size_t state_index, std::string symbol_name, ParseAction action);
void add_default_action(size_t state_index, ParseAction action);
// Reserved symbol-name constants shared by users of the table.
static const std::string START;
static const std::string END_OF_INPUT;
std::vector<ParseState> states;
};
}
#endif

View file

@ -0,0 +1,80 @@
#include "extract_tokens.h"
#include "search_for_symbols.h"
#include <unordered_map>
using std::pair;
using std::string;
using std::to_string;
using std::unordered_map;
namespace tree_sitter {
namespace prepare_grammar {
// Visitor that rewrites a rule tree: any sub-rule for which
// search_for_symbols() returns false is recorded in `tokens` and replaced by
// a Symbol carrying the token's name.
class TokenExtractor : rules::Visitor {
public:
// Result slot: written by the visit methods, read back by apply().
rules::rule_ptr value;
// Counter used to generate names for extracted tokens ("1", "2", ...).
size_t anonymous_token_count = 0;
// Extracted token rules, keyed by token name.
unordered_map<string, const rules::rule_ptr> tokens;
// Entry point for a named top-level rule. Returns the rewritten rule, or a
// null rule_ptr when the rule as a whole became a token (in which case the
// token is stored in `tokens` under `name`).
rules::rule_ptr initial_apply(string name, const rules::rule_ptr rule) {
auto result = apply(rule);
auto symbol = std::dynamic_pointer_cast<const rules::Symbol>(result);
// The result is a Symbol that compares unequal to the input rule:
// re-key the token entry from the symbol's name to `name`.
if (symbol && *symbol != *rule) {
tokens.insert({ name, tokens[symbol->name] });
tokens.erase(symbol->name);
// NOTE(review): this assumes the token was freshly numbered by add_token().
// add_token() can also return the name of an already-existing equal token,
// in which case the counter was not incremented; the decrement could then
// desynchronise the numbering or wrap the unsigned counter at 0 — confirm.
anonymous_token_count--;
return rules::rule_ptr();
} else {
return result;
}
}
// Rewrites `rule`: visits it when search_for_symbols() is true, otherwise
// registers it as a token and returns a Symbol with the token's name.
rules::rule_ptr apply(const rules::rule_ptr rule) {
if (search_for_symbols(rule)) {
rule->accept(*this);
return value;
} else {
string token_name = add_token(rule);
return rules::sym(token_name);
}
}
// Returns the name of an existing token equal to `rule`, or stores `rule`
// under a newly generated numeric name and returns that name.
string add_token(const rules::rule_ptr &rule) {
// Linear scan over all known tokens, compared by value.
for (auto pair : tokens)
if (*pair.second == *rule)
return pair.first;
string name = to_string(++anonymous_token_count);
tokens.insert({ name, rule });
return name;
}
// Fallback for rule kinds without a dedicated overload: keep a copy as-is.
void default_visit(const rules::Rule *rule) {
value = rule->copy();
}
// Rebuilds a Choice from its rewritten left and right operands.
void visit(const rules::Choice *choice) {
value = rules::choice({ apply(choice->left), apply(choice->right) });
}
// Rebuilds a Seq from its rewritten left and right operands.
void visit(const rules::Seq *seq) {
value = rules::seq({ apply(seq->left), apply(seq->right) });
}
};
// Splits `input_grammar` into two grammars using a single TokenExtractor:
//   first  - the input's start rule name with every rule for which
//            initial_apply() returned a non-null rewritten rule;
//   second - an empty start rule name with the extractor's collected tokens.
pair<Grammar, Grammar> extract_tokens(const Grammar &input_grammar) {
    TokenExtractor extractor;
    unordered_map<string, const rules::rule_ptr> kept_rules;

    for (const auto &entry : input_grammar.rules) {
        string rule_name = entry.first;
        auto rewritten = extractor.initial_apply(rule_name, entry.second);
        // A null result means the rule was moved into the token set.
        if (!rewritten.get())
            continue;
        kept_rules.insert({ rule_name, rewritten });
    }

    return {
        Grammar(input_grammar.start_rule_name, kept_rules),
        Grammar("", extractor.tokens)
    };
}
}
}

View file

@ -4,7 +4,9 @@
#include "grammar.h"
namespace tree_sitter {
std::pair<Grammar, Grammar> extract_tokens(const Grammar &);
namespace prepare_grammar {
std::pair<Grammar, Grammar> extract_tokens(const Grammar &);
}
}
#endif

View file

@ -0,0 +1,12 @@
#include "./perform.h"
#include "extract_tokens.h"
using std::pair;
namespace tree_sitter {
namespace prepare_grammar {
// Grammar preparation entry point; currently the only step is
// prepare_grammar::extract_tokens, whose pair of grammars is returned as-is.
pair<Grammar, Grammar> perform(const Grammar &input_grammar) {
    pair<Grammar, Grammar> prepared = prepare_grammar::extract_tokens(input_grammar);
    return prepared;
}
}
}

View file

@ -4,7 +4,9 @@
#include "grammar.h"
namespace tree_sitter {
std::pair<Grammar, Grammar> prepare_grammar(const Grammar &);
namespace prepare_grammar {
std::pair<Grammar, Grammar> perform(const Grammar &);
}
}
#endif

View file

@ -0,0 +1,35 @@
#include "search_for_symbols.h"
namespace tree_sitter {
namespace prepare_grammar {
// Visitor that reports whether a rule contains a Symbol. Only Symbol, Choice
// and Seq have dedicated overloads; every other rule kind goes through
// default_visit and yields false.
class SymbolSearcher : rules::Visitor {
public:
    // Result slot written by the visit methods and read back by apply().
    // Given an initializer so the value is well-defined however the searcher
    // is constructed (previously it relied on callers value-initializing it).
    bool value = false;

    // Visits `rule` and returns the resulting flag.
    bool apply(const rules::rule_ptr rule) {
        rule->accept(*this);
        return value;
    }

    // Rule kinds without a dedicated overload do not count as a symbol.
    void default_visit(const rules::Rule *rule) {
        value = false;
    }

    void visit(const rules::Symbol *symbol) {
        value = true;
    }

    // A Choice matches when either operand does; the right operand is not
    // visited if the left one already matched.
    void visit(const rules::Choice *choice) {
        value = apply(choice->left) || apply(choice->right);
    }

    // A Seq matches when either operand does; the right operand is not
    // visited if the left one already matched.
    void visit(const rules::Seq *seq) {
        value = apply(seq->left) || apply(seq->right);
    }
};
// Returns the result of running a fresh SymbolSearcher over `rule`.
bool search_for_symbols(const rules::rule_ptr &rule) {
    // Value-initialized, exactly like the temporary it replaces.
    SymbolSearcher searcher{};
    return searcher.apply(rule);
}
}
}

View file

@ -4,7 +4,9 @@
#include "rules.h"
namespace tree_sitter {
bool search_for_symbols(const rules::rule_ptr &);
namespace prepare_grammar {
bool search_for_symbols(const rules::rule_ptr &);
}
}
#endif

View file

@ -1,5 +1,4 @@
#include "rules.h"
#include "transition_map.h"
namespace tree_sitter {
namespace rules {

View file

@ -1,5 +1,4 @@
#include "rules.h"
#include "transition_map.h"
using std::string;
using std::hash;

View file

@ -1,5 +1,4 @@
#include "rules.h"
#include "transition_map.h"
using std::string;

View file

@ -1,5 +1,4 @@
#include "rules.h"
#include "transition_map.h"
using std::string;
using std::hash;

View file

@ -1,5 +1,4 @@
#include "rules.h"
#include "transition_map.h"
using std::string;

View file

@ -1,5 +1,4 @@
#include "rules.h"
#include "transition_map.h"
using std::string;

View file

@ -1,5 +1,4 @@
#include "rules.h"
#include "transition_map.h"
using std::string;
using std::hash;

View file

@ -1,5 +1,4 @@
#include "rules.h"
#include "transition_map.h"
using std::string;
using std::hash;