Generate parse tables and code using pre-processed grammars
This commit is contained in:
parent
2621f06315
commit
2afd8843dc
27 changed files with 272 additions and 285 deletions
|
|
@ -51,12 +51,12 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
class CCodeGenerator {
|
||||
const Grammar grammar;
|
||||
const vector<string> rule_names;
|
||||
const ParseTable parse_table;
|
||||
const LexTable lex_table;
|
||||
public:
|
||||
CCodeGenerator(const Grammar &grammar, const ParseTable &parse_table, const LexTable &lex_table) :
|
||||
grammar(grammar),
|
||||
CCodeGenerator(vector<string> rule_names, const ParseTable &parse_table, const LexTable &lex_table) :
|
||||
rule_names(rule_names),
|
||||
parse_table(parse_table),
|
||||
lex_table(lex_table)
|
||||
{}
|
||||
|
|
@ -152,7 +152,7 @@ namespace tree_sitter {
|
|||
|
||||
string symbol_enum() {
|
||||
string result = "typedef enum {\n";
|
||||
for (string rule_name : grammar.rule_names())
|
||||
for (string rule_name : rule_names)
|
||||
result += indent(symbol_id(rule_name)) + ",\n";
|
||||
result += indent(symbol_id(ParseTable::END_OF_INPUT));
|
||||
return result + "\n"
|
||||
|
|
@ -192,8 +192,8 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
string c_code(const Grammar &grammar, const ParseTable &parse_table, const LexTable &lex_table) {
|
||||
return CCodeGenerator(grammar, parse_table, lex_table).code();
|
||||
string c_code(const vector<string> rule_names, const ParseTable &parse_table, const LexTable &lex_table) {
|
||||
return CCodeGenerator(rule_names, parse_table, lex_table).code();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace code_gen {
|
||||
std::string c_code(const Grammar &grammar, const lr::ParseTable &parse_table, const lr::LexTable &lex_table);
|
||||
std::string c_code(std::vector<std::string> rule_names, const lr::ParseTable &parse_table, const lr::LexTable &lex_table);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,17 +16,22 @@ namespace tree_sitter {
|
|||
// Creates the item for `rule_name` with nothing consumed yet: the rule body is
// looked up in `grammar` and the consumed-symbol count starts at 0.
Item Item::at_beginning_of_rule(const std::string &rule_name, const Grammar &grammar) {
    const auto rule_body = grammar.rule(rule_name);
    return Item(rule_name, rule_body, 0);
}
|
||||
|
||||
|
||||
// Creates the item for the token rule `rule_name`, with the rule body looked up
// in `grammar`. The consumed-symbol count is set to -1 rather than 0.
Item Item::at_beginning_of_token(const std::string &rule_name, const Grammar &grammar) {
    const auto token_body = grammar.rule(rule_name);
    return Item(rule_name, token_body, -1);
}
|
||||
|
||||
transition_map<rules::Rule, Item> Item::transitions() const {
|
||||
return lr::transitions(rule).map<Item>([&](rules::rule_ptr to_rule) -> item_ptr {
|
||||
return std::make_shared<Item>(rule_name, to_rule, consumed_sym_count + 1);
|
||||
int next_sym_count = (consumed_sym_count == -1) ? -1 : (consumed_sym_count + 1);
|
||||
return std::make_shared<Item>(rule_name, to_rule, next_sym_count);
|
||||
});
|
||||
};
|
||||
|
||||
vector<rules::Symbol> Item::next_symbols() const {
|
||||
vector<rules::Symbol> result;
|
||||
vector<rules::NonTerminal> Item::next_symbols() const {
|
||||
vector<rules::NonTerminal> result;
|
||||
for (auto pair : lr::transitions(rule)) {
|
||||
shared_ptr<const rules::Symbol> sym = dynamic_pointer_cast<const rules::Symbol>(pair.first);
|
||||
auto sym = dynamic_pointer_cast<const rules::NonTerminal>(pair.first);
|
||||
if (sym) result.push_back(*sym);
|
||||
}
|
||||
return result;
|
||||
|
|
@ -39,7 +44,10 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
bool Item::is_done() const {
|
||||
return *rule == rules::Blank();
|
||||
for (auto pair : transitions()) {
|
||||
if (*pair.first == rules::Blank()) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(ostream &stream, const Item &item) {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include <string>
|
||||
#include "rule.h"
|
||||
#include "symbol.h"
|
||||
#include "non_terminal.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -17,9 +17,10 @@ namespace tree_sitter {
|
|||
public:
|
||||
Item(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count);
|
||||
static Item at_beginning_of_rule(const std::string &rule_name, const Grammar &grammar);
|
||||
static Item at_beginning_of_token(const std::string &rule_name, const Grammar &grammar);
|
||||
|
||||
transition_map<rules::Rule, Item> transitions() const;
|
||||
std::vector<rules::Symbol> next_symbols() const;
|
||||
std::vector<rules::NonTerminal> next_symbols() const;
|
||||
bool operator==(const Item &other) const;
|
||||
bool is_done() const;
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ namespace tree_sitter {
|
|||
static void add_item(vector<Item> &vector, const Item &item, const Grammar &grammar) {
|
||||
if (!vector_contains(vector, item)) {
|
||||
vector.push_back(item);
|
||||
for (rules::Symbol rule : item.next_symbols()) {
|
||||
for (rules::NonTerminal rule : item.next_symbols()) {
|
||||
Item next_item = Item::at_beginning_of_rule(rule.name, grammar);
|
||||
add_item(vector, next_item, grammar);
|
||||
}
|
||||
|
|
@ -33,29 +33,17 @@ namespace tree_sitter {
|
|||
|
||||
ItemSet::ItemSet(const Item &item, const Grammar &grammar) : contents(closure_in_grammar(item, grammar)) {}
|
||||
|
||||
template<typename RuleClass>
|
||||
static transition_map<RuleClass, ItemSet> transitions(const ItemSet &item_set, const Grammar &grammar) {
|
||||
transition_map<RuleClass, ItemSet> result;
|
||||
for (auto item : item_set) {
|
||||
transition_map<rules::Rule, ItemSet> ItemSet::all_transitions(const Grammar &grammar) const {
|
||||
transition_map<rules::Rule, ItemSet> result;
|
||||
for (auto item : *this) {
|
||||
auto item_transitions = item.transitions();
|
||||
for (auto pair : item_transitions) {
|
||||
std::shared_ptr<const RuleClass> rule = dynamic_pointer_cast<const RuleClass>(pair.first);
|
||||
Item item = *pair.second;
|
||||
if (rule.get() != nullptr)
|
||||
result.add(rule, std::make_shared<ItemSet>(item, grammar));
|
||||
result.add(pair.first, std::make_shared<ItemSet>(*pair.second, grammar));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
transition_map<rules::Character, ItemSet> ItemSet::char_transitions(const Grammar &grammar) const {
|
||||
return transitions<rules::Character>(*this, grammar);
|
||||
}
|
||||
|
||||
transition_map<rules::Symbol, ItemSet> ItemSet::sym_transitions(const Grammar &grammar) const {
|
||||
return transitions<rules::Symbol>(*this, grammar);
|
||||
}
|
||||
|
||||
// Two item sets are equal exactly when their `contents` compare equal.
bool ItemSet::operator==(const tree_sitter::lr::ItemSet &other) const {
    const bool same_contents = (contents == other.contents);
    return same_contents;
}
|
||||
|
|
|
|||
|
|
@ -21,9 +21,26 @@ namespace tree_sitter {
|
|||
const_iterator begin() const;
|
||||
const_iterator end() const;
|
||||
size_t size() const;
|
||||
|
||||
transition_map<rules::Rule, ItemSet> all_transitions(const Grammar &grammar) const;
|
||||
|
||||
transition_map<rules::Character, ItemSet> char_transitions(const Grammar &grammar) const;
|
||||
transition_map<rules::Symbol, ItemSet> sym_transitions(const Grammar &grammar) const;
|
||||
template<typename RuleClass>
|
||||
transition_map<RuleClass, ItemSet> transitions(const Grammar &grammar) const {
|
||||
transition_map<RuleClass, ItemSet> result;
|
||||
for (auto transition : all_transitions(grammar)) {
|
||||
auto rule = std::dynamic_pointer_cast<const RuleClass>(transition.first);
|
||||
if (rule.get()) result.add(rule, transition.second);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename RuleClass>
|
||||
std::vector<RuleClass> next_inputs(const Grammar &grammar) const {
|
||||
std::vector<RuleClass> result;
|
||||
for (auto pair : transitions<RuleClass>(grammar))
|
||||
result.push_back(*pair.first);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool operator==(const ItemSet &other) const;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -49,8 +49,6 @@ namespace tree_sitter {
|
|||
// Default constructor: initializes `actions` from an empty unordered_map.
LexState::LexState() : actions(unordered_map<CharMatch, unordered_set<LexAction>>()) {}
|
||||
|
||||
// Table
|
||||
LexTable::LexTable(vector<string> rule_names) : symbol_names(rule_names) {}
|
||||
|
||||
size_t LexTable::add_state() {
|
||||
states.push_back(LexState());
|
||||
return states.size() - 1;
|
||||
|
|
|
|||
|
|
@ -55,8 +55,6 @@ namespace tree_sitter {
|
|||
|
||||
class LexTable {
|
||||
public:
|
||||
LexTable(std::vector<std::string> rule_names);
|
||||
|
||||
size_t add_state();
|
||||
void add_action(size_t state_index, CharMatch match, LexAction action);
|
||||
void add_default_action(size_t state_index, LexAction action);
|
||||
|
|
@ -64,7 +62,6 @@ namespace tree_sitter {
|
|||
static const std::string START;
|
||||
static const std::string END_OF_INPUT;
|
||||
std::vector<LexState> states;
|
||||
const std::vector<std::string> symbol_names;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,10 +55,6 @@ namespace tree_sitter {
|
|||
{}
|
||||
|
||||
// Table
|
||||
ParseTable::ParseTable(vector<string> symbol_names) :
|
||||
symbol_names(symbol_names),
|
||||
states(vector<ParseState>()) {};
|
||||
|
||||
size_t ParseTable::add_state() {
|
||||
states.push_back(ParseState());
|
||||
return states.size() - 1;
|
||||
|
|
|
|||
|
|
@ -59,8 +59,6 @@ namespace tree_sitter {
|
|||
|
||||
class ParseTable {
|
||||
public:
|
||||
ParseTable(std::vector<std::string> rule_names);
|
||||
|
||||
size_t add_state();
|
||||
void add_action(size_t state_index, std::string symbol_name, ParseAction action);
|
||||
void add_default_action(size_t state_index, ParseAction action);
|
||||
|
|
@ -68,7 +66,6 @@ namespace tree_sitter {
|
|||
static const std::string START;
|
||||
static const std::string END_OF_INPUT;
|
||||
std::vector<ParseState> states;
|
||||
const std::vector<std::string> symbol_names;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@
|
|||
#include "item_set.h"
|
||||
#include "grammar.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -13,6 +15,7 @@ namespace tree_sitter {
|
|||
|
||||
class TableBuilder {
|
||||
const Grammar grammar;
|
||||
const Grammar lex_grammar;
|
||||
std::unordered_map<const ItemSet, size_t> parse_state_indices;
|
||||
std::unordered_map<const ItemSet, size_t> lex_state_indices;
|
||||
ParseTable parse_table;
|
||||
|
|
@ -29,7 +32,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void add_shift_actions(const ItemSet &item_set, size_t state_index) {
|
||||
for (auto transition : item_set.sym_transitions(grammar)) {
|
||||
for (auto transition : item_set.transitions<rules::Symbol>(grammar)) {
|
||||
rules::Symbol symbol = *transition.first;
|
||||
ItemSet item_set = *transition.second;
|
||||
size_t new_state_index = add_parse_state(item_set);
|
||||
|
|
@ -38,7 +41,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void add_advance_actions(const ItemSet &item_set, size_t state_index) {
|
||||
for (auto transition : item_set.char_transitions(grammar)) {
|
||||
for (auto transition : item_set.transitions<rules::Character>(grammar)) {
|
||||
rules::Character rule = *transition.first;
|
||||
ItemSet item_set = *transition.second;
|
||||
size_t new_state_index = add_lex_state(item_set);
|
||||
|
|
@ -77,13 +80,21 @@ namespace tree_sitter {
|
|||
return state_index;
|
||||
}
|
||||
|
||||
// Builds the lex item set that accompanies a parse item set: one
// Item::at_beginning_of_token per Token returned by
// next_inputs<rules::Token>(grammar), with token rules resolved in lex_grammar.
ItemSet lex_item_set_for_parse_item_set(const ItemSet &parse_item_set) {
    vector<Item> token_items;
    auto expected_tokens = parse_item_set.next_inputs<rules::Token>(grammar);
    for (const rules::Token &token : expected_tokens) {
        token_items.push_back(Item::at_beginning_of_token(token.name, lex_grammar));
    }
    return ItemSet(token_items);
}
|
||||
|
||||
size_t add_parse_state(const ItemSet &item_set) {
|
||||
auto state_index = parse_state_index_for_item_set(item_set);
|
||||
if (state_index == NOT_FOUND) {
|
||||
state_index = parse_table.add_state();
|
||||
parse_state_indices[item_set] = state_index;
|
||||
|
||||
parse_table.states[state_index].lex_state_index = add_lex_state(item_set);
|
||||
ItemSet lex_item_set = lex_item_set_for_parse_item_set(item_set);
|
||||
parse_table.states[state_index].lex_state_index = add_lex_state(lex_item_set);
|
||||
add_shift_actions(item_set, state_index);
|
||||
add_reduce_actions(item_set, state_index);
|
||||
}
|
||||
|
|
@ -92,13 +103,9 @@ namespace tree_sitter {
|
|||
|
||||
public:
|
||||
|
||||
TableBuilder(const Grammar &grammar) :
|
||||
TableBuilder(const Grammar &grammar, const Grammar &lex_grammar) :
|
||||
grammar(grammar),
|
||||
parse_table(ParseTable(grammar.rule_names())),
|
||||
lex_table(LexTable(grammar.rule_names())),
|
||||
parse_state_indices(unordered_map<const ItemSet, size_t>()),
|
||||
lex_state_indices(unordered_map<const ItemSet, size_t>())
|
||||
{};
|
||||
lex_grammar(lex_grammar) {};
|
||||
|
||||
std::pair<ParseTable, LexTable> build() {
|
||||
auto item = Item(ParseTable::START, rules::sym(grammar.start_rule_name), 0);
|
||||
|
|
@ -108,8 +115,8 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
std::pair<ParseTable, LexTable> build_tables(const tree_sitter::Grammar &grammar) {
|
||||
return TableBuilder(grammar).build();
|
||||
std::pair<ParseTable, LexTable> build_tables(const Grammar &grammar, const Grammar &lex_grammar) {
|
||||
return TableBuilder(grammar, lex_grammar).build();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -8,7 +8,7 @@ namespace tree_sitter {
|
|||
class Grammar;
|
||||
|
||||
namespace lr {
|
||||
std::pair<ParseTable, LexTable> build_tables(const Grammar &grammar);
|
||||
std::pair<ParseTable, LexTable> build_tables(const Grammar &grammar, const Grammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ namespace tree_sitter {
|
|||
transition_map<Rule, Rule> value;
|
||||
|
||||
void visit(const Blank *rule) {
|
||||
value = transition_map<Rule, Rule>();
|
||||
value = transition_map<Rule, Rule>({{ blank(), blank() }});
|
||||
}
|
||||
|
||||
void visit(const Character *rule) {
|
||||
|
|
@ -20,7 +20,11 @@ namespace tree_sitter {
|
|||
// Symbol: the result is a single transition from a copy of the symbol to blank().
void visit(const Symbol *rule) {
    auto consumed = rule->copy();
    value = transition_map<Rule, Rule>({{ consumed, blank() }});
}
|
||||
|
||||
|
||||
// Token: the result is a single transition from a copy of the token to blank().
void visit(const Token *rule) {
    auto consumed = rule->copy();
    value = transition_map<Rule, Rule>({{ consumed, blank() }});
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
value = transitions(rule->left);
|
||||
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
|
|
@ -39,7 +43,7 @@ namespace tree_sitter {
|
|||
|
||||
void visit(const Repeat *rule) {
|
||||
value = transitions(rule->content).map<Rule>([&](const rule_ptr &value) -> rule_ptr {
|
||||
return seq({ value, choice({ repeat(rule->content), blank() }) });
|
||||
return seq({ value, choice({ rule->copy(), blank() }) });
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
28
src/compiler/rules/non_terminal.cpp
Normal file
28
src/compiler/rules/non_terminal.cpp
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#include "rules.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
// Constructs a non-terminal by forwarding `name` to the Symbol base class.
NonTerminal::NonTerminal(const std::string &name)
    : Symbol(name) {}
|
||||
|
||||
// Equal only to another NonTerminal with the same name; a rule that is not a
// NonTerminal always compares unequal.
bool NonTerminal::operator==(const Rule &rule) const {
    const auto *candidate = dynamic_cast<const NonTerminal *>(&rule);
    if (candidate == nullptr) return false;
    return candidate->name == name;
}
|
||||
|
||||
// Returns a shared pointer to a newly allocated copy of this non-terminal.
rule_ptr NonTerminal::copy() const {
    const NonTerminal &self = *this;
    return std::make_shared<NonTerminal>(self);
}
|
||||
|
||||
// Renders the non-terminal as "#<non-terminal 'NAME'>".
string NonTerminal::to_string() const {
    string text("#<non-terminal '");
    text += name;
    text += "'>";
    return text;
}
|
||||
|
||||
// Visitor dispatch: passes this non-terminal to visitor.visit().
void NonTerminal::accept(Visitor &visitor) const {
    const NonTerminal *self = this;
    visitor.visit(self);
}
|
||||
}
|
||||
}
|
||||
20
src/compiler/rules/non_terminal.h
Normal file
20
src/compiler/rules/non_terminal.h
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
#ifndef __tree_sitter__non_terminal__
|
||||
#define __tree_sitter__non_terminal__
|
||||
|
||||
#include "symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
// A Symbol subclass for non-terminals. It declares its own constructor plus
// equality, copy, string-conversion and visitor-dispatch members.
class NonTerminal : public Symbol {
|
||||
public:
|
||||
// Constructs a non-terminal with the given name.
NonTerminal(const std::string &name);
|
||||
|
||||
// Equality comparison against an arbitrary Rule.
bool operator==(const Rule& other) const;
|
||||
// Returns a rule_ptr to a copy of this non-terminal.
rule_ptr copy() const;
|
||||
// Returns a string representation of this non-terminal.
std::string to_string() const;
|
||||
// Visitor entry point.
void accept(Visitor &visitor) const;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -47,7 +47,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
sym_ptr sym(const string &name) {
|
||||
return make_shared<Symbol>(name);
|
||||
return make_shared<NonTerminal>(name);
|
||||
}
|
||||
|
||||
rule_ptr token(const std::string &name) {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include "pattern.h"
|
||||
#include "character.h"
|
||||
#include "repeat.h"
|
||||
#include "non_terminal.h"
|
||||
#include "visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -6,17 +6,13 @@ using std::hash;
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Token::Token(const std::string &name) : name(name) {};
|
||||
Token::Token(const std::string &name) : Symbol(name) {};
|
||||
|
||||
// Equal only to another Token with the same name; a rule that is not a Token
// always compares unequal.
bool Token::operator==(const Rule &rule) const {
    const auto *candidate = dynamic_cast<const Token *>(&rule);
    if (candidate == nullptr) return false;
    return candidate->name == name;
}
|
||||
|
||||
size_t Token::hash_code() const {
|
||||
return typeid(this).hash_code() ^ hash<string>()(name);
|
||||
}
|
||||
|
||||
// Returns a shared pointer to a newly allocated copy of this token.
rule_ptr Token::copy() const {
    const Token &self = *this;
    return std::make_shared<Token>(self);
}
|
||||
|
|
|
|||
|
|
@ -1,21 +1,18 @@
|
|||
#ifndef __tree_sitter__token__
|
||||
#define __tree_sitter__token__
|
||||
|
||||
#include "rule.h"
|
||||
#include "symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Token : public Rule {
|
||||
class Token : public Symbol {
|
||||
public:
|
||||
Token(const std::string &name);
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor &visitor) const;
|
||||
|
||||
const std::string name;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue