Intern symbols during grammar preparation
This commit is contained in:
parent
33d781f492
commit
68d44fd565
67 changed files with 10829 additions and 10557 deletions
|
|
@ -10,6 +10,7 @@
|
|||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
#include "compiler/build_tables/conflict_manager.h"
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
|
|
@ -24,7 +25,7 @@ namespace tree_sitter {
|
|||
using std::set;
|
||||
using std::unordered_map;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::ISymbol;
|
||||
using rules::CharacterSet;
|
||||
|
||||
namespace build_tables {
|
||||
|
|
@ -45,7 +46,7 @@ namespace tree_sitter {
|
|||
|
||||
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (auto &transition : sym_transitions(item_set, grammar)) {
|
||||
const Symbol &symbol = transition.first;
|
||||
const ISymbol &symbol = transition.first;
|
||||
const ParseItemSet &item_set = transition.second;
|
||||
set<int> precedence_values = precedence_values_for_item_set(item_set);
|
||||
|
||||
|
|
@ -117,7 +118,7 @@ namespace tree_sitter {
|
|||
LexItemSet lex_item_set_for_parse_state(const ParseState &state) {
|
||||
LexItemSet result;
|
||||
for (auto &symbol : state.expected_inputs()) {
|
||||
if (lex_grammar.has_definition(symbol))
|
||||
if (symbol.is_token() && !symbol.is_built_in())
|
||||
result.insert(LexItem(symbol, after_separators(lex_grammar.rule(symbol))));
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
result.insert(LexItem(symbol, after_separators(CharacterSet({ 0 }).copy())));
|
||||
|
|
@ -160,12 +161,12 @@ namespace tree_sitter {
|
|||
|
||||
void add_error_lex_state() {
|
||||
LexItemSet error_item_set;
|
||||
for (auto &pair : lex_grammar.rules) {
|
||||
LexItem item(Symbol(pair.first, rules::SymbolTypeNormal), after_separators(pair.second));
|
||||
for (size_t i = 0; i < lex_grammar.rules.size(); i++) {
|
||||
LexItem item(ISymbol(i, rules::SymbolOptionToken), after_separators(lex_grammar.rules[i].second));
|
||||
error_item_set.insert(item);
|
||||
}
|
||||
for (auto &pair : lex_grammar.aux_rules) {
|
||||
LexItem item(Symbol(pair.first, rules::SymbolTypeAuxiliary), after_separators(pair.second));
|
||||
for (size_t i = 0; i < lex_grammar.aux_rules.size(); i++) {
|
||||
LexItem item(ISymbol(i, rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary)), after_separators(lex_grammar.aux_rules[i].second));
|
||||
error_item_set.insert(item);
|
||||
}
|
||||
error_item_set.insert(LexItem(rules::END_OF_INPUT(), after_separators(CharacterSet({ 0 }).copy())));
|
||||
|
|
@ -175,15 +176,14 @@ namespace tree_sitter {
|
|||
|
||||
public:
|
||||
TableBuilder(const PreparedGrammar &grammar,
|
||||
const PreparedGrammar &lex_grammar,
|
||||
const map<Symbol, string> &rule_names) :
|
||||
const PreparedGrammar &lex_grammar) :
|
||||
grammar(grammar),
|
||||
lex_grammar(lex_grammar),
|
||||
conflict_manager(ConflictManager(grammar, lex_grammar, rule_names))
|
||||
conflict_manager(ConflictManager(grammar, lex_grammar))
|
||||
{}
|
||||
|
||||
void build() {
|
||||
auto start_symbol = make_shared<Symbol>(grammar.start_rule_name());
|
||||
auto start_symbol = make_shared<ISymbol>(0);
|
||||
ParseItem item(rules::START(), start_symbol, {}, rules::END_OF_INPUT());
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
|
||||
add_parse_state(item_set);
|
||||
|
|
@ -200,9 +200,8 @@ namespace tree_sitter {
|
|||
|
||||
pair<pair<ParseTable, LexTable>, vector<Conflict>>
|
||||
build_tables(const PreparedGrammar &grammar,
|
||||
const PreparedGrammar &lex_grammar,
|
||||
const map<Symbol, string> &rule_names) {
|
||||
TableBuilder builder(grammar, lex_grammar, rule_names);
|
||||
const PreparedGrammar &lex_grammar) {
|
||||
TableBuilder builder(grammar, lex_grammar);
|
||||
builder.build();
|
||||
return { { builder.parse_table, builder.lex_table }, builder.conflicts() };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,8 +15,7 @@ namespace tree_sitter {
|
|||
namespace build_tables {
|
||||
std::pair<std::pair<ParseTable, LexTable>, std::vector<Conflict>>
|
||||
build_tables(const PreparedGrammar &grammar,
|
||||
const PreparedGrammar &lex_grammar,
|
||||
const std::map<rules::Symbol, std::string> &rule_names);
|
||||
const PreparedGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,14 +15,11 @@ namespace tree_sitter {
|
|||
using std::vector;
|
||||
|
||||
ConflictManager::ConflictManager(const PreparedGrammar &parse_grammar,
|
||||
const PreparedGrammar &lex_grammar,
|
||||
const map<Symbol, string> &rule_names) :
|
||||
const PreparedGrammar &lex_grammar) :
|
||||
parse_grammar(parse_grammar),
|
||||
lex_grammar(lex_grammar),
|
||||
rule_names(rule_names)
|
||||
{}
|
||||
lex_grammar(lex_grammar) {}
|
||||
|
||||
bool ConflictManager::resolve_parse_action(const rules::Symbol &symbol,
|
||||
bool ConflictManager::resolve_parse_action(const rules::ISymbol &symbol,
|
||||
const ParseAction &old_action,
|
||||
const ParseAction &new_action) {
|
||||
if (new_action.type < old_action.type)
|
||||
|
|
@ -63,9 +60,7 @@ namespace tree_sitter {
|
|||
return false;
|
||||
} else {
|
||||
record_conflict(symbol, old_action, new_action);
|
||||
size_t old_index = parse_grammar.index_of(old_action.symbol);
|
||||
size_t new_index = parse_grammar.index_of(new_action.symbol);
|
||||
return new_index < old_index;
|
||||
return new_action.symbol.index < old_action.symbol.index;
|
||||
}
|
||||
}
|
||||
default:
|
||||
|
|
@ -83,9 +78,7 @@ namespace tree_sitter {
|
|||
return true;
|
||||
case LexActionTypeAccept:
|
||||
if (new_action.type == LexActionTypeAccept) {
|
||||
size_t old_index = lex_grammar.index_of(old_action.symbol);
|
||||
size_t new_index = lex_grammar.index_of(new_action.symbol);
|
||||
return (new_index < old_index);
|
||||
return new_action.symbol.index < old_action.symbol.index;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
|
|
@ -109,16 +102,16 @@ namespace tree_sitter {
|
|||
return precedences + ")";
|
||||
}
|
||||
|
||||
string message_for_action(const ParseAction &action, const map<Symbol, string> &rule_names) {
|
||||
string message_for_action(const ParseAction &action, const PreparedGrammar &parse_grammar) {
|
||||
switch (action.type) {
|
||||
case ParseActionTypeShift:
|
||||
return "shift " + precedence_string(action);
|
||||
case ParseActionTypeReduce: {
|
||||
auto pair = rule_names.find(action.symbol);
|
||||
if (pair == rule_names.end())
|
||||
return "ERROR " + action.symbol.name;
|
||||
string name = parse_grammar.rule_name(action.symbol);
|
||||
if (name == "")
|
||||
return "ERROR" + to_string(action.symbol.index);
|
||||
else
|
||||
return "reduce " + pair->second + " " + precedence_string(action);
|
||||
return "reduce " + name + " " + precedence_string(action);
|
||||
}
|
||||
case ParseActionTypeAccept:
|
||||
return "accept";
|
||||
|
|
@ -127,12 +120,15 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
void ConflictManager::record_conflict(const rules::Symbol &symbol,
|
||||
void ConflictManager::record_conflict(const rules::ISymbol &symbol,
|
||||
const ParseAction &left,
|
||||
const ParseAction &right) {
|
||||
conflicts_.insert(Conflict(rule_names.find(symbol)->second + ": " +
|
||||
message_for_action(left, rule_names) + " / " +
|
||||
message_for_action(right, rule_names)));
|
||||
string name = symbol.is_token() ?
|
||||
lex_grammar.rule_name(symbol) :
|
||||
parse_grammar.rule_name(symbol);
|
||||
conflicts_.insert(Conflict(name + ": " +
|
||||
message_for_action(left, parse_grammar) + " / " +
|
||||
message_for_action(right, parse_grammar)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,6 +7,7 @@
|
|||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -14,21 +15,19 @@ namespace tree_sitter {
|
|||
class ConflictManager {
|
||||
const PreparedGrammar parse_grammar;
|
||||
const PreparedGrammar lex_grammar;
|
||||
const std::map<rules::Symbol, std::string> rule_names;
|
||||
std::set<Conflict> conflicts_;
|
||||
|
||||
public:
|
||||
ConflictManager(const PreparedGrammar &parse_grammar,
|
||||
const PreparedGrammar &lex_grammar,
|
||||
const std::map<rules::Symbol, std::string> &rule_names);
|
||||
const PreparedGrammar &lex_grammar);
|
||||
|
||||
bool resolve_lex_action(const LexAction &old_action,
|
||||
const LexAction &new_action);
|
||||
bool resolve_parse_action(const rules::Symbol &symbol,
|
||||
bool resolve_parse_action(const rules::ISymbol &symbol,
|
||||
const ParseAction &old_action,
|
||||
const ParseAction &new_action);
|
||||
|
||||
void record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right);
|
||||
void record_conflict(const rules::ISymbol &symbol, const ParseAction &left, const ParseAction &right);
|
||||
const std::vector<Conflict> conflicts() const;
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,47 +6,48 @@
|
|||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::set;
|
||||
using rules::Symbol;
|
||||
using rules::ISymbol;
|
||||
|
||||
namespace build_tables {
|
||||
set<Symbol> set_union(const set<Symbol> &left, const set<Symbol> &right) {
|
||||
set<Symbol> result = left;
|
||||
set<ISymbol> set_union(const set<ISymbol> &left, const set<ISymbol> &right) {
|
||||
set<ISymbol> result = left;
|
||||
result.insert(right.begin(), right.end());
|
||||
return result;
|
||||
}
|
||||
|
||||
class FirstSet : public rules::RuleFn<set<Symbol>> {
|
||||
class FirstSet : public rules::RuleFn<set<ISymbol>> {
|
||||
const PreparedGrammar grammar;
|
||||
set<Symbol> visited_symbols;
|
||||
set<ISymbol> visited_symbols;
|
||||
public:
|
||||
explicit FirstSet(const PreparedGrammar &grammar) : grammar(grammar) {}
|
||||
|
||||
set<Symbol> apply_to(const Symbol *rule) {
|
||||
set<ISymbol> apply_to(const ISymbol *rule) {
|
||||
if (visited_symbols.find(*rule) == visited_symbols.end()) {
|
||||
visited_symbols.insert(*rule);
|
||||
|
||||
if (grammar.has_definition(*rule)) {
|
||||
return apply(grammar.rule(*rule));
|
||||
if (rule->is_token()) {
|
||||
return set<ISymbol>({ *rule });
|
||||
} else {
|
||||
return set<Symbol>({ *rule });
|
||||
return apply(grammar.rule(*rule));
|
||||
}
|
||||
} else {
|
||||
return set<Symbol>();
|
||||
return set<ISymbol>();
|
||||
}
|
||||
}
|
||||
|
||||
set<Symbol> apply_to(const rules::Metadata *rule) {
|
||||
set<ISymbol> apply_to(const rules::Metadata *rule) {
|
||||
return apply(rule->rule);
|
||||
}
|
||||
|
||||
set<Symbol> apply_to(const rules::Choice *rule) {
|
||||
set<ISymbol> apply_to(const rules::Choice *rule) {
|
||||
return set_union(apply(rule->left), apply(rule->right));
|
||||
}
|
||||
|
||||
set<Symbol> apply_to(const rules::Seq *rule) {
|
||||
set<ISymbol> apply_to(const rules::Seq *rule) {
|
||||
auto result = apply(rule->left);
|
||||
if (rule_can_be_blank(rule->left, grammar)) {
|
||||
return set_union(result, apply(rule->right));
|
||||
|
|
@ -56,12 +57,12 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
set<Symbol> first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
|
||||
set<ISymbol> first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
|
||||
return FirstSet(grammar).apply(rule);
|
||||
}
|
||||
|
||||
set<Symbol> first_set(const ParseItemSet &item_set, const PreparedGrammar &grammar) {
|
||||
set<Symbol> result;
|
||||
set<ISymbol> first_set(const ParseItemSet &item_set, const PreparedGrammar &grammar) {
|
||||
set<ISymbol> result;
|
||||
for (auto &item : item_set) {
|
||||
result = set_union(result, first_set(item.rule, grammar));
|
||||
if (rule_can_be_blank(item.rule, grammar))
|
||||
|
|
|
|||
|
|
@ -15,14 +15,14 @@ namespace tree_sitter {
|
|||
* the beginning of a string derivable from a given rule,
|
||||
* in a given grammar.
|
||||
*/
|
||||
std::set<rules::Symbol>
|
||||
std::set<rules::ISymbol>
|
||||
first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
|
||||
|
||||
/*
|
||||
* Returns the set of terminal symbols that can appear at
|
||||
* the beginning of any item in the given set.
|
||||
*/
|
||||
std::set<rules::Symbol>
|
||||
std::set<rules::ISymbol>
|
||||
first_set(const ParseItemSet &item_set, const PreparedGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,19 +7,19 @@
|
|||
namespace tree_sitter {
|
||||
using std::set;
|
||||
using std::map;
|
||||
using rules::Symbol;
|
||||
using rules::ISymbol;
|
||||
using rules::rule_ptr;
|
||||
|
||||
namespace build_tables {
|
||||
map<Symbol, set<Symbol>> follow_sets(const ParseItem &item,
|
||||
map<ISymbol, set<ISymbol>> follow_sets(const ParseItem &item,
|
||||
const PreparedGrammar &grammar) {
|
||||
map<Symbol, set<Symbol>> result;
|
||||
map<ISymbol, set<ISymbol>> result;
|
||||
|
||||
for (auto &pair : sym_transitions(item.rule)) {
|
||||
Symbol symbol = pair.first;
|
||||
ISymbol symbol = pair.first;
|
||||
rule_ptr next_rule = pair.second;
|
||||
if (grammar.has_definition(symbol)) {
|
||||
set<Symbol> following_terminals = first_set(next_rule, grammar);
|
||||
if (!symbol.is_token() && !symbol.is_built_in()) {
|
||||
set<ISymbol> following_terminals = first_set(next_rule, grammar);
|
||||
if (rule_can_be_blank(next_rule, grammar))
|
||||
following_terminals.insert(item.lookahead_sym);
|
||||
result.insert({ symbol, following_terminals });
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ namespace tree_sitter {
|
|||
* item. The values are the sets of terminals which can appear immediately
|
||||
* after the corresponding non-terminals.
|
||||
*/
|
||||
std::map<rules::Symbol, std::set<rules::Symbol>>
|
||||
std::map<rules::ISymbol, std::set<rules::ISymbol>>
|
||||
follow_sets(const ParseItem &item, const PreparedGrammar &grammar);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule) :
|
||||
Item::Item(const rules::ISymbol &lhs, const rules::rule_ptr rule) :
|
||||
lhs(lhs),
|
||||
rule(rule) {}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,16 +2,16 @@
|
|||
#define COMPILER_BUILD_TABLES_ITEM_H_
|
||||
|
||||
#include <unordered_set>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class Item {
|
||||
public:
|
||||
Item(const rules::Symbol &lhs, rules::rule_ptr rule);
|
||||
Item(const rules::ISymbol &lhs, rules::rule_ptr rule);
|
||||
bool is_done() const;
|
||||
|
||||
rules::Symbol lhs;
|
||||
rules::ISymbol lhs;
|
||||
rules::rule_ptr rule;
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
using std::set;
|
||||
using rules::Symbol;
|
||||
using rules::ISymbol;
|
||||
|
||||
namespace build_tables {
|
||||
static bool contains(const ParseItemSet *items, const ParseItem &item) {
|
||||
|
|
@ -22,8 +22,8 @@ namespace tree_sitter {
|
|||
if (!contains(item_set, item)) {
|
||||
item_set->insert(item);
|
||||
for (const auto &pair : follow_sets(item, grammar)) {
|
||||
const Symbol &non_terminal = pair.first;
|
||||
const set<Symbol> &terminals = pair.second;
|
||||
const ISymbol &non_terminal = pair.first;
|
||||
const set<ISymbol> &terminals = pair.second;
|
||||
for (const auto &terminal : terminals) {
|
||||
ParseItem next_item(non_terminal, grammar.rule(non_terminal), 0, terminal);
|
||||
add_item(item_set, next_item, grammar);
|
||||
|
|
|
|||
|
|
@ -3,12 +3,13 @@
|
|||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/build_tables/merge_transitions.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::map;
|
||||
using std::unordered_set;
|
||||
using rules::CharacterSet;
|
||||
using rules::Symbol;
|
||||
using rules::ISymbol;
|
||||
|
||||
namespace build_tables {
|
||||
map<CharacterSet, LexItemSet>
|
||||
|
|
@ -21,11 +22,11 @@ namespace tree_sitter {
|
|||
return result;
|
||||
}
|
||||
|
||||
map<Symbol, ParseItemSet>
|
||||
map<ISymbol, ParseItemSet>
|
||||
sym_transitions(const ParseItem &item, const PreparedGrammar &grammar) {
|
||||
map<Symbol, ParseItemSet> result;
|
||||
map<ISymbol, ParseItemSet> result;
|
||||
for (auto transition : sym_transitions(item.rule)) {
|
||||
Symbol rule = transition.first;
|
||||
ISymbol rule = transition.first;
|
||||
ParseItem new_item(item.lhs, transition.second, item.consumed_symbol_count + 1, item.lookahead_sym);
|
||||
result.insert({ rule, item_set_closure(ParseItemSet({ new_item }), grammar) });
|
||||
}
|
||||
|
|
@ -53,11 +54,11 @@ namespace tree_sitter {
|
|||
return result;
|
||||
}
|
||||
|
||||
map<Symbol, ParseItemSet>
|
||||
map<ISymbol, ParseItemSet>
|
||||
sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar) {
|
||||
map<Symbol, ParseItemSet> result;
|
||||
map<ISymbol, ParseItemSet> result;
|
||||
for (const ParseItem &item : item_set) {
|
||||
map<Symbol, ParseItemSet> item_transitions = sym_transitions(item, grammar);
|
||||
map<ISymbol, ParseItemSet> item_transitions = sym_transitions(item, grammar);
|
||||
result = merge_sym_transitions<ParseItemSet>(result,
|
||||
item_transitions,
|
||||
[&](ParseItemSet left, ParseItemSet right) {
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ namespace tree_sitter {
|
|||
std::map<rules::CharacterSet, LexItemSet>
|
||||
char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar);
|
||||
|
||||
std::map<rules::Symbol, ParseItemSet>
|
||||
std::map<rules::ISymbol, ParseItemSet>
|
||||
sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ namespace tree_sitter {
|
|||
using std::vector;
|
||||
|
||||
namespace build_tables {
|
||||
LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule) :
|
||||
LexItem::LexItem(const rules::ISymbol &lhs, const rules::rule_ptr rule) :
|
||||
Item(lhs, rule) {}
|
||||
|
||||
bool LexItem::operator==(const LexItem &other) const {
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ namespace tree_sitter {
|
|||
namespace build_tables {
|
||||
class LexItem : public Item {
|
||||
public:
|
||||
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
|
||||
LexItem(const rules::ISymbol &lhs, rules::rule_ptr rule);
|
||||
bool operator==(const LexItem &other) const;
|
||||
bool is_token_start() const;
|
||||
};
|
||||
|
|
@ -25,7 +25,7 @@ namespace std {
|
|||
struct hash<tree_sitter::build_tables::LexItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::Item &item) const {
|
||||
return
|
||||
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rules::ISymbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rules::rule_ptr>()(item.rule);
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include <map>
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -15,11 +15,11 @@ namespace tree_sitter {
|
|||
* using the given function.
|
||||
*/
|
||||
template<typename T>
|
||||
std::map<rules::Symbol, T>
|
||||
merge_sym_transitions(const std::map<rules::Symbol, T> &left,
|
||||
const std::map<rules::Symbol, T> &right,
|
||||
std::map<rules::ISymbol, T>
|
||||
merge_sym_transitions(const std::map<rules::ISymbol, T> &left,
|
||||
const std::map<rules::ISymbol, T> &right,
|
||||
std::function<T(T, T)> merge_fn) {
|
||||
std::map<rules::Symbol, T> result(left);
|
||||
std::map<rules::ISymbol, T> result(left);
|
||||
for (auto &pair : right) {
|
||||
auto rule = pair.first;
|
||||
bool merged = false;
|
||||
|
|
|
|||
|
|
@ -8,10 +8,10 @@ namespace tree_sitter {
|
|||
using std::ostream;
|
||||
|
||||
namespace build_tables {
|
||||
ParseItem::ParseItem(const rules::Symbol &lhs,
|
||||
ParseItem::ParseItem(const rules::ISymbol &lhs,
|
||||
const rules::rule_ptr rule,
|
||||
size_t consumed_symbol_count,
|
||||
const rules::Symbol &lookahead_sym) :
|
||||
const rules::ISymbol &lookahead_sym) :
|
||||
Item(lhs, rule),
|
||||
consumed_symbol_count(consumed_symbol_count),
|
||||
lookahead_sym(lookahead_sym) {}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include <unordered_set>
|
||||
#include <string>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
|
|
@ -11,15 +11,15 @@ namespace tree_sitter {
|
|||
namespace build_tables {
|
||||
class ParseItem : public Item {
|
||||
public:
|
||||
ParseItem(const rules::Symbol &lhs,
|
||||
ParseItem(const rules::ISymbol &lhs,
|
||||
rules::rule_ptr rule,
|
||||
const size_t consumed_symbol_count,
|
||||
const rules::Symbol &lookahead_sym);
|
||||
const rules::ISymbol &lookahead_sym);
|
||||
bool operator==(const ParseItem &other) const;
|
||||
int precedence() const;
|
||||
|
||||
const size_t consumed_symbol_count;
|
||||
const rules::Symbol lookahead_sym;
|
||||
const rules::ISymbol lookahead_sym;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
|
||||
|
|
@ -33,10 +33,10 @@ namespace std {
|
|||
struct hash<tree_sitter::build_tables::ParseItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
|
||||
return
|
||||
hash<string>()(item.lhs.name) ^
|
||||
hash<tree_sitter::rules::ISymbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rules::rule_ptr>()(item.rule) ^
|
||||
hash<size_t>()(item.consumed_symbol_count) ^
|
||||
hash<string>()(item.lookahead_sym.name);
|
||||
hash<tree_sitter::rules::ISymbol>()(item.lookahead_sym);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -38,17 +38,17 @@ namespace tree_sitter {
|
|||
|
||||
class CanBeBlankRecursive : public CanBeBlank {
|
||||
const PreparedGrammar grammar;
|
||||
set<rules::Symbol> visited_symbols;
|
||||
set<rules::ISymbol> visited_symbols;
|
||||
using CanBeBlank::visit;
|
||||
|
||||
public:
|
||||
using CanBeBlank::apply_to;
|
||||
explicit CanBeBlankRecursive(const PreparedGrammar &grammar) : grammar(grammar) {}
|
||||
|
||||
bool apply_to(const rules::Symbol *rule) {
|
||||
bool apply_to(const rules::ISymbol *rule) {
|
||||
if (visited_symbols.find(*rule) == visited_symbols.end()) {
|
||||
visited_symbols.insert(*rule);
|
||||
return grammar.has_definition(*rule) && apply(grammar.rule(*rule));
|
||||
return !rule->is_token() && apply(grammar.rule(*rule));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
|
@ -17,7 +18,7 @@ namespace tree_sitter {
|
|||
using std::set;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::Symbol;
|
||||
using rules::ISymbol;
|
||||
using rules::CharacterSet;
|
||||
using rules::Metadata;
|
||||
|
||||
|
|
@ -35,8 +36,8 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
template<>
|
||||
map<Symbol, rule_ptr>
|
||||
merge_transitions(const map<Symbol, rule_ptr> &left, const map<Symbol, rule_ptr> &right) {
|
||||
map<ISymbol, rule_ptr>
|
||||
merge_transitions(const map<ISymbol, rule_ptr> &left, const map<ISymbol, rule_ptr> &right) {
|
||||
return merge_sym_transitions<rule_ptr>(left, right, [](rule_ptr left, rule_ptr right) {
|
||||
return make_shared<rules::Choice>(left, right);
|
||||
});
|
||||
|
|
@ -65,7 +66,7 @@ namespace tree_sitter {
|
|||
return apply_to_atom(rule);
|
||||
}
|
||||
|
||||
map<T, rule_ptr> apply_to(const Symbol *rule) {
|
||||
map<T, rule_ptr> apply_to(const ISymbol *rule) {
|
||||
return apply_to_atom(rule);
|
||||
}
|
||||
|
||||
|
|
@ -117,8 +118,8 @@ namespace tree_sitter {
|
|||
return RuleTransitions<CharacterSet>().apply(rule);
|
||||
}
|
||||
|
||||
map<Symbol, rule_ptr> sym_transitions(const rule_ptr &rule) {
|
||||
return RuleTransitions<Symbol>().apply(rule);
|
||||
map<ISymbol, rule_ptr> sym_transitions(const rule_ptr &rule) {
|
||||
return RuleTransitions<ISymbol>().apply(rule);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,14 +3,14 @@
|
|||
|
||||
#include <map>
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
std::map<rules::CharacterSet, rules::rule_ptr>
|
||||
char_transitions(const rules::rule_ptr &rule);
|
||||
|
||||
std::map<rules::Symbol, rules::rule_ptr>
|
||||
std::map<rules::ISymbol, rules::rule_ptr>
|
||||
sym_transitions(const rules::rule_ptr &rule);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
#include "compiler/build_tables/build_tables.h"
|
||||
#include "compiler/generate_code/c_code.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/name_symbols/name_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::pair;
|
||||
|
|
@ -15,15 +14,13 @@ namespace tree_sitter {
|
|||
PreparedGrammar &syntax_grammar = grammars.first;
|
||||
PreparedGrammar &lexical_grammar = grammars.second;
|
||||
|
||||
auto symbol_names = name_symbols::name_symbols(syntax_grammar, lexical_grammar);
|
||||
|
||||
auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar, symbol_names);
|
||||
auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar);
|
||||
auto tables = table_build_result.first;
|
||||
auto conflicts = table_build_result.second;
|
||||
|
||||
ParseTable &parse_table = tables.first;
|
||||
LexTable &lex_table = tables.second;
|
||||
|
||||
return { generate_code::c_code(name, parse_table, lex_table, symbol_names), conflicts };
|
||||
return { generate_code::c_code(name, parse_table, lex_table, syntax_grammar, lexical_grammar), conflicts };
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@
|
|||
#include "compiler/generate_code/c_code.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/generate_code/token_description.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
|
|
@ -52,17 +54,19 @@ namespace tree_sitter {
|
|||
const string name;
|
||||
const ParseTable parse_table;
|
||||
const LexTable lex_table;
|
||||
const map<rules::Symbol, string> symbol_names;
|
||||
const PreparedGrammar syntax_grammar;
|
||||
const PreparedGrammar lexical_grammar;
|
||||
public:
|
||||
CCodeGenerator(string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const map<rules::Symbol, string> &symbol_names) :
|
||||
const PreparedGrammar &syntax_grammar,
|
||||
const PreparedGrammar &lexical_grammar) :
|
||||
name(name),
|
||||
parse_table(parse_table),
|
||||
lex_table(lex_table),
|
||||
symbol_names(symbol_names)
|
||||
{}
|
||||
syntax_grammar(syntax_grammar),
|
||||
lexical_grammar(lexical_grammar) {}
|
||||
|
||||
string code() {
|
||||
return join({
|
||||
|
|
@ -79,15 +83,33 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
private:
|
||||
string symbol_id(rules::Symbol symbol) {
|
||||
|
||||
const PreparedGrammar & grammar_for_symbol(const rules::ISymbol &symbol) {
|
||||
return symbol.is_token() ? lexical_grammar : syntax_grammar;
|
||||
}
|
||||
|
||||
string symbol_id(const rules::ISymbol &symbol) {
|
||||
if (symbol.is_built_in()) {
|
||||
return (symbol == rules::ERROR()) ?
|
||||
"ts_builtin_sym_error" :
|
||||
"ts_builtin_sym_end";
|
||||
} else if (symbol.is_auxiliary()) {
|
||||
return "ts_aux_sym_" + symbol.name;
|
||||
} else {
|
||||
return "ts_sym_" + symbol.name;
|
||||
string name = grammar_for_symbol(symbol).rule_name(symbol);
|
||||
if (symbol.is_auxiliary())
|
||||
return "ts_aux_sym_" + name;
|
||||
else
|
||||
return "ts_sym_" + name;
|
||||
}
|
||||
}
|
||||
|
||||
string symbol_name(const rules::ISymbol &symbol) {
|
||||
if (symbol.is_built_in()) {
|
||||
return (symbol == rules::ERROR()) ? "error" : "end";
|
||||
} else if (symbol.is_token() && symbol.is_auxiliary()) {
|
||||
return token_description(grammar_for_symbol(symbol).rule(symbol));
|
||||
} else {
|
||||
string name = grammar_for_symbol(symbol).rule_name(symbol);
|
||||
return name;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -189,20 +211,20 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
string symbol_names_list() {
|
||||
set<rules::Symbol> symbols(parse_table.symbols);
|
||||
set<rules::ISymbol> symbols(parse_table.symbols);
|
||||
symbols.insert(rules::END_OF_INPUT());
|
||||
symbols.insert(rules::ERROR());
|
||||
|
||||
string result = "SYMBOL_NAMES = {\n";
|
||||
for (auto symbol : parse_table.symbols)
|
||||
result += indent("[" + symbol_id(symbol) + "] = \"" + symbol_names.find(symbol)->second) + "\",\n";
|
||||
result += indent("[" + symbol_id(symbol) + "] = \"" + symbol_name(symbol)) + "\",\n";
|
||||
return result + "};";
|
||||
}
|
||||
|
||||
string hidden_symbols_list() {
|
||||
string result = "HIDDEN_SYMBOLS = {\n";
|
||||
for (auto &symbol : parse_table.symbols)
|
||||
if (symbol.is_hidden())
|
||||
if (!symbol.is_built_in() && (symbol.is_auxiliary() || grammar_for_symbol(symbol).rule_name(symbol)[0] == '_'))
|
||||
result += indent("[" + symbol_id(symbol) + "] = 1,") + "\n";
|
||||
return result + "};";
|
||||
}
|
||||
|
|
@ -266,8 +288,9 @@ namespace tree_sitter {
|
|||
string c_code(string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const map<rules::Symbol, string> &symbol_names) {
|
||||
return CCodeGenerator(name, parse_table, lex_table, symbol_names).code();
|
||||
const PreparedGrammar &syntax_grammar,
|
||||
const PreparedGrammar &lexical_grammar) {
|
||||
return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, lexical_grammar).code();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,11 +7,14 @@
|
|||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace generate_code {
|
||||
std::string c_code(std::string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const std::map<rules::Symbol, std::string> &symbol_names);
|
||||
const PreparedGrammar &syntax_grammar,
|
||||
const PreparedGrammar &lexical_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
25
src/compiler/generate_code/token_description.cc
Normal file
25
src/compiler/generate_code/token_description.cc
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
#include "compiler/generate_code/token_description.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/string.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
|
||||
namespace generate_code {
|
||||
// Rule visitor that renders a token's defining rule as a short label.
// Only Pattern and String rules are handled here; any other rule kind is
// left to whatever the RuleFn<string> base class does by default.
class TokenDescription : public rules::RuleFn<string> {
    // A pattern is shown wrapped in slashes: /value/
    string apply_to(const rules::Pattern *rule) {
        string description = "/" + rule->value;
        description += "/";
        return description;
    }

    // A string literal is shown wrapped in single quotes: 'value'
    string apply_to(const rules::String *rule) {
        string description = "'" + rule->value;
        description += "'";
        return description;
    }
};
|
||||
|
||||
// Produces a label for the given token rule by running a TokenDescription
// visitor over it.
std::string token_description(const rules::rule_ptr &rule) {
    TokenDescription describer;
    return describer.apply(rule);
}
|
||||
}
|
||||
}
|
||||
13
src/compiler/generate_code/token_description.h
Normal file
13
src/compiler/generate_code/token_description.h
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
#ifndef COMPILER_GENERATE_CODE_TOKEN_DESCRIPTION_H_
|
||||
#define COMPILER_GENERATE_CODE_TOKEN_DESCRIPTION_H_
|
||||
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace generate_code {
|
||||
// Returns a short textual description of a token's rule: the definition in
// token_description.cc renders patterns as /value/ and strings as 'value'.
std::string token_description(const rules::rule_ptr &);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_GENERATE_CODE_TOKEN_DESCRIPTION_H_
|
||||
|
|
@ -1,32 +1,33 @@
|
|||
#include "compiler/lex_table.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::set;
|
||||
using rules::Symbol;
|
||||
using rules::ISymbol;
|
||||
using rules::CharacterSet;
|
||||
|
||||
LexAction::LexAction() :
|
||||
type(LexActionTypeError),
|
||||
symbol(Symbol("")),
|
||||
symbol(ISymbol(-1)),
|
||||
state_index(-1) {}
|
||||
|
||||
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol) :
|
||||
LexAction::LexAction(LexActionType type, size_t state_index, ISymbol symbol) :
|
||||
type(type),
|
||||
symbol(symbol),
|
||||
state_index(state_index) {}
|
||||
|
||||
LexAction LexAction::Error() {
|
||||
return LexAction(LexActionTypeError, -1, Symbol(""));
|
||||
return LexAction(LexActionTypeError, -1, ISymbol(-1));
|
||||
}
|
||||
|
||||
LexAction LexAction::Advance(size_t state_index) {
|
||||
return LexAction(LexActionTypeAdvance, state_index, Symbol(""));
|
||||
return LexAction(LexActionTypeAdvance, state_index, ISymbol(-1));
|
||||
}
|
||||
|
||||
LexAction LexAction::Accept(Symbol symbol) {
|
||||
LexAction LexAction::Accept(ISymbol symbol) {
|
||||
return LexAction(LexActionTypeAccept, -1, symbol);
|
||||
}
|
||||
|
||||
|
|
@ -42,7 +43,7 @@ namespace tree_sitter {
|
|||
case LexActionTypeError:
|
||||
return stream << string("#<error>");
|
||||
case LexActionTypeAccept:
|
||||
return stream << string("#<accept ") + action.symbol.name + ">";
|
||||
return stream << string("#<accept ") + to_string(action.symbol.index) + ">";
|
||||
case LexActionTypeAdvance:
|
||||
return stream << string("#<advance ") + to_string(action.state_index) + ">";
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
#include <vector>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -16,16 +16,16 @@ namespace tree_sitter {
|
|||
} LexActionType;
|
||||
|
||||
class LexAction {
|
||||
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol);
|
||||
LexAction(LexActionType type, size_t state_index, rules::ISymbol symbol);
|
||||
public:
|
||||
LexAction();
|
||||
static LexAction Accept(rules::Symbol symbol);
|
||||
static LexAction Accept(rules::ISymbol symbol);
|
||||
static LexAction Error();
|
||||
static LexAction Advance(size_t state_index);
|
||||
bool operator==(const LexAction &action) const;
|
||||
|
||||
LexActionType type;
|
||||
rules::Symbol symbol;
|
||||
rules::ISymbol symbol;
|
||||
size_t state_index;
|
||||
};
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ namespace std {
|
|||
struct hash<tree_sitter::LexAction> {
|
||||
size_t operator()(const tree_sitter::LexAction &action) const {
|
||||
return (hash<int>()(action.type) ^
|
||||
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
|
||||
hash<tree_sitter::rules::ISymbol>()(action.symbol) ^
|
||||
hash<size_t>()(action.state_index));
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,52 +0,0 @@
|
|||
#include "compiler/name_symbols/name_symbols.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/rule.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace name_symbols {
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using rules::Symbol;
|
||||
using rules::SymbolTypeNormal;
|
||||
using rules::SymbolTypeAuxiliary;
|
||||
|
||||
class TokenName : public rules::RuleFn<string> {
|
||||
protected:
|
||||
string apply_to(const rules::Pattern *rule) {
|
||||
return "/" + util::escape_string(rule->value) + "/";
|
||||
}
|
||||
|
||||
string apply_to(const rules::String *rule) {
|
||||
return "'" + util::escape_string(rule->value) + "'";
|
||||
}
|
||||
};
|
||||
|
||||
map<rules::Symbol, string> name_symbols(const PreparedGrammar &syntactic_grammar,
|
||||
const PreparedGrammar &lexical_grammar) {
|
||||
map<rules::Symbol, string> result;
|
||||
|
||||
for (const auto &pair : syntactic_grammar.rules)
|
||||
result.insert({ Symbol(pair.first, SymbolTypeNormal), pair.first });
|
||||
for (const auto &pair : lexical_grammar.rules)
|
||||
result.insert({ Symbol(pair.first, SymbolTypeNormal), pair.first });
|
||||
for (const auto &pair : syntactic_grammar.aux_rules)
|
||||
result.insert({ Symbol(pair.first, SymbolTypeAuxiliary), pair.first });
|
||||
for (const auto &pair : lexical_grammar.aux_rules)
|
||||
result.insert({
|
||||
Symbol(pair.first, SymbolTypeAuxiliary),
|
||||
TokenName().apply(pair.second)
|
||||
});
|
||||
|
||||
result.insert({ rules::END_OF_INPUT(), "EOF" });
|
||||
result.insert({ rules::ERROR(), "ERROR" });
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
#ifndef COMPILER_NAME_SYMBOLS_NAME_SYMBOLS_H_
|
||||
#define COMPILER_NAME_SYMBOLS_NAME_SYMBOLS_H_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace name_symbols {
|
||||
std::map<rules::Symbol, std::string> name_symbols(const PreparedGrammar &syntactic_grammar,
|
||||
const PreparedGrammar &lexical_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_NAME_SYMBOLS_NAME_SYMBOLS_H_
|
||||
|
|
@ -7,11 +7,11 @@ namespace tree_sitter {
|
|||
using std::to_string;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using rules::Symbol;
|
||||
using rules::ISymbol;
|
||||
|
||||
ParseAction::ParseAction(ParseActionType type,
|
||||
size_t state_index,
|
||||
Symbol symbol,
|
||||
ISymbol symbol,
|
||||
size_t consumed_symbol_count,
|
||||
set<int> precedence_values) :
|
||||
type(type),
|
||||
|
|
@ -22,23 +22,23 @@ namespace tree_sitter {
|
|||
|
||||
ParseAction::ParseAction() :
|
||||
type(ParseActionTypeError),
|
||||
symbol(Symbol("")),
|
||||
symbol(ISymbol(-1)),
|
||||
state_index(-1),
|
||||
consumed_symbol_count(0) {}
|
||||
|
||||
ParseAction ParseAction::Error() {
|
||||
return ParseAction(ParseActionTypeError, -1, Symbol(""), 0, { 0 });
|
||||
return ParseAction(ParseActionTypeError, -1, ISymbol(-1), 0, { 0 });
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Accept() {
|
||||
return ParseAction(ParseActionTypeAccept, -1, Symbol(""), 0, { 0 });
|
||||
return ParseAction(ParseActionTypeAccept, -1, ISymbol(-1), 0, { 0 });
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Shift(size_t state_index, set<int> precedence_values) {
|
||||
return ParseAction(ParseActionTypeShift, state_index, Symbol(""), 0, precedence_values);
|
||||
return ParseAction(ParseActionTypeShift, state_index, ISymbol(-1), 0, precedence_values);
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count, int precedence) {
|
||||
ParseAction ParseAction::Reduce(ISymbol symbol, size_t consumed_symbol_count, int precedence) {
|
||||
return ParseAction(ParseActionTypeReduce, -1, symbol, consumed_symbol_count, { precedence });
|
||||
}
|
||||
|
||||
|
|
@ -58,7 +58,7 @@ namespace tree_sitter {
|
|||
case ParseActionTypeShift:
|
||||
return stream << (string("#<shift ") + to_string(action.state_index) + ">");
|
||||
case ParseActionTypeReduce:
|
||||
return stream << (string("#<reduce ") + action.symbol.name + ">");
|
||||
return stream << (string("#<reduce ") + to_string(action.symbol.index) + ">");
|
||||
default:
|
||||
return stream;
|
||||
}
|
||||
|
|
@ -66,8 +66,8 @@ namespace tree_sitter {
|
|||
|
||||
ParseState::ParseState() : lex_state_id(-1) {}
|
||||
|
||||
set<Symbol> ParseState::expected_inputs() const {
|
||||
set<Symbol> result;
|
||||
set<ISymbol> ParseState::expected_inputs() const {
|
||||
set<ISymbol> result;
|
||||
for (auto &pair : actions)
|
||||
result.insert(pair.first);
|
||||
return result;
|
||||
|
|
@ -90,7 +90,7 @@ namespace tree_sitter {
|
|||
return states.size() - 1;
|
||||
}
|
||||
|
||||
void ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) {
|
||||
void ParseTable::add_action(ParseStateId id, ISymbol symbol, ParseAction action) {
|
||||
symbols.insert(symbol);
|
||||
states[id].actions[symbol] = action;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/lex_table.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
typedef enum {
|
||||
|
|
@ -19,7 +19,7 @@ namespace tree_sitter {
|
|||
class ParseAction {
|
||||
ParseAction(ParseActionType type,
|
||||
size_t state_index,
|
||||
rules::Symbol symbol,
|
||||
rules::ISymbol symbol,
|
||||
size_t consumed_symbol_count,
|
||||
std::set<int> precedence_values);
|
||||
public:
|
||||
|
|
@ -27,11 +27,11 @@ namespace tree_sitter {
|
|||
static ParseAction Accept();
|
||||
static ParseAction Error();
|
||||
static ParseAction Shift(size_t state_index, std::set<int> precedence_values);
|
||||
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count, int precedence);
|
||||
static ParseAction Reduce(rules::ISymbol symbol, size_t consumed_symbol_count, int precedence);
|
||||
bool operator==(const ParseAction &action) const;
|
||||
|
||||
ParseActionType type;
|
||||
rules::Symbol symbol;
|
||||
rules::ISymbol symbol;
|
||||
size_t state_index;
|
||||
size_t consumed_symbol_count;
|
||||
std::set<int> precedence_values;
|
||||
|
|
@ -46,7 +46,7 @@ namespace std {
|
|||
size_t operator()(const tree_sitter::ParseAction &action) const {
|
||||
return (
|
||||
hash<int>()(action.type) ^
|
||||
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
|
||||
hash<tree_sitter::rules::ISymbol>()(action.symbol) ^
|
||||
hash<size_t>()(action.state_index) ^
|
||||
hash<size_t>()(action.consumed_symbol_count));
|
||||
}
|
||||
|
|
@ -57,8 +57,8 @@ namespace tree_sitter {
|
|||
class ParseState {
|
||||
public:
|
||||
ParseState();
|
||||
std::map<rules::Symbol, ParseAction> actions;
|
||||
std::set<rules::Symbol> expected_inputs() const;
|
||||
std::map<rules::ISymbol, ParseAction> actions;
|
||||
std::set<rules::ISymbol> expected_inputs() const;
|
||||
LexStateId lex_state_id;
|
||||
};
|
||||
|
||||
|
|
@ -69,10 +69,10 @@ namespace tree_sitter {
|
|||
class ParseTable {
|
||||
public:
|
||||
uint64_t add_state();
|
||||
void add_action(ParseStateId state_id, rules::Symbol symbol, ParseAction action);
|
||||
void add_action(ParseStateId state_id, rules::ISymbol symbol, ParseAction action);
|
||||
|
||||
std::vector<ParseState> states;
|
||||
std::set<rules::Symbol> symbols;
|
||||
std::set<rules::ISymbol> symbols;
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ namespace tree_sitter {
|
|||
using rules::Repeat;
|
||||
using rules::Rule;
|
||||
using rules::Seq;
|
||||
using rules::Symbol;
|
||||
using rules::ISymbol;
|
||||
|
||||
namespace prepare_grammar {
|
||||
class ExpandRepeats : public rules::IdentityRuleFn {
|
||||
|
|
@ -30,8 +30,9 @@ namespace tree_sitter {
|
|||
|
||||
rule_ptr apply_to(const Repeat *rule) {
|
||||
rule_ptr inner_rule = apply(rule->content);
|
||||
string helper_rule_name = rule_name + string("_repeat") + to_string(aux_rules.size() + 1);
|
||||
rule_ptr repeat_symbol = make_shared<Symbol>(helper_rule_name, rules::SymbolTypeAuxiliary);
|
||||
size_t index = aux_rules.size();
|
||||
string helper_rule_name = rule_name + string("_repeat") + to_string(index);
|
||||
rule_ptr repeat_symbol = make_shared<ISymbol>(offset + index, rules::SymbolOptionAuxiliary);
|
||||
aux_rules.push_back({
|
||||
helper_rule_name,
|
||||
Choice::Build({
|
||||
|
|
@ -43,8 +44,9 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
public:
|
||||
ExpandRepeats(string rule_name) : rule_name(rule_name) {}
|
||||
ExpandRepeats(string rule_name, size_t offset) : rule_name(rule_name), offset(offset) {}
|
||||
|
||||
size_t offset;
|
||||
vector<pair<string, rules::rule_ptr>> aux_rules;
|
||||
};
|
||||
|
||||
|
|
@ -52,7 +54,7 @@ namespace tree_sitter {
|
|||
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
|
||||
|
||||
for (auto &pair : grammar.rules) {
|
||||
ExpandRepeats expander(pair.first);
|
||||
ExpandRepeats expander(pair.first, aux_rules.size());
|
||||
rules.push_back({ pair.first, expander.apply(pair.second) });
|
||||
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
|
@ -15,69 +16,112 @@
|
|||
namespace tree_sitter {
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::map;
|
||||
using std::to_string;
|
||||
using std::vector;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::ISymbol;
|
||||
using std::dynamic_pointer_cast;
|
||||
|
||||
namespace prepare_grammar {
|
||||
class IsToken : public rules::RuleFn<bool> {
|
||||
bool apply_to(const rules::String *rule) {
|
||||
return true;
|
||||
bool apply_to(const rules::String *rule) { return true; }
|
||||
bool apply_to(const rules::Pattern *rule) { return true; }
|
||||
};
|
||||
|
||||
class SymbolInliner : public rules::IdentityRuleFn {
|
||||
map<ISymbol, ISymbol> replacements;
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
int new_index_for_symbol(const ISymbol &symbol) {
|
||||
int result = symbol.index;
|
||||
for (const auto &pair : replacements)
|
||||
if (pair.first.index < symbol.index &&
|
||||
pair.first.is_auxiliary() == symbol.is_auxiliary())
|
||||
result--;
|
||||
return result;
|
||||
}
|
||||
|
||||
bool apply_to(const rules::Pattern *rule) {
|
||||
return true;
|
||||
|
||||
rule_ptr apply_to(const ISymbol *rule) {
|
||||
auto replacement_pair = replacements.find(*rule);
|
||||
if (replacement_pair != replacements.end())
|
||||
return replacement_pair->second.copy();
|
||||
else if (rule->is_built_in())
|
||||
return rule->copy();
|
||||
else
|
||||
return make_shared<ISymbol>(new_index_for_symbol(*rule), rule->options);
|
||||
}
|
||||
|
||||
public:
|
||||
SymbolInliner(const map<ISymbol, ISymbol> &replacements, size_t rule_count, size_t aux_rule_count) :
|
||||
replacements(replacements)
|
||||
{}
|
||||
};
|
||||
|
||||
class TokenExtractor : public rules::IdentityRuleFn {
|
||||
string add_token(rule_ptr rule) {
|
||||
for (auto pair : tokens)
|
||||
if (*pair.second == *rule)
|
||||
return pair.first;
|
||||
string name = "token" + to_string(tokens.size() + 1);
|
||||
tokens.push_back({ name, rule });
|
||||
return name;
|
||||
size_t add_token(rule_ptr rule) {
|
||||
for (size_t i = 0; i < tokens.size(); i++)
|
||||
if (tokens[i].second->operator==(*rule))
|
||||
return i;
|
||||
size_t index = tokens.size();
|
||||
tokens.push_back({ "token" + to_string(index), rule });
|
||||
return index;
|
||||
}
|
||||
|
||||
rule_ptr default_apply(const rules::Rule *rule) {
|
||||
auto result = rule->copy();
|
||||
if (IsToken().apply(result)) {
|
||||
return make_shared<rules::Symbol>(add_token(result), rules::SymbolTypeAuxiliary);
|
||||
size_t index = add_token(result);
|
||||
return make_shared<rules::ISymbol>(index, rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary));
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
vector<pair<string, rules::rule_ptr>> tokens;
|
||||
vector<pair<string, rule_ptr>> tokens;
|
||||
};
|
||||
|
||||
pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &input_grammar) {
|
||||
vector<pair<string, rules::rule_ptr>> rules, tokens, aux_rules, aux_tokens;
|
||||
vector<pair<string, rule_ptr>> rules, tokens, aux_rules, aux_tokens;
|
||||
TokenExtractor extractor;
|
||||
map<ISymbol, ISymbol> symbol_replacements;
|
||||
|
||||
for (auto &pair : input_grammar.rules) {
|
||||
string name = pair.first;
|
||||
rule_ptr rule = pair.second;
|
||||
if (IsToken().apply(rule))
|
||||
tokens.push_back({ name, rule });
|
||||
else
|
||||
rules.push_back({ name, extractor.apply(rule) });
|
||||
for (size_t i = 0; i < input_grammar.rules.size(); i++) {
|
||||
auto pair = input_grammar.rules[i];
|
||||
if (IsToken().apply(pair.second)) {
|
||||
tokens.push_back(pair);
|
||||
symbol_replacements.insert({
|
||||
ISymbol(i),
|
||||
ISymbol(tokens.size() - 1, rules::SymbolOption(rules::SymbolOptionToken))
|
||||
});
|
||||
} else {
|
||||
rules.push_back({ pair.first, extractor.apply(pair.second) });
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &pair : input_grammar.aux_rules) {
|
||||
string name = pair.first;
|
||||
rule_ptr rule = pair.second;
|
||||
if (IsToken().apply(rule))
|
||||
aux_tokens.push_back({ name, rule });
|
||||
else
|
||||
aux_rules.push_back({ name, extractor.apply(rule) });
|
||||
for (size_t i = 0; i < input_grammar.aux_rules.size(); i++) {
|
||||
auto pair = input_grammar.aux_rules[i];
|
||||
if (IsToken().apply(pair.second)) {
|
||||
aux_tokens.push_back(pair);
|
||||
symbol_replacements.insert({
|
||||
ISymbol(i, rules::SymbolOptionAuxiliary),
|
||||
ISymbol(aux_tokens.size() - 1, rules::SymbolOption(rules::SymbolOptionAuxiliary|rules::SymbolOptionToken))
|
||||
});
|
||||
} else {
|
||||
aux_rules.push_back({ pair.first, extractor.apply(pair.second) });
|
||||
}
|
||||
}
|
||||
|
||||
aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());
|
||||
|
||||
SymbolInliner inliner(symbol_replacements, input_grammar.rules.size(), input_grammar.aux_rules.size());
|
||||
for (auto &pair : rules)
|
||||
pair.second = inliner.apply(pair.second);
|
||||
for (auto &pair : aux_rules)
|
||||
pair.second = inliner.apply(pair.second);
|
||||
|
||||
return {
|
||||
PreparedGrammar(rules, aux_rules),
|
||||
PreparedGrammar(tokens, aux_tokens)
|
||||
|
|
|
|||
65
src/compiler/prepare_grammar/intern_symbols.cc
Normal file
65
src/compiler/prepare_grammar/intern_symbols.cc
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
#include <memory>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using rules::rule_ptr;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::make_shared;
|
||||
using std::exception;
|
||||
|
||||
// Stores the rule name that message() later reports as undefined.
GrammarError::GrammarError(string rule_name) : rule_name(rule_name) {}
|
||||
|
||||
// Builds the error text, quoting the stored rule name.
string GrammarError::message() const {
    string text("Undefined rule '");
    text += rule_name;
    text += "'";
    return text;
}
|
||||
|
||||
namespace prepare_grammar {
|
||||
class InternSymbols : public rules::IdentityRuleFn {
|
||||
const Grammar grammar;
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
long index_of(string rule_name) {
|
||||
for (size_t i = 0; i < grammar.rules.size(); i++)
|
||||
if (grammar.rules[i].first == rule_name)
|
||||
return i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::Symbol *rule) {
|
||||
long index = index_of(rule->name);
|
||||
if (index == -1)
|
||||
missing_rule_name = rule->name;
|
||||
return make_shared<rules::ISymbol>(index);
|
||||
}
|
||||
|
||||
public:
|
||||
InternSymbols(const Grammar &grammar) : grammar(grammar) {}
|
||||
|
||||
string missing_rule_name;
|
||||
};
|
||||
|
||||
// Interns every rule of `grammar`, in order.
// On success the result holds the interned grammar and a null error.
// As soon as a rule references an undefined name, the result holds the rules
// interned so far plus a GrammarError allocated with `new`; nothing in this
// function frees it.
pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
    InternSymbols interner(grammar);
    vector<pair<string, rule_ptr>> interned_rules;

    for (auto &entry : grammar.rules) {
        auto interned_rule = interner.apply(entry.second);

        const bool hit_undefined_rule = !interner.missing_rule_name.empty();
        if (hit_undefined_rule) {
            return {
                PreparedGrammar(interned_rules),
                new GrammarError(interner.missing_rule_name)
            };
        }

        interned_rules.push_back({ entry.first, interned_rule });
    }

    return { PreparedGrammar(interned_rules), nullptr };
}
|
||||
}
|
||||
}
|
||||
23
src/compiler/prepare_grammar/intern_symbols.h
Normal file
23
src/compiler/prepare_grammar/intern_symbols.h
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_INTERN_SYMBOLS_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_INTERN_SYMBOLS_H_
|
||||
|
||||
#include <utility>
|
||||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class PreparedGrammar;
|
||||
|
||||
// Error describing a problem with a grammar, identified by a rule name.
class GrammarError {
|
||||
// Name of the rule this error is about.
std::string rule_name;
|
||||
public:
|
||||
// Creates an error for the given rule name.
GrammarError(std::string rule_name);
|
||||
// Returns the human-readable text for this error.
std::string message() const;
|
||||
};
|
||||
|
||||
namespace prepare_grammar {
|
||||
// Converts the grammar's named symbols into index-based symbols. Returns the
// resulting grammar paired with an error pointer; the definition in
// intern_symbols.cc returns nullptr there on success, or a GrammarError
// created with `new` when a referenced rule is undefined.
std::pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_INTERN_SYMBOLS_H_
|
||||
|
|
@ -2,15 +2,21 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::pair;
|
||||
|
||||
namespace prepare_grammar {
|
||||
pair<PreparedGrammar, PreparedGrammar> prepare_grammar(const Grammar &input_grammar) {
|
||||
auto grammars = prepare_grammar::extract_tokens(input_grammar);
|
||||
auto rule_grammar = expand_repeats(grammars.first);
|
||||
auto lex_grammar = grammars.second;
|
||||
auto interned = intern_symbols(input_grammar);
|
||||
if (interned.second) {
|
||||
printf("Error!");
|
||||
exit(1);
|
||||
}
|
||||
auto grammars = extract_tokens(interned.first);
|
||||
const auto &rule_grammar = expand_repeats(grammars.first);
|
||||
const auto &lex_grammar = grammars.second;
|
||||
return { rule_grammar, lex_grammar };
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,14 +2,14 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::pair;
|
||||
using std::ostream;
|
||||
using rules::rule_ptr;
|
||||
using rules::Symbol;
|
||||
using rules::ISymbol;
|
||||
|
||||
PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules) :
|
||||
|
|
@ -20,12 +20,16 @@ namespace tree_sitter {
|
|||
Grammar(grammar),
|
||||
aux_rules({}) {}
|
||||
|
||||
const rule_ptr PreparedGrammar::rule(const Symbol &symbol) const {
|
||||
auto rule_set = symbol.is_auxiliary() ? aux_rules : rules;
|
||||
for (auto &pair : rule_set)
|
||||
if (pair.first == symbol.name)
|
||||
return pair.second;
|
||||
return rule_ptr();
|
||||
const rule_ptr PreparedGrammar::rule(const ISymbol &symbol) const {
|
||||
return symbol.is_auxiliary() ?
|
||||
aux_rules[symbol.index].second :
|
||||
rules[symbol.index].second;
|
||||
}
|
||||
|
||||
string PreparedGrammar::rule_name(const ISymbol &symbol) const {
|
||||
return symbol.is_auxiliary() ?
|
||||
aux_rules[symbol.index].first :
|
||||
rules[symbol.index].first;
|
||||
}
|
||||
|
||||
bool PreparedGrammar::operator==(const PreparedGrammar &other) const {
|
||||
|
|
@ -40,19 +44,6 @@ namespace tree_sitter {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool PreparedGrammar::has_definition(const Symbol &symbol) const {
|
||||
return rule(symbol).get() != nullptr;
|
||||
}
|
||||
|
||||
size_t PreparedGrammar::index_of(const rules::Symbol &symbol) const {
|
||||
for (size_t i = 0; i < rules.size(); i++) {
|
||||
if (rules[i].first == symbol.name) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const PreparedGrammar &grammar) {
|
||||
stream << string("#<grammar");
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
#include <string>
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar : public Grammar {
|
||||
|
|
@ -15,9 +15,8 @@ namespace tree_sitter {
|
|||
PreparedGrammar(const Grammar &grammar);
|
||||
|
||||
bool operator==(const PreparedGrammar &other) const;
|
||||
bool has_definition(const rules::Symbol &symbol) const;
|
||||
const rules::rule_ptr rule(const rules::Symbol &symbol) const;
|
||||
size_t index_of(const rules::Symbol &symbol) const;
|
||||
std::string rule_name(const rules::ISymbol &symbol) const;
|
||||
const rules::rule_ptr rule(const rules::ISymbol &symbol) const;
|
||||
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Symbol ERROR() { return Symbol("error", SymbolTypeBuiltIn); }
|
||||
Symbol START() { return Symbol("start", SymbolTypeBuiltIn); }
|
||||
Symbol END_OF_INPUT() { return Symbol("end", SymbolTypeBuiltIn); }
|
||||
ISymbol END_OF_INPUT() { return ISymbol(-1, SymbolOptionToken); }
|
||||
ISymbol ERROR() { return ISymbol(-2, SymbolOptionToken); }
|
||||
ISymbol START() { return ISymbol(-3); }
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
#ifndef COMPILER_RULES_BUILT_IN_SYMBOLS_H_
|
||||
#define COMPILER_RULES_BUILT_IN_SYMBOLS_H_
|
||||
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/interned_symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Symbol ERROR();
|
||||
Symbol START();
|
||||
Symbol END_OF_INPUT();
|
||||
ISymbol ERROR();
|
||||
ISymbol START();
|
||||
ISymbol END_OF_INPUT();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
61
src/compiler/rules/interned_symbol.cc
Normal file
61
src/compiler/rules/interned_symbol.cc
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
#include "compiler/rules/interned_symbol.h"
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::hash;
|
||||
|
||||
namespace rules {
|
||||
// Creates a symbol with the given index and no option flags set.
ISymbol::ISymbol(int index) : ISymbol(index, SymbolOption(0)) {}
|
||||
|
||||
ISymbol::ISymbol(int index, SymbolOption options) :
|
||||
index(index),
|
||||
options(options) {}
|
||||
|
||||
// Two rules are equal when the other is also an ISymbol with the same index
// and the same option flags.
bool ISymbol::operator==(const Rule &rule) const {
    const ISymbol *other = dynamic_cast<const ISymbol *>(&rule);
    if (!other)
        return false;
    if (other->index != index)
        return false;
    return other->options == options;
}
|
||||
|
||||
size_t ISymbol::hash_code() const {
|
||||
return hash<size_t>()(index) ^ hash<int16_t>()(options);
|
||||
}
|
||||
|
||||
// Returns a newly allocated, shared copy of this symbol.
rule_ptr ISymbol::copy() const {
    rule_ptr duplicate = std::make_shared<ISymbol>(*this);
    return duplicate;
}
|
||||
|
||||
// Renders the symbol as "#<" + optional "aux_" + "token" or "sym" + index + ">",
// e.g. an auxiliary token with index 3 becomes "#<aux_token3>".
string ISymbol::to_string() const {
    string label("#<");
    if (options & SymbolOptionAuxiliary)
        label += "aux_";
    if (options & SymbolOptionToken)
        label += "token";
    else
        label += "sym";
    label += std::to_string(index);
    label += ">";
    return label;
}
|
||||
|
||||
// Orders symbols by option flags first, then by index.
bool ISymbol::operator<(const ISymbol &other) const {
    if (options != other.options)
        return options < other.options;
    return index < other.index;
}
|
||||
|
||||
bool ISymbol::is_token() const {
|
||||
return options & SymbolOptionToken;
|
||||
}
|
||||
|
||||
bool ISymbol::is_built_in() const {
|
||||
return index < 0;
|
||||
}
|
||||
|
||||
bool ISymbol::is_auxiliary() const {
|
||||
return options & SymbolOptionAuxiliary;
|
||||
}
|
||||
|
||||
// Visitor entry point: passes this symbol to the visitor via visit(this).
void ISymbol::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
45
src/compiler/rules/interned_symbol.h
Normal file
45
src/compiler/rules/interned_symbol.h
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
#ifndef COMPILER_RULES_INTERNED_SYMBOL_H_
|
||||
#define COMPILER_RULES_INTERNED_SYMBOL_H_
|
||||
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
// Bit flags attached to an interned symbol; they may be OR-ed together.
typedef enum {
    SymbolOptionToken = 0x1,      // tested by ISymbol::is_token()
    SymbolOptionAuxiliary = 0x2,  // tested by ISymbol::is_auxiliary()
} SymbolOption;
|
||||
|
||||
// A rule that refers to another rule by integer index plus a set of
// SymbolOption flags, rather than by name.
class ISymbol : public Rule {
|
||||
public:
|
||||
// Creates a symbol with the given index and no option flags.
explicit ISymbol(int index);
|
||||
// Creates a symbol with the given index and option flags.
ISymbol(int index, SymbolOption options);
|
||||
|
||||
// Equal only to another ISymbol with the same index and options.
bool operator==(const Rule& other) const;
|
||||
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
// Ordering used when ISymbol is a key in ordered containers:
// options are compared first, then index.
bool operator<(const ISymbol &other) const;
|
||||
|
||||
// True when the SymbolOptionToken flag is set.
bool is_token() const;
|
||||
// True when index is negative.
bool is_built_in() const;
|
||||
// True when the SymbolOptionAuxiliary flag is set.
bool is_auxiliary() const;
|
||||
|
||||
int index;
|
||||
SymbolOption options;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::rules::ISymbol> {
|
||||
size_t operator()(const tree_sitter::rules::ISymbol &rule) const {
|
||||
return rule.hash_code();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // COMPILER_RULES_INTERNED_SYMBOL_H_
|
||||
|
|
@ -8,8 +8,7 @@ namespace tree_sitter {
|
|||
using std::hash;
|
||||
|
||||
namespace rules {
|
||||
Symbol::Symbol(const std::string &name) : name(name), type(SymbolTypeNormal) {}
|
||||
Symbol::Symbol(const std::string &name, SymbolType type) : name(name), type(type) {}
|
||||
Symbol::Symbol(const std::string &name) : name(name) {}
|
||||
|
||||
bool Symbol::operator==(const Rule &rule) const {
|
||||
const Symbol *other = dynamic_cast<const Symbol *>(&rule);
|
||||
|
|
@ -17,11 +16,11 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
bool Symbol::operator==(const Symbol &other) const {
|
||||
return (other.name == name) && (other.type == type);
|
||||
return other.name == name;
|
||||
}
|
||||
|
||||
size_t Symbol::hash_code() const {
|
||||
return hash<string>()(name) ^ hash<int16_t>()(type);
|
||||
return hash<string>()(name);
|
||||
}
|
||||
|
||||
rule_ptr Symbol::copy() const {
|
||||
|
|
@ -29,34 +28,11 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
string Symbol::to_string() const {
|
||||
switch (type) {
|
||||
case SymbolTypeNormal:
|
||||
return string("#<sym '") + name + "'>";
|
||||
case SymbolTypeAuxiliary:
|
||||
return string("#<aux_sym '") + name + "'>";
|
||||
case SymbolTypeBuiltIn:
|
||||
return string("#<builtin_sym '") + name + "'>";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
return string("#<sym '") + name + "'>";
|
||||
}
|
||||
|
||||
bool Symbol::operator<(const Symbol &other) const {
|
||||
if (type < other.type) return true;
|
||||
if (type > other.type) return false;
|
||||
return (name < other.name);
|
||||
}
|
||||
|
||||
bool Symbol::is_built_in() const {
|
||||
return type == SymbolTypeBuiltIn;
|
||||
}
|
||||
|
||||
bool Symbol::is_auxiliary() const {
|
||||
return type == SymbolTypeAuxiliary;
|
||||
}
|
||||
|
||||
// True when the symbol's name starts with '_' or its `type` is
// SymbolTypeAuxiliary.
// NOTE(review): name.front() is called with no emptiness check, so this
// assumes `name` is never empty. TODO confirm against callers.
bool Symbol::is_hidden() const {
|
||||
return (name.front() == '_' || type == SymbolTypeAuxiliary);
|
||||
// NOTE(review): `other` is not declared in this function, and this row matches
// the body of operator<(const Symbol &other). It looks like a rendered-diff
// row that landed here rather than part of is_hidden; confirm before editing.
return name < other.name;
|
||||
}
|
||||
|
||||
void Symbol::accept(Visitor *visitor) const {
|
||||
|
|
|
|||
|
|
@ -7,16 +7,9 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
// Tag type stored in Symbol::type. Enumerators keep their implicit values
// (0, 1, 2 in declaration order).
enum SymbolType {
    SymbolTypeNormal,
    SymbolTypeAuxiliary,
    SymbolTypeBuiltIn
};
|
||||
|
||||
class Symbol : public Rule {
|
||||
public:
|
||||
explicit Symbol(const std::string &name);
|
||||
Symbol(const std::string &name, SymbolType type);
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
bool operator==(const Symbol &other) const;
|
||||
|
|
@ -27,12 +20,7 @@ namespace tree_sitter {
|
|||
void accept(Visitor *visitor) const;
|
||||
bool operator<(const Symbol &other) const;
|
||||
|
||||
bool is_built_in() const;
|
||||
bool is_hidden() const;
|
||||
bool is_auxiliary() const;
|
||||
|
||||
std::string name;
|
||||
SymbolType type;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ namespace tree_sitter {
|
|||
class Repeat;
|
||||
class Seq;
|
||||
class String;
|
||||
class ISymbol;
|
||||
class Pattern;
|
||||
class Metadata;
|
||||
|
||||
|
|
@ -26,6 +27,7 @@ namespace tree_sitter {
|
|||
virtual void visit(const Seq *rule) = 0;
|
||||
virtual void visit(const String *rule) = 0;
|
||||
virtual void visit(const Symbol *rule) = 0;
|
||||
virtual void visit(const ISymbol *rule) = 0;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
|
|
@ -48,6 +50,7 @@ namespace tree_sitter {
|
|||
virtual T apply_to(const Seq *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const String *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const Symbol *rule) { return default_apply((const Rule *)rule); }
|
||||
virtual T apply_to(const ISymbol *rule) { return default_apply((const Rule *)rule); }
|
||||
|
||||
void visit(const Blank *rule) { value_ = apply_to(rule); }
|
||||
void visit(const CharacterSet *rule) { value_ = apply_to(rule); }
|
||||
|
|
@ -58,12 +61,14 @@ namespace tree_sitter {
|
|||
void visit(const Seq *rule) { value_ = apply_to(rule); }
|
||||
void visit(const String *rule) { value_ = apply_to(rule); }
|
||||
void visit(const Symbol *rule) { value_ = apply_to(rule); }
|
||||
void visit(const ISymbol *rule) { value_ = apply_to(rule); }
|
||||
|
||||
private:
|
||||
T value_;
|
||||
};
|
||||
|
||||
class IdentityRuleFn : public RuleFn<rule_ptr> {
|
||||
protected:
|
||||
virtual rule_ptr default_apply(const Rule *rule);
|
||||
virtual rule_ptr apply_to(const Choice *rule);
|
||||
virtual rule_ptr apply_to(const Metadata *rule);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue