Auto-format all source code with clang-format

This commit is contained in:
Max Brunsfeld 2014-07-20 21:43:27 -07:00
parent b8d8386e63
commit 98cc2f2264
105 changed files with 4223 additions and 4052 deletions

View file

@ -15,128 +15,133 @@
#include "compiler/build_tables/item_set_transitions.h"
namespace tree_sitter {
using std::string;
using std::map;
using std::unordered_map;
using std::set;
using std::make_shared;
using rules::Symbol;
using rules::CharacterSet;
namespace build_tables {
namespace build_tables {
class LexTableBuilder {
const LexicalGrammar lex_grammar;
ParseTable *parse_table;
LexConflictManager conflict_manager;
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
LexTable lex_table;
using std::string;
using std::map;
using std::unordered_map;
using std::set;
using std::make_shared;
using rules::Symbol;
using rules::CharacterSet;
// Builds the set of lex items for the given symbols. The error symbol is
// skipped, end-of-input is lexed as the character 0, and every other token
// uses its rule from the lexical grammar. Each rule is prefixed with the
// optional separators (see after_separators). Non-token symbols are ignored.
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
  LexItemSet items;
  for (const auto &sym : symbols) {
    if (sym == rules::ERROR())
      continue;

    if (sym == rules::END_OF_INPUT()) {
      items.insert(LexItem(sym, after_separators(CharacterSet({ 0 }).copy())));
    } else if (sym.is_token()) {
      items.insert(LexItem(sym, after_separators(lex_grammar.rule(sym))));
    }
  }
  return items;
}
class LexTableBuilder {
const LexicalGrammar lex_grammar;
ParseTable *parse_table;
LexConflictManager conflict_manager;
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
LexTable lex_table;
// Returns the id of the lex state for `item_set`, creating and populating
// the state the first time the item set is seen.
LexStateId add_lex_state(const LexItemSet &item_set) {
  auto existing = lex_state_ids.find(item_set);
  if (existing != lex_state_ids.end())
    return existing->second;

  LexStateId new_id = lex_table.add_state();
  // Register the id before filling in actions: add_advance_actions calls
  // back into add_lex_state and may reach this same item set again.
  lex_state_ids[item_set] = new_id;
  add_accept_token_actions(item_set, new_id);
  add_advance_actions(item_set, new_id);
  add_token_start(item_set, new_id);
  return new_id;
}
void add_error_lex_state() {
LexItemSet item_set = build_lex_item_set(parse_table->symbols);
add_accept_token_actions(item_set, LexTable::ERROR_STATE_ID);
add_advance_actions(item_set, LexTable::ERROR_STATE_ID);
}
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
auto transitions = char_transitions(item_set);
for (const auto &transition : transitions) {
CharacterSet rule = transition.first;
LexItemSet new_item_set = transition.second;
LexStateId new_state_id = add_lex_state(new_item_set);
auto action = LexAction::Advance(new_state_id, precedence_values_for_item_set(new_item_set));
if (conflict_manager.resolve_lex_action(lex_table.state(state_id).default_action, action))
lex_table.state(state_id).actions[rule] = action;
}
}
// For each finished item in `item_set`, offers an Accept action as the
// default action of state `state_id`. The conflict manager decides whether
// it replaces the current default.
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
  for (const LexItem &item : item_set) {
    if (!item.is_done())
      continue;

    auto existing = lex_table.state(state_id).default_action;
    auto candidate = LexAction::Accept(item.lhs, item.precedence());
    if (conflict_manager.resolve_lex_action(existing, candidate))
      lex_table.state(state_id).default_action = candidate;
  }
}
// Flags state `state_id` as a token start if any item in `item_set` reports
// is_token_start().
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
  for (const auto &item : item_set) {
    if (!item.is_token_start())
      continue;
    lex_table.state(state_id).is_token_start = true;
  }
}
// Returns a character set containing every separator character of the
// lexical grammar.
CharacterSet separator_set() const {
  set<rules::CharacterRange> separator_ranges;
  for (char separator : lex_grammar.separators) {
    separator_ranges.insert(separator);
  }
  return CharacterSet(separator_ranges);
}
// Wraps `rule` in a sequence that first matches a repetition of separator
// characters. The separator prefix is tagged with START_TOKEN = 1 and
// PRECEDENCE = -1.
rules::rule_ptr after_separators(rules::rule_ptr rule) {
  rules::rule_ptr separator_prefix = make_shared<rules::Metadata>(
      make_shared<rules::Repeat>(separator_set().copy()),
      map<rules::MetadataKey, int>({
          { rules::START_TOKEN, 1 },
          { rules::PRECEDENCE, -1 },
      }));
  return rules::Seq::Build({ separator_prefix, rule });
}
// Collects the distinct precedence values of all items in `item_set`.
set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
  set<int> precedences;
  for (const auto &lex_item : item_set) {
    precedences.insert(lex_item.precedence());
  }
  return precedences;
}
public:
// Creates a builder that keeps its own copy of the lexical grammar and a
// pointer to the parse table. The conflict manager is constructed from the
// same lexical grammar.
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) :
lex_grammar(lex_grammar),
parse_table(parse_table),
conflict_manager(LexConflictManager(lex_grammar)) {}
// Creates a lex state for each parse state's expected inputs and stores its
// id on the parse state, then fills in the error lex state and returns the
// finished table.
LexTable build() {
  for (auto &state : parse_table->states) {
    const LexItemSet expected_items = build_lex_item_set(state.expected_inputs());
    state.lex_state_id = add_lex_state(expected_items);
  }

  add_error_lex_state();
  return lex_table;
}
};
// Entry point: builds the lex table for `parse_table` and `lex_grammar`.
// The builder writes lex state ids into the parse table's states.
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar) {
  LexTableBuilder builder(parse_table, lex_grammar);
  return builder.build();
}
// Builds the set of lex items for the given symbols. The error symbol is
// skipped, end-of-input is lexed as the character 0, and every other token
// uses its rule from the lexical grammar. Each rule is prefixed with the
// optional separators (see after_separators). Non-token symbols are ignored.
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
  LexItemSet items;
  for (const auto &sym : symbols) {
    if (sym == rules::ERROR())
      continue;

    if (sym == rules::END_OF_INPUT()) {
      items.insert(
          LexItem(sym, after_separators(CharacterSet({ 0 }).copy())));
    } else if (sym.is_token()) {
      items.insert(LexItem(sym, after_separators(lex_grammar.rule(sym))));
    }
  }
  return items;
}
// Returns the id of the lex state for `item_set`, creating and populating
// the state the first time the item set is seen.
LexStateId add_lex_state(const LexItemSet &item_set) {
  auto existing = lex_state_ids.find(item_set);
  if (existing != lex_state_ids.end())
    return existing->second;

  LexStateId new_id = lex_table.add_state();
  // Register the id before filling in actions: add_advance_actions calls
  // back into add_lex_state and may reach this same item set again.
  lex_state_ids[item_set] = new_id;
  add_accept_token_actions(item_set, new_id);
  add_advance_actions(item_set, new_id);
  add_token_start(item_set, new_id);
  return new_id;
}
void add_error_lex_state() {
LexItemSet item_set = build_lex_item_set(parse_table->symbols);
add_accept_token_actions(item_set, LexTable::ERROR_STATE_ID);
add_advance_actions(item_set, LexTable::ERROR_STATE_ID);
}
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
auto transitions = char_transitions(item_set);
for (const auto &transition : transitions) {
CharacterSet rule = transition.first;
LexItemSet new_item_set = transition.second;
LexStateId new_state_id = add_lex_state(new_item_set);
auto action = LexAction::Advance(
new_state_id, precedence_values_for_item_set(new_item_set));
if (conflict_manager.resolve_lex_action(
lex_table.state(state_id).default_action, action))
lex_table.state(state_id).actions[rule] = action;
}
}
// For each finished item in `item_set`, offers an Accept action as the
// default action of state `state_id`. The conflict manager decides whether
// it replaces the current default.
void add_accept_token_actions(const LexItemSet &item_set,
                              LexStateId state_id) {
  for (const LexItem &item : item_set) {
    if (!item.is_done())
      continue;

    auto existing = lex_table.state(state_id).default_action;
    auto candidate = LexAction::Accept(item.lhs, item.precedence());
    if (conflict_manager.resolve_lex_action(existing, candidate))
      lex_table.state(state_id).default_action = candidate;
  }
}
// Flags state `state_id` as a token start if any item in `item_set` reports
// is_token_start().
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
  for (const auto &item : item_set) {
    if (!item.is_token_start())
      continue;
    lex_table.state(state_id).is_token_start = true;
  }
}
// Returns a character set containing every separator character of the
// lexical grammar.
CharacterSet separator_set() const {
  set<rules::CharacterRange> separator_ranges;
  for (char separator : lex_grammar.separators) {
    separator_ranges.insert(separator);
  }
  return CharacterSet(separator_ranges);
}
// Wraps `rule` in a sequence that first matches a repetition of separator
// characters. The separator prefix is tagged with START_TOKEN = 1 and
// PRECEDENCE = -1.
rules::rule_ptr after_separators(rules::rule_ptr rule) {
  rules::rule_ptr separator_prefix = make_shared<rules::Metadata>(
      make_shared<rules::Repeat>(separator_set().copy()),
      map<rules::MetadataKey, int>(
          { { rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 }, }));
  return rules::Seq::Build({ separator_prefix, rule });
}
// Collects the distinct precedence values of all items in `item_set`.
set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
  set<int> precedences;
  for (const auto &lex_item : item_set) {
    precedences.insert(lex_item.precedence());
  }
  return precedences;
}
public:
// Creates a builder that keeps its own copy of the lexical grammar and a
// pointer to the parse table. The conflict manager is constructed from the
// same lexical grammar.
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
: lex_grammar(lex_grammar),
parse_table(parse_table),
conflict_manager(LexConflictManager(lex_grammar)) {}
// Creates a lex state for each parse state's expected inputs and stores its
// id on the parse state, then fills in the error lex state and returns the
// finished table.
LexTable build() {
  for (auto &state : parse_table->states) {
    const LexItemSet expected_items =
        build_lex_item_set(state.expected_inputs());
    state.lex_state_id = add_lex_state(expected_items);
  }

  add_error_lex_state();
  return lex_table;
}
};
// Entry point: builds the lex table for `parse_table` and `lex_grammar`.
// The builder writes lex state ids into the parse table's states.
LexTable build_lex_table(ParseTable *parse_table,
                         const LexicalGrammar &lex_grammar) {
  LexTableBuilder builder(parse_table, lex_grammar);
  return builder.build();
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -5,12 +5,16 @@
#include "compiler/lex_table.h"
namespace tree_sitter {
class LexicalGrammar;
class ParseTable;
namespace build_tables {
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar);
}
}
class LexicalGrammar;
class ParseTable;
namespace build_tables {
LexTable build_lex_table(ParseTable *parse_table,
const LexicalGrammar &lex_grammar);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_

View file

@ -14,142 +14,156 @@
#include "compiler/build_tables/first_set.h"
namespace tree_sitter {
using std::pair;
using std::string;
using std::vector;
using std::set;
using std::map;
using std::unordered_map;
using std::make_shared;
using rules::Symbol;
namespace build_tables {
namespace build_tables {
class ParseTableBuilder {
const SyntaxGrammar grammar;
ParseConflictManager conflict_manager;
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
ParseTable parse_table;
using std::pair;
using std::string;
using std::vector;
using std::set;
using std::map;
using std::unordered_map;
using std::make_shared;
using rules::Symbol;
// Returns the id of the parse state for `item_set`. A new item set gets a
// fresh state and is queued for processing by build().
ParseStateId add_parse_state(const ParseItemSet &item_set) {
  auto existing = parse_state_ids.find(item_set);
  if (existing != parse_state_ids.end())
    return existing->second;

  ParseStateId new_id = parse_table.add_state();
  parse_state_ids[item_set] = new_id;
  item_sets_to_process.push_back({ item_set, new_id });
  return new_id;
}
class ParseTableBuilder {
const SyntaxGrammar grammar;
ParseConflictManager conflict_manager;
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
vector<pair<ParseItemSet, ParseStateId> > item_sets_to_process;
ParseTable parse_table;
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
const set<Symbol> &lookahead_symbols = pair.second;
if (item.is_done()) {
ParseAction action = (item.lhs == rules::START()) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
for (auto &lookahead_sym : lookahead_symbols)
if (should_add_action(state_id, lookahead_sym, action))
parse_table.add_action(state_id, lookahead_sym, action);
}
}
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &transition : sym_transitions(item_set, grammar)) {
const Symbol &symbol = transition.first;
const ParseItemSet &next_item_set = transition.second;
ParseAction new_action = ParseAction::Shift(0, precedence_values_for_item_set(next_item_set));
if (should_add_action(state_id, symbol, new_action)) {
ParseStateId new_state_id = add_parse_state(next_item_set);
new_action.state_index = new_state_id;
parse_table.add_action(state_id, symbol, new_action);
}
}
}
// Adds a ShiftExtra action to state `state_id` for every ubiquitous token
// that the state has no action for yet.
void add_shift_extra_actions(ParseStateId state_id) {
  const map<Symbol, ParseAction> &existing = parse_table.states[state_id].actions;
  for (const Symbol &extra_token : grammar.ubiquitous_tokens) {
    if (existing.count(extra_token) != 0)
      continue;
    parse_table.add_action(state_id, extra_token, ParseAction::ShiftExtra());
  }
}
// For each ubiquitous token that state `state_id` handles with a Shift
// action, adds ReduceExtra actions to the state being shifted to: one per
// symbol that has an action in `state_id`, each gated by should_add_action.
void add_reduce_extra_actions(ParseStateId state_id) {
const map<Symbol, ParseAction> &actions = parse_table.states[state_id].actions;
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
const auto &pair_for_symbol = actions.find(ubiquitous_symbol);
if (pair_for_symbol != actions.end() && pair_for_symbol->second.type == ParseActionTypeShift) {
// Destination state of the shift on the ubiquitous token.
size_t shift_state_id = pair_for_symbol->second.state_index;
// NOTE(review): if shift_state_id == state_id, add_action would insert into
// the map being iterated here — confirm whether that case can occur.
for (const auto &pair : actions) {
const Symbol &lookahead_sym = pair.first;
ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol);
if (should_add_action(shift_state_id, lookahead_sym, reduce_extra))
parse_table.add_action(shift_state_id, lookahead_sym, reduce_extra);
}
}
}
}
// Reports whether `action` should be recorded for `symbol` in state
// `state_id`. Returns true when the state has no action for the symbol yet,
// or when conflict_manager.resolve_parse_action returns true for the
// existing action versus the new one.
bool should_add_action(ParseStateId state_id, const Symbol &symbol, const ParseAction &action) {
  // Bind by reference: this runs for every candidate action, and copying the
  // state's whole action map each time is wasted work.
  const auto &current_actions = parse_table.states[state_id].actions;
  auto current_action = current_actions.find(symbol);
  return (
      current_action == current_actions.end() ||
      conflict_manager.resolve_parse_action(symbol, current_action->second, action));
}
// Collects the distinct precedence values of the items in `item_set` that
// have consumed at least one symbol.
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
  set<int> precedences;
  for (const auto &entry : item_set) {
    const ParseItem &parse_item = entry.first;
    if (parse_item.consumed_symbol_count <= 0)
      continue;
    precedences.insert(parse_item.precedence());
  }
  return precedences;
}
public:
// Creates a builder holding its own copy of the syntax grammar. The conflict
// manager is constructed from both the syntax and the lexical grammar.
ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) :
grammar(grammar),
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
// Builds the parse table. Seeds the work list with the closure of the start
// item, processes queued item sets until none remain (each may queue more
// states), then adds the reduce-extra actions for every state. Returns the
// table together with the conflicts reported by the conflict manager.
pair<ParseTable, vector<Conflict>> build() {
  ParseItem start_item(rules::START(), make_shared<Symbol>(0), 0);
  add_parse_state(item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));
  parse_table.symbols.insert(rules::ERROR());

  while (!item_sets_to_process.empty()) {
    // Copy the entry out before popping it: processing may push new entries.
    auto next = item_sets_to_process.back();
    item_sets_to_process.pop_back();

    const ParseItemSet &item_set = next.first;
    const ParseStateId current_state = next.second;
    add_reduce_actions(item_set, current_state);
    add_shift_actions(item_set, current_state);
    add_shift_extra_actions(current_state);
  }

  for (ParseStateId id = 0; id < parse_table.states.size(); ++id)
    add_reduce_extra_actions(id);

  return { parse_table, conflict_manager.conflicts() };
}
};
// Entry point: builds the parse table for the given grammars and returns it
// together with the conflicts found while building.
pair<ParseTable, vector<Conflict>>
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
  ParseTableBuilder builder(grammar, lex_grammar);
  return builder.build();
}
// Returns the id of the parse state for `item_set`. A new item set gets a
// fresh state and is queued for processing by build().
ParseStateId add_parse_state(const ParseItemSet &item_set) {
  auto existing = parse_state_ids.find(item_set);
  if (existing != parse_state_ids.end())
    return existing->second;

  ParseStateId new_id = parse_table.add_state();
  parse_state_ids[item_set] = new_id;
  item_sets_to_process.push_back({ item_set, new_id });
  return new_id;
}
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
const set<Symbol> &lookahead_symbols = pair.second;
if (item.is_done()) {
ParseAction action =
(item.lhs == rules::START())
? ParseAction::Accept()
: ParseAction::Reduce(item.lhs, item.consumed_symbol_count,
item.precedence());
for (auto &lookahead_sym : lookahead_symbols)
if (should_add_action(state_id, lookahead_sym, action))
parse_table.add_action(state_id, lookahead_sym, action);
}
}
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &transition : sym_transitions(item_set, grammar)) {
const Symbol &symbol = transition.first;
const ParseItemSet &next_item_set = transition.second;
ParseAction new_action =
ParseAction::Shift(0, precedence_values_for_item_set(next_item_set));
if (should_add_action(state_id, symbol, new_action)) {
ParseStateId new_state_id = add_parse_state(next_item_set);
new_action.state_index = new_state_id;
parse_table.add_action(state_id, symbol, new_action);
}
}
}
// Adds a ShiftExtra action to state `state_id` for every ubiquitous token
// that the state has no action for yet.
void add_shift_extra_actions(ParseStateId state_id) {
  const map<Symbol, ParseAction> &existing =
      parse_table.states[state_id].actions;
  for (const Symbol &extra_token : grammar.ubiquitous_tokens) {
    if (existing.count(extra_token) != 0)
      continue;
    parse_table.add_action(state_id, extra_token, ParseAction::ShiftExtra());
  }
}
// For each ubiquitous token that state `state_id` handles with a Shift
// action, adds ReduceExtra actions to the state being shifted to: one per
// symbol that has an action in `state_id`, each gated by should_add_action.
void add_reduce_extra_actions(ParseStateId state_id) {
const map<Symbol, ParseAction> &actions =
parse_table.states[state_id].actions;
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
const auto &pair_for_symbol = actions.find(ubiquitous_symbol);
if (pair_for_symbol != actions.end() &&
pair_for_symbol->second.type == ParseActionTypeShift) {
// Destination state of the shift on the ubiquitous token.
size_t shift_state_id = pair_for_symbol->second.state_index;
// NOTE(review): if shift_state_id == state_id, add_action would insert into
// the map being iterated here — confirm whether that case can occur.
for (const auto &pair : actions) {
const Symbol &lookahead_sym = pair.first;
ParseAction reduce_extra =
ParseAction::ReduceExtra(ubiquitous_symbol);
if (should_add_action(shift_state_id, lookahead_sym, reduce_extra))
parse_table.add_action(shift_state_id, lookahead_sym, reduce_extra);
}
}
}
}
// Reports whether `action` should be recorded for `symbol` in state
// `state_id`. Returns true when the state has no action for the symbol yet,
// or when conflict_manager.resolve_parse_action returns true for the
// existing action versus the new one.
bool should_add_action(ParseStateId state_id, const Symbol &symbol,
                       const ParseAction &action) {
  // Bind by reference: this runs for every candidate action, and copying the
  // state's whole action map each time is wasted work.
  const auto &current_actions = parse_table.states[state_id].actions;
  auto current_action = current_actions.find(symbol);
  return (current_action == current_actions.end() ||
          conflict_manager.resolve_parse_action(
              symbol, current_action->second, action));
}
// Collects the distinct precedence values of the items in `item_set` that
// have consumed at least one symbol.
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
  set<int> precedences;
  for (const auto &entry : item_set) {
    const ParseItem &parse_item = entry.first;
    if (parse_item.consumed_symbol_count <= 0)
      continue;
    precedences.insert(parse_item.precedence());
  }
  return precedences;
}
public:
// Creates a builder holding its own copy of the syntax grammar. The conflict
// manager is constructed from both the syntax and the lexical grammar.
ParseTableBuilder(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar)
: grammar(grammar),
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
// Builds the parse table. Seeds the work list with the closure of the start
// item, processes queued item sets until none remain (each may queue more
// states), then adds the reduce-extra actions for every state. Returns the
// table together with the conflicts reported by the conflict manager.
pair<ParseTable, vector<Conflict> > build() {
  ParseItem start_item(rules::START(), make_shared<Symbol>(0), 0);
  add_parse_state(
      item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));
  parse_table.symbols.insert(rules::ERROR());

  while (!item_sets_to_process.empty()) {
    // Copy the entry out before popping it: processing may push new entries.
    auto next = item_sets_to_process.back();
    item_sets_to_process.pop_back();

    const ParseItemSet &item_set = next.first;
    const ParseStateId current_state = next.second;
    add_reduce_actions(item_set, current_state);
    add_shift_actions(item_set, current_state);
    add_shift_extra_actions(current_state);
  }

  for (ParseStateId id = 0; id < parse_table.states.size(); ++id)
    add_reduce_extra_actions(id);

  return { parse_table, conflict_manager.conflicts() };
}
};
// Entry point: builds the parse table for the given grammars and returns it
// together with the conflicts found while building.
pair<ParseTable, vector<Conflict> > build_parse_table(
    const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
  ParseTableBuilder builder(grammar, lex_grammar);
  return builder.build();
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -7,13 +7,13 @@
#include "compiler/parse_table.h"
namespace tree_sitter {
class SyntaxGrammar;
class LexicalGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace build_tables {
std::pair<ParseTable, std::vector<Conflict>>
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
}
namespace build_tables {
std::pair<ParseTable, std::vector<Conflict> > build_parse_table(
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
}
}
#endif // COMPILER_BUILD_TABLES_BUILD_PARSE_TABLE_H_

View file

@ -4,19 +4,20 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::tuple;
using std::vector;
using std::make_tuple;
namespace build_tables {
namespace build_tables {
// Builds the parse table and then the lex table for the given grammars.
// Returns both tables along with the conflicts found while building the
// parse table.
tuple<ParseTable, LexTable, vector<Conflict>>
build_tables(const SyntaxGrammar &grammar,
             const LexicalGrammar &lex_grammar) {
  auto parse_table_result = build_parse_table(grammar, lex_grammar);
  // Refer to the pair's members in place rather than copying them into
  // locals; make_tuple below makes the one copy that is actually needed.
  ParseTable &parse_table = parse_table_result.first;
  const vector<Conflict> &conflicts = parse_table_result.second;
  // build_lex_table takes the parse table by pointer, so it must run before
  // the table is copied into the result tuple.
  LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
  return make_tuple(parse_table, lex_table, conflicts);
}
}
using std::tuple;
using std::vector;
using std::make_tuple;
// Builds the parse table and then the lex table for the given grammars.
// Returns both tables along with the conflicts found while building the
// parse table.
tuple<ParseTable, LexTable, vector<Conflict> > build_tables(
    const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
  auto parse_table_result = build_parse_table(grammar, lex_grammar);
  // Refer to the pair's members in place rather than copying them into
  // locals; make_tuple below makes the one copy that is actually needed.
  ParseTable &parse_table = parse_table_result.first;
  const vector<Conflict> &conflicts = parse_table_result.second;
  // build_lex_table takes the parse table by pointer, so it must run before
  // the table is copied into the result tuple.
  LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
  return make_tuple(parse_table, lex_table, conflicts);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -8,14 +8,13 @@
#include "compiler/lex_table.h"
namespace tree_sitter {
class SyntaxGrammar;
class LexicalGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace build_tables {
std::tuple<ParseTable, LexTable, std::vector<Conflict>>
build_tables(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar);
}
namespace build_tables {
std::tuple<ParseTable, LexTable, std::vector<Conflict> > build_tables(
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
}
}
#endif // COMPILER_BUILD_TABLES_BUILD_TABLES_H_

View file

@ -9,53 +9,55 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
using std::set;
using rules::Symbol;
namespace build_tables {
namespace build_tables {
class FirstSet : public rules::RuleFn<set<Symbol>> {
const SyntaxGrammar *grammar;
set<Symbol> visited_symbols;
using std::set;
using rules::Symbol;
public:
// Stores a non-owning pointer to the grammar used to look up symbol rules.
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
class FirstSet : public rules::RuleFn<set<Symbol> > {
const SyntaxGrammar *grammar;
set<Symbol> visited_symbols;
// First set of a symbol: a token yields itself, any other symbol yields the
// first set of its rule in the grammar. A symbol that has already been
// visited yields the empty set, which stops recursion on recursive rules.
set<Symbol> apply_to(const Symbol *rule) {
  const bool first_visit = visited_symbols.insert(*rule).second;
  if (!first_visit)
    return set<Symbol>();

  if (rule->is_token())
    return set<Symbol>({ *rule });
  return apply(grammar->rule(*rule));
}
public:
// Stores a non-owning pointer to the grammar used to look up symbol rules.
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
// Metadata does not affect the first set: delegate to the wrapped rule.
set<Symbol> apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
// First set of a choice: the union of the first sets of all its elements.
set<Symbol> apply_to(const rules::Choice *rule) {
  set<Symbol> combined;
  for (const auto &element : rule->elements) {
    auto &&element_syms = apply(element);
    combined.insert(element_syms.begin(), element_syms.end());
  }
  return combined;
}
// First set of a sequence: the first set of the left side, plus that of the
// right side when the left side can be blank.
set<Symbol> apply_to(const rules::Seq *rule) {
  auto &&symbols = apply(rule->left);
  if (!rule_can_be_blank(rule->left, *grammar))
    return symbols;

  auto &&right_side = apply(rule->right);
  symbols.insert(right_side.begin(), right_side.end());
  return symbols;
}
};
// Computes the first set of `rule` within `grammar` using a fresh FirstSet
// visitor, so visited-symbol tracking starts empty on every call.
set<Symbol> first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
  FirstSet visitor(&grammar);
  return visitor.apply(rule);
}
// First set of a symbol: a token yields itself, any other symbol yields the
// first set of its rule in the grammar. A symbol that has already been
// visited yields the empty set, which stops recursion on recursive rules.
set<Symbol> apply_to(const Symbol *rule) {
  const bool first_visit = visited_symbols.insert(*rule).second;
  if (!first_visit)
    return set<Symbol>();

  if (rule->is_token())
    return set<Symbol>({ *rule });
  return apply(grammar->rule(*rule));
}
// Metadata does not affect the first set: delegate to the wrapped rule.
set<Symbol> apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
// First set of a choice: the union of the first sets of all its elements.
set<Symbol> apply_to(const rules::Choice *rule) {
  set<Symbol> combined;
  for (const auto &element : rule->elements) {
    auto &&element_syms = apply(element);
    combined.insert(element_syms.begin(), element_syms.end());
  }
  return combined;
}
// First set of a sequence: the first set of the left side, plus that of the
// right side when the left side can be blank.
set<Symbol> apply_to(const rules::Seq *rule) {
  auto &&symbols = apply(rule->left);
  if (!rule_can_be_blank(rule->left, *grammar))
    return symbols;

  auto &&right_side = apply(rule->right);
  symbols.insert(right_side.begin(), right_side.end());
  return symbols;
}
};
// Computes the first set of `rule` within `grammar` using a fresh FirstSet
// visitor, so visited-symbol tracking starts empty on every call.
set<Symbol> first_set(const rules::rule_ptr &rule,
                      const SyntaxGrammar &grammar) {
  FirstSet visitor(&grammar);
  return visitor.apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -6,18 +6,20 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
class SyntaxGrammar;
namespace build_tables {
class SyntaxGrammar;
/*
* Returns the set of terminal symbols that can appear at
* the beginning of a string derivable from a given rule,
* in a given grammar.
*/
std::set<rules::Symbol>
first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
}
}
namespace build_tables {
/*
* Returns the set of terminal symbols that can appear at
* the beginning of a string derivable from a given rule,
* in a given grammar.
*/
std::set<rules::Symbol> first_set(const rules::rule_ptr &rule,
const SyntaxGrammar &grammar);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_FIRST_SET_H_

View file

@ -3,28 +3,28 @@
#include "compiler/rules/seq.h"
namespace tree_sitter {
namespace build_tables {
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) {
class GetMetadata : public rules::RuleFn<int> {
rules::MetadataKey metadata_key;
namespace build_tables {
// Returns this metadata node's value for the requested key. A value of 0 is
// treated as "not set here", in which case the wrapped rule is searched.
int apply_to(const rules::Metadata *rule) {
  int own_value = rule->value_for(metadata_key);
  if (own_value != 0)
    return own_value;
  return apply(rule->rule);
}
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) {
class GetMetadata : public rules::RuleFn<int> {
rules::MetadataKey metadata_key;
// A sequence reports the metadata of its left-hand element only.
// TODO -
// Remove this. It is currently needed to make the rule generated
// by `LexTableBuilder::after_separators` have the right precedence.
int apply_to(const rules::Seq *rule) {
return apply(rule->left);
}
public:
// Remembers which metadata key the visitor should look up.
explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
};
return GetMetadata(key).apply(rule);
}
// Returns this metadata node's value for the requested key. A value of 0 is
// treated as "not set here", in which case the wrapped rule is searched.
int apply_to(const rules::Metadata *rule) {
  int own_value = rule->value_for(metadata_key);
  if (own_value != 0)
    return own_value;
  return apply(rule->rule);
}
// A sequence reports the metadata of its left-hand element only.
// TODO -
// Remove this. It is currently needed to make the rule generated
// by `LexTableBuilder::after_separators` have the right precedence.
int apply_to(const rules::Seq *rule) { return apply(rule->left); }
public:
// Remembers which metadata key the visitor should look up.
explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
};
return GetMetadata(key).apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -5,9 +5,9 @@
#include "compiler/rules/metadata.h"
namespace tree_sitter {
namespace build_tables {
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key);
}
namespace build_tables {
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key);
}
}
#endif // COMPILER_BUILD_TABLES_GET_METADATA_H_

View file

@ -5,17 +5,14 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace build_tables {
// Constructs an item from its left-hand-side symbol and its rule.
Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule) :
lhs(lhs),
rule(rule) {}
namespace build_tables {
// An item is done when its rule can be blank.
bool Item::is_done() const {
return rule_can_be_blank(rule);
}
// Constructs an item from its left-hand-side symbol and its rule.
Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule)
: lhs(lhs), rule(rule) {}
// Returns the PRECEDENCE metadata value of the item's rule, as computed by
// get_metadata.
int Item::precedence() const {
return get_metadata(rule, rules::PRECEDENCE);
}
}
}
// An item is done when its rule can be blank.
bool Item::is_done() const { return rule_can_be_blank(rule); }
// Returns the PRECEDENCE metadata value of the item's rule, as computed by
// get_metadata.
int Item::precedence() const { return get_metadata(rule, rules::PRECEDENCE); }
} // namespace build_tables
} // namespace tree_sitter

View file

@ -5,17 +5,19 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
class Item {
public:
Item(const rules::Symbol &lhs, rules::rule_ptr rule);
bool is_done() const;
int precedence() const;
namespace build_tables {
rules::Symbol lhs;
rules::rule_ptr rule;
};
}
}
// Pairs a left-hand-side symbol with a rule.
class Item {
public:
Item(const rules::Symbol &lhs, rules::rule_ptr rule);
// True when `rule` can be blank.
bool is_done() const;
// The PRECEDENCE metadata value of `rule`.
int precedence() const;
// Symbol on the left-hand side of the item.
rules::Symbol lhs;
// The rule associated with the item.
rules::rule_ptr rule;
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_ITEM_H_

View file

@ -10,50 +10,56 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::set;
using std::vector;
using std::pair;
using rules::Symbol;
using rules::rule_ptr;
namespace build_tables {
namespace build_tables {
const ParseItemSet item_set_closure(const ParseItem &starting_item,
const set<Symbol> &starting_lookahead_symbols,
const SyntaxGrammar &grammar) {
ParseItemSet result;
using std::set;
using std::vector;
using std::pair;
using rules::Symbol;
using rules::rule_ptr;
vector<pair<ParseItem, set<Symbol>>> items_to_process = {{starting_item, starting_lookahead_symbols}};
while (!items_to_process.empty()) {
ParseItem item = items_to_process.back().first;
set<Symbol> new_lookahead_symbols = items_to_process.back().second;
items_to_process.pop_back();
const ParseItemSet item_set_closure(
const ParseItem &starting_item,
const set<Symbol> &starting_lookahead_symbols,
const SyntaxGrammar &grammar) {
ParseItemSet result;
set<Symbol> &lookahead_symbols = result[item];
size_t previous_size = lookahead_symbols.size();
lookahead_symbols.insert(new_lookahead_symbols.begin(), new_lookahead_symbols.end());
vector<pair<ParseItem, set<Symbol>>> items_to_process = {
{ starting_item, starting_lookahead_symbols }
};
if (lookahead_symbols.size() == previous_size)
continue;
while (!items_to_process.empty()) {
ParseItem item = items_to_process.back().first;
set<Symbol> new_lookahead_symbols = items_to_process.back().second;
items_to_process.pop_back();
for (const auto &pair : sym_transitions(item.rule)) {
const Symbol &symbol = pair.first;
const rule_ptr &next_rule = pair.second;
set<Symbol> &lookahead_symbols = result[item];
size_t previous_size = lookahead_symbols.size();
lookahead_symbols.insert(new_lookahead_symbols.begin(),
new_lookahead_symbols.end());
if (symbol.is_token() || symbol.is_built_in())
continue;
if (lookahead_symbols.size() == previous_size)
continue;
set<Symbol> next_lookahead_symbols = first_set(next_rule, grammar);
if (rule_can_be_blank(next_rule, grammar))
next_lookahead_symbols.insert(lookahead_symbols.begin(), lookahead_symbols.end());
for (const auto &pair : sym_transitions(item.rule)) {
const Symbol &symbol = pair.first;
const rule_ptr &next_rule = pair.second;
items_to_process.push_back({
ParseItem(symbol, grammar.rule(symbol), 0),
next_lookahead_symbols
});
}
}
if (symbol.is_token() || symbol.is_built_in())
continue;
return result;
}
set<Symbol> next_lookahead_symbols = first_set(next_rule, grammar);
if (rule_can_be_blank(next_rule, grammar))
next_lookahead_symbols.insert(lookahead_symbols.begin(),
lookahead_symbols.end());
items_to_process.push_back({ ParseItem(symbol, grammar.rule(symbol), 0),
next_lookahead_symbols });
}
}
return result;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -6,13 +6,13 @@
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class SyntaxGrammar;
class SyntaxGrammar;
namespace build_tables {
const ParseItemSet item_set_closure(const ParseItem &item,
const std::set<rules::Symbol> &lookahead_symbols,
const SyntaxGrammar &grammar);
}
namespace build_tables {
const ParseItemSet item_set_closure(
const ParseItem &item, const std::set<rules::Symbol> &lookahead_symbols,
const SyntaxGrammar &grammar);
}
}
#endif // COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_

View file

@ -7,43 +7,49 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::map;
using std::set;
using rules::CharacterSet;
using rules::Symbol;
namespace build_tables {
namespace build_tables {
// Computes the symbol transitions out of a parse item set. For each item and
// each symbol transition of its rule, the successor item (one more consumed
// symbol) is closed over the item's lookahead symbols, and the resulting item
// set is merged into the entry for the transition's symbol.
map<Symbol, ParseItemSet>
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
  // Merging two item sets unions the lookahead sets of matching items.
  auto merge_item_sets = [](ParseItemSet *left, const ParseItemSet *right) {
    for (auto &entry : *right)
      (*left)[entry.first].insert(entry.second.begin(), entry.second.end());
  };

  map<Symbol, ParseItemSet> result;
  for (const auto &entry : item_set) {
    const ParseItem &item = entry.first;
    const set<Symbol> &lookahead_symbols = entry.second;
    for (auto &transition : sym_transitions(item.rule)) {
      ParseItem next_item(item.lhs, transition.second, item.consumed_symbol_count + 1);
      merge_sym_transition<ParseItemSet>(
          &result,
          { transition.first, item_set_closure(next_item, lookahead_symbols, grammar) },
          merge_item_sets);
    }
  }
  return result;
}
using std::map;
using std::set;
using rules::CharacterSet;
using rules::Symbol;
// Computes the character transitions out of a lex item set. Each character
// transition of each item's rule contributes a single successor item, merged
// into the entry for the transition's character set.
map<CharacterSet, LexItemSet>
char_transitions(const LexItemSet &item_set) {
  // Merging two lex item sets is a plain set union.
  auto merge_item_sets = [](LexItemSet *left, const LexItemSet *right) {
    left->insert(right->begin(), right->end());
  };

  map<CharacterSet, LexItemSet> result;
  for (const LexItem &item : item_set) {
    for (auto &transition : char_transitions(item.rule)) {
      LexItem successor(item.lhs, transition.second);
      merge_char_transition<LexItemSet>(
          &result,
          { transition.first, LexItemSet({ successor }) },
          merge_item_sets);
    }
  }
  return result;
}
// Computes the symbol transitions out of a parse item set. For each item and
// each symbol transition of its rule, the successor item (one more consumed
// symbol) is closed over the item's lookahead symbols, and the resulting item
// set is merged into the entry for the transition's symbol.
map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
                                          const SyntaxGrammar &grammar) {
  // Merging two item sets unions the lookahead sets of matching items.
  auto merge_item_sets = [](ParseItemSet *left, const ParseItemSet *right) {
    for (auto &entry : *right)
      (*left)[entry.first].insert(entry.second.begin(), entry.second.end());
  };

  map<Symbol, ParseItemSet> result;
  for (const auto &entry : item_set) {
    const ParseItem &item = entry.first;
    const set<Symbol> &lookahead_symbols = entry.second;
    for (auto &transition : sym_transitions(item.rule)) {
      ParseItem next_item(item.lhs, transition.second,
                          item.consumed_symbol_count + 1);
      merge_sym_transition<ParseItemSet>(
          &result,
          { transition.first,
            item_set_closure(next_item, lookahead_symbols, grammar) },
          merge_item_sets);
    }
  }
  return result;
}
// Computes the character transitions out of a lex item set. Each character
// transition of each item's rule contributes a single successor item, merged
// into the entry for the transition's character set.
map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set) {
  // Merging two lex item sets is a plain set union.
  auto merge_item_sets = [](LexItemSet *left, const LexItemSet *right) {
    left->insert(right->begin(), right->end());
  };

  map<CharacterSet, LexItemSet> result;
  for (const LexItem &item : item_set) {
    for (auto &transition : char_transitions(item.rule)) {
      LexItem successor(item.lhs, transition.second);
      merge_char_transition<LexItemSet>(
          &result, { transition.first, LexItemSet({ successor }) },
          merge_item_sets);
    }
  }
  return result;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -6,19 +6,23 @@
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class SyntaxGrammar;
namespace rules {
class CharacterSet;
class Symbol;
}
namespace build_tables {
std::map<rules::Symbol, ParseItemSet>
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar);
class SyntaxGrammar;
std::map<rules::CharacterSet, LexItemSet>
char_transitions(const LexItemSet &item_set);
}
namespace rules {
class CharacterSet;
class Symbol;
}
namespace build_tables {
// Returns the transitions out of a parse item set, keyed by the symbol
// consumed. `grammar` is passed through to the item-set closure computation.
std::map<rules::Symbol, ParseItemSet> sym_transitions(
const ParseItemSet &item_set, const SyntaxGrammar &grammar);
// Returns the transitions out of a lex item set, keyed by the character set
// consumed.
std::map<rules::CharacterSet, LexItemSet> char_transitions(
const LexItemSet &item_set);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_

View file

@ -7,49 +7,49 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
using std::string;
using std::to_string;
using std::map;
using std::set;
using std::vector;
namespace build_tables {
LexConflictManager::LexConflictManager(const LexicalGrammar &grammar) :
grammar(grammar) {}
using std::string;
using std::to_string;
using std::map;
using std::set;
using std::vector;
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
const LexAction &new_action) {
if (new_action.type < old_action.type)
return !resolve_lex_action(new_action, old_action);
// Constructs a conflict manager, initializing the `grammar` member from the
// given lexical grammar.
LexConflictManager::LexConflictManager(const LexicalGrammar &grammar)
: grammar(grammar) {}
switch (old_action.type) {
case LexActionTypeError:
return true;
case LexActionTypeAccept: {
int old_precedence = *old_action.precedence_values.begin();
switch (new_action.type) {
case LexActionTypeAccept: {
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return true;
} else if (new_precedence < old_precedence) {
return false;
} else {
return new_action.symbol.index < old_action.symbol.index;
}
}
case LexActionTypeAdvance: {
return true;
}
default:
return false;
}
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
const LexAction &new_action) {
if (new_action.type < old_action.type)
return !resolve_lex_action(new_action, old_action);
return true;
}
default:
return false;
}
switch (old_action.type) {
case LexActionTypeError:
return true;
case LexActionTypeAccept: {
int old_precedence = *old_action.precedence_values.begin();
switch (new_action.type) {
case LexActionTypeAccept: {
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return true;
} else if (new_precedence < old_precedence) {
return false;
} else {
return new_action.symbol.index < old_action.symbol.index;
}
}
case LexActionTypeAdvance: { return true; }
default:
return false;
}
return true;
}
default:
return false;
}
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -6,16 +6,18 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
class LexConflictManager {
const LexicalGrammar grammar;
namespace build_tables {
public:
explicit LexConflictManager(const LexicalGrammar &grammar);
bool resolve_lex_action(const LexAction &old_action,
const LexAction &new_action);
};
}
}
// Decides which of two competing lex actions should be kept.
class LexConflictManager {
// Held by value, so the manager keeps its own copy of the grammar.
const LexicalGrammar grammar;
public:
explicit LexConflictManager(const LexicalGrammar &grammar);
// Compares an existing action with a candidate replacement and returns a
// bool verdict.
// NOTE(review): true appears to mean "prefer new_action" -- confirm against
// callers.
bool resolve_lex_action(const LexAction &old_action,
const LexAction &new_action);
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_

View file

@ -6,45 +6,42 @@
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::ostream;
using std::vector;
namespace build_tables {
namespace build_tables {
LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule) :
Item(lhs, rule) {}
using std::string;
using std::ostream;
using std::vector;
bool LexItem::operator==(const LexItem &other) const {
return (other.lhs == lhs) && other.rule->operator==(*rule);
}
// Constructs a lex item by forwarding `lhs` and `rule` to the Item base.
LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule)
: Item(lhs, rule) {}
bool LexItem::is_token_start() const {
class IsTokenStart : public rules::RuleFn<bool> {
bool apply_to(const rules::Seq *rule) {
if (apply(rule->left))
return true;
else if (rule_can_be_blank(rule->left))
return apply(rule->right);
else
return false;
}
bool apply_to(const rules::Metadata *rule) {
return rule->value_for(rules::START_TOKEN);
}
};
return IsTokenStart().apply(rule);
}
ostream& operator<<(ostream &stream, const LexItem &item) {
return stream <<
string("#<item ") <<
item.lhs <<
string(" ") <<
*item.rule <<
string(">");
}
}
// Two lex items are equal when their lhs symbols are equal and their rules
// compare equal. When both items point at the very same rule object the
// structural rule comparison is skipped, since an object trivially equals
// itself.
bool LexItem::operator==(const LexItem &other) const {
  return (other.lhs == lhs) &&
         (other.rule == rule || other.rule->operator==(*rule));
}
bool LexItem::is_token_start() const {
class IsTokenStart : public rules::RuleFn<bool> {
bool apply_to(const rules::Seq *rule) {
if (apply(rule->left))
return true;
else if (rule_can_be_blank(rule->left))
return apply(rule->right);
else
return false;
}
bool apply_to(const rules::Metadata *rule) {
return rule->value_for(rules::START_TOKEN);
}
};
return IsTokenStart().apply(rule);
}
// Writes `item` to `stream` in the form "#<item LHS RULE>".
ostream &operator<<(ostream &stream, const LexItem &item) {
  const string prefix("#<item ");
  const string separator(" ");
  const string suffix(">");
  return stream << prefix << item.lhs << separator << *item.rule << suffix;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -6,39 +6,42 @@
#include "compiler/build_tables/item.h"
namespace tree_sitter {
namespace build_tables {
class LexItem : public Item {
public:
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
bool operator==(const LexItem &other) const;
bool is_token_start() const;
};
namespace build_tables {
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
// An Item specialization used when building the lex table: it adds equality
// comparison and a token-start query on top of the Item base.
class LexItem : public Item {
public:
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
// Compares the lhs symbols and the rules of the two items.
bool operator==(const LexItem &other) const;
// Result of running the token-start visitor over this item's rule.
bool is_token_start() const;
};
typedef std::unordered_set<LexItem> LexItemSet;
}
}
// Streams a textual representation of `item` to `stream`.
std::ostream &operator<<(std::ostream &stream, const LexItem &item);
// Unordered collection of lex items; relies on the std::hash<LexItem>
// specialization and LexItem::operator==.
typedef std::unordered_set<LexItem> LexItemSet;
} // namespace build_tables
} // namespace tree_sitter
namespace std {
template<>
struct hash<tree_sitter::build_tables::LexItem> {
size_t operator()(const tree_sitter::build_tables::Item &item) const {
return
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
hash<tree_sitter::rules::rule_ptr>()(item.rule);
}
};
template<>
struct hash<const tree_sitter::build_tables::LexItemSet> {
size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const {
size_t result = hash<size_t>()(set.size());
for (auto item : set)
result ^= hash<tree_sitter::build_tables::LexItem>()(item);
return result;
}
};
}
// Hash for LexItem: XOR of the hashes of the item's lhs symbol and its rule.
// The call operator accepts the Item base, so only base members are used.
template <>
struct hash<tree_sitter::build_tables::LexItem> {
  size_t operator()(const tree_sitter::build_tables::Item &item) const {
    const size_t lhs_hash = hash<tree_sitter::rules::Symbol>()(item.lhs);
    const size_t rule_hash = hash<tree_sitter::rules::rule_ptr>()(item.rule);
    return lhs_hash ^ rule_hash;
  }
};
// Hash for LexItemSet: the hash of the set's size XOR-ed with the hash of
// every item. XOR is order-independent, so the result does not depend on the
// unordered set's iteration order.
template <>
struct hash<const tree_sitter::build_tables::LexItemSet> {
  size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const {
    size_t result = hash<size_t>()(set.size());
    // Iterate by const reference: copying each item per iteration is
    // unnecessary work just to compute a hash.
    for (const auto &item : set)
      result ^= hash<tree_sitter::build_tables::LexItem>()(item);
    return result;
  }
};
} // namespace std
#endif // COMPILER_BUILD_TABLES_LEX_ITEM_H_

View file

@ -7,69 +7,71 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
namespace build_tables {
/*
* Merges a new transition into a map with symbol keys.
* If the symbol already exists in the map, the new value for that
* symbol will be computed by merging the old and new values
* using the given function.
*/
template<typename T>
void merge_sym_transition(std::map<rules::Symbol, T> *left,
const std::pair<rules::Symbol, T> &new_pair,
std::function<void(T *, const T *)> merge_fn) {
auto new_symbol = new_pair.first;
for (auto &existing_pair : *left) {
auto existing_symbol = existing_pair.first;
if (new_symbol < existing_symbol) break;
if (existing_symbol == new_symbol) {
merge_fn(&existing_pair.second, &new_pair.second);
return;
}
}
left->insert(new_pair);
}
/*
* Merges two transition maps with character set keys. If the
* two maps contain values for overlapping character sets, the
* new value for the two sets' intersection will be computed by
* merging the old and new values using the given function.
*/
template<typename T>
void merge_char_transition(std::map<rules::CharacterSet, T> *left,
const std::pair<rules::CharacterSet, T> &new_pair,
std::function<void(T *, const T *)> merge_fn) {
rules::CharacterSet new_char_set = new_pair.first;
T new_value = new_pair.second;
std::map<rules::CharacterSet, T> pairs_to_insert;
auto iter = left->begin();
while (iter != left->end()) {
rules::CharacterSet char_set = iter->first;
T value = iter->second;
rules::CharacterSet intersection = char_set.remove_set(new_char_set);
if (!intersection.is_empty()) {
new_char_set.remove_set(intersection);
if (!char_set.is_empty())
pairs_to_insert.insert({ char_set, value });
merge_fn(&value, &new_value);
pairs_to_insert.insert({ intersection, value });
left->erase(iter++);
} else {
++iter;
}
}
left->insert(pairs_to_insert.begin(), pairs_to_insert.end());
if (!new_char_set.is_empty())
left->insert({ new_char_set, new_pair.second });
}
/*
 * Merges a new transition into a map with symbol keys.
 * If the symbol already exists in the map, the existing value is updated
 * in place by calling merge_fn(&existing_value, &new_value); otherwise
 * the new pair is inserted unchanged.
 *
 * The lookup uses the map's own ordered search rather than a linear scan
 * over its entries.
 */
template <typename T>
void merge_sym_transition(std::map<rules::Symbol, T> *left,
                          const std::pair<rules::Symbol, T> &new_pair,
                          std::function<void(T *, const T *)> merge_fn) {
  auto existing = left->find(new_pair.first);
  if (existing != left->end()) {
    merge_fn(&existing->second, &new_pair.second);
    return;
  }
  left->insert(new_pair);
}
/*
 * Merges one new transition into a map with character set keys. Where
 * the new character set overlaps the key of an existing entry, that
 * entry is split: the non-overlapping remainder keeps the old value,
 * and the overlapping part gets the old value merged with the new one
 * using the given function. Whatever part of the new character set
 * overlaps no existing key is inserted with the new value unchanged.
 */
template <typename T>
void merge_char_transition(std::map<rules::CharacterSet, T> *left,
const std::pair<rules::CharacterSet, T> &new_pair,
std::function<void(T *, const T *)> merge_fn) {
// Working copies: new_char_set shrinks as overlaps are carved out of it.
rules::CharacterSet new_char_set = new_pair.first;
T new_value = new_pair.second;
// Replacement entries are collected here and inserted after the loop, so
// the map is only erased from (never inserted into) while iterating.
std::map<rules::CharacterSet, T> pairs_to_insert;
auto iter = left->begin();
while (iter != left->end()) {
rules::CharacterSet char_set = iter->first;
T value = iter->second;
// NOTE(review): remove_set presumably removes the given characters from
// char_set in place and returns the removed (overlapping) part -- the
// later is_empty() check on char_set relies on that; confirm in
// CharacterSet.
rules::CharacterSet intersection = char_set.remove_set(new_char_set);
if (!intersection.is_empty()) {
new_char_set.remove_set(intersection);
// Keep the old value for the part of the existing key that did not overlap.
if (!char_set.is_empty())
pairs_to_insert.insert({ char_set, value });
// The overlapping part gets the merged value.
merge_fn(&value, &new_value);
pairs_to_insert.insert({ intersection, value });
// Post-increment hands erase() the current position while iter already
// points at the next entry.
left->erase(iter++);
} else {
++iter;
}
}
left->insert(pairs_to_insert.begin(), pairs_to_insert.end());
// Characters of the new set that matched no existing key.
if (!new_char_set.is_empty())
left->insert({ new_char_set, new_pair.second });
}
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_

View file

@ -8,127 +8,130 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
using std::string;
using std::to_string;
using std::map;
using std::set;
using std::vector;
namespace build_tables {
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar) :
parse_grammar(parse_grammar),
lex_grammar(lex_grammar) {}
using std::string;
using std::to_string;
using std::map;
using std::set;
using std::vector;
bool ParseConflictManager::resolve_parse_action(const rules::Symbol &symbol,
const ParseAction &old_action,
const ParseAction &new_action) {
if (new_action.type < old_action.type)
return !resolve_parse_action(symbol, new_action, old_action);
// Constructs a conflict manager, initializing the `parse_grammar` and
// `lex_grammar` members from the given grammars.
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar)
: parse_grammar(parse_grammar), lex_grammar(lex_grammar) {}
switch (old_action.type) {
case ParseActionTypeError:
return true;
case ParseActionTypeShift: {
int min_precedence = *old_action.precedence_values.begin();
int max_precedence = *old_action.precedence_values.rbegin();
switch (new_action.type) {
case ParseActionTypeReduce: {
int new_precedence = *new_action.precedence_values.rbegin();
if (max_precedence > new_precedence) {
if (min_precedence < new_precedence)
record_conflict(symbol, old_action, new_action);
return false;
} else if (max_precedence < new_precedence) {
return true;
} else {
record_conflict(symbol, old_action, new_action);
return false;
}
}
default:
return false;
}
}
case ParseActionTypeReduce:
switch (new_action.type) {
case ParseActionTypeReduce: {
int old_precedence = *old_action.precedence_values.begin();
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return true;
} else if (new_precedence < old_precedence) {
return false;
} else {
record_conflict(symbol, old_action, new_action);
return new_action.symbol.index < old_action.symbol.index;
}
}
default:
return false;
}
default:
return false;
}
}
const vector<Conflict> ParseConflictManager::conflicts() const {
vector<Conflict> result;
result.insert(result.end(), conflicts_.begin(), conflicts_.end());
return result;
}
string precedence_string(const ParseAction &action) {
string precedences = "(precedence ";
bool started = false;
for (auto value : action.precedence_values) {
if (started) precedences += ", ";
started = true;
precedences += to_string(value);
}
return precedences + ")";
}
string message_for_action(const ParseAction &action, const SyntaxGrammar &parse_grammar) {
switch (action.type) {
case ParseActionTypeShift:
return "shift " + precedence_string(action);
case ParseActionTypeReduce: {
string name = parse_grammar.rule_name(action.symbol);
if (name == "")
return "ERROR" + to_string(action.symbol.index);
else
return "reduce " + name + " " + precedence_string(action);
}
case ParseActionTypeAccept:
return "accept";
default:
return "error";
}
}
string ParseConflictManager::symbol_name(const rules::Symbol &symbol) {
if (symbol.is_built_in()) {
if (symbol == rules::ERROR())
return "ERROR";
else if (symbol == rules::END_OF_INPUT())
return "END_OF_INPUT";
else
return "";
}
if (symbol.is_token())
return lex_grammar.rule_name(symbol);
else
return parse_grammar.rule_name(symbol);
}
void ParseConflictManager::record_conflict(const rules::Symbol &symbol,
const ParseAction &left,
const ParseAction &right) {
conflicts_.insert(Conflict(symbol_name(symbol) + ": " +
message_for_action(left, parse_grammar) + " / " +
message_for_action(right, parse_grammar)));
// Compares an existing parse action with a candidate for the same `symbol`
// and returns a bool verdict, recording a conflict via record_conflict()
// when precedence does not clearly separate the two.
// NOTE(review): true appears to mean "prefer new_action" -- confirm against
// callers.
// NOTE(review): the precedence_values dereferences below assume the
// containers are non-empty -- confirm that callers guarantee this.
bool ParseConflictManager::resolve_parse_action(const rules::Symbol &symbol,
const ParseAction &old_action,
const ParseAction &new_action) {
// Normalize so the switch below only sees old.type <= new.type: swap the
// roles and negate the answer. Any conflict recorded by the swapped call
// lists the actions in swapped order.
if (new_action.type < old_action.type)
return !resolve_parse_action(symbol, new_action, old_action);
switch (old_action.type) {
case ParseActionTypeError:
return true;
case ParseActionTypeShift: {
// NOTE(review): begin()/rbegin() are treated as lowest/highest precedence,
// which presumes precedence_values is an ordered container -- confirm.
int min_precedence = *old_action.precedence_values.begin();
int max_precedence = *old_action.precedence_values.rbegin();
switch (new_action.type) {
case ParseActionTypeReduce: {
int new_precedence = *new_action.precedence_values.rbegin();
if (max_precedence > new_precedence) {
// The shift is kept; a conflict is recorded only when the reduce's
// precedence lies above the shift's lowest precedence.
if (min_precedence < new_precedence)
record_conflict(symbol, old_action, new_action);
return false;
} else if (max_precedence < new_precedence) {
return true;
} else {
// Equal precedence: the shift is kept and a conflict is recorded.
record_conflict(symbol, old_action, new_action);
return false;
}
}
default:
return false;
}
}
case ParseActionTypeReduce:
switch (new_action.type) {
case ParseActionTypeReduce: {
int old_precedence = *old_action.precedence_values.begin();
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return true;
} else if (new_precedence < old_precedence) {
return false;
} else {
// Equal precedence: record the conflict, then favor the action whose
// symbol has the lower index.
record_conflict(symbol, old_action, new_action);
return new_action.symbol.index < old_action.symbol.index;
}
}
default:
return false;
}
default:
return false;
}
}
// Returns the recorded conflicts copied into a vector, in the iteration
// order of the internal conflict set.
const vector<Conflict> ParseConflictManager::conflicts() const {
  return vector<Conflict>(conflicts_.begin(), conflicts_.end());
}
// Formats the action's precedence values as "(precedence a, b, ...)".
string precedence_string(const ParseAction &action) {
  string text = "(precedence ";
  // Empty before the first value, ", " before every later one.
  const char *separator = "";
  for (auto precedence : action.precedence_values) {
    text += separator;
    text += to_string(precedence);
    separator = ", ";
  }
  text += ")";
  return text;
}
// Produces the human-readable description of a parse action that is used in
// conflict messages. Reduce actions are described by the name of their
// symbol in `parse_grammar`; an empty name yields "ERROR" followed by the
// symbol index instead.
string message_for_action(const ParseAction &action,
                          const SyntaxGrammar &parse_grammar) {
  if (action.type == ParseActionTypeShift)
    return "shift " + precedence_string(action);

  if (action.type == ParseActionTypeAccept)
    return "accept";

  if (action.type != ParseActionTypeReduce)
    return "error";

  const string rule_name = parse_grammar.rule_name(action.symbol);
  if (rule_name.empty())
    return "ERROR" + to_string(action.symbol.index);
  return "reduce " + rule_name + " " + precedence_string(action);
}
// Returns the display name of `symbol`: tokens are looked up in the lexical
// grammar, other symbols in the syntax grammar. Built-in symbols map to
// "ERROR", "END_OF_INPUT", or the empty string.
string ParseConflictManager::symbol_name(const rules::Symbol &symbol) {
  if (!symbol.is_built_in()) {
    if (symbol.is_token())
      return lex_grammar.rule_name(symbol);
    return parse_grammar.rule_name(symbol);
  }

  if (symbol == rules::ERROR())
    return "ERROR";
  if (symbol == rules::END_OF_INPUT())
    return "END_OF_INPUT";
  return "";
}
// Records a conflict described as "<symbol name>: <left action> / <right
// action>". Conflicts are stored in a set, so an identical description is
// kept only once.
void ParseConflictManager::record_conflict(const rules::Symbol &symbol,
                                           const ParseAction &left,
                                           const ParseAction &right) {
  string description = symbol_name(symbol);
  description += ": ";
  description += message_for_action(left, parse_grammar);
  description += " / ";
  description += message_for_action(right, parse_grammar);
  conflicts_.insert(Conflict(description));
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -11,25 +11,28 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
class ParseConflictManager {
const SyntaxGrammar parse_grammar;
const LexicalGrammar lex_grammar;
std::set<Conflict> conflicts_;
namespace build_tables {
public:
ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar);
bool resolve_parse_action(const rules::Symbol &symbol,
const ParseAction &old_action,
const ParseAction &new_action);
const std::vector<Conflict> conflicts() const;
class ParseConflictManager {
const SyntaxGrammar parse_grammar;
const LexicalGrammar lex_grammar;
std::set<Conflict> conflicts_;
private:
std::string symbol_name(const rules::Symbol &symbol);
void record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right);
};
}
}
public:
ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar);
bool resolve_parse_action(const rules::Symbol &symbol,
const ParseAction &old_action,
const ParseAction &new_action);
const std::vector<Conflict> conflicts() const;
private:
std::string symbol_name(const rules::Symbol &symbol);
void record_conflict(const rules::Symbol &symbol, const ParseAction &left,
const ParseAction &right);
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_

View file

@ -3,29 +3,28 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
using std::pair;
using std::set;
using std::string;
using std::to_string;
using std::ostream;
namespace build_tables {
namespace build_tables {
ParseItem::ParseItem(const rules::Symbol &lhs,
const rules::rule_ptr rule,
size_t consumed_symbol_count) :
Item(lhs, rule),
consumed_symbol_count(consumed_symbol_count) {}
using std::pair;
using std::set;
using std::string;
using std::to_string;
using std::ostream;
bool ParseItem::operator==(const ParseItem &other) const {
return
(lhs == other.lhs) &&
(consumed_symbol_count == other.consumed_symbol_count) &&
(rule == other.rule || rule->operator==(*other.rule));
}
// Constructs a parse item: `lhs` and `rule` go to the Item base, and
// `consumed_symbol_count` is stored in the member of the same name.
ParseItem::ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule,
size_t consumed_symbol_count)
: Item(lhs, rule), consumed_symbol_count(consumed_symbol_count) {}
ostream& operator<<(ostream &stream, const ParseItem &item) {
return stream << string("#<item ") << item.lhs << string(" ") << *item.rule << string(">");
}
}
// Two parse items are equal when their lhs symbols, consumed symbol counts
// and rules all match. Rules match when they are the same object or compare
// equal structurally.
bool ParseItem::operator==(const ParseItem &other) const {
  if (!(lhs == other.lhs))
    return false;
  if (consumed_symbol_count != other.consumed_symbol_count)
    return false;
  if (rule == other.rule)
    return true;
  return rule->operator==(*other.rule);
}
// Writes `item` to `stream` in the form "#<item LHS RULE>".
ostream &operator<<(ostream &stream, const ParseItem &item) {
  const string prefix("#<item ");
  const string separator(" ");
  const string suffix(">");
  return stream << prefix << item.lhs << separator << *item.rule << suffix;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -8,44 +8,48 @@
#include "compiler/build_tables/item.h"
namespace tree_sitter {
namespace build_tables {
class ParseItem : public Item {
public:
ParseItem(const rules::Symbol &lhs, rules::rule_ptr rule, const size_t consumed_symbol_count);
bool operator==(const ParseItem &other) const;
size_t consumed_symbol_count;
};
namespace build_tables {
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
// An Item specialization used when building the parse table: in addition to
// the Item base it tracks how many symbols have been consumed so far.
class ParseItem : public Item {
public:
ParseItem(const rules::Symbol &lhs, rules::rule_ptr rule,
const size_t consumed_symbol_count);
// Compares lhs, consumed symbol count and rule.
bool operator==(const ParseItem &other) const;
// Number of symbols consumed to reach this item; incremented by one for
// each symbol transition taken.
size_t consumed_symbol_count;
};
typedef std::unordered_map<ParseItem, std::set<rules::Symbol>> ParseItemSet;
}
}
// Streams a textual representation of `item` to `stream`.
std::ostream &operator<<(std::ostream &stream, const ParseItem &item);
// Maps each parse item to a set of symbols (the item's lookahead symbols);
// relies on the std::hash<ParseItem> specialization and
// ParseItem::operator==.
typedef std::unordered_map<ParseItem, std::set<rules::Symbol> > ParseItemSet;
} // namespace build_tables
} // namespace tree_sitter
namespace std {
template<>
struct hash<tree_sitter::build_tables::ParseItem> {
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
return
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
hash<tree_sitter::rules::rule_ptr>()(item.rule) ^
hash<size_t>()(item.consumed_symbol_count);
}
};
template<>
struct hash<const tree_sitter::build_tables::ParseItemSet> {
size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const {
size_t result = hash<size_t>()(set.size());
for (auto &pair : set) {
result ^= hash<tree_sitter::build_tables::ParseItem>()(pair.first);
result ^= hash<size_t>()(pair.second.size());
for (auto &symbol : pair.second)
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
}
return result;
}
};
}
// Hash for ParseItem: XOR of the hashes of its lhs symbol, its rule and its
// consumed symbol count.
template <>
struct hash<tree_sitter::build_tables::ParseItem> {
  size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
    const size_t lhs_hash = hash<tree_sitter::rules::Symbol>()(item.lhs);
    const size_t rule_hash = hash<tree_sitter::rules::rule_ptr>()(item.rule);
    const size_t count_hash = hash<size_t>()(item.consumed_symbol_count);
    return lhs_hash ^ rule_hash ^ count_hash;
  }
};
// Hash for ParseItemSet: starts from the hash of the number of entries and
// XORs in, for every entry, the item's hash, the hash of its symbol-set size
// and the hash of each symbol. XOR makes the result independent of
// iteration order.
template <>
struct hash<const tree_sitter::build_tables::ParseItemSet> {
  size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const {
    hash<size_t> hash_size;
    hash<tree_sitter::build_tables::ParseItem> hash_item;
    hash<tree_sitter::rules::Symbol> hash_symbol;

    size_t digest = hash_size(set.size());
    for (auto &entry : set) {
      const auto &lookaheads = entry.second;
      digest ^= hash_item(entry.first);
      digest ^= hash_size(lookaheads.size());
      for (auto &lookahead : lookaheads)
        digest ^= hash_symbol(lookahead);
    }
    return digest;
  }
};
} // namespace std
#endif // COMPILER_BUILD_TABLES_PARSE_ITEM_H_

View file

@ -9,60 +9,59 @@
#include "compiler/rules/blank.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
using std::set;
namespace tree_sitter {
namespace build_tables {
namespace build_tables {
class CanBeBlank : public rules::RuleFn<bool> {
protected:
bool apply_to(const rules::Blank *) {
return true;
}
using std::set;
bool apply_to(const rules::Repeat *rule) {
return true;
}
class CanBeBlank : public rules::RuleFn<bool> {
protected:
bool apply_to(const rules::Blank *) { return true; }
bool apply_to(const rules::Choice *rule) {
for (const auto &element : rule->elements)
if (apply(element)) return true;
return false;
}
bool apply_to(const rules::Repeat *rule) { return true; }
bool apply_to(const rules::Seq *rule) {
return apply(rule->left) && apply(rule->right);
}
bool apply_to(const rules::Choice *rule) {
for (const auto &element : rule->elements)
if (apply(element))
return true;
return false;
}
bool apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
};
bool apply_to(const rules::Seq *rule) {
return apply(rule->left) && apply(rule->right);
}
class CanBeBlankRecursive : public CanBeBlank {
const SyntaxGrammar *grammar;
set<rules::Symbol> visited_symbols;
using CanBeBlank::visit;
bool apply_to(const rules::Metadata *rule) { return apply(rule->rule); }
};
public:
using CanBeBlank::apply_to;
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar) : grammar(grammar) {}
class CanBeBlankRecursive : public CanBeBlank {
const SyntaxGrammar *grammar;
set<rules::Symbol> visited_symbols;
using CanBeBlank::visit;
bool apply_to(const rules::Symbol *rule) {
if (visited_symbols.find(*rule) == visited_symbols.end()) {
visited_symbols.insert(*rule);
return !rule->is_token() && apply(grammar->rule(*rule));
} else {
return false;
}
}
};
public:
using CanBeBlank::apply_to;
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar)
: grammar(grammar) {}
bool rule_can_be_blank(const rules::rule_ptr &rule) {
return CanBeBlank().apply(rule);
}
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
return CanBeBlankRecursive(&grammar).apply(rule);
}
// Resolves a symbol through the grammar. Each symbol is expanded at most
// once per visitor: a symbol already in visited_symbols yields false, which
// keeps recursive grammars from looping. Tokens also yield false.
bool apply_to(const rules::Symbol *rule) {
  // insert() reports through `.second` whether the symbol was newly added.
  const bool first_visit = visited_symbols.insert(*rule).second;
  if (!first_visit)
    return false;
  if (rule->is_token())
    return false;
  return apply(grammar->rule(*rule));
}
};
// Runs the CanBeBlank visitor over `rule` and returns its verdict. Symbols
// are not resolved through any grammar by this overload.
bool rule_can_be_blank(const rules::rule_ptr &rule) {
  CanBeBlank checker;
  return checker.apply(rule);
}
// Runs the CanBeBlankRecursive visitor over `rule`, giving it `grammar` so
// that symbols can be resolved to their rules.
bool rule_can_be_blank(const rules::rule_ptr &rule,
                       const SyntaxGrammar &grammar) {
  CanBeBlankRecursive checker(&grammar);
  return checker.apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -4,12 +4,16 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class SyntaxGrammar;
namespace build_tables {
bool rule_can_be_blank(const rules::rule_ptr &rule);
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
}
}
class SyntaxGrammar;
namespace build_tables {
// Reports whether `rule` can be blank, looking only at the rule itself.
bool rule_can_be_blank(const rules::rule_ptr &rule);
// Same query, but non-token symbols inside `rule` are additionally resolved
// through `grammar`.
bool rule_can_be_blank(const rules::rule_ptr &rule,
const SyntaxGrammar &grammar);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_

View file

@ -13,87 +13,91 @@
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::map;
using std::make_shared;
using rules::rule_ptr;
using rules::Symbol;
using rules::CharacterSet;
namespace build_tables {
namespace build_tables {
template<typename T>
void merge_transitions(map<T, rule_ptr> *left, const map<T, rule_ptr> &right);
using std::map;
using std::make_shared;
using rules::rule_ptr;
using rules::Symbol;
using rules::CharacterSet;
template<>
void merge_transitions(map<CharacterSet, rule_ptr> *left, const map<CharacterSet, rule_ptr> &right) {
for (auto &pair : right)
merge_char_transition<rule_ptr>(left, pair, [](rule_ptr *left, const rule_ptr *right) {
*left = rules::Choice::Build({ *left, *right });
});
}
template <typename T>
void merge_transitions(map<T, rule_ptr> *left, const map<T, rule_ptr> &right);
template<>
void merge_transitions(map<Symbol, rule_ptr> *left, const map<Symbol, rule_ptr> &right) {
for (auto &pair : right)
merge_sym_transition<rule_ptr>(left, pair, [](rule_ptr *left, const rule_ptr *right) {
*left = rules::Choice::Build({ *left, *right });
});
}
template<typename T>
class RuleTransitions : public rules::RuleFn<map<T, rule_ptr>> {
map<T, rule_ptr> apply_to_atom(const rules::Rule *rule) {
auto atom = dynamic_cast<const T *>(rule);
if (atom)
return map<T, rule_ptr>({{ *atom, make_shared<rules::Blank>() }});
else
return map<T, rule_ptr>();
}
map<T, rule_ptr> apply_to(const CharacterSet *rule) {
return apply_to_atom(rule);
}
map<T, rule_ptr> apply_to(const Symbol *rule) {
return apply_to_atom(rule);
}
map<T, rule_ptr> apply_to(const rules::Choice *rule) {
map<T, rule_ptr> result;
for (const auto &el : rule->elements)
merge_transitions<T>(&result, this->apply(el));
return result;
}
map<T, rule_ptr> apply_to(const rules::Seq *rule) {
auto result = this->apply(rule->left);
for (auto &pair : result)
pair.second = rules::Seq::Build({ pair.second, rule->right });
if (rule_can_be_blank(rule->left))
merge_transitions<T>(&result, this->apply(rule->right));
return result;
}
map<T, rule_ptr> apply_to(const rules::Repeat *rule) {
auto result = this->apply(rule->content);
for (auto &pair : result)
pair.second = rules::Seq::Build({ pair.second, rule->copy() });
return result;
}
map<T, rule_ptr> apply_to(const rules::Metadata *rule) {
auto result = this->apply(rule->rule);
for (auto &pair : result)
pair.second = make_shared<rules::Metadata>(pair.second, rule->value);
return result;
}
};
map<CharacterSet, rule_ptr> char_transitions(const rule_ptr &rule) {
return RuleTransitions<CharacterSet>().apply(rule);
}
map<Symbol, rule_ptr> sym_transitions(const rule_ptr &rule) {
return RuleTransitions<Symbol>().apply(rule);
}
}
// Character-set specialization: merges every transition of `right` into
// `left`. Where character sets overlap, the two target rules are combined
// into a choice between them.
template <>
void merge_transitions(map<CharacterSet, rule_ptr> *left,
                       const map<CharacterSet, rule_ptr> &right) {
  auto combine_as_choice = [](rule_ptr *existing, const rule_ptr *incoming) {
    *existing = rules::Choice::Build({ *existing, *incoming });
  };
  for (auto &transition : right)
    merge_char_transition<rule_ptr>(left, transition, combine_as_choice);
}
// Symbol specialization: merges every transition of `right` into `left`.
// Where both maps have the same symbol, the two target rules are combined
// into a choice between them.
template <>
void merge_transitions(map<Symbol, rule_ptr> *left,
                       const map<Symbol, rule_ptr> &right) {
  auto combine_as_choice = [](rule_ptr *existing, const rule_ptr *incoming) {
    *existing = rules::Choice::Build({ *existing, *incoming });
  };
  for (auto &transition : right)
    merge_sym_transition<rule_ptr>(left, transition, combine_as_choice);
}
// Rule visitor that computes, for a rule, a map from each atom of type T
// (a CharacterSet or a Symbol) to the rule produced for the transition on
// that atom.
template <typename T>
class RuleTransitions : public rules::RuleFn<map<T, rule_ptr> > {
  typedef map<T, rule_ptr> TransitionMap;

  // An atom of type T maps to a blank rule; an atom of any other type
  // contributes no transitions.
  TransitionMap apply_to_atom(const rules::Rule *rule) {
    TransitionMap transitions;
    const T *atom = dynamic_cast<const T *>(rule);
    if (atom)
      transitions.insert({ *atom, make_shared<rules::Blank>() });
    return transitions;
  }

  TransitionMap apply_to(const CharacterSet *rule) {
    return apply_to_atom(rule);
  }

  TransitionMap apply_to(const Symbol *rule) {
    return apply_to_atom(rule);
  }

  // A choice contributes the merged transitions of all its elements.
  TransitionMap apply_to(const rules::Choice *rule) {
    TransitionMap transitions;
    for (const auto &element : rule->elements)
      merge_transitions<T>(&transitions, this->apply(element));
    return transitions;
  }

  // A sequence takes the left side's transitions, each followed by the right
  // side; if the left side can be blank, the right side's own transitions
  // are merged in as well.
  TransitionMap apply_to(const rules::Seq *rule) {
    TransitionMap transitions = this->apply(rule->left);
    for (auto &transition : transitions)
      transition.second = rules::Seq::Build({ transition.second, rule->right });
    if (rule_can_be_blank(rule->left))
      merge_transitions<T>(&transitions, this->apply(rule->right));
    return transitions;
  }

  // A repeat takes its content's transitions, each followed by a copy of the
  // repeat itself.
  TransitionMap apply_to(const rules::Repeat *rule) {
    TransitionMap transitions = this->apply(rule->content);
    for (auto &transition : transitions)
      transition.second = rules::Seq::Build({ transition.second, rule->copy() });
    return transitions;
  }

  // Metadata is carried over: each target rule of the inner rule is
  // re-wrapped with the same metadata value.
  TransitionMap apply_to(const rules::Metadata *rule) {
    TransitionMap transitions = this->apply(rule->rule);
    for (auto &transition : transitions)
      transition.second =
          make_shared<rules::Metadata>(transition.second, rule->value);
    return transitions;
  }
};
// Returns the transitions of `rule` keyed by character set.
map<CharacterSet, rule_ptr> char_transitions(const rule_ptr &rule) {
  RuleTransitions<CharacterSet> visitor;
  return visitor.apply(rule);
}
// Returns the transitions of `rule` keyed by symbol.
map<Symbol, rule_ptr> sym_transitions(const rule_ptr &rule) {
  RuleTransitions<Symbol> visitor;
  return visitor.apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -5,14 +5,16 @@
#include "compiler/rules/character_set.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
std::map<rules::CharacterSet, rules::rule_ptr>
char_transitions(const rules::rule_ptr &rule);
namespace tree_sitter {
namespace build_tables {
std::map<rules::Symbol, rules::rule_ptr>
sym_transitions(const rules::rule_ptr &rule);
}
}
// Returns the transitions of `rule` keyed by character set; each value is
// the rule associated with taking that transition.
std::map<rules::CharacterSet, rules::rule_ptr> char_transitions(
const rules::rule_ptr &rule);
// Returns the transitions of `rule` keyed by symbol; each value is the rule
// associated with taking that transition.
std::map<rules::Symbol, rules::rule_ptr> sym_transitions(
const rules::rule_ptr &rule);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_RULE_TRANSITIONS_H_

View file

@ -5,29 +5,33 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::tuple;
using std::string;
using std::vector;
using std::get;
using std::make_tuple;
tuple<string, vector<Conflict>, const GrammarError *>
compile(const Grammar &grammar, std::string name) {
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
const GrammarError *error = get<2>(prepare_grammar_result);
using std::tuple;
using std::string;
using std::vector;
using std::get;
using std::make_tuple;
if (error)
return make_tuple("", vector<Conflict>(), error);
tuple<string, vector<Conflict>, const GrammarError *> compile(
const Grammar &grammar, std::string name) {
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
const GrammarError *error = get<2>(prepare_grammar_result);
auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar);
const ParseTable &parse_table = get<0>(table_build_result);
const LexTable &lex_table = get<1>(table_build_result);
const vector<Conflict> &conflicts = get<2>(table_build_result);
if (error)
return make_tuple("", vector<Conflict>(), error);
string code = generate_code::c_code(name, parse_table, lex_table, syntax_grammar, lexical_grammar);
auto table_build_result =
build_tables::build_tables(syntax_grammar, lexical_grammar);
const ParseTable &parse_table = get<0>(table_build_result);
const LexTable &lex_table = get<1>(table_build_result);
const vector<Conflict> &conflicts = get<2>(table_build_result);
return make_tuple(code, conflicts, nullptr);
}
string code = generate_code::c_code(name, parse_table, lex_table,
syntax_grammar, lexical_grammar);
return make_tuple(code, conflicts, nullptr);
}
} // namespace tree_sitter

View file

@ -2,19 +2,21 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
using std::string;
Conflict::Conflict(string description) : description(description) {}
using std::string;
bool Conflict::operator==(const tree_sitter::Conflict &other) const {
return other.description == description;
}
// Creates a conflict that carries the given human-readable description.
Conflict::Conflict(string description) : description(description) {}
bool Conflict::operator<(const tree_sitter::Conflict &other) const {
return other.description < description;
}
std::ostream& operator<<(std::ostream &stream, const Conflict &conflict) {
return stream << "#<conflict " + conflict.description + ">";
}
// Two conflicts are equal when their descriptions are equal.
bool Conflict::operator==(const tree_sitter::Conflict &other) const {
  const bool same_description = (description == other.description);
  return same_description;
}
// Orders conflicts by description in DESCENDING order: `a < b` holds when
// a's description compares greater than b's.
bool Conflict::operator<(const tree_sitter::Conflict &other) const {
  return description > other.description;
}
// Writes the conflict as "#<conflict DESCRIPTION>" in a single insertion.
std::ostream &operator<<(std::ostream &stream, const Conflict &conflict) {
  const std::string text = "#<conflict " + conflict.description + ">";
  return stream << text;
}
} // namespace tree_sitter

View file

@ -10,393 +10,378 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::string;
using std::to_string;
using std::function;
using std::map;
using std::vector;
using std::set;
using std::pair;
using util::escape_char;
namespace generate_code {
using std::string;
using std::to_string;
using std::function;
using std::map;
using std::vector;
using std::set;
using std::pair;
using util::escape_char;
namespace generate_code {
class CCodeGenerator {
string buffer;
size_t indent_level;
class CCodeGenerator {
string buffer;
size_t indent_level;
const string name;
const ParseTable parse_table;
const LexTable lex_table;
const SyntaxGrammar syntax_grammar;
const LexicalGrammar lexical_grammar;
map<string, string> sanitized_names;
const string name;
const ParseTable parse_table;
const LexTable lex_table;
const SyntaxGrammar syntax_grammar;
const LexicalGrammar lexical_grammar;
map<string, string> sanitized_names;
public:
CCodeGenerator(string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar) :
indent_level(0),
name(name),
parse_table(parse_table),
lex_table(lex_table),
syntax_grammar(syntax_grammar),
lexical_grammar(lexical_grammar) {}
public:
// Stores the parser name together with copies of the parse table, lex table
// and both grammars (the members are held by value). Output indentation
// starts at level 0.
CCodeGenerator(string name, const ParseTable &parse_table,
const LexTable &lex_table, const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar)
: indent_level(0),
name(name),
parse_table(parse_table),
lex_table(lex_table),
syntax_grammar(syntax_grammar),
lexical_grammar(lexical_grammar) {}
string code() {
buffer = "";
string code() {
buffer = "";
includes();
state_and_symbol_counts();
symbol_enum();
symbol_names_list();
hidden_symbols_list();
lex_function();
lex_states_list();
parse_table_array();
parser_export();
includes();
state_and_symbol_counts();
symbol_enum();
symbol_names_list();
hidden_symbols_list();
lex_function();
lex_states_list();
parse_table_array();
parser_export();
return buffer;
}
return buffer;
}
private:
void includes() {
add("#include \"tree_sitter/parser.h\"");
line();
}
private:
// Emits the #include of the tree-sitter parser header, followed by a newline.
// Uses add() rather than line() so the directive is not preceded by a newline.
void includes() {
add("#include \"tree_sitter/parser.h\"");
line();
}
void state_and_symbol_counts() {
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
line();
}
// Emits the STATE_COUNT and SYMBOL_COUNT #defines, taken from the number of
// states and the number of symbols recorded in the parse table.
void state_and_symbol_counts() {
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
line();
}
void symbol_enum() {
line("enum {");
indent([&]() {
bool at_start = true;
for (auto symbol : parse_table.symbols)
if (!symbol.is_built_in()) {
if (at_start)
line(symbol_id(symbol) + " = ts_start_sym,");
else
line(symbol_id(symbol) + ",");
at_start = false;
}
});
line("};");
line();
}
void symbol_names_list() {
set<rules::Symbol> symbols(parse_table.symbols);
symbols.insert(rules::END_OF_INPUT());
symbols.insert(rules::ERROR());
line("SYMBOL_NAMES = {");
indent([&]() {
for (auto symbol : parse_table.symbols)
line("[" + symbol_id(symbol) + "] = \"" + symbol_name(symbol) + "\",");
});
line("};");
line();
}
void hidden_symbols_list() {
line("HIDDEN_SYMBOLS = {");
indent([&]() {
for (auto &symbol : parse_table.symbols)
if (!symbol.is_built_in() && (symbol.is_auxiliary() || rule_name(symbol)[0] == '_'))
line("[" + symbol_id(symbol) + "] = 1,");
});
line("};");
line();
}
void lex_function() {
line("LEX_FN() {");
indent([&]() {
line("START_LEXER();");
switch_on_lex_state();
});
line("}");
line();
}
void lex_states_list() {
line("LEX_STATES = {");
indent([&]() {
size_t state_id = 0;
for (auto &state : parse_table.states)
line("[" + to_string(state_id++) + "] = " + lex_state_index(state.lex_state_id) + ",");
});
line("};");
line();
}
void parse_table_array() {
size_t state_id = 0;
line("#pragma GCC diagnostic push");
line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
line();
line("PARSE_TABLE = {");
indent([&]() {
for (auto &state : parse_table.states) {
line("[" + to_string(state_id++) + "] = {");
indent([&]() {
for (auto &pair : state.actions) {
line("[" + symbol_id(pair.first) + "] = ");
code_for_parse_action(pair.second);
add(",");
}
});
line("},");
}
});
line("};");
line();
line("#pragma GCC diagnostic pop");
line();
}
void parser_export() {
line("EXPORT_PARSER(ts_parser_" + name + ");");
line();
}
string rule_name(const rules::Symbol &symbol) {
return symbol.is_token() ?
lexical_grammar.rule_name(symbol) :
syntax_grammar.rule_name(symbol);
}
string symbol_id(const rules::Symbol &symbol) {
if (symbol.is_built_in()) {
return (symbol == rules::ERROR()) ?
"ts_builtin_sym_error" :
"ts_builtin_sym_end";
} else {
string name = sanitize_name(rule_name(symbol));
if (symbol.is_auxiliary())
return "ts_aux_sym_" + name;
else
return "ts_sym_" + name;
}
}
string sanitize_name(string name) {
auto existing = sanitized_names.find(name);
if (existing != sanitized_names.end())
return existing->second;
string stripped_name;
for (char c : name) {
if (('a' <= c && c <= 'z') ||
('A' <= c && c <= 'Z') ||
('0' <= c && c <= '9') ||
(c == '_')) {
stripped_name += c;
}
}
for (size_t extra_number = 0;; extra_number++) {
string suffix = extra_number ? to_string(extra_number) : "";
string unique_name = stripped_name + suffix;
if (unique_name == "")
continue;
if (!has_sanitized_name(unique_name)) {
sanitized_names.insert({ name, unique_name });
return unique_name;
}
}
}
bool has_sanitized_name(string name) {
for (auto &pair : sanitized_names)
if (pair.second == name)
return true;
return false;
}
string lex_state_index(size_t i) {
return to_string(i + 1);
}
string symbol_name(const rules::Symbol &symbol) {
if (symbol.is_built_in()) {
return (symbol == rules::ERROR()) ? "error" : "end";
} else if (symbol.is_token() && symbol.is_auxiliary()) {
return rule_name(symbol);
} else {
return rule_name(symbol);
}
}
string condition_for_character_range(const rules::CharacterRange &range) {
string lookahead("lookahead");
if (range.min == range.max) {
return lookahead + " == '" + escape_char(range.min) + "'";
} else {
return string("'") + escape_char(range.min) + string("' <= ") + lookahead +
" && " + lookahead + " <= '" + escape_char(range.max) + "'";
}
}
void condition_for_character_set(const rules::CharacterSet &set) {
if (set.ranges.size() == 1) {
add(condition_for_character_range(*set.ranges.begin()));
} else {
bool first = true;
for (auto &match : set.ranges) {
string part = "(" + condition_for_character_range(match) + ")";
if (first) {
add(part);
} else {
add(" ||");
line(part);
}
first = false;
}
}
}
void condition_for_character_rule(const rules::CharacterSet &rule) {
pair<rules::CharacterSet, bool> representation = rule.most_compact_representation();
if (representation.second) {
condition_for_character_set(representation.first);
} else {
add("!(");
condition_for_character_set(rule.complement());
add(")");
}
}
void code_for_parse_action(const ParseAction &action) {
switch (action.type) {
case ParseActionTypeAccept:
add("ACCEPT_INPUT()");
break;
case ParseActionTypeShift:
add("SHIFT(" + to_string(action.state_index) + ")");
break;
case ParseActionTypeShiftExtra:
add("SHIFT_EXTRA()");
break;
case ParseActionTypeReduce:
add("REDUCE(" +
symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
break;
case ParseActionTypeReduceExtra:
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
break;
default: {}
}
}
void code_for_lex_actions(const LexAction &action,
const set<rules::CharacterSet> &expected_inputs) {
switch (action.type) {
case LexActionTypeAdvance:
line("ADVANCE(" + lex_state_index(action.state_index) + ");");
break;
case LexActionTypeAccept:
line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
break;
case LexActionTypeError:
line("LEX_ERROR();");
break;
default: {}
}
}
void code_for_lex_state(const LexState &lex_state) {
auto expected_inputs = lex_state.expected_inputs();
if (lex_state.is_token_start)
line("START_TOKEN();");
for (auto pair : lex_state.actions)
if (!pair.first.is_empty())
_if([&]() { condition_for_character_rule(pair.first); },
[&]() { code_for_lex_actions(pair.second, expected_inputs); });
code_for_lex_actions(lex_state.default_action, expected_inputs);
}
void switch_on_lex_state() {
_switch("lex_state", [&]() {
for (size_t i = 0; i < lex_table.states.size(); i++)
_case(lex_state_index(i), [&]() {
code_for_lex_state(lex_table.states[i]);
});
_case("ts_lex_state_error", [&]() {
code_for_lex_state(lex_table.error_state);
});
_default([&]() {
line("LEX_PANIC();");
});
});
}
void _switch(string condition, function<void()> body) {
line("switch (" + condition + ") {");
indent(body);
line("}");
}
void _case(string value, function<void()> body) {
line("case " + value + ":");
indent(body);
}
void _default(function<void()> body) {
line("default:");
indent(body);
}
void _if(function<void()> condition, function<void()> body) {
line("if (");
indent(condition);
add(")");
indent(body);
}
void line() {
line("");
}
void line(string input) {
add("\n");
if (!input.empty()) {
string space;
for (size_t i = 0; i < indent_level; i++)
space += " ";
add(space + input);
}
}
void add(string input) {
buffer += input;
}
void indent(function<void()> body) {
indent(body, 1);
}
void indent(function<void()> body, size_t n) {
indent_level += n;
body();
indent_level -= n;
}
};
string c_code(string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar) {
return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, lexical_grammar).code();
void symbol_enum() {
line("enum {");
indent([&]() {
bool at_start = true;
for (auto symbol : parse_table.symbols)
if (!symbol.is_built_in()) {
if (at_start)
line(symbol_id(symbol) + " = ts_start_sym,");
else
line(symbol_id(symbol) + ",");
at_start = false;
}
});
line("};");
line();
}
// Emits the SYMBOL_NAMES table: one `[id] = "name",` entry per symbol.
//
// The entries cover every symbol of the parse table plus the two built-in
// symbols (END_OF_INPUT and ERROR), which are added explicitly so that they
// get a name even when the parse table does not mention them. The original
// code built this augmented set but then iterated parse_table.symbols,
// leaving the set unused.
void symbol_names_list() {
  set<rules::Symbol> symbols(parse_table.symbols);
  symbols.insert(rules::END_OF_INPUT());
  symbols.insert(rules::ERROR());
  line("SYMBOL_NAMES = {");
  indent([&]() {
    for (const auto &symbol : symbols)
      line("[" + symbol_id(symbol) + "] = \"" + symbol_name(symbol) + "\",");
  });
  line("};");
  line();
}
// Emits the HIDDEN_SYMBOLS table. A non-built-in symbol is marked hidden when
// it is auxiliary or when its rule name starts with an underscore.
void hidden_symbols_list() {
  line("HIDDEN_SYMBOLS = {");
  indent([&]() {
    for (const auto &symbol : parse_table.symbols) {
      if (symbol.is_built_in())
        continue;
      const bool hidden =
          symbol.is_auxiliary() || rule_name(symbol)[0] == '_';
      if (hidden)
        line("[" + symbol_id(symbol) + "] = 1,");
    }
  });
  line("};");
  line();
}
// Emits the lexer function: a LEX_FN() body that begins with START_LEXER();
// and then contains the switch over lex states.
void lex_function() {
line("LEX_FN() {");
indent([&]() {
line("START_LEXER();");
switch_on_lex_state();
});
line("}");
line();
}
void lex_states_list() {
line("LEX_STATES = {");
indent([&]() {
size_t state_id = 0;
for (auto &state : parse_table.states)
line("[" + to_string(state_id++) + "] = " +
lex_state_index(state.lex_state_id) + ",");
});
line("};");
line();
}
// Emits the PARSE_TABLE initializer: one `[state] = { [symbol] = ACTION, }`
// entry per parse state. The table is wrapped in GCC diagnostic pragmas that
// silence -Wmissing-field-initializers for the emitted initializer.
void parse_table_array() {
  line("#pragma GCC diagnostic push");
  line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
  line();
  line("PARSE_TABLE = {");
  indent([&]() {
    size_t next_state_id = 0;
    for (const auto &state : parse_table.states) {
      line("[" + to_string(next_state_id) + "] = {");
      ++next_state_id;
      indent([&]() {
        for (const auto &action_entry : state.actions) {
          line("[" + symbol_id(action_entry.first) + "] = ");
          code_for_parse_action(action_entry.second);
          add(",");
        }
      });
      line("},");
    }
  });
  line("};");
  line();
  line("#pragma GCC diagnostic pop");
  line();
}
// Emits the EXPORT_PARSER(...) line; the exported identifier is
// "ts_parser_" followed by the generator's name.
void parser_export() {
line("EXPORT_PARSER(ts_parser_" + name + ");");
line();
}
// Looks up the rule name of `symbol`: tokens are resolved in the lexical
// grammar, everything else in the syntax grammar.
string rule_name(const rules::Symbol &symbol) {
  if (symbol.is_token())
    return lexical_grammar.rule_name(symbol);
  return syntax_grammar.rule_name(symbol);
}
// Returns the C identifier used for `symbol` in the generated code.
// Built-ins map to fixed names; other symbols get their sanitized rule name
// prefixed with "ts_aux_sym_" (auxiliary) or "ts_sym_".
string symbol_id(const rules::Symbol &symbol) {
  if (symbol.is_built_in()) {
    if (symbol == rules::ERROR())
      return "ts_builtin_sym_error";
    return "ts_builtin_sym_end";
  }
  const string sanitized = sanitize_name(rule_name(symbol));
  const char *prefix = symbol.is_auxiliary() ? "ts_aux_sym_" : "ts_sym_";
  return prefix + sanitized;
}
// Maps a rule name to a unique string made only of ASCII letters, digits and
// underscores, caching the result in sanitized_names.
//
// Other characters are dropped. If the stripped name is empty or already used
// by another rule, the numeric suffixes 1, 2, 3, ... are tried until an
// unused, non-empty candidate is found.
string sanitize_name(string name) {
  const auto cached = sanitized_names.find(name);
  if (cached != sanitized_names.end())
    return cached->second;

  string stripped;
  for (size_t i = 0; i < name.size(); i++) {
    const char c = name[i];
    const bool is_lower = ('a' <= c && c <= 'z');
    const bool is_upper = ('A' <= c && c <= 'Z');
    const bool is_digit = ('0' <= c && c <= '9');
    if (is_lower || is_upper || is_digit || c == '_')
      stripped += c;
  }

  size_t counter = 0;
  while (true) {
    string candidate = stripped;
    if (counter != 0)
      candidate += to_string(counter);
    if (!candidate.empty() && !has_sanitized_name(candidate)) {
      sanitized_names.insert({ name, candidate });
      return candidate;
    }
    counter++;
  }
}
// Reports whether `name` is already in use as the sanitized form of some
// rule name (a linear scan over the values of sanitized_names).
bool has_sanitized_name(string name) {
  for (auto it = sanitized_names.begin(); it != sanitized_names.end(); ++it) {
    if (it->second == name)
      return true;
  }
  return false;
}
// Returns the index used for lex state `i` in the generated code, which is
// the decimal text of `i + 1`.
string lex_state_index(size_t i) {
  const size_t shifted = i + 1;
  return to_string(shifted);
}
// Returns the display name written into the SYMBOL_NAMES table: "error" or
// "end" for the built-in symbols, otherwise the symbol's rule name.
//
// The original had a separate branch for auxiliary tokens that returned
// exactly the same value as the fallback branch; the two are merged here.
string symbol_name(const rules::Symbol &symbol) {
  if (symbol.is_built_in())
    return (symbol == rules::ERROR()) ? "error" : "end";
  return rule_name(symbol);
}
// Builds the C expression testing whether `lookahead` lies within `range`:
// an equality test for a single-character range, otherwise a two-sided
// inclusive bounds check.
string condition_for_character_range(const rules::CharacterRange &range) {
  const string low = escape_char(range.min);
  if (range.min == range.max)
    return "lookahead == '" + low + "'";
  const string high = escape_char(range.max);
  return "'" + low + "' <= lookahead && lookahead <= '" + high + "'";
}
// Appends the C condition matching any range in `set`. A single range is
// emitted bare; several ranges are each parenthesised and joined with " ||",
// every range after the first starting on a new output line.
void condition_for_character_set(const rules::CharacterSet &set) {
  const auto &ranges = set.ranges;
  if (ranges.size() == 1) {
    add(condition_for_character_range(*ranges.begin()));
    return;
  }
  bool need_separator = false;
  for (const auto &range : ranges) {
    const string clause = "(" + condition_for_character_range(range) + ")";
    if (need_separator) {
      add(" ||");
      line(clause);
    } else {
      add(clause);
    }
    need_separator = true;
  }
}
// Appends the condition for `rule` using its most compact representation.
// When the flag returned by most_compact_representation() is set, the
// returned set is emitted directly; otherwise the complement of `rule` is
// emitted wrapped in "!( ... )".
void condition_for_character_rule(const rules::CharacterSet &rule) {
  const auto compact = rule.most_compact_representation();
  if (!compact.second) {
    add("!(");
    condition_for_character_set(rule.complement());
    add(")");
    return;
  }
  condition_for_character_set(compact.first);
}
void code_for_parse_action(const ParseAction &action) {
switch (action.type) {
case ParseActionTypeAccept:
add("ACCEPT_INPUT()");
break;
case ParseActionTypeShift:
add("SHIFT(" + to_string(action.state_index) + ")");
break;
case ParseActionTypeShiftExtra:
add("SHIFT_EXTRA()");
break;
case ParseActionTypeReduce:
add("REDUCE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
break;
case ParseActionTypeReduceExtra:
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
break;
default: {}
}
}
// Emits the statement for a lex action on its own line: ADVANCE(...),
// ACCEPT_TOKEN(...) or LEX_ERROR(). Any other action type emits nothing.
// NOTE(review): `expected_inputs` is not read anywhere in this body.
void code_for_lex_actions(const LexAction &action,
const set<rules::CharacterSet> &expected_inputs) {
switch (action.type) {
case LexActionTypeAdvance:
line("ADVANCE(" + lex_state_index(action.state_index) + ");");
break;
case LexActionTypeAccept:
line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
break;
case LexActionTypeError:
line("LEX_ERROR();");
break;
default: {}
}
}
// Emits the body of one lex state:
//   1. START_TOKEN(); when the state is flagged as a token start,
//   2. one `if (...)` per action whose character set is non-empty,
//   3. the state's default action, unconditionally, after the ifs.
//
// The map entries are bound by const reference. The original copied each
// (CharacterSet, LexAction) pair per iteration; the lambdas run synchronously
// inside _if, so referencing the entry is safe.
void code_for_lex_state(const LexState &lex_state) {
  auto expected_inputs = lex_state.expected_inputs();
  if (lex_state.is_token_start)
    line("START_TOKEN();");
  for (const auto &entry : lex_state.actions) {
    if (entry.first.is_empty())
      continue;
    _if([&]() { condition_for_character_rule(entry.first); },
        [&]() { code_for_lex_actions(entry.second, expected_inputs); });
  }
  code_for_lex_actions(lex_state.default_action, expected_inputs);
}
// Emits `switch (lex_state)` with one case per state of the lex table
// (labelled via lex_state_index), a `ts_lex_state_error` case for the table's
// error state, and a default case that emits LEX_PANIC();.
void switch_on_lex_state() {
_switch("lex_state", [&]() {
for (size_t i = 0; i < lex_table.states.size(); i++)
_case(lex_state_index(i),
[&]() { code_for_lex_state(lex_table.states[i]); });
_case("ts_lex_state_error",
[&]() { code_for_lex_state(lex_table.error_state); });
_default([&]() { line("LEX_PANIC();"); });
});
}
// Emits `switch (condition) {`, runs `body` one indent level deeper, then
// emits the closing brace.
void _switch(string condition, function<void()> body) {
line("switch (" + condition + ") {");
indent(body);
line("}");
}
// Emits a `case value:` label and runs `body` one indent level deeper.
// No `break` is emitted here.
void _case(string value, function<void()> body) {
line("case " + value + ":");
indent(body);
}
// Emits a `default:` label and runs `body` one indent level deeper.
void _default(function<void()> body) {
line("default:");
indent(body);
}
// Emits an if statement without braces: "if (" on a new line, the output of
// `condition` (run one indent level deeper), a closing ")", and then the
// output of `body` (also one indent level deeper).
void _if(function<void()> condition, function<void()> body) {
line("if (");
indent(condition);
add(")");
indent(body);
}
// Starts a new, empty output line (appends just a newline).
void line() { line(""); }
// Starts a new output line containing `input`. The newline is written
// BEFORE the text, so output lines are newline-prefixed rather than
// newline-terminated. Non-empty input is preceded by one indentation unit
// per current indent level; empty input produces no indentation.
void line(string input) {
add("\n");
if (!input.empty()) {
string space;
for (size_t i = 0; i < indent_level; i++)
space += " ";
add(space + input);
}
}
// Appends `input` verbatim to the output buffer (no newline, no indentation).
void add(string input) { buffer += input; }
// Runs `body` with the indent level raised by one.
void indent(function<void()> body) { indent(body, 1); }
// Runs `body` with the indent level raised by `n`, then lowers it again.
// NOTE(review): the level is not restored if `body` throws.
void indent(function<void()> body, size_t n) {
indent_level += n;
body();
indent_level -= n;
}
};
// Generates the C source text of a parser named `name` from the given tables
// and grammars by constructing a temporary CCodeGenerator and returning the
// result of its code() method.
string c_code(string name, const ParseTable &parse_table,
const LexTable &lex_table, const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar) {
return CCodeGenerator(name, parse_table, lex_table, syntax_grammar,
lexical_grammar).code();
}
} // namespace generate_code
} // namespace tree_sitter

View file

@ -7,16 +7,17 @@
#include "compiler/lex_table.h"
namespace tree_sitter {
class SyntaxGrammar;
class LexicalGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace generate_code {
std::string c_code(std::string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar);
}
}
namespace generate_code {
std::string c_code(std::string name, const ParseTable &parse_table,
const LexTable &lex_table,
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar);
} // namespace generate_code
} // namespace tree_sitter
#endif // COMPILER_GENERATE_CODE_C_CODE_H_

View file

@ -2,83 +2,83 @@
#include "compiler/rules/rule.h"
namespace tree_sitter {
using std::ostream;
using std::pair;
using std::set;
using std::string;
using std::vector;
using rules::rule_ptr;
Grammar::Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules) :
rules_(rules),
ubiquitous_tokens_({}),
separators_({ ' ', '\r', '\t', '\n' }) {}
using std::ostream;
using std::pair;
using std::set;
using std::string;
using std::vector;
using rules::rule_ptr;
bool Grammar::operator==(const Grammar &other) const {
if (other.rules_.size() != rules_.size()) return false;
Grammar::Grammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules)
: rules_(rules),
ubiquitous_tokens_({}),
separators_({ ' ', '\r', '\t', '\n' }) {}
for (size_t i = 0; i < rules_.size(); i++) {
auto &pair = rules_[i];
auto &other_pair = other.rules_[i];
if (other_pair.first != pair.first) return false;
if (!other_pair.second->operator==(*pair.second)) return false;
}
bool Grammar::operator==(const Grammar &other) const {
if (other.rules_.size() != rules_.size())
return false;
return true;
}
for (size_t i = 0; i < rules_.size(); i++) {
auto &pair = rules_[i];
auto &other_pair = other.rules_[i];
if (other_pair.first != pair.first)
return false;
if (!other_pair.second->operator==(*pair.second))
return false;
}
string Grammar::start_rule_name() const {
return rules_.front().first;
}
ostream& operator<<(ostream &stream, const Grammar &grammar) {
stream << string("#<grammar");
stream << string(" rules: {");
bool started = false;
for (auto pair : grammar.rules()) {
if (started) stream << string(", ");
stream << pair.first;
stream << string(" => ");
stream << pair.second;
started = true;
}
return stream << string("}>");
}
GrammarError::GrammarError(GrammarErrorType type, std::string message) :
type(type),
message(message) {}
bool GrammarError::operator==(const GrammarError &other) const {
return type == other.type && message == other.message;
}
ostream& operator<<(ostream &stream, const GrammarError *error) {
if (error)
return stream << (string("#<grammar-error '") + error->message + "'>");
else
return stream << string("#<null>");
}
const set<string> & Grammar::ubiquitous_tokens() const {
return ubiquitous_tokens_;
}
Grammar & Grammar::ubiquitous_tokens(const set<string> &ubiquitous_tokens) {
ubiquitous_tokens_ = ubiquitous_tokens;
return *this;
}
const set<char> & Grammar::separators() const {
return separators_;
}
Grammar & Grammar::separators(const set<char> &separators) {
separators_ = separators;
return *this;
}
const vector<pair<string, rule_ptr>> & Grammar::rules() const {
return rules_;
}
return true;
}
// Returns the name of the first rule in rules_.
// NOTE(review): front() is called without an emptiness check; presumably a
// grammar always has at least one rule -- confirm against callers.
string Grammar::start_rule_name() const { return rules_.front().first; }
// Writes the grammar as "#<grammar rules: {name => rule, ...}>".
//
// The rule entries are bound by const reference. The original copied each
// (name, rule pointer) pair per iteration, costing a string copy and a
// shared-pointer refcount round trip per rule.
ostream &operator<<(ostream &stream, const Grammar &grammar) {
  stream << string("#<grammar");
  stream << string(" rules: {");
  bool started = false;
  for (const auto &pair : grammar.rules()) {
    if (started)
      stream << string(", ");
    stream << pair.first;
    stream << string(" => ");
    stream << pair.second;
    started = true;
  }
  return stream << string("}>");
}
// Creates an error of the given type carrying a human-readable message.
GrammarError::GrammarError(GrammarErrorType type, std::string message)
: type(type), message(message) {}
// Two errors are equal when both their type and their message match.
bool GrammarError::operator==(const GrammarError &other) const {
  if (type != other.type)
    return false;
  return message == other.message;
}
// Writes "#<grammar-error 'MESSAGE'>" for a non-null error and "#<null>" for
// a null pointer.
ostream &operator<<(ostream &stream, const GrammarError *error) {
  if (!error)
    return stream << string("#<null>");
  return stream << (string("#<grammar-error '") + error->message + "'>");
}
// Getter: returns the grammar's set of ubiquitous token names.
const set<string> &Grammar::ubiquitous_tokens() const {
return ubiquitous_tokens_;
}
// Setter: replaces the set of ubiquitous token names and returns *this so
// that calls can be chained.
Grammar &Grammar::ubiquitous_tokens(const set<string> &ubiquitous_tokens) {
ubiquitous_tokens_ = ubiquitous_tokens;
return *this;
}
// Getter: returns the grammar's set of separator characters.
const set<char> &Grammar::separators() const { return separators_; }
// Setter: replaces the set of separator characters and returns *this so that
// calls can be chained.
Grammar &Grammar::separators(const set<char> &separators) {
separators_ = separators;
return *this;
}
// Getter: returns the grammar's (name, rule) pairs in their stored order.
const vector<pair<string, rule_ptr> > &Grammar::rules() const { return rules_; }
} // namespace tree_sitter

View file

@ -2,75 +2,79 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
using std::string;
using std::to_string;
using std::map;
using std::set;
using rules::Symbol;
using rules::CharacterSet;
LexAction::LexAction() :
type(LexActionTypeError),
symbol(Symbol(-1)),
state_index(-1),
precedence_values({0}) {}
using std::string;
using std::to_string;
using std::map;
using std::set;
using rules::Symbol;
using rules::CharacterSet;
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, set<int> precedence_values) :
type(type),
symbol(symbol),
state_index(state_index),
precedence_values(precedence_values) {}
LexAction::LexAction()
: type(LexActionTypeError),
symbol(Symbol(-1)),
state_index(-1),
precedence_values({ 0 }) {}
LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, Symbol(-1), {0});
}
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol,
set<int> precedence_values)
: type(type),
symbol(symbol),
state_index(state_index),
precedence_values(precedence_values) {}
LexAction LexAction::Advance(size_t state_index, set<int> precedence_values) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), precedence_values);
}
LexAction LexAction::Accept(Symbol symbol, int precedence) {
return LexAction(LexActionTypeAccept, -1, symbol, { precedence });
}
bool LexAction::operator==(const LexAction &other) const {
return
(type == other.type) &&
(state_index == other.state_index) &&
(symbol == other.symbol);
}
std::ostream& operator<<(std::ostream &stream, const LexAction &action) {
switch (action.type) {
case LexActionTypeError:
return stream << string("#<error>");
case LexActionTypeAccept:
return stream << string("#<accept ") + to_string(action.symbol.index) + ">";
case LexActionTypeAdvance:
return stream << string("#<advance ") + to_string(action.state_index) + ">";
default:
return stream;
}
}
set<CharacterSet> LexState::expected_inputs() const {
set<CharacterSet> result;
for (auto &pair : actions)
result.insert(pair.first);
return result;
}
LexStateId LexTable::add_state() {
states.push_back(LexState());
return states.size() - 1;
}
LexState & LexTable::state(LexStateId id) {
if (id < 0)
return error_state;
else
return states[id];
}
const LexStateId LexTable::ERROR_STATE_ID = -1;
// Builds an error action: state index -1, Symbol(-1), precedence set {0}.
LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, Symbol(-1), { 0 });
}
// Builds an advance action targeting `state_index` with the given precedence
// values; the symbol is set to Symbol(-1).
LexAction LexAction::Advance(size_t state_index, set<int> precedence_values) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1),
precedence_values);
}
// Builds an accept action for `symbol` whose precedence set contains only
// `precedence`; the state index is set to -1.
LexAction LexAction::Accept(Symbol symbol, int precedence) {
return LexAction(LexActionTypeAccept, -1, symbol, { precedence });
}
// Actions are equal when type, state index and symbol all match.
// precedence_values does not take part in the comparison.
bool LexAction::operator==(const LexAction &other) const {
  if (!(type == other.type))
    return false;
  if (!(state_index == other.state_index))
    return false;
  return symbol == other.symbol;
}
// Writes a short debug form of the action: "#<error>", "#<accept N>" (symbol
// index) or "#<advance N>" (state index). Unknown types write nothing.
std::ostream &operator<<(std::ostream &stream, const LexAction &action) {
  if (action.type == LexActionTypeError)
    return stream << string("#<error>");
  if (action.type == LexActionTypeAccept)
    return stream << string("#<accept ") + to_string(action.symbol.index) +
                         ">";
  if (action.type == LexActionTypeAdvance)
    return stream << string("#<advance ") + to_string(action.state_index) +
                         ">";
  return stream;
}
// Returns the set of character sets for which this state has an action (the
// keys of `actions`).
set<CharacterSet> LexState::expected_inputs() const {
  set<CharacterSet> inputs;
  for (auto it = actions.begin(); it != actions.end(); ++it)
    inputs.insert(it->first);
  return inputs;
}
// Appends a default-constructed state and returns its id, which is its
// position in `states`.
LexStateId LexTable::add_state() {
  const LexStateId new_id = states.size();
  states.push_back(LexState());
  return new_id;
}
// Returns the state with the given id. Any negative id (such as
// ERROR_STATE_ID) selects the dedicated error state. Non-negative ids index
// `states` without a bounds check.
LexState &LexTable::state(LexStateId id) {
  return (id < 0) ? error_state : states[id];
}
const LexStateId LexTable::ERROR_STATE_ID = -1;
} // namespace tree_sitter

View file

@ -9,61 +9,69 @@
#include "compiler/rules/character_set.h"
namespace tree_sitter {
typedef enum {
LexActionTypeError,
LexActionTypeAccept,
LexActionTypeAdvance
} LexActionType;
class LexAction {
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, std::set<int> precedence_values);
public:
LexAction();
static LexAction Accept(rules::Symbol symbol, int precedence);
static LexAction Error();
static LexAction Advance(size_t state_index, std::set<int> precedence_values);
bool operator==(const LexAction &action) const;
typedef enum {
LexActionTypeError,
LexActionTypeAccept,
LexActionTypeAdvance
} LexActionType;
LexActionType type;
rules::Symbol symbol;
size_t state_index;
std::set<int> precedence_values;
};
class LexAction {
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol,
std::set<int> precedence_values);
std::ostream& operator<<(std::ostream &stream, const LexAction &item);
}
public:
LexAction();
static LexAction Accept(rules::Symbol symbol, int precedence);
static LexAction Error();
static LexAction Advance(size_t state_index, std::set<int> precedence_values);
bool operator==(const LexAction &action) const;
LexActionType type;
rules::Symbol symbol;
size_t state_index;
std::set<int> precedence_values;
};
std::ostream &operator<<(std::ostream &stream, const LexAction &item);
} // namespace tree_sitter
namespace std {
template<>
struct hash<tree_sitter::LexAction> {
size_t operator()(const tree_sitter::LexAction &action) const {
return (hash<int>()(action.type) ^
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
hash<size_t>()(action.state_index));
}
};
}
// std::hash specialization for LexAction: XORs the hashes of the action's
// type, symbol and state index. precedence_values is not hashed.
template <>
struct hash<tree_sitter::LexAction> {
size_t operator()(const tree_sitter::LexAction &action) const {
return (hash<int>()(action.type) ^
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
hash<size_t>()(action.state_index));
}
};
} // namespace std
namespace tree_sitter {
class LexState {
public:
std::map<rules::CharacterSet, LexAction> actions;
LexAction default_action;
std::set<rules::CharacterSet> expected_inputs() const;
bool is_token_start;
};
typedef int64_t LexStateId;
// One state of the lex table.
class LexState {
public:
// Action to take for each character set.
std::map<rules::CharacterSet, LexAction> actions;
// Action stored alongside the per-character-set actions; the default
// LexAction constructor makes it an error action.
LexAction default_action;
// Returns the keys of `actions`.
std::set<rules::CharacterSet> expected_inputs() const;
// NOTE(review): no constructor is visible here, so this flag appears to rely
// on how the object is initialized -- confirm it is always set before use.
bool is_token_start;
};
class LexTable {
public:
static const LexStateId ERROR_STATE_ID;
LexStateId add_state();
LexState & state(LexStateId state_id);
typedef int64_t LexStateId;
std::vector<LexState> states;
LexState error_state;
};
}
// The lexer's state table: an indexed list of states plus one separate error
// state.
class LexTable {
public:
// Id that refers to `error_state` instead of an entry of `states`.
static const LexStateId ERROR_STATE_ID;
// Appends a new state and returns its id.
LexStateId add_state();
// Returns the state for `state_id`; negative ids yield `error_state`.
LexState &state(LexStateId state_id);
std::vector<LexState> states;
LexState error_state;
};
} // namespace tree_sitter
#endif // COMPILER_LEX_TABLE_H_

View file

@ -2,109 +2,120 @@
#include <string>
namespace tree_sitter {
using std::string;
using std::ostream;
using std::to_string;
using std::set;
using std::vector;
using rules::Symbol;
ParseAction::ParseAction(ParseActionType type,
ParseStateId state_index,
Symbol symbol,
size_t consumed_symbol_count,
set<int> precedence_values) :
type(type),
symbol(symbol),
state_index(state_index),
consumed_symbol_count(consumed_symbol_count),
precedence_values(precedence_values) {}
using std::string;
using std::ostream;
using std::to_string;
using std::set;
using std::vector;
using rules::Symbol;
ParseAction::ParseAction() :
type(ParseActionTypeError),
symbol(Symbol(-1)),
state_index(-1),
consumed_symbol_count(0) {}
// Full constructor: stores every field as given. The static factory
// functions (Error, Accept, Shift, ...) build their actions through it.
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
Symbol symbol, size_t consumed_symbol_count,
set<int> precedence_values)
: type(type),
symbol(symbol),
state_index(state_index),
consumed_symbol_count(consumed_symbol_count),
precedence_values(precedence_values) {}
ParseAction ParseAction::Error() {
return ParseAction(ParseActionTypeError, -1, Symbol(-1), 0, { 0 });
}
// Default constructor: an error action with Symbol(-1), state index -1 and
// no consumed symbols. precedence_values is left default-constructed (empty),
// unlike ParseAction::Error(), which passes { 0 }.
ParseAction::ParseAction()
: type(ParseActionTypeError),
symbol(Symbol(-1)),
state_index(-1),
consumed_symbol_count(0) {}
// Builds an accept action: state index -1, Symbol(-1), nothing consumed,
// precedence set {0}.
ParseAction ParseAction::Accept() {
return ParseAction(ParseActionTypeAccept, -1, Symbol(-1), 0, { 0 });
}
// Builds a shift action to `state_index` with the given precedence values.
ParseAction ParseAction::Shift(ParseStateId state_index, set<int> precedence_values) {
return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, precedence_values);
}
// Builds a shift-extra action: state index 0, Symbol(-1), precedence set {0}.
ParseAction ParseAction::ShiftExtra() {
return ParseAction(ParseActionTypeShiftExtra, 0, Symbol(-1), 0, { 0 });
}
// Builds a reduce-extra action for `symbol`: state index 0, nothing consumed,
// precedence set {0}.
ParseAction ParseAction::ReduceExtra(Symbol symbol) {
return ParseAction(ParseActionTypeReduceExtra, 0, symbol, 0, { 0 });
}
// Builds a reduce action producing `symbol` from `consumed_symbol_count`
// symbols; the precedence set contains only `precedence`.
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count, int precedence) {
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count, { precedence });
}
// Two actions are equal when type, symbol, target state and consumed symbol
// count all match. precedence_values does not take part in the comparison.
bool ParseAction::operator==(const ParseAction &other) const {
  if (!(type == other.type))
    return false;
  if (!(symbol == other.symbol))
    return false;
  if (!(state_index == other.state_index))
    return false;
  return consumed_symbol_count == other.consumed_symbol_count;
}
// Writes a short human-readable tag for `action` to `stream` and returns the
// stream. Every tag has the form "#<...>"; an unrecognised type writes nothing.
ostream& operator<<(ostream &stream, const ParseAction &action) {
  switch (action.type) {
    case ParseActionTypeError:
      return stream << string("#<error>");
    case ParseActionTypeAccept:
      return stream << string("#<accept>");
    case ParseActionTypeShift:
      return stream << (string("#<shift ") + to_string(action.state_index) + ">");
    case ParseActionTypeShiftExtra:
      // Closing '>' added so this tag is delimited like all the others.
      return stream << string("#<shift_extra>");
    case ParseActionTypeReduceExtra:
      return stream << ("#<reduce_extra sym" + to_string(action.symbol.index) + ">");
    case ParseActionTypeReduce:
      return stream << ("#<reduce sym" + to_string(action.symbol.index) + " " + to_string(action.consumed_symbol_count) + ">");
    default:
      return stream;
  }
}
// A new parse state starts with lex_state_id initialised from -1.
ParseState::ParseState() : lex_state_id(-1) {}

// Collects every symbol that has an entry in `actions`.
set<Symbol> ParseState::expected_inputs() const {
  set<Symbol> symbols;
  for (const auto &entry : actions) {
    symbols.insert(entry.first);
  }
  return symbols;
}
// Writes `state` as "#<parse_state sym => action, sym => action, ...>" and
// returns the stream.
ostream& operator<<(ostream &stream, const ParseState &state) {
  stream << string("#<parse_state ");
  bool started = false;
  // Iterate by const reference: copying each entry would also copy the
  // std::set<int> held inside every ParseAction.
  for (const auto &pair : state.actions) {
    if (started) stream << string(", ");
    stream << pair.first << string(" => ") << pair.second;
    started = true;
  }
  stream << string(">");
  return stream;
}
// Appends a default-constructed state and returns its index in `states`.
ParseStateId ParseTable::add_state() {
  const ParseStateId new_id = states.size();
  states.push_back(ParseState());
  return new_id;
}

// Stores `action` for `symbol` in state `id` (replacing any existing entry)
// and records `symbol` in the table-wide symbol set. `id` is not range-checked.
void ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) {
  ParseState &state = states[id];
  state.actions[symbol] = action;
  symbols.insert(symbol);
}
// Builds an error action: state index -1, placeholder Symbol(-1), nothing
// consumed, precedence set { 0 }.
ParseAction ParseAction::Error() {
  ParseAction action(ParseActionTypeError, -1, Symbol(-1), 0, { 0 });
  return action;
}

// Builds an accept action: state index -1, placeholder symbol, precedence { 0 }.
ParseAction ParseAction::Accept() {
  ParseAction action(ParseActionTypeAccept, -1, Symbol(-1), 0, { 0 });
  return action;
}

// Builds a shift to `state_index`, carrying the given precedence values.
ParseAction ParseAction::Shift(ParseStateId state_index,
                               set<int> precedence_values) {
  ParseAction action(ParseActionTypeShift, state_index, Symbol(-1), 0,
                     precedence_values);
  return action;
}

// Builds a shift-extra action: state index 0, placeholder symbol.
ParseAction ParseAction::ShiftExtra() {
  ParseAction action(ParseActionTypeShiftExtra, 0, Symbol(-1), 0, { 0 });
  return action;
}

// Builds a reduce-extra action for `symbol`; no symbols are consumed.
ParseAction ParseAction::ReduceExtra(Symbol symbol) {
  ParseAction action(ParseActionTypeReduceExtra, 0, symbol, 0, { 0 });
  return action;
}

// Builds a reduction to `symbol` that consumes `consumed_symbol_count` symbols
// and carries the single precedence value `precedence`.
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
                                int precedence) {
  ParseAction action(ParseActionTypeReduce, 0, symbol, consumed_symbol_count,
                     { precedence });
  return action;
}
// Two actions are equal when type, symbol, target state and consumed symbol
// count all match. precedence_values does not take part in the comparison.
bool ParseAction::operator==(const ParseAction &other) const {
  if (!(type == other.type))
    return false;
  if (!(symbol == other.symbol))
    return false;
  if (!(state_index == other.state_index))
    return false;
  return consumed_symbol_count == other.consumed_symbol_count;
}
// Writes a short human-readable tag for `action` to `stream` and returns the
// stream. Every tag has the form "#<...>"; an unrecognised type writes nothing.
ostream &operator<<(ostream &stream, const ParseAction &action) {
  switch (action.type) {
    case ParseActionTypeError:
      return stream << string("#<error>");
    case ParseActionTypeAccept:
      return stream << string("#<accept>");
    case ParseActionTypeShift:
      return stream << (string("#<shift ") + to_string(action.state_index) +
                        ">");
    case ParseActionTypeShiftExtra:
      // Closing '>' added so this tag is delimited like all the others.
      return stream << string("#<shift_extra>");
    case ParseActionTypeReduceExtra:
      return stream << ("#<reduce_extra sym" + to_string(action.symbol.index) +
                        ">");
    case ParseActionTypeReduce:
      return stream << ("#<reduce sym" + to_string(action.symbol.index) + " " +
                        to_string(action.consumed_symbol_count) + ">");
    default:
      return stream;
  }
}
// A new parse state starts with lex_state_id initialised from -1.
ParseState::ParseState() : lex_state_id(-1) {}

// Collects every symbol that has an entry in `actions`.
set<Symbol> ParseState::expected_inputs() const {
  set<Symbol> symbols;
  for (const auto &entry : actions) {
    symbols.insert(entry.first);
  }
  return symbols;
}
// Writes `state` as "#<parse_state sym => action, sym => action, ...>" and
// returns the stream.
ostream &operator<<(ostream &stream, const ParseState &state) {
  stream << string("#<parse_state ");
  bool started = false;
  // Iterate by const reference: copying each entry would also copy the
  // std::set<int> held inside every ParseAction.
  for (const auto &pair : state.actions) {
    if (started)
      stream << string(", ");
    stream << pair.first << string(" => ") << pair.second;
    started = true;
  }
  stream << string(">");
  return stream;
}
// Appends a default-constructed state and returns its index in `states`.
ParseStateId ParseTable::add_state() {
  const ParseStateId new_id = states.size();
  states.push_back(ParseState());
  return new_id;
}

// Stores `action` for `symbol` in state `id` (replacing any existing entry)
// and records `symbol` in the table-wide symbol set. `id` is not range-checked.
void ParseTable::add_action(ParseStateId id, Symbol symbol,
                            ParseAction action) {
  ParseState &state = states[id];
  state.actions[symbol] = action;
  symbols.insert(symbol);
}
} // namespace tree_sitter

View file

@ -9,75 +9,82 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
typedef uint64_t ParseStateId;
// The kinds of action a parse-table entry can hold; ParseAction::type stores
// one of these values. ParseActionTypeError is the type of a
// default-constructed ParseAction.
typedef enum {
ParseActionTypeError,
ParseActionTypeShift,
ParseActionTypeShiftExtra,
ParseActionTypeReduce,
ParseActionTypeReduceExtra,
ParseActionTypeAccept,
} ParseActionType;
typedef uint64_t ParseStateId;
class ParseAction {
ParseAction(ParseActionType type,
ParseStateId state_index,
rules::Symbol symbol,
size_t consumed_symbol_count,
std::set<int> precedence_values);
public:
ParseAction();
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(ParseStateId state_index, std::set<int> precedence_values);
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count, int precedence);
static ParseAction ShiftExtra();
static ParseAction ReduceExtra(rules::Symbol symbol);
bool operator==(const ParseAction &action) const;
// The kinds of action a parse-table entry can hold; ParseAction::type stores
// one of these values. ParseActionTypeError is the type of a
// default-constructed ParseAction.
typedef enum {
ParseActionTypeError,
ParseActionTypeShift,
ParseActionTypeShiftExtra,
ParseActionTypeReduce,
ParseActionTypeReduceExtra,
ParseActionTypeAccept,
} ParseActionType;
ParseActionType type;
rules::Symbol symbol;
ParseStateId state_index;
size_t consumed_symbol_count;
std::set<int> precedence_values;
};
class ParseAction {
ParseAction(ParseActionType type, ParseStateId state_index,
rules::Symbol symbol, size_t consumed_symbol_count,
std::set<int> precedence_values);
std::ostream& operator<<(std::ostream &stream, const ParseAction &item);
}
public:
ParseAction();
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(ParseStateId state_index,
std::set<int> precedence_values);
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count,
int precedence);
static ParseAction ShiftExtra();
static ParseAction ReduceExtra(rules::Symbol symbol);
bool operator==(const ParseAction &action) const;
ParseActionType type;
rules::Symbol symbol;
ParseStateId state_index;
size_t consumed_symbol_count;
std::set<int> precedence_values;
};
std::ostream &operator<<(std::ostream &stream, const ParseAction &item);
} // namespace tree_sitter
namespace std {
// Hash for ParseAction: XOR of the hashes of type, symbol, state_index and
// consumed_symbol_count. precedence_values is not hashed, matching
// ParseAction::operator==.
template <>
struct hash<tree_sitter::ParseAction> {
  size_t operator()(const tree_sitter::ParseAction &action) const {
    size_t combined = hash<int>()(action.type);
    combined ^= hash<tree_sitter::rules::Symbol>()(action.symbol);
    combined ^= hash<size_t>()(action.state_index);
    combined ^= hash<size_t>()(action.consumed_symbol_count);
    return combined;
  }
};
}
// Hash for ParseAction: XOR of the hashes of type, symbol, state_index and
// consumed_symbol_count. precedence_values is not hashed, matching
// ParseAction::operator==.
template <>
struct hash<tree_sitter::ParseAction> {
  size_t operator()(const tree_sitter::ParseAction &action) const {
    size_t combined = hash<int>()(action.type);
    combined ^= hash<tree_sitter::rules::Symbol>()(action.symbol);
    combined ^= hash<size_t>()(action.state_index);
    combined ^= hash<size_t>()(action.consumed_symbol_count);
    return combined;
  }
};
} // namespace std
namespace tree_sitter {
// One state of the parse table: a symbol -> action map plus a lex state id.
class ParseState {
public:
// Initialises lex_state_id from -1; `actions` starts empty.
ParseState();
// The action recorded for each symbol in this state.
std::map<rules::Symbol, ParseAction> actions;
// Returns the set of keys of `actions`.
std::set<rules::Symbol> expected_inputs() const;
// Lex state id for this parse state.
LexStateId lex_state_id;
};
std::ostream& operator<<(std::ostream &stream, const ParseState &state);
// One state of the parse table: a symbol -> action map plus a lex state id.
class ParseState {
public:
// Initialises lex_state_id from -1; `actions` starts empty.
ParseState();
// The action recorded for each symbol in this state.
std::map<rules::Symbol, ParseAction> actions;
// Returns the set of keys of `actions`.
std::set<rules::Symbol> expected_inputs() const;
// Lex state id for this parse state.
LexStateId lex_state_id;
};
class ParseTable {
public:
ParseStateId add_state();
void add_action(ParseStateId state_id, rules::Symbol symbol, ParseAction action);
std::ostream &operator<<(std::ostream &stream, const ParseState &state);
std::vector<ParseState> states;
std::set<rules::Symbol> symbols;
};
}
// The parse table: a list of states plus the set of all symbols that have
// been given an action in any state.
class ParseTable {
public:
// Appends a default-constructed state and returns its index in `states`.
ParseStateId add_state();
// Sets the action for `symbol` in state `state_id` and adds `symbol` to
// `symbols`.
void add_action(ParseStateId state_id, rules::Symbol symbol,
ParseAction action);
std::vector<ParseState> states;
std::set<rules::Symbol> symbols;
};
} // namespace tree_sitter
#endif // COMPILER_PARSE_TABLE_H_

View file

@ -11,55 +11,57 @@
#include "compiler/rules/repeat.h"
namespace tree_sitter {
using std::string;
using std::vector;
using std::pair;
using std::to_string;
using std::make_shared;
using rules::rule_ptr;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::Rule;
using rules::Seq;
using rules::Symbol;
namespace prepare_grammar {
namespace prepare_grammar {
class ExpandRepeats : public rules::IdentityRuleFn {
string rule_name;
using std::string;
using std::vector;
using std::pair;
using std::to_string;
using std::make_shared;
using rules::rule_ptr;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::Rule;
using rules::Seq;
using rules::Symbol;
// Replaces a Repeat with a fresh auxiliary symbol and records a helper rule
//   <rule_name>_repeat<N>  ->  (content helper) | blank
// in aux_rules. The content is transformed first, so helper rules for nested
// repeats are appended before this one's index is chosen.
rule_ptr apply_to(const Repeat *rule) {
  rule_ptr content = apply(rule->content);
  size_t aux_index = aux_rules.size();
  rule_ptr repeat_symbol =
      make_shared<Symbol>(offset + aux_index, rules::SymbolOptionAuxiliary);
  string helper_rule_name =
      rule_name + string("_repeat") + to_string(aux_index);
  rule_ptr recursive_case = Seq::Build({ content, repeat_symbol });
  rule_ptr helper_body = Choice::Build({ recursive_case, make_shared<Blank>() });
  aux_rules.push_back({ helper_rule_name, helper_body });
  return repeat_symbol;
}
class ExpandRepeats : public rules::IdentityRuleFn {
string rule_name;
public:
ExpandRepeats(string rule_name, size_t offset) : rule_name(rule_name), offset(offset) {}
// Replaces a Repeat with a fresh auxiliary symbol and records a helper rule
//   <rule_name>_repeat<N>  ->  (content helper) | blank
// in aux_rules. The content is transformed first, so helper rules for nested
// repeats are appended before this one's index is chosen.
rule_ptr apply_to(const Repeat *rule) {
  rule_ptr content = apply(rule->content);
  size_t aux_index = aux_rules.size();
  rule_ptr repeat_symbol =
      make_shared<Symbol>(offset + aux_index, rules::SymbolOptionAuxiliary);
  string helper_rule_name =
      rule_name + string("_repeat") + to_string(aux_index);
  rule_ptr recursive_case = Seq::Build({ content, repeat_symbol });
  rule_ptr helper_body = Choice::Build({ recursive_case, make_shared<Blank>() });
  aux_rules.push_back({ helper_rule_name, helper_body });
  return repeat_symbol;
}
size_t offset;
vector<pair<string, rules::rule_ptr>> aux_rules;
};
public:
ExpandRepeats(string rule_name, size_t offset)
: rule_name(rule_name), offset(offset) {}
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
size_t offset;
vector<pair<string, rules::rule_ptr> > aux_rules;
};
for (auto &pair : grammar.rules) {
ExpandRepeats expander(pair.first, aux_rules.size());
rules.push_back({ pair.first, expander.apply(pair.second) });
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
}
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
vector<pair<string, rules::rule_ptr> > rules, aux_rules(grammar.aux_rules);
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
}
}
for (auto &pair : grammar.rules) {
ExpandRepeats expander(pair.first, aux_rules.size());
rules.push_back({ pair.first, expander.apply(pair.second) });
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(),
expander.aux_rules.end());
}
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -4,11 +4,14 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class SyntaxGrammar;
namespace prepare_grammar {
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
}
}
class SyntaxGrammar;
namespace prepare_grammar {
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_

View file

@ -12,60 +12,59 @@
#include "compiler/prepare_grammar/parse_regex.h"
namespace tree_sitter {
using std::string;
using std::vector;
using std::pair;
using std::make_shared;
using rules::rule_ptr;
using rules::String;
using rules::Pattern;
namespace prepare_grammar {
namespace prepare_grammar {
class ExpandTokens : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
using std::string;
using std::vector;
using std::pair;
using std::make_shared;
using rules::rule_ptr;
using rules::String;
using rules::Pattern;
// Expands a string literal into a sequence of single-character sets, one per
// character of the string, in order.
rule_ptr apply_to(const String *rule) {
  vector<rule_ptr> char_rules;
  for (char character : rule->value) {
    rule_ptr char_rule = rules::CharacterSet({ character }).copy();
    char_rules.push_back(char_rule);
  }
  return rules::Seq::Build(char_rules);
}
class ExpandTokens : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
// Parses the pattern's regex source into a rule. The first parse error seen
// is remembered in `error`; later results do not overwrite it.
rule_ptr apply_to(const Pattern *rule) {
  auto parsed = parse_regex(rule->value);
  if (error == nullptr) {
    error = parsed.second;
  }
  return parsed.first;
}
// Expands a string literal into a sequence of single-character sets, one per
// character of the string, in order.
rule_ptr apply_to(const String *rule) {
  vector<rule_ptr> char_rules;
  for (char character : rule->value) {
    rule_ptr char_rule = rules::CharacterSet({ character }).copy();
    char_rules.push_back(char_rule);
  }
  return rules::Seq::Build(char_rules);
}
public:
const GrammarError *error;
ExpandTokens() : error(nullptr) {}
};
// Parses the pattern's regex source into a rule. The first parse error seen
// is remembered in `error`; later results do not overwrite it.
rule_ptr apply_to(const Pattern *rule) {
  auto parsed = parse_regex(rule->value);
  if (error == nullptr) {
    error = parsed.second;
  }
  return parsed.first;
}
pair<LexicalGrammar, const GrammarError *>
expand_tokens(const LexicalGrammar &grammar) {
vector<pair<string, rule_ptr>> rules, aux_rules;
ExpandTokens expander;
public:
const GrammarError *error;
ExpandTokens() : error(nullptr) {}
};
for (auto &pair : grammar.rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { LexicalGrammar(), expander.error };
rules.push_back({ pair.first, rule });
}
pair<LexicalGrammar, const GrammarError *> expand_tokens(
const LexicalGrammar &grammar) {
vector<pair<string, rule_ptr> > rules, aux_rules;
ExpandTokens expander;
for (auto &pair : grammar.aux_rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { LexicalGrammar(), expander.error };
aux_rules.push_back({ pair.first, rule });
}
for (auto &pair : grammar.rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { LexicalGrammar(), expander.error };
rules.push_back({ pair.first, rule });
}
return {
LexicalGrammar(rules, aux_rules, grammar.separators),
nullptr,
};
}
}
for (auto &pair : grammar.aux_rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { LexicalGrammar(), expander.error };
aux_rules.push_back({ pair.first, rule });
}
return { LexicalGrammar(rules, aux_rules, grammar.separators), nullptr, };
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -5,13 +5,15 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class LexicalGrammar;
namespace prepare_grammar {
std::pair<LexicalGrammar, const GrammarError *>
expand_tokens(const LexicalGrammar &);
}
}
class LexicalGrammar;
namespace prepare_grammar {
std::pair<LexicalGrammar, const GrammarError *> expand_tokens(
const LexicalGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_

View file

@ -14,120 +14,123 @@
#include "compiler/prepare_grammar/token_description.h"
namespace tree_sitter {
using std::pair;
using std::string;
using std::map;
using std::to_string;
using std::vector;
using std::set;
using std::make_shared;
using rules::rule_ptr;
using rules::Symbol;
namespace prepare_grammar {
namespace prepare_grammar {
// Rule visitor that reports whether a rule is a token. String and Pattern
// rules always are; a Metadata rule is one when its IS_TOKEN value is truthy.
// NOTE(review): the answer for other rule kinds comes from rules::RuleFn<bool>,
// which is not visible here - confirm it defaults to false.
class IsToken : public rules::RuleFn<bool> {
bool apply_to(const rules::String *rule) { return true; }
bool apply_to(const rules::Pattern *rule) { return true; }
bool apply_to(const rules::Metadata *rule) { return rule->value_for(rules::IS_TOKEN); }
};
using std::pair;
using std::string;
using std::map;
using std::to_string;
using std::vector;
using std::set;
using std::make_shared;
using rules::rule_ptr;
using rules::Symbol;
class SymbolInliner : public rules::IdentityRuleFn {
map<Symbol, Symbol> replacements;
using rules::IdentityRuleFn::apply_to;
// Rule visitor that reports whether a rule is a token. String and Pattern
// rules always are; a Metadata rule is one when its IS_TOKEN value is truthy.
// NOTE(review): the answer for other rule kinds comes from rules::RuleFn<bool>,
// which is not visible here - confirm it defaults to false.
class IsToken : public rules::RuleFn<bool> {
bool apply_to(const rules::String *rule) { return true; }
bool apply_to(const rules::Pattern *rule) { return true; }
bool apply_to(const rules::Metadata *rule) {
return rule->value_for(rules::IS_TOKEN);
}
};
int new_index_for_symbol(const Symbol &symbol) {
int result = symbol.index;
for (const auto &pair : replacements)
if (pair.first.index < symbol.index &&
pair.first.is_auxiliary() == symbol.is_auxiliary())
result--;
return result;
}
class SymbolInliner : public rules::IdentityRuleFn {
map<Symbol, Symbol> replacements;
using rules::IdentityRuleFn::apply_to;
rule_ptr apply_to(const Symbol *rule) {
return replace_symbol(*rule).copy();
}
int new_index_for_symbol(const Symbol &symbol) {
int result = symbol.index;
for (const auto &pair : replacements)
if (pair.first.index < symbol.index &&
pair.first.is_auxiliary() == symbol.is_auxiliary())
result--;
return result;
}
public:
// Maps a symbol of the input grammar to its counterpart after extraction:
// built-in symbols are returned unchanged, symbols listed in `replacements`
// map to their replacement, and all others keep their options but get a
// re-numbered index.
Symbol replace_symbol(const Symbol &rule) {
  if (rule.is_built_in()) {
    return rule;
  }
  auto found = replacements.find(rule);
  if (found == replacements.end()) {
    return Symbol(new_index_for_symbol(rule), rule.options);
  }
  return found->second;
}
rule_ptr apply_to(const Symbol *rule) { return replace_symbol(*rule).copy(); }
SymbolInliner(const map<Symbol, Symbol> &replacements) : replacements(replacements) {}
};
public:
// Maps a symbol of the input grammar to its counterpart after extraction:
// built-in symbols are returned unchanged, symbols listed in `replacements`
// map to their replacement, and all others keep their options but get a
// re-numbered index.
Symbol replace_symbol(const Symbol &rule) {
  if (rule.is_built_in()) {
    return rule;
  }
  auto found = replacements.find(rule);
  if (found == replacements.end()) {
    return Symbol(new_index_for_symbol(rule), rule.options);
  }
  return found->second;
}
const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary);
SymbolInliner(const map<Symbol, Symbol> &replacements)
: replacements(replacements) {}
};
class TokenExtractor : public rules::IdentityRuleFn {
// Returns the auxiliary token symbol standing for `input`. If an equal rule
// is already in `tokens` its index is reused; otherwise the rule is appended
// together with its token_description and the new index is used.
rule_ptr apply_to_token(const rules::Rule *input) {
  auto rule = input->copy();
  size_t index = 0;
  while (index < tokens.size()) {
    if (tokens[index].second->operator==(*rule))
      break;
    index++;
  }
  if (index == tokens.size()) {
    tokens.push_back({ token_description(rule), rule });
  }
  return make_shared<Symbol>(index, SymbolOptionAuxToken);
}
const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(
rules::SymbolOptionToken | rules::SymbolOptionAuxiliary);
// Generic handler (presumably what IdentityRuleFn falls back to for rule
// kinds without their own overload - confirm against the base class): a rule
// that IsToken accepts is replaced by an auxiliary token symbol, anything
// else is returned as a copy.
rule_ptr default_apply(const rules::Rule *rule) {
  // A single copy serves both the token check and the non-token result; the
  // previous code copied the rule twice.
  auto result = rule->copy();
  if (IsToken().apply(result)) {
    return apply_to_token(rule);
  } else {
    return result;
  }
}
class TokenExtractor : public rules::IdentityRuleFn {
// Returns the auxiliary token symbol standing for `input`. If an equal rule
// is already in `tokens` its index is reused; otherwise the rule is appended
// together with its token_description and the new index is used.
rule_ptr apply_to_token(const rules::Rule *input) {
  auto rule = input->copy();
  size_t index = 0;
  while (index < tokens.size()) {
    if (tokens[index].second->operator==(*rule))
      break;
    index++;
  }
  if (index == tokens.size()) {
    tokens.push_back({ token_description(rule), rule });
  }
  return make_shared<Symbol>(index, SymbolOptionAuxToken);
}
// Metadata rules that IsToken accepts are extracted as a whole into an
// auxiliary token; other metadata rules are handled by IdentityRuleFn.
// (The unused extra copy of the rule that used to be made here is gone.)
rule_ptr apply_to(const rules::Metadata *rule) {
  if (IsToken().apply(rule->copy())) {
    return apply_to_token(rule);
  } else {
    return rules::IdentityRuleFn::apply_to(rule);
  }
}
public:
vector<pair<string, rule_ptr>> tokens;
};
// Splits an interned grammar into a syntax grammar and a lexical grammar.
//
// Top-level rules that are themselves tokens move to the lexical grammar and
// are recorded in `symbol_replacements`. Every other rule stays in the syntax
// grammar after TokenExtractor has replaced its embedded tokens with auxiliary
// token symbols. The remaining symbols are then re-numbered / replaced via
// SymbolInliner, as are the ubiquitous tokens.
pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &input_grammar) {
  vector<pair<string, rule_ptr>> rules, tokens, aux_rules, aux_tokens;
  set<Symbol> ubiquitous_tokens;

  TokenExtractor extractor;
  map<Symbol, Symbol> symbol_replacements;

  for (size_t i = 0; i < input_grammar.rules.size(); i++) {
    // Bind by reference: copying the entry would copy the rule name string
    // and bump the rule's shared_ptr refcount on every iteration.
    const auto &named_rule = input_grammar.rules[i];
    if (IsToken().apply(named_rule.second)) {
      tokens.push_back(named_rule);
      symbol_replacements.insert({
        Symbol(i),
        Symbol(tokens.size() - 1, rules::SymbolOptionToken)
      });
    } else {
      rules.push_back({ named_rule.first, extractor.apply(named_rule.second) });
    }
  }

  aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());

  SymbolInliner inliner(symbol_replacements);
  for (auto &pair : rules)
    pair.second = inliner.apply(pair.second);
  for (auto &symbol : input_grammar.ubiquitous_tokens)
    ubiquitous_tokens.insert(inliner.replace_symbol(symbol));

  return {
    SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
    LexicalGrammar(tokens, aux_tokens, input_grammar.separators),
  };
}
// Generic handler (presumably what IdentityRuleFn falls back to for rule
// kinds without their own overload - confirm against the base class): a rule
// that IsToken accepts is replaced by an auxiliary token symbol, anything
// else is returned as a copy.
rule_ptr default_apply(const rules::Rule *rule) {
  // A single copy serves both the token check and the non-token result; the
  // previous code copied the rule twice.
  auto result = rule->copy();
  if (IsToken().apply(result)) {
    return apply_to_token(rule);
  } else {
    return result;
  }
}
// Metadata rules that IsToken accepts are extracted as a whole into an
// auxiliary token; other metadata rules are handled by IdentityRuleFn.
// (The unused extra copy of the rule that used to be made here is gone.)
rule_ptr apply_to(const rules::Metadata *rule) {
  if (IsToken().apply(rule->copy())) {
    return apply_to_token(rule);
  } else {
    return rules::IdentityRuleFn::apply_to(rule);
  }
}
public:
vector<pair<string, rule_ptr> > tokens;
};
// Splits an interned grammar into a syntax grammar and a lexical grammar.
//
// Top-level rules that are themselves tokens move to the lexical grammar and
// are recorded in `symbol_replacements`. Every other rule stays in the syntax
// grammar after TokenExtractor has replaced its embedded tokens with auxiliary
// token symbols. The remaining symbols are then re-numbered / replaced via
// SymbolInliner, as are the ubiquitous tokens.
pair<SyntaxGrammar, LexicalGrammar> extract_tokens(
    const InternedGrammar &input_grammar) {
  vector<pair<string, rule_ptr> > rules, tokens, aux_rules, aux_tokens;
  set<Symbol> ubiquitous_tokens;

  TokenExtractor extractor;
  map<Symbol, Symbol> symbol_replacements;

  for (size_t i = 0; i < input_grammar.rules.size(); i++) {
    // Bind by reference: copying the entry would copy the rule name string
    // and bump the rule's shared_ptr refcount on every iteration.
    const auto &named_rule = input_grammar.rules[i];
    if (IsToken().apply(named_rule.second)) {
      tokens.push_back(named_rule);
      symbol_replacements.insert(
          { Symbol(i), Symbol(tokens.size() - 1, rules::SymbolOptionToken) });
    } else {
      rules.push_back({ named_rule.first, extractor.apply(named_rule.second) });
    }
  }

  aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(),
                    extractor.tokens.end());

  SymbolInliner inliner(symbol_replacements);
  for (auto &pair : rules)
    pair.second = inliner.apply(pair.second);
  for (auto &symbol : input_grammar.ubiquitous_tokens)
    ubiquitous_tokens.insert(inliner.replace_symbol(symbol));

  return { SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
           LexicalGrammar(tokens, aux_tokens, input_grammar.separators), };
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -5,12 +5,16 @@
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
class SyntaxGrammar;
class LexicalGrammar;
namespace prepare_grammar {
std::pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &);
}
}
class SyntaxGrammar;
class LexicalGrammar;
namespace prepare_grammar {
std::pair<SyntaxGrammar, LexicalGrammar> extract_tokens(
const InternedGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_

View file

@ -10,70 +10,72 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
using std::string;
using rules::rule_ptr;
using std::vector;
using std::set;
using std::pair;
using std::make_shared;
namespace prepare_grammar {
namespace prepare_grammar {
class InternSymbols : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
using std::string;
using rules::rule_ptr;
using std::vector;
using std::set;
using std::pair;
using std::make_shared;
// Replaces a named symbol with the indexed symbol of the rule it names. When
// no such rule exists the name is stored in missing_rule_name and the (null)
// lookup result is returned.
rule_ptr apply_to(const rules::NamedSymbol *rule) {
  auto symbol = symbol_for_rule_name(rule->name);
  if (symbol.get() == nullptr) {
    missing_rule_name = rule->name;
  }
  return symbol;
}
class InternSymbols : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
public:
// Returns a Symbol holding the position of the first grammar rule named
// `rule_name`, or nullptr when the grammar has no rule with that name.
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
  const auto &all_rules = grammar.rules();
  size_t index = 0;
  for (const auto &entry : all_rules) {
    if (entry.first == rule_name)
      return make_shared<rules::Symbol>(index);
    ++index;
  }
  return nullptr;
}
// Replaces a named symbol with the indexed symbol of the rule it names. When
// no such rule exists the name is stored in missing_rule_name and the (null)
// lookup result is returned.
rule_ptr apply_to(const rules::NamedSymbol *rule) {
  auto symbol = symbol_for_rule_name(rule->name);
  if (symbol.get() == nullptr) {
    missing_rule_name = rule->name;
  }
  return symbol;
}
explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {}
const Grammar grammar;
string missing_rule_name;
};
public:
// Returns a Symbol holding the position of the first grammar rule named
// `rule_name`, or nullptr when the grammar has no rule with that name.
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
  const auto &all_rules = grammar.rules();
  size_t index = 0;
  for (const auto &entry : all_rules) {
    if (entry.first == rule_name)
      return make_shared<rules::Symbol>(index);
    ++index;
  }
  return nullptr;
}
// Builds the failure result for an undefined rule name: an empty
// InternedGrammar paired with a heap-allocated GrammarError of type
// GrammarErrorTypeUndefinedSymbol. The error is allocated with `new`; nothing
// in this function releases it.
pair<InternedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
  const GrammarError *error = new GrammarError(
      GrammarErrorTypeUndefinedSymbol, "Undefined rule '" + rule_name + "'");
  InternedGrammar empty_grammar;
  return { empty_grammar, error };
}
explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {}
const Grammar grammar;
string missing_rule_name;
};
// Converts a Grammar into an InternedGrammar by replacing named symbols with
// indexed ones. Rule order is kept, ubiquitous token names are resolved to
// symbols, and separators are copied. The first name (in a rule or in the
// ubiquitous token list) that matches no rule yields a missing_rule_error
// result instead.
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
  InternSymbols interner(grammar);
  InternedGrammar result;

  for (auto &named_rule : grammar.rules()) {
    auto interned_rule = interner.apply(named_rule.second);
    if (!interner.missing_rule_name.empty())
      return missing_rule_error(interner.missing_rule_name);
    result.rules.push_back({ named_rule.first, interned_rule });
  }

  for (auto &name : grammar.ubiquitous_tokens()) {
    auto token = interner.symbol_for_rule_name(name);
    if (token.get() == nullptr)
      return missing_rule_error(name);
    result.ubiquitous_tokens.insert(*token);
  }

  result.separators = grammar.separators();
  return { result, nullptr };
}
}
// Builds the failure result for an undefined rule name: an empty
// InternedGrammar paired with a heap-allocated GrammarError of type
// GrammarErrorTypeUndefinedSymbol. The error is allocated with `new`; nothing
// in this function releases it.
pair<InternedGrammar, const GrammarError *> missing_rule_error(
    string rule_name) {
  const GrammarError *error = new GrammarError(
      GrammarErrorTypeUndefinedSymbol, "Undefined rule '" + rule_name + "'");
  InternedGrammar empty_grammar;
  return { empty_grammar, error };
}
// Converts a Grammar into an InternedGrammar by replacing named symbols with
// indexed ones. Rule order is kept, ubiquitous token names are resolved to
// symbols, and separators are copied. The first name (in a rule or in the
// ubiquitous token list) that matches no rule yields a missing_rule_error
// result instead.
pair<InternedGrammar, const GrammarError *> intern_symbols(
    const Grammar &grammar) {
  InternSymbols interner(grammar);
  InternedGrammar result;

  for (auto &named_rule : grammar.rules()) {
    auto interned_rule = interner.apply(named_rule.second);
    if (!interner.missing_rule_name.empty())
      return missing_rule_error(interner.missing_rule_name);
    result.rules.push_back({ named_rule.first, interned_rule });
  }

  for (auto &name : grammar.ubiquitous_tokens()) {
    auto token = interner.symbol_for_rule_name(name);
    if (token.get() == nullptr)
      return missing_rule_error(name);
    result.ubiquitous_tokens.insert(*token);
  }

  result.separators = grammar.separators();
  return { result, nullptr };
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -7,11 +7,15 @@
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
class Grammar;
namespace prepare_grammar {
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
}
}
class Grammar;
namespace prepare_grammar {
std::pair<InternedGrammar, const GrammarError *> intern_symbols(
const Grammar &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_INTERN_SYMBOLS_H_

View file

@ -9,14 +9,16 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace prepare_grammar {
// A grammar whose rules refer to each other through rules::Symbol values
// rather than by name. intern_symbols fills in all three fields.
class InternedGrammar {
public:
// (rule name, rule) pairs, in grammar order.
std::vector<std::pair<std::string, rules::rule_ptr>> rules;
// Ubiquitous tokens, resolved from names to symbols.
std::set<rules::Symbol> ubiquitous_tokens;
// Separator characters, copied from the source grammar.
std::set<char> separators;
};
}
}
namespace prepare_grammar {
// A grammar whose rules refer to each other through rules::Symbol values
// rather than by name. intern_symbols fills in all three fields.
class InternedGrammar {
public:
// (rule name, rule) pairs, in grammar order.
std::vector<std::pair<std::string, rules::rule_ptr> > rules;
// Ubiquitous tokens, resolved from names to symbols.
std::set<rules::Symbol> ubiquitous_tokens;
// Separator characters, copied from the source grammar.
std::set<char> separators;
};
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_

View file

@ -10,202 +10,193 @@
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
using std::string;
using std::vector;
using std::pair;
using std::make_shared;
using rules::rule_ptr;
using rules::CharacterSet;
using rules::Seq;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::CharacterRange;
using rules::blank;
namespace prepare_grammar {
namespace prepare_grammar {
class PatternParser {
public:
explicit PatternParser(const string &input) :
input(input),
length(input.length()),
position(0) {}
using std::string;
using std::vector;
using std::pair;
using std::make_shared;
using rules::rule_ptr;
using rules::CharacterSet;
using rules::Seq;
using rules::Blank;
using rules::Choice;
using rules::Repeat;
using rules::CharacterRange;
using rules::blank;
// rule := term ('|' term)*
// Parses one or more '|'-separated alternatives. A single alternative is
// returned as-is; several are wrapped in a Choice. `nested` is forwarded to
// term(). On error the result is { blank(), error }.
pair<rule_ptr, const GrammarError *> rule(bool nested) {
  vector<rule_ptr> alternatives;
  for (;;) {
    auto parsed = term(nested);
    if (parsed.second)
      return { blank(), parsed.second };
    alternatives.push_back(parsed.first);

    // Continue only while input remains and the next character is '|'.
    if (!has_more_input())
      break;
    if (peek() != '|')
      break;
    next();
  }

  rule_ptr result = alternatives.front();
  if (alternatives.size() > 1)
    result = make_shared<Choice>(alternatives);
  return { result, nullptr };
}
class PatternParser {
public:
explicit PatternParser(const string &input)
: input(input), length(input.length()), position(0) {}
private:
// term := factor*
// Parses consecutive factors into a sequence that starts from blank(). Stops
// before '|', before ')' when `nested` is true, or at end of input.
pair<rule_ptr, const GrammarError *> term(bool nested) {
  rule_ptr sequence = blank();
  for (;;) {
    const char upcoming = peek();
    if (upcoming == '|' || (nested && upcoming == ')'))
      break;
    auto parsed = factor();
    if (parsed.second)
      return { blank(), parsed.second };
    sequence = Seq::Build({ sequence, parsed.first });
    if (!has_more_input())
      break;
  }
  return { sequence, nullptr };
}
// rule := term ('|' term)*
// Parses one or more '|'-separated alternatives. A single alternative is
// returned as-is; several are wrapped in a Choice. `nested` is forwarded to
// term(). On error the result is { blank(), error }.
pair<rule_ptr, const GrammarError *> rule(bool nested) {
  vector<rule_ptr> alternatives;
  for (;;) {
    auto parsed = term(nested);
    if (parsed.second)
      return { blank(), parsed.second };
    alternatives.push_back(parsed.first);

    // Continue only while input remains and the next character is '|'.
    if (!has_more_input())
      break;
    if (peek() != '|')
      break;
    next();
  }

  rule_ptr result = alternatives.front();
  if (alternatives.size() > 1)
    result = make_shared<Choice>(alternatives);
  return { result, nullptr };
}
// factor := atom ('*' | '+' | '?')?
// Parses an atom and applies at most one trailing quantifier:
//   '*' -> Repeat(atom), '+' -> Seq(atom, Repeat(atom)), '?' -> atom | blank.
pair<rule_ptr, const GrammarError *> factor() {
  auto parsed = atom();
  if (parsed.second)
    return { blank(), parsed.second };

  rule_ptr result = parsed.first;
  if (!has_more_input())
    return { result, nullptr };

  const char quantifier = peek();
  if (quantifier == '*') {
    next();
    result = make_shared<Repeat>(result);
  } else if (quantifier == '+') {
    next();
    result = make_shared<Seq>(result, make_shared<Repeat>(result));
  } else if (quantifier == '?') {
    next();
    result = Choice::Build({ result, make_shared<Blank>() });
  }
  return { result, nullptr };
}
private:
// term := factor*
// Parses consecutive factors into a sequence that starts from blank(). Stops
// before '|', before ')' when `nested` is true, or at end of input.
pair<rule_ptr, const GrammarError *> term(bool nested) {
  rule_ptr sequence = blank();
  for (;;) {
    const char upcoming = peek();
    if (upcoming == '|' || (nested && upcoming == ')'))
      break;
    auto parsed = factor();
    if (parsed.second)
      return { blank(), parsed.second };
    sequence = Seq::Build({ sequence, parsed.first });
    if (!has_more_input())
      break;
  }
  return { sequence, nullptr };
}
// atom := '(' rule ')' | '[' char_set ']' | '.' | single_char
// A stray ')' or ']' is reported as an error; '.' matches every character
// except '\n'.
pair<rule_ptr, const GrammarError *> atom() {
  const char first = peek();

  if (first == '(') {
    next();
    auto group = rule(true);
    if (group.second)
      return { blank(), group.second };
    if (peek() != ')')
      return error("unmatched open paren");
    next();
    return { group.first, nullptr };
  }

  if (first == '[') {
    next();
    auto set_result = char_set();
    if (set_result.second)
      return { blank(), set_result.second };
    if (peek() != ']')
      return error("unmatched open square bracket");
    next();
    return { set_result.first.copy(), nullptr };
  }

  if (first == ')')
    return error("unmatched close paren");

  if (first == ']')
    return error("unmatched close square bracket");

  if (first == '.') {
    next();
    return { CharacterSet({ '\n' }).complement().copy(), nullptr };
  }

  auto single = single_char();
  if (single.second)
    return { blank(), single.second };
  return { single.first.copy(), nullptr };
}
// factor := atom ('*' | '+' | '?')?
// Parses an atom and applies at most one trailing quantifier:
//   '*' -> Repeat(atom), '+' -> Seq(atom, Repeat(atom)), '?' -> atom | blank.
pair<rule_ptr, const GrammarError *> factor() {
  auto parsed = atom();
  if (parsed.second)
    return { blank(), parsed.second };

  rule_ptr result = parsed.first;
  if (!has_more_input())
    return { result, nullptr };

  const char quantifier = peek();
  if (quantifier == '*') {
    next();
    result = make_shared<Repeat>(result);
  } else if (quantifier == '+') {
    next();
    result = make_shared<Seq>(result, make_shared<Repeat>(result));
  } else if (quantifier == '?') {
    next();
    result = Choice::Build({ result, make_shared<Blank>() });
  }
  return { result, nullptr };
}
// Parses the inside of a bracket expression (the '[' has already been
// consumed, the ']' is left for the caller). A leading '^' complements the
// resulting set.
pair<CharacterSet, const GrammarError *> char_set() {
  const bool negated = (peek() == '^');
  if (negated)
    next();

  CharacterSet members;
  for (;;) {
    if (!has_more_input())
      break;
    if (peek() == ']')
      break;
    auto single = single_char();
    if (single.second)
      return { CharacterSet(), single.second };
    members.add_set(single.first);
  }

  if (negated)
    members = members.complement();
  return { members, nullptr };
}
// atom := '(' rule ')' | '[' char_set ']' | '.' | single_char
// A stray ')' or ']' is reported as an error; '.' matches every character
// except '\n'.
pair<rule_ptr, const GrammarError *> atom() {
  const char first = peek();

  if (first == '(') {
    next();
    auto group = rule(true);
    if (group.second)
      return { blank(), group.second };
    if (peek() != ')')
      return error("unmatched open paren");
    next();
    return { group.first, nullptr };
  }

  if (first == '[') {
    next();
    auto set_result = char_set();
    if (set_result.second)
      return { blank(), set_result.second };
    if (peek() != ']')
      return error("unmatched open square bracket");
    next();
    return { set_result.first.copy(), nullptr };
  }

  if (first == ')')
    return error("unmatched close paren");

  if (first == ']')
    return error("unmatched close square bracket");

  if (first == '.') {
    next();
    return { CharacterSet({ '\n' }).complement().copy(), nullptr };
  }

  auto single = single_char();
  if (single.second)
    return { blank(), single.second };
  return { single.first.copy(), nullptr };
}
// Parses one character item: a backslash escape (see escaped_char), a range
// "x-y", or a single literal character. The error slot is always nullptr.
// NOTE(review): the "x-y" range form is recognised here regardless of whether
// the caller is inside a bracket expression - confirm that is intended.
pair<CharacterSet, const GrammarError *> single_char() {
  CharacterSet value;
  if (peek() == '\\') {
    next();
    value = escaped_char(peek());
    next();
  } else {
    char first_char = peek();
    next();
    if (peek() == '-') {
      next();
      value = CharacterSet({ CharacterRange(first_char, peek()) });
      next();
    } else {
      value = CharacterSet({ first_char });
    }
  }
  return { value, nullptr };
}
// Parses the contents of a bracket expression, starting just after `[` and
// stopping in front of the closing `]` (or at end of input). A leading `^`
// makes the result the complement of the listed characters.
pair<CharacterSet, const GrammarError *> char_set() {
  const bool is_negated = (peek() == '^');
  if (is_negated)
    next();

  CharacterSet members;
  while (has_more_input() && peek() != ']') {
    auto entry = single_char();
    if (entry.second)
      return { CharacterSet(), entry.second };
    members.add_set(entry.first);
  }

  if (is_negated)
    members = members.complement();
  return { members, nullptr };
}
// Maps the character that follows a backslash to a character set:
//   \a -> ASCII letters, \w -> ASCII letters and digits, \d -> digits.
// Any other character stands for itself.
CharacterSet escaped_char(char value) {
  if (value == 'a')
    return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' } });
  if (value == 'w')
    return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } });
  if (value == 'd')
    return CharacterSet({ { '0', '9' } });
  return CharacterSet({ value });
}
// Moves the cursor forward by one character. No bounds check is done here, so
// the cursor may end up beyond the end of the input.
void next() {
  position++;
}

// Returns the character under the cursor. At or beyond the end of the input
// this returns '\0' instead of indexing outside the string, which was
// undefined behaviour once the cursor had been advanced past the end.
char peek() {
  return position < input.size() ? input[position] : '\0';
}

// True while the cursor is in front of `length`.
bool has_more_input() {
  return position < length;
}
// Builds a failed parse result: a blank rule paired with a newly allocated
// regex GrammarError carrying `msg`. The error is allocated with `new` and is
// not freed here -- presumably the caller takes ownership (TODO confirm).
pair<rule_ptr, const GrammarError *> error(string msg) {
  rule_ptr placeholder = blank();
  const GrammarError *failure = new GrammarError(GrammarErrorTypeRegex, msg);
  return { placeholder, failure };
}
const string input;
const size_t length;
size_t position;
};
pair<rule_ptr, const GrammarError *> parse_regex(const std::string &input) {
return PatternParser(input).rule(false);
// Parses one character-set element at the cursor:
//   \x   -> the set returned by escaped_char(x)
//   a-z  -> the inclusive range from `a` to `z`
//   a    -> the single character `a`
// A `-` that is not followed by an upper bound (it is the last character of
// the input, or is directly followed by `]`) is not consumed here; it is left
// in place so the next call parses it as a literal `-`. Previously such a `-`
// produced a range whose upper bound was `]` or the string terminator, which
// swallowed the closing bracket and moved the cursor past the end of input.
// The error slot of the result is always nullptr.
pair<CharacterSet, const GrammarError *> single_char() {
  CharacterSet value;
  switch (peek()) {
    case '\\':
      next();
      value = escaped_char(peek());
      next();
      break;
    default: {
      char first_char = peek();
      next();
      if (has_more_input() && peek() == '-') {
        next();
        if (has_more_input() && peek() != ']') {
          value = CharacterSet({ CharacterRange(first_char, peek()) });
          next();
          break;
        }
        // No upper bound: put the '-' back for the next call.
        position--;
      }
      value = CharacterSet({ first_char });
    }
  }
  return { value, nullptr };
}
// Maps the character that follows a backslash to a character set:
//   \a -> ASCII letters, \w -> ASCII letters and digits, \d -> digits.
// Any other character stands for itself.
CharacterSet escaped_char(char value) {
  if (value == 'a')
    return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' } });
  if (value == 'w')
    return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } });
  if (value == 'd')
    return CharacterSet({ { '0', '9' } });
  return CharacterSet({ value });
}
// Moves the cursor forward by one character. No bounds check is done here, so
// the cursor may end up beyond the end of the input.
void next() { position++; }

// Returns the character under the cursor. At or beyond the end of the input
// this returns '\0' instead of indexing outside the string, which was
// undefined behaviour once the cursor had been advanced past the end.
char peek() { return position < input.size() ? input[position] : '\0'; }

// True while the cursor is in front of `length`.
bool has_more_input() { return position < length; }
// Builds a failed parse result: a blank rule paired with a newly allocated
// regex GrammarError carrying `msg`. The error is allocated with `new` and is
// not freed here -- presumably the caller takes ownership (TODO confirm).
pair<rule_ptr, const GrammarError *> error(string msg) {
  rule_ptr placeholder = blank();
  const GrammarError *failure = new GrammarError(GrammarErrorTypeRegex, msg);
  return { placeholder, failure };
}
const string input;
const size_t length;
size_t position;
};
// Parses `input` as a regex pattern by running a PatternParser over it and
// returning the result of its top-level rule(false) call.
pair<rule_ptr, const GrammarError *> parse_regex(const std::string &input) {
  PatternParser parser(input);
  return parser.rule(false);
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -6,10 +6,12 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace prepare_grammar {
std::pair<rules::rule_ptr, const GrammarError *>
parse_regex(const std::string &);
}
}
namespace prepare_grammar {
std::pair<rules::rule_ptr, const GrammarError *> parse_regex(
const std::string &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_PARSE_REGEX_H_

View file

@ -7,29 +7,31 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::tuple;
using std::make_tuple;
namespace prepare_grammar {
namespace prepare_grammar {
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
prepare_grammar(const Grammar &input_grammar) {
auto result = intern_symbols(input_grammar);
const InternedGrammar &grammar = result.first;
const GrammarError *error = result.second;
using std::tuple;
using std::make_tuple;
if (error)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
const Grammar &input_grammar) {
auto result = intern_symbols(input_grammar);
const InternedGrammar &grammar = result.first;
const GrammarError *error = result.second;
auto grammars = extract_tokens(grammar);
const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first);
auto expand_tokens_result = expand_tokens(grammars.second);
const LexicalGrammar &lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;
if (error)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
if (error)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
auto grammars = extract_tokens(grammar);
const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first);
auto expand_tokens_result = expand_tokens(grammars.second);
const LexicalGrammar &lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;
return make_tuple(rule_grammar, lex_grammar, nullptr);
}
}
if (error)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
return make_tuple(rule_grammar, lex_grammar, nullptr);
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -5,13 +5,16 @@
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
class Grammar;
class GrammarError;
namespace prepare_grammar {
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
prepare_grammar(const Grammar &);
}
}
class Grammar;
class GrammarError;
namespace prepare_grammar {
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
const Grammar &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_

View file

@ -9,36 +9,36 @@
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
using std::string;
namespace prepare_grammar {
namespace prepare_grammar {
class TokenDescription : public rules::RuleFn<string> {
string apply_to(const rules::Pattern *rule) {
return "/" + util::escape_string(rule->value) + "/";
}
using std::string;
string apply_to(const rules::String *rule) {
return "'" + util::escape_string(rule->value) + "'";
}
class TokenDescription : public rules::RuleFn<string> {
string apply_to(const rules::Pattern *rule) {
return "/" + util::escape_string(rule->value) + "/";
}
string apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
string apply_to(const rules::String *rule) {
return "'" + util::escape_string(rule->value) + "'";
}
string apply_to(const rules::Seq *rule) {
return "(seq " + apply(rule->left) + " " + apply(rule->right) + ")";
}
string apply_to(const rules::Metadata *rule) { return apply(rule->rule); }
string apply_to(const rules::Choice *rule) {
string result = "(choice";
for (auto &element : rule->elements)
result += " " + apply(element);
return result + ")";
}
};
string apply_to(const rules::Seq *rule) {
return "(seq " + apply(rule->left) + " " + apply(rule->right) + ")";
}
std::string token_description(const rules::rule_ptr &rule) {
return TokenDescription().apply(rule);
}
}
string apply_to(const rules::Choice *rule) {
string result = "(choice";
for (auto &element : rule->elements)
result += " " + apply(element);
return result + ")";
}
};
std::string token_description(const rules::rule_ptr &rule) {
return TokenDescription().apply(rule);
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -5,9 +5,11 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace prepare_grammar {
std::string token_description(const rules::rule_ptr &);
}
}
namespace prepare_grammar {
std::string token_description(const rules::rule_ptr &);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_TOKEN_DESCRIPTION_H_

View file

@ -5,54 +5,52 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
using std::string;
using std::pair;
using std::vector;
using std::set;
// Looks up the rule body for `symbol`, reading from aux_rules when the symbol
// is auxiliary and from rules otherwise. symbol.index is used as the vector
// index without a bounds check.
const rules::rule_ptr &PreparedGrammar::rule(const rules::Symbol &symbol) const {
  if (symbol.is_auxiliary())
    return aux_rules[symbol.index].second;
  return rules[symbol.index].second;
}
using std::string;
using std::pair;
using std::vector;
using std::set;
// Looks up the name for `symbol`, reading from aux_rules when the symbol is
// auxiliary and from rules otherwise. symbol.index is used as the vector
// index without a bounds check.
const string &PreparedGrammar::rule_name(const rules::Symbol &symbol) const {
  if (symbol.is_auxiliary())
    return aux_rules[symbol.index].first;
  return rules[symbol.index].first;
}
PreparedGrammar::PreparedGrammar() {}
SyntaxGrammar::SyntaxGrammar() {}
LexicalGrammar::LexicalGrammar() {}
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
PreparedGrammar(rules, aux_rules) {}
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
PreparedGrammar(rules, aux_rules) {}
PreparedGrammar::PreparedGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
rules(rules),
aux_rules(aux_rules) {}
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules,
const set<rules::Symbol> &ubiquitous_tokens) :
PreparedGrammar(rules, aux_rules),
ubiquitous_tokens(ubiquitous_tokens) {}
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules,
const set<char> &separators) :
PreparedGrammar(rules, aux_rules),
separators(separators) {}
// Looks up the rule body for `symbol`, reading from aux_rules when the symbol
// is auxiliary and from rules otherwise. symbol.index is used as the vector
// index without a bounds check.
const rules::rule_ptr &PreparedGrammar::rule(const rules::Symbol &symbol) const {
  if (symbol.is_auxiliary())
    return aux_rules[symbol.index].second;
  return rules[symbol.index].second;
}
// Looks up the name for `symbol`, reading from aux_rules when the symbol is
// auxiliary and from rules otherwise. symbol.index is used as the vector
// index without a bounds check.
const string &PreparedGrammar::rule_name(const rules::Symbol &symbol) const {
  if (symbol.is_auxiliary())
    return aux_rules[symbol.index].first;
  return rules[symbol.index].first;
}
PreparedGrammar::PreparedGrammar() {}
SyntaxGrammar::SyntaxGrammar() {}
LexicalGrammar::LexicalGrammar() {}
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules)
: PreparedGrammar(rules, aux_rules) {}
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules)
: PreparedGrammar(rules, aux_rules) {}
PreparedGrammar::PreparedGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules)
: rules(rules), aux_rules(aux_rules) {}
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules,
const set<rules::Symbol> &ubiquitous_tokens)
: PreparedGrammar(rules, aux_rules), ubiquitous_tokens(ubiquitous_tokens) {}
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr> > &rules,
const vector<pair<string, rules::rule_ptr> > &aux_rules,
const set<char> &separators)
: PreparedGrammar(rules, aux_rules), separators(separators) {}
} // namespace tree_sitter

View file

@ -9,47 +9,49 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
class PreparedGrammar {
public:
PreparedGrammar();
PreparedGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
class PreparedGrammar {
public:
PreparedGrammar();
PreparedGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules);
const std::string & rule_name(const rules::Symbol &symbol) const;
const rules::rule_ptr & rule(const rules::Symbol &symbol) const;
};
const std::vector<std::pair<std::string, rules::rule_ptr> > rules;
const std::vector<std::pair<std::string, rules::rule_ptr> > aux_rules;
class SyntaxGrammar : public PreparedGrammar {
public:
SyntaxGrammar();
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
const std::set<rules::Symbol> &ubiquitous_tokens);
const std::string &rule_name(const rules::Symbol &symbol) const;
const rules::rule_ptr &rule(const rules::Symbol &symbol) const;
};
std::set<rules::Symbol> ubiquitous_tokens;
};
class SyntaxGrammar : public PreparedGrammar {
public:
SyntaxGrammar();
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules);
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules,
const std::set<rules::Symbol> &ubiquitous_tokens);
class LexicalGrammar : public PreparedGrammar {
public:
LexicalGrammar();
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
const std::set<char> &separators);
std::set<rules::Symbol> ubiquitous_tokens;
};
std::set<char> separators;
};
}
class LexicalGrammar : public PreparedGrammar {
public:
LexicalGrammar();
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules);
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules,
const std::vector<std::pair<std::string, rules::rule_ptr> > &aux_rules,
const std::set<char> &separators);
std::set<char> separators;
};
} // namespace tree_sitter
#endif // COMPILER_PREPARED_GRAMMAR_H_

View file

@ -2,28 +2,22 @@
#include <string>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
namespace rules {
Blank::Blank() {}
namespace tree_sitter {
namespace rules {
bool Blank::operator==(const Rule &rule) const {
return dynamic_cast<const Blank *>(&rule) != nullptr;
}
Blank::Blank() {}
size_t Blank::hash_code() const {
return 0;
}
rule_ptr Blank::copy() const {
return std::make_shared<Blank>();
}
std::string Blank::to_string() const {
return "#<blank>";
}
void Blank::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
bool Blank::operator==(const Rule &rule) const {
return dynamic_cast<const Blank *>(&rule) != nullptr;
}
size_t Blank::hash_code() const { return 0; }
rule_ptr Blank::copy() const { return std::make_shared<Blank>(); }
std::string Blank::to_string() const { return "#<blank>"; }
void Blank::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -4,19 +4,21 @@
#include <string>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Blank : public Rule {
public:
Blank();
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
};
}
}
class Blank : public Rule {
public:
Blank();
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_BLANK_H_

View file

@ -1,9 +1,11 @@
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {
namespace rules {
Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); }
Symbol ERROR() { return Symbol(-2, SymbolOptionToken); }
Symbol START() { return Symbol(-3); }
}
}
namespace rules {
Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); }
Symbol ERROR() { return Symbol(-2, SymbolOptionToken); }
Symbol START() { return Symbol(-3); }
} // namespace rules
} // namespace tree_sitter

View file

@ -4,11 +4,11 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace rules {
Symbol ERROR();
Symbol START();
Symbol END_OF_INPUT();
}
namespace rules {
Symbol ERROR();
Symbol START();
Symbol END_OF_INPUT();
}
}
#endif // COMPILER_RULES_BUILT_IN_SYMBOLS_H_

View file

@ -3,50 +3,55 @@
#include <string>
namespace tree_sitter {
using std::string;
namespace rules {
namespace rules {
static const unsigned char MAX_CHAR = -1;
using std::string;
CharacterRange::CharacterRange(unsigned char value) : min(value), max(value) {}
CharacterRange::CharacterRange(unsigned char min, unsigned char max) : min(min), max(max) {}
static const unsigned char MAX_CHAR = -1;
bool CharacterRange::operator==(const CharacterRange &other) const {
return min == other.min && max == other.max;
}
CharacterRange::CharacterRange(unsigned char value) : min(value), max(value) {}
CharacterRange::CharacterRange(unsigned char min, unsigned char max)
: min(min), max(max) {}
// Orders ranges by lower bound first, then by upper bound.
bool CharacterRange::operator<(const CharacterRange &other) const {
  if (min != other.min)
    return min < other.min;
  return max < other.max;
}
// Returns a printable form of `input`: NUL, newline, carriage return, tab and
// MAX_CHAR get fixed spellings; every other value is returned as a
// one-character string.
string escape_character(unsigned char input) {
  if (input == '\0')
    return "<EOF>";
  if (input == '\n')
    return "\\n";
  if (input == '\r')
    return "\\r";
  if (input == '\t')
    return "\\t";
  if (input == MAX_CHAR)
    return "<MAX>";
  return string(1, static_cast<char>(input));
}
// Renders the range as "<ANY>" when it spans 0..MAX_CHAR, as a single escaped
// character when both bounds are equal, and as "min-max" otherwise.
string CharacterRange::to_string() const {
  if (min == 0 && max == MAX_CHAR)
    return "<ANY>";

  string text = escape_character(min);
  if (min != max) {
    text += "-";
    text += escape_character(max);
  }
  return text;
}
}
bool CharacterRange::operator==(const CharacterRange &other) const {
return min == other.min && max == other.max;
}
// Orders ranges by lower bound first, then by upper bound.
bool CharacterRange::operator<(const CharacterRange &other) const {
  if (min != other.min)
    return min < other.min;
  return max < other.max;
}
// Returns a printable form of `input`: NUL, newline, carriage return, tab and
// MAX_CHAR get fixed spellings; every other value is returned as a
// one-character string.
string escape_character(unsigned char input) {
  if (input == '\0')
    return "<EOF>";
  if (input == '\n')
    return "\\n";
  if (input == '\r')
    return "\\r";
  if (input == '\t')
    return "\\t";
  if (input == MAX_CHAR)
    return "<MAX>";
  return string(1, static_cast<char>(input));
}
// Renders the range as "<ANY>" when it spans 0..MAX_CHAR, as a single escaped
// character when both bounds are equal, and as "min-max" otherwise.
string CharacterRange::to_string() const {
  if (min == 0 && max == MAX_CHAR)
    return "<ANY>";

  string text = escape_character(min);
  if (min != max) {
    text += "-";
    text += escape_character(max);
  }
  return text;
}
} // namespace rules
} // namespace tree_sitter

View file

@ -5,29 +5,34 @@
#include <string>
namespace tree_sitter {
namespace rules {
struct CharacterRange {
unsigned char min;
unsigned char max;
namespace rules {
// IMPLICIT_CONSTRUCTORS
CharacterRange(unsigned char value);
CharacterRange(unsigned char min, unsigned char max);
struct CharacterRange {
unsigned char min;
unsigned char max;
bool operator==(const CharacterRange &other) const;
bool operator<(const CharacterRange &others) const;
std::string to_string() const;
};
}
}
// IMPLICIT_CONSTRUCTORS
CharacterRange(unsigned char value);
CharacterRange(unsigned char min, unsigned char max);
bool operator==(const CharacterRange &other) const;
bool operator<(const CharacterRange &others) const;
std::string to_string() const;
};
} // namespace rules
} // namespace tree_sitter
namespace std {
template<>
struct hash<tree_sitter::rules::CharacterRange> {
size_t operator()(const tree_sitter::rules::CharacterRange &range) const {
return (hash<unsigned char>()(range.min) ^ hash<unsigned char>()(range.max));
}
};
}
template <>
struct hash<tree_sitter::rules::CharacterRange> {
size_t operator()(const tree_sitter::rules::CharacterRange &range) const {
return (hash<unsigned char>()(range.min) ^
hash<unsigned char>()(range.max));
}
};
} // namespace std
#endif // COMPILER_RULES_CHARACTER_RANGE_H_

View file

@ -3,140 +3,141 @@
#include <utility>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
namespace rules {
using std::string;
using std::hash;
using std::set;
using std::pair;
using std::initializer_list;
namespace tree_sitter {
namespace rules {
static const unsigned char MAX_CHAR = -1;
static const unsigned char MAX_CHAR = -1;
CharacterSet::CharacterSet() : ranges({}) {}
CharacterSet::CharacterSet(const set<CharacterRange> &ranges) : ranges(ranges) {}
CharacterSet::CharacterSet(const initializer_list<CharacterRange> &ranges) : ranges(ranges) {}
CharacterSet::CharacterSet() : ranges({}) {}
CharacterSet::CharacterSet(const set<CharacterRange> &ranges)
: ranges(ranges) {}
CharacterSet::CharacterSet(const initializer_list<CharacterRange> &ranges)
: ranges(ranges) {}
bool CharacterSet::operator==(const Rule &rule) const {
const CharacterSet *other = dynamic_cast<const CharacterSet *>(&rule);
return other && (ranges == other->ranges);
}
bool CharacterSet::operator<(const CharacterSet &other) const {
return ranges < other.ranges;
}
size_t CharacterSet::hash_code() const {
size_t result = std::hash<size_t>()(ranges.size());
for (auto &range : ranges) {
result ^= std::hash<unsigned char>()(range.min);
result ^= std::hash<unsigned char>()(range.max);
}
return result;
}
rule_ptr CharacterSet::copy() const {
return std::make_shared<CharacterSet>(*this);
}
// Renders the set as "#<char { r1 r2 ... }>", one entry per range in the
// set's iteration order.
string CharacterSet::to_string() const {
  string text = "#<char {";
  for (const CharacterRange &range : ranges) {
    text += " ";
    text += range.to_string();
  }
  text += " }>";
  return text;
}
CharacterSet CharacterSet::complement() const {
CharacterSet result({ {0, MAX_CHAR} });
result.remove_set(*this);
return result;
}
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation() const {
auto first_range = *ranges.begin();
if (first_range.min == 0 && first_range.max > 0) {
return { this->complement(), false };
} else {
return { *this, true };
}
}
// Inserts `addition` into self->ranges, merging it with every existing range
// that overlaps it or sits directly next to it.
// An existing range that lies entirely inside `addition` is now dropped as
// well. Previously it was kept next to the new range, leaving overlapping
// entries in the set (e.g. adding a-z to { b } produced { b, a-z }).
void add_range(CharacterSet *self, CharacterRange addition) {
  set<CharacterRange> new_ranges;
  for (const CharacterRange &range : self->ranges) {
    bool is_absorbed = false;

    // Existing range reaches into (or touches) the addition from the left.
    if (range.min < addition.min && range.max >= addition.min - 1) {
      is_absorbed = true;
      addition.min = range.min;
    }

    // Existing range reaches into (or touches) the addition from the right.
    if (range.max > addition.max && range.min <= addition.max + 1) {
      is_absorbed = true;
      addition.max = range.max;
    }

    // Existing range is fully covered by the addition.
    if (range.min >= addition.min && range.max <= addition.max)
      is_absorbed = true;

    if (!is_absorbed)
      new_ranges.insert(range);
  }
  new_ranges.insert(addition);
  self->ranges = new_ranges;
}
// Removes every character in `range_to_remove` from self->ranges and returns
// the set of characters that were actually removed (the overlap between
// `range_to_remove` and the previous contents of `self`).
CharacterSet remove_range(CharacterSet *self, CharacterRange range_to_remove) {
CharacterSet removed_set;
set<CharacterRange> new_ranges;
for (auto range : self->ranges) {
// The removal starts at or before this range.
if (range_to_remove.min <= range.min) {
if (range_to_remove.max < range.min) {
// It also ends before this range: no overlap, keep the range.
new_ranges.insert(range);
} else if (range_to_remove.max < range.max) {
// It cuts off the front of this range: keep the tail.
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
add_range(&removed_set, CharacterRange(range.min, range_to_remove.max));
} else {
// It covers this range completely.
add_range(&removed_set, range);
}
// The removal starts inside this range.
} else if (range_to_remove.min <= range.max) {
if (range_to_remove.max < range.max) {
// It also ends inside: split the range around the removed middle.
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
add_range(&removed_set, range_to_remove);
} else {
// It cuts off the back of this range: keep the head.
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
add_range(&removed_set, CharacterRange(range_to_remove.min, range.max));
}
} else {
// The removal starts after this range: no overlap, keep the range.
new_ranges.insert(range);
}
}
self->ranges = new_ranges;
return removed_set;
}
bool CharacterSet::is_empty() const {
return ranges.empty();
}
void CharacterSet::add_set(const CharacterSet &other) {
for (auto &other_range : other.ranges) {
add_range(this, other_range);
}
}
// Removes every range of `other` from this set and returns the union of the
// characters that were actually removed.
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
  CharacterSet removed;
  for (const CharacterRange &target : other.ranges)
    removed.add_set(remove_range(this, target));
  return removed;
}
CharacterSet CharacterSet::intersect(const CharacterSet &set) const {
CharacterSet copy = *this;
return copy.remove_set(set);
}
void CharacterSet::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
bool CharacterSet::operator==(const Rule &rule) const {
const CharacterSet *other = dynamic_cast<const CharacterSet *>(&rule);
return other && (ranges == other->ranges);
}
bool CharacterSet::operator<(const CharacterSet &other) const {
return ranges < other.ranges;
}
size_t CharacterSet::hash_code() const {
size_t result = std::hash<size_t>()(ranges.size());
for (auto &range : ranges) {
result ^= std::hash<unsigned char>()(range.min);
result ^= std::hash<unsigned char>()(range.max);
}
return result;
}
rule_ptr CharacterSet::copy() const {
return std::make_shared<CharacterSet>(*this);
}
// Renders the set as "#<char { r1 r2 ... }>", one entry per range in the
// set's iteration order.
string CharacterSet::to_string() const {
  string text = "#<char {";
  for (const CharacterRange &range : ranges) {
    text += " ";
    text += range.to_string();
  }
  text += " }>";
  return text;
}
CharacterSet CharacterSet::complement() const {
CharacterSet result({ { 0, MAX_CHAR } });
result.remove_set(*this);
return result;
}
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation()
const {
auto first_range = *ranges.begin();
if (first_range.min == 0 && first_range.max > 0) {
return { this->complement(), false };
} else {
return { *this, true };
}
}
// Inserts `addition` into self->ranges, merging it with every existing range
// that overlaps it or sits directly next to it.
// An existing range that lies entirely inside `addition` is now dropped as
// well. Previously it was kept next to the new range, leaving overlapping
// entries in the set (e.g. adding a-z to { b } produced { b, a-z }).
void add_range(CharacterSet *self, CharacterRange addition) {
  set<CharacterRange> new_ranges;
  for (const CharacterRange &range : self->ranges) {
    bool is_absorbed = false;

    // Existing range reaches into (or touches) the addition from the left.
    if (range.min < addition.min && range.max >= addition.min - 1) {
      is_absorbed = true;
      addition.min = range.min;
    }

    // Existing range reaches into (or touches) the addition from the right.
    if (range.max > addition.max && range.min <= addition.max + 1) {
      is_absorbed = true;
      addition.max = range.max;
    }

    // Existing range is fully covered by the addition.
    if (range.min >= addition.min && range.max <= addition.max)
      is_absorbed = true;

    if (!is_absorbed)
      new_ranges.insert(range);
  }
  new_ranges.insert(addition);
  self->ranges = new_ranges;
}
// Removes every character in `range_to_remove` from self->ranges and returns
// the set of characters that were actually removed (the overlap between
// `range_to_remove` and the previous contents of `self`).
CharacterSet remove_range(CharacterSet *self, CharacterRange range_to_remove) {
CharacterSet removed_set;
set<CharacterRange> new_ranges;
for (auto range : self->ranges) {
// The removal starts at or before this range.
if (range_to_remove.min <= range.min) {
if (range_to_remove.max < range.min) {
// It also ends before this range: no overlap, keep the range.
new_ranges.insert(range);
} else if (range_to_remove.max < range.max) {
// It cuts off the front of this range: keep the tail.
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
add_range(&removed_set, CharacterRange(range.min, range_to_remove.max));
} else {
// It covers this range completely.
add_range(&removed_set, range);
}
// The removal starts inside this range.
} else if (range_to_remove.min <= range.max) {
if (range_to_remove.max < range.max) {
// It also ends inside: split the range around the removed middle.
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max));
add_range(&removed_set, range_to_remove);
} else {
// It cuts off the back of this range: keep the head.
new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1));
add_range(&removed_set, CharacterRange(range_to_remove.min, range.max));
}
} else {
// The removal starts after this range: no overlap, keep the range.
new_ranges.insert(range);
}
}
self->ranges = new_ranges;
return removed_set;
}
bool CharacterSet::is_empty() const { return ranges.empty(); }
void CharacterSet::add_set(const CharacterSet &other) {
for (auto &other_range : other.ranges) {
add_range(this, other_range);
}
}
// Removes every range of `other` from this set and returns the union of the
// characters that were actually removed.
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
  CharacterSet removed;
  for (const CharacterRange &target : other.ranges)
    removed.add_set(remove_range(this, target));
  return removed;
}
CharacterSet CharacterSet::intersect(const CharacterSet &set) const {
CharacterSet copy = *this;
return copy.remove_set(set);
}
void CharacterSet::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -8,40 +8,44 @@
#include "compiler/rules/rule.h"
#include "compiler/rules/character_range.h"
namespace tree_sitter {
namespace rules {
class CharacterSet : public Rule {
public:
CharacterSet();
explicit CharacterSet(const std::set<CharacterRange> &ranges);
explicit CharacterSet(const std::initializer_list<CharacterRange> &ranges);
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
bool operator<(const CharacterSet &) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class CharacterSet : public Rule {
public:
CharacterSet();
explicit CharacterSet(const std::set<CharacterRange> &ranges);
explicit CharacterSet(const std::initializer_list<CharacterRange> &ranges);
void add_set(const CharacterSet &other);
CharacterSet remove_set(const CharacterSet &other);
CharacterSet complement() const;
CharacterSet intersect(const CharacterSet &) const;
std::pair<CharacterSet, bool> most_compact_representation() const;
bool is_empty() const;
bool operator==(const Rule &other) const;
bool operator<(const CharacterSet &) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
std::set<CharacterRange> ranges;
};
}
}
void add_set(const CharacterSet &other);
CharacterSet remove_set(const CharacterSet &other);
CharacterSet complement() const;
CharacterSet intersect(const CharacterSet &) const;
std::pair<CharacterSet, bool> most_compact_representation() const;
bool is_empty() const;
std::set<CharacterRange> ranges;
};
} // namespace rules
} // namespace tree_sitter
namespace std {
template<>
struct hash<tree_sitter::rules::CharacterSet> {
size_t operator()(const tree_sitter::rules::CharacterSet &rule) const {
return rule.hash_code();
}
};
}
template <>
struct hash<tree_sitter::rules::CharacterSet> {
size_t operator()(const tree_sitter::rules::CharacterSet &rule) const {
return rule.hash_code();
}
};
} // namespace std
#endif // COMPILER_RULES_CHARACTER_SET_H_

View file

@ -3,50 +3,51 @@
#include <set>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::make_shared;
using std::vector;
using std::set;
using std::dynamic_pointer_cast;
namespace tree_sitter {
namespace rules {
namespace rules {
Choice::Choice(const vector<rule_ptr> &elements) : elements(elements) {}
using std::string;
using std::make_shared;
using std::vector;
using std::set;
using std::dynamic_pointer_cast;
rule_ptr Choice::Build(const vector<rule_ptr> &elements) {
return make_shared<Choice>(elements);
}
Choice::Choice(const vector<rule_ptr> &elements) : elements(elements) {}
// Two choices are equal when `rule` is also a Choice with the same number of
// elements and every pair of corresponding elements compares equal, in order.
bool Choice::operator==(const Rule &rule) const {
  const Choice *other = dynamic_cast<const Choice *>(&rule);
  if (other == nullptr)
    return false;

  const size_t count = elements.size();
  if (count != other->elements.size())
    return false;

  for (size_t index = 0; index < count; ++index) {
    const bool same = elements[index]->operator==(*other->elements[index]);
    if (!same)
      return false;
  }
  return true;
}
// Hashes the choice by XOR-ing the hash of the element count with the hash
// code of every element.
size_t Choice::hash_code() const {
  size_t combined = std::hash<size_t>()(elements.size());
  for (auto it = elements.begin(); it != elements.end(); ++it)
    combined ^= (*it)->hash_code();
  return combined;
}
rule_ptr Choice::copy() const {
return std::make_shared<Choice>(*this);
}
// Renders the choice as "#<choice e1 e2 ...>" using each element's
// to_string().
string Choice::to_string() const {
  string text("#<choice");
  for (const rule_ptr &element : elements) {
    text += " ";
    text += element->to_string();
  }
  text += ">";
  return text;
}
void Choice::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
rule_ptr Choice::Build(const vector<rule_ptr> &elements) {
return make_shared<Choice>(elements);
}
// Two choices are equal when `rule` is also a Choice with the same number of
// elements and every pair of corresponding elements compares equal, in order.
bool Choice::operator==(const Rule &rule) const {
  const Choice *other = dynamic_cast<const Choice *>(&rule);
  if (other == nullptr)
    return false;

  const size_t count = elements.size();
  if (count != other->elements.size())
    return false;

  for (size_t index = 0; index < count; ++index) {
    const bool same = elements[index]->operator==(*other->elements[index]);
    if (!same)
      return false;
  }
  return true;
}
// Hashes the choice by XOR-ing the hash of the element count with the hash
// code of every element.
size_t Choice::hash_code() const {
  size_t combined = std::hash<size_t>()(elements.size());
  for (auto it = elements.begin(); it != elements.end(); ++it)
    combined ^= (*it)->hash_code();
  return combined;
}
rule_ptr Choice::copy() const { return std::make_shared<Choice>(*this); }
// Renders the choice as "#<choice e1 e2 ...>" using each element's
// to_string().
string Choice::to_string() const {
  string text("#<choice");
  for (const rule_ptr &element : elements) {
    text += " ";
    text += element->to_string();
  }
  text += ">";
  return text;
}
void Choice::accept(Visitor *visitor) const { visitor->visit(this); }
} // namespace rules
} // namespace tree_sitter

View file

@ -6,21 +6,23 @@
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Choice : public Rule {
public:
explicit Choice(const std::vector<rule_ptr> &elements);
static rule_ptr Build(const std::vector<rule_ptr> &rules);
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class Choice : public Rule {
public:
explicit Choice(const std::vector<rule_ptr> &elements);
static rule_ptr Build(const std::vector<rule_ptr> &rules);
const std::vector<rule_ptr> elements;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const std::vector<rule_ptr> elements;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_CHOICE_H_

View file

@ -3,45 +3,44 @@
#include <map>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::hash;
using std::make_shared;
using std::map;
namespace tree_sitter {
namespace rules {
namespace rules {
Metadata::Metadata(rule_ptr rule, map<MetadataKey, int> values) : rule(rule), value(values) {}
using std::hash;
using std::make_shared;
using std::map;
bool Metadata::operator==(const Rule &rule) const {
auto other = dynamic_cast<const Metadata *>(&rule);
return other && other->value == value && other->rule->operator==(*this->rule);
}
Metadata::Metadata(rule_ptr rule, map<MetadataKey, int> values)
: rule(rule), value(values) {}
size_t Metadata::hash_code() const {
size_t result = hash<size_t>()(value.size());
for (auto &pair : value) {
result ^= hash<int>()(pair.first);
result ^= hash<int>()(pair.second);
}
return result;
}
rule_ptr Metadata::copy() const {
return make_shared<Metadata>(rule->copy(), value);
}
int Metadata::value_for(MetadataKey key) const {
auto pair = value.find(key);
return (pair != value.end()) ?
pair->second :
0;
}
std::string Metadata::to_string() const {
return "#<metadata " + rule->to_string() + ">";
}
void Metadata::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Equal when `rule` is a Metadata with the same value map and an equal wrapped rule.
bool Metadata::operator==(const Rule &rule) const {
  const Metadata *that = dynamic_cast<const Metadata *>(&rule);
  if (!that)
    return false;
  if (!(that->value == value))
    return false;
  // `rule` the parameter shadows the member, hence the explicit this->.
  return that->rule->operator==(*this->rule);
}
size_t Metadata::hash_code() const {
size_t result = hash<size_t>()(value.size());
for (auto &pair : value) {
result ^= hash<int>()(pair.first);
result ^= hash<int>()(pair.second);
}
return result;
}
// Copies the wrapped rule via its own copy() and re-wraps it with the same values.
rule_ptr Metadata::copy() const {
  rule_ptr inner = rule->copy();
  return make_shared<Metadata>(inner, value);
}
// Looks up `key` in the value map; returns 0 when the key is absent.
int Metadata::value_for(MetadataKey key) const {
  auto found = value.find(key);
  if (found == value.end())
    return 0;
  return found->second;
}
// Renders as "#<metadata RULE>"; the value map is not printed.
std::string Metadata::to_string() const {
  std::string text = "#<metadata ";
  text += rule->to_string();
  text += ">";
  return text;
}
// Visitor dispatch: hands this Metadata to the visitor's matching overload.
void Metadata::accept(Visitor *visitor) const {
  const Metadata *self = this;
  visitor->visit(self);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -5,30 +5,32 @@
#include <map>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
typedef enum {
START_TOKEN,
PRECEDENCE,
IS_TOKEN,
DESCRIPTION,
} MetadataKey;
namespace tree_sitter {
namespace rules {
class Metadata : public Rule {
public:
Metadata(rule_ptr rule, std::map<MetadataKey, int> value);
typedef enum {
START_TOKEN,
PRECEDENCE,
IS_TOKEN,
DESCRIPTION,
} MetadataKey;
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
int value_for(MetadataKey key) const;
class Metadata : public Rule {
public:
Metadata(rule_ptr rule, std::map<MetadataKey, int> value);
const rule_ptr rule;
const std::map<MetadataKey, int> value;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
int value_for(MetadataKey key) const;
const rule_ptr rule;
const std::map<MetadataKey, int> value;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_METADATA_H_

View file

@ -2,32 +2,30 @@
#include <string>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::hash;
namespace tree_sitter {
namespace rules {
namespace rules {
NamedSymbol::NamedSymbol(const std::string &name) : name(name) {}
using std::string;
using std::hash;
bool NamedSymbol::operator==(const Rule &rule) const {
auto other = dynamic_cast<const NamedSymbol *>(&rule);
return other && other->name == name;
}
NamedSymbol::NamedSymbol(const std::string &name) : name(name) {}
size_t NamedSymbol::hash_code() const {
return hash<string>()(name);
}
rule_ptr NamedSymbol::copy() const {
return std::make_shared<NamedSymbol>(*this);
}
string NamedSymbol::to_string() const {
return string("#<sym '") + name + "'>";
}
void NamedSymbol::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Equal when `rule` is a NamedSymbol carrying the same name.
bool NamedSymbol::operator==(const Rule &rule) const {
  const NamedSymbol *that = dynamic_cast<const NamedSymbol *>(&rule);
  if (!that)
    return false;
  return that->name == name;
}
size_t NamedSymbol::hash_code() const { return hash<string>()(name); }
// Returns a new shared NamedSymbol copy-constructed from this object.
rule_ptr NamedSymbol::copy() const {
  rule_ptr duplicate = std::make_shared<NamedSymbol>(*this);
  return duplicate;
}
// Renders as "#<sym 'NAME'>".
string NamedSymbol::to_string() const {
  string text("#<sym '");
  text += name;
  text += "'>";
  return text;
}
// Visitor dispatch: hands this NamedSymbol to the visitor's matching overload.
void NamedSymbol::accept(Visitor *visitor) const {
  const NamedSymbol *self = this;
  visitor->visit(self);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -4,21 +4,23 @@
#include <string>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class NamedSymbol : public Rule {
public:
explicit NamedSymbol(const std::string &name);
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class NamedSymbol : public Rule {
public:
explicit NamedSymbol(const std::string &name);
std::string name;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
std::string name;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_NAMED_SYMBOL_H_

View file

@ -4,31 +4,27 @@
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
namespace rules {
using std::string;
using std::hash;
namespace rules {
Pattern::Pattern(const string &string) : value(string) {}
using std::string;
using std::hash;
bool Pattern::operator==(tree_sitter::rules::Rule const &other) const {
auto pattern = dynamic_cast<const Pattern *>(&other);
return pattern && (pattern->value == value);
}
Pattern::Pattern(const string &string) : value(string) {}
size_t Pattern::hash_code() const {
return hash<string>()(value);
}
rule_ptr Pattern::copy() const {
return std::make_shared<Pattern>(*this);
}
string Pattern::to_string() const {
return string("#<pattern '") + util::escape_string(value) + "'>";
}
void Pattern::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Equal when `other` is a Pattern with the same pattern text.
bool Pattern::operator==(tree_sitter::rules::Rule const &other) const {
  const Pattern *that = dynamic_cast<const Pattern *>(&other);
  if (!that)
    return false;
  return that->value == value;
}
size_t Pattern::hash_code() const { return hash<string>()(value); }
// Returns a new shared Pattern copy-constructed from this object.
rule_ptr Pattern::copy() const {
  rule_ptr duplicate = std::make_shared<Pattern>(*this);
  return duplicate;
}
// Renders as "#<pattern 'TEXT'>" with TEXT passed through util::escape_string.
string Pattern::to_string() const {
  string text("#<pattern '");
  text += util::escape_string(value);
  text += "'>";
  return text;
}
// Visitor dispatch: hands this Pattern to the visitor's matching overload.
void Pattern::accept(Visitor *visitor) const {
  const Pattern *self = this;
  visitor->visit(self);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -5,21 +5,22 @@
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Pattern : public Rule {
public:
explicit Pattern(const std::string &string);
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class Pattern : public Rule {
public:
explicit Pattern(const std::string &string);
const std::string value;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const std::string value;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_PATTERN_H_

View file

@ -3,30 +3,26 @@
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
namespace rules {
namespace rules {
Repeat::Repeat(const rule_ptr content) : content(content) {}
using std::string;
bool Repeat::operator==(const Rule &rule) const {
const Repeat *other = dynamic_cast<const Repeat *>(&rule);
return other && (*other->content == *content);
}
Repeat::Repeat(const rule_ptr content) : content(content) {}
size_t Repeat::hash_code() const {
return content->hash_code();
}
rule_ptr Repeat::copy() const {
return std::make_shared<Repeat>(*this);
}
string Repeat::to_string() const {
return string("#<repeat ") + content->to_string() + ">";
}
void Repeat::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Equal when `rule` is a Repeat whose content compares equal to this content.
bool Repeat::operator==(const Rule &rule) const {
  auto that = dynamic_cast<const Repeat *>(&rule);
  if (!that)
    return false;
  return that->content->operator==(*content);
}
// The hash is exactly the hash of the repeated content.
size_t Repeat::hash_code() const {
  const size_t content_hash = content->hash_code();
  return content_hash;
}
// Returns a new shared Repeat copy-constructed from this object.
rule_ptr Repeat::copy() const {
  rule_ptr duplicate = std::make_shared<Repeat>(*this);
  return duplicate;
}
// Renders as "#<repeat CONTENT>".
string Repeat::to_string() const {
  string text("#<repeat ");
  text += content->to_string();
  text += ">";
  return text;
}
// Visitor dispatch: hands this Repeat to the visitor's matching overload.
void Repeat::accept(Visitor *visitor) const {
  const Repeat *self = this;
  visitor->visit(self);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -4,21 +4,23 @@
#include <string>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Repeat : public Rule {
public:
explicit Repeat(rule_ptr content);
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class Repeat : public Rule {
public:
explicit Repeat(rule_ptr content);
const rule_ptr content;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const rule_ptr content;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_REPEAT_H_

View file

@ -2,26 +2,28 @@
#include <set>
namespace tree_sitter {
using std::ostream;
using std::string;
namespace rules {
namespace rules {
bool Rule::operator!=(const Rule &other) const {
return !this->operator==(other);
}
using std::ostream;
using std::string;
ostream& operator<<(ostream& stream, const Rule &rule) {
return stream << rule.to_string();
}
ostream& operator<<(ostream& stream, const rule_ptr &rule) {
if (rule.get())
stream << *rule;
else
stream << string("#<null-rule>");
return stream;
}
Rule::~Rule() {}
}
// Inequality is the negation of the (virtual) operator==.
bool Rule::operator!=(const Rule &other) const {
  const bool same = this->operator==(other);
  return !same;
}
// Streams a rule by writing its to_string() representation.
ostream &operator<<(ostream &stream, const Rule &rule) {
  stream << rule.to_string();
  return stream;
}
// Streams a rule pointer: the pointed-to rule, or "#<null-rule>" when the
// pointer is empty.
ostream &operator<<(ostream &stream, const rule_ptr &rule) {
  if (!rule)
    return stream << string("#<null-rule>");
  return stream << *rule;
}
Rule::~Rule() {}
} // namespace rules
} // namespace tree_sitter

View file

@ -5,35 +5,39 @@
#include <memory>
namespace tree_sitter {
namespace rules {
class Visitor;
class Rule;
namespace rules {
typedef std::shared_ptr<Rule> rule_ptr;
class Visitor;
class Rule;
class Rule {
public:
virtual bool operator==(const Rule& other) const = 0;
bool operator!=(const Rule& other) const;
virtual size_t hash_code() const = 0;
virtual rule_ptr copy() const = 0;
virtual std::string to_string() const = 0;
virtual void accept(Visitor *visitor) const = 0;
virtual ~Rule();
};
typedef std::shared_ptr<Rule> rule_ptr;
std::ostream& operator<<(std::ostream& stream, const Rule &rule);
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule);
}
}
// Abstract interface for rule objects. Every member except operator!= and the
// destructor is pure virtual and must be supplied by the concrete rule class.
class Rule {
public:
// Equality against any other rule.
virtual bool operator==(const Rule &other) const = 0;
// Non-virtual; defined out of line as the negation of operator==.
bool operator!=(const Rule &other) const;
// Hash value for the rule; used by the std::hash specialization for rule_ptr.
virtual size_t hash_code() const = 0;
// Returns a newly allocated rule_ptr for this rule.
virtual rule_ptr copy() const = 0;
// Textual form; this is what the stream operators print.
virtual std::string to_string() const = 0;
// Visitor dispatch entry point.
virtual void accept(Visitor *visitor) const = 0;
// Virtual so rules can be destroyed through a Rule pointer.
virtual ~Rule();
};
std::ostream &operator<<(std::ostream &stream, const Rule &rule);
std::ostream &operator<<(std::ostream &stream, const rule_ptr &rule);
} // namespace rules
} // namespace tree_sitter
namespace std {
template<>
struct hash<tree_sitter::rules::rule_ptr> {
size_t operator()(const tree_sitter::rules::rule_ptr &rule) const {
return typeid(*rule).hash_code() ^ rule->hash_code();
}
};
}
// std::hash for rule_ptr: mixes the dynamic type's hash with the rule's own
// hash_code(). Dereferences `rule`, so it must not be null.
template <>
struct hash<tree_sitter::rules::rule_ptr> {
  size_t operator()(const tree_sitter::rules::rule_ptr &rule) const {
    const size_t type_hash = typeid(*rule).hash_code();
    const size_t rule_hash = rule->hash_code();
    return type_hash ^ rule_hash;
  }
};
} // namespace std
#endif // COMPILER_RULES_RULE_H_

View file

@ -16,65 +16,55 @@
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {
using std::make_shared;
using std::string;
using std::set;
using std::vector;
using std::map;
namespace rules {
namespace rules {
static const int KEYWORD_PRECEDENCE = 100;
using std::make_shared;
using std::string;
using std::set;
using std::vector;
using std::map;
static rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
return std::make_shared<Metadata>(rule, values);
}
static const int KEYWORD_PRECEDENCE = 100;
rule_ptr blank() {
return make_shared<Blank>();
}
rule_ptr choice(const vector<rule_ptr> &rules) {
return Choice::Build(rules);
}
rule_ptr repeat(const rule_ptr &content) {
return std::make_shared<Repeat>(content);
}
rule_ptr seq(const vector<rule_ptr> &rules) {
return Seq::Build(rules);
}
rule_ptr sym(const string &name) {
return make_shared<NamedSymbol>(name);
}
rule_ptr pattern(const string &value) {
return make_shared<Pattern>(value);
}
rule_ptr str(const string &value) {
return make_shared<String>(value);
}
rule_ptr keyword(const string &value) {
return token(prec(KEYWORD_PRECEDENCE, str(value)));
}
rule_ptr keypattern(const string &value) {
return token(prec(KEYWORD_PRECEDENCE, pattern(value)));
}
rule_ptr err(const rule_ptr &rule) {
return choice({ rule, ERROR().copy() });
}
rule_ptr prec(int precedence, rule_ptr rule) {
return metadata(rule, {{ PRECEDENCE, precedence }});
}
rule_ptr token(rule_ptr rule) {
return metadata(rule, {{ IS_TOKEN, 1 }});
}
}
// File-local helper: wraps `rule` in a Metadata node carrying `values`.
static rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
  rule_ptr wrapped = std::make_shared<Metadata>(rule, values);
  return wrapped;
}
// Builds a fresh Blank rule.
rule_ptr blank() {
  rule_ptr empty_rule = make_shared<Blank>();
  return empty_rule;
}
// Builds a choice over `rules` by delegating to Choice::Build.
rule_ptr choice(const vector<rule_ptr> &rules) {
  rule_ptr built = Choice::Build(rules);
  return built;
}
// Builds a Repeat rule around `content`.
rule_ptr repeat(const rule_ptr &content) {
  rule_ptr repeated = std::make_shared<Repeat>(content);
  return repeated;
}
// Builds a sequence over `rules` by delegating to Seq::Build.
rule_ptr seq(const vector<rule_ptr> &rules) {
  rule_ptr built = Seq::Build(rules);
  return built;
}
// Builds a NamedSymbol rule referring to `name`.
rule_ptr sym(const string &name) {
  rule_ptr symbol = make_shared<NamedSymbol>(name);
  return symbol;
}
// Builds a Pattern rule from the pattern text `value`.
rule_ptr pattern(const string &value) {
  rule_ptr built = make_shared<Pattern>(value);
  return built;
}
// Builds a String rule from the literal text `value`.
rule_ptr str(const string &value) {
  rule_ptr built = make_shared<String>(value);
  return built;
}
// A literal string marked as a token with KEYWORD_PRECEDENCE.
rule_ptr keyword(const string &value) {
  rule_ptr literal = str(value);
  rule_ptr ranked = prec(KEYWORD_PRECEDENCE, literal);
  return token(ranked);
}
// A pattern marked as a token with KEYWORD_PRECEDENCE.
rule_ptr keypattern(const string &value) {
  rule_ptr regex_rule = pattern(value);
  rule_ptr ranked = prec(KEYWORD_PRECEDENCE, regex_rule);
  return token(ranked);
}
// A choice between `rule` and a copy of the built-in ERROR symbol.
rule_ptr err(const rule_ptr &rule) {
  vector<rule_ptr> alternatives = { rule, ERROR().copy() };
  return choice(alternatives);
}
// Wraps `rule` in metadata recording PRECEDENCE = `precedence`.
rule_ptr prec(int precedence, rule_ptr rule) {
  map<MetadataKey, int> values = { { PRECEDENCE, precedence } };
  return metadata(rule, values);
}
// Wraps `rule` in metadata recording IS_TOKEN = 1.
rule_ptr token(rule_ptr rule) {
  map<MetadataKey, int> values = { { IS_TOKEN, 1 } };
  return metadata(rule, values);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -4,39 +4,36 @@
#include "compiler/rules/blank.h"
namespace tree_sitter {
using std::make_shared;
using std::string;
using std::vector;
namespace rules {
namespace rules {
Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {}
using std::make_shared;
using std::string;
using std::vector;
rule_ptr Seq::Build(const std::vector<rule_ptr> &rules) {
rule_ptr result = make_shared<Blank>();
for (auto &rule : rules)
result = (typeid(*result) != typeid(Blank)) ? make_shared<Seq>(result, rule) : rule;
return result;
}
Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {}
bool Seq::operator==(const Rule &rule) const {
const Seq *other = dynamic_cast<const Seq *>(&rule);
return other && (*other->left == *left) && (*other->right == *right);
}
size_t Seq::hash_code() const {
return left->hash_code() ^ right->hash_code();
}
rule_ptr Seq::copy() const {
return std::make_shared<Seq>(*this);
}
string Seq::to_string() const {
return string("#<seq ") + left->to_string() + " " + right->to_string() + ">";
}
void Seq::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Folds `rules` left-to-right into nested Seq nodes. The accumulator starts as
// a Blank; whenever the accumulator's dynamic type is Blank it is replaced by
// the next rule instead of being sequenced with it. An empty list yields Blank.
rule_ptr Seq::Build(const std::vector<rule_ptr> &rules) {
  rule_ptr sequence = make_shared<Blank>();
  for (const rule_ptr &next : rules) {
    const bool still_blank = (typeid(*sequence) == typeid(Blank));
    if (still_blank)
      sequence = next;
    else
      sequence = make_shared<Seq>(sequence, next);
  }
  return sequence;
}
// Equal when `rule` is a Seq whose left and right parts both compare equal.
bool Seq::operator==(const Rule &rule) const {
  auto that = dynamic_cast<const Seq *>(&rule);
  if (!that)
    return false;
  if (!(*that->left == *left))
    return false;
  return *that->right == *right;
}
// XOR of the left and right hashes.
size_t Seq::hash_code() const {
  const size_t left_hash = left->hash_code();
  const size_t right_hash = right->hash_code();
  return left_hash ^ right_hash;
}
// Returns a new shared Seq copy-constructed from this object.
rule_ptr Seq::copy() const {
  rule_ptr duplicate = std::make_shared<Seq>(*this);
  return duplicate;
}
// Renders as "#<seq LEFT RIGHT>".
string Seq::to_string() const {
  string text("#<seq ");
  text += left->to_string();
  text += " ";
  text += right->to_string();
  text += ">";
  return text;
}
// Visitor dispatch: hands this Seq to the visitor's matching overload.
void Seq::accept(Visitor *visitor) const {
  const Seq *self = this;
  visitor->visit(self);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -5,23 +5,25 @@
#include <vector>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Seq : public Rule {
public:
Seq(rule_ptr left, rule_ptr right);
static rule_ptr Build(const std::vector<rule_ptr> &rules);
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class Seq : public Rule {
public:
Seq(rule_ptr left, rule_ptr right);
static rule_ptr Build(const std::vector<rule_ptr> &rules);
const rule_ptr left;
const rule_ptr right;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const rule_ptr left;
const rule_ptr right;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_SEQ_H_

View file

@ -2,32 +2,26 @@
#include <string>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::hash;
namespace tree_sitter {
namespace rules {
namespace rules {
String::String(string value) : value(value) {}
using std::string;
using std::hash;
bool String::operator==(const Rule &rule) const {
const String *other = dynamic_cast<const String *>(&rule);
return other && (other->value == value);
}
String::String(string value) : value(value) {}
size_t String::hash_code() const {
return hash<string>()(value);
}
rule_ptr String::copy() const {
return std::make_shared<String>(*this);
}
string String::to_string() const {
return string("#<string '") + value + "'>";
}
void String::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Equal when `rule` is a String with the same text.
bool String::operator==(const Rule &rule) const {
  auto that = dynamic_cast<const String *>(&rule);
  if (!that)
    return false;
  return that->value == value;
}
size_t String::hash_code() const { return hash<string>()(value); }
// Returns a new shared String copy-constructed from this object.
rule_ptr String::copy() const {
  rule_ptr duplicate = std::make_shared<String>(*this);
  return duplicate;
}
// Renders as "#<string 'TEXT'>" (the text is not escaped).
string String::to_string() const {
  string text("#<string '");
  text += value;
  text += "'>";
  return text;
}
// Visitor dispatch: hands this String to the visitor's matching overload.
void String::accept(Visitor *visitor) const {
  const String *self = this;
  visitor->visit(self);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -4,21 +4,23 @@
#include <string>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class String : public Rule {
public:
explicit String(std::string value);
namespace tree_sitter {
namespace rules {
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
class String : public Rule {
public:
explicit String(std::string value);
const std::string value;
};
}
}
bool operator==(const Rule &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
const std::string value;
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_STRING_H_

View file

@ -3,63 +3,54 @@
#include <map>
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::to_string;
using std::hash;
namespace tree_sitter {
namespace rules {
namespace rules {
Symbol::Symbol(int index) :
index(index),
options(SymbolOption(0)) {}
using std::string;
using std::to_string;
using std::hash;
Symbol::Symbol(int index, SymbolOption options) :
index(index),
options(options) {}
Symbol::Symbol(int index) : index(index), options(SymbolOption(0)) {}
bool Symbol::operator==(const Symbol &other) const {
return (other.index == index) && (other.options == options);
}
Symbol::Symbol(int index, SymbolOption options)
: index(index), options(options) {}
bool Symbol::operator==(const Rule &rule) const {
const Symbol *other = dynamic_cast<const Symbol *>(&rule);
return other && this->operator==(*other);
}
size_t Symbol::hash_code() const {
return hash<int>()(index) ^ hash<int16_t>()(options);
}
rule_ptr Symbol::copy() const {
return std::make_shared<Symbol>(*this);
}
string Symbol::to_string() const {
string name = (options & SymbolOptionAuxiliary) ? "aux_" : "";
name += (options & SymbolOptionToken) ? "token" : "sym";
return "#<" + name + " " + std::to_string(index) + ">";
}
bool Symbol::operator<(const Symbol &other) const {
if (options < other.options) return true;
if (options > other.options) return false;
return (index < other.index);
}
bool Symbol::is_token() const {
return options & SymbolOptionToken;
}
bool Symbol::is_built_in() const {
return index < 0;
}
bool Symbol::is_auxiliary() const {
return options & SymbolOptionAuxiliary;
}
void Symbol::accept(Visitor *visitor) const {
visitor->visit(this);
}
}
// Symbols are equal when both index and options match.
bool Symbol::operator==(const Symbol &other) const {
  if (other.index != index)
    return false;
  return other.options == options;
}
// Rule-level equality: `rule` must be a Symbol, then the Symbol overload decides.
bool Symbol::operator==(const Rule &rule) const {
  auto that = dynamic_cast<const Symbol *>(&rule);
  if (!that)
    return false;
  return this->operator==(*that);
}
size_t Symbol::hash_code() const {
return hash<int>()(index) ^ hash<int16_t>()(options);
}
// Returns a new shared Symbol copy-constructed from this object.
rule_ptr Symbol::copy() const {
  rule_ptr duplicate = std::make_shared<Symbol>(*this);
  return duplicate;
}
// Renders as "#<[aux_]token N>" or "#<[aux_]sym N>", where the "aux_" prefix
// and token/sym label follow the option bits and N is the index.
string Symbol::to_string() const {
  string label;
  if (options & SymbolOptionAuxiliary)
    label += "aux_";
  if (options & SymbolOptionToken)
    label += "token";
  else
    label += "sym";
  // std:: qualification is required: unqualified to_string is this member.
  return "#<" + label + " " + std::to_string(index) + ">";
}
// Strict ordering: by options first, then by index when options are equal.
bool Symbol::operator<(const Symbol &other) const {
  if (options != other.options)
    return options < other.options;
  return index < other.index;
}
// True when the SymbolOptionToken bit is set in options.
bool Symbol::is_token() const {
  return (options & SymbolOptionToken) != 0;
}
// True when the index is negative.
bool Symbol::is_built_in() const {
  const bool negative_index = index < 0;
  return negative_index;
}
// True when the SymbolOptionAuxiliary bit is set in options.
bool Symbol::is_auxiliary() const {
  return (options & SymbolOptionAuxiliary) != 0;
}
// Visitor dispatch: hands this Symbol to the visitor's matching overload.
void Symbol::accept(Visitor *visitor) const {
  const Symbol *self = this;
  visitor->visit(self);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -4,44 +4,48 @@
#include <string>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
typedef enum {
SymbolOptionToken = 1 << 0,
SymbolOptionAuxiliary = 1 << 1,
} SymbolOption;
namespace tree_sitter {
namespace rules {
class Symbol : public Rule {
public:
explicit Symbol(int index);
Symbol(int index, SymbolOption options);
typedef enum {
SymbolOptionToken = 1 << 0,
SymbolOptionAuxiliary = 1 << 1,
} SymbolOption;
bool operator==(const Symbol &other) const;
bool operator==(const Rule &other) const;
class Symbol : public Rule {
public:
explicit Symbol(int index);
Symbol(int index, SymbolOption options);
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
bool operator==(const Symbol &other) const;
bool operator==(const Rule &other) const;
bool operator<(const Symbol &other) const;
bool is_token() const;
bool is_built_in() const;
bool is_auxiliary() const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
int index;
SymbolOption options;
};
}
}
bool operator<(const Symbol &other) const;
bool is_token() const;
bool is_built_in() const;
bool is_auxiliary() const;
int index;
SymbolOption options;
};
} // namespace rules
} // namespace tree_sitter
namespace std {
template<>
struct hash<tree_sitter::rules::Symbol> {
size_t operator()(const tree_sitter::rules::Symbol &rule) const {
return rule.hash_code();
}
};
}
// std::hash for Symbol: forwards to Symbol::hash_code().
template <>
struct hash<tree_sitter::rules::Symbol> {
  size_t operator()(const tree_sitter::rules::Symbol &rule) const {
    const size_t symbol_hash = rule.hash_code();
    return symbol_hash;
  }
};
} // namespace std
#endif // COMPILER_RULES_SYMBOL_H_

View file

@ -11,32 +11,34 @@
#include "compiler/rules/repeat.h"
namespace tree_sitter {
using std::vector;
namespace rules {
namespace rules {
Visitor::~Visitor() {}
using std::vector;
rule_ptr IdentityRuleFn::default_apply(const Rule *rule) {
return rule->copy();
}
Visitor::~Visitor() {}
rule_ptr IdentityRuleFn::apply_to(const Choice *rule) {
vector<rule_ptr> rules;
for (const auto &el : rule->elements)
rules.push_back(apply(el));
return Choice::Build(rules);
}
rule_ptr IdentityRuleFn::apply_to(const Seq *rule) {
return Seq::Build({ apply(rule->left), apply(rule->right) });
}
rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) {
return std::make_shared<Repeat>(apply(rule->content));
}
rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) {
return std::make_shared<Metadata>(apply(rule->rule), rule->value);
}
}
// Fallback for rule types without a dedicated overload: return a copy.
rule_ptr IdentityRuleFn::default_apply(const Rule *rule) {
  rule_ptr duplicate = rule->copy();
  return duplicate;
}
// Applies this function to every element, in order, and rebuilds the choice.
rule_ptr IdentityRuleFn::apply_to(const Choice *rule) {
  vector<rule_ptr> transformed;
  for (auto it = rule->elements.begin(); it != rule->elements.end(); ++it) {
    rule_ptr result = apply(*it);
    transformed.push_back(result);
  }
  return Choice::Build(transformed);
}
// Applies this function to the left part, then the right part, and rebuilds
// the sequence from the two results.
rule_ptr IdentityRuleFn::apply_to(const Seq *rule) {
  rule_ptr new_left = apply(rule->left);
  rule_ptr new_right = apply(rule->right);
  return Seq::Build({ new_left, new_right });
}
// Applies this function to the repeated content and wraps the result in a new Repeat.
rule_ptr IdentityRuleFn::apply_to(const Repeat *rule) {
  rule_ptr new_content = apply(rule->content);
  return std::make_shared<Repeat>(new_content);
}
// Applies this function to the wrapped rule and re-wraps the result with the
// original metadata values.
rule_ptr IdentityRuleFn::apply_to(const Metadata *rule) {
  rule_ptr new_inner = apply(rule->rule);
  return std::make_shared<Metadata>(new_inner, rule->value);
}
} // namespace rules
} // namespace tree_sitter

View file

@ -4,79 +4,101 @@
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
class Blank;
class NamedSymbol;
class CharacterSet;
class Choice;
class Repeat;
class Seq;
class String;
class Symbol;
class Pattern;
class Metadata;
namespace rules {
class Visitor {
public:
virtual void visit(const Blank *rule) = 0;
virtual void visit(const CharacterSet *rule) = 0;
virtual void visit(const Choice *rule) = 0;
virtual void visit(const Metadata *rule) = 0;
virtual void visit(const Pattern *rule) = 0;
virtual void visit(const Repeat *rule) = 0;
virtual void visit(const Seq *rule) = 0;
virtual void visit(const String *rule) = 0;
virtual void visit(const NamedSymbol *rule) = 0;
virtual void visit(const Symbol *rule) = 0;
virtual ~Visitor();
};
class Blank;
class NamedSymbol;
class CharacterSet;
class Choice;
class Repeat;
class Seq;
class String;
class Symbol;
class Pattern;
class Metadata;
template<typename T>
class RuleFn : private Visitor {
public:
T apply(const rule_ptr &rule) {
value_ = T();
rule->accept(this);
return value_;
}
// Abstract visitor over the rule hierarchy: one pure-virtual visit overload
// for each concrete rule type forward-declared in this header. Rules call the
// matching overload from their accept() method.
class Visitor {
public:
virtual void visit(const Blank *rule) = 0;
virtual void visit(const CharacterSet *rule) = 0;
virtual void visit(const Choice *rule) = 0;
virtual void visit(const Metadata *rule) = 0;
virtual void visit(const Pattern *rule) = 0;
virtual void visit(const Repeat *rule) = 0;
virtual void visit(const Seq *rule) = 0;
virtual void visit(const String *rule) = 0;
virtual void visit(const NamedSymbol *rule) = 0;
virtual void visit(const Symbol *rule) = 0;
// Virtual destructor; defined out of line.
virtual ~Visitor();
};
protected:
virtual T default_apply(const Rule *rule) { return T(); }
virtual T apply_to(const Blank *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const CharacterSet *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Choice *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Metadata *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Pattern *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Repeat *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Seq *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const String *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const NamedSymbol *rule) { return default_apply((const Rule *)rule); }
virtual T apply_to(const Symbol *rule) { return default_apply((const Rule *)rule); }
template <typename T>
class RuleFn : private Visitor {
public:
T apply(const rule_ptr &rule) {
value_ = T();
rule->accept(this);
return value_;
}
void visit(const Blank *rule) { value_ = apply_to(rule); }
void visit(const CharacterSet *rule) { value_ = apply_to(rule); }
void visit(const Choice *rule) { value_ = apply_to(rule); }
void visit(const Metadata *rule) { value_ = apply_to(rule); }
void visit(const Pattern *rule) { value_ = apply_to(rule); }
void visit(const Repeat *rule) { value_ = apply_to(rule); }
void visit(const Seq *rule) { value_ = apply_to(rule); }
void visit(const String *rule) { value_ = apply_to(rule); }
void visit(const NamedSymbol *rule) { value_ = apply_to(rule); }
void visit(const Symbol *rule) { value_ = apply_to(rule); }
protected:
virtual T default_apply(const Rule *rule) { return T(); }
virtual T apply_to(const Blank *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const CharacterSet *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Choice *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Metadata *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Pattern *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Repeat *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Seq *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const String *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const NamedSymbol *rule) {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const Symbol *rule) {
return default_apply((const Rule *)rule);
}
private:
T value_;
};
void visit(const Blank *rule) { value_ = apply_to(rule); }
void visit(const CharacterSet *rule) { value_ = apply_to(rule); }
void visit(const Choice *rule) { value_ = apply_to(rule); }
void visit(const Metadata *rule) { value_ = apply_to(rule); }
void visit(const Pattern *rule) { value_ = apply_to(rule); }
void visit(const Repeat *rule) { value_ = apply_to(rule); }
void visit(const Seq *rule) { value_ = apply_to(rule); }
void visit(const String *rule) { value_ = apply_to(rule); }
void visit(const NamedSymbol *rule) { value_ = apply_to(rule); }
void visit(const Symbol *rule) { value_ = apply_to(rule); }
class IdentityRuleFn : public RuleFn<rule_ptr> {
protected:
virtual rule_ptr default_apply(const Rule *rule);
virtual rule_ptr apply_to(const Choice *rule);
virtual rule_ptr apply_to(const Metadata *rule);
virtual rule_ptr apply_to(const Seq *rule);
virtual rule_ptr apply_to(const Repeat *rule);
};
}
}
private:
T value_;
};
// RuleFn<rule_ptr> whose out-of-line definitions copy leaf rules
// (default_apply) and rebuild Choice, Metadata, Seq and Repeat nodes from the
// results of applying the function to their children.
class IdentityRuleFn : public RuleFn<rule_ptr> {
protected:
// Fallback used for every rule type not overridden below.
virtual rule_ptr default_apply(const Rule *rule);
virtual rule_ptr apply_to(const Choice *rule);
virtual rule_ptr apply_to(const Metadata *rule);
virtual rule_ptr apply_to(const Seq *rule);
virtual rule_ptr apply_to(const Repeat *rule);
};
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_VISITOR_H_

View file

@ -2,47 +2,50 @@
#include <vector>
namespace tree_sitter {
using std::string;
using std::vector;
using std::set;
namespace util {
namespace util {
void str_replace(string *input, const string &search, const string &replace) {
size_t pos = 0;
while (1) {
pos = input->find(search, pos);
if (pos == string::npos) break;
input->erase(pos, search.length());
input->insert(pos, replace);
pos += replace.length();
}
}
using std::string;
using std::vector;
using std::set;
string escape_string(string input) {
str_replace(&input, "\"", "\\\"");
str_replace(&input, "\n", "\\n");
return input;
}
string escape_char(char character) {
switch (character) {
case '\0':
return "\\0";
case '"':
return "\\\"";
case '\'':
return "\\'";
case '\n':
return "\\n";
case '\r':
return "\\r";
case '\t':
return "\\t";
case '\\':
return "\\\\";
default:
return string() + character;
}
}
}
// Replaces, in place, every non-overlapping occurrence of `search` in
// `*input` with `replace`, scanning from left to right.
//
// After each substitution the scan resumes just past the inserted text, so a
// `replace` value that itself contains `search` is never rescanned.
//
// An empty `search` is treated as a no-op: it would match at every position,
// which previously made the loop run forever (growing the string without
// bound whenever `replace` was non-empty).
void str_replace(string *input, const string &search, const string &replace) {
  if (search.empty())
    return;

  size_t pos = 0;
  while ((pos = input->find(search, pos)) != string::npos) {
    input->replace(pos, search.length(), replace);
    pos += replace.length();
  }
}
// Returns a copy of `input` in which every double quote becomes the two
// characters \" and every newline becomes the two characters \n. The
// substitutions are applied in that order; all other characters are kept.
string escape_string(string input) {
  static const char *const substitutions[][2] = {
    { "\"", "\\\"" },
    { "\n", "\\n" },
  };
  for (const auto &substitution : substitutions)
    str_replace(&input, substitution[0], substitution[1]);
  return input;
}
// Returns the backslash escape sequence for `character` when it is NUL, a
// double quote, a single quote, newline, carriage return, tab or backslash.
// Any other character is returned unchanged as a one-character string.
string escape_char(char character) {
  const char *escaped = nullptr;

  if (character == '\0')
    escaped = "\\0";
  else if (character == '"')
    escaped = "\\\"";
  else if (character == '\'')
    escaped = "\\'";
  else if (character == '\n')
    escaped = "\\n";
  else if (character == '\r')
    escaped = "\\r";
  else if (character == '\t')
    escaped = "\\t";
  else if (character == '\\')
    escaped = "\\\\";

  if (escaped)
    return string(escaped);
  return string(1, character);
}
} // namespace util
} // namespace tree_sitter

View file

@ -6,11 +6,14 @@
#include <set>
namespace tree_sitter {
namespace util {
void str_replace(std::string *input, const std::string &search, const std::string &replace);
std::string escape_string(std::string input);
std::string escape_char(char character);
}
}
// String helpers shared by the compiler.
namespace util {
// Replaces each occurrence of `search` in `*input` with `replace`, modifying
// the string in place.
void str_replace(std::string *input, const std::string &search,
const std::string &replace);
// Returns `input` with double quotes and newlines replaced by the escape
// sequences \" and \n.
std::string escape_string(std::string input);
// Returns the escape sequence for NUL, quotes, newline, carriage return, tab
// and backslash; any other character is returned as a one-character string.
std::string escape_char(char character);
} // namespace util
} // namespace tree_sitter
#endif // COMPILER_UTIL_STRING_HELPERS_H_

View file

@ -11,11 +11,9 @@ struct TSDocument {
size_t error_count;
};
TSDocument * ts_document_make() {
TSDocument *ts_document_make() {
TSDocument *document = malloc(sizeof(TSDocument));
*document = (TSDocument) {
.input = (TSInput) {}
};
*document = (TSDocument) { .input = (TSInput) {} };
return document;
}
@ -33,12 +31,13 @@ void ts_document_set_parser(TSDocument *document, TSParser *parser) {
document->parser = parser;
}
const TSTree * ts_document_tree(const TSDocument *document) {
const TSTree *ts_document_tree(const TSDocument *document) {
return document->tree;
}
const char * ts_document_string(const TSDocument *document) {
return ts_tree_string(document->tree, ts_parser_config(document->parser).symbol_names);
const char *ts_document_string(const TSDocument *document) {
return ts_tree_string(document->tree,
ts_parser_config(document->parser).symbol_names);
}
void ts_document_set_input(TSDocument *document, TSInput input) {
@ -50,7 +49,8 @@ void ts_document_edit(TSDocument *document, TSInputEdit edit) {
document->tree = ts_parser_parse(document->parser, document->input, &edit);
}
const char * ts_document_symbol_name(const TSDocument *document, const TSTree *tree) {
const char *ts_document_symbol_name(const TSDocument *document,
const TSTree *tree) {
return ts_parser_config(document->parser).symbol_names[tree->symbol];
}
@ -60,7 +60,7 @@ typedef struct {
size_t length;
} TSStringInput;
const char * ts_string_input_read(void *d, size_t *bytes_read) {
const char *ts_string_input_read(void *d, size_t *bytes_read) {
TSStringInput *data = (TSStringInput *)d;
if (data->position >= data->length) {
*bytes_read = 0;
@ -83,24 +83,22 @@ TSInput ts_string_input_make(const char *string) {
data->string = string;
data->position = 0;
data->length = strlen(string);
TSInput input = {
.data = (void *)data,
.read_fn = ts_string_input_read,
.seek_fn = ts_string_input_seek,
.release_fn = free,
};
return input;
return (TSInput) { .data = (void *)data,
.read_fn = ts_string_input_read,
.seek_fn = ts_string_input_seek,
.release_fn = free };
}
/* Convenience wrapper: wraps the NUL-terminated `text` in a TSInput via
 * ts_string_input_make and hands it to ts_document_set_input. */
void ts_document_set_input_string(TSDocument *document, const char *text) {
  TSInput string_input = ts_string_input_make(text);
  ts_document_set_input(document, string_input);
}
TSNode * ts_document_root_node(const TSDocument *document) {
return ts_node_make_root(document->tree, document->parser->config.symbol_names);
TSNode *ts_document_root_node(const TSDocument *document) {
return ts_node_make_root(document->tree,
document->parser->config.symbol_names);
}
TSNode * ts_document_get_node(const TSDocument *document, size_t pos) {
TSNode *ts_document_get_node(const TSDocument *document, size_t pos) {
TSNode *root = ts_document_root_node(document);
TSNode *result = ts_node_leaf_at_pos(root, pos);
ts_node_release(root);

View file

@ -2,16 +2,14 @@
#include "runtime/tree.h"
TSLexer ts_lexer_make() {
return (TSLexer) {
.chunk = NULL,
.debug = 0,
.chunk_start = 0,
.chunk_size = 0,
.position_in_chunk = 0,
.token_start_position = 0,
.token_end_position = 0,
.reached_end = 0
};
return (TSLexer) { .chunk = NULL,
.debug = 0,
.chunk_start = 0,
.chunk_size = 0,
.position_in_chunk = 0,
.token_start_position = 0,
.token_end_position = 0,
.reached_end = 0 };
}
int ts_lexer_advance(TSLexer *lexer) {
@ -33,11 +31,10 @@ int ts_lexer_advance(TSLexer *lexer) {
return 1;
}
TSTree * ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
/* Builds a leaf tree for the token ending at the lexer's current position.
 *
 * The leaf's size runs from token_start_position to the current position,
 * and its offset is the distance between the previously recorded
 * token_end_position and this token's start. The current position is then
 * stored as the new token_end_position. */
TSTree *ts_lexer_build_node(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
  size_t token_end = ts_lexer_position(lexer);
  size_t token_start = lexer->token_start_position;
  size_t previous_token_end = lexer->token_end_position;

  lexer->token_end_position = token_end;
  return ts_tree_make_leaf(symbol, token_end - token_start,
                           token_start - previous_token_end, is_hidden);
}

View file

@ -1,34 +1,33 @@
#include "runtime/node.h"
#include "runtime/tree.h"
TSNode * ts_node_make(const TSTree *tree, TSNode *parent, size_t index, size_t start_position, const char **names) {
if (parent) ts_node_retain(parent);
TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index,
size_t start_position, const char **names) {
if (parent)
ts_node_retain(parent);
TSNode *result = malloc(sizeof(TSNode));
*result = (TSNode) {
.ref_count = 1,
.parent = parent,
.index = index,
.content = tree,
.start_position = start_position,
.names = names,
};
*result = (TSNode) { .ref_count = 1,
.parent = parent,
.index = index,
.content = tree,
.start_position = start_position,
.names = names, };
return result;
}
TSNode * ts_node_make_root(const TSTree *tree, const char **names) {
/* Creates a parentless node (index 0, start position 0) for `tree`.
 * While ts_tree_is_wrapper holds for the current tree, descends to its first
 * child, so the node's content is the first non-wrapper tree on that path. */
TSNode *ts_node_make_root(const TSTree *tree, const char **names) {
  const TSTree *content = tree;
  for (; ts_tree_is_wrapper(content); content = content->children[0]) {
    /* skip wrapper levels */
  }
  return ts_node_make(content, NULL, 0, 0, names);
}
void ts_node_retain(TSNode *node) {
node->ref_count++;
}
void ts_node_retain(TSNode *node) { node->ref_count++; }
void ts_node_release(TSNode *node) {
node->ref_count--;
if (node->ref_count == 0) {
if (node->parent) ts_node_release(node->parent);
if (node->parent)
ts_node_release(node->parent);
free(node);
}
}
@ -37,31 +36,27 @@ size_t ts_node_pos(const TSNode *node) {
return node->start_position + node->content->offset;
}
size_t ts_node_size(const TSNode *node) {
return node->content->size;
}
size_t ts_node_size(const TSNode *node) { return node->content->size; }
int ts_node_eq(const TSNode *left, const TSNode *right) {
return ts_tree_equals(left->content, right->content);
}
const char * ts_node_name(const TSNode *node) {
const char *ts_node_name(const TSNode *node) {
return node->names[node->content->symbol];
}
const char * ts_node_string(const TSNode *node) {
const char *ts_node_string(const TSNode *node) {
return ts_tree_string(node->content, node->names);
}
TSNode * ts_node_parent(TSNode *child) {
return child->parent;
}
TSNode *ts_node_parent(TSNode *child) { return child->parent; }
TSNode * ts_node_prev_sibling(TSNode *child) {
/* Returns a node for the sibling immediately before `child` (obtained from
 * ts_node_child on the parent), or NULL when there is no such sibling.
 *
 * A node without a parent has no siblings; ts_node_child dereferences its
 * parent argument, so that case must be rejected here. The first child is
 * rejected explicitly as well rather than relying on `index - 1` wrapping
 * around to a huge size_t that ts_node_child then refuses. */
TSNode *ts_node_prev_sibling(TSNode *child) {
  if (!child->parent || child->index == 0)
    return NULL;
  return ts_node_child(child->parent, child->index - 1);
}
TSNode * ts_node_next_sibling(TSNode *child) {
/* Returns a node for the sibling immediately after `child` (obtained from
 * ts_node_child on the parent), or NULL when there is no such sibling.
 *
 * A node without a parent has no siblings; ts_node_child dereferences its
 * parent argument, so that case must be rejected here. Running past the last
 * child is handled by ts_node_child, which returns NULL for an index that is
 * out of range. */
TSNode *ts_node_next_sibling(TSNode *child) {
  if (!child->parent)
    return NULL;
  return ts_node_child(child->parent, child->index + 1);
}
@ -71,25 +66,29 @@ size_t ts_node_child_count(const TSNode *parent) {
return result;
}
TSNode * ts_node_child(TSNode *parent, size_t index) {
TSNode *ts_node_child(TSNode *parent, size_t index) {
size_t child_count;
TSChildWithPosition *children = ts_tree_visible_children(parent->content, &child_count);
TSChildWithPosition *children =
ts_tree_visible_children(parent->content, &child_count);
if (child_count <= index)
return NULL;
size_t position = parent->start_position + children[index].position;
return ts_node_make(children[index].tree, parent, index, position, parent->names);
return ts_node_make(children[index].tree, parent, index, position,
parent->names);
}
TSNode * ts_node_leaf_at_pos(TSNode *parent, size_t position) {
TSNode *ts_node_leaf_at_pos(TSNode *parent, size_t position) {
size_t child_count;
TSChildWithPosition *children = ts_tree_visible_children(parent->content, &child_count);
TSChildWithPosition *children =
ts_tree_visible_children(parent->content, &child_count);
for (size_t i = 0; i < child_count; i++) {
TSChildWithPosition child = children[i];
size_t child_left = child.position + child.tree->offset;
if (child_left > position)
break;
if (child_left + child.tree->size > position) {
TSNode *node = ts_node_make(child.tree, parent, i, child.position, parent->names);
TSNode *node =
ts_node_make(child.tree, parent, i, child.position, parent->names);
TSNode *result = ts_node_leaf_at_pos(node, position);
ts_node_release(node);
return result;

View file

@ -13,7 +13,8 @@ struct TSNode {
const char **names;
};
TSNode * ts_node_make(const TSTree *tree, TSNode *parent, size_t index, size_t start_position, const char **names);
TSNode * ts_node_make_root(const TSTree *tree, const char **names);
TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index,
size_t start_position, const char **names);
TSNode *ts_node_make_root(const TSTree *tree, const char **names);
#endif

View file

@ -7,24 +7,28 @@
* Private
*/
static const TSParseAction * actions_for_state(TSParserConfig config, TSStateId state) {
/* Returns a pointer to the first parse action for `state`. The parse table
 * is indexed as consecutive runs of config.symbol_count actions, one run per
 * state; callers index the result by symbol. */
static const TSParseAction *actions_for_state(TSParserConfig config,
                                              TSStateId state) {
  return &config.parse_table[state * config.symbol_count];
}
static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) {
if (!edit) return 0;
if (!edit)
return 0;
TSStack *stack = &parser->stack;
size_t position = 0;
for (;;) {
TSTree *node = ts_stack_top_node(stack);
if (!node) break;
if (!node)
break;
position = ts_stack_right_position(stack);
size_t child_count;
TSTree **children = ts_tree_children(node, &child_count);
if (position <= edit->position && !children) break;
if (position <= edit->position && !children)
break;
stack->size--;
position -= ts_tree_total_size(node);
@ -32,7 +36,8 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) {
for (size_t i = 0; i < child_count && position < edit->position; i++) {
TSTree *child = children[i];
TSStateId state = ts_stack_top_state(stack);
TSStateId next_state = actions_for_state(parser->config, state)[child->symbol].data.to_state;
TSStateId next_state =
actions_for_state(parser->config, state)[child->symbol].data.to_state;
ts_stack_push(stack, next_state, child);
ts_tree_retain(child);
position += ts_tree_total_size(child);
@ -44,9 +49,10 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) {
return position;
}
static TSSymbol * expected_symbols(TSParser *parser, size_t *count) {
static TSSymbol *expected_symbols(TSParser *parser, size_t *count) {
*count = 0;
const TSParseAction *actions = actions_for_state(parser->config, ts_stack_top_state(&parser->stack));
const TSParseAction *actions =
actions_for_state(parser->config, ts_stack_top_state(&parser->stack));
for (size_t i = 0; i < parser->config.symbol_count; i++)
if (actions[i].type != TSParseActionTypeError)
(*count)++;
@ -64,26 +70,27 @@ static TSSymbol * expected_symbols(TSParser *parser, size_t *count) {
* Public
*/
TSParser * ts_parser_make(TSParserConfig config) {
TSParser *ts_parser_make(TSParserConfig config) {
TSParser *result = malloc(sizeof(*result));
*result = (TSParser) {
.lexer = ts_lexer_make(),
.stack = ts_stack_make(),
.debug = 0,
.config = config,
};
*result = (TSParser) { .lexer = ts_lexer_make(),
.stack = ts_stack_make(),
.debug = 0,
.config = config, };
return result;
}
void ts_parser_free(TSParser *parser) {
if (parser->lookahead) ts_tree_release(parser->lookahead);
if (parser->next_lookahead) ts_tree_release(parser->next_lookahead);
if (parser->lookahead)
ts_tree_release(parser->lookahead);
if (parser->next_lookahead)
ts_tree_release(parser->next_lookahead);
ts_stack_delete(&parser->stack);
free(parser);
}
void ts_parser_start(TSParser *parser, TSInput input, TSInputEdit *edit) {
if (!edit) ts_stack_shrink(&parser->stack, 0);
if (!edit)
ts_stack_shrink(&parser->stack, 0);
parser->lookahead = NULL;
parser->next_lookahead = NULL;
@ -110,11 +117,8 @@ void ts_parser_shift_extra(TSParser *parser) {
void ts_parser_reduce(TSParser *parser, TSSymbol symbol, size_t child_count) {
parser->next_lookahead = parser->lookahead;
parser->lookahead = ts_stack_reduce(
&parser->stack,
symbol,
child_count,
parser->config.hidden_symbol_flags, 1);
parser->lookahead = ts_stack_reduce(&parser->stack, symbol, child_count,
parser->config.hidden_symbol_flags, 1);
}
int ts_parser_reduce_extra(TSParser *parser, TSSymbol symbol) {
@ -131,12 +135,8 @@ int ts_parser_reduce_extra(TSParser *parser, TSSymbol symbol) {
int ts_parser_handle_error(TSParser *parser) {
size_t count = 0;
const TSSymbol *inputs = expected_symbols(parser, &count);
TSTree *error = ts_tree_make_error(
ts_lexer_lookahead_char(&parser->lexer),
count,
inputs,
0,
0);
TSTree *error = ts_tree_make_error(ts_lexer_lookahead_char(&parser->lexer),
count, inputs, 0, 0);
for (;;) {
ts_tree_release(parser->lookahead);
@ -159,10 +159,13 @@ int ts_parser_handle_error(TSParser *parser) {
for (size_t j = 0; j < parser->stack.size; j++) {
size_t i = parser->stack.size - 1 - j;
TSStateId stack_state = parser->stack.entries[i].state;
TSParseAction action_on_error = actions_for_state(parser->config, stack_state)[ts_builtin_sym_error];
TSParseAction action_on_error =
actions_for_state(parser->config, stack_state)[ts_builtin_sym_error];
if (action_on_error.type == TSParseActionTypeShift) {
TSStateId state_after_error = action_on_error.data.to_state;
if (actions_for_state(parser->config, state_after_error)[parser->lookahead->symbol].type != TSParseActionTypeError) {
if (actions_for_state(parser->config,
state_after_error)[parser->lookahead->symbol]
.type != TSParseActionTypeError) {
ts_stack_shrink(&parser->stack, i + 1);
ts_stack_push(&parser->stack, state_after_error, error);
return 1;
@ -172,7 +175,7 @@ int ts_parser_handle_error(TSParser *parser) {
}
}
TSTree * ts_parser_tree_root(TSParser *parser) {
TSTree *ts_parser_tree_root(TSParser *parser) {
TSStack *stack = &parser->stack;
size_t node_count = 0;
for (size_t i = 0; i < stack->size; i++) {
@ -182,7 +185,8 @@ TSTree * ts_parser_tree_root(TSParser *parser) {
}
if (node_count > 1)
return ts_stack_reduce(stack, 2, stack->size, parser->config.hidden_symbol_flags, 0);
return ts_stack_reduce(stack, 2, stack->size,
parser->config.hidden_symbol_flags, 0);
else
return ts_stack_top_node(stack);
}
@ -190,16 +194,20 @@ TSTree * ts_parser_tree_root(TSParser *parser) {
TSParseAction ts_parser_next_action(TSParser *parser) {
TSStateId state = ts_stack_top_state(&parser->stack);
if (!parser->lookahead)
parser->lookahead = parser->config.lex_fn(parser, parser->config.lex_states[state]);
parser->lookahead =
parser->config.lex_fn(parser, parser->config.lex_states[state]);
return actions_for_state(parser->config, state)[parser->lookahead->symbol];
}
#define DEBUG_PARSE(...) \
if (parser->debug) { fprintf(stderr, "\n" __VA_ARGS__); }
#define DEBUG_PARSE(...) \
if (parser->debug) { \
fprintf(stderr, "\n" __VA_ARGS__); \
}
TSTree * ts_parser_step(TSParser *parser) {
TSTree *ts_parser_step(TSParser *parser) {
TSParseAction action = ts_parser_next_action(parser);
DEBUG_PARSE("LOOKAHEAD %s", parser->config.symbol_names[parser->lookahead->symbol]);
DEBUG_PARSE("LOOKAHEAD %s",
parser->config.symbol_names[parser->lookahead->symbol]);
switch (action.type) {
case TSParseActionTypeShift:
DEBUG_PARSE("SHIFT %d", action.data.to_state);
@ -210,7 +218,9 @@ TSTree * ts_parser_step(TSParser *parser) {
ts_parser_shift_extra(parser);
return NULL;
case TSParseActionTypeReduce:
DEBUG_PARSE("REDUCE %s %d", parser->config.symbol_names[action.data.symbol], action.data.child_count);
DEBUG_PARSE("REDUCE %s %d",
parser->config.symbol_names[action.data.symbol],
action.data.child_count);
ts_parser_reduce(parser, action.data.symbol, action.data.child_count);
return NULL;
case TSParseActionTypeReduceExtra:
@ -235,15 +245,15 @@ error:
return NULL;
}
const TSTree * ts_parser_parse(TSParser *parser, TSInput input, TSInputEdit *edit) {
const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
TSInputEdit *edit) {
ts_parser_start(parser, input, edit);
for (;;) {
const TSTree *tree = ts_parser_step(parser);
if (tree) return tree;
if (tree)
return tree;
}
}
TSParserConfig ts_parser_config(TSParser *parser) {
return parser->config;
}
TSParserConfig ts_parser_config(TSParser *parser) { return parser->config; }

View file

@ -8,8 +8,7 @@ static TSStateId INITIAL_STATE = 0;
TSStack ts_stack_make() {
TSStack result = {
.entries = calloc(INITIAL_STACK_SIZE, sizeof(*result.entries)),
.size = 0,
.entries = calloc(INITIAL_STACK_SIZE, sizeof(*result.entries)), .size = 0,
};
return result;
}
@ -25,7 +24,7 @@ TSStateId ts_stack_top_state(const TSStack *stack) {
return stack->entries[stack->size - 1].state;
}
TSTree * ts_stack_top_node(const TSStack *stack) {
TSTree *ts_stack_top_node(const TSStack *stack) {
if (stack->size == 0)
return NULL;
return stack->entries[stack->size - 1].node;
@ -53,11 +52,8 @@ size_t ts_stack_right_position(const TSStack *stack) {
return result;
}
TSTree * ts_stack_reduce(TSStack *stack,
TSSymbol symbol,
size_t child_count,
const int *hidden_symbol_flags,
int dont_count_extras) {
TSTree *ts_stack_reduce(TSStack *stack, TSSymbol symbol, size_t child_count,
const int *hidden_symbol_flags, int dont_count_extras) {
// First, walk down the stack to determine which symbols will be reduced.
// The child node count is known ahead of time, but some children may be
@ -73,11 +69,8 @@ TSTree * ts_stack_reduce(TSStack *stack,
for (size_t i = 0; i < child_count; i++)
children[i] = stack->entries[start_index + i].node;
TSTree *lookahead = ts_tree_make_node(
symbol,
child_count,
children,
hidden_symbol_flags[symbol]);
TSTree *lookahead = ts_tree_make_node(symbol, child_count, children,
hidden_symbol_flags[symbol]);
ts_stack_shrink(stack, stack->size - child_count);
return lookahead;

View file

@ -3,26 +3,27 @@
#include "tree_sitter/parser.h"
#include "runtime/tree.h"
static TSTree * ts_tree_make(TSSymbol symbol, size_t size, size_t offset, int is_hidden) {
static TSTree *ts_tree_make(TSSymbol symbol, size_t size, size_t offset,
int is_hidden) {
TSTree *result = malloc(sizeof(TSTree));
*result = (TSTree) {
.ref_count = 1,
.symbol = symbol,
.size = size,
.offset = offset,
.options = is_hidden ? TSTreeOptionsHidden : 0,
};
*result = (TSTree) { .ref_count = 1,
.symbol = symbol,
.size = size,
.offset = offset,
.options = is_hidden ? TSTreeOptionsHidden : 0, };
return result;
}
TSTree * ts_tree_make_leaf(TSSymbol symbol, size_t size, size_t offset, int is_hidden) {
/* Creates a tree with no children: builds the tree through ts_tree_make with
 * the given symbol, size, offset and hidden flag, then sets its child array
 * to NULL and its child count to zero. */
TSTree *ts_tree_make_leaf(TSSymbol symbol, size_t size, size_t offset,
                          int is_hidden) {
  TSTree *leaf = ts_tree_make(symbol, size, offset, is_hidden);
  leaf->children = NULL;
  leaf->child_count = 0;
  return leaf;
}
TSTree * ts_tree_make_node(TSSymbol symbol, size_t child_count, TSTree **children, int is_hidden) {
TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
TSTree **children, int is_hidden) {
size_t size = 0, offset = 0, visible_child_count = 0;
for (size_t i = 0; i < child_count; i++) {
TSTree *child = children[i];
@ -43,34 +44,34 @@ TSTree * ts_tree_make_node(TSSymbol symbol, size_t child_count, TSTree **childre
TSTreeOptions options = 0;
if (is_hidden)
options |= TSTreeOptionsHidden;
if (child_count == 1 && (ts_tree_is_visible(children[0]) || ts_tree_is_wrapper(children[0])))
if (child_count == 1 &&
(ts_tree_is_visible(children[0]) || ts_tree_is_wrapper(children[0])))
options |= (TSTreeOptionsWrapper | TSTreeOptionsHidden);
TSTree *result = malloc(sizeof(TSTree) + (visible_child_count * sizeof(TSChildWithPosition)));
*result = (TSTree) {
.ref_count = 1,
.symbol = symbol,
.size = size,
.offset = offset,
.options = options,
.children = children,
.child_count = child_count,
.visible_child_count = visible_child_count,
};
TSTree *result = malloc(sizeof(TSTree) +
(visible_child_count * sizeof(TSChildWithPosition)));
*result = (TSTree) { .ref_count = 1,
.symbol = symbol,
.size = size,
.offset = offset,
.options = options,
.children = children,
.child_count = child_count,
.visible_child_count = visible_child_count, };
TSChildWithPosition *visible_children = ts_tree_visible_children(result, NULL);
TSChildWithPosition *visible_children =
ts_tree_visible_children(result, NULL);
for (size_t i = 0, visible_i = 0, child_position = 0; i < child_count; i++) {
TSTree *child = children[i];
if (ts_tree_is_visible(child)) {
visible_children[visible_i] = (TSChildWithPosition) {
.tree = child,
.position = child_position
};
visible_children[visible_i] =
(TSChildWithPosition) { .tree = child, .position = child_position };
visible_i++;
} else {
size_t granchild_count = 0;
TSChildWithPosition *grandchildren = ts_tree_visible_children(child, &granchild_count);
TSChildWithPosition *grandchildren =
ts_tree_visible_children(child, &granchild_count);
for (size_t j = 0; j < granchild_count; j++) {
visible_children[visible_i] = (TSChildWithPosition) {
.tree = grandchildren[j].tree,
@ -86,7 +87,9 @@ TSTree * ts_tree_make_node(TSSymbol symbol, size_t child_count, TSTree **childre
return result;
}
TSTree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const TSSymbol *expected_inputs, size_t size, size_t offset) {
TSTree *ts_tree_make_error(char lookahead_char, size_t expected_input_count,
const TSSymbol *expected_inputs, size_t size,
size_t offset) {
TSTree *result = ts_tree_make(ts_builtin_sym_error, size, offset, 0);
result->lookahead_char = lookahead_char;
result->expected_input_count = expected_input_count;
@ -94,9 +97,7 @@ TSTree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, co
return result;
}
void ts_tree_retain(TSTree *tree) {
tree->ref_count++;
}
void ts_tree_retain(TSTree *tree) { tree->ref_count++; }
void ts_tree_release(TSTree *tree) {
tree->ref_count--;
@ -115,27 +116,33 @@ size_t ts_tree_total_size(const TSTree *tree) {
}
int ts_tree_equals(const TSTree *node1, const TSTree *node2) {
if (node1->symbol != node2->symbol) return 0;
if (node1->symbol != node2->symbol)
return 0;
if (node1->symbol == ts_builtin_sym_error) {
// check error equality
} else {
if (node1->child_count != node2->child_count) return 0;
if (node1->child_count != node2->child_count)
return 0;
for (size_t i = 0; i < node1->child_count; i++)
if (!ts_tree_equals(node1->children[i], node2->children[i])) return 0;
if (!ts_tree_equals(node1->children[i], node2->children[i]))
return 0;
}
return 1;
}
TSTree ** ts_tree_children(const TSTree *tree, size_t *count) {
TSTree **ts_tree_children(const TSTree *tree, size_t *count) {
if (!tree || tree->symbol == ts_builtin_sym_error) {
if (count) *count = 0;
if (count)
*count = 0;
return NULL;
}
if (count) *count = tree->child_count;
if (count)
*count = tree->child_count;
return tree->children;
}
static size_t write_lookahead_to_string(char *string, size_t limit, char lookahead) {
static size_t write_lookahead_to_string(char *string, size_t limit,
char lookahead) {
switch (lookahead) {
case '\0':
return snprintf(string, limit, "<EOF>");
@ -144,7 +151,9 @@ static size_t write_lookahead_to_string(char *string, size_t limit, char lookahe
}
}
static size_t tree_write_to_string(const TSTree *tree, const char **symbol_names, char *string, size_t limit, int is_root) {
static size_t tree_write_to_string(const TSTree *tree,
const char **symbol_names, char *string,
size_t limit, int is_root) {
char *cursor = string;
char **writer = (limit > 0) ? &cursor : &string;
int visible = ts_tree_is_visible(tree);
@ -167,7 +176,8 @@ static size_t tree_write_to_string(const TSTree *tree, const char **symbol_names
}
for (size_t i = 0; i < tree->child_count; i++)
cursor += tree_write_to_string(tree->children[i], symbol_names, *writer, limit, is_root);
cursor += tree_write_to_string(tree->children[i], symbol_names, *writer,
limit, is_root);
if (visible)
cursor += snprintf(*writer, limit, ")");
@ -175,9 +185,10 @@ static size_t tree_write_to_string(const TSTree *tree, const char **symbol_names
return cursor - string;
}
char * ts_tree_string(const TSTree *tree, const char **symbol_names) {
char *ts_tree_string(const TSTree *tree, const char **symbol_names) {
static char SCRATCH_STRING[1];
size_t size = tree_write_to_string(tree, symbol_names, SCRATCH_STRING, 0, 1) + 1;
size_t size =
tree_write_to_string(tree, symbol_names, SCRATCH_STRING, 0, 1) + 1;
char *result = malloc(size * sizeof(char));
tree_write_to_string(tree, symbol_names, result, size, 1);
return result;

View file

@ -61,25 +61,32 @@ static inline size_t ts_tree_visible_child_count(const TSTree *tree) {
return tree->visible_child_count;
}
static inline TSChildWithPosition * ts_tree_visible_children(const TSTree *tree, size_t *count) {
static inline TSChildWithPosition *ts_tree_visible_children(const TSTree *tree,
size_t *count) {
if (tree->symbol == ts_builtin_sym_error || tree->visible_child_count == 0) {
if (count) *count = 0;
if (count)
*count = 0;
return NULL;
} else {
if (count) *count = tree->visible_child_count;
if (count)
*count = tree->visible_child_count;
return (TSChildWithPosition *)(tree + 1);
}
}
TSTree * ts_tree_make_leaf(TSSymbol symbol, size_t size, size_t offset, int is_hidden);
TSTree * ts_tree_make_node(TSSymbol symbol, size_t child_count, TSTree **children, int is_hidden);
TSTree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const TSSymbol *expected_inputs, size_t size, size_t offset);
TSTree *ts_tree_make_leaf(TSSymbol symbol, size_t size, size_t offset,
int is_hidden);
TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
TSTree **children, int is_hidden);
TSTree *ts_tree_make_error(char lookahead_char, size_t expected_input_count,
const TSSymbol *expected_inputs, size_t size,
size_t offset);
void ts_tree_retain(TSTree *tree);
void ts_tree_release(TSTree *tree);
int ts_tree_equals(const TSTree *tree1, const TSTree *tree2);
char * ts_tree_string(const TSTree *tree, const char **names);
char * ts_tree_error_string(const TSTree *tree, const char **names);
TSTree ** ts_tree_children(const TSTree *tree, size_t *count);
char *ts_tree_string(const TSTree *tree, const char **names);
char *ts_tree_error_string(const TSTree *tree, const char **names);
TSTree **ts_tree_children(const TSTree *tree, size_t *count);
size_t ts_tree_total_size(const TSTree *tree);
#ifdef __cplusplus