Remove auxiliary rules from syntax tree

This commit is contained in:
Max Brunsfeld 2014-01-28 22:09:37 -08:00
parent c3b65d22bf
commit 7d297f2f9e
15 changed files with 1107 additions and 724 deletions

View file

@ -5,6 +5,7 @@
using std::string;
using std::to_string;
using std::ostream;
using std::vector;
using tree_sitter::rules::Symbol;
namespace tree_sitter {
@ -33,7 +34,7 @@ namespace tree_sitter {
string("' ") <<
*item.rule <<
string(" ") <<
to_string(item.consumed_sym_count) <<
to_string(item.consumed_symbols.size()) <<
string(" ") <<
item.lookahead_sym <<
string(">");
@ -51,8 +52,8 @@ namespace tree_sitter {
if (other.lhs < lhs) return false;
if (rule->to_string() < other.rule->to_string()) return true;
if (rule->to_string() > other.rule->to_string()) return false;
if (consumed_sym_count < other.consumed_sym_count) return true;
if (consumed_sym_count > other.consumed_sym_count) return false;
if (consumed_symbols < other.consumed_symbols) return true;
if (consumed_symbols > other.consumed_symbols) return false;
if (lookahead_sym < other.lookahead_sym) return true;
return false;
}
@ -65,15 +66,15 @@ namespace tree_sitter {
return lhs_eq && rules_eq;
}
// Constructs a parse item: `lhs` and `rule` are forwarded to the Item base,
// and the consumed-symbol data and lookahead symbol are stored in the
// members of the same names.
// NOTE(review): this span is a rendered diff that shows both the removed
// form (int consumed_sym_count) and the added form
// (vector<Symbol> consumed_symbols) of the signature and of the initializer;
// only one line of each pair belongs in the actual source file.
ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym) :
ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, const vector<Symbol> &consumed_symbols, const rules::Symbol &lookahead_sym) :
Item(lhs, rule),
consumed_sym_count(consumed_sym_count),
consumed_symbols(consumed_symbols),
lookahead_sym(lookahead_sym) {}
// Two parse items are equal when their lhs symbols, their rules (compared by
// value through the rule pointers), their consumed-symbol data, and their
// lookahead symbols are all equal.
// NOTE(review): the two `consumed_sym_counts_eq` lines are the removed and
// added sides of a rendered diff; only one belongs in the actual source file.
bool ParseItem::operator==(const ParseItem &other) const {
bool lhs_eq = other.lhs == lhs;
// The rules are dereferenced so the comparison is on the rule objects, not on the pointers.
bool rules_eq = (*other.rule == *rule);
bool consumed_sym_counts_eq = (other.consumed_sym_count == consumed_sym_count);
// The variable keeps its count-based name, but this form compares the whole
// consumed_symbols vectors element by element.
bool consumed_sym_counts_eq = (other.consumed_symbols == consumed_symbols);
bool lookaheads_eq = other.lookahead_sym == lookahead_sym;
return lhs_eq && rules_eq && consumed_sym_counts_eq && lookaheads_eq;
}

View file

@ -6,6 +6,7 @@
#include <set>
#include <unordered_set>
#include "symbol.h"
#include <vector>
namespace tree_sitter {
class Grammar;
@ -29,11 +30,11 @@ namespace tree_sitter {
// An Item extended with the symbols consumed so far and a lookahead symbol.
// NOTE(review): this span is a rendered diff; the constructor declaration and
// the consumed_* member each appear in their removed (int count) and added
// (vector of symbols) forms. Only one of each pair belongs in the real header.
class ParseItem : public Item {
public:
ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym);
ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, const std::vector<rules::Symbol> &consumed_symbols, const rules::Symbol &lookahead_sym);
// Ordering and equality over parse items.
bool operator<(const ParseItem &other) const;
bool operator==(const ParseItem &other) const;
// Removed form: number of symbols consumed.
const int consumed_sym_count;
// Added form: the consumed symbols themselves, set once at construction (const member).
const std::vector<rules::Symbol> consumed_symbols;
// Lookahead symbol associated with this item; set once at construction.
const rules::Symbol lookahead_sym;
};
@ -61,7 +62,7 @@ namespace std {
return
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
hash<tree_sitter::rules::Rule>()(*item.rule) ^
hash<size_t>()(item.consumed_sym_count) ^
hash<size_t>()(item.consumed_symbols.size()) ^
hash<tree_sitter::rules::Symbol>()(item.lookahead_sym);
}
};

View file

@ -20,7 +20,7 @@ namespace tree_sitter {
auto non_terminal = pair.first;
auto terminals = pair.second;
for (rules::Symbol terminal : terminals) {
auto next_item = ParseItem(non_terminal, grammar.rule(non_terminal), 0, terminal);
auto next_item = ParseItem(non_terminal, grammar.rule(non_terminal), {}, terminal);
add_item(item_set, next_item, grammar);
}
}

View file

@ -43,7 +43,9 @@ namespace tree_sitter {
for (auto transition : rule_transitions(item.rule)) {
auto rule = dynamic_pointer_cast<const rules::Symbol>(transition.first);
if (rule.get()) {
auto new_item = ParseItem(item.lhs, transition.second, item.consumed_sym_count + 1, item.lookahead_sym);
auto consumed_symbols = item.consumed_symbols;
consumed_symbols.push_back(*rule);
auto new_item = ParseItem(item.lhs, transition.second, consumed_symbols, item.lookahead_sym);
auto new_item_set = item_set_closure(ParseItemSet({ new_item }), grammar);
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
}

View file

@ -60,13 +60,21 @@ namespace tree_sitter {
}
}
}
// Builds one flag per child symbol, in the same order as `child_symbols`.
// Each flag is the corresponding symbol's `is_auxiliary` value.
// Returns an empty vector when `child_symbols` is empty.
static vector<bool> reduce_flags(const vector<rules::Symbol> &child_symbols) {
    vector<bool> result;
    // The final size is known up front, so allocate once.
    result.reserve(child_symbols.size());
    // Bind by const reference so each Symbol is read in place rather than copied.
    for (const auto &symbol : child_symbols) {
        result.push_back(symbol.is_auxiliary);
    }
    return result;
}
// For each item in `item_set` for which is_done() is true, adds one action to
// parse_table under (state_index, item.lookahead_sym): Accept when the item's
// lhs is named START, otherwise a Reduce for item.lhs.
// NOTE(review): the two consecutive ParseAction::Reduce(...) lines are the
// removed (count-based) and added (flag-based) sides of a rendered diff; only
// one belongs in the actual source file.
void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) {
for (ParseItem item : item_set) {
if (item.is_done()) {
ParseAction action = (item.lhs.name == START) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, item.consumed_sym_count);
// Added form: passes one flag per consumed symbol, produced by reduce_flags.
ParseAction::Reduce(item.lhs, reduce_flags(item.consumed_symbols));
parse_table.add_action(state_index, item.lookahead_sym, action);
}
}
@ -116,7 +124,7 @@ namespace tree_sitter {
lex_grammar(lex_grammar) {};
pair<ParseTable, LexTable> build() {
auto item = ParseItem(START, rules::sym(grammar.start_rule_name), 0, END_OF_INPUT);
auto item = ParseItem(START, rules::sym(grammar.start_rule_name), {}, END_OF_INPUT);
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
add_parse_state(item_set);
return pair<ParseTable, LexTable>(parse_table, lex_table);

View file

@ -118,6 +118,17 @@ namespace tree_sitter {
}
}
// Renders `flags` as a comma-separated list, writing "1" for true and "0"
// for false, with ", " between entries (e.g. {true, false} -> "1, 0").
// Returns an empty string for an empty vector.
string collapse_flags(const vector<bool> &flags) {
    string result;
    for (const bool flag : flags) {
        // Every entry appends one character, so a non-empty result means at
        // least one entry has already been written and a separator is due.
        if (!result.empty()) result += ", ";
        result += (flag ? "1" : "0");
    }
    return result;
}
string code_for_parse_actions(const unordered_set<ParseAction> &actions, const unordered_set<rules::Symbol> &expected_inputs) {
auto action = actions.begin();
if (action == actions.end()) {
@ -129,7 +140,7 @@ namespace tree_sitter {
case ParseActionTypeShift:
return "SHIFT(" + to_string(action->state_index) + ");";
case ParseActionTypeReduce:
return "REDUCE(" + symbol_id(action->symbol) + ", " + std::to_string(action->child_symbol_count) + ");";
return "REDUCE(" + symbol_id(action->symbol) + ", " + to_string(action->child_flags.size()) + ", COLLAPSE({" + collapse_flags(action->child_flags) + "}));";
default:
return "";
}

View file

@ -4,37 +4,38 @@ using std::string;
using std::ostream;
using std::to_string;
using std::unordered_set;
using std::vector;
using tree_sitter::rules::Symbol;
namespace tree_sitter {
// Action
// Stores the four arguments in the members of the same names; no validation
// is performed here.
// NOTE(review): this span is a rendered diff showing both the removed form
// (size_t child_symbol_count) and the added form (vector<bool> child_flags)
// of the signature and of the last initializer; only one of each pair belongs
// in the actual source file.
ParseAction::ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count) :
ParseAction::ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, const vector<bool> &child_flags) :
type(type),
state_index(state_index),
symbol(symbol),
child_symbol_count(child_symbol_count) {};
child_flags(child_flags) {};
// Returns an action of type ParseActionTypeError with a Symbol built from the
// empty string. The -1 state index is converted to size_t by the constructor
// parameter, i.e. it becomes the largest size_t value (presumably a "no state"
// placeholder — confirm against callers).
// NOTE(review): the two return lines are the removed (-1 child count) and
// added (empty child flags) sides of a rendered diff; only one is real.
ParseAction ParseAction::Error() {
return ParseAction(ParseActionTypeError, -1, Symbol(""), -1);
return ParseAction(ParseActionTypeError, -1, Symbol(""), {});
}
// Returns an action of type ParseActionTypeAccept with a Symbol built from
// the empty string. The -1 state index is converted to size_t by the
// constructor parameter, i.e. it becomes the largest size_t value (presumably
// a "no state" placeholder — confirm against callers).
// NOTE(review): the two return lines are the removed (-1 child count) and
// added (empty child flags) sides of a rendered diff; only one is real.
ParseAction ParseAction::Accept() {
return ParseAction(ParseActionTypeAccept, -1, Symbol(""), -1);
return ParseAction(ParseActionTypeAccept, -1, Symbol(""), {});
}
// Returns an action of type ParseActionTypeShift carrying `state_index`,
// with a Symbol built from the empty string.
// NOTE(review): the two return lines are the removed (-1 child count) and
// added (empty child flags) sides of a rendered diff; only one is real.
ParseAction ParseAction::Shift(size_t state_index) {
return ParseAction(ParseActionTypeShift, state_index, Symbol(""), -1);
return ParseAction(ParseActionTypeShift, state_index, Symbol(""), {});
}
// Returns an action of type ParseActionTypeReduce for `symbol`, carrying the
// child data passed in. The -1 state index is converted to size_t by the
// constructor parameter, i.e. it becomes the largest size_t value.
// NOTE(review): the signature and the return statement each appear twice —
// the removed form (size_t child_symbol_count) followed by the added form
// (vector<bool> child_flags) of a rendered diff; only one of each pair is real.
ParseAction ParseAction::Reduce(Symbol symbol, size_t child_symbol_count) {
return ParseAction(ParseActionTypeReduce, -1, symbol, child_symbol_count);
ParseAction ParseAction::Reduce(Symbol symbol, const vector<bool> &child_flags) {
return ParseAction(ParseActionTypeReduce, -1, symbol, child_flags);
}
// Two actions are equal when their type, state index, symbol, and child flags
// all match.
// `symbol` takes part in the comparison because the hash used for ParseAction
// mixes it in: if equality ignored it, two Reduce actions for different
// symbols would compare equal yet hash differently, which violates the
// contract unordered containers of ParseAction rely on.
bool ParseAction::operator==(const ParseAction &other) const {
    bool types_eq = type == other.type;
    bool state_indices_eq = state_index == other.state_index;
    bool symbols_eq = symbol == other.symbol;
    bool child_flags_eq = child_flags == other.child_flags;
    return types_eq && state_indices_eq && symbols_eq && child_flags_eq;
}
ostream& operator<<(ostream &stream, const ParseAction &action) {

View file

@ -15,16 +15,16 @@ namespace tree_sitter {
} ParseActionType;
// A single parse-table action: its type plus the state index, symbol, and
// child data that go with it.
// NOTE(review): this span is a rendered diff; the constructor, the Reduce
// factory, and the child_* member each appear in their removed
// (size_t child_symbol_count) and added (vector<bool> child_flags) forms.
// Only one of each pair belongs in the real header.
class ParseAction {
// Declared before `public:`, so the constructor is private; instances are
// obtained through the static factories below.
ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count);
ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, const std::vector<bool> &child_flags);
public:
// Factories, one per action type.
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(size_t state_index);
static ParseAction Reduce(rules::Symbol symbol, size_t child_symbol_count);
static ParseAction Reduce(rules::Symbol symbol, const std::vector<bool> &child_flags);
bool operator==(const ParseAction &action) const;
ParseActionType type;
// Removed form: number of child symbols.
size_t child_symbol_count;
// Added form: one flag per child symbol.
std::vector<bool> child_flags;
rules::Symbol symbol;
size_t state_index;
};
@ -40,7 +40,7 @@ namespace std {
hash<int>()(action.type) ^
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
hash<size_t>()(action.state_index) ^
hash<size_t>()(action.child_symbol_count));
hash<size_t>()(action.child_flags.size()));
}
};
}