Remove auxiliary rules from syntax tree
This commit is contained in:
parent
c3b65d22bf
commit
7d297f2f9e
15 changed files with 1107 additions and 724 deletions
|
|
@ -5,6 +5,7 @@
|
|||
using std::string;
|
||||
using std::to_string;
|
||||
using std::ostream;
|
||||
using std::vector;
|
||||
using tree_sitter::rules::Symbol;
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -33,7 +34,7 @@ namespace tree_sitter {
|
|||
string("' ") <<
|
||||
*item.rule <<
|
||||
string(" ") <<
|
||||
to_string(item.consumed_sym_count) <<
|
||||
to_string(item.consumed_symbols.size()) <<
|
||||
string(" ") <<
|
||||
item.lookahead_sym <<
|
||||
string(">");
|
||||
|
|
@ -51,8 +52,8 @@ namespace tree_sitter {
|
|||
if (other.lhs < lhs) return false;
|
||||
if (rule->to_string() < other.rule->to_string()) return true;
|
||||
if (rule->to_string() > other.rule->to_string()) return false;
|
||||
if (consumed_sym_count < other.consumed_sym_count) return true;
|
||||
if (consumed_sym_count > other.consumed_sym_count) return false;
|
||||
if (consumed_symbols < other.consumed_symbols) return true;
|
||||
if (consumed_symbols > other.consumed_symbols) return false;
|
||||
if (lookahead_sym < other.lookahead_sym) return true;
|
||||
return false;
|
||||
}
|
||||
|
|
@ -65,15 +66,15 @@ namespace tree_sitter {
|
|||
return lhs_eq && rules_eq;
|
||||
}
|
||||
|
||||
ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym) :
|
||||
ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, const vector<Symbol> &consumed_symbols, const rules::Symbol &lookahead_sym) :
|
||||
Item(lhs, rule),
|
||||
consumed_sym_count(consumed_sym_count),
|
||||
consumed_symbols(consumed_symbols),
|
||||
lookahead_sym(lookahead_sym) {}
|
||||
|
||||
bool ParseItem::operator==(const ParseItem &other) const {
|
||||
bool lhs_eq = other.lhs == lhs;
|
||||
bool rules_eq = (*other.rule == *rule);
|
||||
bool consumed_sym_counts_eq = (other.consumed_sym_count == consumed_sym_count);
|
||||
bool consumed_sym_counts_eq = (other.consumed_symbols == consumed_symbols);
|
||||
bool lookaheads_eq = other.lookahead_sym == lookahead_sym;
|
||||
return lhs_eq && rules_eq && consumed_sym_counts_eq && lookaheads_eq;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include <set>
|
||||
#include <unordered_set>
|
||||
#include "symbol.h"
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
|
@ -29,11 +30,11 @@ namespace tree_sitter {
|
|||
|
||||
class ParseItem : public Item {
|
||||
public:
|
||||
ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym);
|
||||
ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, const std::vector<rules::Symbol> &consumed_symbols, const rules::Symbol &lookahead_sym);
|
||||
bool operator<(const ParseItem &other) const;
|
||||
bool operator==(const ParseItem &other) const;
|
||||
|
||||
const int consumed_sym_count;
|
||||
const std::vector<rules::Symbol> consumed_symbols;
|
||||
const rules::Symbol lookahead_sym;
|
||||
};
|
||||
|
||||
|
|
@ -61,7 +62,7 @@ namespace std {
|
|||
return
|
||||
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rules::Rule>()(*item.rule) ^
|
||||
hash<size_t>()(item.consumed_sym_count) ^
|
||||
hash<size_t>()(item.consumed_symbols.size()) ^
|
||||
hash<tree_sitter::rules::Symbol>()(item.lookahead_sym);
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ namespace tree_sitter {
|
|||
auto non_terminal = pair.first;
|
||||
auto terminals = pair.second;
|
||||
for (rules::Symbol terminal : terminals) {
|
||||
auto next_item = ParseItem(non_terminal, grammar.rule(non_terminal), 0, terminal);
|
||||
auto next_item = ParseItem(non_terminal, grammar.rule(non_terminal), {}, terminal);
|
||||
add_item(item_set, next_item, grammar);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,7 +43,9 @@ namespace tree_sitter {
|
|||
for (auto transition : rule_transitions(item.rule)) {
|
||||
auto rule = dynamic_pointer_cast<const rules::Symbol>(transition.first);
|
||||
if (rule.get()) {
|
||||
auto new_item = ParseItem(item.lhs, transition.second, item.consumed_sym_count + 1, item.lookahead_sym);
|
||||
auto consumed_symbols = item.consumed_symbols;
|
||||
consumed_symbols.push_back(*rule);
|
||||
auto new_item = ParseItem(item.lhs, transition.second, consumed_symbols, item.lookahead_sym);
|
||||
auto new_item_set = item_set_closure(ParseItemSet({ new_item }), grammar);
|
||||
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,13 +60,21 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
static vector<bool> reduce_flags(const vector<rules::Symbol> &child_symbols) {
|
||||
vector<bool> result;
|
||||
for (auto symbol : child_symbols) {
|
||||
result.push_back(symbol.is_auxiliary);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) {
|
||||
for (ParseItem item : item_set) {
|
||||
if (item.is_done()) {
|
||||
ParseAction action = (item.lhs.name == START) ?
|
||||
ParseAction::Accept() :
|
||||
ParseAction::Reduce(item.lhs, item.consumed_sym_count);
|
||||
ParseAction::Reduce(item.lhs, reduce_flags(item.consumed_symbols));
|
||||
parse_table.add_action(state_index, item.lookahead_sym, action);
|
||||
}
|
||||
}
|
||||
|
|
@ -116,7 +124,7 @@ namespace tree_sitter {
|
|||
lex_grammar(lex_grammar) {};
|
||||
|
||||
pair<ParseTable, LexTable> build() {
|
||||
auto item = ParseItem(START, rules::sym(grammar.start_rule_name), 0, END_OF_INPUT);
|
||||
auto item = ParseItem(START, rules::sym(grammar.start_rule_name), {}, END_OF_INPUT);
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
|
||||
add_parse_state(item_set);
|
||||
return pair<ParseTable, LexTable>(parse_table, lex_table);
|
||||
|
|
|
|||
|
|
@ -118,6 +118,17 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
// Renders a list of flags as comma-separated digits: each `true` becomes "1",
// each `false` becomes "0", joined with ", " (e.g. {true, false} -> "1, 0").
// An empty list yields an empty string.
//
// flags: the flags to render, in output order (taken by value, as before).
// Returns the joined text with no leading or trailing separator.
std::string collapse_flags(std::vector<bool> flags) {
    std::string result;
    // Empty before the first element, ", " afterwards, so the separator only
    // ever appears between two entries.
    const char *separator = "";
    for (bool flag : flags) {
        result += separator;
        result += (flag ? "1" : "0");
        separator = ", ";
    }
    return result;
}
|
||||
|
||||
string code_for_parse_actions(const unordered_set<ParseAction> &actions, const unordered_set<rules::Symbol> &expected_inputs) {
|
||||
auto action = actions.begin();
|
||||
if (action == actions.end()) {
|
||||
|
|
@ -129,7 +140,7 @@ namespace tree_sitter {
|
|||
case ParseActionTypeShift:
|
||||
return "SHIFT(" + to_string(action->state_index) + ");";
|
||||
case ParseActionTypeReduce:
|
||||
return "REDUCE(" + symbol_id(action->symbol) + ", " + std::to_string(action->child_symbol_count) + ");";
|
||||
return "REDUCE(" + symbol_id(action->symbol) + ", " + to_string(action->child_flags.size()) + ", COLLAPSE({" + collapse_flags(action->child_flags) + "}));";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,37 +4,38 @@ using std::string;
|
|||
using std::ostream;
|
||||
using std::to_string;
|
||||
using std::unordered_set;
|
||||
using std::vector;
|
||||
using tree_sitter::rules::Symbol;
|
||||
|
||||
namespace tree_sitter {
|
||||
// Action
|
||||
ParseAction::ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count) :
|
||||
ParseAction::ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, const vector<bool> &child_flags) :
|
||||
type(type),
|
||||
state_index(state_index),
|
||||
symbol(symbol),
|
||||
child_symbol_count(child_symbol_count) {};
|
||||
child_flags(child_flags) {};
|
||||
|
||||
ParseAction ParseAction::Error() {
|
||||
return ParseAction(ParseActionTypeError, -1, Symbol(""), -1);
|
||||
return ParseAction(ParseActionTypeError, -1, Symbol(""), {});
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Accept() {
|
||||
return ParseAction(ParseActionTypeAccept, -1, Symbol(""), -1);
|
||||
return ParseAction(ParseActionTypeAccept, -1, Symbol(""), {});
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Shift(size_t state_index) {
|
||||
return ParseAction(ParseActionTypeShift, state_index, Symbol(""), -1);
|
||||
return ParseAction(ParseActionTypeShift, state_index, Symbol(""), {});
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Reduce(Symbol symbol, size_t child_symbol_count) {
|
||||
return ParseAction(ParseActionTypeReduce, -1, symbol, child_symbol_count);
|
||||
ParseAction ParseAction::Reduce(Symbol symbol, const vector<bool> &child_flags) {
|
||||
return ParseAction(ParseActionTypeReduce, -1, symbol, child_flags);
|
||||
}
|
||||
|
||||
bool ParseAction::operator==(const ParseAction &other) const {
|
||||
bool types_eq = type == other.type;
|
||||
bool state_indices_eq = state_index == other.state_index;
|
||||
bool child_symbol_counts_eq = child_symbol_count == other.child_symbol_count;
|
||||
return types_eq && state_indices_eq && child_symbol_counts_eq;
|
||||
bool child_flags_eq = child_flags == other.child_flags;
|
||||
return types_eq && state_indices_eq && child_flags_eq;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const ParseAction &action) {
|
||||
|
|
|
|||
|
|
@ -15,16 +15,16 @@ namespace tree_sitter {
|
|||
} ParseActionType;
|
||||
|
||||
class ParseAction {
|
||||
ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count);
|
||||
ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, const std::vector<bool> &child_flags);
|
||||
public:
|
||||
static ParseAction Accept();
|
||||
static ParseAction Error();
|
||||
static ParseAction Shift(size_t state_index);
|
||||
static ParseAction Reduce(rules::Symbol symbol, size_t child_symbol_count);
|
||||
static ParseAction Reduce(rules::Symbol symbol, const std::vector<bool> &child_flags);
|
||||
bool operator==(const ParseAction &action) const;
|
||||
|
||||
ParseActionType type;
|
||||
size_t child_symbol_count;
|
||||
std::vector<bool> child_flags;
|
||||
rules::Symbol symbol;
|
||||
size_t state_index;
|
||||
};
|
||||
|
|
@ -40,7 +40,7 @@ namespace std {
|
|||
hash<int>()(action.type) ^
|
||||
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
|
||||
hash<size_t>()(action.state_index) ^
|
||||
hash<size_t>()(action.child_symbol_count));
|
||||
hash<size_t>()(action.child_flags.size()));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue