Remove auxiliary rules from syntax tree

This commit is contained in:
Max Brunsfeld 2014-01-28 22:09:37 -08:00
parent c3b65d22bf
commit 7d297f2f9e
15 changed files with 1107 additions and 724 deletions

View file

@ -7,6 +7,7 @@ extern "C" {
#include "tree.h"
#include "parse_config.h"
#include <stdio.h>
#include <string.h>
// #define TS_DEBUG_PARSE
// #define TS_DEBUG_LEX
@ -88,16 +89,34 @@ static void TSParserShift(TSParser *parser, TSState parse_state) {
parser->stack_size++;
}
static void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count) {
parser->stack_size -= child_count;
static void TSParserReduce(TSParser *parser, TSSymbol symbol, int immediate_child_count, const int *collapse_flags) {
parser->stack_size -= immediate_child_count;
TSTree **children = malloc(child_count * sizeof(TSTree *));
for (int i = 0; i < child_count; i++) {
children[i] = parser->stack[parser->stack_size + i].node;
int total_child_count = 0;
for (int i = 0; i < immediate_child_count; i++) {
TSTree *child = parser->stack[parser->stack_size + i].node;
if (collapse_flags[i]) {
total_child_count += child->child_count;
} else {
total_child_count++;
}
}
TSTree **children = malloc(total_child_count * sizeof(TSTree *));
int n = 0;
for (int i = 0; i < immediate_child_count; i++) {
TSTree *child = parser->stack[parser->stack_size + i].node;
if (collapse_flags[i]) {
memcpy(children + n, child->children, (child->child_count * sizeof(TSTree *)));
n += child->child_count;
} else {
children[n] = child;
n++;
}
}
parser->prev_lookahead_node = parser->lookahead_node;
parser->lookahead_node = TSTreeMake(symbol, child_count, children);
parser->lookahead_node = TSTreeMake(symbol, total_child_count, children);
DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], TSParserParseState(parser));
}
@ -173,8 +192,12 @@ parser->lex_state
#define ADVANCE(state_index) \
{ TSParserAdvance(parser, state_index); goto next_state; }
#define REDUCE(symbol, child_count) \
{ TSParserReduce(parser, symbol, child_count); goto next_state; }
// REDUCE(symbol, child_count, collapse_flags)
// Expands to a block that stores `collapse_flags` in a function-local
// `static const int flags[]`, calls TSParserReduce(parser, symbol,
// child_count, flags), and then jumps to the `next_state` label.
// `collapse_flags` must expand to a brace-enclosed initializer; the code
// generator passes it wrapped as COLLAPSE({...}) so that the commas inside
// the braces stay within a single macro argument.
// NOTE(review): the array length is taken from the initializer, not from
// `child_count` — presumably the two always agree; confirm in the generator.
#define REDUCE(symbol, child_count, collapse_flags) \
{ \
static const int flags[] = collapse_flags; \
TSParserReduce(parser, symbol, child_count, flags); \
goto next_state; \
}
#define ACCEPT_INPUT() \
{ TSParserAcceptInput(parser); goto done; }
@ -203,6 +226,7 @@ printf("Lex error: unexpected state %ud", LEX_STATE());
printf("Parse error: unexpected state %ud", PARSE_STATE());
#define EXPECT(...) __VA_ARGS__
// Pass-through wrapper: expands to its arguments unchanged. Its parentheses
// let a comma-separated, brace-enclosed flag list be handed to REDUCE as one
// macro argument.
#define COLLAPSE(...) __VA_ARGS__
#define FINISH_PARSER() \
done: \

View file

@ -30,22 +30,23 @@ describe("computing closures of item sets", []() {
it("computes the item set closure", [&]() {
ParseItemSet item_set = item_set_closure(ParseItemSet({
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), 0, Symbol("__END__"))
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), {}, Symbol("__END__"))
}), grammar);
AssertThat(item_set, Equals(ParseItemSet({
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("__END__")),
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("+")),
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("*")),
ParseItem(Symbol("T"), grammar.rule(Symbol("T")), 0, Symbol("__END__")),
ParseItem(Symbol("T"), grammar.rule(Symbol("T")), 0, Symbol("+")),
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), 0, Symbol("__END__")),
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), {}, Symbol("__END__")),
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), {}, Symbol("+")),
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), {}, Symbol("*")),
ParseItem(Symbol("T"), grammar.rule(Symbol("T")), {}, Symbol("__END__")),
ParseItem(Symbol("T"), grammar.rule(Symbol("T")), {}, Symbol("+")),
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), {}, Symbol("__END__")),
})));
ParseItemSet next_item_set = *sym_transitions(item_set, grammar)[rules::Symbol("v")];
auto sym1 = rules::Symbol("v");
ParseItemSet next_item_set = *sym_transitions(item_set, grammar)[sym1];
AssertThat(next_item_set, Equals(ParseItemSet({
ParseItem(Symbol("F"), rules::blank(), 1, Symbol("__END__")),
ParseItem(Symbol("F"), rules::blank(), 1, Symbol("*")),
ParseItem(Symbol("F"), rules::blank(), 1, Symbol("+")),
ParseItem(Symbol("F"), rules::blank(), { sym1 }, Symbol("__END__")),
ParseItem(Symbol("F"), rules::blank(), { sym1 }, Symbol("*")),
ParseItem(Symbol("F"), rules::blank(), { sym1 }, Symbol("+")),
})));
});
});

View file

@ -101,7 +101,7 @@ describe("building parse and lex tables", []() {
it("has the right next states", [&]() {
AssertThat(parse_state(2).actions, Equals(unordered_map<Symbol, parse_actions>({
{ Symbol("plus"), parse_actions({ ParseAction::Shift(3) }) },
{ Symbol("__END__"), parse_actions({ ParseAction::Reduce(Symbol("expression"), 1) }) },
{ Symbol("__END__"), parse_actions({ ParseAction::Reduce(Symbol("expression"), { false }) }) },
})));
});
});

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -36,7 +36,7 @@ describe("arithmetic", []() {
TSDocumentSetText(document, "x*(y+z)");
AssertThat(string(TSDocumentToString(document)), Equals(
"(expression (term (factor (variable)) (times) (factor (token1) (expression (term (factor (variable))) (plus) (term (factor (variable)))) (token2))))"));
"(expression (term (factor (variable)) (times) (factor (expression (term (factor (variable))) (plus) (term (factor (variable)))))))"));
});
});

View file

@ -20,15 +20,15 @@ describe("json", []() {
it("parses objects", [&]() {
TSDocumentSetText(document, "{\"key1\":1,\"key2\":2}");
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (token5) (string) (token6) (value (number)) (repeat_helper1 (token2) (string) (token6) (value (number))) (token7)))"));
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (string) (value (number)) (string) (value (number))))"));
TSDocumentSetText(document, "{\"key1\":1}");
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (token5) (string) (token6) (value (number)) (token3) (token7)))"));
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (string) (value (number))))"));
});
it("parses arrays", [&]() {
TSDocumentSetText(document, "[1,2,3]");
AssertThat(string(TSDocumentToString(document)), Equals("(value (array (token1) (value (number)) (repeat_helper2 (token2) (value (number)) (repeat_helper2 (token2) (value (number)))) (token4)))"));
AssertThat(string(TSDocumentToString(document)), Equals("(value (array (value (number)) (value (number)) (value (number))))"));
});
});

View file

@ -5,6 +5,7 @@
using std::string;
using std::to_string;
using std::ostream;
using std::vector;
using tree_sitter::rules::Symbol;
namespace tree_sitter {
@ -33,7 +34,7 @@ namespace tree_sitter {
string("' ") <<
*item.rule <<
string(" ") <<
to_string(item.consumed_sym_count) <<
to_string(item.consumed_symbols.size()) <<
string(" ") <<
item.lookahead_sym <<
string(">");
@ -51,8 +52,8 @@ namespace tree_sitter {
if (other.lhs < lhs) return false;
if (rule->to_string() < other.rule->to_string()) return true;
if (rule->to_string() > other.rule->to_string()) return false;
if (consumed_sym_count < other.consumed_sym_count) return true;
if (consumed_sym_count > other.consumed_sym_count) return false;
if (consumed_symbols < other.consumed_symbols) return true;
if (consumed_symbols > other.consumed_symbols) return false;
if (lookahead_sym < other.lookahead_sym) return true;
return false;
}
@ -65,15 +66,15 @@ namespace tree_sitter {
return lhs_eq && rules_eq;
}
ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym) :
ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, const vector<Symbol> &consumed_symbols, const rules::Symbol &lookahead_sym) :
Item(lhs, rule),
consumed_sym_count(consumed_sym_count),
consumed_symbols(consumed_symbols),
lookahead_sym(lookahead_sym) {}
bool ParseItem::operator==(const ParseItem &other) const {
bool lhs_eq = other.lhs == lhs;
bool rules_eq = (*other.rule == *rule);
bool consumed_sym_counts_eq = (other.consumed_sym_count == consumed_sym_count);
bool consumed_sym_counts_eq = (other.consumed_symbols == consumed_symbols);
bool lookaheads_eq = other.lookahead_sym == lookahead_sym;
return lhs_eq && rules_eq && consumed_sym_counts_eq && lookaheads_eq;
}

View file

@ -6,6 +6,7 @@
#include <set>
#include <unordered_set>
#include "symbol.h"
#include <vector>
namespace tree_sitter {
class Grammar;
@ -29,11 +30,11 @@ namespace tree_sitter {
class ParseItem : public Item {
public:
ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym);
ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, const std::vector<rules::Symbol> &consumed_symbols, const rules::Symbol &lookahead_sym);
bool operator<(const ParseItem &other) const;
bool operator==(const ParseItem &other) const;
const int consumed_sym_count;
const std::vector<rules::Symbol> consumed_symbols;
const rules::Symbol lookahead_sym;
};
@ -61,7 +62,7 @@ namespace std {
return
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
hash<tree_sitter::rules::Rule>()(*item.rule) ^
hash<size_t>()(item.consumed_sym_count) ^
hash<size_t>()(item.consumed_symbols.size()) ^
hash<tree_sitter::rules::Symbol>()(item.lookahead_sym);
}
};

View file

@ -20,7 +20,7 @@ namespace tree_sitter {
auto non_terminal = pair.first;
auto terminals = pair.second;
for (rules::Symbol terminal : terminals) {
auto next_item = ParseItem(non_terminal, grammar.rule(non_terminal), 0, terminal);
auto next_item = ParseItem(non_terminal, grammar.rule(non_terminal), {}, terminal);
add_item(item_set, next_item, grammar);
}
}

View file

@ -43,7 +43,9 @@ namespace tree_sitter {
for (auto transition : rule_transitions(item.rule)) {
auto rule = dynamic_pointer_cast<const rules::Symbol>(transition.first);
if (rule.get()) {
auto new_item = ParseItem(item.lhs, transition.second, item.consumed_sym_count + 1, item.lookahead_sym);
auto consumed_symbols = item.consumed_symbols;
consumed_symbols.push_back(*rule);
auto new_item = ParseItem(item.lhs, transition.second, consumed_symbols, item.lookahead_sym);
auto new_item_set = item_set_closure(ParseItemSet({ new_item }), grammar);
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
}

View file

@ -60,13 +60,21 @@ namespace tree_sitter {
}
}
}
// Builds the per-child flags for a reduce action: one entry per consumed
// symbol, in the same order, holding that symbol's `is_auxiliary` value.
static vector<bool> reduce_flags(const vector<rules::Symbol> &child_symbols) {
    vector<bool> result;
    // The output has exactly one flag per input symbol, so size it up front.
    result.reserve(child_symbols.size());
    // Bind by const reference: copying each Symbol only to read one field
    // is wasted work.
    for (const auto &symbol : child_symbols) {
        result.push_back(symbol.is_auxiliary);
    }
    return result;
}
void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) {
for (ParseItem item : item_set) {
if (item.is_done()) {
ParseAction action = (item.lhs.name == START) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, item.consumed_sym_count);
ParseAction::Reduce(item.lhs, reduce_flags(item.consumed_symbols));
parse_table.add_action(state_index, item.lookahead_sym, action);
}
}
@ -116,7 +124,7 @@ namespace tree_sitter {
lex_grammar(lex_grammar) {};
pair<ParseTable, LexTable> build() {
auto item = ParseItem(START, rules::sym(grammar.start_rule_name), 0, END_OF_INPUT);
auto item = ParseItem(START, rules::sym(grammar.start_rule_name), {}, END_OF_INPUT);
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
add_parse_state(item_set);
return pair<ParseTable, LexTable>(parse_table, lex_table);

View file

@ -118,6 +118,17 @@ namespace tree_sitter {
}
}
// Renders the flags as a ", "-separated list of "1" / "0" tokens, in order.
// An empty flag list yields an empty string.
string collapse_flags(vector<bool> flags) {
    string joined;
    for (auto it = flags.begin(); it != flags.end(); ++it) {
        // Separator goes before every element except the first.
        if (it != flags.begin()) {
            joined += ", ";
        }
        if (*it) {
            joined += "1";
        } else {
            joined += "0";
        }
    }
    return joined;
}
string code_for_parse_actions(const unordered_set<ParseAction> &actions, const unordered_set<rules::Symbol> &expected_inputs) {
auto action = actions.begin();
if (action == actions.end()) {
@ -129,7 +140,7 @@ namespace tree_sitter {
case ParseActionTypeShift:
return "SHIFT(" + to_string(action->state_index) + ");";
case ParseActionTypeReduce:
return "REDUCE(" + symbol_id(action->symbol) + ", " + std::to_string(action->child_symbol_count) + ");";
return "REDUCE(" + symbol_id(action->symbol) + ", " + to_string(action->child_flags.size()) + ", COLLAPSE({" + collapse_flags(action->child_flags) + "}));";
default:
return "";
}

View file

@ -4,37 +4,38 @@ using std::string;
using std::ostream;
using std::to_string;
using std::unordered_set;
using std::vector;
using tree_sitter::rules::Symbol;
namespace tree_sitter {
// Action
ParseAction::ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count) :
ParseAction::ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, const vector<bool> &child_flags) :
type(type),
state_index(state_index),
symbol(symbol),
child_symbol_count(child_symbol_count) {};
child_flags(child_flags) {};
ParseAction ParseAction::Error() {
return ParseAction(ParseActionTypeError, -1, Symbol(""), -1);
return ParseAction(ParseActionTypeError, -1, Symbol(""), {});
}
ParseAction ParseAction::Accept() {
return ParseAction(ParseActionTypeAccept, -1, Symbol(""), -1);
return ParseAction(ParseActionTypeAccept, -1, Symbol(""), {});
}
ParseAction ParseAction::Shift(size_t state_index) {
return ParseAction(ParseActionTypeShift, state_index, Symbol(""), -1);
return ParseAction(ParseActionTypeShift, state_index, Symbol(""), {});
}
ParseAction ParseAction::Reduce(Symbol symbol, size_t child_symbol_count) {
return ParseAction(ParseActionTypeReduce, -1, symbol, child_symbol_count);
ParseAction ParseAction::Reduce(Symbol symbol, const vector<bool> &child_flags) {
return ParseAction(ParseActionTypeReduce, -1, symbol, child_flags);
}
bool ParseAction::operator==(const ParseAction &other) const {
bool types_eq = type == other.type;
bool state_indices_eq = state_index == other.state_index;
bool child_symbol_counts_eq = child_symbol_count == other.child_symbol_count;
return types_eq && state_indices_eq && child_symbol_counts_eq;
bool child_flags_eq = child_flags == other.child_flags;
return types_eq && state_indices_eq && child_flags_eq;
}
ostream& operator<<(ostream &stream, const ParseAction &action) {

View file

@ -15,16 +15,16 @@ namespace tree_sitter {
} ParseActionType;
class ParseAction {
ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count);
ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, const std::vector<bool> &child_flags);
public:
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(size_t state_index);
static ParseAction Reduce(rules::Symbol symbol, size_t child_symbol_count);
static ParseAction Reduce(rules::Symbol symbol, const std::vector<bool> &child_flags);
bool operator==(const ParseAction &action) const;
ParseActionType type;
size_t child_symbol_count;
std::vector<bool> child_flags;
rules::Symbol symbol;
size_t state_index;
};
@ -40,7 +40,7 @@ namespace std {
hash<int>()(action.type) ^
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
hash<size_t>()(action.state_index) ^
hash<size_t>()(action.child_symbol_count));
hash<size_t>()(action.child_flags.size()));
}
};
}