Remove auxiliary rules from syntax tree
This commit is contained in:
parent
c3b65d22bf
commit
7d297f2f9e
15 changed files with 1107 additions and 724 deletions
|
|
@ -7,6 +7,7 @@ extern "C" {
|
|||
#include "tree.h"
|
||||
#include "parse_config.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
// #define TS_DEBUG_PARSE
|
||||
// #define TS_DEBUG_LEX
|
||||
|
|
@ -88,16 +89,34 @@ static void TSParserShift(TSParser *parser, TSState parse_state) {
|
|||
parser->stack_size++;
|
||||
}
|
||||
|
||||
static void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count) {
|
||||
parser->stack_size -= child_count;
|
||||
static void TSParserReduce(TSParser *parser, TSSymbol symbol, int immediate_child_count, const int *collapse_flags) {
|
||||
parser->stack_size -= immediate_child_count;
|
||||
|
||||
TSTree **children = malloc(child_count * sizeof(TSTree *));
|
||||
for (int i = 0; i < child_count; i++) {
|
||||
children[i] = parser->stack[parser->stack_size + i].node;
|
||||
int total_child_count = 0;
|
||||
for (int i = 0; i < immediate_child_count; i++) {
|
||||
TSTree *child = parser->stack[parser->stack_size + i].node;
|
||||
if (collapse_flags[i]) {
|
||||
total_child_count += child->child_count;
|
||||
} else {
|
||||
total_child_count++;
|
||||
}
|
||||
}
|
||||
|
||||
TSTree **children = malloc(total_child_count * sizeof(TSTree *));
|
||||
int n = 0;
|
||||
for (int i = 0; i < immediate_child_count; i++) {
|
||||
TSTree *child = parser->stack[parser->stack_size + i].node;
|
||||
if (collapse_flags[i]) {
|
||||
memcpy(children + n, child->children, (child->child_count * sizeof(TSTree *)));
|
||||
n += child->child_count;
|
||||
} else {
|
||||
children[n] = child;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
parser->prev_lookahead_node = parser->lookahead_node;
|
||||
parser->lookahead_node = TSTreeMake(symbol, child_count, children);
|
||||
parser->lookahead_node = TSTreeMake(symbol, total_child_count, children);
|
||||
DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], TSParserParseState(parser));
|
||||
}
|
||||
|
||||
|
|
@ -173,8 +192,12 @@ parser->lex_state
|
|||
#define ADVANCE(state_index) \
|
||||
{ TSParserAdvance(parser, state_index); goto next_state; }
|
||||
|
||||
#define REDUCE(symbol, child_count) \
|
||||
{ TSParserReduce(parser, symbol, child_count); goto next_state; }
|
||||
#define REDUCE(symbol, child_count, collapse_flags) \
|
||||
{ \
|
||||
static const int flags[] = collapse_flags; \
|
||||
TSParserReduce(parser, symbol, child_count, flags); \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define ACCEPT_INPUT() \
|
||||
{ TSParserAcceptInput(parser); goto done; }
|
||||
|
|
@ -203,6 +226,7 @@ printf("Lex error: unexpected state %ud", LEX_STATE());
|
|||
printf("Parse error: unexpected state %ud", PARSE_STATE());
|
||||
|
||||
#define EXPECT(...) __VA_ARGS__
|
||||
#define COLLAPSE(...) __VA_ARGS__
|
||||
|
||||
#define FINISH_PARSER() \
|
||||
done: \
|
||||
|
|
|
|||
|
|
@ -30,22 +30,23 @@ describe("computing closures of item sets", []() {
|
|||
|
||||
it("computes the item set closure", [&]() {
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({
|
||||
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), 0, Symbol("__END__"))
|
||||
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), {}, Symbol("__END__"))
|
||||
}), grammar);
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("__END__")),
|
||||
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("+")),
|
||||
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("*")),
|
||||
ParseItem(Symbol("T"), grammar.rule(Symbol("T")), 0, Symbol("__END__")),
|
||||
ParseItem(Symbol("T"), grammar.rule(Symbol("T")), 0, Symbol("+")),
|
||||
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), 0, Symbol("__END__")),
|
||||
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), {}, Symbol("__END__")),
|
||||
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), {}, Symbol("+")),
|
||||
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), {}, Symbol("*")),
|
||||
ParseItem(Symbol("T"), grammar.rule(Symbol("T")), {}, Symbol("__END__")),
|
||||
ParseItem(Symbol("T"), grammar.rule(Symbol("T")), {}, Symbol("+")),
|
||||
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), {}, Symbol("__END__")),
|
||||
})));
|
||||
|
||||
ParseItemSet next_item_set = *sym_transitions(item_set, grammar)[rules::Symbol("v")];
|
||||
auto sym1 = rules::Symbol("v");
|
||||
ParseItemSet next_item_set = *sym_transitions(item_set, grammar)[sym1];
|
||||
AssertThat(next_item_set, Equals(ParseItemSet({
|
||||
ParseItem(Symbol("F"), rules::blank(), 1, Symbol("__END__")),
|
||||
ParseItem(Symbol("F"), rules::blank(), 1, Symbol("*")),
|
||||
ParseItem(Symbol("F"), rules::blank(), 1, Symbol("+")),
|
||||
ParseItem(Symbol("F"), rules::blank(), { sym1 }, Symbol("__END__")),
|
||||
ParseItem(Symbol("F"), rules::blank(), { sym1 }, Symbol("*")),
|
||||
ParseItem(Symbol("F"), rules::blank(), { sym1 }, Symbol("+")),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ describe("building parse and lex tables", []() {
|
|||
it("has the right next states", [&]() {
|
||||
AssertThat(parse_state(2).actions, Equals(unordered_map<Symbol, parse_actions>({
|
||||
{ Symbol("plus"), parse_actions({ ParseAction::Shift(3) }) },
|
||||
{ Symbol("__END__"), parse_actions({ ParseAction::Reduce(Symbol("expression"), 1) }) },
|
||||
{ Symbol("__END__"), parse_actions({ ParseAction::Reduce(Symbol("expression"), { false }) }) },
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
702
spec/fixtures/parsers/arithmetic.c
vendored
702
spec/fixtures/parsers/arithmetic.c
vendored
File diff suppressed because it is too large
Load diff
978
spec/fixtures/parsers/json.c
vendored
978
spec/fixtures/parsers/json.c
vendored
File diff suppressed because it is too large
Load diff
|
|
@ -36,7 +36,7 @@ describe("arithmetic", []() {
|
|||
|
||||
TSDocumentSetText(document, "x*(y+z)");
|
||||
AssertThat(string(TSDocumentToString(document)), Equals(
|
||||
"(expression (term (factor (variable)) (times) (factor (token1) (expression (term (factor (variable))) (plus) (term (factor (variable)))) (token2))))"));
|
||||
"(expression (term (factor (variable)) (times) (factor (expression (term (factor (variable))) (plus) (term (factor (variable)))))))"));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -20,15 +20,15 @@ describe("json", []() {
|
|||
|
||||
it("parses objects", [&]() {
|
||||
TSDocumentSetText(document, "{\"key1\":1,\"key2\":2}");
|
||||
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (token5) (string) (token6) (value (number)) (repeat_helper1 (token2) (string) (token6) (value (number))) (token7)))"));
|
||||
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (string) (value (number)) (string) (value (number))))"));
|
||||
|
||||
TSDocumentSetText(document, "{\"key1\":1}");
|
||||
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (token5) (string) (token6) (value (number)) (token3) (token7)))"));
|
||||
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (string) (value (number))))"));
|
||||
});
|
||||
|
||||
it("parses arrays", [&]() {
|
||||
TSDocumentSetText(document, "[1,2,3]");
|
||||
AssertThat(string(TSDocumentToString(document)), Equals("(value (array (token1) (value (number)) (repeat_helper2 (token2) (value (number)) (repeat_helper2 (token2) (value (number)))) (token4)))"));
|
||||
AssertThat(string(TSDocumentToString(document)), Equals("(value (array (value (number)) (value (number)) (value (number))))"));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
using std::string;
|
||||
using std::to_string;
|
||||
using std::ostream;
|
||||
using std::vector;
|
||||
using tree_sitter::rules::Symbol;
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -33,7 +34,7 @@ namespace tree_sitter {
|
|||
string("' ") <<
|
||||
*item.rule <<
|
||||
string(" ") <<
|
||||
to_string(item.consumed_sym_count) <<
|
||||
to_string(item.consumed_symbols.size()) <<
|
||||
string(" ") <<
|
||||
item.lookahead_sym <<
|
||||
string(">");
|
||||
|
|
@ -51,8 +52,8 @@ namespace tree_sitter {
|
|||
if (other.lhs < lhs) return false;
|
||||
if (rule->to_string() < other.rule->to_string()) return true;
|
||||
if (rule->to_string() > other.rule->to_string()) return false;
|
||||
if (consumed_sym_count < other.consumed_sym_count) return true;
|
||||
if (consumed_sym_count > other.consumed_sym_count) return false;
|
||||
if (consumed_symbols < other.consumed_symbols) return true;
|
||||
if (consumed_symbols > other.consumed_symbols) return false;
|
||||
if (lookahead_sym < other.lookahead_sym) return true;
|
||||
return false;
|
||||
}
|
||||
|
|
@ -65,15 +66,15 @@ namespace tree_sitter {
|
|||
return lhs_eq && rules_eq;
|
||||
}
|
||||
|
||||
ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym) :
|
||||
ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, const vector<Symbol> &consumed_symbols, const rules::Symbol &lookahead_sym) :
|
||||
Item(lhs, rule),
|
||||
consumed_sym_count(consumed_sym_count),
|
||||
consumed_symbols(consumed_symbols),
|
||||
lookahead_sym(lookahead_sym) {}
|
||||
|
||||
bool ParseItem::operator==(const ParseItem &other) const {
|
||||
bool lhs_eq = other.lhs == lhs;
|
||||
bool rules_eq = (*other.rule == *rule);
|
||||
bool consumed_sym_counts_eq = (other.consumed_sym_count == consumed_sym_count);
|
||||
bool consumed_sym_counts_eq = (other.consumed_symbols == consumed_symbols);
|
||||
bool lookaheads_eq = other.lookahead_sym == lookahead_sym;
|
||||
return lhs_eq && rules_eq && consumed_sym_counts_eq && lookaheads_eq;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include <set>
|
||||
#include <unordered_set>
|
||||
#include "symbol.h"
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
|
@ -29,11 +30,11 @@ namespace tree_sitter {
|
|||
|
||||
class ParseItem : public Item {
|
||||
public:
|
||||
ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, int consumed_sym_count, const rules::Symbol &lookahead_sym);
|
||||
ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, const std::vector<rules::Symbol> &consumed_symbols, const rules::Symbol &lookahead_sym);
|
||||
bool operator<(const ParseItem &other) const;
|
||||
bool operator==(const ParseItem &other) const;
|
||||
|
||||
const int consumed_sym_count;
|
||||
const std::vector<rules::Symbol> consumed_symbols;
|
||||
const rules::Symbol lookahead_sym;
|
||||
};
|
||||
|
||||
|
|
@ -61,7 +62,7 @@ namespace std {
|
|||
return
|
||||
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rules::Rule>()(*item.rule) ^
|
||||
hash<size_t>()(item.consumed_sym_count) ^
|
||||
hash<size_t>()(item.consumed_symbols.size()) ^
|
||||
hash<tree_sitter::rules::Symbol>()(item.lookahead_sym);
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ namespace tree_sitter {
|
|||
auto non_terminal = pair.first;
|
||||
auto terminals = pair.second;
|
||||
for (rules::Symbol terminal : terminals) {
|
||||
auto next_item = ParseItem(non_terminal, grammar.rule(non_terminal), 0, terminal);
|
||||
auto next_item = ParseItem(non_terminal, grammar.rule(non_terminal), {}, terminal);
|
||||
add_item(item_set, next_item, grammar);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,7 +43,9 @@ namespace tree_sitter {
|
|||
for (auto transition : rule_transitions(item.rule)) {
|
||||
auto rule = dynamic_pointer_cast<const rules::Symbol>(transition.first);
|
||||
if (rule.get()) {
|
||||
auto new_item = ParseItem(item.lhs, transition.second, item.consumed_sym_count + 1, item.lookahead_sym);
|
||||
auto consumed_symbols = item.consumed_symbols;
|
||||
consumed_symbols.push_back(*rule);
|
||||
auto new_item = ParseItem(item.lhs, transition.second, consumed_symbols, item.lookahead_sym);
|
||||
auto new_item_set = item_set_closure(ParseItemSet({ new_item }), grammar);
|
||||
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,13 +60,21 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the per-child flags for a reduce action: one entry per consumed
// symbol, in the same order, set to that symbol's `is_auxiliary` member.
static vector<bool> reduce_flags(const vector<rules::Symbol> &child_symbols) {
    vector<bool> result;
    result.reserve(child_symbols.size());
    // Bind by const reference: only the flag is read, so there is no need to
    // copy each Symbol on every iteration.
    for (const auto &symbol : child_symbols) {
        result.push_back(symbol.is_auxiliary);
    }
    return result;
}
|
||||
|
||||
void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) {
|
||||
for (ParseItem item : item_set) {
|
||||
if (item.is_done()) {
|
||||
ParseAction action = (item.lhs.name == START) ?
|
||||
ParseAction::Accept() :
|
||||
ParseAction::Reduce(item.lhs, item.consumed_sym_count);
|
||||
ParseAction::Reduce(item.lhs, reduce_flags(item.consumed_symbols));
|
||||
parse_table.add_action(state_index, item.lookahead_sym, action);
|
||||
}
|
||||
}
|
||||
|
|
@ -116,7 +124,7 @@ namespace tree_sitter {
|
|||
lex_grammar(lex_grammar) {};
|
||||
|
||||
pair<ParseTable, LexTable> build() {
|
||||
auto item = ParseItem(START, rules::sym(grammar.start_rule_name), 0, END_OF_INPUT);
|
||||
auto item = ParseItem(START, rules::sym(grammar.start_rule_name), {}, END_OF_INPUT);
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
|
||||
add_parse_state(item_set);
|
||||
return pair<ParseTable, LexTable>(parse_table, lex_table);
|
||||
|
|
|
|||
|
|
@ -118,6 +118,17 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
// Renders a list of flags as comma-separated C integer literals: "1" for
// true and "0" for false, joined by ", " (e.g. {false, true} -> "0, 1").
// An empty list yields an empty string. The result is placed between the
// braces of the COLLAPSE({...}) argument of a generated REDUCE(...) call.
std::string collapse_flags(const std::vector<bool> &flags) {
    std::string result;
    const char *separator = "";  // nothing before the first element
    for (const bool flag : flags) {
        result += separator;
        result += (flag ? "1" : "0");
        separator = ", ";
    }
    return result;
}
|
||||
|
||||
string code_for_parse_actions(const unordered_set<ParseAction> &actions, const unordered_set<rules::Symbol> &expected_inputs) {
|
||||
auto action = actions.begin();
|
||||
if (action == actions.end()) {
|
||||
|
|
@ -129,7 +140,7 @@ namespace tree_sitter {
|
|||
case ParseActionTypeShift:
|
||||
return "SHIFT(" + to_string(action->state_index) + ");";
|
||||
case ParseActionTypeReduce:
|
||||
return "REDUCE(" + symbol_id(action->symbol) + ", " + std::to_string(action->child_symbol_count) + ");";
|
||||
return "REDUCE(" + symbol_id(action->symbol) + ", " + to_string(action->child_flags.size()) + ", COLLAPSE({" + collapse_flags(action->child_flags) + "}));";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,37 +4,38 @@ using std::string;
|
|||
using std::ostream;
|
||||
using std::to_string;
|
||||
using std::unordered_set;
|
||||
using std::vector;
|
||||
using tree_sitter::rules::Symbol;
|
||||
|
||||
namespace tree_sitter {
|
||||
// Action
|
||||
ParseAction::ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count) :
|
||||
ParseAction::ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, const vector<bool> &child_flags) :
|
||||
type(type),
|
||||
state_index(state_index),
|
||||
symbol(symbol),
|
||||
child_symbol_count(child_symbol_count) {};
|
||||
child_flags(child_flags) {};
|
||||
|
||||
ParseAction ParseAction::Error() {
|
||||
return ParseAction(ParseActionTypeError, -1, Symbol(""), -1);
|
||||
return ParseAction(ParseActionTypeError, -1, Symbol(""), {});
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Accept() {
|
||||
return ParseAction(ParseActionTypeAccept, -1, Symbol(""), -1);
|
||||
return ParseAction(ParseActionTypeAccept, -1, Symbol(""), {});
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Shift(size_t state_index) {
|
||||
return ParseAction(ParseActionTypeShift, state_index, Symbol(""), -1);
|
||||
return ParseAction(ParseActionTypeShift, state_index, Symbol(""), {});
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Reduce(Symbol symbol, size_t child_symbol_count) {
|
||||
return ParseAction(ParseActionTypeReduce, -1, symbol, child_symbol_count);
|
||||
ParseAction ParseAction::Reduce(Symbol symbol, const vector<bool> &child_flags) {
|
||||
return ParseAction(ParseActionTypeReduce, -1, symbol, child_flags);
|
||||
}
|
||||
|
||||
// Two actions are equal when their type, target state index, symbol and
// per-child flags all match.
bool ParseAction::operator==(const ParseAction &other) const {
    bool types_eq = type == other.type;
    bool state_indices_eq = state_index == other.state_index;
    // The symbol must take part in equality: it is part of the hash for
    // ParseAction, and without it two reduce actions that produce different
    // symbols from the same child flags would compare equal.
    bool symbols_eq = symbol == other.symbol;
    bool child_flags_eq = child_flags == other.child_flags;
    return types_eq && state_indices_eq && symbols_eq && child_flags_eq;
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const ParseAction &action) {
|
||||
|
|
|
|||
|
|
@ -15,16 +15,16 @@ namespace tree_sitter {
|
|||
} ParseActionType;
|
||||
|
||||
class ParseAction {
|
||||
ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, size_t child_symbol_count);
|
||||
ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, const std::vector<bool> &child_flags);
|
||||
public:
|
||||
static ParseAction Accept();
|
||||
static ParseAction Error();
|
||||
static ParseAction Shift(size_t state_index);
|
||||
static ParseAction Reduce(rules::Symbol symbol, size_t child_symbol_count);
|
||||
static ParseAction Reduce(rules::Symbol symbol, const std::vector<bool> &child_flags);
|
||||
bool operator==(const ParseAction &action) const;
|
||||
|
||||
ParseActionType type;
|
||||
size_t child_symbol_count;
|
||||
std::vector<bool> child_flags;
|
||||
rules::Symbol symbol;
|
||||
size_t state_index;
|
||||
};
|
||||
|
|
@ -40,7 +40,7 @@ namespace std {
|
|||
hash<int>()(action.type) ^
|
||||
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
|
||||
hash<size_t>()(action.state_index) ^
|
||||
hash<size_t>()(action.child_symbol_count));
|
||||
hash<size_t>()(action.child_flags.size()));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue