Merge pull request #140 from tree-sitter/eliminate-unit-reductions
Eliminate non-user-visible unit reductions from parse tables
This commit is contained in:
commit
84b15d2c78
6 changed files with 117 additions and 103 deletions
|
|
@ -67,19 +67,11 @@ typedef struct {
|
|||
uint32_t offset[2];
|
||||
} TSNode;
|
||||
|
||||
typedef struct {
|
||||
TSSymbol value;
|
||||
bool done;
|
||||
void *data;
|
||||
} TSSymbolIterator;
|
||||
|
||||
uint32_t ts_node_start_byte(TSNode);
|
||||
TSPoint ts_node_start_point(TSNode);
|
||||
uint32_t ts_node_end_byte(TSNode);
|
||||
TSPoint ts_node_end_point(TSNode);
|
||||
TSSymbol ts_node_symbol(TSNode);
|
||||
TSSymbolIterator ts_node_symbols(TSNode);
|
||||
void ts_symbol_iterator_next(TSSymbolIterator *);
|
||||
const char *ts_node_type(TSNode, const TSDocument *);
|
||||
char *ts_node_string(TSNode, const TSDocument *);
|
||||
bool ts_node_eq(TSNode, TSNode);
|
||||
|
|
|
|||
|
|
@ -110,6 +110,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
build_error_parse_state(error_state_id);
|
||||
remove_precedence_values();
|
||||
remove_duplicate_parse_states();
|
||||
eliminate_unit_reductions();
|
||||
populate_used_terminals();
|
||||
|
||||
auto lex_table_result = lex_table_builder->build(&parse_table);
|
||||
return {
|
||||
|
|
@ -222,7 +224,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
// Only add the highest-precedence Reduce actions to the parse table.
|
||||
// If other lower-precedence actions are possible, ignore them.
|
||||
if (entry.actions.empty()) {
|
||||
parse_table.add_terminal_action(state_id, lookahead, action);
|
||||
entry.actions.push_back(action);
|
||||
} else {
|
||||
ParseAction &existing_action = entry.actions[0];
|
||||
if (existing_action.type == ParseActionTypeAccept) {
|
||||
|
|
@ -376,30 +378,96 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
vector<ParseStateId> new_state_ids(parse_table.states.size());
|
||||
size_t deleted_state_count = 0;
|
||||
auto deleted_state_iter = deleted_states.begin();
|
||||
for (ParseStateId i = 0; i < new_state_ids.size(); i++) {
|
||||
while (deleted_state_iter != deleted_states.end() && *deleted_state_iter < i) {
|
||||
deleted_state_count++;
|
||||
deleted_state_iter++;
|
||||
delete_parse_states(deleted_states);
|
||||
}
|
||||
|
||||
void eliminate_unit_reductions() {
|
||||
|
||||
// Find all the "unit reduction states" - states whose only actions are unit reductions,
|
||||
// all of which reduce by the same symbol. Store the symbols along with the state indices.
|
||||
unordered_map<ParseStateId, Symbol::Index> unit_reduction_states;
|
||||
for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
|
||||
ParseState &state = parse_table.states[i];
|
||||
bool only_unit_reductions = true;
|
||||
Symbol::Index unit_reduction_symbol = -1;
|
||||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
for (ParseAction &action : entry.second.actions) {
|
||||
if (action.extra) continue;
|
||||
if (action.type == ParseActionTypeReduce &&
|
||||
action.consumed_symbol_count == 1 &&
|
||||
action.alias_sequence_id == 0 &&
|
||||
grammar.variables[action.symbol.index].type != VariableTypeNamed &&
|
||||
(unit_reduction_symbol == -1 || unit_reduction_symbol == action.symbol.index)
|
||||
) {
|
||||
unit_reduction_symbol = action.symbol.index;
|
||||
} else {
|
||||
only_unit_reductions = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!only_unit_reductions) break;
|
||||
}
|
||||
new_state_ids[i] = i - deleted_state_count;
|
||||
|
||||
if (only_unit_reductions) unit_reduction_states[i] = unit_reduction_symbol;
|
||||
}
|
||||
|
||||
ParseStateId original_state_index = 0;
|
||||
auto iter = parse_table.states.begin();
|
||||
while (iter != parse_table.states.end()) {
|
||||
if (deleted_states.count(original_state_index)) {
|
||||
iter = parse_table.states.erase(iter);
|
||||
} else {
|
||||
ParseState &state = *iter;
|
||||
state.each_referenced_state([&new_state_ids](ParseStateId *state_index) {
|
||||
*state_index = new_state_ids[*state_index];
|
||||
});
|
||||
++iter;
|
||||
// Update each parse state so that the parser never enters these "unit reduction states".
|
||||
for (ParseState &state : parse_table.states) {
|
||||
|
||||
// Update all of the shift actions associated with terminals. If a shift action
|
||||
// points to a unit reduction state, update it to point directly at the same state
|
||||
// as the shift action that's associated with the unit reduction state's non-terminal.
|
||||
for (auto entry = state.nonterminal_entries.begin();
|
||||
entry != state.nonterminal_entries.end();) {
|
||||
const auto &unit_reduction_entry = unit_reduction_states.find(entry->second);
|
||||
if (unit_reduction_entry != unit_reduction_states.end() &&
|
||||
unit_reduction_entry->first == entry->second) {
|
||||
auto entry_for_reduced_symbol = state.nonterminal_entries.find(unit_reduction_entry->second);
|
||||
if (entry_for_reduced_symbol != state.nonterminal_entries.end()) {
|
||||
entry->second = entry_for_reduced_symbol->second;
|
||||
} else {
|
||||
entry = state.nonterminal_entries.erase(entry);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
++entry;
|
||||
}
|
||||
|
||||
// Update all of the shift actions associated with non-terminals in the same way.
|
||||
for (auto entry = state.terminal_entries.begin(); entry != state.terminal_entries.end();) {
|
||||
auto &last_action = entry->second.actions.back();
|
||||
if (last_action.type == ParseActionTypeShift) {
|
||||
const auto &unit_reduction_entry = unit_reduction_states.find(last_action.state_index);
|
||||
if (unit_reduction_entry != unit_reduction_states.end() &&
|
||||
unit_reduction_entry->first == last_action.state_index) {
|
||||
auto entry_for_reduced_symbol = state.nonterminal_entries.find(unit_reduction_entry->second);
|
||||
if (entry_for_reduced_symbol != state.nonterminal_entries.end()) {
|
||||
last_action.state_index = entry_for_reduced_symbol->second;
|
||||
} else {
|
||||
entry = state.terminal_entries.erase(entry);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
++entry;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the unit reduction states from the parse table.
|
||||
set<ParseStateId> states_to_delete;
|
||||
for (auto &entry : unit_reduction_states) {
|
||||
if (entry.first != 1) states_to_delete.insert(entry.first);
|
||||
}
|
||||
delete_parse_states(states_to_delete);
|
||||
}
|
||||
|
||||
void populate_used_terminals() {
|
||||
for (const ParseState &state : parse_table.states) {
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
parse_table.symbols.insert(entry.first);
|
||||
}
|
||||
original_state_index++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -697,6 +765,34 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
return description;
|
||||
}
|
||||
|
||||
void delete_parse_states(const set<ParseStateId> deleted_states) {
|
||||
vector<ParseStateId> new_state_ids(parse_table.states.size());
|
||||
size_t deleted_state_count = 0;
|
||||
auto deleted_state_iter = deleted_states.begin();
|
||||
for (ParseStateId i = 0; i < new_state_ids.size(); i++) {
|
||||
while (deleted_state_iter != deleted_states.end() && *deleted_state_iter < i) {
|
||||
deleted_state_count++;
|
||||
deleted_state_iter++;
|
||||
}
|
||||
new_state_ids[i] = i - deleted_state_count;
|
||||
}
|
||||
|
||||
ParseStateId original_state_index = 0;
|
||||
auto iter = parse_table.states.begin();
|
||||
while (iter != parse_table.states.end()) {
|
||||
if (deleted_states.count(original_state_index)) {
|
||||
iter = parse_table.states.erase(iter);
|
||||
} else {
|
||||
ParseState &state = *iter;
|
||||
state.each_referenced_state([&new_state_ids](ParseStateId *state_index) {
|
||||
*state_index = new_state_ids[*state_index];
|
||||
});
|
||||
++iter;
|
||||
}
|
||||
original_state_index++;
|
||||
}
|
||||
}
|
||||
|
||||
string symbol_name(const rules::Symbol &symbol) const {
|
||||
if (symbol.is_built_in()) {
|
||||
if (symbol == END_OF_INPUT())
|
||||
|
|
|
|||
|
|
@ -146,7 +146,6 @@ bool ParseState::operator==(const ParseState &other) const {
|
|||
ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
|
||||
Symbol lookahead,
|
||||
ParseAction action) {
|
||||
symbols.insert(lookahead);
|
||||
ParseTableEntry &entry = states[state_id].terminal_entries[lookahead];
|
||||
entry.actions.push_back(action);
|
||||
return *entry.actions.rbegin();
|
||||
|
|
|
|||
|
|
@ -263,26 +263,6 @@ TSSymbol ts_node_symbol(TSNode self) {
|
|||
return tree->context.alias_symbol ? tree->context.alias_symbol : tree->symbol;
|
||||
}
|
||||
|
||||
TSSymbolIterator ts_node_symbols(TSNode self) {
|
||||
const Tree *tree = ts_node__tree(self);
|
||||
return (TSSymbolIterator){
|
||||
.value = tree->symbol, .done = false, .data = (void *)tree,
|
||||
};
|
||||
}
|
||||
|
||||
void ts_symbol_iterator_next(TSSymbolIterator *self) {
|
||||
const Tree *tree = (const Tree *)self->data;
|
||||
const Tree *parent = tree->context.parent;
|
||||
if (!self->done && parent) {
|
||||
if (parent->child_count == 1 && !parent->visible) {
|
||||
self->value = parent->symbol;
|
||||
self->data = (void *)parent;
|
||||
return;
|
||||
}
|
||||
}
|
||||
self->done = true;
|
||||
}
|
||||
|
||||
const char *ts_node_type(TSNode self, const TSDocument *document) {
|
||||
return ts_language_symbol_name(document->parser.language, ts_node_symbol(self));
|
||||
}
|
||||
|
|
|
|||
27
test/fixtures/error_corpus/c_errors.txt
vendored
27
test/fixtures/error_corpus/c_errors.txt
vendored
|
|
@ -141,30 +141,3 @@ int y = 5;
|
|||
(translation_unit
|
||||
(declaration (primitive_type) (ERROR (identifier)) (identifier))
|
||||
(declaration (primitive_type) (init_declarator (identifier) (number_literal))))
|
||||
|
||||
==========================================
|
||||
Declarations with missing variable names
|
||||
==========================================
|
||||
|
||||
int a() {
|
||||
struct x = 1;
|
||||
int = 2;
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
(translation_unit
|
||||
(function_definition
|
||||
(primitive_type)
|
||||
(function_declarator (identifier) (parameter_list))
|
||||
(compound_statement
|
||||
(declaration
|
||||
(struct_specifier (type_identifier))
|
||||
(init_declarator
|
||||
(MISSING)
|
||||
(number_literal)))
|
||||
(declaration
|
||||
(primitive_type)
|
||||
(init_declarator
|
||||
(MISSING)
|
||||
(number_literal))))))
|
||||
|
|
|
|||
|
|
@ -255,32 +255,6 @@ describe("Node", [&]() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("symbols()", [&]() {
|
||||
it("returns an iterator that yields each of the node's symbols", [&]() {
|
||||
const TSLanguage *language = ts_document_language(document);
|
||||
|
||||
TSNode false_node = ts_node_descendant_for_byte_range(root_node, false_index, false_index + 1);
|
||||
TSSymbolIterator iterator = ts_node_symbols(false_node);
|
||||
AssertThat(iterator.done, Equals(false));
|
||||
AssertThat(ts_language_symbol_name(language, iterator.value), Equals("false"));
|
||||
|
||||
ts_symbol_iterator_next(&iterator);
|
||||
AssertThat(iterator.done, Equals(false));
|
||||
AssertThat(ts_language_symbol_name(language, iterator.value), Equals("_value"));
|
||||
|
||||
ts_symbol_iterator_next(&iterator);
|
||||
AssertThat(iterator.done, Equals(true));
|
||||
|
||||
TSNode comma_node = ts_node_descendant_for_byte_range(root_node, number_end_index, number_end_index);
|
||||
iterator = ts_node_symbols(comma_node);
|
||||
AssertThat(iterator.done, Equals(false));
|
||||
AssertThat(ts_language_symbol_name(language, iterator.value), Equals(","));
|
||||
|
||||
ts_symbol_iterator_next(&iterator);
|
||||
AssertThat(iterator.done, Equals(true));
|
||||
});
|
||||
});
|
||||
|
||||
describe("child_count(), child(i)", [&]() {
|
||||
it("returns the child node at the given index, including anonymous nodes", [&]() {
|
||||
AssertThat(ts_node_child_count(root_node), Equals<size_t>(7));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue