Start work on recording parse action conflicts
This commit is contained in:
parent
1da9f1fdfd
commit
3982b73ed6
12 changed files with 278 additions and 26 deletions
|
|
@ -31,9 +31,16 @@ namespace tree_sitter {
|
|||
const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);
|
||||
// A single conflict, carried as a human-readable description.
// Two conflicts compare equal when their descriptions are equal.
struct Conflict {
    // Text describing the conflict.
    std::string description;

    // Builds a conflict from its description text.
    Conflict(std::string description);

    // True when `other` has the same description.
    bool operator==(const Conflict &other) const;
};
|
||||
|
||||
std::string compile(const Grammar &grammar, std::string name);
|
||||
std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);
|
||||
std::ostream& operator<<(std::ostream &stream, const Conflict &conflict);
|
||||
|
||||
std::pair<std::string, std::vector<Conflict>> compile(const Grammar &grammar, std::string name);
|
||||
}
|
||||
|
||||
#endif // TREE_SITTER_COMPILER_H_
|
||||
|
|
|
|||
89
spec/compiler/build_tables/conflict_manager_spec.cc
Normal file
89
spec/compiler/build_tables/conflict_manager_spec.cc
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
#include "compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/conflict_manager.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
// Specs for ConflictManager: how competing lex actions and parse actions are
// resolved, and which parse conflicts get recorded along the way.
describe("resolving parse conflicts", []() {
    LexAction lex_action;
    ParseAction parse_action;
    ConflictManager *manager;

    // rule1 is listed before rule2, and token1 before token2; the ordering
    // examples below depend on these positions.
    PreparedGrammar parse_grammar({
        { "rule1", seq({ sym("rule2"), sym("token2") }) },
        { "rule2", sym("token1") },
    }, {});

    PreparedGrammar lex_grammar({
        { "token1", pattern("[a-c]") },
        { "token2", pattern("[b-d]") },
    }, {});

    // Every example gets its own manager, so the recorded conflict list
    // always starts out empty.
    before_each([&]() {
        manager = new ConflictManager(parse_grammar, lex_grammar);
    });

    after_each([&]() {
        delete manager;
    });

    describe("lexical conflicts", [&]() {
        Symbol sym1("token1");
        Symbol sym2("token2");

        it("favors non-errors over lexical errors", [&]() {
            lex_action = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2));
            AssertThat(lex_action, Equals(LexAction::Advance(2)));

            lex_action = manager->resolve_lex_action(LexAction::Advance(2), LexAction::Error());
            AssertThat(lex_action, Equals(LexAction::Advance(2)));
        });

        it("prefers tokens that are listed earlier in the grammar", [&]() {
            lex_action = manager->resolve_lex_action(LexAction::Accept(sym1), LexAction::Accept(sym2));
            AssertThat(lex_action, Equals(LexAction::Accept(sym1)));

            lex_action = manager->resolve_lex_action(LexAction::Accept(sym2), LexAction::Accept(sym1));
            AssertThat(lex_action, Equals(LexAction::Accept(sym1)));
        });
    });

    describe("syntactic conflicts", [&]() {
        Symbol sym1("rule1");
        Symbol sym2("rule2");

        it("favors non-errors over parse errors", [&]() {
            parse_action = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2));
            AssertThat(parse_action, Equals(ParseAction::Shift(2)));

            parse_action = manager->resolve_parse_action(sym1, ParseAction::Shift(2), ParseAction::Error());
            AssertThat(parse_action, Equals(ParseAction::Shift(2)));
        });

        describe("shift/reduce conflicts", [&]() {
            it("records shift/reduce conflicts, favoring the shift", [&]() {
                manager->resolve_parse_action(sym1, ParseAction::Reduce(sym2, 1), ParseAction::Shift(2));
                manager->resolve_parse_action(sym1, ParseAction::Shift(2), ParseAction::Reduce(sym2, 1));

                // Check the count first: indexing a too-short vector would be
                // undefined behaviour instead of a clean assertion failure.
                AssertThat(manager->conflicts().size(), Equals<size_t>(2));
                AssertThat(manager->conflicts()[0], Equals(Conflict("rule1: shift / reduce rule2")));
                AssertThat(manager->conflicts()[1], Equals(Conflict("rule1: shift / reduce rule2")));
            });

            it("favors the shift", [&]() {
                parse_action = manager->resolve_parse_action(sym1, ParseAction::Reduce(sym2, 1), ParseAction::Shift(2));
                AssertThat(parse_action, Equals(ParseAction::Shift(2)));

                parse_action = manager->resolve_parse_action(sym1, ParseAction::Shift(2), ParseAction::Reduce(sym2, 1));
                AssertThat(parse_action, Equals(ParseAction::Shift(2)));
            });
        });

        it("records reduce/reduce conflicts, favoring the symbol listed earlier in the grammar", [&]() {
            // TODO: placeholder - this example has no assertions yet, so it
            // passes without checking anything.
        });
    });
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -1,6 +1,8 @@
|
|||
#include "compiler_spec_helper.h"
|
||||
#include <fstream>
|
||||
|
||||
#include "stream_methods.h"
|
||||
|
||||
static string src_dir() {
|
||||
const char * dir = getenv("TREESITTER_DIR");
|
||||
if (!dir) dir = getenv("PWD");
|
||||
|
|
@ -22,7 +24,11 @@ describe("compiling the example grammars", []() {
|
|||
auto compile_grammar = [&](const Grammar &grammar, string language) {
|
||||
it(("compiles the " + language + " grammar").c_str(), [&]() {
|
||||
ofstream file(example_parser_dir + language + ".c");
|
||||
file << compile(grammar, language);
|
||||
auto result = compile(grammar, language);
|
||||
|
||||
// cout << "\n conflicts for " << language << "\n:" << result.second;
|
||||
|
||||
file << result.first;
|
||||
file.close();
|
||||
});
|
||||
};
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/build_tables/conflict_manager.h"
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
|
|
@ -16,6 +17,7 @@ namespace tree_sitter {
|
|||
using std::pair;
|
||||
using std::string;
|
||||
using std::map;
|
||||
using std::vector;
|
||||
using std::unordered_map;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
|
|
@ -25,10 +27,9 @@ namespace tree_sitter {
|
|||
class TableBuilder {
|
||||
const PreparedGrammar grammar;
|
||||
const PreparedGrammar lex_grammar;
|
||||
ConflictManager conflict_manager;
|
||||
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
|
||||
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
|
||||
ParseTable parse_table;
|
||||
LexTable lex_table;
|
||||
|
||||
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (auto transition : sym_transitions(item_set, grammar)) {
|
||||
|
|
@ -58,13 +59,10 @@ namespace tree_sitter {
|
|||
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (LexItem item : item_set) {
|
||||
if (item.is_done()) {
|
||||
const Symbol &new_symbol = item.lhs;
|
||||
auto &action = lex_table.states[state_id].default_action;
|
||||
if (action.type == LexActionTypeAccept) {
|
||||
const Symbol &old_symbol = action.symbol;
|
||||
if (lex_grammar.index_of(new_symbol) >= lex_grammar.index_of(old_symbol)) continue;
|
||||
}
|
||||
lex_table.add_default_action(state_id, LexAction::Accept(new_symbol));
|
||||
auto current_action = lex_table.state(state_id).default_action;
|
||||
auto new_action = LexAction::Accept(item.lhs);
|
||||
auto action = conflict_manager.resolve_lex_action(current_action, new_action);
|
||||
lex_table.add_default_action(state_id, action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -147,24 +145,31 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
public:
|
||||
|
||||
TableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
|
||||
grammar(grammar),
|
||||
lex_grammar(lex_grammar) {}
|
||||
lex_grammar(lex_grammar),
|
||||
conflict_manager(ConflictManager(grammar, lex_grammar))
|
||||
{}
|
||||
|
||||
pair<ParseTable, LexTable> build() {
|
||||
void build() {
|
||||
auto start_symbol = make_shared<Symbol>(grammar.start_rule_name());
|
||||
ParseItem item(rules::START(), start_symbol, {}, rules::END_OF_INPUT());
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
|
||||
add_parse_state(item_set);
|
||||
add_error_lex_state();
|
||||
return pair<ParseTable, LexTable>(parse_table, lex_table);
|
||||
}
|
||||
|
||||
vector<Conflict> conflicts;
|
||||
ParseTable parse_table;
|
||||
LexTable lex_table;
|
||||
};
|
||||
|
||||
pair<ParseTable, LexTable> build_tables(const PreparedGrammar &grammar,
|
||||
pair<pair<ParseTable, LexTable>, vector<Conflict>>
|
||||
build_tables(const PreparedGrammar &grammar,
|
||||
const PreparedGrammar &lex_grammar) {
|
||||
return TableBuilder(grammar, lex_grammar).build();
|
||||
TableBuilder builder(grammar, lex_grammar);
|
||||
builder.build();
|
||||
return { { builder.parse_table, builder.lex_table }, builder.conflicts };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
#define COMPILER_BUILD_TABLES_BUILD_TABLES_H_
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lex_table.h"
|
||||
|
||||
|
|
@ -9,8 +11,8 @@ namespace tree_sitter {
|
|||
class PreparedGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
std::pair<ParseTable, LexTable> build_tables(const PreparedGrammar &grammar,
|
||||
const PreparedGrammar &lex_grammar);
|
||||
std::pair<std::pair<ParseTable, LexTable>, std::vector<Conflict>>
|
||||
build_tables(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
92
src/compiler/build_tables/conflict_manager.cpp
Normal file
92
src/compiler/build_tables/conflict_manager.cpp
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
#include "compiler/build_tables/conflict_manager.h"
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
using rules::Symbol;
|
||||
using std::vector;
|
||||
using std::string;
|
||||
|
||||
// Returns the short label used for `action` inside a conflict description:
// "shift", "reduce <symbol name>", "accept" or "error".
string message_for_action(const ParseAction &action) {
    // No `default:` label on purpose, so the compiler can still warn when a
    // new ParseActionType enumerator is added and not handled here.
    switch (action.type) {
        case ParseActionTypeShift:
            return "shift";
        case ParseActionTypeReduce:
            return "reduce " + action.symbol.name;
        case ParseActionTypeAccept:
            return "accept";
        case ParseActionTypeError:
            return "error";
    }

    // Only reached if `type` holds a value outside the enumerators above.
    // Flowing off the end of a non-void function is undefined behaviour, so
    // return a harmless label instead.
    return "unknown";
}
|
||||
|
||||
// Appends one entry to the recorded conflicts. Its description has the form
// "<symbol name>: <left action> / <right action>".
void ConflictManager::record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right) {
    const string left_text = message_for_action(left);
    const string right_text = message_for_action(right);
    const string description = symbol.name + ": " + left_text + " / " + right_text;
    conflicts_.push_back(Conflict(description));
}
|
||||
|
||||
// Keeps both grammars for later index lookups and starts with no recorded
// conflicts.
ConflictManager::ConflictManager(const PreparedGrammar &parse_grammar,
                                 const PreparedGrammar &lex_grammar)
    : parse_grammar(parse_grammar),
      lex_grammar(lex_grammar),
      conflicts_() {}
|
||||
|
||||
// Picks the parse action to keep when `left` and `right` compete on `symbol`.
//
// The pair is first ordered so that `left` holds the action whose type value
// is not greater than the other's. Then:
//   - error vs. anything: the other action is returned;
//   - shift vs. shift or reduce: a conflict is recorded and the shift is kept;
//   - shift vs. any other type: the shift is kept, nothing is recorded;
//   - reduce vs. reduce: the reduction whose symbol has the smaller index in
//     the parse grammar is kept (the first one on a tie), nothing is recorded;
//   - reduce vs. any other type: the other action is returned;
//   - any remaining case: the first action of the ordered pair is returned.
ParseAction ConflictManager::resolve_parse_action(const rules::Symbol &symbol, ParseAction left, ParseAction right) {
    // Normalise the order so the checks below only handle one orientation.
    if (right.type < left.type) {
        ParseAction previous_left = left;
        left = right;
        right = previous_left;
    }

    if (left.type == ParseActionTypeError)
        return right;

    if (left.type == ParseActionTypeShift) {
        const bool is_conflict = (right.type == ParseActionTypeShift) ||
                                 (right.type == ParseActionTypeReduce);
        if (is_conflict)
            record_conflict(symbol, left, right);
        return left;
    }

    if (left.type == ParseActionTypeReduce) {
        if (right.type != ParseActionTypeReduce)
            return right;

        size_t left_position = parse_grammar.index_of(left.symbol);
        size_t right_position = parse_grammar.index_of(right.symbol);
        if (right_position < left_position)
            return right;
        return left;
    }

    return left;
}
|
||||
|
||||
// Picks the lex action to keep when `left` and `right` compete.
//
// An error on the left always yields `right`. When both actions are accepts,
// the one whose symbol has the smaller index in the lex grammar is kept
// (`left` on a tie). In every other case `left` is kept.
LexAction ConflictManager::resolve_lex_action(const LexAction &left, const LexAction &right) {
    if (left.type == LexActionTypeError)
        return right;

    const bool both_accept = (left.type == LexActionTypeAccept) &&
                             (right.type == LexActionTypeAccept);
    if (!both_accept)
        return left;

    size_t left_position = lex_grammar.index_of(left.symbol);
    size_t right_position = lex_grammar.index_of(right.symbol);
    if (right_position < left_position)
        return right;
    return left;
}
|
||||
|
||||
// Read-only view of the conflicts recorded so far, in the order they were
// added.
const vector<Conflict> & ConflictManager::conflicts() const {
    return this->conflicts_;
}
|
||||
}
|
||||
}
|
||||
28
src/compiler/build_tables/conflict_manager.h
Normal file
28
src/compiler/build_tables/conflict_manager.h
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_CONFLICT_MANAGER_H_
|
||||
#define COMPILER_BUILD_TABLES_CONFLICT_MANAGER_H_
|
||||
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class ConflictManager {
|
||||
const PreparedGrammar parse_grammar;
|
||||
const PreparedGrammar lex_grammar;
|
||||
std::vector<Conflict> conflicts_;
|
||||
|
||||
public:
|
||||
ConflictManager(const PreparedGrammar &parse_grammar, const PreparedGrammar &lex_grammar);
|
||||
|
||||
LexAction resolve_lex_action(const LexAction &left, const LexAction &right);
|
||||
ParseAction resolve_parse_action(const rules::Symbol &symbol, ParseAction left, ParseAction right);
|
||||
|
||||
void record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right);
|
||||
const std::vector<Conflict> & conflicts() const;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_CONFLICT_MANAGER_H_
|
||||
|
|
@ -6,16 +6,22 @@
|
|||
#include "compiler/name_symbols/name_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
std::string compile(const Grammar &grammar, std::string name) {
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
pair<string, vector<Conflict>> compile(const Grammar &grammar, std::string name) {
|
||||
auto grammars = prepare_grammar::prepare_grammar(grammar);
|
||||
PreparedGrammar &syntax_grammar = grammars.first;
|
||||
PreparedGrammar &lexical_grammar = grammars.second;
|
||||
|
||||
auto tables = build_tables::build_tables(syntax_grammar, lexical_grammar);
|
||||
auto table_build_result = build_tables::build_tables(syntax_grammar, lexical_grammar);
|
||||
auto tables = table_build_result.first;
|
||||
auto conflicts = table_build_result.second;
|
||||
ParseTable &parse_table = tables.first;
|
||||
LexTable &lex_table = tables.second;
|
||||
|
||||
auto symbol_names = name_symbols::name_symbols(parse_table.symbols, lexical_grammar);
|
||||
return generate_code::c_code(name, parse_table, lex_table, symbol_names);
|
||||
return { generate_code::c_code(name, parse_table, lex_table, symbol_names), conflicts };
|
||||
}
|
||||
}
|
||||
|
|
|
|||
16
src/compiler/conflict.cc
Normal file
16
src/compiler/conflict.cc
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
|
||||
// Stores the given text as this conflict's description.
Conflict::Conflict(string description) :
    description(description) {}
|
||||
|
||||
// Conflicts are equal exactly when their descriptions match.
bool Conflict::operator==(const tree_sitter::Conflict &other) const {
    const bool same_description = (description == other.description);
    return same_description;
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const Conflict &conflict) {
|
||||
return stream << "#<conflict " + conflict.description + ">";
|
||||
}
|
||||
}
|
||||
|
|
@ -18,6 +18,8 @@ namespace tree_sitter {
|
|||
state_index(state_index),
|
||||
consumed_symbol_count(consumed_symbol_count) {}
|
||||
|
||||
ParseAction::ParseAction() : ParseAction(ParseActionTypeError, -1, Symbol(""), {}) {}
|
||||
|
||||
ParseAction ParseAction::Error() {
|
||||
return ParseAction(ParseActionTypeError, -1, Symbol(""), {});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,9 +11,9 @@
|
|||
namespace tree_sitter {
|
||||
typedef enum {
|
||||
ParseActionTypeError,
|
||||
ParseActionTypeAccept,
|
||||
ParseActionTypeShift,
|
||||
ParseActionTypeReduce,
|
||||
ParseActionTypeAccept,
|
||||
} ParseActionType;
|
||||
|
||||
class ParseAction {
|
||||
|
|
@ -22,6 +22,7 @@ namespace tree_sitter {
|
|||
rules::Symbol symbol,
|
||||
size_t consumed_symbol_count);
|
||||
public:
|
||||
ParseAction();
|
||||
static ParseAction Accept();
|
||||
static ParseAction Error();
|
||||
static ParseAction Shift(size_t state_index);
|
||||
|
|
|
|||
|
|
@ -23,8 +23,6 @@ namespace tree_sitter {
|
|||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar);
|
||||
|
||||
std::string compile(const Grammar &grammar, std::string name);
|
||||
}
|
||||
|
||||
#endif // COMPILER_PREPARED_GRAMMAR_H_
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue