Use different types for advance and accept-token actions

Unlike with parse actions, lexical actions of different types never appear
in the same places in the table.
This commit is contained in:
Max Brunsfeld 2016-01-22 22:16:47 -07:00
parent 1ec39abe6a
commit 6401a065ae
13 changed files with 147 additions and 185 deletions

View file

@ -15,46 +15,43 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
Symbol sym2(1, true);
Symbol sym3(2, true);
it("favors non-errors over lexical errors", [&]() {
update = conflict_manager.resolve(LexAction::Advance(2, {0, 0}), LexAction::Error());
it("favors advance actions over empty accept token actions", [&]() {
update = conflict_manager.resolve(AdvanceAction(2, {0, 0}), AcceptTokenAction());
AssertThat(update, IsTrue());
update = conflict_manager.resolve(LexAction::Error(), LexAction::Advance(2, {0, 0}));
AssertThat(update, IsFalse());
});
describe("accept-token/accept-token conflicts", [&]() {
describe("when one tokens' precedence values differ", [&]() {
it("favors the token with higher precedence", [&]() {
update = conflict_manager.resolve(LexAction::Accept(sym2, 1, false), LexAction::Accept(sym1, 2, false));
update = conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false));
AssertThat(update, IsFalse());
update = conflict_manager.resolve(LexAction::Accept(sym1, 2, false), LexAction::Accept(sym2, 1, false));
update = conflict_manager.resolve(AcceptTokenAction(sym1, 2, false), AcceptTokenAction(sym2, 1, false));
AssertThat(update, IsTrue());
});
it("adds the discarded token to the 'fragile tokens' set", [&]() {
update = conflict_manager.resolve(LexAction::Accept(sym2, 1, false), LexAction::Accept(sym1, 2, false));
update = conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false));
AssertThat(conflict_manager.fragile_tokens, Contains(sym2));
});
});
describe("when one token is string-based and the other is regexp-based", [&]() {
it("favors the string-based token", [&]() {
update = conflict_manager.resolve(LexAction::Accept(sym1, 0, false), LexAction::Accept(sym2, 0, true));
update = conflict_manager.resolve(AcceptTokenAction(sym1, 0, false), AcceptTokenAction(sym2, 0, true));
AssertThat(update, IsFalse());
update = conflict_manager.resolve(LexAction::Accept(sym2, 0, true), LexAction::Accept(sym1, 0, false));
update = conflict_manager.resolve(AcceptTokenAction(sym2, 0, true), AcceptTokenAction(sym1, 0, false));
AssertThat(update, IsTrue());
});
});
describe("when the tokens have equal precedence", [&]() {
it("favors the token listed earlier in the grammar", [&]() {
update = conflict_manager.resolve(LexAction::Accept(sym2, 0, false), LexAction::Accept(sym1, 0, false));
update = conflict_manager.resolve(AcceptTokenAction(sym2, 0, false), AcceptTokenAction(sym1, 0, false));
AssertThat(update, IsFalse());
update = conflict_manager.resolve(LexAction::Accept(sym1, 0, false), LexAction::Accept(sym2, 0, false));
update = conflict_manager.resolve(AcceptTokenAction(sym1, 0, false), AcceptTokenAction(sym2, 0, false));
AssertThat(update, IsTrue());
});
});
@ -63,21 +60,15 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
describe("advance/accept-token conflicts", [&]() {
describe("when the token to accept has higher precedence", [&]() {
it("prefers the accept-token action", [&]() {
update = conflict_manager.resolve(LexAction::Advance(1, { 1, 2 }), LexAction::Accept(sym3, 3, true));
update = conflict_manager.resolve(AdvanceAction(1, { 1, 2 }), AcceptTokenAction(sym3, 3, true));
AssertThat(update, IsFalse());
update = conflict_manager.resolve(LexAction::Accept(sym3, 3, true), LexAction::Advance(1, { 1, 2 }));
AssertThat(update, IsTrue());
});
});
describe("when the token to accept does not have a higher precedence", [&]() {
it("favors the advance action", [&]() {
update = conflict_manager.resolve(LexAction::Advance(1, { 1, 2 }), LexAction::Accept(sym3, 2, true));
update = conflict_manager.resolve(AdvanceAction(1, { 1, 2 }), AcceptTokenAction(sym3, 2, true));
AssertThat(update, IsTrue());
update = conflict_manager.resolve(LexAction::Accept(sym3, 2, true), LexAction::Advance(1, { 1, 2 }));
AssertThat(update, IsFalse());
});
});
});

View file

@ -50,19 +50,12 @@ ostream &operator<<(ostream &stream, const SyntaxVariable &variable) {
return stream << string("{") << variable.name << string(", ") << variable.productions << string(", ") << to_string(variable.type) << string("}");
}
std::ostream &operator<<(std::ostream &stream, const LexAction &action) {
switch (action.type) {
case LexActionTypeError:
return stream << string("#<error>");
case LexActionTypeAccept:
return stream << string("#<accept ") + to_string(action.symbol.index) +
">";
case LexActionTypeAdvance:
return stream << string("#<advance ") + to_string(action.state_index) +
">";
default:
return stream;
}
std::ostream &operator<<(std::ostream &stream, const AdvanceAction &action) {
return stream << string("#<advance ") + to_string(action.state_index) + ">";
}
std::ostream &operator<<(std::ostream &stream, const AcceptTokenAction &action) {
return stream << string("#<accept ") + to_string(action.symbol.index) + ">";
}
ostream &operator<<(ostream &stream, const ParseAction &action) {

View file

@ -92,7 +92,8 @@ using std::string;
using std::to_string;
struct Variable;
struct SyntaxVariable;
class LexAction;
struct AdvanceAction;
struct AcceptTokenAction;
class ParseAction;
class ParseState;
struct ProductionStep;
@ -104,7 +105,8 @@ ostream &operator<<(ostream &, const Rule &);
ostream &operator<<(ostream &, const rule_ptr &);
ostream &operator<<(ostream &, const Variable &);
ostream &operator<<(ostream &, const SyntaxVariable &);
ostream &operator<<(ostream &, const LexAction &);
ostream &operator<<(ostream &, const AdvanceAction &);
ostream &operator<<(ostream &, const AcceptTokenAction &);
ostream &operator<<(ostream &, const ParseAction &);
ostream &operator<<(ostream &, const ParseState &);
ostream &operator<<(ostream &, const ProductionStep &);

View file

@ -119,11 +119,12 @@ class LexTableBuilder {
const CharacterSet &rule = transition.first;
const LexItemSet &new_item_set = transition.second.first;
const PrecedenceRange &precedence = transition.second.second;
auto current_action = lex_table.state(state_id).default_action;
auto action = LexAction::Advance(-1, precedence);
AdvanceAction action(-1, precedence);
auto current_action = lex_table.state(state_id).accept_action;
if (conflict_manager.resolve(action, current_action)) {
action.state_index = add_lex_state(new_item_set);
lex_table.state(state_id).actions[rule] = action;
lex_table.state(state_id).advance_actions[rule] = action;
}
}
}
@ -132,12 +133,12 @@ class LexTableBuilder {
for (const LexItem &item : item_set.entries) {
LexItem::CompletionStatus completion_status = item.completion_status();
if (completion_status.is_done) {
auto current_action = lex_table.state(state_id).default_action;
auto action =
LexAction::Accept(item.lhs, completion_status.precedence.max,
completion_status.is_string);
AcceptTokenAction action(item.lhs, completion_status.precedence.max,
completion_status.is_string);
auto current_action = lex_table.state(state_id).accept_action;
if (conflict_manager.resolve(action, current_action))
lex_table.state(state_id).default_action = action;
lex_table.state(state_id).accept_action = action;
}
}
}
@ -150,15 +151,19 @@ class LexTableBuilder {
void mark_fragile_tokens() {
for (LexState &state : lex_table.states)
if (state.default_action.type == LexActionTypeAccept)
if (conflict_manager.fragile_tokens.count(state.default_action.symbol))
state.default_action.type = LexActionTypeAcceptFragile;
if (state.accept_action.is_present())
if (conflict_manager.fragile_tokens.count(state.accept_action.symbol))
state.accept_action.is_fragile = true;
}
void remove_duplicate_lex_states() {
for (LexState &state : lex_table.states) {
state.accept_action.is_string = false;
state.accept_action.precedence = 0;
}
auto replacements =
remove_duplicate_states<LexState, LexAction, LexActionTypeAdvance>(
&lex_table.states);
remove_duplicate_states<LexState, AdvanceAction>(&lex_table.states);
for (ParseState &parse_state : parse_table->states) {
auto replacement = replacements.find(parse_state.lex_state_id);

View file

@ -193,8 +193,7 @@ class ParseTableBuilder {
}
void remove_duplicate_parse_states() {
remove_duplicate_states<ParseState, ParseAction, ParseActionTypeShift>(
&parse_table.states);
remove_duplicate_states<ParseState, ParseAction>(&parse_table.states);
}
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,

View file

@ -6,59 +6,41 @@
namespace tree_sitter {
namespace build_tables {
bool LexConflictManager::resolve(const LexAction &new_action,
const LexAction &old_action) {
if (new_action.type < old_action.type)
return !resolve(old_action, new_action);
// Resolves a conflict between a candidate advance action and the accept-token
// action already recorded for the same lex state.
//
// Returns true when the advance action should be used: either no accept-token
// action is present, or the advance action's maximum precedence is at least
// the accepted token's precedence.
bool LexConflictManager::resolve(const AdvanceAction &new_action,
                                 const AcceptTokenAction &old_action) {
  const bool has_accept_action = old_action.is_present();
  return !has_accept_action ||
         new_action.precedence_range.max >= old_action.precedence;
}
switch (old_action.type) {
case LexActionTypeError:
return true;
bool LexConflictManager::resolve(const AcceptTokenAction &new_action,
const AcceptTokenAction &old_action) {
if (!old_action.is_present())
return true;
case LexActionTypeAccept: {
int old_precedence = old_action.precedence_range.min;
int old_precedence = old_action.precedence;
int new_precedence = new_action.precedence;
switch (new_action.type) {
case LexActionTypeAccept: {
int new_precedence = new_action.precedence_range.min;
bool result;
if (new_precedence > old_precedence)
result = true;
else if (new_precedence < old_precedence)
result = false;
else if (new_action.is_string && !old_action.is_string)
result = true;
else if (old_action.is_string && !new_action.is_string)
result = false;
else if (new_action.symbol.index < old_action.symbol.index)
result = true;
else
result = false;
bool result;
if (new_precedence > old_precedence)
result = true;
else if (new_precedence < old_precedence)
result = false;
else if (new_action.is_string && !old_action.is_string)
result = true;
else if (old_action.is_string && !new_action.is_string)
result = false;
else if (new_action.symbol.index < old_action.symbol.index)
result = true;
else
result = false;
if (result)
fragile_tokens.insert(old_action.symbol);
else
fragile_tokens.insert(new_action.symbol);
if (result)
fragile_tokens.insert(old_action.symbol);
else
fragile_tokens.insert(new_action.symbol);
return result;
}
case LexActionTypeAdvance:
if (old_precedence > new_action.precedence_range.max)
return false;
else
return true;
default:
return false;
}
return true;
}
default:
return false;
}
return result;
}
} // namespace build_tables

View file

@ -7,13 +7,15 @@
namespace tree_sitter {
class LexAction;
struct AdvanceAction;
struct AcceptTokenAction;
namespace build_tables {
class LexConflictManager {
public:
bool resolve(const LexAction &, const LexAction &);
bool resolve(const AdvanceAction &, const AcceptTokenAction &);
bool resolve(const AcceptTokenAction &, const AcceptTokenAction &);
std::set<rules::Symbol> fragile_tokens;
};

View file

@ -7,7 +7,7 @@
namespace tree_sitter {
namespace build_tables {
template <typename StateType, typename ActionType, int advance_action>
template <typename StateType, typename ActionType>
std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states) {
std::map<size_t, size_t> replacements;
@ -46,13 +46,12 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
}
for (StateType &state : *states)
state.each_action([&duplicates, &new_replacements](ActionType *action) {
if (action->type == advance_action) {
state.each_advance_action(
[&duplicates, &new_replacements](ActionType *action) {
auto new_replacement = new_replacements.find(action->state_index);
if (new_replacement != new_replacements.end())
action->state_index = new_replacement->second;
}
});
});
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
states->erase(states->begin() + i->first);

View file

@ -253,14 +253,18 @@ class CCodeGenerator {
}
void add_lex_state(const LexState &lex_state) {
auto expected_inputs = lex_state.expected_inputs();
if (lex_state.is_token_start)
line("START_TOKEN();");
for (const auto &pair : lex_state.actions)
for (const auto &pair : lex_state.advance_actions)
if (!pair.first.is_empty())
_if([&]() { add_character_set_condition(pair.first); },
[&]() { add_lex_actions(pair.second, expected_inputs); });
add_lex_actions(lex_state.default_action, expected_inputs);
[&]() { add_advance_action(pair.second); });
if (lex_state.accept_action.is_present())
add_accept_token_action(lex_state.accept_action);
else
line("LEX_ERROR();");
}
void add_character_set_condition(const rules::CharacterSet &rule) {
@ -304,23 +308,15 @@ class CCodeGenerator {
}
}
void add_lex_actions(const LexAction &action,
const set<rules::CharacterSet> &expected_inputs) {
switch (action.type) {
case LexActionTypeAdvance:
line("ADVANCE(" + to_string(action.state_index) + ");");
break;
case LexActionTypeAccept:
line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
break;
case LexActionTypeAcceptFragile:
line("ACCEPT_FRAGILE_TOKEN(" + symbol_id(action.symbol) + ");");
break;
case LexActionTypeError:
line("LEX_ERROR();");
break;
default: {}
}
// Emits an ADVANCE(...) line whose argument is the action's state index.
void add_advance_action(const AdvanceAction &action) {
  const auto target_state = to_string(action.state_index);
  line("ADVANCE(" + target_state + ");");
}
// Emits the line that accepts the action's token: ACCEPT_FRAGILE_TOKEN(...)
// when the action is marked fragile, ACCEPT_TOKEN(...) otherwise.
void add_accept_token_action(const AcceptTokenAction &action) {
  const char *macro_open =
      action.is_fragile ? "ACCEPT_FRAGILE_TOKEN(" : "ACCEPT_TOKEN(";
  line(macro_open + symbol_id(action.symbol) + ");");
}
void add_parse_action_list() {

View file

@ -1,5 +1,6 @@
#include "compiler/lex_table.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {
@ -11,56 +12,53 @@ using std::set;
using rules::Symbol;
using rules::CharacterSet;
LexAction::LexAction()
: type(LexActionTypeError),
symbol(Symbol(-1)),
state_index(-1),
precedence_range({ 0, 0 }) {}
// Default constructor: initializes state_index to -1. precedence_range is not
// listed in the initializer list, so it is default-initialized.
AdvanceAction::AdvanceAction() : state_index(-1) {}
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol,
PrecedenceRange precedence_range, bool is_string)
: type(type),
symbol(symbol),
state_index(state_index),
precedence_range(precedence_range),
is_string(is_string) {}
// Constructs an advance action from the given state index and precedence
// range, storing both unchanged.
AdvanceAction::AdvanceAction(size_t state_index,
PrecedenceRange precedence_range)
: state_index(state_index), precedence_range(precedence_range) {}
LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, Symbol(-1), { 0, 0 }, false);
// Two advance actions are equal when both their state indices and their
// precedence ranges match.
bool AdvanceAction::operator==(const AdvanceAction &other) const {
  if (state_index != other.state_index)
    return false;
  return precedence_range == other.precedence_range;
}
LexAction LexAction::Advance(size_t state_index,
PrecedenceRange precedence_range) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1),
precedence_range, false);
// Default constructor: builds an "absent" accept-token action. The symbol is
// rules::NONE(), precedence is 0, and both flags are false.
AcceptTokenAction::AcceptTokenAction()
: symbol(rules::NONE()), precedence(0), is_string(false), is_fragile(false) {}
// Constructs an accept-token action for the given symbol, precedence and
// is_string flag. is_fragile always starts out false.
AcceptTokenAction::AcceptTokenAction(Symbol symbol, int precedence,
bool is_string)
: symbol(symbol),
precedence(precedence),
is_string(is_string),
is_fragile(false) {}
// Returns true when this action's symbol is anything other than
// rules::NONE().
bool AcceptTokenAction::is_present() const {
return symbol != rules::NONE();
}
LexAction LexAction::Accept(Symbol symbol, int precedence, bool is_string) {
return LexAction(LexActionTypeAccept, -1, symbol, { precedence, precedence },
is_string);
}
bool LexAction::operator==(const LexAction &other) const {
return (type == other.type) && (state_index == other.state_index) &&
(symbol == other.symbol);
// Two accept-token actions are equal when their symbol, precedence, is_string
// and is_fragile fields all match.
bool AcceptTokenAction::operator==(const AcceptTokenAction &other) const {
  if (!(symbol == other.symbol))
    return false;
  if (precedence != other.precedence)
    return false;
  if (is_string != other.is_string)
    return false;
  return is_fragile == other.is_fragile;
}
LexState::LexState() : is_token_start(false) {}
set<CharacterSet> LexState::expected_inputs() const {
set<CharacterSet> result;
for (auto &pair : actions)
for (auto &pair : advance_actions)
result.insert(pair.first);
return result;
}
bool LexState::operator==(const LexState &other) const {
return actions == other.actions && default_action == other.default_action &&
return advance_actions == other.advance_actions &&
accept_action == other.accept_action &&
is_token_start == other.is_token_start;
}
void LexState::each_action(function<void(LexAction *)> fn) {
for (auto &entry : actions)
// Calls fn once for every entry in advance_actions, passing a mutable pointer
// to the stored action so the callback can modify it in place.
void LexState::each_advance_action(function<void(AdvanceAction *)> fn) {
  for (auto it = advance_actions.begin(); it != advance_actions.end(); ++it)
    fn(&it->second);
}

View file

@ -18,38 +18,32 @@ typedef enum {
LexActionTypeAdvance
} LexActionType;
class LexAction {
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol,
PrecedenceRange precedence_range, bool is_string);
struct AdvanceAction {
AdvanceAction();
AdvanceAction(size_t, PrecedenceRange);
public:
LexAction();
static LexAction Accept(rules::Symbol symbol, int precedence, bool is_string);
static LexAction Error();
static LexAction Advance(size_t state_index, PrecedenceRange precedence_range);
bool operator==(const LexAction &action) const;
bool operator==(const AdvanceAction &action) const;
LexActionType type;
rules::Symbol symbol;
size_t state_index;
PrecedenceRange precedence_range;
};
// Lexical action that accepts a token.
struct AcceptTokenAction {
// Constructors taking no arguments, or (symbol, precedence, is_string).
AcceptTokenAction();
AcceptTokenAction(rules::Symbol, int, bool);
// Reports whether this action is present (as opposed to an absent placeholder).
bool is_present() const;
bool operator==(const AcceptTokenAction &action) const;
// The token symbol to accept.
rules::Symbol symbol;
// Precedence of the token; compared when resolving lexical conflicts.
int precedence;
// Whether the token is string-based (as opposed to regexp-based).
bool is_string;
// Whether the token has been marked fragile.
bool is_fragile;
};
} // namespace tree_sitter
namespace std {
template <>
struct hash<tree_sitter::LexAction> {
size_t operator()(const tree_sitter::LexAction &action) const {
return (hash<int>()(action.type) ^
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
hash<size_t>()(action.state_index));
}
};
} // namespace std
namespace std {} // namespace std
namespace tree_sitter {
@ -58,10 +52,10 @@ class LexState {
LexState();
std::set<rules::CharacterSet> expected_inputs() const;
bool operator==(const LexState &) const;
void each_action(std::function<void(LexAction *)>);
void each_advance_action(std::function<void(AdvanceAction *)>);
std::map<rules::CharacterSet, LexAction> actions;
LexAction default_action;
std::map<rules::CharacterSet, AdvanceAction> advance_actions;
AcceptTokenAction accept_action;
bool is_token_start;
};

View file

@ -128,10 +128,11 @@ set<Symbol> ParseState::expected_inputs() const {
return result;
}
void ParseState::each_action(function<void(ParseAction *)> fn) {
void ParseState::each_advance_action(function<void(ParseAction *)> fn) {
for (auto &entry : actions)
for (ParseAction &action : entry.second)
fn(&action);
if (action.type == ParseActionTypeShift)
fn(&action);
}
bool ParseState::operator==(const ParseState &other) const {

View file

@ -81,7 +81,7 @@ class ParseState {
ParseState();
std::set<rules::Symbol> expected_inputs() const;
bool operator==(const ParseState &) const;
void each_action(std::function<void(ParseAction *)>);
void each_advance_action(std::function<void(ParseAction *)>);
std::map<rules::Symbol, std::vector<ParseAction>> actions;
LexStateId lex_state_id;