Distinguish separators from main tokens via a property on transitions

It was incorrect to store this distinction as a property on the lexical states themselves.
This commit is contained in:
Max Brunsfeld 2016-05-19 16:25:44 -07:00
parent 59712ec492
commit a3679fbb1f
13 changed files with 157 additions and 153 deletions

View file

@ -34,7 +34,7 @@ using rules::Repeat;
using rules::Symbol;
using rules::Metadata;
using rules::Seq;
using rules::START_TOKEN;
using rules::MAIN_TOKEN;
using rules::PRECEDENCE;
using rules::IS_ACTIVE;
@ -79,7 +79,6 @@ class LexTableBuilder {
lex_state_ids[item_set] = state_id;
add_accept_token_actions(item_set, state_id);
add_advance_actions(item_set, state_id);
add_token_start(item_set, state_id);
return state_id;
} else {
return pair->second;
@ -90,7 +89,7 @@ class LexTableBuilder {
for (const auto &pair : item_set.transitions()) {
const CharacterSet &characters = pair.first;
const LexItemSet::Transition &transition = pair.second;
AdvanceAction action(-1, transition.precedence);
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
auto current_action = lex_table.state(state_id).accept_action;
if (conflict_manager.resolve(action, current_action)) {
@ -114,12 +113,6 @@ class LexTableBuilder {
}
}
// Sets the `is_token_start` flag on the lex state identified by `state_id`
// whenever at least one entry of `item_set` reports is_token_start().
// States whose item sets contain no such entry are left untouched.
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
  for (const auto &entry : item_set.entries) {
    if (!entry.is_token_start()) {
      continue;
    }
    lex_table.state(state_id).is_token_start = true;
  }
}
void mark_fragile_tokens() {
for (LexState &state : lex_table.states)
if (state.accept_action.is_present())
@ -152,8 +145,8 @@ class LexTableBuilder {
symbol,
Metadata::build(
Seq::build({
Metadata::build(separator_rule, { { START_TOKEN, true } }),
Metadata::build(rule, { { PRECEDENCE, 0 } }),
separator_rule,
Metadata::build(rule, { { PRECEDENCE, 0 }, { MAIN_TOKEN, 1 } }),
}),
{ { PRECEDENCE, INT_MIN }, { IS_ACTIVE, true } })));
return result;

View file

@ -26,28 +26,6 @@ bool LexItem::operator==(const LexItem &other) const {
return (other.lhs == lhs) && other.rule->operator==(*rule);
}
bool LexItem::is_token_start() const {
class IsTokenStart : public rules::RuleFn<bool> {
bool apply_to(const rules::Seq *rule) {
return apply(rule->left) ||
(rule_can_be_blank(rule->left) && apply(rule->right));
}
bool apply_to(const rules::Metadata *rule) {
return (rule->value_for(rules::START_TOKEN).second) || apply(rule->rule);
}
bool apply_to(const rules::Choice *rule) {
for (const rule_ptr &element : rule->elements)
if (apply(element))
return true;
return false;
}
};
return IsTokenStart().apply(rule);
}
LexItem::CompletionStatus LexItem::completion_status() const {
class GetCompletionStatus : public rules::RuleFn<CompletionStatus> {
protected:
@ -119,7 +97,8 @@ LexItemSet::TransitionMap LexItemSet::transitions() const {
}
bool LexItemSet::Transition::operator==(const LexItemSet::Transition &other) const {
return destination == other.destination && precedence == other.precedence;
return destination == other.destination && precedence == other.precedence &&
in_main_token == other.in_main_token;
}
} // namespace build_tables

View file

@ -27,7 +27,6 @@ class LexItem {
};
bool operator==(const LexItem &other) const;
bool is_token_start() const;
CompletionStatus completion_status() const;
rules::Symbol lhs;
@ -56,6 +55,7 @@ class LexItemSet {
// One entry of a LexItemSet's transition map: what is reached, and with what
// attributes, after consuming a set of characters.
struct LexItemSet::Transition {
// The item set reached by taking this transition.
LexItemSet destination;
// The range of precedence values accumulated for this transition.
PrecedenceRange precedence;
// True when the transition was produced under MAIN_TOKEN metadata.
// NOTE(review): presumably false while still consuming the separator that
// precedes a token — confirm against TransitionBuilder.
bool in_main_token;
// Equality comparison against another transition (defined out of line).
bool operator==(const LexItemSet::Transition &) const;
};

View file

@ -32,6 +32,7 @@ using rules::Repeat;
using rules::Metadata;
using rules::PRECEDENCE;
using rules::IS_ACTIVE;
using rules::MAIN_TOKEN;
typedef LexItemSet::Transition Transition;
typedef LexItemSet::TransitionMap TransitionMap;
@ -39,13 +40,15 @@ class TransitionBuilder : public rules::RuleFn<void> {
TransitionMap *transitions;
const Symbol &item_lhs;
vector<int> *precedence_stack;
bool in_main_token;
Transition transform_transition(const Transition &transition,
function<rule_ptr(rule_ptr)> callback) {
LexItemSet destination;
for (const LexItem &item : transition.destination.entries)
destination.entries.insert(LexItem(item.lhs, callback(item.rule)));
return Transition{ destination, transition.precedence };
return Transition{ destination, transition.precedence,
transition.in_main_token };
}
void add_transition(TransitionMap *transitions, CharacterSet new_characters,
@ -56,8 +59,6 @@ class TransitionBuilder : public rules::RuleFn<void> {
while (iter != transitions->end()) {
CharacterSet existing_characters = iter->first;
Transition &existing_transition = iter->second;
LexItemSet &existing_item_set = existing_transition.destination;
PrecedenceRange &existing_precedence = existing_transition.precedence;
CharacterSet intersecting_characters =
existing_characters.remove_set(new_characters);
@ -70,17 +71,17 @@ class TransitionBuilder : public rules::RuleFn<void> {
if (!existing_characters.is_empty())
new_entries.push_back({
existing_characters,
Transition{ existing_item_set, existing_precedence },
existing_characters, existing_transition,
});
existing_item_set.entries.insert(
existing_transition.destination.entries.insert(
new_transition.destination.entries.begin(),
new_transition.destination.entries.end());
existing_precedence.add(new_transition.precedence);
existing_transition.precedence.add(new_transition.precedence);
existing_transition.in_main_token |= new_transition.in_main_token;
new_entries.push_back({
intersecting_characters,
Transition{ existing_item_set, existing_precedence },
intersecting_characters, existing_transition,
});
transitions->erase(iter++);
@ -97,11 +98,11 @@ class TransitionBuilder : public rules::RuleFn<void> {
if (!precedence_stack->empty())
precedence.add(precedence_stack->back());
add_transition(
transitions, *character_set,
Transition{
LexItemSet({ LexItem(item_lhs, Blank::build()) }), precedence,
});
add_transition(transitions, *character_set,
Transition{
LexItemSet({ LexItem(item_lhs, Blank::build()) }),
precedence, in_main_token,
});
}
void apply_to(const Choice *choice) {
@ -144,6 +145,9 @@ class TransitionBuilder : public rules::RuleFn<void> {
if (has_active_precedence)
precedence_stack->push_back(metadata->value_for(PRECEDENCE).first);
if (metadata->value_for(MAIN_TOKEN).second)
in_main_token = true;
auto metadata_value = metadata->value;
if (metadata_value.count(PRECEDENCE))
metadata_value.insert({ IS_ACTIVE, true });
@ -165,20 +169,23 @@ class TransitionBuilder : public rules::RuleFn<void> {
public:
TransitionBuilder(TransitionMap *transitions, const Symbol &item_lhs,
vector<int> *precedence_stack)
vector<int> *precedence_stack, bool in_main_token)
: transitions(transitions),
item_lhs(item_lhs),
precedence_stack(precedence_stack) {}
precedence_stack(precedence_stack),
in_main_token(in_main_token) {}
TransitionBuilder(TransitionMap *transitions, TransitionBuilder *other)
: transitions(transitions),
item_lhs(other->item_lhs),
precedence_stack(other->precedence_stack) {}
precedence_stack(other->precedence_stack),
in_main_token(other->in_main_token) {}
};
void lex_item_transitions(TransitionMap *transitions, const LexItem &item) {
vector<int> precedence_stack;
TransitionBuilder(transitions, item.lhs, &precedence_stack).apply(item.rule);
TransitionBuilder(transitions, item.lhs, &precedence_stack, false)
.apply(item.rule);
}
} // namespace build_tables

View file

@ -328,7 +328,10 @@ class CCodeGenerator {
}
void add_advance_action(const AdvanceAction &action) {
line("ADVANCE(" + to_string(action.state_index) + ");");
if (action.in_main_token)
line("ADVANCE(" + to_string(action.state_index) + ");");
else
line("SKIP(" + to_string(action.state_index) + ");");
}
void add_accept_token_action(const AcceptTokenAction &action) {
@ -529,6 +532,10 @@ class CCodeGenerator {
}
}
// Renders a bool as the literal text "true" or "false".
string _boolean(bool value) {
  if (value) {
    return "true";
  }
  return "false";
}
bool has_sanitized_name(string name) {
for (const auto &pair : sanitized_names)
if (pair.second == name)

View file

@ -15,8 +15,11 @@ using rules::CharacterSet;
AdvanceAction::AdvanceAction() : state_index(-1) {}
AdvanceAction::AdvanceAction(size_t state_index,
PrecedenceRange precedence_range)
: state_index(state_index), precedence_range(precedence_range) {}
PrecedenceRange precedence_range,
bool in_main_token)
: state_index(state_index),
precedence_range(precedence_range),
in_main_token(in_main_token) {}
bool AdvanceAction::operator==(const AdvanceAction &other) const {
return (state_index == other.state_index) &&

View file

@ -20,12 +20,13 @@ typedef enum {
struct AdvanceAction {
AdvanceAction();
AdvanceAction(size_t, PrecedenceRange);
AdvanceAction(size_t, PrecedenceRange, bool);
bool operator==(const AdvanceAction &action) const;
size_t state_index;
PrecedenceRange precedence_range;
bool in_main_token;
};
struct AcceptTokenAction {

View file

@ -15,7 +15,7 @@ enum Associativity {
};
enum MetadataKey {
START_TOKEN,
MAIN_TOKEN,
PRECEDENCE,
ASSOCIATIVITY,
IS_TOKEN,

View file

@ -52,20 +52,15 @@ static void ts_lexer__start(TSLexer *self, TSStateId lex_state) {
LOG_LOOKAHEAD();
self->starting_state = lex_state;
self->token_start_position = self->current_position;
if (!self->chunk)
ts_lexer__get_chunk(self);
if (!self->lookahead_size)
ts_lexer__get_lookahead(self);
}
/*
 * Records the lexer's current position as the start of the token.
 *
 * Emits a "start_token" message via LOG containing the current position's
 * chars, rows and columns, then copies `current_position` into
 * `token_start_position`. `current_position` itself is not modified.
 */
static void ts_lexer__start_token(TSLexer *self) {
LOG("start_token chars:%lu, rows:%lu, columns:%lu",
self->current_position.chars, self->current_position.rows,
self->current_position.columns);
/* Everything lexed from this point on is measured from here. */
self->token_start_position = self->current_position;
}
static bool ts_lexer__advance(TSLexer *self, TSStateId state) {
static bool ts_lexer__advance(TSLexer *self, TSStateId state,
bool in_main_token) {
LOG("advance state:%d", state);
if (self->chunk == empty_chunk)
@ -83,6 +78,9 @@ static bool ts_lexer__advance(TSLexer *self, TSStateId state) {
}
}
if (!in_main_token)
self->token_start_position = self->current_position;
if (self->current_position.bytes >= self->chunk_start + self->chunk_size)
ts_lexer__get_chunk(self);
@ -125,7 +123,6 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol,
void ts_lexer_init(TSLexer *self) {
*self = (TSLexer){
.start_fn = ts_lexer__start,
.start_token_fn = ts_lexer__start_token,
.advance_fn = ts_lexer__advance,
.accept_fn = ts_lexer__accept,
.chunk = NULL,

View file

@ -501,7 +501,7 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
fprintf(f, "rankdir=\"RL\";\n");
fprintf(f, "edge [arrowhead=none]\n");
Array(StackNode *) visited_nodes = array_new();
Array(StackNode *)visited_nodes = array_new();
array_clear(&self->pop_paths);
for (size_t i = 0; i < self->heads.size; i++) {