Distinguish separators from main tokens via a property on transitions
It was incorrect to store that distinction as a property on the lexical states themselves.
This commit is contained in:
parent
59712ec492
commit
a3679fbb1f
13 changed files with 157 additions and 153 deletions
|
|
@ -34,7 +34,7 @@ using rules::Repeat;
|
|||
using rules::Symbol;
|
||||
using rules::Metadata;
|
||||
using rules::Seq;
|
||||
using rules::START_TOKEN;
|
||||
using rules::MAIN_TOKEN;
|
||||
using rules::PRECEDENCE;
|
||||
using rules::IS_ACTIVE;
|
||||
|
||||
|
|
@ -79,7 +79,6 @@ class LexTableBuilder {
|
|||
lex_state_ids[item_set] = state_id;
|
||||
add_accept_token_actions(item_set, state_id);
|
||||
add_advance_actions(item_set, state_id);
|
||||
add_token_start(item_set, state_id);
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
|
|
@ -90,7 +89,7 @@ class LexTableBuilder {
|
|||
for (const auto &pair : item_set.transitions()) {
|
||||
const CharacterSet &characters = pair.first;
|
||||
const LexItemSet::Transition &transition = pair.second;
|
||||
AdvanceAction action(-1, transition.precedence);
|
||||
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
|
||||
|
||||
auto current_action = lex_table.state(state_id).accept_action;
|
||||
if (conflict_manager.resolve(action, current_action)) {
|
||||
|
|
@ -114,12 +113,6 @@ class LexTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const auto &item : item_set.entries)
|
||||
if (item.is_token_start())
|
||||
lex_table.state(state_id).is_token_start = true;
|
||||
}
|
||||
|
||||
void mark_fragile_tokens() {
|
||||
for (LexState &state : lex_table.states)
|
||||
if (state.accept_action.is_present())
|
||||
|
|
@ -152,8 +145,8 @@ class LexTableBuilder {
|
|||
symbol,
|
||||
Metadata::build(
|
||||
Seq::build({
|
||||
Metadata::build(separator_rule, { { START_TOKEN, true } }),
|
||||
Metadata::build(rule, { { PRECEDENCE, 0 } }),
|
||||
separator_rule,
|
||||
Metadata::build(rule, { { PRECEDENCE, 0 }, { MAIN_TOKEN, 1 } }),
|
||||
}),
|
||||
{ { PRECEDENCE, INT_MIN }, { IS_ACTIVE, true } })));
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -26,28 +26,6 @@ bool LexItem::operator==(const LexItem &other) const {
|
|||
return (other.lhs == lhs) && other.rule->operator==(*rule);
|
||||
}
|
||||
|
||||
bool LexItem::is_token_start() const {
|
||||
class IsTokenStart : public rules::RuleFn<bool> {
|
||||
bool apply_to(const rules::Seq *rule) {
|
||||
return apply(rule->left) ||
|
||||
(rule_can_be_blank(rule->left) && apply(rule->right));
|
||||
}
|
||||
|
||||
bool apply_to(const rules::Metadata *rule) {
|
||||
return (rule->value_for(rules::START_TOKEN).second) || apply(rule->rule);
|
||||
}
|
||||
|
||||
bool apply_to(const rules::Choice *rule) {
|
||||
for (const rule_ptr &element : rule->elements)
|
||||
if (apply(element))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
return IsTokenStart().apply(rule);
|
||||
}
|
||||
|
||||
LexItem::CompletionStatus LexItem::completion_status() const {
|
||||
class GetCompletionStatus : public rules::RuleFn<CompletionStatus> {
|
||||
protected:
|
||||
|
|
@ -119,7 +97,8 @@ LexItemSet::TransitionMap LexItemSet::transitions() const {
|
|||
}
|
||||
|
||||
bool LexItemSet::Transition::operator==(const LexItemSet::Transition &other) const {
|
||||
return destination == other.destination && precedence == other.precedence;
|
||||
return destination == other.destination && precedence == other.precedence &&
|
||||
in_main_token == other.in_main_token;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ class LexItem {
|
|||
};
|
||||
|
||||
bool operator==(const LexItem &other) const;
|
||||
bool is_token_start() const;
|
||||
CompletionStatus completion_status() const;
|
||||
|
||||
rules::Symbol lhs;
|
||||
|
|
@ -56,6 +55,7 @@ class LexItemSet {
|
|||
struct LexItemSet::Transition {
|
||||
LexItemSet destination;
|
||||
PrecedenceRange precedence;
|
||||
bool in_main_token;
|
||||
|
||||
bool operator==(const LexItemSet::Transition &) const;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ using rules::Repeat;
|
|||
using rules::Metadata;
|
||||
using rules::PRECEDENCE;
|
||||
using rules::IS_ACTIVE;
|
||||
using rules::MAIN_TOKEN;
|
||||
typedef LexItemSet::Transition Transition;
|
||||
typedef LexItemSet::TransitionMap TransitionMap;
|
||||
|
||||
|
|
@ -39,13 +40,15 @@ class TransitionBuilder : public rules::RuleFn<void> {
|
|||
TransitionMap *transitions;
|
||||
const Symbol &item_lhs;
|
||||
vector<int> *precedence_stack;
|
||||
bool in_main_token;
|
||||
|
||||
Transition transform_transition(const Transition &transition,
|
||||
function<rule_ptr(rule_ptr)> callback) {
|
||||
LexItemSet destination;
|
||||
for (const LexItem &item : transition.destination.entries)
|
||||
destination.entries.insert(LexItem(item.lhs, callback(item.rule)));
|
||||
return Transition{ destination, transition.precedence };
|
||||
return Transition{ destination, transition.precedence,
|
||||
transition.in_main_token };
|
||||
}
|
||||
|
||||
void add_transition(TransitionMap *transitions, CharacterSet new_characters,
|
||||
|
|
@ -56,8 +59,6 @@ class TransitionBuilder : public rules::RuleFn<void> {
|
|||
while (iter != transitions->end()) {
|
||||
CharacterSet existing_characters = iter->first;
|
||||
Transition &existing_transition = iter->second;
|
||||
LexItemSet &existing_item_set = existing_transition.destination;
|
||||
PrecedenceRange &existing_precedence = existing_transition.precedence;
|
||||
|
||||
CharacterSet intersecting_characters =
|
||||
existing_characters.remove_set(new_characters);
|
||||
|
|
@ -70,17 +71,17 @@ class TransitionBuilder : public rules::RuleFn<void> {
|
|||
|
||||
if (!existing_characters.is_empty())
|
||||
new_entries.push_back({
|
||||
existing_characters,
|
||||
Transition{ existing_item_set, existing_precedence },
|
||||
existing_characters, existing_transition,
|
||||
});
|
||||
|
||||
existing_item_set.entries.insert(
|
||||
existing_transition.destination.entries.insert(
|
||||
new_transition.destination.entries.begin(),
|
||||
new_transition.destination.entries.end());
|
||||
existing_precedence.add(new_transition.precedence);
|
||||
existing_transition.precedence.add(new_transition.precedence);
|
||||
existing_transition.in_main_token |= new_transition.in_main_token;
|
||||
|
||||
new_entries.push_back({
|
||||
intersecting_characters,
|
||||
Transition{ existing_item_set, existing_precedence },
|
||||
intersecting_characters, existing_transition,
|
||||
});
|
||||
|
||||
transitions->erase(iter++);
|
||||
|
|
@ -97,11 +98,11 @@ class TransitionBuilder : public rules::RuleFn<void> {
|
|||
if (!precedence_stack->empty())
|
||||
precedence.add(precedence_stack->back());
|
||||
|
||||
add_transition(
|
||||
transitions, *character_set,
|
||||
Transition{
|
||||
LexItemSet({ LexItem(item_lhs, Blank::build()) }), precedence,
|
||||
});
|
||||
add_transition(transitions, *character_set,
|
||||
Transition{
|
||||
LexItemSet({ LexItem(item_lhs, Blank::build()) }),
|
||||
precedence, in_main_token,
|
||||
});
|
||||
}
|
||||
|
||||
void apply_to(const Choice *choice) {
|
||||
|
|
@ -144,6 +145,9 @@ class TransitionBuilder : public rules::RuleFn<void> {
|
|||
if (has_active_precedence)
|
||||
precedence_stack->push_back(metadata->value_for(PRECEDENCE).first);
|
||||
|
||||
if (metadata->value_for(MAIN_TOKEN).second)
|
||||
in_main_token = true;
|
||||
|
||||
auto metadata_value = metadata->value;
|
||||
if (metadata_value.count(PRECEDENCE))
|
||||
metadata_value.insert({ IS_ACTIVE, true });
|
||||
|
|
@ -165,20 +169,23 @@ class TransitionBuilder : public rules::RuleFn<void> {
|
|||
|
||||
public:
|
||||
TransitionBuilder(TransitionMap *transitions, const Symbol &item_lhs,
|
||||
vector<int> *precedence_stack)
|
||||
vector<int> *precedence_stack, bool in_main_token)
|
||||
: transitions(transitions),
|
||||
item_lhs(item_lhs),
|
||||
precedence_stack(precedence_stack) {}
|
||||
precedence_stack(precedence_stack),
|
||||
in_main_token(in_main_token) {}
|
||||
|
||||
TransitionBuilder(TransitionMap *transitions, TransitionBuilder *other)
|
||||
: transitions(transitions),
|
||||
item_lhs(other->item_lhs),
|
||||
precedence_stack(other->precedence_stack) {}
|
||||
precedence_stack(other->precedence_stack),
|
||||
in_main_token(other->in_main_token) {}
|
||||
};
|
||||
|
||||
void lex_item_transitions(TransitionMap *transitions, const LexItem &item) {
|
||||
vector<int> precedence_stack;
|
||||
TransitionBuilder(transitions, item.lhs, &precedence_stack).apply(item.rule);
|
||||
TransitionBuilder(transitions, item.lhs, &precedence_stack, false)
|
||||
.apply(item.rule);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -328,7 +328,10 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
void add_advance_action(const AdvanceAction &action) {
|
||||
line("ADVANCE(" + to_string(action.state_index) + ");");
|
||||
if (action.in_main_token)
|
||||
line("ADVANCE(" + to_string(action.state_index) + ");");
|
||||
else
|
||||
line("SKIP(" + to_string(action.state_index) + ");");
|
||||
}
|
||||
|
||||
void add_accept_token_action(const AcceptTokenAction &action) {
|
||||
|
|
@ -529,6 +532,10 @@ class CCodeGenerator {
|
|||
}
|
||||
}
|
||||
|
||||
string _boolean(bool value) {
|
||||
return value ? "true" : "false";
|
||||
}
|
||||
|
||||
bool has_sanitized_name(string name) {
|
||||
for (const auto &pair : sanitized_names)
|
||||
if (pair.second == name)
|
||||
|
|
|
|||
|
|
@ -15,8 +15,11 @@ using rules::CharacterSet;
|
|||
// Default constructor. state_index is a size_t, so initializing it with -1
// yields the largest representable size_t value. in_main_token is set to
// false explicitly: it is a plain bool member with no in-class initializer,
// and code that reads it (such as the ADVANCE/SKIP selection in the C code
// generator) must never observe an indeterminate value.
AdvanceAction::AdvanceAction() : state_index(-1), in_main_token(false) {}
|
||||
|
||||
AdvanceAction::AdvanceAction(size_t state_index,
|
||||
PrecedenceRange precedence_range)
|
||||
: state_index(state_index), precedence_range(precedence_range) {}
|
||||
PrecedenceRange precedence_range,
|
||||
bool in_main_token)
|
||||
: state_index(state_index),
|
||||
precedence_range(precedence_range),
|
||||
in_main_token(in_main_token) {}
|
||||
|
||||
bool AdvanceAction::operator==(const AdvanceAction &other) const {
|
||||
return (state_index == other.state_index) &&
|
||||
|
|
|
|||
|
|
@ -20,12 +20,13 @@ typedef enum {
|
|||
|
||||
struct AdvanceAction {
|
||||
AdvanceAction();
|
||||
AdvanceAction(size_t, PrecedenceRange);
|
||||
AdvanceAction(size_t, PrecedenceRange, bool);
|
||||
|
||||
bool operator==(const AdvanceAction &action) const;
|
||||
|
||||
size_t state_index;
|
||||
PrecedenceRange precedence_range;
|
||||
bool in_main_token;
|
||||
};
|
||||
|
||||
struct AcceptTokenAction {
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ enum Associativity {
|
|||
};
|
||||
|
||||
enum MetadataKey {
|
||||
START_TOKEN,
|
||||
MAIN_TOKEN,
|
||||
PRECEDENCE,
|
||||
ASSOCIATIVITY,
|
||||
IS_TOKEN,
|
||||
|
|
|
|||
|
|
@ -52,20 +52,15 @@ static void ts_lexer__start(TSLexer *self, TSStateId lex_state) {
|
|||
LOG_LOOKAHEAD();
|
||||
|
||||
self->starting_state = lex_state;
|
||||
self->token_start_position = self->current_position;
|
||||
if (!self->chunk)
|
||||
ts_lexer__get_chunk(self);
|
||||
if (!self->lookahead_size)
|
||||
ts_lexer__get_lookahead(self);
|
||||
}
|
||||
|
||||
static void ts_lexer__start_token(TSLexer *self) {
|
||||
LOG("start_token chars:%lu, rows:%lu, columns:%lu",
|
||||
self->current_position.chars, self->current_position.rows,
|
||||
self->current_position.columns);
|
||||
self->token_start_position = self->current_position;
|
||||
}
|
||||
|
||||
static bool ts_lexer__advance(TSLexer *self, TSStateId state) {
|
||||
static bool ts_lexer__advance(TSLexer *self, TSStateId state,
|
||||
bool in_main_token) {
|
||||
LOG("advance state:%d", state);
|
||||
|
||||
if (self->chunk == empty_chunk)
|
||||
|
|
@ -83,6 +78,9 @@ static bool ts_lexer__advance(TSLexer *self, TSStateId state) {
|
|||
}
|
||||
}
|
||||
|
||||
if (!in_main_token)
|
||||
self->token_start_position = self->current_position;
|
||||
|
||||
if (self->current_position.bytes >= self->chunk_start + self->chunk_size)
|
||||
ts_lexer__get_chunk(self);
|
||||
|
||||
|
|
@ -125,7 +123,6 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol,
|
|||
void ts_lexer_init(TSLexer *self) {
|
||||
*self = (TSLexer){
|
||||
.start_fn = ts_lexer__start,
|
||||
.start_token_fn = ts_lexer__start_token,
|
||||
.advance_fn = ts_lexer__advance,
|
||||
.accept_fn = ts_lexer__accept,
|
||||
.chunk = NULL,
|
||||
|
|
|
|||
|
|
@ -501,7 +501,7 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
|
|||
fprintf(f, "rankdir=\"RL\";\n");
|
||||
fprintf(f, "edge [arrowhead=none]\n");
|
||||
|
||||
Array(StackNode *) visited_nodes = array_new();
|
||||
Array(StackNode *)visited_nodes = array_new();
|
||||
|
||||
array_clear(&self->pop_paths);
|
||||
for (size_t i = 0; i < self->heads.size; i++) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue