Clean up lex table generation
This commit is contained in:
parent
31cc6e6f9c
commit
59712ec492
7 changed files with 182 additions and 144 deletions
|
|
@ -3,6 +3,7 @@
|
|||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
|
@ -13,7 +14,7 @@ describe("LexItem", []() {
|
|||
describe("is_token_start()", [&]() {
|
||||
Symbol sym(1);
|
||||
rule_ptr token_start = make_shared<Metadata>(str("a"), map<MetadataKey, int>({
|
||||
{ START_TOKEN, 1 }
|
||||
{ START_TOKEN, true }
|
||||
}));
|
||||
|
||||
it("returns true for rules designated as token starts", [&]() {
|
||||
|
|
@ -155,7 +156,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
}
|
||||
})));
|
||||
|
||||
LexItemSet item_set2 = transitions[CharacterSet().include('v')].first;
|
||||
LexItemSet item_set2 = transitions[CharacterSet().include('v')].destination;
|
||||
transitions = item_set2.transitions();
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -180,7 +181,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
}
|
||||
})));
|
||||
|
||||
LexItemSet item_set3 = transitions[CharacterSet().include('w')].first;
|
||||
LexItemSet item_set3 = transitions[CharacterSet().include('w')].destination;
|
||||
transitions = item_set3.transitions();
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -202,7 +203,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
}
|
||||
})));
|
||||
|
||||
LexItemSet item_set4 = transitions[CharacterSet().include('x')].first;
|
||||
LexItemSet item_set4 = transitions[CharacterSet().include('x')].destination;
|
||||
transitions = item_set4.transitions();
|
||||
|
||||
AssertThat(
|
||||
|
|
|
|||
|
|
@ -114,6 +114,10 @@ ostream &operator<<(ostream &stream, const LexItemSet &item_set) {
|
|||
return stream << item_set.entries;
|
||||
}
|
||||
|
||||
// Stream helper for LexItemSet::Transition.
// Writes the transition as "{dest: <destination>, prec: <precedence>}" using
// the stream operators of the two members, and returns `stream` for chaining.
ostream &operator<<(ostream &stream, const LexItemSet::Transition &transition) {
|
||||
return stream << "{dest: " << transition.destination << ", prec: " << transition.precedence << "}";
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const ParseItem &item) {
|
||||
return stream << string("(item variable:") << to_string(item.variable_index)
|
||||
<< string(" production:") << to_string((size_t)item.production % 1000)
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
#include <vector>
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/compile_error.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
using std::cout;
|
||||
|
||||
|
|
@ -122,6 +123,7 @@ class LookaheadSet;
|
|||
|
||||
ostream &operator<<(ostream &, const LexItem &);
|
||||
ostream &operator<<(ostream &, const LexItemSet &);
|
||||
ostream &operator<<(ostream &, const LexItemSet::Transition &);
|
||||
ostream &operator<<(ostream &, const ParseItem &);
|
||||
ostream &operator<<(ostream &, const ParseItemSet &);
|
||||
ostream &operator<<(ostream &, const LookaheadSet &);
|
||||
|
|
|
|||
|
|
@ -21,29 +21,37 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::make_shared;
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::unordered_map;
|
||||
using std::vector;
|
||||
using std::make_shared;
|
||||
using std::unordered_map;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::CharacterSet;
|
||||
using rules::Repeat;
|
||||
using rules::Symbol;
|
||||
using rules::Metadata;
|
||||
using rules::Seq;
|
||||
using rules::START_TOKEN;
|
||||
using rules::PRECEDENCE;
|
||||
using rules::IS_ACTIVE;
|
||||
|
||||
class LexTableBuilder {
|
||||
const LexicalGrammar lex_grammar;
|
||||
LexConflictManager conflict_manager;
|
||||
ParseTable *parse_table;
|
||||
unordered_map<const LexItemSet, LexStateId, LexItemSet::Hash> lex_state_ids;
|
||||
LexTable lex_table;
|
||||
ParseTable *parse_table;
|
||||
const LexicalGrammar lex_grammar;
|
||||
vector<rule_ptr> separator_rules;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<const LexItemSet, LexStateId, LexItemSet::Hash> lex_state_ids;
|
||||
|
||||
public:
|
||||
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
|
||||
: lex_grammar(lex_grammar), parse_table(parse_table) {
|
||||
: parse_table(parse_table), lex_grammar(lex_grammar) {
|
||||
for (const rule_ptr &rule : lex_grammar.separators)
|
||||
separator_rules.push_back(rules::Repeat::build(rule));
|
||||
separator_rules.push_back(rules::Blank::build());
|
||||
separator_rules.push_back(Repeat::build(rule));
|
||||
separator_rules.push_back(Blank::build());
|
||||
}
|
||||
|
||||
LexTable build() {
|
||||
|
|
@ -59,44 +67,9 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
private:
|
||||
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
|
||||
LexItemSet result;
|
||||
for (const Symbol &symbol : symbols) {
|
||||
vector<rule_ptr> rules;
|
||||
if (symbol == rules::END_OF_INPUT()) {
|
||||
rules.push_back(CharacterSet().include(0).copy());
|
||||
} else if (symbol.is_token) {
|
||||
rule_ptr rule = lex_grammar.variables[symbol.index].rule;
|
||||
|
||||
auto choice = rule->as<rules::Choice>();
|
||||
if (choice)
|
||||
for (const rule_ptr &element : choice->elements)
|
||||
rules.push_back(element);
|
||||
else
|
||||
rules.push_back(rule);
|
||||
}
|
||||
|
||||
for (const rule_ptr &rule : rules)
|
||||
for (const rule_ptr &separator_rule : separator_rules)
|
||||
result.entries.insert(LexItem(
|
||||
symbol,
|
||||
rules::Metadata::build(
|
||||
rules::Seq::build({
|
||||
rules::Metadata::build(separator_rule,
|
||||
{ { rules::START_TOKEN, 1 } }),
|
||||
rules::Metadata::build(rule, { { rules::PRECEDENCE, 0 } }),
|
||||
}),
|
||||
{
|
||||
{ rules::PRECEDENCE, INT_MIN }, { rules::IS_ACTIVE, true },
|
||||
})));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void add_lex_state_for_parse_state(ParseState *parse_state) {
|
||||
parse_state->lex_state_id =
|
||||
add_lex_state(build_lex_item_set(parse_state->expected_inputs()));
|
||||
add_lex_state(item_set_for_tokens(parse_state->expected_inputs()));
|
||||
}
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
|
|
@ -114,16 +87,15 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const auto &transition : item_set.transitions()) {
|
||||
const CharacterSet &rule = transition.first;
|
||||
const LexItemSet &new_item_set = transition.second.first;
|
||||
const PrecedenceRange &precedence = transition.second.second;
|
||||
AdvanceAction action(-1, precedence);
|
||||
for (const auto &pair : item_set.transitions()) {
|
||||
const CharacterSet &characters = pair.first;
|
||||
const LexItemSet::Transition &transition = pair.second;
|
||||
AdvanceAction action(-1, transition.precedence);
|
||||
|
||||
auto current_action = lex_table.state(state_id).accept_action;
|
||||
if (conflict_manager.resolve(action, current_action)) {
|
||||
action.state_index = add_lex_state(new_item_set);
|
||||
lex_table.state(state_id).advance_actions[rule] = action;
|
||||
action.state_index = add_lex_state(transition.destination);
|
||||
lex_table.state(state_id).advance_actions[characters] = action;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -170,6 +142,38 @@ class LexTableBuilder {
|
|||
parse_state.lex_state_id = replacement->second;
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the LexItemSet used to lex the given set of symbols.
//
// For every symbol, every rule returned by rules_for_symbol(symbol), and every
// entry of separator_rules, one LexItem is inserted. Its rule is a sequence of
//   1. the separator rule, tagged with START_TOKEN, followed by
//   2. the symbol's rule, tagged with PRECEDENCE 0,
// and the whole sequence is wrapped in metadata carrying PRECEDENCE INT_MIN and
// IS_ACTIVE.
//
// Symbols for which rules_for_symbol() returns an empty vector contribute no
// items. Returns the accumulated item set.
LexItemSet item_set_for_tokens(const set<Symbol> &symbols) {
|
||||
LexItemSet result;
|
||||
for (const Symbol &symbol : symbols)
|
||||
for (const rule_ptr &rule : rules_for_symbol(symbol))
|
||||
for (const rule_ptr &separator_rule : separator_rules)
|
||||
// One item per (symbol, rule, separator) combination.
result.entries.insert(LexItem(
|
||||
symbol,
|
||||
Metadata::build(
|
||||
Seq::build({
|
||||
Metadata::build(separator_rule, { { START_TOKEN, true } }),
|
||||
Metadata::build(rule, { { PRECEDENCE, 0 } }),
|
||||
}),
|
||||
{ { PRECEDENCE, INT_MIN }, { IS_ACTIVE, true } })));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns the list of rules that should be lexed for `symbol`:
//   - an empty vector when symbol.is_token is false;
//   - a single CharacterSet containing character 0 when the symbol equals
//     rules::END_OF_INPUT();
//   - otherwise the rule stored at lex_grammar.variables[symbol.index], split
//     into its elements when that rule is a Choice, or returned as a
//     one-element vector when it is not.
//
// NOTE(review): the is_token test runs before the END_OF_INPUT test, so the
// END_OF_INPUT branch is only reachable if that symbol has is_token set —
// confirm against the definition of rules::END_OF_INPUT().
vector<rule_ptr> rules_for_symbol(const rules::Symbol &symbol) {
|
||||
if (!symbol.is_token)
|
||||
return {};
|
||||
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
return { CharacterSet().include(0).copy() };
|
||||
|
||||
rule_ptr rule = lex_grammar.variables[symbol.index].rule;
|
||||
|
||||
// `choice` is tested for truthiness below; the non-Choice case falls through
// to the single-rule return.
auto choice = rule->as<Choice>();
|
||||
if (choice)
|
||||
return choice->elements;
|
||||
else
|
||||
return { rule };
|
||||
}
|
||||
};
|
||||
|
||||
LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {
|
||||
|
|
|
|||
|
|
@ -118,5 +118,9 @@ LexItemSet::TransitionMap LexItemSet::transitions() const {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Two transitions are equal when both their destination item sets and their
// precedence ranges compare equal.
bool LexItemSet::Transition::operator==(const LexItemSet::Transition &other) const {
|
||||
return destination == other.destination && precedence == other.precedence;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -39,19 +39,27 @@ class LexItemSet {
|
|||
LexItemSet();
|
||||
explicit LexItemSet(const std::unordered_set<LexItem, LexItem::Hash> &);
|
||||
|
||||
typedef std::map<rules::CharacterSet, std::pair<LexItemSet, PrecedenceRange>>
|
||||
TransitionMap;
|
||||
bool operator==(const LexItemSet &) const;
|
||||
|
||||
struct Hash {
|
||||
size_t operator()(const LexItemSet &) const;
|
||||
};
|
||||
|
||||
bool operator==(const LexItemSet &) const;
|
||||
struct Transition;
|
||||
typedef std::map<rules::CharacterSet, Transition> TransitionMap;
|
||||
|
||||
TransitionMap transitions() const;
|
||||
|
||||
std::unordered_set<LexItem, LexItem::Hash> entries;
|
||||
};
|
||||
|
||||
// Mapped value of LexItemSet::TransitionMap (which is keyed by
// rules::CharacterSet): pairs the item set a transition leads to with the
// precedence range associated with that transition.
struct LexItemSet::Transition {
|
||||
// Item set reached by taking this transition.
LexItemSet destination;
|
||||
// Precedence range recorded for this transition.
PrecedenceRange precedence;
|
||||
|
||||
// Member-wise equality on destination and precedence (defined out of line).
bool operator==(const LexItemSet::Transition &) const;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
|
|
|
|||
|
|
@ -23,147 +23,162 @@ using std::map;
|
|||
using std::pair;
|
||||
using std::vector;
|
||||
using rules::CharacterSet;
|
||||
using rules::Symbol;
|
||||
using rules::Blank;
|
||||
using rules::MetadataKey;
|
||||
using rules::Choice;
|
||||
using rules::Seq;
|
||||
using rules::Repeat;
|
||||
using rules::Metadata;
|
||||
using rules::PRECEDENCE;
|
||||
using rules::IS_ACTIVE;
|
||||
typedef LexItemSet::Transition Transition;
|
||||
typedef LexItemSet::TransitionMap TransitionMap;
|
||||
|
||||
class LexItemTransitions : public rules::RuleFn<void> {
|
||||
LexItemSet::TransitionMap *transitions;
|
||||
const rules::Symbol &item_lhs;
|
||||
class TransitionBuilder : public rules::RuleFn<void> {
|
||||
TransitionMap *transitions;
|
||||
const Symbol &item_lhs;
|
||||
vector<int> *precedence_stack;
|
||||
|
||||
LexItemSet transform_item_set(const LexItemSet &item_set,
|
||||
function<rule_ptr(rule_ptr)> callback) {
|
||||
LexItemSet new_set;
|
||||
for (const LexItem &item : item_set.entries)
|
||||
new_set.entries.insert(LexItem(item.lhs, callback(item.rule)));
|
||||
return new_set;
|
||||
Transition transform_transition(const Transition &transition,
|
||||
function<rule_ptr(rule_ptr)> callback) {
|
||||
LexItemSet destination;
|
||||
for (const LexItem &item : transition.destination.entries)
|
||||
destination.entries.insert(LexItem(item.lhs, callback(item.rule)));
|
||||
return Transition{ destination, transition.precedence };
|
||||
}
|
||||
|
||||
void merge_transition(LexItemSet::TransitionMap *transitions,
|
||||
CharacterSet new_char_set, LexItemSet new_item_set,
|
||||
PrecedenceRange new_precedence_range) {
|
||||
vector<pair<CharacterSet, pair<LexItemSet, PrecedenceRange>>> new_entries;
|
||||
void add_transition(TransitionMap *transitions, CharacterSet new_characters,
|
||||
Transition new_transition) {
|
||||
vector<pair<CharacterSet, Transition>> new_entries;
|
||||
|
||||
auto iter = transitions->begin();
|
||||
while (iter != transitions->end()) {
|
||||
CharacterSet existing_char_set = iter->first;
|
||||
LexItemSet &existing_item_set = iter->second.first;
|
||||
PrecedenceRange &existing_precedence_range = iter->second.second;
|
||||
CharacterSet existing_characters = iter->first;
|
||||
Transition &existing_transition = iter->second;
|
||||
LexItemSet &existing_item_set = existing_transition.destination;
|
||||
PrecedenceRange &existing_precedence = existing_transition.precedence;
|
||||
|
||||
CharacterSet intersection = existing_char_set.remove_set(new_char_set);
|
||||
if (!intersection.is_empty()) {
|
||||
new_char_set.remove_set(intersection);
|
||||
if (!existing_char_set.is_empty())
|
||||
new_entries.push_back(
|
||||
{ existing_char_set,
|
||||
{ existing_item_set, existing_precedence_range } });
|
||||
existing_item_set.entries.insert(new_item_set.entries.begin(),
|
||||
new_item_set.entries.end());
|
||||
existing_precedence_range.add(new_precedence_range);
|
||||
new_entries.push_back(
|
||||
{ intersection, { existing_item_set, existing_precedence_range } });
|
||||
transitions->erase(iter++);
|
||||
} else {
|
||||
CharacterSet intersecting_characters =
|
||||
existing_characters.remove_set(new_characters);
|
||||
if (intersecting_characters.is_empty()) {
|
||||
iter++;
|
||||
continue;
|
||||
}
|
||||
|
||||
new_characters.remove_set(intersecting_characters);
|
||||
|
||||
if (!existing_characters.is_empty())
|
||||
new_entries.push_back({
|
||||
existing_characters,
|
||||
Transition{ existing_item_set, existing_precedence },
|
||||
});
|
||||
|
||||
existing_item_set.entries.insert(
|
||||
new_transition.destination.entries.begin(),
|
||||
new_transition.destination.entries.end());
|
||||
existing_precedence.add(new_transition.precedence);
|
||||
new_entries.push_back({
|
||||
intersecting_characters,
|
||||
Transition{ existing_item_set, existing_precedence },
|
||||
});
|
||||
|
||||
transitions->erase(iter++);
|
||||
}
|
||||
|
||||
transitions->insert(new_entries.begin(), new_entries.end());
|
||||
|
||||
if (!new_char_set.is_empty())
|
||||
transitions->insert(
|
||||
{ new_char_set, { new_item_set, new_precedence_range } });
|
||||
if (!new_characters.is_empty())
|
||||
transitions->insert({ new_characters, new_transition });
|
||||
}
|
||||
|
||||
map<rules::MetadataKey, int> activate_precedence(
|
||||
map<rules::MetadataKey, int> metadata) {
|
||||
if (metadata.count(rules::PRECEDENCE))
|
||||
metadata.insert({ rules::IS_ACTIVE, 1 });
|
||||
return metadata;
|
||||
}
|
||||
|
||||
void apply_to(const CharacterSet *rule) {
|
||||
void apply_to(const CharacterSet *character_set) {
|
||||
PrecedenceRange precedence;
|
||||
if (!precedence_stack->empty())
|
||||
precedence.add(precedence_stack->back());
|
||||
|
||||
merge_transition(transitions, *rule,
|
||||
LexItemSet({ LexItem(item_lhs, rules::Blank::build()) }),
|
||||
precedence);
|
||||
add_transition(
|
||||
transitions, *character_set,
|
||||
Transition{
|
||||
LexItemSet({ LexItem(item_lhs, Blank::build()) }), precedence,
|
||||
});
|
||||
}
|
||||
|
||||
void apply_to(const rules::Choice *rule) {
|
||||
for (const rule_ptr &element : rule->elements)
|
||||
void apply_to(const Choice *choice) {
|
||||
for (const rule_ptr &element : choice->elements)
|
||||
apply(element);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Seq *rule) {
|
||||
LexItemSet::TransitionMap left_transitions;
|
||||
LexItemTransitions(&left_transitions, this).apply(rule->left);
|
||||
void apply_to(const Seq *sequence) {
|
||||
TransitionMap left_transitions;
|
||||
TransitionBuilder(&left_transitions, this).apply(sequence->left);
|
||||
|
||||
for (const auto &pair : left_transitions) {
|
||||
merge_transition(
|
||||
add_transition(
|
||||
transitions, pair.first,
|
||||
transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) {
|
||||
return rules::Seq::build({ item_rule, rule->right });
|
||||
}), pair.second.second);
|
||||
transform_transition(pair.second, [&sequence](rule_ptr rule) {
|
||||
return Seq::build({ rule, sequence->right });
|
||||
}));
|
||||
}
|
||||
|
||||
if (rule_can_be_blank(rule->left))
|
||||
apply(rule->right);
|
||||
if (rule_can_be_blank(sequence->left))
|
||||
apply(sequence->right);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Repeat *rule) {
|
||||
LexItemSet::TransitionMap content_transitions;
|
||||
LexItemTransitions(&content_transitions, this).apply(rule->content);
|
||||
void apply_to(const Repeat *repeat) {
|
||||
TransitionMap content_transitions;
|
||||
TransitionBuilder(&content_transitions, this).apply(repeat->content);
|
||||
|
||||
for (const auto &pair : content_transitions) {
|
||||
merge_transition(transitions, pair.first, pair.second.first,
|
||||
pair.second.second);
|
||||
merge_transition(
|
||||
add_transition(transitions, pair.first, pair.second);
|
||||
add_transition(
|
||||
transitions, pair.first,
|
||||
transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) {
|
||||
return rules::Seq::build({ item_rule, rule->copy() });
|
||||
}), pair.second.second);
|
||||
transform_transition(pair.second, [&repeat](rule_ptr item_rule) {
|
||||
return Seq::build({ item_rule, repeat->copy() });
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
void apply_to(const rules::Metadata *rule) {
|
||||
LexItemSet::TransitionMap content_transitions;
|
||||
auto precedence = rule->value_for(rules::PRECEDENCE);
|
||||
bool has_active_precedence =
|
||||
precedence.second && rule->value_for(rules::IS_ACTIVE).second;
|
||||
void apply_to(const Metadata *metadata) {
|
||||
bool has_active_precedence = metadata->value_for(IS_ACTIVE).second;
|
||||
if (has_active_precedence)
|
||||
precedence_stack->push_back(precedence.first);
|
||||
precedence_stack->push_back(metadata->value_for(PRECEDENCE).first);
|
||||
|
||||
LexItemTransitions(&content_transitions, this).apply(rule->rule);
|
||||
for (const auto &pair : content_transitions)
|
||||
merge_transition(
|
||||
auto metadata_value = metadata->value;
|
||||
if (metadata_value.count(PRECEDENCE))
|
||||
metadata_value.insert({ IS_ACTIVE, true });
|
||||
|
||||
TransitionMap content_transitions;
|
||||
TransitionBuilder(&content_transitions, this).apply(metadata->rule);
|
||||
|
||||
for (const auto &pair : content_transitions) {
|
||||
add_transition(
|
||||
transitions, pair.first,
|
||||
transform_item_set(pair.second.first, [this, &rule](rule_ptr item_rule) {
|
||||
return rules::Metadata::build(item_rule,
|
||||
activate_precedence(rule->value));
|
||||
}), pair.second.second);
|
||||
transform_transition(pair.second, [&metadata_value](rule_ptr rule) {
|
||||
return Metadata::build(rule, metadata_value);
|
||||
}));
|
||||
}
|
||||
|
||||
if (has_active_precedence)
|
||||
precedence_stack->pop_back();
|
||||
}
|
||||
|
||||
public:
|
||||
LexItemTransitions(LexItemSet::TransitionMap *transitions,
|
||||
const rules::Symbol &item_lhs,
|
||||
vector<int> *precedence_stack)
|
||||
TransitionBuilder(TransitionMap *transitions, const Symbol &item_lhs,
|
||||
vector<int> *precedence_stack)
|
||||
: transitions(transitions),
|
||||
item_lhs(item_lhs),
|
||||
precedence_stack(precedence_stack) {}
|
||||
|
||||
LexItemTransitions(LexItemSet::TransitionMap *transitions,
|
||||
LexItemTransitions *other)
|
||||
TransitionBuilder(TransitionMap *transitions, TransitionBuilder *other)
|
||||
: transitions(transitions),
|
||||
item_lhs(other->item_lhs),
|
||||
precedence_stack(other->precedence_stack) {}
|
||||
};
|
||||
|
||||
void lex_item_transitions(LexItemSet::TransitionMap *transitions,
|
||||
const LexItem &item) {
|
||||
void lex_item_transitions(TransitionMap *transitions, const LexItem &item) {
|
||||
vector<int> precedence_stack;
|
||||
LexItemTransitions(transitions, item.lhs, &precedence_stack).apply(item.rule);
|
||||
TransitionBuilder(transitions, item.lhs, &precedence_stack).apply(item.rule);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue