Clean up lex table generation

Max Brunsfeld 2016-05-19 09:56:49 -07:00
parent 31cc6e6f9c
commit 59712ec492
7 changed files with 182 additions and 144 deletions

View file

@@ -3,6 +3,7 @@
#include "compiler/rules/metadata.h"
#include "compiler/rules.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
using namespace rules;
using namespace build_tables;
@@ -13,7 +14,7 @@ describe("LexItem", []() {
describe("is_token_start()", [&]() {
Symbol sym(1);
rule_ptr token_start = make_shared<Metadata>(str("a"), map<MetadataKey, int>({
{ START_TOKEN, 1 }
{ START_TOKEN, true }
}));
it("returns true for rules designated as token starts", [&]() {
@@ -155,7 +156,7 @@ describe("LexItemSet::transitions()", [&]() {
}
})));
LexItemSet item_set2 = transitions[CharacterSet().include('v')].first;
LexItemSet item_set2 = transitions[CharacterSet().include('v')].destination;
transitions = item_set2.transitions();
AssertThat(
@@ -180,7 +181,7 @@ describe("LexItemSet::transitions()", [&]() {
}
})));
LexItemSet item_set3 = transitions[CharacterSet().include('w')].first;
LexItemSet item_set3 = transitions[CharacterSet().include('w')].destination;
transitions = item_set3.transitions();
AssertThat(
@@ -202,7 +203,7 @@ describe("LexItemSet::transitions()", [&]() {
}
})));
LexItemSet item_set4 = transitions[CharacterSet().include('x')].first;
LexItemSet item_set4 = transitions[CharacterSet().include('x')].destination;
transitions = item_set4.transitions();
AssertThat(

View file

@@ -114,6 +114,10 @@ ostream &operator<<(ostream &stream, const LexItemSet &item_set) {
return stream << item_set.entries;
}
ostream &operator<<(ostream &stream, const LexItemSet::Transition &transition) {
return stream << "{dest: " << transition.destination << ", prec: " << transition.precedence << "}";
}
ostream &operator<<(ostream &stream, const ParseItem &item) {
return stream << string("(item variable:") << to_string(item.variable_index)
<< string(" production:") << to_string((size_t)item.production % 1000)

View file

@@ -9,6 +9,7 @@
#include <vector>
#include "compiler/grammar.h"
#include "compiler/compile_error.h"
#include "compiler/build_tables/lex_item.h"
using std::cout;
@@ -122,6 +123,7 @@ class LookaheadSet;
ostream &operator<<(ostream &, const LexItem &);
ostream &operator<<(ostream &, const LexItemSet &);
ostream &operator<<(ostream &, const LexItemSet::Transition &);
ostream &operator<<(ostream &, const ParseItem &);
ostream &operator<<(ostream &, const ParseItemSet &);
ostream &operator<<(ostream &, const LookaheadSet &);

View file

@@ -21,29 +21,37 @@
namespace tree_sitter {
namespace build_tables {
using std::make_shared;
using std::map;
using std::set;
using std::string;
using std::unordered_map;
using std::vector;
using std::make_shared;
using std::unordered_map;
using rules::Blank;
using rules::Choice;
using rules::CharacterSet;
using rules::Repeat;
using rules::Symbol;
using rules::Metadata;
using rules::Seq;
using rules::START_TOKEN;
using rules::PRECEDENCE;
using rules::IS_ACTIVE;
class LexTableBuilder {
const LexicalGrammar lex_grammar;
LexConflictManager conflict_manager;
ParseTable *parse_table;
unordered_map<const LexItemSet, LexStateId, LexItemSet::Hash> lex_state_ids;
LexTable lex_table;
ParseTable *parse_table;
const LexicalGrammar lex_grammar;
vector<rule_ptr> separator_rules;
LexConflictManager conflict_manager;
unordered_map<const LexItemSet, LexStateId, LexItemSet::Hash> lex_state_ids;
public:
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
: lex_grammar(lex_grammar), parse_table(parse_table) {
: parse_table(parse_table), lex_grammar(lex_grammar) {
for (const rule_ptr &rule : lex_grammar.separators)
separator_rules.push_back(rules::Repeat::build(rule));
separator_rules.push_back(rules::Blank::build());
separator_rules.push_back(Repeat::build(rule));
separator_rules.push_back(Blank::build());
}
LexTable build() {
@@ -59,44 +67,9 @@ class LexTableBuilder {
}
private:
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
LexItemSet result;
for (const Symbol &symbol : symbols) {
vector<rule_ptr> rules;
if (symbol == rules::END_OF_INPUT()) {
rules.push_back(CharacterSet().include(0).copy());
} else if (symbol.is_token) {
rule_ptr rule = lex_grammar.variables[symbol.index].rule;
auto choice = rule->as<rules::Choice>();
if (choice)
for (const rule_ptr &element : choice->elements)
rules.push_back(element);
else
rules.push_back(rule);
}
for (const rule_ptr &rule : rules)
for (const rule_ptr &separator_rule : separator_rules)
result.entries.insert(LexItem(
symbol,
rules::Metadata::build(
rules::Seq::build({
rules::Metadata::build(separator_rule,
{ { rules::START_TOKEN, 1 } }),
rules::Metadata::build(rule, { { rules::PRECEDENCE, 0 } }),
}),
{
{ rules::PRECEDENCE, INT_MIN }, { rules::IS_ACTIVE, true },
})));
}
return result;
}
void add_lex_state_for_parse_state(ParseState *parse_state) {
parse_state->lex_state_id =
add_lex_state(build_lex_item_set(parse_state->expected_inputs()));
add_lex_state(item_set_for_tokens(parse_state->expected_inputs()));
}
LexStateId add_lex_state(const LexItemSet &item_set) {
@@ -114,16 +87,15 @@
}
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
for (const auto &transition : item_set.transitions()) {
const CharacterSet &rule = transition.first;
const LexItemSet &new_item_set = transition.second.first;
const PrecedenceRange &precedence = transition.second.second;
AdvanceAction action(-1, precedence);
for (const auto &pair : item_set.transitions()) {
const CharacterSet &characters = pair.first;
const LexItemSet::Transition &transition = pair.second;
AdvanceAction action(-1, transition.precedence);
auto current_action = lex_table.state(state_id).accept_action;
if (conflict_manager.resolve(action, current_action)) {
action.state_index = add_lex_state(new_item_set);
lex_table.state(state_id).advance_actions[rule] = action;
action.state_index = add_lex_state(transition.destination);
lex_table.state(state_id).advance_actions[characters] = action;
}
}
}
@@ -170,6 +142,38 @@ class LexTableBuilder {
parse_state.lex_state_id = replacement->second;
}
}
LexItemSet item_set_for_tokens(const set<Symbol> &symbols) {
LexItemSet result;
for (const Symbol &symbol : symbols)
for (const rule_ptr &rule : rules_for_symbol(symbol))
for (const rule_ptr &separator_rule : separator_rules)
result.entries.insert(LexItem(
symbol,
Metadata::build(
Seq::build({
Metadata::build(separator_rule, { { START_TOKEN, true } }),
Metadata::build(rule, { { PRECEDENCE, 0 } }),
}),
{ { PRECEDENCE, INT_MIN }, { IS_ACTIVE, true } })));
return result;
}
vector<rule_ptr> rules_for_symbol(const rules::Symbol &symbol) {
if (!symbol.is_token)
return {};
if (symbol == rules::END_OF_INPUT())
return { CharacterSet().include(0).copy() };
rule_ptr rule = lex_grammar.variables[symbol.index].rule;
auto choice = rule->as<Choice>();
if (choice)
return choice->elements;
else
return { rule };
}
};
LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {

View file

@@ -118,5 +118,9 @@ LexItemSet::TransitionMap LexItemSet::transitions() const {
return result;
}
bool LexItemSet::Transition::operator==(const LexItemSet::Transition &other) const {
return destination == other.destination && precedence == other.precedence;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@@ -39,19 +39,27 @@ class LexItemSet {
LexItemSet();
explicit LexItemSet(const std::unordered_set<LexItem, LexItem::Hash> &);
typedef std::map<rules::CharacterSet, std::pair<LexItemSet, PrecedenceRange>>
TransitionMap;
bool operator==(const LexItemSet &) const;
struct Hash {
size_t operator()(const LexItemSet &) const;
};
bool operator==(const LexItemSet &) const;
struct Transition;
typedef std::map<rules::CharacterSet, Transition> TransitionMap;
TransitionMap transitions() const;
std::unordered_set<LexItem, LexItem::Hash> entries;
};
struct LexItemSet::Transition {
LexItemSet destination;
PrecedenceRange precedence;
bool operator==(const LexItemSet::Transition &) const;
};
} // namespace build_tables
} // namespace tree_sitter
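Note on the header above: the TransitionMap value type is now a named Transition struct rather than a std::pair, so call sites read through field names. A minimal sketch of the new access pattern, assuming only the declarations in this header; the helper function and its name are hypothetical and not part of this commit:

#include <cstddef>

#include "compiler/build_tables/lex_item.h"

namespace tree_sitter {
namespace build_tables {

// Hypothetical helper (not part of this commit): count the lex items reachable in
// one step from `item_set`. Each map value is read through the named fields of
// LexItemSet::Transition instead of `.first` / `.second` of a std::pair.
size_t one_step_item_count(const LexItemSet &item_set) {
  size_t result = 0;
  for (const auto &pair : item_set.transitions()) {
    const LexItemSet::Transition &transition = pair.second;
    // transition.destination is the LexItemSet reached on pair.first's characters;
    // transition.precedence is the PrecedenceRange gathered along that transition.
    result += transition.destination.entries.size();
  }
  return result;
}

}  // namespace build_tables
}  // namespace tree_sitter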

View file

@@ -23,147 +23,162 @@ using std::map;
using std::pair;
using std::vector;
using rules::CharacterSet;
using rules::Symbol;
using rules::Blank;
using rules::MetadataKey;
using rules::Choice;
using rules::Seq;
using rules::Repeat;
using rules::Metadata;
using rules::PRECEDENCE;
using rules::IS_ACTIVE;
typedef LexItemSet::Transition Transition;
typedef LexItemSet::TransitionMap TransitionMap;
class LexItemTransitions : public rules::RuleFn<void> {
LexItemSet::TransitionMap *transitions;
const rules::Symbol &item_lhs;
class TransitionBuilder : public rules::RuleFn<void> {
TransitionMap *transitions;
const Symbol &item_lhs;
vector<int> *precedence_stack;
LexItemSet transform_item_set(const LexItemSet &item_set,
function<rule_ptr(rule_ptr)> callback) {
LexItemSet new_set;
for (const LexItem &item : item_set.entries)
new_set.entries.insert(LexItem(item.lhs, callback(item.rule)));
return new_set;
Transition transform_transition(const Transition &transition,
function<rule_ptr(rule_ptr)> callback) {
LexItemSet destination;
for (const LexItem &item : transition.destination.entries)
destination.entries.insert(LexItem(item.lhs, callback(item.rule)));
return Transition{ destination, transition.precedence };
}
void merge_transition(LexItemSet::TransitionMap *transitions,
CharacterSet new_char_set, LexItemSet new_item_set,
PrecedenceRange new_precedence_range) {
vector<pair<CharacterSet, pair<LexItemSet, PrecedenceRange>>> new_entries;
void add_transition(TransitionMap *transitions, CharacterSet new_characters,
Transition new_transition) {
vector<pair<CharacterSet, Transition>> new_entries;
auto iter = transitions->begin();
while (iter != transitions->end()) {
CharacterSet existing_char_set = iter->first;
LexItemSet &existing_item_set = iter->second.first;
PrecedenceRange &existing_precedence_range = iter->second.second;
CharacterSet existing_characters = iter->first;
Transition &existing_transition = iter->second;
LexItemSet &existing_item_set = existing_transition.destination;
PrecedenceRange &existing_precedence = existing_transition.precedence;
CharacterSet intersection = existing_char_set.remove_set(new_char_set);
if (!intersection.is_empty()) {
new_char_set.remove_set(intersection);
if (!existing_char_set.is_empty())
new_entries.push_back(
{ existing_char_set,
{ existing_item_set, existing_precedence_range } });
existing_item_set.entries.insert(new_item_set.entries.begin(),
new_item_set.entries.end());
existing_precedence_range.add(new_precedence_range);
new_entries.push_back(
{ intersection, { existing_item_set, existing_precedence_range } });
transitions->erase(iter++);
} else {
CharacterSet intersecting_characters =
existing_characters.remove_set(new_characters);
if (intersecting_characters.is_empty()) {
iter++;
continue;
}
new_characters.remove_set(intersecting_characters);
if (!existing_characters.is_empty())
new_entries.push_back({
existing_characters,
Transition{ existing_item_set, existing_precedence },
});
existing_item_set.entries.insert(
new_transition.destination.entries.begin(),
new_transition.destination.entries.end());
existing_precedence.add(new_transition.precedence);
new_entries.push_back({
intersecting_characters,
Transition{ existing_item_set, existing_precedence },
});
transitions->erase(iter++);
}
transitions->insert(new_entries.begin(), new_entries.end());
if (!new_char_set.is_empty())
transitions->insert(
{ new_char_set, { new_item_set, new_precedence_range } });
if (!new_characters.is_empty())
transitions->insert({ new_characters, new_transition });
}
map<rules::MetadataKey, int> activate_precedence(
map<rules::MetadataKey, int> metadata) {
if (metadata.count(rules::PRECEDENCE))
metadata.insert({ rules::IS_ACTIVE, 1 });
return metadata;
}
void apply_to(const CharacterSet *rule) {
void apply_to(const CharacterSet *character_set) {
PrecedenceRange precedence;
if (!precedence_stack->empty())
precedence.add(precedence_stack->back());
merge_transition(transitions, *rule,
LexItemSet({ LexItem(item_lhs, rules::Blank::build()) }),
precedence);
add_transition(
transitions, *character_set,
Transition{
LexItemSet({ LexItem(item_lhs, Blank::build()) }), precedence,
});
}
void apply_to(const rules::Choice *rule) {
for (const rule_ptr &element : rule->elements)
void apply_to(const Choice *choice) {
for (const rule_ptr &element : choice->elements)
apply(element);
}
void apply_to(const rules::Seq *rule) {
LexItemSet::TransitionMap left_transitions;
LexItemTransitions(&left_transitions, this).apply(rule->left);
void apply_to(const Seq *sequence) {
TransitionMap left_transitions;
TransitionBuilder(&left_transitions, this).apply(sequence->left);
for (const auto &pair : left_transitions) {
merge_transition(
add_transition(
transitions, pair.first,
transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) {
return rules::Seq::build({ item_rule, rule->right });
}), pair.second.second);
transform_transition(pair.second, [&sequence](rule_ptr rule) {
return Seq::build({ rule, sequence->right });
}));
}
if (rule_can_be_blank(rule->left))
apply(rule->right);
if (rule_can_be_blank(sequence->left))
apply(sequence->right);
}
void apply_to(const rules::Repeat *rule) {
LexItemSet::TransitionMap content_transitions;
LexItemTransitions(&content_transitions, this).apply(rule->content);
void apply_to(const Repeat *repeat) {
TransitionMap content_transitions;
TransitionBuilder(&content_transitions, this).apply(repeat->content);
for (const auto &pair : content_transitions) {
merge_transition(transitions, pair.first, pair.second.first,
pair.second.second);
merge_transition(
add_transition(transitions, pair.first, pair.second);
add_transition(
transitions, pair.first,
transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) {
return rules::Seq::build({ item_rule, rule->copy() });
}), pair.second.second);
transform_transition(pair.second, [&repeat](rule_ptr item_rule) {
return Seq::build({ item_rule, repeat->copy() });
}));
}
}
void apply_to(const rules::Metadata *rule) {
LexItemSet::TransitionMap content_transitions;
auto precedence = rule->value_for(rules::PRECEDENCE);
bool has_active_precedence =
precedence.second && rule->value_for(rules::IS_ACTIVE).second;
void apply_to(const Metadata *metadata) {
bool has_active_precedence = metadata->value_for(IS_ACTIVE).second;
if (has_active_precedence)
precedence_stack->push_back(precedence.first);
precedence_stack->push_back(metadata->value_for(PRECEDENCE).first);
LexItemTransitions(&content_transitions, this).apply(rule->rule);
for (const auto &pair : content_transitions)
merge_transition(
auto metadata_value = metadata->value;
if (metadata_value.count(PRECEDENCE))
metadata_value.insert({ IS_ACTIVE, true });
TransitionMap content_transitions;
TransitionBuilder(&content_transitions, this).apply(metadata->rule);
for (const auto &pair : content_transitions) {
add_transition(
transitions, pair.first,
transform_item_set(pair.second.first, [this, &rule](rule_ptr item_rule) {
return rules::Metadata::build(item_rule,
activate_precedence(rule->value));
}), pair.second.second);
transform_transition(pair.second, [&metadata_value](rule_ptr rule) {
return Metadata::build(rule, metadata_value);
}));
}
if (has_active_precedence)
precedence_stack->pop_back();
}
public:
LexItemTransitions(LexItemSet::TransitionMap *transitions,
const rules::Symbol &item_lhs,
vector<int> *precedence_stack)
TransitionBuilder(TransitionMap *transitions, const Symbol &item_lhs,
vector<int> *precedence_stack)
: transitions(transitions),
item_lhs(item_lhs),
precedence_stack(precedence_stack) {}
LexItemTransitions(LexItemSet::TransitionMap *transitions,
LexItemTransitions *other)
TransitionBuilder(TransitionMap *transitions, TransitionBuilder *other)
: transitions(transitions),
item_lhs(other->item_lhs),
precedence_stack(other->precedence_stack) {}
};
void lex_item_transitions(LexItemSet::TransitionMap *transitions,
const LexItem &item) {
void lex_item_transitions(TransitionMap *transitions, const LexItem &item) {
vector<int> precedence_stack;
LexItemTransitions(transitions, item.lhs, &precedence_stack).apply(item.rule);
TransitionBuilder(transitions, item.lhs, &precedence_stack).apply(item.rule);
}
} // namespace build_tables
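The trickiest piece of this file is add_transition, which keeps the keys of a TransitionMap pairwise disjoint by splitting overlapping character sets and merging their destinations. Below is a standalone sketch of that partitioning, using std::set<char> in place of rules::CharacterSet and a plain int in place of the merged item set and precedence range; names and types here are illustrative only, not tree-sitter's API.

#include <cstdio>
#include <map>
#include <set>
#include <utility>
#include <vector>

using CharSet = std::set<char>;

// Remove from *from every character also present in other; return the removed characters.
static CharSet remove_set(CharSet *from, const CharSet &other) {
  CharSet removed;
  for (char c : other)
    if (from->erase(c))
      removed.insert(c);
  return removed;
}

// Insert (new_chars -> new_value) while keeping the map's keys pairwise disjoint.
// Where character sets overlap, the values are combined; addition stands in for
// unioning item sets and merging precedence ranges in the real add_transition.
static void add_transition(std::map<CharSet, int> *transitions, CharSet new_chars,
                           int new_value) {
  std::vector<std::pair<CharSet, int>> new_entries;
  for (auto iter = transitions->begin(); iter != transitions->end();) {
    CharSet existing_chars = iter->first;
    int existing_value = iter->second;
    CharSet overlap = remove_set(&existing_chars, new_chars);
    if (overlap.empty()) {
      ++iter;
      continue;
    }
    remove_set(&new_chars, overlap);
    if (!existing_chars.empty())
      new_entries.push_back({ existing_chars, existing_value });
    new_entries.push_back({ overlap, existing_value + new_value });
    iter = transitions->erase(iter);
  }
  transitions->insert(new_entries.begin(), new_entries.end());
  if (!new_chars.empty())
    transitions->insert({ new_chars, new_value });
}

int main() {
  std::map<CharSet, int> transitions;
  add_transition(&transitions, { 'a', 'b' }, 1);
  add_transition(&transitions, { 'b', 'c' }, 2);
  // Prints: a -> 1, b -> 3, c -> 2  (keys stay disjoint, the overlap is merged)
  for (const auto &entry : transitions) {
    for (char c : entry.first)
      std::printf("%c", c);
    std::printf(" -> %d\n", entry.second);
  }
  return 0;
}

The real add_transition performs the same splitting, except the merged value is the union of the two LexItemSets plus PrecedenceRange::add, as shown in the hunk above.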