Include precedence calculation in LexItemSet::transitions

This commit is contained in:
Max Brunsfeld 2015-10-30 16:07:29 -07:00
parent e9be0ff24e
commit 73b3280fbb
11 changed files with 278 additions and 142 deletions

View file

@ -48,12 +48,15 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('x'),
LexItemSet({
LexItem(Symbol(1), blank()),
})
{
LexItemSet({
LexItem(Symbol(1), blank()),
}),
PrecedenceRange()
}
}
})));
});
@ -70,16 +73,77 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('w'),
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
})),
})
{
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
})),
}),
PrecedenceRange()
}
}
})));
});
it("handles sequences with nested precedence", [&]() {
  // Case 1: the first character ('w') sits inside both prec(4) and prec(3).
  // The transition on 'w' is expected to carry the innermost precedence, 4.
  LexItemSet doubly_nested_items({
    LexItem(Symbol(1), seq({
      prec(3, seq({
        prec(4, seq({
          character({ 'w' }),
          character({ 'x' }) })),
        character({ 'y' }) })),
      character({ 'z' }),
    })),
  });

  LexItemSet::TransitionMap expected_after_w({
    {
      CharacterSet().include('w'),
      {
        LexItemSet({
          LexItem(Symbol(1), seq({
            prec(3, seq({
              prec(4, character({ 'x' })),
              character({ 'y' }) })),
            character({ 'z' }),
          })),
        }),
        PrecedenceRange(4)
      }
    }
  });

  AssertThat(doubly_nested_items.transitions(), Equals(expected_after_w));

  // Case 2: consuming 'x' completes the prec(4) rule, leaving only the
  // prec(3) wrapper around the remainder. The transition on 'x' is expected
  // to carry precedence 3.
  LexItemSet singly_nested_items({
    LexItem(Symbol(1), seq({
      prec(3, seq({
        prec(4, character({ 'x' })),
        character({ 'y' }) })),
      character({ 'z' }),
    })),
  });

  LexItemSet::TransitionMap expected_after_x({
    {
      CharacterSet().include('x'),
      {
        LexItemSet({
          LexItem(Symbol(1), seq({
            prec(3, character({ 'y' })),
            character({ 'z' }),
          })),
        }),
        PrecedenceRange(3)
      }
    }
  });

  AssertThat(singly_nested_items.transitions(), Equals(expected_after_x));
});
@ -98,21 +162,27 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('x'),
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'y' }),
character({ 'z' }),
})),
})
{
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'y' }),
character({ 'z' }),
})),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('y'),
LexItemSet({
LexItem(Symbol(1), character({ 'z' })),
})
{
LexItemSet({
LexItem(Symbol(1), character({ 'z' })),
}),
PrecedenceRange()
}
}
})));
});
@ -136,26 +206,32 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a'),
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'b' }),
repeat1(seq({
character({ 'a' }),
{
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'b' }),
}))
})),
LexItem(Symbol(1), character({ 'b' })),
})
repeat1(seq({
character({ 'a' }),
character({ 'b' }),
}))
})),
LexItem(Symbol(1), character({ 'b' })),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('c'),
LexItemSet({
LexItem(Symbol(2), repeat1(character({ 'c' }))),
LexItem(Symbol(2), blank()),
})
{
LexItemSet({
LexItem(Symbol(2), repeat1(character({ 'c' }))),
LexItem(Symbol(2), blank()),
}),
PrecedenceRange()
}
}
})));
});
@ -163,38 +239,47 @@ describe("LexItemSet::transitions()", [&]() {
// Two alternatives whose leading character sets share 'c' and 'd'. The first
// alternative is wrapped in prec(2) and the second in prec(3), so the expected
// map has three entries: the characters only the first alternative accepts,
// the shared characters, and the characters only the second accepts.
it("handles choices between overlapping character sets", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), choice({
seq({
prec(2, seq({
character({ 'a', 'b', 'c', 'd' }),
character({ 'x' }),
}),
seq({
})),
prec(3, seq({
character({ 'c', 'd', 'e', 'f' }),
character({ 'y' }),
}),
})),
}))
});
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a', 'b'),
LexItemSet({
LexItem(Symbol(1), character({ 'x' })),
})
{
LexItemSet({
LexItem(Symbol(1), prec(2, character({ 'x' }))),
}),
PrecedenceRange(2)
}
},
{
CharacterSet().include('c', 'd'),
LexItemSet({
LexItem(Symbol(1), character({ 'x' })),
LexItem(Symbol(1), character({ 'y' })),
})
{
// Shared characters: items from both alternatives are reachable, and the
// expected range spans both precedences.
LexItemSet({
LexItem(Symbol(1), prec(2, character({ 'x' }))),
LexItem(Symbol(1), prec(3, character({ 'y' }))),
}),
PrecedenceRange(2, 3)
}
},
{
CharacterSet().include('e', 'f'),
LexItemSet({
LexItem(Symbol(1), character({ 'y' })),
})
{
LexItemSet({
LexItem(Symbol(1), prec(3, character({ 'y' }))),
}),
PrecedenceRange(3)
}
},
})));
});
@ -215,19 +300,25 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a').include('e', 'f'),
LexItemSet({
LexItem(Symbol(1), character({ 'y' })),
})
{
LexItemSet({
LexItem(Symbol(1), character({ 'y' })),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('b', 'd'),
LexItemSet({
LexItem(Symbol(1), character({ 'x' })),
LexItem(Symbol(1), character({ 'y' })),
})
{
LexItemSet({
LexItem(Symbol(1), character({ 'x' })),
LexItem(Symbol(1), character({ 'y' })),
}),
PrecedenceRange()
}
},
})));
});
@ -248,19 +339,25 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include_all().exclude('/').exclude('\\'),
LexItemSet({
LexItem(Symbol(1), character({ '/' })),
})
{
LexItemSet({
LexItem(Symbol(1), character({ '/' })),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('\\'),
LexItemSet({
LexItem(Symbol(1), character({ '/' })),
LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })),
})
{
LexItemSet({
LexItem(Symbol(1), character({ '/' })),
LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })),
}),
PrecedenceRange()
}
},
})));
});
@ -271,25 +368,34 @@ describe("LexItemSet::transitions()", [&]() {
LexItem(Symbol(2), character({ 'e', 'f', 'g', 'h', 'i' }))
});
AssertThat(set1.transitions(), Equals(map<CharacterSet, LexItemSet>({
AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a', 'd'),
LexItemSet({
LexItem(Symbol(1), blank()),
})
{
LexItemSet({
LexItem(Symbol(1), blank()),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('e', 'f'),
LexItemSet({
LexItem(Symbol(1), blank()),
LexItem(Symbol(2), blank()),
})
{
LexItemSet({
LexItem(Symbol(1), blank()),
LexItem(Symbol(2), blank()),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('g', 'i'),
LexItemSet({
LexItem(Symbol(2), blank()),
})
{
LexItemSet({
LexItem(Symbol(2), blank()),
}),
PrecedenceRange()
}
},
})));
});

View file

@ -7,7 +7,6 @@
#include <vector>
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/get_completion_status.h"
#include "compiler/build_tables/get_metadata.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/does_match_any_line.h"
#include "compiler/parse_table.h"
@ -51,7 +50,8 @@ class LexTableBuilder {
LexTable build() {
for (ParseState &parse_state : parse_table->states) {
LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs(), false);
LexItemSet item_set =
build_lex_item_set(parse_state.expected_inputs(), false);
parse_state.lex_state_id = add_lex_state(item_set);
}
@ -120,13 +120,14 @@ class LexTableBuilder {
// Adds an advance action to lex state `state_id` for each transition out of
// `item_set`, subject to conflict resolution against that state's default
// action.
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
for (const auto &transition : item_set.transitions()) {
const CharacterSet &rule = transition.first;
const LexItemSet &new_item_set = transition.second;
LexStateId new_state_id = add_lex_state(new_item_set);
auto action = LexAction::Advance(
new_state_id, precedence_range_for_item_set(new_item_set));
if (conflict_manager.resolve(action,
lex_table.state(state_id).default_action))
// Each transition value is a pair: the destination item set and the
// precedence range associated with the transition.
const LexItemSet &new_item_set = transition.second.first;
const PrecedenceRange &precedence = transition.second.second;
auto current_action = lex_table.state(state_id).default_action;
// The action is built with a placeholder state index of -1; the destination
// state is only added once the conflict manager has accepted the action.
auto action = LexAction::Advance(-1, precedence);
if (conflict_manager.resolve(action, current_action)) {
action.state_index = add_lex_state(new_item_set);
lex_table.state(state_id).actions[rule] = action;
}
}
}
@ -135,10 +136,9 @@ class LexTableBuilder {
CompletionStatus completion_status = get_completion_status(item.rule);
if (completion_status.is_done) {
auto current_action = lex_table.state(state_id).default_action;
auto new_action =
LexAction::Accept(item.lhs, completion_status.precedence);
if (conflict_manager.resolve(new_action, current_action))
lex_table.state(state_id).default_action = new_action;
auto action = LexAction::Accept(item.lhs, completion_status.precedence);
if (conflict_manager.resolve(action, current_action))
lex_table.state(state_id).default_action = action;
}
}
}
@ -148,16 +148,6 @@ class LexTableBuilder {
if (item.is_token_start())
lex_table.state(state_id).is_token_start = true;
}
// Builds one precedence range for an item set by adding the min and max of
// the PRECEDENCE metadata of each item's rule.
PrecedenceRange precedence_range_for_item_set(const LexItemSet &item_set) const {
  PrecedenceRange combined;
  for (const auto &entry : item_set.entries) {
    const auto entry_range = get_metadata(entry.rule, rules::PRECEDENCE);
    combined.add(entry_range.min);
    combined.add(entry_range.max);
  }
  return combined;
}
};
LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {

View file

@ -102,9 +102,7 @@ class ParseTableBuilder {
const PrecedenceRange &precedence = transition.second.second;
ParseAction *new_action = add_action(
state_id, symbol,
ParseAction::Shift(0, precedence),
item_set);
state_id, symbol, ParseAction::Shift(0, precedence), item_set);
if (new_action)
new_action->state_index = add_parse_state(next_item_set);
}
@ -121,8 +119,7 @@ class ParseTableBuilder {
(item.lhs() == rules::START())
? ParseAction::Accept()
: ParseAction::Reduce(Symbol(item.variable_index), item.step_index,
status.precedence,
status.associativity,
status.precedence, status.associativity,
*item.production);
for (const auto &lookahead_sym : *lookahead_symbols.entries)

View file

@ -45,8 +45,8 @@ bool LexItemSet::operator==(const LexItemSet &other) const {
return entries == other.entries;
}
map<CharacterSet, LexItemSet> LexItemSet::transitions() const {
map<CharacterSet, LexItemSet> result;
LexItemSet::TransitionMap LexItemSet::transitions() const {
TransitionMap result;
for (const LexItem &item : entries)
lex_item_transitions(&result, item);
return result;

View file

@ -3,9 +3,11 @@
#include <unordered_set>
#include <map>
#include <utility>
#include <string>
#include "compiler/rules/character_set.h"
#include "compiler/rules/symbol.h"
#include "compiler/precedence_range.h"
namespace tree_sitter {
namespace build_tables {
@ -29,8 +31,11 @@ class LexItemSet {
LexItemSet();
explicit LexItemSet(const std::unordered_set<LexItem, LexItem::Hash> &);
// Result type of transitions(): maps a character set to a pair holding the
// item set for that transition and the precedence range associated with it.
typedef std::map<rules::CharacterSet, std::pair<LexItemSet, PrecedenceRange>>
TransitionMap;
bool operator==(const LexItemSet &) const;
std::map<rules::CharacterSet, LexItemSet> transitions() const;
TransitionMap transitions() const;
std::unordered_set<LexItem, LexItem::Hash> entries;

View file

@ -25,8 +25,9 @@ using std::vector;
using rules::CharacterSet;
class LexItemTransitions : public rules::RuleFn<void> {
map<CharacterSet, LexItemSet> *transitions;
LexItemSet::TransitionMap *transitions;
const rules::Symbol &item_lhs;
vector<int> *precedence_stack;
LexItemSet transform_item_set(const LexItemSet &item_set,
function<rule_ptr(rule_ptr)> callback) {
@ -36,23 +37,29 @@ class LexItemTransitions : public rules::RuleFn<void> {
return new_set;
}
// Merges one transition (character set -> item set, precedence range) into
// `*transitions`. Existing entries whose character set overlaps the new one
// are split: the non-overlapping remainder keeps the existing item set and
// range, while the overlapping part gets the union of both item sets and a
// range extended with the new range. Whatever is left of the new character
// set after all overlaps are removed is inserted as its own entry.
void merge_transition(map<CharacterSet, LexItemSet> *transitions,
CharacterSet new_char_set, LexItemSet new_item_set) {
vector<pair<CharacterSet, LexItemSet>> new_entries;
void merge_transition(LexItemSet::TransitionMap *transitions,
CharacterSet new_char_set, LexItemSet new_item_set,
PrecedenceRange new_precedence_range) {
vector<pair<CharacterSet, pair<LexItemSet, PrecedenceRange>>> new_entries;
auto iter = transitions->begin();
while (iter != transitions->end()) {
// Work on a copy of the key; the entry itself is erased below and re-added
// through new_entries.
CharacterSet existing_char_set = iter->first;
LexItemSet &existing_item_set = iter->second;
LexItemSet &existing_item_set = iter->second.first;
PrecedenceRange &existing_precedence_range = iter->second.second;
// NOTE(review): remove_set appears to strip the shared characters from
// existing_char_set and return them — confirm against CharacterSet.
CharacterSet intersection = existing_char_set.remove_set(new_char_set);
if (!intersection.is_empty()) {
new_char_set.remove_set(intersection);
if (!existing_char_set.is_empty())
new_entries.push_back({ existing_char_set, existing_item_set });
new_entries.push_back(
{ existing_char_set,
{ existing_item_set, existing_precedence_range } });
existing_item_set.entries.insert(new_item_set.entries.begin(),
new_item_set.entries.end());
new_entries.push_back({ intersection, existing_item_set });
existing_precedence_range.add(new_precedence_range);
new_entries.push_back(
{ intersection, { existing_item_set, existing_precedence_range } });
// Post-increment so the iterator is advanced before its element is erased.
transitions->erase(iter++);
} else {
iter++;
@ -62,14 +69,15 @@ class LexItemTransitions : public rules::RuleFn<void> {
transitions->insert(new_entries.begin(), new_entries.end());
if (!new_char_set.is_empty())
transitions->insert({ new_char_set, new_item_set });
transitions->insert(
{ new_char_set, { new_item_set, new_precedence_range } });
}
// A character set yields a single transition on that set, leading to an item
// set with one item: the same left-hand side with a Blank rule. The transition
// is merged with a default-constructed PrecedenceRange.
void apply_to(const CharacterSet *rule) {
merge_transition(transitions, *rule,
LexItemSet({
LexItem(item_lhs, rules::Blank::build()),
}));
}), PrecedenceRange());
}
void apply_to(const rules::Choice *rule) {
@ -78,52 +86,72 @@ class LexItemTransitions : public rules::RuleFn<void> {
}
// Transitions for a sequence: compute the transitions of the left operand,
// then rewrite every resulting item rule so that it is followed by the
// sequence's right operand.
void apply_to(const rules::Seq *rule) {
map<CharacterSet, LexItemSet> left_transitions;
LexItemTransitions(&left_transitions, item_lhs).apply(rule->left);
for (const auto &pair : left_transitions)
LexItemSet::TransitionMap left_transitions;
LexItemTransitions(&left_transitions, this).apply(rule->left);
for (const auto &pair : left_transitions) {
// If the left operand's transition has no precedence of its own, fall back
// to the value on top of the precedence stack, when there is one.
PrecedenceRange precedence(pair.second.second);
if (precedence.empty && !precedence_stack->empty())
precedence.add(precedence_stack->back());
merge_transition(
transitions, pair.first,
transform_item_set(pair.second, [&rule](rule_ptr item_rule) {
transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) {
return rules::Seq::build({ item_rule, rule->right });
}));
}), precedence);
}
// When rule_can_be_blank reports true for the left operand, the right
// operand's transitions are added as well.
if (rule_can_be_blank(rule->left))
apply(rule->right);
}
// Transitions for a repeat: each transition of the repeated content is merged
// twice — once leading to the content's own resulting item set, and once
// leading to that item set with a copy of the repeat rule appended. Both
// merges use the content transition's precedence range.
void apply_to(const rules::Repeat *rule) {
map<CharacterSet, LexItemSet> content_transitions;
LexItemTransitions(&content_transitions, item_lhs).apply(rule->content);
LexItemSet::TransitionMap content_transitions;
LexItemTransitions(&content_transitions, this).apply(rule->content);
for (const auto &pair : content_transitions) {
merge_transition(transitions, pair.first, pair.second);
merge_transition(transitions, pair.first, pair.second.first,
pair.second.second);
merge_transition(
transitions, pair.first,
transform_item_set(pair.second, [&rule](rule_ptr item_rule) {
transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) {
return rules::Seq::build({ item_rule, rule->copy() });
}));
}), pair.second.second);
}
}
// Transitions for a metadata-wrapped rule: compute the transitions of the
// wrapped rule, then re-wrap every resulting item rule in Metadata with the
// same value. The rule's PRECEDENCE value sits on the precedence stack while
// the wrapped rule is processed and is popped afterwards.
void apply_to(const rules::Metadata *rule) {
map<CharacterSet, LexItemSet> content_transitions;
LexItemTransitions(&content_transitions, item_lhs).apply(rule->rule);
LexItemSet::TransitionMap content_transitions;
// NOTE(review): the value is pushed unconditionally; if this metadata carries
// no PRECEDENCE entry, whatever value_for returns is still pushed — confirm
// that is intended.
precedence_stack->push_back(rule->value_for(rules::PRECEDENCE));
LexItemTransitions(&content_transitions, this).apply(rule->rule);
for (const auto &pair : content_transitions)
merge_transition(
transitions, pair.first,
transform_item_set(pair.second, [&rule](rule_ptr item_rule) {
transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) {
return rules::Metadata::build(item_rule, rule->value);
}));
}), pair.second.second);
precedence_stack->pop_back();
}
public:
LexItemTransitions(map<CharacterSet, LexItemSet> *transitions,
const rules::Symbol &item_lhs)
: transitions(transitions), item_lhs(item_lhs) {}
// Constructs a visitor that writes into `transitions` for items with the given
// left-hand side, using the caller-provided precedence stack. Only the
// pointers and the reference are stored; nothing is copied.
LexItemTransitions(LexItemSet::TransitionMap *transitions,
const rules::Symbol &item_lhs,
vector<int> *precedence_stack)
: transitions(transitions),
item_lhs(item_lhs),
precedence_stack(precedence_stack) {}
// Constructs a visitor that writes into a different transition map but takes
// its left-hand side symbol and precedence stack pointer from `other`.
LexItemTransitions(LexItemSet::TransitionMap *transitions,
LexItemTransitions *other)
: transitions(transitions),
item_lhs(other->item_lhs),
precedence_stack(other->precedence_stack) {}
};
// Adds the transitions for a single lex item to `*transitions`. Each call
// starts with its own empty precedence stack.
void lex_item_transitions(map<CharacterSet, LexItemSet> *transitions,
void lex_item_transitions(LexItemSet::TransitionMap *transitions,
const LexItem &item) {
LexItemTransitions(transitions, item.lhs).apply(item.rule);
vector<int> precedence_stack;
LexItemTransitions(transitions, item.lhs, &precedence_stack).apply(item.rule);
}
} // namespace build_tables

View file

@ -1,8 +1,6 @@
#ifndef COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_
#define COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_
#include <map>
#include <set>
#include "compiler/rules/character_set.h"
#include "compiler/rules/symbol.h"
#include "compiler/build_tables/lex_item.h"
@ -10,7 +8,7 @@
namespace tree_sitter {
namespace build_tables {
void lex_item_transitions(std::map<rules::CharacterSet, LexItemSet> *transitions,
void lex_item_transitions(LexItemSet::TransitionMap *transitions,
const LexItem &);
} // namespace build_tables

View file

@ -46,8 +46,7 @@ ParseItem::CompletionStatus ParseItem::completion_status() const {
if (step_index == production->size()) {
result.is_done = true;
if (step_index > 0) {
const ProductionStep &last_step =
production->at(step_index - 1);
const ProductionStep &last_step = production->at(step_index - 1);
result.precedence = last_step.precedence;
result.associativity = last_step.associativity;
}

View file

@ -44,7 +44,8 @@ class ParseItemSet {
ParseItemSet();
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
typedef std::map<rules::Symbol, std::pair<ParseItemSet, PrecedenceRange>> TransitionMap;
typedef std::map<rules::Symbol, std::pair<ParseItemSet, PrecedenceRange>>
TransitionMap;
TransitionMap transitions() const;
bool operator==(const ParseItemSet &) const;

View file

@ -7,6 +7,9 @@ PrecedenceRange::PrecedenceRange() : min(0), max(0), empty(true) {}
PrecedenceRange::PrecedenceRange(int min, int max)
: min(min), max(max), empty(false) {}
// Single-value range: equivalent to the (min, max) constructor with both
// bounds set to `value`, which produces a non-empty range.
PrecedenceRange::PrecedenceRange(int value) : PrecedenceRange(value, value) {}
void PrecedenceRange::add(int new_value) {
if (empty) {
min = new_value;
@ -20,6 +23,13 @@ void PrecedenceRange::add(int new_value) {
}
}
// Extends this range to cover `other`. An empty `other` contributes nothing.
void PrecedenceRange::add(const PrecedenceRange &other) {
  if (other.empty)
    return;

  add(other.min);
  add(other.max);
}
bool PrecedenceRange::operator<(const PrecedenceRange &other) const {
if (empty)
return !other.empty;

View file

@ -5,9 +5,11 @@ namespace tree_sitter {
struct PrecedenceRange {
PrecedenceRange();
explicit PrecedenceRange(int value);
PrecedenceRange(int min, int max);
void add(int value);
void add(const PrecedenceRange &);
bool operator==(const PrecedenceRange &other) const;
bool operator<(const PrecedenceRange &other) const;