Simplify lex item set transitions code
This commit is contained in:
parent
25791085c3
commit
db9966b57c
13 changed files with 378 additions and 428 deletions
|
|
@ -17,12 +17,12 @@
|
|||
'src/compiler/build_tables/get_metadata.cc',
|
||||
'src/compiler/build_tables/item_set_closure.cc',
|
||||
'src/compiler/build_tables/lex_item.cc',
|
||||
'src/compiler/build_tables/lex_item_transitions.cc',
|
||||
'src/compiler/build_tables/lex_conflict_manager.cc',
|
||||
'src/compiler/build_tables/lookahead_set.cc',
|
||||
'src/compiler/build_tables/parse_item.cc',
|
||||
'src/compiler/build_tables/parse_conflict_manager.cc',
|
||||
'src/compiler/build_tables/rule_can_be_blank.cc',
|
||||
'src/compiler/build_tables/rule_transitions.cc',
|
||||
'src/compiler/compile.cc',
|
||||
'src/compiler/generate_code/c_code.cc',
|
||||
'src/compiler/grammar.cc',
|
||||
|
|
|
|||
|
|
@ -40,36 +40,256 @@ describe("LexItem", []() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("lex_item_set_transitions", [&]() {
|
||||
describe("when two items in the set have transitions on the same character", [&]() {
|
||||
it("merges the transitions by computing the union of the two item sets", [&]() {
|
||||
LexItemSet set1({
|
||||
LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()),
|
||||
LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
|
||||
});
|
||||
describe("LexItemSet::transitions()", [&]() {
|
||||
it("handles single characters", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), character({ 'x' })),
|
||||
});
|
||||
|
||||
AssertThat(set1.transitions(), Equals(map<CharacterSet, LexItemSet>({
|
||||
AssertThat(
|
||||
item_set.transitions(),
|
||||
Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('a', 'd'),
|
||||
CharacterSet().include('x'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
})
|
||||
}
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles sequences", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), seq({
|
||||
character({ 'w' }),
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
})),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
item_set.transitions(),
|
||||
Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('w'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
})),
|
||||
})
|
||||
}
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles sequences where the left hand side can be blank", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), seq({
|
||||
choice({
|
||||
character({ 'x' }),
|
||||
blank(),
|
||||
}),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
})),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
item_set.transitions(),
|
||||
Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('x'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
})),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('y'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'z' })),
|
||||
})
|
||||
}
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles blanks", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), blank()),
|
||||
});
|
||||
|
||||
AssertThat(item_set.transitions(), IsEmpty());
|
||||
});
|
||||
|
||||
it("handles repeats", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), repeat(seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }),
|
||||
}))),
|
||||
LexItem(Symbol(2), repeat(character({ 'c' }))),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
item_set.transitions(),
|
||||
Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('a'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
character({ 'b' }),
|
||||
repeat(seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }),
|
||||
}))
|
||||
}))
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('c'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), repeat(character({ 'c' }))),
|
||||
})
|
||||
}
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles choices between overlapping character sets", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), choice({
|
||||
seq({
|
||||
character({ 'a', 'b', 'c', 'd' }),
|
||||
character({ 'x' }),
|
||||
}),
|
||||
seq({
|
||||
character({ 'c', 'd', 'e', 'f' }),
|
||||
character({ 'y' }),
|
||||
}),
|
||||
}))
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
item_set.transitions(),
|
||||
Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('a', 'b'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'x' })),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('c', 'd'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'x' })),
|
||||
LexItem(Symbol(1), character({ 'y' })),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('e', 'f'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('g', 'x'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), blank()),
|
||||
LexItem(Symbol(1), character({ 'y' })),
|
||||
})
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles choices between a subset and a superset of characters", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), choice({
|
||||
seq({
|
||||
character({ 'b', 'c', 'd' }),
|
||||
character({ 'x' }),
|
||||
}),
|
||||
seq({
|
||||
character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
|
||||
character({ 'y' }),
|
||||
}),
|
||||
})),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
item_set.transitions(),
|
||||
Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('a').include('e', 'f'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'y' })),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('b', 'd'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'x' })),
|
||||
LexItem(Symbol(1), character({ 'y' })),
|
||||
})
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles choices between whitelisted and blacklisted character sets", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), seq({
|
||||
choice({
|
||||
character({ '/' }, false),
|
||||
seq({
|
||||
character({ '\\' }),
|
||||
character({ '/' }),
|
||||
}),
|
||||
}),
|
||||
character({ '/' }),
|
||||
}))
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
item_set.transitions(),
|
||||
Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include_all().exclude('/').exclude('\\'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ '/' })),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('\\'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ '/' })),
|
||||
LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })),
|
||||
})
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles different items with overlapping character sets", [&]() {
|
||||
LexItemSet set1({
|
||||
LexItem(Symbol(1), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
|
||||
LexItem(Symbol(2), character({ 'e', 'f', 'g', 'h', 'i' }))
|
||||
});
|
||||
|
||||
AssertThat(set1.transitions(), Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('a', 'd'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('e', 'f'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('g', 'i'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,90 +0,0 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/merge_transitions.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("merge_transition", []() {
  using int_map = map<CharacterSet, int>;

  // Merges one (character set, flags) pair into `target`. Where the new
  // character set overlaps an existing key, the two integer values are
  // OR-ed together, so the expected maps below show which inputs met.
  auto merge_flags = [](int_map *target, const pair<CharacterSet, int> &addition) {
    merge_transition<int>(target, addition, [](int *existing, const int *incoming) {
      *existing |= *incoming;
    });
  };

  describe("when none of the transitions intersect", [&]() {
    it("returns the union of the two sets of transitions", [&]() {
      int_map actual({
        { CharacterSet().include('a').include('c'), 1 },
        { CharacterSet().include('x').include('y'), 2 },
        { CharacterSet().include('1').include('9'), 4 },
      });

      merge_flags(&actual, { CharacterSet().include(' '), 8 });
      merge_flags(&actual, { CharacterSet().include('\t'), 16 });

      const int_map expected({
        { CharacterSet().include('a').include('c'), 1 },
        { CharacterSet().include('x').include('y'), 2 },
        { CharacterSet().include('1').include('9'), 4 },
        { CharacterSet().include(' '), 8 },
        { CharacterSet().include('\t'), 16 },
      });
      AssertThat(actual, Equals(expected));
    });
  });

  describe("when transitions intersect", [&]() {
    it("merges the intersecting transitions using the provided function", [&]() {
      int_map actual({
        { CharacterSet().include('a', 'f').include('A', 'F'), 1 },
        { CharacterSet().include('0', '9'), 2 },
      });

      merge_flags(&actual, { CharacterSet().include('c'), 4 });
      merge_flags(&actual, { CharacterSet().include('3'), 8 });

      // Each overlapped key is split into its untouched remainder (old
      // value) and the overlap (old value OR-ed with the new one).
      const int_map expected({
        {
          CharacterSet()
            .include('a', 'b')
            .include('d', 'f')
            .include('A', 'F'),
          1
        },
        {
          CharacterSet().include('c'),
          5
        },
        {
          CharacterSet().include('0', '2').include('4', '9'),
          2
        },
        {
          CharacterSet().include('3'),
          10
        },
      });
      AssertThat(actual, Equals(expected));
    });
  });

  describe("when two of the right transitions intersect the same left transition", [&]() {
    it("splits the left-hand transition correctly", [&]() {
      int_map actual({
        { CharacterSet().include('a').include('c'), 1 },
      });

      merge_flags(&actual, { CharacterSet().include('a'), 2 });
      merge_flags(&actual, { CharacterSet().include('c'), 4 });

      const int_map expected({
        { CharacterSet().include('a'), 3 },
        { CharacterSet().include('c'), 5 },
      });
      AssertThat(actual, Equals(expected));
    });
  });
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -1,173 +0,0 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
class transition_map : public std::map<CharacterSet, rule_ptr> {
|
||||
public:
|
||||
bool operator==(const std::map<CharacterSet, rule_ptr> &other) const {
|
||||
if (this->size() != other.size()) return false;
|
||||
for (const auto &pair : *this) {
|
||||
auto other_pair = other.find(pair.first);
|
||||
if (other_pair == other.end()) return false;
|
||||
if (!pair.second->operator==(*other_pair->second)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
transition_map(const std::initializer_list<std::pair<const CharacterSet, rule_ptr>> &list) :
|
||||
std::map<CharacterSet, rule_ptr>(list) {}
|
||||
};
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("rule_transitions", []() {
  it("handles single characters", [&]() {
    const rule_ptr input = character({ '1' });
    const transition_map expected({
      { CharacterSet().include('1'), blank() }
    });

    AssertThat(rule_transitions(input), Equals(expected));
  });

  it("handles sequences", [&]() {
    const rule_ptr input = seq({ character({ '1' }), character({ '2' }) });
    const transition_map expected({
      { CharacterSet().include('1'), character({ '2' }) }
    });

    AssertThat(rule_transitions(input), Equals(expected));
  });

  it("handles long sequences", [&]() {
    const rule_ptr input = seq({
      character({ '1' }),
      character({ '2' }),
      character({ '3' }),
      character({ '4' })
    });
    const transition_map expected({
      {
        CharacterSet().include('1'),
        seq({ character({ '2' }), character({ '3' }), character({ '4' }) }),
      }
    });

    AssertThat(rule_transitions(input), Equals(expected));
  });

  it("handles sequences whose left sides can be blank", [&]() {
    // The optional leading '1' means a '1' may start either element.
    const rule_ptr input = seq({
      choice({
        character({ '1' }),
        blank() }),
      seq({
        character({ '1' }),
        character({ '2' }) })
    });
    const transition_map expected({
      {
        CharacterSet().include('1'),
        choice({ seq({ character({ '1' }), character({ '2' }) }), character({ '2' }), }),
      }
    });

    AssertThat(rule_transitions(input), Equals(expected));
  });

  it("handles choices between overlapping character sets", [&]() {
    const rule_ptr input = choice({
      seq({
        character({ 'a', 'b', 'c', 'd' }),
        sym("x") }),
      seq({
        character({ 'c', 'd', 'e', 'f' }),
        sym("y") }) });
    const transition_map expected({
      { CharacterSet().include('a', 'b'), sym("x") },
      { CharacterSet().include('c', 'd'), choice({ sym("x"), sym("y") }) },
      { CharacterSet().include('e', 'f'), sym("y") },
    });

    AssertThat(rule_transitions(input), Equals(expected));
  });

  it("handles choices between whitelisted and blacklisted character sets", [&]() {
    const rule_ptr input = seq({
      choice({
        character({ '/' }, false),
        seq({
          character({ '\\' }),
          character({ '/' }) }) }),
      character({ '/' }) });
    const transition_map expected({
      { CharacterSet()
          .include_all()
          .exclude('/')
          .exclude('\\'),
        character({ '/' }) },
      { CharacterSet()
          .include('\\'),
        seq({
          choice({
            blank(),
            character({ '/' }) }),
          character({ '/' }) }) },
    });

    AssertThat(rule_transitions(input), Equals(expected));
  });

  it("handles choices between a subset and a superset of characters", [&]() {
    // Subset listed first.
    const rule_ptr subset_first = choice({
      seq({
        character({ 'b', 'c', 'd' }),
        sym("x") }),
      seq({
        character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
        sym("y") }) });
    const transition_map expected_subset_first({
      { CharacterSet().include('b', 'd'), choice({ sym("x"), sym("y") }) },
      { CharacterSet().include('a').include('e', 'f'), sym("y") },
    });

    AssertThat(rule_transitions(subset_first), Equals(expected_subset_first));

    // Superset listed first.
    const rule_ptr superset_first = choice({
      seq({
        character({ 'a', 'b', 'c', 'd', 'e', 'f' }),
        sym("x") }),
      seq({
        character({ 'b', 'c', 'd' }),
        sym("y") }) });
    const transition_map expected_superset_first({
      { CharacterSet().include('b', 'd'), choice({ sym("x"), sym("y") }) },
      { CharacterSet().include('a').include('e', 'f'), sym("x") },
    });

    AssertThat(rule_transitions(superset_first), Equals(expected_superset_first));
  });

  it("handles blanks", [&]() {
    AssertThat(rule_transitions(blank()), Equals(transition_map({})));
  });

  it("handles repeats", [&]() {
    // Repeat of a sequence: after 'a', the rest of the sequence is
    // followed by the repeat itself.
    rule_ptr rule = repeat(seq({ character({ 'a' }), character({ 'b' }) }));

    AssertThat(
      rule_transitions(rule),
      Equals(transition_map({
        {
          CharacterSet().include('a'),
          seq({
            character({ 'b' }),
            rule })
        }})));

    // Repeat of a single character: the spec expects the repeat itself as
    // the next rule.
    rule = repeat(character({ 'a' }));

    AssertThat(
      rule_transitions(rule),
      Equals(transition_map({
        { CharacterSet().include('a'), rule }
      })));
  });
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -130,8 +130,8 @@ class LexTableBuilder {
|
|||
return rules::Seq::build({
|
||||
make_shared<rules::Metadata>(
|
||||
separator_rule, map<rules::MetadataKey, int>({
|
||||
{ rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 },
|
||||
})),
|
||||
{ rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 },
|
||||
})),
|
||||
rule,
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,8 +3,6 @@
|
|||
#include <vector>
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/get_metadata.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/build_tables/merge_transitions.h"
|
||||
#include "compiler/build_tables/lex_item_transitions.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include <unordered_set>
|
||||
|
||||
|
|
@ -48,16 +47,8 @@ bool LexItemSet::operator==(const LexItemSet &other) const {
|
|||
|
||||
// Builds the outgoing transitions of this item set: a map from character
// sets to the item set reached on those characters.
//
// Each entry is handed to lex_item_transitions(), which merges that item's
// transitions into `result`. The earlier rule_transitions() /
// merge_transition<LexItemSet>() pass is dropped: running it alongside
// lex_item_transitions() processed every item twice and inserted additional
// items built from rule_transitions()' merged rules.
map<CharacterSet, LexItemSet> LexItemSet::transitions() const {
  map<CharacterSet, LexItemSet> result;
  for (const LexItem &item : entries)
    lex_item_transitions(&result, item);
  return result;
}
|
||||
|
||||
|
|
|
|||
127
src/compiler/build_tables/lex_item_transitions.cc
Normal file
127
src/compiler/build_tables/lex_item_transitions.cc
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
#include "compiler/build_tables/lex_item_transitions.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::function;
|
||||
using std::make_shared;
|
||||
using std::map;
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
using rules::CharacterSet;
|
||||
|
||||
// Rule visitor that computes the lexical transitions of one item's rule and
// merges them into a caller-supplied map from character sets to item sets.
// Every item it creates carries `item_lhs` as its left-hand side.
class LexItemTransitions : public rules::RuleFn<void> {
  map<CharacterSet, LexItemSet> *transitions;
  const rules::Symbol &item_lhs;

  // Returns a new item set holding, for each item of `item_set`, an item
  // with the same lhs whose rule is `transform(original rule)`.
  LexItemSet transform_item_set(const LexItemSet &item_set,
                                function<rule_ptr(rule_ptr)> transform) {
    LexItemSet transformed;
    for (const LexItem &original : item_set.entries)
      transformed.entries.insert(LexItem(original.lhs, transform(original.rule)));
    return transformed;
  }

  // Merges (new_char_set -> new_item_set) into `*transitions`. Existing
  // keys that overlap the new character set are split into the overlap
  // (old items plus the new items) and the remainder (old items only).
  // Whatever is left of the new character set is inserted on its own.
  void merge_transition(map<CharacterSet, LexItemSet> *transitions,
                        CharacterSet new_char_set, LexItemSet new_item_set) {
    vector<pair<CharacterSet, LexItemSet>> replacements;

    for (auto entry = transitions->begin(); entry != transitions->end();) {
      // remove_set is used here as: strip the argument's characters from
      // the receiver and return the characters both sets shared.
      CharacterSet remaining_chars = entry->first;
      CharacterSet shared_chars = remaining_chars.remove_set(new_char_set);
      if (shared_chars.is_empty()) {
        ++entry;
        continue;
      }

      new_char_set.remove_set(shared_chars);

      LexItemSet &old_items = entry->second;
      if (!remaining_chars.is_empty())
        replacements.push_back({ remaining_chars, old_items });

      // Extend the old items in place; the entry is erased right after,
      // so only the copy stored in `replacements` survives.
      old_items.entries.insert(new_item_set.entries.begin(),
                               new_item_set.entries.end());
      replacements.push_back({ shared_chars, old_items });
      entry = transitions->erase(entry);
    }

    transitions->insert(replacements.begin(), replacements.end());

    if (!new_char_set.is_empty())
      transitions->insert({ new_char_set, new_item_set });
  }

  // Computes the transitions of `content` into a scratch map using a fresh
  // visitor, rewrites the rule of every resulting item with `transform`,
  // and merges the rewritten transitions into `transitions`.
  void merge_content_transitions(const rule_ptr &content,
                                 const function<rule_ptr(rule_ptr)> &transform) {
    map<CharacterSet, LexItemSet> content_transitions;
    LexItemTransitions(&content_transitions, item_lhs).apply(content);
    for (auto &entry : content_transitions)
      merge_transition(transitions, entry.first,
                       transform_item_set(entry.second, transform));
  }

  // A character set transitions, on its own characters, to an item whose
  // rule is blank.
  void apply_to(const CharacterSet *rule) {
    LexItemSet next_items({
      LexItem(item_lhs, rules::Blank::build()),
    });
    merge_transition(transitions, *rule, next_items);
  }

  // A choice contributes the transitions of each of its elements.
  void apply_to(const rules::Choice *rule) {
    for (const rule_ptr &element : rule->elements)
      apply(element);
  }

  // A sequence contributes the transitions of its left side, each followed
  // by the right side. If the left side can be blank, the right side's own
  // transitions are added as well.
  void apply_to(const rules::Seq *rule) {
    merge_content_transitions(rule->left, [rule](rule_ptr item_rule) {
      return rules::Seq::build({ item_rule, rule->right });
    });

    if (rule_can_be_blank(rule->left))
      apply(rule->right);
  }

  // A repeat contributes the transitions of its content, each followed by a
  // copy of the repeat itself.
  void apply_to(const rules::Repeat *rule) {
    merge_content_transitions(rule->content, [rule](rule_ptr item_rule) {
      return rules::Seq::build({ item_rule, rule->copy() });
    });
  }

  // A metadata rule contributes the transitions of the wrapped rule, with
  // each resulting rule re-wrapped in the same metadata values.
  void apply_to(const rules::Metadata *rule) {
    merge_content_transitions(rule->rule, [rule](rule_ptr item_rule) {
      return make_shared<rules::Metadata>(item_rule, rule->value);
    });
  }

 public:
  LexItemTransitions(map<CharacterSet, LexItemSet> *transitions,
                     const rules::Symbol &item_lhs)
      : transitions(transitions), item_lhs(item_lhs) {}
};
|
||||
|
||||
// Merges the transitions of `item`'s rule into `*transitions`, using the
// item's lhs for every item placed in the resulting item sets.
void lex_item_transitions(map<CharacterSet, LexItemSet> *transitions,
                          const LexItem &item) {
  LexItemTransitions visitor(transitions, item.lhs);
  visitor.apply(item.rule);
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -2,13 +2,16 @@
|
|||
#define COMPILER_BUILD_TABLES_RULE_TRANSITIONS_H_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
std::map<rules::CharacterSet, rule_ptr> rule_transitions(const rule_ptr &);
|
||||
void lex_item_transitions(std::map<rules::CharacterSet, LexItemSet> *transitions,
|
||||
const LexItem &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_
|
||||
#define COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_
|
||||
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
/*
|
||||
* Merges two transition maps with character set keys. If the
|
||||
* two maps contain values for overlapping character sets, the
|
||||
* new value for the two sets' intersection will be computed by
|
||||
* merging the old and new values using the given function.
|
||||
*/
|
||||
template <typename T>
|
||||
void merge_transition(std::map<rules::CharacterSet, T> *left,
|
||||
const std::pair<rules::CharacterSet, T> &new_pair,
|
||||
std::function<void(T *, const T *)> merge_fn) {
|
||||
rules::CharacterSet new_char_set = new_pair.first;
|
||||
T new_value = new_pair.second;
|
||||
|
||||
std::map<rules::CharacterSet, T> pairs_to_insert;
|
||||
|
||||
auto iter = left->begin();
|
||||
while (iter != left->end()) {
|
||||
rules::CharacterSet char_set = iter->first;
|
||||
T value = iter->second;
|
||||
|
||||
rules::CharacterSet intersection = char_set.remove_set(new_char_set);
|
||||
if (!intersection.is_empty()) {
|
||||
new_char_set.remove_set(intersection);
|
||||
if (!char_set.is_empty())
|
||||
pairs_to_insert.insert({ char_set, value });
|
||||
merge_fn(&value, &new_value);
|
||||
pairs_to_insert.insert({ intersection, value });
|
||||
left->erase(iter++);
|
||||
} else {
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
|
||||
left->insert(pairs_to_insert.begin(), pairs_to_insert.end());
|
||||
|
||||
if (!new_char_set.is_empty())
|
||||
left->insert({ new_char_set, new_value });
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/build_tables/merge_transitions.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::map;
|
||||
using std::make_shared;
|
||||
using rules::CharacterSet;
|
||||
using rules::Choice;
|
||||
using rules::Symbol;
|
||||
|
||||
// Rule visitor that returns, for a rule, a map from each character set the
// rule can start with to the rule that remains after consuming it.
class RuleTransitions : public rules::RuleFn<map<CharacterSet, rule_ptr>> {
 private:
  // Merges every entry of `right` into `*left`. Where character sets
  // overlap, the two remaining rules are combined into a choice.
  void merge_transitions(map<CharacterSet, rule_ptr> *left,
                         const map<CharacterSet, rule_ptr> &right) {
    auto combine_as_choice = [](rule_ptr *existing, const rule_ptr *incoming) {
      *existing = Choice::build({ *existing, *incoming });
    };

    for (auto &entry : right)
      merge_transition<rule_ptr>(left, entry, combine_as_choice);
  }

  // A character set transitions, on its own characters, to a blank rule.
  map<CharacterSet, rule_ptr> apply_to(const CharacterSet *rule) {
    map<CharacterSet, rule_ptr> transitions;
    transitions.insert({ *rule, make_shared<rules::Blank>() });
    return transitions;
  }

  // A choice yields the merged transitions of all of its elements.
  map<CharacterSet, rule_ptr> apply_to(const rules::Choice *rule) {
    map<CharacterSet, rule_ptr> merged;
    for (const auto &element : rule->elements)
      merge_transitions(&merged, this->apply(element));
    return merged;
  }

  // A sequence yields its left side's transitions, each followed by the
  // right side. If the left side can be blank, the right side's own
  // transitions are merged in as well.
  map<CharacterSet, rule_ptr> apply_to(const rules::Seq *rule) {
    auto transitions = this->apply(rule->left);
    for (auto &entry : transitions)
      entry.second = rules::Seq::build({ entry.second, rule->right });

    if (!rule_can_be_blank(rule->left))
      return transitions;

    merge_transitions(&transitions, this->apply(rule->right));
    return transitions;
  }

  // A repeat yields its content's transitions, each followed by a copy of
  // the repeat itself.
  map<CharacterSet, rule_ptr> apply_to(const rules::Repeat *rule) {
    auto transitions = this->apply(rule->content);
    for (auto &entry : transitions)
      entry.second = rules::Seq::build({ entry.second, rule->copy() });
    return transitions;
  }

  // A metadata rule yields the wrapped rule's transitions, with each
  // remaining rule re-wrapped in the same metadata values.
  map<CharacterSet, rule_ptr> apply_to(const rules::Metadata *rule) {
    auto transitions = this->apply(rule->rule);
    for (auto &entry : transitions)
      entry.second = make_shared<rules::Metadata>(entry.second, rule->value);
    return transitions;
  }
};
|
||||
|
||||
// Returns the transitions of `rule`: a map from each character set to the
// rule that remains after consuming a character from that set.
map<CharacterSet, rule_ptr> rule_transitions(const rule_ptr &rule) {
  RuleTransitions visitor;
  return visitor.apply(rule);
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -27,7 +27,8 @@ LexAction LexAction::Error() {
|
|||
return LexAction(LexActionTypeError, -1, Symbol(-1), { 0, 0 });
|
||||
}
|
||||
|
||||
// Builds an advance action: type LexActionTypeAdvance, the given target
// state index, Symbol(-1) as the symbol, and the given precedence range.
// The signature appears exactly once; a second copy of the header inside
// the body would open a nested definition that is never closed.
LexAction LexAction::Advance(size_t state_index,
                             PrecedenceRange precedence_range) {
  return LexAction(LexActionTypeAdvance, state_index, Symbol(-1),
                   precedence_range);
}
|
||||
|
|
|
|||
|
|
@ -40,7 +40,8 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
|||
if (error)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
return make_tuple(flatten_grammar(syntax_grammar), normalize_rules(lex_grammar), nullptr);
|
||||
return make_tuple(flatten_grammar(syntax_grammar),
|
||||
normalize_rules(lex_grammar), nullptr);
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue