Reorganize ParseItemSet and LexItemSet

This commit is contained in:
Max Brunsfeld 2015-10-05 11:21:17 -07:00
parent 39a0934088
commit f01972c64e
16 changed files with 219 additions and 304 deletions

View file

@ -15,9 +15,7 @@
'src/compiler/build_tables/build_tables.cc',
'src/compiler/build_tables/get_completion_status.cc',
'src/compiler/build_tables/get_metadata.cc',
'src/compiler/build_tables/item.cc',
'src/compiler/build_tables/item_set_closure.cc',
'src/compiler/build_tables/item_set_transitions.cc',
'src/compiler/build_tables/lex_item.cc',
'src/compiler/build_tables/lex_conflict_manager.cc',
'src/compiler/build_tables/lookahead_set.cc',

View file

@ -1,115 +0,0 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/syntax_grammar.h"
#include "compiler/helpers/rule_helpers.h"
using namespace rules;
using namespace build_tables;
START_TEST
// Spec for char_transitions(): checks the map from CharacterSet to successor
// LexItemSet that is produced for a given LexItemSet.
describe("char_transitions(LexItemSet)", []() {
describe("when two items in the set have transitions on the same character", [&]() {
it("merges the transitions by computing the union of the two item sets", [&]() {
// Two items whose character ranges overlap on 'e'..'f'.
LexItemSet set1({
LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()),
LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
});
// The overlapping ranges are expected to be split into three disjoint
// character sets; the shared range maps to both items.
AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
// Only item 1 accepts 'a'..'d'.
{
CharacterSet().include('a', 'd'),
LexItemSet({
LexItem(Symbol(1), blank()),
})
},
// Both items accept 'e'..'f'.
{
CharacterSet().include('e', 'f'),
LexItemSet({
LexItem(Symbol(1), blank()),
LexItem(Symbol(2), blank()),
})
},
// Only item 2 accepts 'g'..'x'.
{
CharacterSet().include('g', 'x'),
LexItemSet({
LexItem(Symbol(2), blank()),
})
},
})));
});
});
});
// Spec for sym_transitions(): the function takes the closure of the input
// item set before advancing items, so the expected map also contains
// transitions for items that only appear through that closure.
describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
it("computes the closure of the new item sets", [&]() {
// Three single-production rules. Each production step is written as
// {symbol, <int>, associativity, rule_id}.
SyntaxGrammar grammar{{
SyntaxVariable("rule_0", VariableTypeNamed, {
Production({
{Symbol(11, true), 0, AssociativityNone, 101},
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(1), 0, AssociativityNone, 103},
{Symbol(13, true), 0, AssociativityNone, 104},
})
}),
SyntaxVariable("rule_1", VariableTypeNamed, {
Production({
{Symbol(2), 0, AssociativityNone, 105},
{Symbol(14, true), 0, AssociativityNone, 106},
})
}),
SyntaxVariable("rule_2", VariableTypeNamed, {
Production({
{Symbol(15, true), 0, AssociativityNone, 105},
})
})
}, {}, {}};
// ParseItem arguments: lhs symbol, production index, step index, rule_id.
ParseItemSet set1({
{
// Step 2 of rule_0's production: right before the reference to rule_1.
ParseItem(Symbol(0), 0, 2, 103),
LookaheadSet({ Symbol(16, true) })
}
});
AssertThat(sym_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
// Consume symbol 1 -> step 3 of rule_0's production
{
Symbol(1),
ParseItemSet({
{
ParseItem(Symbol(0), 0, 3, 104),
LookaheadSet({ Symbol(16, true) })
}
})
},
// Consume symbol 2 -> step 1 of rule_1's production
{
Symbol(2),
ParseItemSet({
{
ParseItem(Symbol(1), 0, 1, 106),
LookaheadSet({ Symbol(13, true) })
},
})
},
// Consume token 15 -> step 1 of rule_2's production
// (rule_2's production is then finished, hence rule_id 0).
{
Symbol(15, true),
ParseItemSet({
{
ParseItem(Symbol(2), 0, 1, 0),
LookaheadSet({ Symbol(14, true) })
},
})
},
})));
});
});
END_TEST

View file

@ -1,5 +1,5 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/rules/metadata.h"
using namespace rules;
@ -8,7 +8,7 @@ using namespace build_tables;
START_TEST
describe("LexItem", []() {
describe("determining if an item is the start of a token", [&]() {
describe("is_token_start()", [&]() {
Symbol sym(1);
rule_ptr token_start = make_shared<Metadata>(str("a"), map<MetadataKey, int>({
{ START_TOKEN, 1 }
@ -40,4 +40,37 @@ describe("LexItem", []() {
});
});
// Spec for lex_item_set_transitions(): checks the map from CharacterSet to
// successor LexItemSet that is produced for a given LexItemSet.
describe("lex_item_set_transitions", [&]() {
describe("when two items in the set have transitions on the same character", [&]() {
it("merges the transitions by computing the union of the two item sets", [&]() {
// Two items whose character ranges overlap on 'e'..'f'.
LexItemSet set1({
LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()),
LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
});
// The overlapping ranges are expected to be split into three disjoint
// character sets; the shared range maps to both items.
AssertThat(lex_item_set_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
// Only item 1 accepts 'a'..'d'.
{
CharacterSet().include('a', 'd'),
LexItemSet({
LexItem(Symbol(1), blank()),
})
},
// Both items accept 'e'..'f'.
{
CharacterSet().include('e', 'f'),
LexItemSet({
LexItem(Symbol(1), blank()),
LexItem(Symbol(2), blank()),
})
},
// Only item 2 accepts 'g'..'x'.
{
CharacterSet().include('g', 'x'),
LexItemSet({
LexItem(Symbol(2), blank()),
})
},
})));
});
});
});
END_TEST

View file

@ -0,0 +1,74 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/syntax_grammar.h"
#include "compiler/helpers/rule_helpers.h"
using namespace rules;
using namespace build_tables;
START_TEST
// Spec for parse_item_set_transitions(): every item in the input set that is
// not at the end of its production is advanced by one step and grouped under
// the symbol it consumed.
// NOTE(review): the spec title below still mentions computing a closure, but
// parse_item_set_transitions as defined in parse_item.cc does not call
// item_set_closure -- the input set here lists the rule_1 and rule_2 items
// explicitly instead. Consider renaming the spec.
describe("parse_item_set_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
it("computes the closure of the new item sets", [&]() {
// Three single-production rules. Each production step is written as
// {symbol, <int>, associativity, rule_id}.
SyntaxGrammar grammar{{
SyntaxVariable("rule_0", VariableTypeNamed, {
Production({
{Symbol(11, true), 0, AssociativityNone, 101},
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(1), 0, AssociativityNone, 103},
{Symbol(13, true), 0, AssociativityNone, 104},
})
}),
SyntaxVariable("rule_1", VariableTypeNamed, {
Production({
{Symbol(2), 0, AssociativityNone, 105},
{Symbol(14, true), 0, AssociativityNone, 106},
})
}),
SyntaxVariable("rule_2", VariableTypeNamed, {
Production({
{Symbol(15, true), 0, AssociativityNone, 105},
})
})
}, {}, {}};
// ParseItem arguments: lhs symbol, production index, step index, rule_id.
ParseItemSet set1({
// Step 2 of rule_0's production: the next symbol to consume is Symbol(1).
{
ParseItem(Symbol(0), 0, 2, 103),
LookaheadSet({ Symbol(16, true) })
},
// Step 0 of rule_1's production: the next symbol to consume is Symbol(2).
{
ParseItem(Symbol(1), 0, 0, 106),
LookaheadSet({ Symbol(17, true) })
},
// Step 1 of rule_2's one-step production: the item is already complete,
// so no transition is expected for it.
{
ParseItem(Symbol(2), 0, 1, 106),
LookaheadSet({ Symbol(17, true) })
}
});
AssertThat(parse_item_set_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
// Consume symbol 1 -> step 3 of rule_0's production, lookahead unchanged.
{
Symbol(1),
ParseItemSet({
{
ParseItem(Symbol(0), 0, 3, 104),
LookaheadSet({ Symbol(16, true) })
}
})
},
// Consume symbol 2 -> step 1 of rule_1's production, lookahead unchanged.
{
Symbol(2),
ParseItemSet({
{
ParseItem(Symbol(1), 0, 1, 106),
LookaheadSet({ Symbol(17, true) })
},
})
},
})));
});
});
END_TEST

View file

@ -6,7 +6,6 @@
#include <utility>
#include <vector>
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/get_completion_status.h"
#include "compiler/build_tables/get_metadata.h"
#include "compiler/build_tables/lex_item.h"
@ -35,7 +34,7 @@ class LexTableBuilder {
const LexicalGrammar lex_grammar;
const LexConflictManager conflict_manager;
ParseTable *parse_table;
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
unordered_map<const LexItemSet, LexStateId, LexItemSetHash> lex_state_ids;
LexTable lex_table;
public:
@ -95,7 +94,7 @@ class LexTableBuilder {
}
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
auto transitions = char_transitions(item_set);
auto transitions = lex_item_set_transitions(item_set);
for (const auto &transition : transitions) {
CharacterSet rule = transition.first;
LexItemSet new_item_set = transition.second;

View file

@ -6,11 +6,11 @@
#include <unordered_map>
#include <utility>
#include "compiler/parse_table.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/parse_conflict_manager.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/get_completion_status.h"
#include "compiler/build_tables/get_metadata.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/lexical_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/rules/symbol.h"
@ -34,7 +34,7 @@ class ParseTableBuilder {
const SyntaxGrammar grammar;
const LexicalGrammar lexical_grammar;
ParseConflictManager conflict_manager;
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
unordered_map<const ParseItemSet, ParseStateId, ParseItemSetHash> parse_state_ids;
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
ParseTable parse_table;
std::set<string> conflicts;
@ -56,7 +56,7 @@ class ParseTableBuilder {
while (!item_sets_to_process.empty()) {
auto pair = item_sets_to_process.back();
ParseItemSet item_set = std::move(pair.first);
ParseItemSet item_set = item_set_closure(pair.first, grammar);
ParseStateId state_id = pair.second;
item_sets_to_process.pop_back();
@ -92,7 +92,7 @@ class ParseTableBuilder {
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &transition : sym_transitions(item_set, grammar)) {
for (const auto &transition : parse_item_set_transitions(item_set, grammar)) {
const Symbol &symbol = transition.first;
const ParseItemSet &next_item_set = transition.second;

View file

@ -1,11 +0,0 @@
#include "compiler/build_tables/item.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace build_tables {
// Constructs an Item by storing the given left-hand-side symbol and rule in
// the `lhs` and `rule` members.
Item::Item(const rules::Symbol &lhs, const rule_ptr rule)
: lhs(lhs), rule(rule) {}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -1,21 +0,0 @@
#ifndef COMPILER_BUILD_TABLES_ITEM_H_
#define COMPILER_BUILD_TABLES_ITEM_H_
#include "compiler/rules/symbol.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace build_tables {
// Plain pairing of a left-hand-side symbol with a rule. LexItem derived from
// this class before this commit.
class Item {
public:
Item(const rules::Symbol &lhs, rule_ptr rule);
// Symbol on the left-hand side of the item.
rules::Symbol lhs;
// Rule associated with the item.
rule_ptr rule;
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_ITEM_H_

View file

@ -5,7 +5,6 @@
#include "tree_sitter/compiler.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/build_tables/item.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {

View file

@ -1,56 +0,0 @@
#include <set>
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/merge_transitions.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/syntax_grammar.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
using std::map;
using rules::CharacterSet;
using rules::Symbol;
// Computes, for each symbol that can be consumed from `input_item_set`, the
// item set reached by consuming that symbol. The closure of the input set is
// taken first (item_set_closure), and the transitions are computed from the
// closed set.
map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &input_item_set,
const SyntaxGrammar &grammar) {
ParseItemSet item_set(item_set_closure(input_item_set, grammar));
map<Symbol, ParseItemSet> result;
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
const LookaheadSet &lookahead_symbols = pair.second;
const Production &production =
grammar.productions(item.lhs())[item.production_index];
// Items already at the end of their production have nothing left to consume.
if (item.step_index == production.size())
continue;
const Symbol &symbol = production[item.step_index].symbol;
unsigned int step = item.step_index + 1;
// The advanced item takes the rule_id of the step it now points at, or 0
// when the production is finished.
int rule_id = step < production.size() ? production[step].rule_id : 0;
ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
// The lookahead set is carried over to the advanced item unchanged.
result[symbol][new_item] = lookahead_symbols;
}
return result;
}
// Builds a map from character sets to the lex item sets reached on them.
// For every item, each transition of its rule (rule_transitions) yields a
// successor item with the same lhs; successors are folded into `result`
// through merge_transition, whose callback unions two item sets.
map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set) {
map<CharacterSet, LexItemSet> result;
for (const LexItem &item : item_set) {
for (auto &transition : rule_transitions(item.rule)) {
// transition.first is the character set, transition.second the rule that
// remains after consuming it.
LexItem next_item(item.lhs, transition.second);
merge_transition<LexItemSet>(
&result, { transition.first, LexItemSet({ next_item }) },
[](LexItemSet *left, const LexItemSet *right) {
left->insert(right->begin(), right->end());
});
}
}
return result;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -1,28 +0,0 @@
#ifndef COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_
#define COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_
#include <map>
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
struct SyntaxGrammar;
namespace rules {
class CharacterSet;
class Symbol;
}
namespace build_tables {
// Maps each consumable symbol to the parse item set reached by consuming it.
std::map<rules::Symbol, ParseItemSet> sym_transitions(
const ParseItemSet &item_set, const SyntaxGrammar &grammar);
// Maps each character set to the lex item set reached by consuming it.
std::map<rules::CharacterSet, LexItemSet> char_transitions(
const LexItemSet &item_set);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_

View file

@ -1,18 +1,20 @@
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/get_metadata.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/build_tables/merge_transitions.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/visitor.h"
namespace tree_sitter {
namespace build_tables {
using std::hash;
using std::map;
using std::string;
using std::ostream;
using rules::CharacterSet;
using rules::Symbol;
LexItem::LexItem(const rules::Symbol &lhs, const rule_ptr rule)
: Item(lhs, rule) {}
: lhs(lhs), rule(rule) {}
bool LexItem::operator==(const LexItem &other) const {
return (other.lhs == lhs) && other.rule->operator==(*rule);
@ -22,5 +24,31 @@ bool LexItem::is_token_start() const {
return get_metadata(rule, rules::START_TOKEN).max > 0;
}
// Hashes a LexItem by XOR-ing the hash of its lhs symbol with the hash of its
// rule (as produced by std::hash<rule_ptr>).
size_t LexItem::Hash::operator()(const LexItem &item) const {
  const size_t lhs_hash = hash<Symbol>()(item.lhs);
  const size_t rule_hash = hash<rule_ptr>()(item.rule);
  return lhs_hash ^ rule_hash;
}
// Hashes a LexItemSet: starts from the hash of the set's size and XORs in the
// hash of every item. XOR makes the result independent of iteration order.
size_t LexItemSetHash::operator()(const LexItemSet &item_set) const {
  LexItem::Hash hash_item;
  size_t combined = hash<size_t>()(item_set.size());
  for (const LexItem &entry : item_set) {
    combined ^= hash_item(entry);
  }
  return combined;
}
// Builds a map from character sets to the lex item sets reached on them.
// For every item, each transition of its rule (rule_transitions) yields a
// successor item with the same lhs; the successors are folded into the result
// through merge_transition, with a callback that unions two item sets.
map<CharacterSet, LexItemSet> lex_item_set_transitions(const LexItemSet &item_set) {
  // Callback handed to merge_transition: add every incoming item to the
  // item set that is already stored.
  auto unite_item_sets = [](LexItemSet *accumulated, const LexItemSet *incoming) {
    accumulated->insert(incoming->begin(), incoming->end());
  };

  map<CharacterSet, LexItemSet> transitions;
  for (const LexItem &source_item : item_set) {
    for (auto &edge : rule_transitions(source_item.rule)) {
      // edge.first is the character set, edge.second the rule that remains
      // after consuming it.
      LexItemSet successors({ LexItem(source_item.lhs, edge.second) });
      merge_transition<LexItemSet>(&transitions, { edge.first, successors },
                                   unite_item_sets);
    }
  }
  return transitions;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -2,44 +2,38 @@
#define COMPILER_BUILD_TABLES_LEX_ITEM_H_
#include <unordered_set>
#include <map>
#include <string>
#include "compiler/build_tables/item.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
class LexItem : public Item {
class LexItem {
public:
LexItem(const rules::Symbol &lhs, rule_ptr rule);
bool operator==(const LexItem &other) const;
bool is_token_start() const;
rules::Symbol lhs;
rule_ptr rule;
struct Hash {
size_t operator()(const LexItem &) const;
};
};
typedef std::unordered_set<LexItem> LexItemSet;
typedef std::unordered_set<LexItem, LexItem::Hash> LexItemSet;
// Hash functor for LexItemSet, for use as the hasher of unordered containers
// keyed by a whole item set.
struct LexItemSetHash {
size_t operator()(const LexItemSet &) const;
};
// Maps each character set to the lex item set reached by consuming it from
// the given item set.
std::map<rules::CharacterSet, LexItemSet> lex_item_set_transitions(
const LexItemSet &);
} // namespace build_tables
} // namespace tree_sitter
// std::hash specializations for the lex item types.
namespace std {
// Hash of a LexItem: XOR of the hashes of its lhs symbol and its rule. The
// parameter is the Item base class, so only `lhs` and `rule` are read.
template <>
struct hash<tree_sitter::build_tables::LexItem> {
size_t operator()(const tree_sitter::build_tables::Item &item) const {
return hash<tree_sitter::rules::Symbol>()(item.lhs) ^
hash<tree_sitter::rule_ptr>()(item.rule);
}
};
// Hash of a LexItemSet: the hash of the set's size XOR-ed with the hash of
// every item in it.
template <>
struct hash<const tree_sitter::build_tables::LexItemSet> {
size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const {
size_t result = hash<size_t>()(set.size());
for (auto item : set)
result ^= hash<tree_sitter::build_tables::LexItem>()(item);
return result;
}
};
} // namespace std
#endif // COMPILER_BUILD_TABLES_LEX_ITEM_H_

View file

@ -6,8 +6,10 @@
namespace tree_sitter {
namespace build_tables {
using std::map;
using std::string;
using std::to_string;
using std::hash;
using rules::Symbol;
ParseItem::ParseItem(const Symbol &lhs, unsigned int production_index,
@ -38,5 +40,43 @@ Symbol ParseItem::lhs() const {
return Symbol(variable_index);
}
// Hashes a ParseItemSet. Starting from the hash of the number of entries, it
// XORs in, for every (item, lookahead set) entry: the item's variable_index,
// rule_id and step_index, the size of the lookahead set, and each lookahead
// symbol.
size_t ParseItemSetHash::operator()(const ParseItemSet &item_set) const {
  size_t combined = hash<size_t>()(item_set.size());

  for (auto &entry : item_set) {
    // Contribution of the item itself.
    const ParseItem &item = entry.first;
    combined ^= hash<unsigned int>()(item.variable_index);
    combined ^= hash<int>()(item.rule_id);
    combined ^= hash<unsigned int>()(item.step_index);

    // Contribution of the lookahead set attached to the item.
    const LookaheadSet &lookaheads = entry.second;
    combined ^= hash<size_t>()(lookaheads.entries->size());
    for (auto &lookahead : *lookaheads.entries)
      combined ^= hash<Symbol>()(lookahead);
  }

  return combined;
}
// Computes, for each symbol that can be consumed from `item_set`, the item
// set reached by consuming that symbol. Items at the end of their production
// contribute nothing. No closure is computed here; the items are used exactly
// as given.
map<Symbol, ParseItemSet> parse_item_set_transitions(
    const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
  map<Symbol, ParseItemSet> transitions;

  for (const auto &entry : item_set) {
    const ParseItem &item = entry.first;
    const LookaheadSet &lookaheads = entry.second;
    const Production &production =
        grammar.productions(item.lhs())[item.production_index];

    // Only items that still have a symbol left to consume produce a transition.
    if (item.step_index != production.size()) {
      const size_t next_step = item.step_index + 1;
      const Symbol consumed = production[item.step_index].symbol;

      // The advanced item takes the rule_id of the step it now points at, or
      // 0 when the production is finished.
      int next_rule_id = 0;
      if (next_step < production.size())
        next_rule_id = production[next_step].rule_id;

      ParseItem advanced(item.lhs(), item.production_index, next_step,
                         next_rule_id);
      // The lookahead set is carried over to the advanced item unchanged.
      transitions[consumed][advanced] = lookaheads;
    }
  }

  return transitions;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -2,9 +2,9 @@
#define COMPILER_BUILD_TABLES_PARSE_ITEM_H_
#include <map>
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/rules/symbol.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {
namespace build_tables {
@ -25,33 +25,14 @@ class ParseItem {
typedef std::map<ParseItem, LookaheadSet> ParseItemSet;
// Hash functor for ParseItemSet, for use as the hasher of unordered
// containers keyed by a whole item set.
struct ParseItemSetHash {
size_t operator()(const ParseItemSet &) const;
};
// Maps each consumable symbol to the parse item set reached by consuming it
// from the given item set, using the productions of the given grammar.
std::map<rules::Symbol, ParseItemSet> parse_item_set_transitions(
const ParseItemSet &, const SyntaxGrammar &);
} // namespace build_tables
} // namespace tree_sitter
// std::hash specializations for the parse item types.
namespace std {
// Hash of a ParseItem: XOR of the hashes of its variable_index, rule_id and
// step_index.
template <>
struct hash<tree_sitter::build_tables::ParseItem> {
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
return hash<unsigned int>()(item.variable_index) ^
hash<int>()(item.rule_id) ^ hash<unsigned int>()(item.step_index);
}
};
// Hash of a ParseItemSet: the hash of the number of entries XOR-ed with, for
// every entry, the item's hash, the size of its lookahead set and the hash of
// each lookahead symbol.
template <>
struct hash<const tree_sitter::build_tables::ParseItemSet> {
size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const {
size_t result = hash<size_t>()(set.size());
for (auto &pair : set) {
result ^= hash<tree_sitter::build_tables::ParseItem>()(pair.first);
result ^= hash<size_t>()(pair.second.entries->size());
for (auto &symbol : *pair.second.entries)
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
}
return result;
}
};
} // namespace std
#endif // COMPILER_BUILD_TABLES_PARSE_ITEM_H_

View file

@ -24,10 +24,10 @@ class RuleTransitions : public rules::RuleFn<map<CharacterSet, rule_ptr>> {
void merge_transitions(map<CharacterSet, rule_ptr> *left,
const map<CharacterSet, rule_ptr> &right) {
for (auto &pair : right)
merge_transition<rule_ptr>(
left, pair, [](rule_ptr *left, const rule_ptr *right) {
*left = Choice::build({ *left, *right });
});
merge_transition<rule_ptr>(left, pair,
[](rule_ptr *left, const rule_ptr *right) {
*left = Choice::build({ *left, *right });
});
}
map<CharacterSet, rule_ptr> apply_to(const CharacterSet *rule) {