Refactor bookkeeping of token starts in lexical rules

- Move lex items and parse items into their own files
This commit is contained in:
Max Brunsfeld 2014-04-17 13:20:43 -07:00
parent a12cd49585
commit 33d781f492
19 changed files with 280 additions and 232 deletions

View file

@ -661,7 +661,6 @@ LEX_FN() {
ADVANCE(53);
LEX_ERROR();
case 52:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))

View file

@ -70,35 +70,35 @@ describe("resolving parse conflicts", []() {
// Shift/reduce scenario: the shift's single precedence value (3) exceeds the
// reduce's precedence (1).
describe("when the shift has higher precedence", [&]() {
ParseAction shift = ParseAction::Shift(2, { 3 });
ParseAction reduce = ParseAction::Reduce(sym2, 1, 1);
// Resolving in either argument order must leave the conflict list empty.
it("does not record a conflict", [&]() {
manager->resolve_parse_action(sym1, shift, reduce);
manager->resolve_parse_action(sym1, reduce, shift);
AssertThat(manager->conflicts(), IsEmpty());
});
// Expected result is false with the shift passed as the first action and true
// with it passed as the second, i.e. the shift is preferred in both orders.
it("favors the shift", [&]() {
AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsFalse());
AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsTrue());
});
});
// Shift/reduce scenario: the reduce's precedence (3) exceeds the shift's
// single precedence value (1).
describe("when the reduce has higher precedence", [&]() {
ParseAction shift = ParseAction::Shift(2, { 1 });
ParseAction reduce = ParseAction::Reduce(sym2, 1, 3);
// Resolving in either argument order must leave the conflict list empty.
it("does not record a conflict", [&]() {
manager->resolve_parse_action(sym1, reduce, shift);
manager->resolve_parse_action(sym1, shift, reduce);
AssertThat(manager->conflicts(), IsEmpty());
});
// Expected result is false with the reduce passed as the first action and
// true with it passed as the second, i.e. the reduce is preferred either way.
it("favors the reduce", [&]() {
AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsFalse());
AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsTrue());
});
});
describe("when the precedences are equal", [&]() {
ParseAction shift = ParseAction::Shift(2, { 0 });
ParseAction reduce = ParseAction::Reduce(sym2, 1, 0);
@ -110,17 +110,17 @@ describe("resolving parse conflicts", []() {
Conflict("rule1: shift (precedence 0) / reduce rule2 (precedence 0)")
})));
});
it("favors the shift", [&]() {
AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsFalse());
AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsTrue());
});
});
describe("when the shift has conflicting precedences compared to the reduce", [&]() {
ParseAction shift = ParseAction::Shift(2, { 0, 1, 3 });
ParseAction reduce = ParseAction::Reduce(sym2, 1, 2);
it("records a conflict", [&]() {
manager->resolve_parse_action(sym1, reduce, shift);
manager->resolve_parse_action(sym1, shift, reduce);
@ -128,7 +128,7 @@ describe("resolving parse conflicts", []() {
Conflict("rule1: shift (precedence 0, 1, 3) / reduce rule2 (precedence 2)")
})));
});
it("favors the shift", [&]() {
AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsFalse());
AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsTrue());
@ -140,23 +140,23 @@ describe("resolving parse conflicts", []() {
// Reduce/reduce scenario: both actions reduce sym2, but `right` carries
// precedence 3 while `left` carries precedence 0.
describe("when one action has higher precedence", [&]() {
ParseAction left = ParseAction::Reduce(sym2, 1, 0);
ParseAction right = ParseAction::Reduce(sym2, 1, 3);
// Expected result is true when the higher-precedence action is the second
// argument and false when it is the first.
it("favors that action", [&]() {
AssertThat(manager->resolve_parse_action(sym1, left, right), IsTrue());
AssertThat(manager->resolve_parse_action(sym1, right, left), IsFalse());
});
// Resolving in either argument order must leave the conflict list empty.
it("does not record a conflict", [&]() {
manager->resolve_parse_action(sym1, left, right);
manager->resolve_parse_action(sym1, right, left);
AssertThat(manager->conflicts(), IsEmpty());
});
});
describe("when the actions have the same precedence", [&]() {
ParseAction left = ParseAction::Reduce(sym1, 1, 0);
ParseAction right = ParseAction::Reduce(sym2, 1, 0);
it("favors the symbol listed earlier in the grammar", [&]() {
AssertThat(manager->resolve_parse_action(sym1, right, left), IsTrue());
AssertThat(manager->resolve_parse_action(sym1, left, right), IsFalse());

View file

@ -33,49 +33,6 @@ describe("getting metadata for rules", []() {
AssertThat(get_metadata(rule, key1), Equals(0));
});
});
// Sequence whose first member is a repeat: the metadata on the second member
// is still expected to be reported.
it("works for metadata rules preceded by other rules that can be blank", [&]() {
auto rule = seq({
repeat(sym("x")),
make_shared<Metadata>(sym("x"), map<MetadataKey, int>({
{ key1, 1 },
{ key2, 2 },
})),
});
AssertThat(get_metadata(rule, key2), Equals(2));
});
// Choice between a plain symbol and a metadata rule.
// NOTE(review): key2 is stored as 2 but the expectation is 1 — presumably
// because the Choice handler combines its branches with `||`, which yields
// 0 or 1 rather than the stored value. Confirm this is intended.
it("works for choices containing metadata rule", [&]() {
auto rule = choice({
sym("x"),
make_shared<Metadata>(sym("x"), map<MetadataKey, int>({
{ key1, 1 },
{ key2, 2 },
})),
});
AssertThat(get_metadata(rule, key2), Equals(1));
});
// Repeat wrapping a metadata rule: the stored value is expected unchanged.
it("works for repetitions containing metadata rules", [&]() {
auto rule = repeat(make_shared<Metadata>(sym("x"), map<MetadataKey, int>({
{ key1, 1 },
{ key2, 2 },
})));
AssertThat(get_metadata(rule, key2), Equals(2));
});
// Sequence whose first member is a plain symbol: the metadata on the second
// member is expected to be ignored, giving 0.
it("returns 0 for metadata rules preceded by rules that can't be blank", [&]() {
auto rule = seq({
sym("x"),
make_shared<Metadata>(sym("y"), map<MetadataKey, int>({
{ key1, 1 },
{ key2, 2 },
})),
});
AssertThat(get_metadata(rule, key2), Equals(0));
});
});
END_TEST

View file

@ -0,0 +1,43 @@
#include "compiler_spec_helper.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using namespace build_tables;
START_TEST
describe("lex items", []() {
    describe("determining if an item is the start of a token", [&]() {
        // Shared fixtures: the item's left-hand symbol and a rule whose
        // metadata sets START_TOKEN to 1.
        Symbol lhs("x");
        map<MetadataKey, int> start_flag_on({
            { START_TOKEN, 1 }
        });
        rule_ptr token_start = make_shared<Metadata>(str("a"), start_flag_on);

        it("returns true for rules designated as token starts", [&]() {
            AssertThat(LexItem(lhs, token_start).is_token_start(), IsTrue());
        });

        it("returns false for rules not designated as token starts", [&]() {
            // Metadata present, but START_TOKEN is 0.
            map<MetadataKey, int> start_flag_off({
                { START_TOKEN, 0 }
            });
            LexItem flagged_off(lhs, make_shared<Metadata>(str("a"), start_flag_off));
            AssertThat(flagged_off.is_token_start(), IsFalse());

            // No metadata wrapper at all.
            LexItem unflagged(lhs, str("a"));
            AssertThat(unflagged.is_token_start(), IsFalse());
        });

        describe("when given a sequence containing a token start", [&]() {
            it("returns true when the rule before the token start may be blank", [&]() {
                rule_ptr blank_prefixed = seq({ repeat(str("a")), token_start });
                AssertThat(LexItem(lhs, blank_prefixed).is_token_start(), IsTrue());
            });

            it("returns false when the rule before the token start cannot be blank", [&]() {
                rule_ptr non_blank_prefixed = seq({ str("a"), token_start });
                AssertThat(LexItem(lhs, non_blank_prefixed).is_token_start(), IsFalse());
            });
        });
    });
});
END_TEST

View file

@ -183,12 +183,12 @@ describe("rule transitions", []() {
{ CharacterSet({ 'a' }), rule }
})));
});
it("preserves metadata", [&]() {
map<MetadataKey, int> metadata_value({
{ PRECEDENCE, 5 }
});
rule_ptr rule = make_shared<Metadata>(seq({ sym("x"), sym("y") }), metadata_value);
AssertThat(
sym_transitions(rule),

View file

@ -42,7 +42,7 @@ namespace tree_sitter {
result.insert(item.precedence());
return result;
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (auto &transition : sym_transitions(item_set, grammar)) {
const Symbol &symbol = transition.first;
@ -51,7 +51,7 @@ namespace tree_sitter {
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(symbol);
if (current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) {
ParseStateId new_state_id = add_parse_state(item_set);
@ -95,7 +95,7 @@ namespace tree_sitter {
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(item.lookahead_sym);
if (current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) {
parse_table.add_action(state_id, item.lookahead_sym, action);

View file

@ -97,7 +97,7 @@ namespace tree_sitter {
result.insert(result.end(), conflicts_.begin(), conflicts_.end());
return result;
}
string precedence_string(const ParseAction &action) {
string precedences = "(precedence ";
bool started = false;
@ -108,7 +108,7 @@ namespace tree_sitter {
}
return precedences + ")";
}
string message_for_action(const ParseAction &action, const map<Symbol, string> &rule_names) {
switch (action.type) {
case ParseActionTypeShift:
@ -126,7 +126,7 @@ namespace tree_sitter {
return "error";
}
}
void ConflictManager::record_conflict(const rules::Symbol &symbol,
const ParseAction &left,
const ParseAction &right) {

View file

@ -2,7 +2,7 @@
#define COMPILER_BUILD_TABLES_FIRST_SET_H_
#include <set>
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {

View file

@ -3,7 +3,7 @@
#include <set>
#include <map>
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {

View file

@ -1,42 +1,21 @@
#include "compiler/build_tables/get_metadata.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/visitor.h"
#include "compiler/build_tables/rule_can_be_blank.h"
namespace tree_sitter {
namespace build_tables {
// Rule visitor that looks up the integer stored under a single metadata key,
// descending through Choice, Repeat and Seq rules to reach Metadata rules.
class GetMetadata : public rules::RuleFn<int> {
// The key whose value is being looked up.
rules::MetadataKey metadata_key;
public:
GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
// NOTE(review): `||` converts each branch result to bool, so this returns
// 0 or 1 rather than the stored metadata value. The spec for choices expects
// exactly that (1 for a stored 2) — confirm it is intended.
int apply_to(const rules::Choice *rule) {
return apply(rule->left) || apply(rule->right);
}
// A repetition reports whatever its content reports.
int apply_to(const rules::Repeat *rule) {
return apply(rule->content);
}
// The left side wins; the right side is consulted only when the left side
// produced 0 and rule_can_be_blank(left) holds.
int apply_to(const rules::Seq *rule) {
int result = apply(rule->left);
if (rule_can_be_blank(rule->left) && result == 0)
result = apply(rule->right);
return result;
}
// Returns the value stored under metadata_key, or 0 when the key is absent.
int apply_to(const rules::Metadata *rule) {
auto pair = rule->value.find(metadata_key);
if (pair != rule->value.end())
return pair->second;
else
return 0;
}
};
// Looks up `key` in the metadata attached to `rule`.
//
// Only Metadata rules are handled explicitly here: the stored value is
// returned when the key is present and 0 when it is not. Every other rule
// kind goes through whatever RuleFn<int> does by default (presumably a
// value-initialized int, i.e. 0 — verify against RuleFn).
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) {
    class GetMetadata : public rules::RuleFn<int> {
    public:
        GetMetadata(rules::MetadataKey key) : metadata_key(key) {}

    private:
        rules::MetadataKey metadata_key;

        int apply_to(const rules::Metadata *rule) {
            auto entry = rule->value.find(metadata_key);
            if (entry == rule->value.end())
                return 0;
            return entry->second;
        }
    };

    return GetMetadata(key).apply(rule);
}
}

View file

@ -1,78 +1,16 @@
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/build_tables/get_metadata.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
using std::string;
using std::to_string;
using std::ostream;
using std::vector;
using rules::Symbol;
using rules::rule_ptr;
namespace build_tables {
Item::Item(const Symbol &lhs, const rule_ptr rule) :
Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule) :
lhs(lhs),
rule(rule) {}
// Reports the item as done exactly when rule_can_be_blank(rule) is true.
bool Item::is_done() const {
return rule_can_be_blank(rule);
}
// Writes a lex item to `stream` in the form `#<item LHS RULE>`.
ostream& operator<<(ostream &stream, const LexItem &item) {
    stream << string("#<item ") << item.lhs;
    stream << string(" ") << *item.rule;
    stream << string(">");
    return stream;
}
// Writes a parse item to `stream` in the form
// `#<item LHS RULE CONSUMED_COUNT LOOKAHEAD>`.
ostream& operator<<(ostream &stream, const ParseItem &item) {
    stream << string("#<item ") << item.lhs;
    stream << string(" ") << *item.rule;
    stream << string(" ") << to_string(item.consumed_symbol_count);
    stream << string(" ") << item.lookahead_sym;
    stream << string(">");
    return stream;
}
// Builds a lex item by forwarding both arguments to the Item base constructor.
LexItem::LexItem(const Symbol &lhs, const rule_ptr rule) : Item(lhs, rule) {}
// Two lex items are equal when their left-hand symbols are equal and the
// rules they point at compare equal (by value, not by pointer).
bool LexItem::operator==(const LexItem &other) const {
    if (!(other.lhs == lhs))
        return false;
    return *other.rule == *rule;
}
// True when get_metadata reports a non-zero START_TOKEN value for the rule.
bool LexItem::is_token_start() const {
    const int start_flag = get_metadata(rule, rules::START_TOKEN);
    return start_flag != 0;
}
// Builds a parse item: lhs and rule go to the Item base, while the consumed
// symbol count and the lookahead symbol are stored on the ParseItem itself.
ParseItem::ParseItem(const Symbol &lhs,
const rule_ptr rule,
size_t consumed_symbol_count,
const Symbol &lookahead_sym) :
Item(lhs, rule),
consumed_symbol_count(consumed_symbol_count),
lookahead_sym(lookahead_sym) {}
// Two parse items are equal when all four components match: left-hand
// symbol, pointed-to rule (by value), consumed symbol count and lookahead.
bool ParseItem::operator==(const ParseItem &other) const {
    if (!(other.lhs == lhs))
        return false;
    if (!(*other.rule == *rule))
        return false;
    if (!(other.consumed_symbol_count == consumed_symbol_count))
        return false;
    return other.lookahead_sym == lookahead_sym;
}
// Returns the value get_metadata reports for the PRECEDENCE key of this
// item's rule.
int ParseItem::precedence() const {
return get_metadata(rule, rules::PRECEDENCE);
}
}
}

View file

@ -2,14 +2,9 @@
#define COMPILER_BUILD_TABLES_ITEM_H_
#include <unordered_set>
#include <string>
#include <vector>
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
class Grammar;
namespace build_tables {
class Item {
public:
@ -19,65 +14,7 @@ namespace tree_sitter {
rules::Symbol lhs;
rules::rule_ptr rule;
};
// An Item used when building the lexer: adds equality and a token-start query
// on top of the lhs/rule pair inherited from Item.
class LexItem : public Item {
public:
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
// Compares the left-hand symbols and the pointed-to rules.
bool operator==(const LexItem &other) const;
// Whether this item's rule is marked with the START_TOKEN metadata key.
bool is_token_start() const;
};
// An Item used when building the parse table: in addition to the lhs/rule
// pair inherited from Item it records how many symbols have been consumed
// and which lookahead symbol it is paired with.
class ParseItem : public Item {
public:
ParseItem(const rules::Symbol &lhs,
rules::rule_ptr rule,
const size_t consumed_symbol_count,
const rules::Symbol &lookahead_sym);
// Compares lhs, pointed-to rule, consumed symbol count and lookahead symbol.
bool operator==(const ParseItem &other) const;
// The value stored under the PRECEDENCE metadata key of this item's rule.
int precedence() const;
// Both members are const, so a ParseItem cannot be reassigned after
// construction.
const size_t consumed_symbol_count;
const rules::Symbol lookahead_sym;
};
typedef std::unordered_set<ParseItem> ParseItemSet;
typedef std::unordered_set<LexItem> LexItemSet;
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
}
}
namespace std {
// Hash for LexItem: XOR of the left-hand symbol's hash and the rule's hash.
// The call operator accepts any Item, since only base-class members are read.
template<>
struct hash<tree_sitter::build_tables::LexItem> {
    size_t operator()(const tree_sitter::build_tables::Item &item) const {
        const size_t lhs_hash = hash<tree_sitter::rules::Symbol>()(item.lhs);
        const size_t rule_hash = hash<tree_sitter::rules::rule_ptr>()(item.rule);
        return lhs_hash ^ rule_hash;
    }
};
// Hash for ParseItem: XOR of the hashes of the left-hand symbol's name, the
// rule, the consumed symbol count and the lookahead symbol's name.
template<>
struct hash<tree_sitter::build_tables::ParseItem> {
    size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
        const size_t lhs_hash = hash<string>()(item.lhs.name);
        const size_t rule_hash = hash<tree_sitter::rules::rule_ptr>()(item.rule);
        const size_t count_hash = hash<size_t>()(item.consumed_symbol_count);
        const size_t lookahead_hash = hash<string>()(item.lookahead_sym.name);
        return lhs_hash ^ rule_hash ^ count_hash ^ lookahead_hash;
    }
};
// Hash for a whole unordered_set<T>: the hash of the set's size XOR-ed with
// the hash of every element. XOR is commutative, so the result does not
// depend on the set's iteration order.
template<typename T>
struct hash<const unordered_set<T>> {
    size_t operator()(const unordered_set<T> &set) const {
        size_t result = hash<size_t>()(set.size());
        // Iterate by const reference: hashing only reads the element, so
        // copying each one per iteration is wasted work.
        for (const auto &item : set)
            result ^= hash<T>()(item);
        return result;
    }
};
}
#endif // COMPILER_BUILD_TABLES_ITEM_H_

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
#define COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class PreparedGrammar;

View file

@ -2,7 +2,8 @@
#define COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_
#include <map>
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class PreparedGrammar;

View file

@ -0,0 +1,51 @@
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::ostream;
using std::vector;
namespace build_tables {
// Builds a lex item by forwarding both arguments to the Item base constructor.
LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule) :
Item(lhs, rule) {}
// Two lex items are equal when their left-hand symbols are equal and the
// rules they point at compare equal (by value, not by pointer).
bool LexItem::operator==(const LexItem &other) const {
    if (!(other.lhs == lhs))
        return false;
    return *other.rule == *rule;
}
// Decides whether this item's rule begins a token.
//
// A Metadata rule counts when it stores a non-zero value under START_TOKEN.
// A Seq counts when its left side does, or when its left side can be blank
// and its right side does. Other rule kinds are left to RuleFn<bool>'s
// default handling (presumably false — verify against RuleFn).
bool LexItem::is_token_start() const {
    class IsTokenStart : public rules::RuleFn<bool> {
        bool apply_to(const rules::Metadata *rule) {
            auto entry = rule->value.find(rules::START_TOKEN);
            if (entry == rule->value.end())
                return false;
            return entry->second != 0;
        }

        bool apply_to(const rules::Seq *rule) {
            if (apply(rule->left))
                return true;
            // The right side only matters if the left side may match nothing.
            if (!rule_can_be_blank(rule->left))
                return false;
            return apply(rule->right);
        }
    };

    return IsTokenStart().apply(rule);
}
// Writes a lex item to `stream` in the form `#<item LHS RULE>`.
ostream& operator<<(ostream &stream, const LexItem &item) {
    stream << string("#<item ") << item.lhs;
    stream << string(" ") << *item.rule;
    stream << string(">");
    return stream;
}
}
}

View file

@ -0,0 +1,44 @@
#ifndef COMPILER_BUILD_TABLES_LEX_ITEM_H_
#define COMPILER_BUILD_TABLES_LEX_ITEM_H_
#include <unordered_set>
#include <string>
#include "compiler/build_tables/item.h"
namespace tree_sitter {
namespace build_tables {
// An Item used when building the lexer: adds equality and a token-start query
// on top of the lhs/rule pair inherited from Item.
class LexItem : public Item {
public:
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
// Compares the left-hand symbols and the pointed-to rules.
bool operator==(const LexItem &other) const;
// Whether this item's rule is marked with the START_TOKEN metadata key.
bool is_token_start() const;
};
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
typedef std::unordered_set<LexItem> LexItemSet;
}
}
namespace std {
// Hash for LexItem: XOR of the left-hand symbol's hash and the rule's hash.
// The call operator accepts any Item, since only base-class members are read.
template<>
struct hash<tree_sitter::build_tables::LexItem> {
    size_t operator()(const tree_sitter::build_tables::Item &item) const {
        const size_t lhs_hash = hash<tree_sitter::rules::Symbol>()(item.lhs);
        const size_t rule_hash = hash<tree_sitter::rules::rule_ptr>()(item.rule);
        return lhs_hash ^ rule_hash;
    }
};
// Hash for a whole LexItemSet: the hash of the set's size XOR-ed with the
// hash of every item. XOR is commutative, so the result does not depend on
// the set's iteration order.
template<>
struct hash<const tree_sitter::build_tables::LexItemSet> {
    size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const {
        size_t result = hash<size_t>()(set.size());
        // Iterate by const reference: hashing only reads the item, so copying
        // each LexItem (symbol plus shared rule pointer) is wasted work.
        for (const auto &item : set)
            result ^= hash<tree_sitter::build_tables::LexItem>()(item);
        return result;
    }
};
}
#endif // COMPILER_BUILD_TABLES_LEX_ITEM_H_

View file

@ -0,0 +1,45 @@
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/get_metadata.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
using std::string;
using std::to_string;
using std::ostream;
namespace build_tables {
// Builds a parse item: lhs and rule go to the Item base, while the consumed
// symbol count and the lookahead symbol are stored on the ParseItem itself.
ParseItem::ParseItem(const rules::Symbol &lhs,
const rules::rule_ptr rule,
size_t consumed_symbol_count,
const rules::Symbol &lookahead_sym) :
Item(lhs, rule),
consumed_symbol_count(consumed_symbol_count),
lookahead_sym(lookahead_sym) {}
// Two parse items are equal when all four components match: left-hand
// symbol, pointed-to rule (by value), consumed symbol count and lookahead.
bool ParseItem::operator==(const ParseItem &other) const {
    if (!(other.lhs == lhs))
        return false;
    if (!(*other.rule == *rule))
        return false;
    if (!(other.consumed_symbol_count == consumed_symbol_count))
        return false;
    return other.lookahead_sym == lookahead_sym;
}
// Returns the value get_metadata reports for the PRECEDENCE key of this
// item's rule.
int ParseItem::precedence() const {
return get_metadata(rule, rules::PRECEDENCE);
}
// Writes a parse item to `stream` in the form
// `#<item LHS RULE CONSUMED_COUNT LOOKAHEAD>`.
ostream& operator<<(ostream &stream, const ParseItem &item) {
    stream << string("#<item ") << item.lhs;
    stream << string(" ") << *item.rule;
    stream << string(" ") << to_string(item.consumed_symbol_count);
    stream << string(" ") << item.lookahead_sym;
    stream << string(">");
    return stream;
}
}
}

View file

@ -0,0 +1,54 @@
#ifndef COMPILER_BUILD_TABLES_PARSE_ITEM_H_
#define COMPILER_BUILD_TABLES_PARSE_ITEM_H_
#include <unordered_set>
#include <string>
#include "compiler/rules/symbol.h"
#include "compiler/build_tables/item.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
namespace build_tables {
// An Item used when building the parse table: in addition to the lhs/rule
// pair inherited from Item it records how many symbols have been consumed
// and which lookahead symbol it is paired with.
class ParseItem : public Item {
public:
ParseItem(const rules::Symbol &lhs,
rules::rule_ptr rule,
const size_t consumed_symbol_count,
const rules::Symbol &lookahead_sym);
// Compares lhs, pointed-to rule, consumed symbol count and lookahead symbol.
bool operator==(const ParseItem &other) const;
// The value stored under the PRECEDENCE metadata key of this item's rule.
int precedence() const;
// Both members are const, so a ParseItem cannot be reassigned after
// construction.
const size_t consumed_symbol_count;
const rules::Symbol lookahead_sym;
};
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
typedef std::unordered_set<ParseItem> ParseItemSet;
}
}
namespace std {
template<>
struct hash<tree_sitter::build_tables::ParseItem> {
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
return
hash<string>()(item.lhs.name) ^
hash<tree_sitter::rules::rule_ptr>()(item.rule) ^
hash<size_t>()(item.consumed_symbol_count) ^
hash<string>()(item.lookahead_sym.name);
}
};
// Hash for a whole ParseItemSet: the hash of the set's size XOR-ed with the
// hash of every item. XOR is commutative, so the result does not depend on
// the set's iteration order.
template<>
struct hash<const tree_sitter::build_tables::ParseItemSet> {
    size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const {
        size_t result = hash<size_t>()(set.size());
        // Iterate by const reference: hashing only reads the item, so copying
        // each ParseItem (two symbols plus a shared rule pointer) is wasted work.
        for (const auto &item : set)
            result ^= hash<tree_sitter::build_tables::ParseItem>()(item);
        return result;
    }
};
}
#endif // COMPILER_BUILD_TABLES_PARSE_ITEM_H_

View file

@ -23,7 +23,7 @@ namespace tree_sitter {
str_replace(&input, "\n", "\\n");
return input;
}
string join(vector<string> lines, string separator) {
string result;
bool started = false;
@ -38,13 +38,13 @@ namespace tree_sitter {
// Convenience overload: joins `lines` with "\n" as the separator by delegating
// to the two-argument join.
string join(vector<string> lines) {
return join(lines, "\n");
}
// Prefixes `input` with one `tab` unit and uses util::str_replace to put the
// same unit after every "\n" in it, then returns the result.
string indent(string input) {
    const string tab = " ";
    const string newline_then_tab = "\n" + tab;
    util::str_replace(&input, "\n", newline_then_tab);
    input.insert(0, tab);
    return input;
}
string character_code(char character) {
switch (character) {
case '\0':