Refactor bookkeeping of token starts in lexical rules

- Move lex items and parse items into their own files
This commit is contained in:
Max Brunsfeld 2014-04-17 13:20:43 -07:00
parent a12cd49585
commit 33d781f492
19 changed files with 280 additions and 232 deletions

View file

@ -42,7 +42,7 @@ namespace tree_sitter {
result.insert(item.precedence());
return result;
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (auto &transition : sym_transitions(item_set, grammar)) {
const Symbol &symbol = transition.first;
@ -51,7 +51,7 @@ namespace tree_sitter {
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(symbol);
if (current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) {
ParseStateId new_state_id = add_parse_state(item_set);
@ -95,7 +95,7 @@ namespace tree_sitter {
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(item.lookahead_sym);
if (current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) {
parse_table.add_action(state_id, item.lookahead_sym, action);

View file

@ -97,7 +97,7 @@ namespace tree_sitter {
result.insert(result.end(), conflicts_.begin(), conflicts_.end());
return result;
}
string precedence_string(const ParseAction &action) {
string precedences = "(precedence ";
bool started = false;
@ -108,7 +108,7 @@ namespace tree_sitter {
}
return precedences + ")";
}
string message_for_action(const ParseAction &action, const map<Symbol, string> &rule_names) {
switch (action.type) {
case ParseActionTypeShift:
@ -126,7 +126,7 @@ namespace tree_sitter {
return "error";
}
}
void ConflictManager::record_conflict(const rules::Symbol &symbol,
const ParseAction &left,
const ParseAction &right) {

View file

@ -2,7 +2,7 @@
#define COMPILER_BUILD_TABLES_FIRST_SET_H_
#include <set>
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {

View file

@ -3,7 +3,7 @@
#include <set>
#include <map>
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {

View file

@ -1,42 +1,21 @@
#include "compiler/build_tables/get_metadata.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/visitor.h"
#include "compiler/build_tables/rule_can_be_blank.h"
namespace tree_sitter {
namespace build_tables {
// Rule visitor that looks up the integer stored under one metadata key.
// Metadata nodes yield the stored value (0 when the key is absent); the
// composite rules below decide which child's answer is reported.
class GetMetadata : public rules::RuleFn<int> {
    rules::MetadataKey metadata_key;

public:
    GetMetadata(rules::MetadataKey key) : metadata_key(key) {}

    // A choice yields 1 when either alternative yields a non-zero value,
    // otherwise 0. The right side is only visited when the left yields 0.
    int apply_to(const rules::Choice *rule) {
        if (apply(rule->left))
            return 1;
        return apply(rule->right) ? 1 : 0;
    }

    // A repeat reports whatever its content reports.
    int apply_to(const rules::Repeat *rule) {
        return apply(rule->content);
    }

    // A sequence reports its left side; the right side is consulted only
    // when the left side can be blank and produced 0.
    int apply_to(const rules::Seq *rule) {
        const int left_value = apply(rule->left);
        if (!rule_can_be_blank(rule->left) || left_value != 0)
            return left_value;
        return apply(rule->right);
    }

    // A metadata node yields the value stored under the key, or 0.
    int apply_to(const rules::Metadata *rule) {
        auto entry = rule->value.find(metadata_key);
        return (entry == rule->value.end()) ? 0 : entry->second;
    }
};
// Runs a metadata-lookup visitor over `rule` and returns its result.
// For a Metadata node the visitor yields the value stored under `key`, or 0
// when the key is absent; every other rule kind is handled by whatever
// RuleFn<int> does by default (not visible here).
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) {
    class GetMetadata : public rules::RuleFn<int> {
        rules::MetadataKey metadata_key;

        int apply_to(const rules::Metadata *rule) {
            auto entry = rule->value.find(metadata_key);
            if (entry == rule->value.end())
                return 0;
            return entry->second;
        }

    public:
        GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
    };

    GetMetadata visitor(key);
    return visitor.apply(rule);
}
}

View file

@ -1,78 +1,16 @@
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/build_tables/get_metadata.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
using std::string;
using std::to_string;
using std::ostream;
using std::vector;
using rules::Symbol;
using rules::rule_ptr;
namespace build_tables {
Item::Item(const Symbol &lhs, const rule_ptr rule) :
Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule) :
lhs(lhs),
rule(rule) {}
// An item is done exactly when rule_can_be_blank() accepts its rule.
bool Item::is_done() const {
    const bool blank_allowed = rule_can_be_blank(rule);
    return blank_allowed;
}
// Writes a LexItem as "#<item LHS RULE>" and returns the resulting stream.
ostream& operator<<(ostream &stream, const LexItem &item) {
    ostream &after_lhs = stream << "#<item " << item.lhs;
    ostream &after_rule = after_lhs << " " << *item.rule;
    return after_rule << ">";
}
// Writes a ParseItem as "#<item LHS RULE CONSUMED_COUNT LOOKAHEAD>" and
// returns the resulting stream.
ostream& operator<<(ostream &stream, const ParseItem &item) {
    // to_string keeps the count's formatting independent of stream flags.
    const string consumed_count = to_string(item.consumed_symbol_count);
    ostream &after_rule = stream << "#<item " << item.lhs << " " << *item.rule;
    ostream &after_count = after_rule << " " << consumed_count;
    return after_count << " " << item.lookahead_sym << ">";
}
// Forwards both arguments unchanged to the Item base constructor.
LexItem::LexItem(const Symbol &lhs, const rule_ptr rule)
    : Item(lhs, rule)
{
}
// Two lex items are equal when their left-hand symbols are equal and the
// rules they point at compare equal (the pointees, not the pointers).
bool LexItem::operator==(const LexItem &other) const {
    const bool same_lhs = (other.lhs == lhs);
    const bool same_rule = (*other.rule == *rule);
    if (!same_lhs)
        return false;
    return same_rule;
}
// True when get_metadata() reports a non-zero START_TOKEN value for the rule.
bool LexItem::is_token_start() const {
    const int start_token_value = get_metadata(rule, rules::START_TOKEN);
    return start_token_value != 0;
}
// Initializes the Item base from (lhs, rule) and stores the consumed-symbol
// count and the lookahead symbol in the item's const members.
ParseItem::ParseItem(const Symbol &lhs,
                     const rule_ptr rule,
                     size_t consumed_symbol_count,
                     const Symbol &lookahead_sym)
    : Item(lhs, rule)
    , consumed_symbol_count(consumed_symbol_count)
    , lookahead_sym(lookahead_sym)
{
}
// Parse items are equal when all four components agree: left-hand symbol,
// rule contents (pointees are compared), consumed-symbol count and lookahead.
bool ParseItem::operator==(const ParseItem &other) const {
    const bool same_lhs = (other.lhs == lhs);
    const bool same_rule = (*other.rule == *rule);
    const bool same_count = (other.consumed_symbol_count == consumed_symbol_count);
    const bool same_lookahead = (other.lookahead_sym == lookahead_sym);

    if (!same_lhs || !same_rule)
        return false;
    return same_count && same_lookahead;
}
// The item's precedence is the PRECEDENCE value get_metadata() finds in its rule.
int ParseItem::precedence() const {
    const int precedence_value = get_metadata(rule, rules::PRECEDENCE);
    return precedence_value;
}
}
}

View file

@ -2,14 +2,9 @@
#define COMPILER_BUILD_TABLES_ITEM_H_
#include <unordered_set>
#include <string>
#include <vector>
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
class Grammar;
namespace build_tables {
class Item {
public:
@ -19,65 +14,7 @@ namespace tree_sitter {
rules::Symbol lhs;
rules::rule_ptr rule;
};
// Item subclass that adds equality comparison and a token-start query.
class LexItem : public Item {
public:
// Builds the item from a left-hand symbol and a rule pointer.
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
// Equality over the left-hand symbol and the pointed-to rule.
bool operator==(const LexItem &other) const;
// Whether the rule carries a non-zero START_TOKEN metadata value.
bool is_token_start() const;
};
// Item subclass that additionally records how many symbols have been
// consumed and which lookahead symbol the item is paired with.
class ParseItem : public Item {
public:
// Builds the item; the last two arguments initialize the const members below.
ParseItem(const rules::Symbol &lhs,
rules::rule_ptr rule,
const size_t consumed_symbol_count,
const rules::Symbol &lookahead_sym);
// Equality over lhs, pointed-to rule, consumed count and lookahead symbol.
bool operator==(const ParseItem &other) const;
// PRECEDENCE metadata value found in the item's rule.
int precedence() const;
// Fixed at construction; part of the item's identity (compared and hashed).
const size_t consumed_symbol_count;
// Fixed at construction; part of the item's identity (compared and hashed).
const rules::Symbol lookahead_sym;
};
typedef std::unordered_set<ParseItem> ParseItemSet;
typedef std::unordered_set<LexItem> LexItemSet;
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
}
}
namespace std {
// Hash for LexItem: XOR of the left-hand symbol's hash and the rule
// pointer's hash. The call operator accepts any Item, since only the base
// class members are read.
template<>
struct hash<tree_sitter::build_tables::LexItem> {
    size_t operator()(const tree_sitter::build_tables::Item &item) const {
        const auto lhs_hash = hash<tree_sitter::rules::Symbol>()(item.lhs);
        const auto rule_hash = hash<tree_sitter::rules::rule_ptr>()(item.rule);
        return lhs_hash ^ rule_hash;
    }
};
// Hash for ParseItem: XOR of the hashes of the lhs name, the rule pointer,
// the consumed-symbol count and the lookahead symbol's name.
template<>
struct hash<tree_sitter::build_tables::ParseItem> {
    size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
        const auto lhs_hash = hash<string>()(item.lhs.name);
        const auto rule_hash = hash<tree_sitter::rules::rule_ptr>()(item.rule);
        const auto count_hash = hash<size_t>()(item.consumed_symbol_count);
        const auto lookahead_hash = hash<string>()(item.lookahead_sym.name);
        return lhs_hash ^ rule_hash ^ count_hash ^ lookahead_hash;
    }
};
// Hash for a whole (const) unordered_set<T>: the element count is hashed
// first and every element's hash is then XOR-ed in. XOR is commutative, so
// the result does not depend on the set's iteration order.
template<typename T>
struct hash<const unordered_set<T>> {
    size_t operator()(const unordered_set<T> &set) const {
        size_t result = hash<size_t>()(set.size());
        // Bind by const reference: a by-value loop variable would copy every
        // element just to hash it.
        for (const auto &item : set)
            result ^= hash<T>()(item);
        return result;
    }
};
}
#endif // COMPILER_BUILD_TABLES_ITEM_H_

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
#define COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class PreparedGrammar;

View file

@ -2,7 +2,8 @@
#define COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_
#include <map>
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class PreparedGrammar;

View file

@ -0,0 +1,51 @@
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/visitor.h"
namespace tree_sitter {
using std::string;
using std::ostream;
using std::vector;
namespace build_tables {
// Forwards both arguments unchanged to the Item base constructor.
LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule)
    : Item(lhs, rule)
{
}
// Two lex items are equal when their left-hand symbols are equal and the
// rules they point at compare equal (the pointees, not the pointers).
bool LexItem::operator==(const LexItem &other) const {
    const bool same_lhs = (other.lhs == lhs);
    const bool same_rule = (*other.rule == *rule);
    if (!same_lhs)
        return false;
    return same_rule;
}
bool LexItem::is_token_start() const {
class IsTokenStart : public rules::RuleFn<bool> {
bool apply_to(const rules::Seq *rule) {
bool result = apply(rule->left);
if (!result && rule_can_be_blank(rule->left))
result = apply(rule->right);
return result;
}
bool apply_to(const rules::Metadata *rule) {
auto pair = rule->value.find(rules::START_TOKEN);
return (pair != rule->value.end()) && pair->second;
}
};
return IsTokenStart().apply(rule);
}
// Writes a LexItem as "#<item LHS RULE>" and returns the resulting stream.
ostream& operator<<(ostream &stream, const LexItem &item) {
    ostream &after_lhs = stream << "#<item " << item.lhs;
    ostream &after_rule = after_lhs << " " << *item.rule;
    return after_rule << ">";
}
}
}

View file

@ -0,0 +1,44 @@
#ifndef COMPILER_BUILD_TABLES_LEX_ITEM_H_
#define COMPILER_BUILD_TABLES_LEX_ITEM_H_
#include <unordered_set>
#include <string>
#include "compiler/build_tables/item.h"
namespace tree_sitter {
namespace build_tables {
// Item subclass that adds equality comparison and a token-start query.
class LexItem : public Item {
public:
// Builds the item from a left-hand symbol and a rule pointer.
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
// Equality over the left-hand symbol and the pointed-to rule.
bool operator==(const LexItem &other) const;
// Whether the rule is marked with truthy START_TOKEN metadata; for a
// sequence the left side is checked first, then the right side if the left
// side can be blank.
bool is_token_start() const;
};
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
typedef std::unordered_set<LexItem> LexItemSet;
}
}
namespace std {
template<>
struct hash<tree_sitter::build_tables::LexItem> {
size_t operator()(const tree_sitter::build_tables::Item &item) const {
return
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
hash<tree_sitter::rules::rule_ptr>()(item.rule);
}
};
// Hash for a whole (const) LexItemSet: the element count is hashed first and
// every item's hash is then XOR-ed in. XOR is commutative, so the result
// does not depend on the set's iteration order.
template<>
struct hash<const tree_sitter::build_tables::LexItemSet> {
    size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const {
        size_t result = hash<size_t>()(set.size());
        // Bind by const reference: a by-value loop variable would copy every
        // LexItem just to hash it.
        for (const auto &item : set)
            result ^= hash<tree_sitter::build_tables::LexItem>()(item);
        return result;
    }
};
}
#endif // COMPILER_BUILD_TABLES_LEX_ITEM_H_

View file

@ -0,0 +1,45 @@
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/get_metadata.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
using std::string;
using std::to_string;
using std::ostream;
namespace build_tables {
// Initializes the Item base from (lhs, rule) and stores the consumed-symbol
// count and the lookahead symbol in the item's const members.
ParseItem::ParseItem(const rules::Symbol &lhs,
                     const rules::rule_ptr rule,
                     size_t consumed_symbol_count,
                     const rules::Symbol &lookahead_sym)
    : Item(lhs, rule)
    , consumed_symbol_count(consumed_symbol_count)
    , lookahead_sym(lookahead_sym)
{
}
// Parse items are equal when all four components agree: left-hand symbol,
// rule contents (pointees are compared), consumed-symbol count and lookahead.
bool ParseItem::operator==(const ParseItem &other) const {
    const bool same_lhs = (other.lhs == lhs);
    const bool same_rule = (*other.rule == *rule);
    const bool same_count = (other.consumed_symbol_count == consumed_symbol_count);
    const bool same_lookahead = (other.lookahead_sym == lookahead_sym);

    if (!same_lhs || !same_rule)
        return false;
    return same_count && same_lookahead;
}
// The item's precedence is the PRECEDENCE value get_metadata() finds in its rule.
int ParseItem::precedence() const {
    const int precedence_value = get_metadata(rule, rules::PRECEDENCE);
    return precedence_value;
}
// Writes a ParseItem as "#<item LHS RULE CONSUMED_COUNT LOOKAHEAD>" and
// returns the resulting stream.
ostream& operator<<(ostream &stream, const ParseItem &item) {
    // to_string keeps the count's formatting independent of stream flags.
    const string consumed_count = to_string(item.consumed_symbol_count);
    ostream &after_rule = stream << "#<item " << item.lhs << " " << *item.rule;
    ostream &after_count = after_rule << " " << consumed_count;
    return after_count << " " << item.lookahead_sym << ">";
}
}
}

View file

@ -0,0 +1,54 @@
#ifndef COMPILER_BUILD_TABLES_PARSE_ITEM_H_
#define COMPILER_BUILD_TABLES_PARSE_ITEM_H_
#include <unordered_set>
#include <string>
#include "compiler/rules/symbol.h"
#include "compiler/build_tables/item.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
namespace build_tables {
// Item subclass that additionally records how many symbols have been
// consumed and which lookahead symbol the item is paired with.
class ParseItem : public Item {
public:
// Builds the item; the last two arguments initialize the const members below.
ParseItem(const rules::Symbol &lhs,
rules::rule_ptr rule,
const size_t consumed_symbol_count,
const rules::Symbol &lookahead_sym);
// Equality over lhs, pointed-to rule, consumed count and lookahead symbol.
bool operator==(const ParseItem &other) const;
// PRECEDENCE metadata value found in the item's rule.
int precedence() const;
// Fixed at construction; part of the item's identity (compared and hashed).
const size_t consumed_symbol_count;
// Fixed at construction; part of the item's identity (compared and hashed).
const rules::Symbol lookahead_sym;
};
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
typedef std::unordered_set<ParseItem> ParseItemSet;
}
}
namespace std {
template<>
struct hash<tree_sitter::build_tables::ParseItem> {
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
return
hash<string>()(item.lhs.name) ^
hash<tree_sitter::rules::rule_ptr>()(item.rule) ^
hash<size_t>()(item.consumed_symbol_count) ^
hash<string>()(item.lookahead_sym.name);
}
};
// Hash for a whole (const) ParseItemSet: the element count is hashed first
// and every item's hash is then XOR-ed in. XOR is commutative, so the result
// does not depend on the set's iteration order.
template<>
struct hash<const tree_sitter::build_tables::ParseItemSet> {
    size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const {
        size_t result = hash<size_t>()(set.size());
        // Bind by const reference: a by-value loop variable would copy every
        // ParseItem just to hash it.
        for (const auto &item : set)
            result ^= hash<tree_sitter::build_tables::ParseItem>()(item);
        return result;
    }
};
}
#endif // COMPILER_BUILD_TABLES_PARSE_ITEM_H_

View file

@ -23,7 +23,7 @@ namespace tree_sitter {
str_replace(&input, "\n", "\\n");
return input;
}
string join(vector<string> lines, string separator) {
string result;
bool started = false;
@ -38,13 +38,13 @@ namespace tree_sitter {
// Convenience overload: joins the lines with a newline separator by
// delegating to the two-argument join().
string join(vector<string> lines) {
    const string newline = "\n";
    return join(lines, newline);
}
// Prefixes the input with the tab string and inserts the same tab after
// every newline, so each line of the input starts with it.
string indent(string input) {
    const string prefix = " ";
    const string newline_and_prefix = "\n" + prefix;
    util::str_replace(&input, "\n", newline_and_prefix);
    return prefix + input;
}
string character_code(char character) {
switch (character) {
case '\0':