Refactor bookkeeping of token starts in lexical rules
- Move lex items and parse items into their own files
This commit is contained in:
parent
a12cd49585
commit
33d781f492
19 changed files with 280 additions and 232 deletions
|
|
@ -42,7 +42,7 @@ namespace tree_sitter {
|
|||
result.insert(item.precedence());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (auto &transition : sym_transitions(item_set, grammar)) {
|
||||
const Symbol &symbol = transition.first;
|
||||
|
|
@ -51,7 +51,7 @@ namespace tree_sitter {
|
|||
|
||||
auto current_actions = parse_table.states[state_id].actions;
|
||||
auto current_action = current_actions.find(symbol);
|
||||
|
||||
|
||||
if (current_action == current_actions.end() ||
|
||||
conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) {
|
||||
ParseStateId new_state_id = add_parse_state(item_set);
|
||||
|
|
@ -95,7 +95,7 @@ namespace tree_sitter {
|
|||
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
|
||||
auto current_actions = parse_table.states[state_id].actions;
|
||||
auto current_action = current_actions.find(item.lookahead_sym);
|
||||
|
||||
|
||||
if (current_action == current_actions.end() ||
|
||||
conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) {
|
||||
parse_table.add_action(state_id, item.lookahead_sym, action);
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ namespace tree_sitter {
|
|||
result.insert(result.end(), conflicts_.begin(), conflicts_.end());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
string precedence_string(const ParseAction &action) {
|
||||
string precedences = "(precedence ";
|
||||
bool started = false;
|
||||
|
|
@ -108,7 +108,7 @@ namespace tree_sitter {
|
|||
}
|
||||
return precedences + ")";
|
||||
}
|
||||
|
||||
|
||||
string message_for_action(const ParseAction &action, const map<Symbol, string> &rule_names) {
|
||||
switch (action.type) {
|
||||
case ParseActionTypeShift:
|
||||
|
|
@ -126,7 +126,7 @@ namespace tree_sitter {
|
|||
return "error";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ConflictManager::record_conflict(const rules::Symbol &symbol,
|
||||
const ParseAction &left,
|
||||
const ParseAction &right) {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_BUILD_TABLES_FIRST_SET_H_
|
||||
|
||||
#include <set>
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -1,42 +1,21 @@
|
|||
#include "compiler/build_tables/get_metadata.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class GetMetadata : public rules::RuleFn<int> {
|
||||
rules::MetadataKey metadata_key;
|
||||
public:
|
||||
GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
|
||||
|
||||
int apply_to(const rules::Choice *rule) {
|
||||
return apply(rule->left) || apply(rule->right);
|
||||
}
|
||||
|
||||
int apply_to(const rules::Repeat *rule) {
|
||||
return apply(rule->content);
|
||||
}
|
||||
|
||||
int apply_to(const rules::Seq *rule) {
|
||||
int result = apply(rule->left);
|
||||
if (rule_can_be_blank(rule->left) && result == 0)
|
||||
result = apply(rule->right);
|
||||
return result;
|
||||
}
|
||||
|
||||
int apply_to(const rules::Metadata *rule) {
|
||||
auto pair = rule->value.find(metadata_key);
|
||||
if (pair != rule->value.end())
|
||||
return pair->second;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Looks up `key` in the metadata attached to `rule`. A Metadata rule that
// has no entry for `key` yields 0; the result for any other rule type is
// whatever rules::RuleFn<int> produces by default (not visible here).
int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) {
    // Visitor local to this function; it only overrides the Metadata case.
    class GetMetadata : public rules::RuleFn<int> {
    public:
        GetMetadata(rules::MetadataKey key) : metadata_key(key) {}

    private:
        rules::MetadataKey metadata_key;

        int apply_to(const rules::Metadata *rule) {
            auto entry = rule->value.find(metadata_key);
            if (entry == rule->value.end())
                return 0;
            return entry->second;
        }
    };

    return GetMetadata(key).apply(rule);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,78 +1,16 @@
|
|||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/build_tables/get_metadata.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::ostream;
|
||||
using std::vector;
|
||||
using rules::Symbol;
|
||||
using rules::rule_ptr;
|
||||
|
||||
namespace build_tables {
|
||||
Item::Item(const Symbol &lhs, const rule_ptr rule) :
|
||||
Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule) :
|
||||
lhs(lhs),
|
||||
rule(rule) {}
|
||||
|
||||
bool Item::is_done() const {
|
||||
return rule_can_be_blank(rule);
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const LexItem &item) {
|
||||
return stream <<
|
||||
string("#<item ") <<
|
||||
item.lhs <<
|
||||
string(" ") <<
|
||||
*item.rule <<
|
||||
string(">");
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const ParseItem &item) {
|
||||
return stream <<
|
||||
string("#<item ") <<
|
||||
item.lhs <<
|
||||
string(" ") <<
|
||||
*item.rule <<
|
||||
string(" ") <<
|
||||
to_string(item.consumed_symbol_count) <<
|
||||
string(" ") <<
|
||||
item.lookahead_sym <<
|
||||
string(">");
|
||||
}
|
||||
|
||||
LexItem::LexItem(const Symbol &lhs, const rule_ptr rule) : Item(lhs, rule) {}
|
||||
|
||||
bool LexItem::operator==(const LexItem &other) const {
|
||||
bool lhs_eq = other.lhs == lhs;
|
||||
bool rules_eq = (*other.rule == *rule);
|
||||
return lhs_eq && rules_eq;
|
||||
}
|
||||
|
||||
bool LexItem::is_token_start() const {
|
||||
return get_metadata(rule, rules::START_TOKEN) != 0;
|
||||
}
|
||||
|
||||
ParseItem::ParseItem(const Symbol &lhs,
|
||||
const rule_ptr rule,
|
||||
size_t consumed_symbol_count,
|
||||
const Symbol &lookahead_sym) :
|
||||
Item(lhs, rule),
|
||||
consumed_symbol_count(consumed_symbol_count),
|
||||
lookahead_sym(lookahead_sym) {}
|
||||
|
||||
bool ParseItem::operator==(const ParseItem &other) const {
|
||||
bool lhs_eq = other.lhs == lhs;
|
||||
bool rules_eq = (*other.rule == *rule);
|
||||
bool consumed_sym_counts_eq = (other.consumed_symbol_count == consumed_symbol_count);
|
||||
bool lookaheads_eq = other.lookahead_sym == lookahead_sym;
|
||||
return lhs_eq && rules_eq && consumed_sym_counts_eq && lookaheads_eq;
|
||||
}
|
||||
|
||||
int ParseItem::precedence() const {
|
||||
return get_metadata(rule, rules::PRECEDENCE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,14 +2,9 @@
|
|||
#define COMPILER_BUILD_TABLES_ITEM_H_
|
||||
|
||||
#include <unordered_set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
||||
namespace build_tables {
|
||||
class Item {
|
||||
public:
|
||||
|
|
@ -19,65 +14,7 @@ namespace tree_sitter {
|
|||
rules::Symbol lhs;
|
||||
rules::rule_ptr rule;
|
||||
};
|
||||
|
||||
class LexItem : public Item {
|
||||
public:
|
||||
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
|
||||
bool operator==(const LexItem &other) const;
|
||||
bool is_token_start() const;
|
||||
};
|
||||
|
||||
class ParseItem : public Item {
|
||||
public:
|
||||
ParseItem(const rules::Symbol &lhs,
|
||||
rules::rule_ptr rule,
|
||||
const size_t consumed_symbol_count,
|
||||
const rules::Symbol &lookahead_sym);
|
||||
bool operator==(const ParseItem &other) const;
|
||||
int precedence() const;
|
||||
|
||||
const size_t consumed_symbol_count;
|
||||
const rules::Symbol lookahead_sym;
|
||||
};
|
||||
|
||||
typedef std::unordered_set<ParseItem> ParseItemSet;
|
||||
typedef std::unordered_set<LexItem> LexItemSet;
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
|
||||
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
|
||||
}
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::build_tables::LexItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::Item &item) const {
|
||||
return
|
||||
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rules::rule_ptr>()(item.rule);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct hash<tree_sitter::build_tables::ParseItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
|
||||
return
|
||||
hash<string>()(item.lhs.name) ^
|
||||
hash<tree_sitter::rules::rule_ptr>()(item.rule) ^
|
||||
hash<size_t>()(item.consumed_symbol_count) ^
|
||||
hash<string>()(item.lookahead_sym.name);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct hash<const unordered_set<T>> {
|
||||
size_t operator()(const unordered_set<T> &set) const {
|
||||
size_t result = hash<size_t>()(set.size());
|
||||
for (auto item : set)
|
||||
result ^= hash<T>()(item);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_ITEM_H_
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
|
||||
#define COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
|
||||
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@
|
|||
#define COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_
|
||||
|
||||
#include <map>
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
|
|
|
|||
51
src/compiler/build_tables/lex_item.cc
Normal file
51
src/compiler/build_tables/lex_item.cc
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::ostream;
|
||||
using std::vector;
|
||||
|
||||
namespace build_tables {
|
||||
// Builds a lexical item; both arguments are forwarded unchanged to the Item
// base, and LexItem initialises nothing else.
LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule)
    : Item(lhs, rule) {}
|
||||
|
||||
// Two lex items are equal when their left-hand symbols are equal and their
// rules compare equal by value (the pointees are compared, not the pointers).
bool LexItem::operator==(const LexItem &other) const {
    // Compare the symbols first so the rule comparison is skipped entirely
    // for items that already differ on their left-hand side.
    if (!(other.lhs == lhs))
        return false;
    return *other.rule == *rule;
}
|
||||
|
||||
bool LexItem::is_token_start() const {
|
||||
class IsTokenStart : public rules::RuleFn<bool> {
|
||||
bool apply_to(const rules::Seq *rule) {
|
||||
bool result = apply(rule->left);
|
||||
if (!result && rule_can_be_blank(rule->left))
|
||||
result = apply(rule->right);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool apply_to(const rules::Metadata *rule) {
|
||||
auto pair = rule->value.find(rules::START_TOKEN);
|
||||
return (pair != rule->value.end()) && pair->second;
|
||||
}
|
||||
};
|
||||
|
||||
return IsTokenStart().apply(rule);
|
||||
}
|
||||
|
||||
// Prints the item as "#<item LHS RULE>" and hands the stream back so calls
// can be chained.
ostream& operator<<(ostream &stream, const LexItem &item) {
    stream << string("#<item ") << item.lhs;
    stream << string(" ") << *item.rule;
    return stream << string(">");
}
|
||||
}
|
||||
}
|
||||
|
||||
44
src/compiler/build_tables/lex_item.h
Normal file
44
src/compiler/build_tables/lex_item.h
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_LEX_ITEM_H_
|
||||
#define COMPILER_BUILD_TABLES_LEX_ITEM_H_
|
||||
|
||||
#include <unordered_set>
|
||||
#include <string>
|
||||
#include "compiler/build_tables/item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class LexItem : public Item {
|
||||
public:
|
||||
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
|
||||
bool operator==(const LexItem &other) const;
|
||||
bool is_token_start() const;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
|
||||
|
||||
typedef std::unordered_set<LexItem> LexItemSet;
|
||||
}
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::build_tables::LexItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::Item &item) const {
|
||||
return
|
||||
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rules::rule_ptr>()(item.rule);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct hash<const tree_sitter::build_tables::LexItemSet> {
|
||||
size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const {
|
||||
size_t result = hash<size_t>()(set.size());
|
||||
for (auto item : set)
|
||||
result ^= hash<tree_sitter::build_tables::LexItem>()(item);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_LEX_ITEM_H_
|
||||
45
src/compiler/build_tables/parse_item.cc
Normal file
45
src/compiler/build_tables/parse_item.cc
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/get_metadata.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::ostream;
|
||||
|
||||
namespace build_tables {
|
||||
// Builds a parse item. `lhs` and `rule` go to the Item base; the consumed
// symbol count and lookahead symbol are stored in this class's const members.
ParseItem::ParseItem(const rules::Symbol &lhs,
                     const rules::rule_ptr rule,
                     size_t consumed_symbol_count,
                     const rules::Symbol &lookahead_sym)
    : Item(lhs, rule),
      consumed_symbol_count(consumed_symbol_count),
      lookahead_sym(lookahead_sym) {}
|
||||
|
||||
// Two parse items are equal when their left-hand symbols, consumed symbol
// counts and lookahead symbols are equal and their rules compare equal by
// value (the pointees are compared, not the pointers).
bool ParseItem::operator==(const ParseItem &other) const {
    // Cheapest checks first; each mismatch returns immediately so the rule
    // comparison only runs for items that agree on everything else.
    if (other.consumed_symbol_count != consumed_symbol_count)
        return false;
    if (!(other.lhs == lhs))
        return false;
    if (!(other.lookahead_sym == lookahead_sym))
        return false;
    return *other.rule == *rule;
}
|
||||
|
||||
int ParseItem::precedence() const {
|
||||
return get_metadata(rule, rules::PRECEDENCE);
|
||||
}
|
||||
|
||||
// Prints the item as "#<item LHS RULE COUNT LOOKAHEAD>" and hands the stream
// back so calls can be chained.
ostream& operator<<(ostream &stream, const ParseItem &item) {
    stream << string("#<item ") << item.lhs;
    stream << string(" ") << *item.rule;
    stream << string(" ") << to_string(item.consumed_symbol_count);
    stream << string(" ") << item.lookahead_sym;
    return stream << string(">");
}
|
||||
}
|
||||
}
|
||||
|
||||
54
src/compiler/build_tables/parse_item.h
Normal file
54
src/compiler/build_tables/parse_item.h
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_PARSE_ITEM_H_
|
||||
#define COMPILER_BUILD_TABLES_PARSE_ITEM_H_
|
||||
|
||||
#include <unordered_set>
|
||||
#include <string>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class ParseItem : public Item {
|
||||
public:
|
||||
ParseItem(const rules::Symbol &lhs,
|
||||
rules::rule_ptr rule,
|
||||
const size_t consumed_symbol_count,
|
||||
const rules::Symbol &lookahead_sym);
|
||||
bool operator==(const ParseItem &other) const;
|
||||
int precedence() const;
|
||||
|
||||
const size_t consumed_symbol_count;
|
||||
const rules::Symbol lookahead_sym;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
|
||||
|
||||
typedef std::unordered_set<ParseItem> ParseItemSet;
|
||||
}
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<tree_sitter::build_tables::ParseItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
|
||||
return
|
||||
hash<string>()(item.lhs.name) ^
|
||||
hash<tree_sitter::rules::rule_ptr>()(item.rule) ^
|
||||
hash<size_t>()(item.consumed_symbol_count) ^
|
||||
hash<string>()(item.lookahead_sym.name);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct hash<const tree_sitter::build_tables::ParseItemSet> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const {
|
||||
size_t result = hash<size_t>()(set.size());
|
||||
for (auto item : set)
|
||||
result ^= hash<tree_sitter::build_tables::ParseItem>()(item);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_PARSE_ITEM_H_
|
||||
|
|
@ -23,7 +23,7 @@ namespace tree_sitter {
|
|||
str_replace(&input, "\n", "\\n");
|
||||
return input;
|
||||
}
|
||||
|
||||
|
||||
string join(vector<string> lines, string separator) {
|
||||
string result;
|
||||
bool started = false;
|
||||
|
|
@ -38,13 +38,13 @@ namespace tree_sitter {
|
|||
// Convenience overload: joins `lines` using "\n" as the separator by
// delegating to the two-argument join().
string join(vector<string> lines) {
    const string newline = "\n";
    return join(lines, newline);
}
|
||||
|
||||
|
||||
// Prefixes `input` with the tab string and passes "\n" and "\n" + tab to
// util::str_replace so that line breaks inside `input` get the same prefix.
string indent(string input) {
    string tab = " ";
    string line_break_then_tab = "\n" + tab;
    util::str_replace(&input, "\n", line_break_then_tab);
    return tab + input;
}
|
||||
|
||||
|
||||
string character_code(char character) {
|
||||
switch (character) {
|
||||
case '\0':
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue