This commit is contained in:
Max Brunsfeld 2017-03-01 22:15:26 -08:00
parent 686dc0997c
commit abf8a4f2c2
28 changed files with 313 additions and 356 deletions

View file

@ -24,7 +24,6 @@ using std::map;
using std::set;
using std::string;
using std::vector;
using std::make_shared;
using std::unordered_map;
using rules::Blank;
using rules::Choice;
@ -62,14 +61,16 @@ class LexTableBuilder {
private:
void add_lex_state_for_parse_state(ParseState *parse_state) {
parse_state->lex_state_id =
add_lex_state(item_set_for_terminals(parse_state->terminal_entries));
parse_state->lex_state_id = add_lex_state(
item_set_for_terminals(parse_state->terminal_entries)
);
}
LexStateId add_lex_state(const LexItemSet &item_set) {
const auto &pair = lex_state_ids.find(item_set);
if (pair == lex_state_ids.end()) {
LexStateId state_id = lex_table.add_state();
LexStateId state_id = lex_table.states.size();
lex_table.states.push_back(LexState());
lex_state_ids[item_set] = state_id;
add_accept_token_actions(item_set, state_id);
add_advance_actions(item_set, state_id);
@ -83,13 +84,13 @@ class LexTableBuilder {
for (const auto &pair : item_set.transitions()) {
const CharacterSet &characters = pair.first;
const LexItemSet::Transition &transition = pair.second;
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
auto current_action = lex_table.state(state_id).accept_action;
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
auto current_action = lex_table.states[state_id].accept_action;
if (conflict_manager.resolve(transition.destination, action,
current_action)) {
action.state_index = add_lex_state(transition.destination);
lex_table.state(state_id).advance_actions[characters] = action;
lex_table.states[state_id].advance_actions[characters] = action;
}
}
}
@ -102,9 +103,9 @@ class LexTableBuilder {
item.lhs.is_built_in() ||
lex_grammar.variables[item.lhs.index].is_string);
auto current_action = lex_table.state(state_id).accept_action;
auto current_action = lex_table.states[state_id].accept_action;
if (conflict_manager.resolve(action, current_action))
lex_table.state(state_id).accept_action = action;
lex_table.states[state_id].accept_action = action;
}
}
}

View file

@ -6,7 +6,7 @@
namespace tree_sitter {
struct LexicalGrammar;
class ParseTable;
struct ParseTable;
namespace build_tables {

View file

@ -57,9 +57,9 @@ class ParseTableBuilder {
Symbol(0, Symbol::Terminal) :
Symbol(0, Symbol::NonTerminal);
Production start_production({
ProductionStep(start_symbol, 0, rules::AssociativityNone),
});
Production start_production{
ProductionStep{start_symbol, 0, rules::AssociativityNone},
};
// Placeholder for error state
add_parse_state(ParseItemSet());
@ -150,7 +150,8 @@ class ParseTableBuilder {
ParseStateId add_parse_state(const ParseItemSet &item_set) {
auto pair = parse_state_ids.find(item_set);
if (pair == parse_state_ids.end()) {
ParseStateId state_id = parse_table.add_state();
ParseStateId state_id = parse_table.states.size();
parse_table.states.push_back(ParseState());
parse_state_ids[item_set] = state_id;
parse_table.states[state_id].shift_actions_signature = item_set.unfinished_item_signature();
item_sets_to_process.push_back({ std::move(item_set), state_id });

View file

@ -15,7 +15,9 @@ using std::vector;
using std::make_tuple;
tuple<ParseTable, LexTable, CompileError> build_tables(
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar
) {
auto parse_table_result = build_parse_table(grammar, lex_grammar);
ParseTable parse_table = parse_table_result.first;
const CompileError error = parse_table_result.second;

View file

@ -26,8 +26,6 @@ using std::vector;
using util::escape_char;
using rules::Symbol;
static Variable EOF_ENTRY("end", VariableTypeNamed, rule_ptr());
static const map<char, string> REPLACEMENTS({
{ '~', "TILDE" },
{ '`', "BQUOTE" },

View file

@ -7,8 +7,8 @@ namespace tree_sitter {
struct LexicalGrammar;
struct SyntaxGrammar;
class LexTable;
class ParseTable;
struct LexTable;
struct ParseTable;
namespace generate_code {

View file

@ -44,26 +44,10 @@ bool AcceptTokenAction::operator==(const AcceptTokenAction &other) const {
LexState::LexState() : is_token_start(false) {}
set<CharacterSet> LexState::expected_inputs() const {
set<CharacterSet> result;
for (auto &pair : advance_actions)
result.insert(pair.first);
return result;
}
bool LexState::operator==(const LexState &other) const {
return advance_actions == other.advance_actions &&
accept_action == other.accept_action &&
is_token_start == other.is_token_start;
}
LexStateId LexTable::add_state() {
states.push_back(LexState());
return states.size() - 1;
}
LexState &LexTable::state(LexStateId id) {
return states[id];
}
} // namespace tree_sitter

View file

@ -13,17 +13,9 @@ namespace tree_sitter {
typedef int64_t LexStateId;
typedef enum {
LexActionTypeError,
LexActionTypeAccept,
LexActionTypeAcceptFragile,
LexActionTypeAdvance
} LexActionType;
struct AdvanceAction {
AdvanceAction();
AdvanceAction(size_t, PrecedenceRange, bool);
bool operator==(const AdvanceAction &other) const;
LexStateId state_index;
@ -34,7 +26,6 @@ struct AdvanceAction {
struct AcceptTokenAction {
AcceptTokenAction();
AcceptTokenAction(rules::Symbol, int, bool);
bool is_present() const;
bool operator==(const AcceptTokenAction &action) const;
@ -43,16 +34,8 @@ struct AcceptTokenAction {
bool is_string;
};
} // namespace tree_sitter
namespace std {} // namespace std
namespace tree_sitter {
class LexState {
public:
struct LexState {
LexState();
std::set<rules::CharacterSet> expected_inputs() const;
bool operator==(const LexState &) const;
std::map<rules::CharacterSet, AdvanceAction> advance_actions;
@ -60,10 +43,7 @@ class LexState {
bool is_token_start;
};
class LexTable {
public:
LexStateId add_state();
LexState &state(LexStateId state_id);
struct LexTable {
std::vector<LexState> states;
};

View file

@ -1,11 +0,0 @@
#include "compiler/lexical_grammar.h"
namespace tree_sitter {
using std::string;
LexicalVariable::LexicalVariable(
const string &name, VariableType type, const rule_ptr &rule, bool is_string)
: name(name), rule(rule), type(type), is_string(is_string) {}
} // namespace tree_sitter

View file

@ -10,11 +10,9 @@
namespace tree_sitter {
struct LexicalVariable {
LexicalVariable(const std::string &, VariableType, const rule_ptr &, bool);
std::string name;
rule_ptr rule;
VariableType type;
rule_ptr rule;
bool is_string;
};

View file

@ -148,13 +148,6 @@ bool ParseState::has_shift_action() const {
return (!nonterminal_entries.empty());
}
set<Symbol> ParseState::expected_inputs() const {
set<Symbol> result;
for (auto &entry : terminal_entries)
result.insert(entry.first);
return result;
}
void ParseState::each_referenced_state(function<void(ParseStateId *)> fn) {
for (auto &entry : terminal_entries)
for (ParseAction &action : entry.second.actions)
@ -169,18 +162,6 @@ bool ParseState::operator==(const ParseState &other) const {
nonterminal_entries == other.nonterminal_entries;
}
set<Symbol> ParseTable::all_symbols() const {
set<Symbol> result;
for (auto &pair : symbols)
result.insert(pair.first);
return result;
}
ParseStateId ParseTable::add_state() {
states.push_back(ParseState());
return states.size() - 1;
}
ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
Symbol lookahead,
ParseAction action) {

View file

@ -23,13 +23,11 @@ enum ParseActionType {
ParseActionTypeRecover,
};
class ParseAction {
struct ParseAction {
ParseAction();
ParseAction(ParseActionType type, ParseStateId state_index,
rules::Symbol symbol, size_t consumed_symbol_count,
const Production *);
public:
ParseAction();
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(ParseStateId state_index);
@ -39,7 +37,6 @@ class ParseAction {
static ParseAction ShiftExtra();
bool operator==(const ParseAction &) const;
bool operator<(const ParseAction &) const;
rules::Associativity associativity() const;
int precedence() const;
@ -47,30 +44,26 @@ class ParseAction {
bool extra;
bool fragile;
ParseStateId state_index;
rules::Symbol symbol;
size_t consumed_symbol_count;
const Production *production;
};
struct ParseTableEntry {
std::vector<ParseAction> actions;
bool reusable;
bool depends_on_lookahead;
ParseTableEntry();
ParseTableEntry(const std::vector<ParseAction> &, bool, bool);
bool operator==(const ParseTableEntry &other) const;
inline bool operator!=(const ParseTableEntry &other) const {
return !operator==(other);
}
std::vector<ParseAction> actions;
bool reusable;
bool depends_on_lookahead;
};
class ParseState {
public:
struct ParseState {
ParseState();
std::set<rules::Symbol> expected_inputs() const;
bool operator==(const ParseState &) const;
bool merge(const ParseState &);
void each_referenced_state(std::function<void(ParseStateId *)>);
@ -87,10 +80,7 @@ struct ParseTableSymbolMetadata {
bool structural;
};
class ParseTable {
public:
std::set<rules::Symbol> all_symbols() const;
ParseStateId add_state();
struct ParseTable {
ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId);

View file

@ -41,10 +41,17 @@ class ExpandRepeats : public rules::IdentityRuleFn {
string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
Symbol repeat_symbol(offset + index, Symbol::NonTerminal);
existing_repeats.push_back({ rule->copy(), repeat_symbol });
aux_rules.push_back(
Variable(helper_rule_name, VariableTypeAuxiliary,
Choice::build({ Seq::build({ repeat_symbol.copy(), inner_rule }),
inner_rule })));
aux_rules.push_back(Variable{
helper_rule_name,
VariableTypeAuxiliary,
Choice::build({
Seq::build({
repeat_symbol.copy(),
inner_rule,
}),
inner_rule,
})
});
return repeat_symbol.copy();
}

View file

@ -107,8 +107,11 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
*/
vector<Variable> processed_variables;
for (const Variable &variable : grammar.variables)
processed_variables.push_back(
Variable(variable.name, variable.type, extractor.apply(variable.rule)));
processed_variables.push_back(Variable{
variable.name,
variable.type,
extractor.apply(variable.rule)
});
lexical_grammar.variables = extractor.tokens;
/*

View file

@ -25,8 +25,11 @@ class FlattenRule : public rules::RuleFn<void> {
Production production;
void apply_to(const rules::Symbol *sym) {
production.push_back(ProductionStep(*sym, precedence_stack.back(),
associativity_stack.back()));
production.push_back(ProductionStep{
*sym,
precedence_stack.back(),
associativity_stack.back()
});
}
void apply_to(const rules::Metadata *metadata) {
@ -85,7 +88,7 @@ SyntaxVariable flatten_rule(const Variable &variable) {
}
}
return SyntaxVariable(variable.name, variable.type, productions);
return SyntaxVariable{variable.name, variable.type, productions};
}
pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &grammar) {

View file

@ -7,18 +7,6 @@
namespace tree_sitter {
using std::string;
using std::pair;
using std::vector;
SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
const vector<Production> &productions)
: name(name), productions(productions), type(type) {}
ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
rules::Associativity associativity)
: symbol(symbol), precedence(precedence), associativity(associativity) {}
bool ExternalToken::operator==(const ExternalToken &other) const {
return name == other.name && type == other.type &&
corresponding_internal_token == other.corresponding_internal_token;

View file

@ -11,15 +11,14 @@
namespace tree_sitter {
struct ExternalToken {
bool operator==(const ExternalToken &) const;
std::string name;
VariableType type;
rules::Symbol corresponding_internal_token;
bool operator==(const ExternalToken &) const;
};
struct ProductionStep {
ProductionStep(const rules::Symbol &, int, rules::Associativity);
bool operator==(const ProductionStep &) const;
rules::Symbol symbol;
@ -30,12 +29,9 @@ struct ProductionStep {
typedef std::vector<ProductionStep> Production;
struct SyntaxVariable {
SyntaxVariable(const std::string &, VariableType,
const std::vector<Production> &);
std::string name;
std::vector<Production> productions;
VariableType type;
std::vector<Production> productions;
};
typedef std::set<rules::Symbol> ConflictSet;

View file

@ -1,11 +0,0 @@
#include "compiler/variable.h"
#include <string>
namespace tree_sitter {
using std::string;
Variable::Variable(const string &name, VariableType type, const rule_ptr &rule)
: name(name), rule(rule), type(type) {}
} // namespace tree_sitter

View file

@ -15,11 +15,9 @@ enum VariableType {
};
struct Variable {
Variable(const std::string &, VariableType, const rule_ptr &);
std::string name;
rule_ptr rule;
VariableType type;
rule_ptr rule;
};
} // namespace tree_sitter