Make separate types for syntax and lexical grammars

This way, the separator characters can be added as a field to
lexical grammars only.
This commit is contained in:
Max Brunsfeld 2014-06-25 13:27:16 -07:00
parent d5674d33c4
commit 7df35f9b8d
49 changed files with 467 additions and 395 deletions

View file

@ -25,7 +25,7 @@ namespace tree_sitter {
namespace build_tables {
class LexTableBuilder {
const PreparedGrammar lex_grammar;
const LexicalGrammar lex_grammar;
ParseTable *parse_table;
LexConflictManager conflict_manager;
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
@ -65,7 +65,7 @@ namespace tree_sitter {
}
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
auto transitions = char_transitions(item_set, lex_grammar);
auto transitions = char_transitions(item_set);
for (const auto &transition : transitions) {
CharacterSet rule = transition.first;
LexItemSet new_item_set = transition.second;
@ -114,7 +114,7 @@ namespace tree_sitter {
}
public:
LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) :
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) :
lex_grammar(lex_grammar),
parse_table(parse_table),
conflict_manager(LexConflictManager(lex_grammar)) {}
@ -129,7 +129,7 @@ namespace tree_sitter {
}
};
LexTable build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar) {
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar) {
return LexTableBuilder(parse_table, lex_grammar).build();
}
}

View file

@ -5,12 +5,11 @@
#include "compiler/lex_table.h"
namespace tree_sitter {
class PreparedGrammar;
class LexicalGrammar;
class ParseTable;
namespace build_tables {
LexTable
build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar);
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar);
}
}

View file

@ -23,7 +23,7 @@ namespace tree_sitter {
namespace build_tables {
class ParseTableBuilder {
const PreparedGrammar grammar;
const SyntaxGrammar grammar;
ParseConflictManager conflict_manager;
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
ParseTable parse_table;
@ -59,7 +59,7 @@ namespace tree_sitter {
}
void add_ubiquitous_token_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const Symbol &symbol : grammar.ubiquitous_tokens()) {
for (const Symbol &symbol : grammar.ubiquitous_tokens) {
auto &actions = parse_table.states[state_id].actions;
if (actions.find(symbol) == actions.end())
parse_table.add_action(state_id, symbol, ParseAction::Shift(state_id, { 0 }));
@ -99,7 +99,7 @@ namespace tree_sitter {
}
public:
ParseTableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) :
grammar(grammar),
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
@ -111,7 +111,7 @@ namespace tree_sitter {
};
pair<ParseTable, vector<Conflict>>
build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) {
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
return ParseTableBuilder(grammar, lex_grammar).build();
}
}

View file

@ -7,11 +7,12 @@
#include "compiler/parse_table.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace build_tables {
std::pair<ParseTable, std::vector<Conflict>>
build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar);
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
}
}

View file

@ -1,6 +1,7 @@
#include "compiler/build_tables/build_tables.h"
#include "compiler/build_tables/build_parse_table.h"
#include "compiler/build_tables/build_lex_table.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::tuple;
@ -9,8 +10,8 @@ namespace tree_sitter {
namespace build_tables {
tuple<ParseTable, LexTable, vector<Conflict>>
build_tables(const PreparedGrammar &grammar,
const PreparedGrammar &lex_grammar) {
build_tables(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar) {
auto parse_table_result = build_parse_table(grammar, lex_grammar);
ParseTable parse_table = parse_table_result.first;
vector<Conflict> conflicts = parse_table_result.second;

View file

@ -8,12 +8,13 @@
#include "compiler/lex_table.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace build_tables {
std::tuple<ParseTable, LexTable, std::vector<Conflict>>
build_tables(const PreparedGrammar &grammar,
const PreparedGrammar &lex_grammar);
build_tables(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar);
}
}

View file

@ -14,11 +14,11 @@ namespace tree_sitter {
namespace build_tables {
class FirstSet : public rules::RuleFn<set<Symbol>> {
const PreparedGrammar *grammar;
const SyntaxGrammar *grammar;
set<Symbol> visited_symbols;
public:
explicit FirstSet(const PreparedGrammar *grammar) : grammar(grammar) {}
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
set<Symbol> apply_to(const Symbol *rule) {
auto insertion_result = visited_symbols.insert(*rule);
@ -54,7 +54,7 @@ namespace tree_sitter {
}
};
set<Symbol> first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
set<Symbol> first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
return FirstSet(&grammar).apply(rule);
}
}

View file

@ -6,17 +6,17 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
namespace build_tables {
/*
* Returns the set of terminal symbols that can appear at
* the beginning of a string derivable from a given rule,
* in a given gramamr.
* in a given grammar.
*/
std::set<rules::Symbol>
first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
}
}

View file

@ -19,7 +19,7 @@ namespace tree_sitter {
namespace build_tables {
const ParseItemSet item_set_closure(const ParseItem &starting_item,
const set<Symbol> &starting_lookahead_symbols,
const PreparedGrammar &grammar) {
const SyntaxGrammar &grammar) {
ParseItemSet result;
vector<pair<ParseItem, set<Symbol>>> items_to_process = {{starting_item, starting_lookahead_symbols}};

View file

@ -6,12 +6,12 @@
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
namespace build_tables {
const ParseItemSet item_set_closure(const ParseItem &item,
const std::set<rules::Symbol> &lookahead_symbols,
const PreparedGrammar &grammar);
const SyntaxGrammar &grammar);
}
}

View file

@ -4,6 +4,7 @@
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/build_tables/merge_transitions.h"
#include "compiler/rules/symbol.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::map;
@ -13,7 +14,7 @@ namespace tree_sitter {
namespace build_tables {
map<Symbol, ParseItemSet>
sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar) {
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
map<Symbol, ParseItemSet> result;
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
@ -31,7 +32,7 @@ namespace tree_sitter {
}
map<CharacterSet, LexItemSet>
char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar) {
char_transitions(const LexItemSet &item_set) {
map<CharacterSet, LexItemSet> result;
for (const LexItem &item : item_set) {
for (auto &transition : char_transitions(item.rule)) {

View file

@ -6,7 +6,7 @@
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
namespace rules {
class CharacterSet;
class Symbol;
@ -14,10 +14,10 @@ namespace tree_sitter {
namespace build_tables {
std::map<rules::Symbol, ParseItemSet>
sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar);
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar);
std::map<rules::CharacterSet, LexItemSet>
char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar);
char_transitions(const LexItemSet &item_set);
}
}

View file

@ -4,6 +4,7 @@
#include <string>
#include <set>
#include "compiler/util/string_helpers.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
@ -13,7 +14,7 @@ namespace tree_sitter {
using std::set;
using std::vector;
LexConflictManager::LexConflictManager(const PreparedGrammar &grammar) :
LexConflictManager::LexConflictManager(const LexicalGrammar &grammar) :
grammar(grammar) {}
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,

View file

@ -8,10 +8,10 @@
namespace tree_sitter {
namespace build_tables {
class LexConflictManager {
const PreparedGrammar grammar;
const LexicalGrammar grammar;
public:
explicit LexConflictManager(const PreparedGrammar &grammar);
explicit LexConflictManager(const LexicalGrammar &grammar);
bool resolve_lex_action(const LexAction &old_action,
const LexAction &new_action);
};

View file

@ -4,6 +4,7 @@
#include <string>
#include <set>
#include "compiler/util/string_helpers.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
@ -13,8 +14,8 @@ namespace tree_sitter {
using std::set;
using std::vector;
ParseConflictManager::ParseConflictManager(const PreparedGrammar &parse_grammar,
const PreparedGrammar &lex_grammar) :
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar) :
parse_grammar(parse_grammar),
lex_grammar(lex_grammar) {}
@ -87,7 +88,7 @@ namespace tree_sitter {
return precedences + ")";
}
string message_for_action(const ParseAction &action, const PreparedGrammar &parse_grammar) {
string message_for_action(const ParseAction &action, const SyntaxGrammar &parse_grammar) {
switch (action.type) {
case ParseActionTypeShift:
return "shift " + precedence_string(action);

View file

@ -13,13 +13,13 @@
namespace tree_sitter {
namespace build_tables {
class ParseConflictManager {
const PreparedGrammar parse_grammar;
const PreparedGrammar lex_grammar;
const SyntaxGrammar parse_grammar;
const LexicalGrammar lex_grammar;
std::set<Conflict> conflicts_;
public:
ParseConflictManager(const PreparedGrammar &parse_grammar,
const PreparedGrammar &lex_grammar);
ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar);
bool resolve_parse_action(const rules::Symbol &symbol,
const ParseAction &old_action,
const ParseAction &new_action);

View file

@ -39,13 +39,13 @@ namespace tree_sitter {
};
class CanBeBlankRecursive : public CanBeBlank {
const PreparedGrammar *grammar;
const SyntaxGrammar *grammar;
set<rules::Symbol> visited_symbols;
using CanBeBlank::visit;
public:
using CanBeBlank::apply_to;
explicit CanBeBlankRecursive(const PreparedGrammar *grammar) : grammar(grammar) {}
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar) : grammar(grammar) {}
bool apply_to(const rules::Symbol *rule) {
if (visited_symbols.find(*rule) == visited_symbols.end()) {
@ -61,7 +61,7 @@ namespace tree_sitter {
return CanBeBlank().apply(rule);
}
bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
return CanBeBlankRecursive(&grammar).apply(rule);
}
}

View file

@ -4,11 +4,11 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
namespace build_tables {
bool rule_can_be_blank(const rules::rule_ptr &rule);
bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
}
}