Make separate types for syntax and lexical grammars
This way, the separator characters can be added as a field to lexical grammars only.
This commit is contained in:
parent
d5674d33c4
commit
7df35f9b8d
49 changed files with 467 additions and 395 deletions
|
|
@ -25,7 +25,7 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
class LexTableBuilder {
|
||||
const PreparedGrammar lex_grammar;
|
||||
const LexicalGrammar lex_grammar;
|
||||
ParseTable *parse_table;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
|
||||
|
|
@ -65,7 +65,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
auto transitions = char_transitions(item_set, lex_grammar);
|
||||
auto transitions = char_transitions(item_set);
|
||||
for (const auto &transition : transitions) {
|
||||
CharacterSet rule = transition.first;
|
||||
LexItemSet new_item_set = transition.second;
|
||||
|
|
@ -114,7 +114,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
public:
|
||||
LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) :
|
||||
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) :
|
||||
lex_grammar(lex_grammar),
|
||||
parse_table(parse_table),
|
||||
conflict_manager(LexConflictManager(lex_grammar)) {}
|
||||
|
|
@ -129,7 +129,7 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
LexTable build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar) {
|
||||
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar) {
|
||||
return LexTableBuilder(parse_table, lex_grammar).build();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,12 +5,11 @@
|
|||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class LexicalGrammar;
|
||||
class ParseTable;
|
||||
|
||||
namespace build_tables {
|
||||
LexTable
|
||||
build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar);
|
||||
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
class ParseTableBuilder {
|
||||
const PreparedGrammar grammar;
|
||||
const SyntaxGrammar grammar;
|
||||
ParseConflictManager conflict_manager;
|
||||
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
|
||||
ParseTable parse_table;
|
||||
|
|
@ -59,7 +59,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void add_ubiquitous_token_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const Symbol &symbol : grammar.ubiquitous_tokens()) {
|
||||
for (const Symbol &symbol : grammar.ubiquitous_tokens) {
|
||||
auto &actions = parse_table.states[state_id].actions;
|
||||
if (actions.find(symbol) == actions.end())
|
||||
parse_table.add_action(state_id, symbol, ParseAction::Shift(state_id, { 0 }));
|
||||
|
|
@ -99,7 +99,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
public:
|
||||
ParseTableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
|
||||
ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) :
|
||||
grammar(grammar),
|
||||
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
|
||||
|
||||
|
|
@ -111,7 +111,7 @@ namespace tree_sitter {
|
|||
};
|
||||
|
||||
pair<ParseTable, vector<Conflict>>
|
||||
build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) {
|
||||
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
|
||||
return ParseTableBuilder(grammar, lex_grammar).build();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,11 +7,12 @@
|
|||
#include "compiler/parse_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
std::pair<ParseTable, std::vector<Conflict>>
|
||||
build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar);
|
||||
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler/build_tables/build_tables.h"
|
||||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include "compiler/build_tables/build_lex_table.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::tuple;
|
||||
|
|
@ -9,8 +10,8 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
tuple<ParseTable, LexTable, vector<Conflict>>
|
||||
build_tables(const PreparedGrammar &grammar,
|
||||
const PreparedGrammar &lex_grammar) {
|
||||
build_tables(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lex_grammar) {
|
||||
auto parse_table_result = build_parse_table(grammar, lex_grammar);
|
||||
ParseTable parse_table = parse_table_result.first;
|
||||
vector<Conflict> conflicts = parse_table_result.second;
|
||||
|
|
|
|||
|
|
@ -8,12 +8,13 @@
|
|||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
std::tuple<ParseTable, LexTable, std::vector<Conflict>>
|
||||
build_tables(const PreparedGrammar &grammar,
|
||||
const PreparedGrammar &lex_grammar);
|
||||
build_tables(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,11 +14,11 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
class FirstSet : public rules::RuleFn<set<Symbol>> {
|
||||
const PreparedGrammar *grammar;
|
||||
const SyntaxGrammar *grammar;
|
||||
set<Symbol> visited_symbols;
|
||||
|
||||
public:
|
||||
explicit FirstSet(const PreparedGrammar *grammar) : grammar(grammar) {}
|
||||
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
|
||||
|
||||
set<Symbol> apply_to(const Symbol *rule) {
|
||||
auto insertion_result = visited_symbols.insert(*rule);
|
||||
|
|
@ -54,7 +54,7 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
set<Symbol> first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
|
||||
set<Symbol> first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
|
||||
return FirstSet(&grammar).apply(rule);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,17 +6,17 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
/*
|
||||
* Returns the set of terminal symbols that can appear at
|
||||
* the beginning of a string derivable from a given rule,
|
||||
* in a given gramamr.
|
||||
* in a given grammar.
|
||||
*/
|
||||
std::set<rules::Symbol>
|
||||
first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
|
||||
first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ namespace tree_sitter {
|
|||
namespace build_tables {
|
||||
const ParseItemSet item_set_closure(const ParseItem &starting_item,
|
||||
const set<Symbol> &starting_lookahead_symbols,
|
||||
const PreparedGrammar &grammar) {
|
||||
const SyntaxGrammar &grammar) {
|
||||
ParseItemSet result;
|
||||
|
||||
vector<pair<ParseItem, set<Symbol>>> items_to_process = {{starting_item, starting_lookahead_symbols}};
|
||||
|
|
|
|||
|
|
@ -6,12 +6,12 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
const ParseItemSet item_set_closure(const ParseItem &item,
|
||||
const std::set<rules::Symbol> &lookahead_symbols,
|
||||
const PreparedGrammar &grammar);
|
||||
const SyntaxGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/build_tables/merge_transitions.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::map;
|
||||
|
|
@ -13,7 +14,7 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
map<Symbol, ParseItemSet>
|
||||
sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar) {
|
||||
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
|
||||
map<Symbol, ParseItemSet> result;
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
|
|
@ -31,7 +32,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
map<CharacterSet, LexItemSet>
|
||||
char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar) {
|
||||
char_transitions(const LexItemSet &item_set) {
|
||||
map<CharacterSet, LexItemSet> result;
|
||||
for (const LexItem &item : item_set) {
|
||||
for (auto &transition : char_transitions(item.rule)) {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
namespace rules {
|
||||
class CharacterSet;
|
||||
class Symbol;
|
||||
|
|
@ -14,10 +14,10 @@ namespace tree_sitter {
|
|||
|
||||
namespace build_tables {
|
||||
std::map<rules::Symbol, ParseItemSet>
|
||||
sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar);
|
||||
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar);
|
||||
|
||||
std::map<rules::CharacterSet, LexItemSet>
|
||||
char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar);
|
||||
char_transitions(const LexItemSet &item_set);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include <string>
|
||||
#include <set>
|
||||
#include "compiler/util/string_helpers.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -13,7 +14,7 @@ namespace tree_sitter {
|
|||
using std::set;
|
||||
using std::vector;
|
||||
|
||||
LexConflictManager::LexConflictManager(const PreparedGrammar &grammar) :
|
||||
LexConflictManager::LexConflictManager(const LexicalGrammar &grammar) :
|
||||
grammar(grammar) {}
|
||||
|
||||
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
|
||||
|
|
|
|||
|
|
@ -8,10 +8,10 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class LexConflictManager {
|
||||
const PreparedGrammar grammar;
|
||||
const LexicalGrammar grammar;
|
||||
|
||||
public:
|
||||
explicit LexConflictManager(const PreparedGrammar &grammar);
|
||||
explicit LexConflictManager(const LexicalGrammar &grammar);
|
||||
bool resolve_lex_action(const LexAction &old_action,
|
||||
const LexAction &new_action);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include <string>
|
||||
#include <set>
|
||||
#include "compiler/util/string_helpers.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -13,8 +14,8 @@ namespace tree_sitter {
|
|||
using std::set;
|
||||
using std::vector;
|
||||
|
||||
ParseConflictManager::ParseConflictManager(const PreparedGrammar &parse_grammar,
|
||||
const PreparedGrammar &lex_grammar) :
|
||||
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar,
|
||||
const LexicalGrammar &lex_grammar) :
|
||||
parse_grammar(parse_grammar),
|
||||
lex_grammar(lex_grammar) {}
|
||||
|
||||
|
|
@ -87,7 +88,7 @@ namespace tree_sitter {
|
|||
return precedences + ")";
|
||||
}
|
||||
|
||||
string message_for_action(const ParseAction &action, const PreparedGrammar &parse_grammar) {
|
||||
string message_for_action(const ParseAction &action, const SyntaxGrammar &parse_grammar) {
|
||||
switch (action.type) {
|
||||
case ParseActionTypeShift:
|
||||
return "shift " + precedence_string(action);
|
||||
|
|
|
|||
|
|
@ -13,13 +13,13 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class ParseConflictManager {
|
||||
const PreparedGrammar parse_grammar;
|
||||
const PreparedGrammar lex_grammar;
|
||||
const SyntaxGrammar parse_grammar;
|
||||
const LexicalGrammar lex_grammar;
|
||||
std::set<Conflict> conflicts_;
|
||||
|
||||
public:
|
||||
ParseConflictManager(const PreparedGrammar &parse_grammar,
|
||||
const PreparedGrammar &lex_grammar);
|
||||
ParseConflictManager(const SyntaxGrammar &parse_grammar,
|
||||
const LexicalGrammar &lex_grammar);
|
||||
bool resolve_parse_action(const rules::Symbol &symbol,
|
||||
const ParseAction &old_action,
|
||||
const ParseAction &new_action);
|
||||
|
|
|
|||
|
|
@ -39,13 +39,13 @@ namespace tree_sitter {
|
|||
};
|
||||
|
||||
class CanBeBlankRecursive : public CanBeBlank {
|
||||
const PreparedGrammar *grammar;
|
||||
const SyntaxGrammar *grammar;
|
||||
set<rules::Symbol> visited_symbols;
|
||||
using CanBeBlank::visit;
|
||||
|
||||
public:
|
||||
using CanBeBlank::apply_to;
|
||||
explicit CanBeBlankRecursive(const PreparedGrammar *grammar) : grammar(grammar) {}
|
||||
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar) : grammar(grammar) {}
|
||||
|
||||
bool apply_to(const rules::Symbol *rule) {
|
||||
if (visited_symbols.find(*rule) == visited_symbols.end()) {
|
||||
|
|
@ -61,7 +61,7 @@ namespace tree_sitter {
|
|||
return CanBeBlank().apply(rule);
|
||||
}
|
||||
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
|
||||
return CanBeBlankRecursive(&grammar).apply(rule);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,11 +4,11 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule);
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue