Make separate types for syntax and lexical grammars
This way, the separator characters can be added as a field to lexical grammars only.
This commit is contained in:
parent
d5674d33c4
commit
7df35f9b8d
49 changed files with 467 additions and 395 deletions
|
|
@ -50,17 +50,16 @@ namespace tree_sitter {
|
|||
vector<pair<string, rules::rule_ptr>> aux_rules;
|
||||
};
|
||||
|
||||
// Runs an ExpandRepeats pass over every rule of `grammar` and returns a new
// grammar holding the rewritten rules plus any auxiliary rules the pass
// generated; the input grammar's ubiquitous tokens are carried over unchanged.
// NOTE(review): this span is a rendered diff with no +/- markers. The
// PreparedGrammar / accessor-call lines appear to be the pre-commit text and
// the SyntaxGrammar / public-field lines their replacements — confirm against
// the repository before editing.
PreparedGrammar expand_repeats(const PreparedGrammar &grammar) {
|
||||
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules());
|
||||
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
|
||||
// aux_rules starts as a copy of the input's auxiliary rules; rules starts empty.
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
|
||||
|
||||
for (auto &pair : grammar.rules()) {
|
||||
for (auto &pair : grammar.rules) {
|
||||
// A fresh expander per rule, given the rule's name and the number of
// auxiliary rules accumulated so far.
ExpandRepeats expander(pair.first, aux_rules.size());
|
||||
rules.push_back({ pair.first, expander.apply(pair.second) });
|
||||
// Append whatever auxiliary rules this expander produced.
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
|
||||
}
|
||||
|
||||
return PreparedGrammar(rules, aux_rules).
|
||||
ubiquitous_tokens(grammar.ubiquitous_tokens());
|
||||
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
PreparedGrammar expand_repeats(const PreparedGrammar &);
|
||||
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -43,28 +43,29 @@ namespace tree_sitter {
|
|||
ExpandTokens() : error(nullptr) {}
|
||||
};
|
||||
|
||||
// Applies a single ExpandTokens visitor to every rule and every auxiliary rule
// of `grammar`. Returns the rewritten grammar paired with nullptr on success,
// or an empty grammar paired with the visitor's error as soon as one is set.
// NOTE(review): this span is a rendered diff with no +/- markers. The
// PreparedGrammar lines appear to be the pre-commit text and the
// LexicalGrammar lines their replacements — confirm against the repository.
pair<PreparedGrammar, const GrammarError *>
|
||||
expand_tokens(const PreparedGrammar &grammar) {
|
||||
pair<LexicalGrammar, const GrammarError *>
|
||||
expand_tokens(const LexicalGrammar &grammar) {
|
||||
vector<pair<string, rule_ptr>> rules, aux_rules;
|
||||
ExpandTokens expander;
|
||||
|
||||
for (auto &pair : grammar.rules()) {
|
||||
for (auto &pair : grammar.rules) {
|
||||
auto rule = expander.apply(pair.second);
|
||||
// Stop at the first error; the grammar half of the result is empty.
if (expander.error)
|
||||
return { PreparedGrammar(), expander.error };
|
||||
return { LexicalGrammar({}, {}, {}), expander.error };
|
||||
rules.push_back({ pair.first, rule });
|
||||
}
|
||||
|
||||
// Same treatment for the auxiliary rules, collected separately.
for (auto &pair : grammar.aux_rules()) {
|
||||
for (auto &pair : grammar.aux_rules) {
|
||||
auto rule = expander.apply(pair.second);
|
||||
if (expander.error)
|
||||
return { PreparedGrammar(), expander.error };
|
||||
return { LexicalGrammar({}, {}, {}), expander.error };
|
||||
aux_rules.push_back({ pair.first, rule });
|
||||
}
|
||||
|
||||
return {
|
||||
PreparedGrammar(rules, aux_rules).ubiquitous_tokens(grammar.ubiquitous_tokens()),
|
||||
nullptr };
|
||||
// The LexicalGrammar form forwards the input's separators unchanged.
LexicalGrammar(rules, aux_rules, grammar.separators),
|
||||
nullptr,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,11 +5,11 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<PreparedGrammar, const GrammarError *>
|
||||
expand_tokens(const PreparedGrammar &);
|
||||
std::pair<LexicalGrammar, const GrammarError *>
|
||||
expand_tokens(const LexicalGrammar &);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepare_grammar/token_description.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -93,15 +94,15 @@ namespace tree_sitter {
|
|||
vector<pair<string, rule_ptr>> tokens;
|
||||
};
|
||||
|
||||
pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &input_grammar) {
|
||||
pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &input_grammar) {
|
||||
vector<pair<string, rule_ptr>> rules, tokens, aux_rules, aux_tokens;
|
||||
vector<Symbol> ubiquitous_tokens;
|
||||
|
||||
TokenExtractor extractor;
|
||||
map<Symbol, Symbol> symbol_replacements;
|
||||
|
||||
for (size_t i = 0; i < input_grammar.rules().size(); i++) {
|
||||
auto pair = input_grammar.rules()[i];
|
||||
for (size_t i = 0; i < input_grammar.rules.size(); i++) {
|
||||
auto pair = input_grammar.rules[i];
|
||||
if (IsToken().apply(pair.second)) {
|
||||
tokens.push_back(pair);
|
||||
symbol_replacements.insert({
|
||||
|
|
@ -113,32 +114,17 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < input_grammar.aux_rules().size(); i++) {
|
||||
auto pair = input_grammar.aux_rules()[i];
|
||||
if (IsToken().apply(pair.second)) {
|
||||
aux_tokens.push_back(pair);
|
||||
symbol_replacements.insert({
|
||||
Symbol(i, rules::SymbolOptionAuxiliary),
|
||||
Symbol(aux_tokens.size() - 1, rules::SymbolOption(rules::SymbolOptionAuxiliary|rules::SymbolOptionToken))
|
||||
});
|
||||
} else {
|
||||
aux_rules.push_back({ pair.first, extractor.apply(pair.second) });
|
||||
}
|
||||
}
|
||||
|
||||
aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());
|
||||
|
||||
SymbolInliner inliner(symbol_replacements);
|
||||
for (auto &pair : rules)
|
||||
pair.second = inliner.apply(pair.second);
|
||||
for (auto &pair : aux_rules)
|
||||
pair.second = inliner.apply(pair.second);
|
||||
for (auto &symbol : input_grammar.ubiquitous_tokens())
|
||||
for (auto &symbol : input_grammar.ubiquitous_tokens)
|
||||
ubiquitous_tokens.push_back(inliner.replace_symbol(symbol));
|
||||
|
||||
return {
|
||||
PreparedGrammar(rules, aux_rules).ubiquitous_tokens(ubiquitous_tokens),
|
||||
PreparedGrammar(tokens, aux_tokens)
|
||||
SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
|
||||
LexicalGrammar(tokens, aux_tokens, {}),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,12 +2,14 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_
|
||||
|
||||
#include <utility>
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &);
|
||||
std::pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include <memory>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
|
|
@ -37,15 +38,16 @@ namespace tree_sitter {
|
|||
string missing_rule_name;
|
||||
};
|
||||
|
||||
// Builds the failure result for a reference to an undefined rule: an empty
// grammar paired with a newly allocated GrammarError of type
// GrammarErrorTypeUndefinedSymbol whose message names `rule_name`.
// NOTE(review): the error is created with `new` and returned as a raw
// pointer; presumably the caller takes ownership — confirm who deletes it.
// NOTE(review): this span is a rendered diff with no +/- markers. The
// PreparedGrammar lines appear to be the pre-commit text and the
// InternedGrammar lines their replacements — confirm against the repository.
pair<PreparedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
|
||||
pair<InternedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
|
||||
// Default-constructed, so all of its member vectors are empty.
InternedGrammar grammar;
|
||||
return {
|
||||
PreparedGrammar({}, {}),
|
||||
grammar,
|
||||
new GrammarError(GrammarErrorTypeUndefinedSymbol,
|
||||
"Undefined rule '" + rule_name + "'")
|
||||
};
|
||||
}
|
||||
|
||||
pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
|
||||
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
|
||||
InternSymbols interner(grammar);
|
||||
vector<pair<string, rule_ptr>> rules;
|
||||
|
||||
|
|
@ -64,10 +66,12 @@ namespace tree_sitter {
|
|||
ubiquitous_tokens.push_back(*token);
|
||||
}
|
||||
|
||||
return {
|
||||
PreparedGrammar(rules, {}).ubiquitous_tokens(ubiquitous_tokens),
|
||||
nullptr
|
||||
};
|
||||
InternedGrammar result;
|
||||
result.rules = rules;
|
||||
result.ubiquitous_tokens = ubiquitous_tokens;
|
||||
result.separators = grammar.separators();
|
||||
|
||||
return { result, nullptr };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,13 +4,13 @@
|
|||
#include <utility>
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &);
|
||||
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
21
src/compiler/prepare_grammar/interned_grammar.h
Normal file
21
src/compiler/prepare_grammar/interned_grammar.h
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
// Plain data holder for a grammar after symbol interning: intern_symbols
// returns one and extract_tokens takes one as input. All members are public
// and the class declares no constructors or methods.
class InternedGrammar {
|
||||
public:
|
||||
// (name, rule) pairs, one per grammar rule.
std::vector<std::pair<std::string, rules::rule_ptr>> rules;
|
||||
// Symbols recorded as the grammar's ubiquitous tokens.
std::vector<rules::Symbol> ubiquitous_tokens;
|
||||
// Separator characters; intern_symbols copies these from Grammar::separators().
std::vector<char> separators;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
|
||||
|
|
@ -4,29 +4,31 @@
|
|||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::tuple;
|
||||
using std::make_tuple;
|
||||
|
||||
namespace prepare_grammar {
|
||||
// Runs the grammar-preparation steps in order: intern_symbols, extract_tokens,
// expand_repeats on the first extracted grammar, and expand_tokens on the
// second. Returns (syntax grammar, lexical grammar, nullptr) on success, or
// two default-constructed grammars plus the error if intern_symbols or
// expand_tokens reports one.
// NOTE(review): this span is a rendered diff with no +/- markers. The
// PreparedGrammar lines appear to be the pre-commit text and the
// SyntaxGrammar / LexicalGrammar / InternedGrammar lines their replacements —
// confirm against the repository before editing.
tuple<PreparedGrammar, PreparedGrammar, const GrammarError *>
|
||||
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
|
||||
prepare_grammar(const Grammar &input_grammar) {
|
||||
auto result = intern_symbols(input_grammar);
|
||||
const PreparedGrammar &grammar = result.first;
|
||||
const InternedGrammar &grammar = result.first;
|
||||
const GrammarError *error = result.second;
|
||||
|
||||
// Interning failed: hand the error back with empty grammars.
if (error)
|
||||
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
// grammars.first feeds expand_repeats; grammars.second feeds expand_tokens.
auto grammars = extract_tokens(grammar);
|
||||
const PreparedGrammar &rule_grammar = expand_repeats(grammars.first);
|
||||
// Binding the returned temporary to a const reference keeps it alive for
// the rest of this scope.
const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first);
|
||||
auto expand_tokens_result = expand_tokens(grammars.second);
|
||||
const PreparedGrammar &lex_grammar = expand_tokens_result.first;
|
||||
const LexicalGrammar &lex_grammar = expand_tokens_result.first;
|
||||
error = expand_tokens_result.second;
|
||||
|
||||
// Token expansion failed: same empty-grammar error result as above.
if (error)
|
||||
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
return make_tuple(rule_grammar, lex_grammar, nullptr);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,14 +2,14 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
|
||||
|
||||
#include <utility>
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class GrammarError;
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::tuple<PreparedGrammar, PreparedGrammar, const GrammarError *>
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
|
||||
prepare_grammar(const Grammar &);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue