Make separate types for syntax and lexical grammars

This way, the separator characters can be added as a field to
lexical grammars only.
This commit is contained in:
Max Brunsfeld 2014-06-25 13:27:16 -07:00
parent d5674d33c4
commit 7df35f9b8d
49 changed files with 467 additions and 395 deletions

View file

@ -50,17 +50,16 @@ namespace tree_sitter {
vector<pair<string, rules::rule_ptr>> aux_rules;
};
PreparedGrammar expand_repeats(const PreparedGrammar &grammar) {
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules());
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
for (auto &pair : grammar.rules()) {
for (auto &pair : grammar.rules) {
ExpandRepeats expander(pair.first, aux_rules.size());
rules.push_back({ pair.first, expander.apply(pair.second) });
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
}
return PreparedGrammar(rules, aux_rules).
ubiquitous_tokens(grammar.ubiquitous_tokens());
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
}
}
}

View file

@ -4,10 +4,10 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
namespace prepare_grammar {
PreparedGrammar expand_repeats(const PreparedGrammar &);
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
}
}

View file

@ -43,28 +43,29 @@ namespace tree_sitter {
ExpandTokens() : error(nullptr) {}
};
pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &grammar) {
pair<LexicalGrammar, const GrammarError *>
expand_tokens(const LexicalGrammar &grammar) {
vector<pair<string, rule_ptr>> rules, aux_rules;
ExpandTokens expander;
for (auto &pair : grammar.rules()) {
for (auto &pair : grammar.rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { PreparedGrammar(), expander.error };
return { LexicalGrammar({}, {}, {}), expander.error };
rules.push_back({ pair.first, rule });
}
for (auto &pair : grammar.aux_rules()) {
for (auto &pair : grammar.aux_rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { PreparedGrammar(), expander.error };
return { LexicalGrammar({}, {}, {}), expander.error };
aux_rules.push_back({ pair.first, rule });
}
return {
PreparedGrammar(rules, aux_rules).ubiquitous_tokens(grammar.ubiquitous_tokens()),
nullptr };
LexicalGrammar(rules, aux_rules, grammar.separators),
nullptr,
};
}
}
}

View file

@ -5,11 +5,11 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class PreparedGrammar;
class LexicalGrammar;
namespace prepare_grammar {
std::pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &);
std::pair<LexicalGrammar, const GrammarError *>
expand_tokens(const LexicalGrammar &);
}
}

View file

@ -9,6 +9,7 @@
#include "compiler/rules/string.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/pattern.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepare_grammar/token_description.h"
namespace tree_sitter {
@ -93,15 +94,15 @@ namespace tree_sitter {
vector<pair<string, rule_ptr>> tokens;
};
pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &input_grammar) {
pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &input_grammar) {
vector<pair<string, rule_ptr>> rules, tokens, aux_rules, aux_tokens;
vector<Symbol> ubiquitous_tokens;
TokenExtractor extractor;
map<Symbol, Symbol> symbol_replacements;
for (size_t i = 0; i < input_grammar.rules().size(); i++) {
auto pair = input_grammar.rules()[i];
for (size_t i = 0; i < input_grammar.rules.size(); i++) {
auto pair = input_grammar.rules[i];
if (IsToken().apply(pair.second)) {
tokens.push_back(pair);
symbol_replacements.insert({
@ -113,32 +114,17 @@ namespace tree_sitter {
}
}
for (size_t i = 0; i < input_grammar.aux_rules().size(); i++) {
auto pair = input_grammar.aux_rules()[i];
if (IsToken().apply(pair.second)) {
aux_tokens.push_back(pair);
symbol_replacements.insert({
Symbol(i, rules::SymbolOptionAuxiliary),
Symbol(aux_tokens.size() - 1, rules::SymbolOption(rules::SymbolOptionAuxiliary|rules::SymbolOptionToken))
});
} else {
aux_rules.push_back({ pair.first, extractor.apply(pair.second) });
}
}
aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());
SymbolInliner inliner(symbol_replacements);
for (auto &pair : rules)
pair.second = inliner.apply(pair.second);
for (auto &pair : aux_rules)
pair.second = inliner.apply(pair.second);
for (auto &symbol : input_grammar.ubiquitous_tokens())
for (auto &symbol : input_grammar.ubiquitous_tokens)
ubiquitous_tokens.push_back(inliner.replace_symbol(symbol));
return {
PreparedGrammar(rules, aux_rules).ubiquitous_tokens(ubiquitous_tokens),
PreparedGrammar(tokens, aux_tokens)
SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
LexicalGrammar(tokens, aux_tokens, {}),
};
}
}

View file

@ -2,12 +2,14 @@
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_
#include <utility>
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace prepare_grammar {
std::pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &);
std::pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &);
}
}

View file

@ -2,6 +2,7 @@
#include <memory>
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepared_grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/named_symbol.h"
@ -37,15 +38,16 @@ namespace tree_sitter {
string missing_rule_name;
};
pair<PreparedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
pair<InternedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
InternedGrammar grammar;
return {
PreparedGrammar({}, {}),
grammar,
new GrammarError(GrammarErrorTypeUndefinedSymbol,
"Undefined rule '" + rule_name + "'")
};
}
pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
InternSymbols interner(grammar);
vector<pair<string, rule_ptr>> rules;
@ -64,10 +66,12 @@ namespace tree_sitter {
ubiquitous_tokens.push_back(*token);
}
return {
PreparedGrammar(rules, {}).ubiquitous_tokens(ubiquitous_tokens),
nullptr
};
InternedGrammar result;
result.rules = rules;
result.ubiquitous_tokens = ubiquitous_tokens;
result.separators = grammar.separators();
return { result, nullptr };
}
}
}

View file

@ -4,13 +4,13 @@
#include <utility>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
class Grammar;
class PreparedGrammar;
namespace prepare_grammar {
std::pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &);
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
}
}

View file

@ -0,0 +1,21 @@
#ifndef COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
#define COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
#include <utility>
#include <vector>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace prepare_grammar {
// Plain data holder for a grammar after symbol interning: it is the value
// returned by intern_symbols() and the input accepted by extract_tokens().
// All members are public and start out empty when default-constructed.
class InternedGrammar {
public:
// (string, rule) pairs for the grammar's rules; the string is presumably
// the rule's name -- confirm against intern_symbols().
std::vector<std::pair<std::string, rules::rule_ptr>> rules;
// Symbols of the grammar's ubiquitous tokens.
std::vector<rules::Symbol> ubiquitous_tokens;
// Separator characters; intern_symbols() fills this from
// Grammar::separators().
std::vector<char> separators;
};
}
}
#endif // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_

View file

@ -4,29 +4,31 @@
#include "compiler/prepare_grammar/expand_repeats.h"
#include "compiler/prepare_grammar/expand_tokens.h"
#include "compiler/prepare_grammar/intern_symbols.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::tuple;
using std::make_tuple;
namespace prepare_grammar {
tuple<PreparedGrammar, PreparedGrammar, const GrammarError *>
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
prepare_grammar(const Grammar &input_grammar) {
auto result = intern_symbols(input_grammar);
const PreparedGrammar &grammar = result.first;
const InternedGrammar &grammar = result.first;
const GrammarError *error = result.second;
if (error)
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
auto grammars = extract_tokens(grammar);
const PreparedGrammar &rule_grammar = expand_repeats(grammars.first);
const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first);
auto expand_tokens_result = expand_tokens(grammars.second);
const PreparedGrammar &lex_grammar = expand_tokens_result.first;
const LexicalGrammar &lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;
if (error)
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
return make_tuple(rule_grammar, lex_grammar, nullptr);
}

View file

@ -2,14 +2,14 @@
#define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
#include <utility>
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
class Grammar;
class GrammarError;
class PreparedGrammar;
namespace prepare_grammar {
std::tuple<PreparedGrammar, PreparedGrammar, const GrammarError *>
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
prepare_grammar(const Grammar &);
}
}