Simplify treatment of rules that are always aliased one way
This commit is contained in:
parent
57f3fd9026
commit
5372a81947
9 changed files with 225 additions and 63 deletions
|
|
@ -29,6 +29,7 @@
|
|||
'src/compiler/prepare_grammar/expand_repeats.cc',
|
||||
'src/compiler/prepare_grammar/expand_tokens.cc',
|
||||
'src/compiler/prepare_grammar/extract_choices.cc',
|
||||
'src/compiler/prepare_grammar/extract_simple_aliases.cc',
|
||||
'src/compiler/prepare_grammar/extract_tokens.cc',
|
||||
'src/compiler/prepare_grammar/flatten_grammar.cc',
|
||||
'src/compiler/prepare_grammar/intern_symbols.cc',
|
||||
|
|
|
|||
|
|
@ -27,9 +27,10 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file)
|
|||
}
|
||||
|
||||
auto prepare_grammar_result = prepare_grammar::prepare_grammar(parse_result.grammar);
|
||||
SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
|
||||
CompileError error = get<2>(prepare_grammar_result);
|
||||
SyntaxGrammar &syntax_grammar = prepare_grammar_result.syntax_grammar;
|
||||
LexicalGrammar &lexical_grammar = prepare_grammar_result.lexical_grammar;
|
||||
auto &simple_aliases = prepare_grammar_result.simple_aliases;
|
||||
CompileError error = prepare_grammar_result.error;
|
||||
if (error.type) {
|
||||
return {nullptr, strdup(error.message.c_str()), error.type};
|
||||
}
|
||||
|
|
@ -48,7 +49,8 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file)
|
|||
move(build_tables_result.keyword_lex_table),
|
||||
build_tables_result.keyword_capture_token,
|
||||
move(syntax_grammar),
|
||||
move(lexical_grammar)
|
||||
move(lexical_grammar),
|
||||
move(simple_aliases)
|
||||
);
|
||||
|
||||
set_log_file(nullptr);
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ using std::pair;
|
|||
using std::set;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::unordered_map;
|
||||
using std::unordered_set;
|
||||
using std::vector;
|
||||
using util::escape_char;
|
||||
|
|
@ -77,6 +78,7 @@ class CCodeGenerator {
|
|||
Symbol keyword_capture_token;
|
||||
const SyntaxGrammar syntax_grammar;
|
||||
const LexicalGrammar lexical_grammar;
|
||||
unordered_map<Symbol, Alias> simple_aliases;
|
||||
map<Symbol, string> symbol_ids;
|
||||
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
|
||||
vector<set<Symbol::Index>> external_scanner_states;
|
||||
|
|
@ -84,18 +86,21 @@ class CCodeGenerator {
|
|||
set<Alias> unique_aliases;
|
||||
|
||||
public:
|
||||
CCodeGenerator(string name, ParseTable &&parse_table, LexTable &&main_lex_table,
|
||||
LexTable &&keyword_lex_table, Symbol keyword_capture_token,
|
||||
SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar)
|
||||
: indent_level(0),
|
||||
name(name),
|
||||
parse_table(move(parse_table)),
|
||||
main_lex_table(move(main_lex_table)),
|
||||
keyword_lex_table(move(keyword_lex_table)),
|
||||
keyword_capture_token(keyword_capture_token),
|
||||
syntax_grammar(move(syntax_grammar)),
|
||||
lexical_grammar(move(lexical_grammar)),
|
||||
next_parse_action_list_index(0) {}
|
||||
CCodeGenerator(
|
||||
string name, ParseTable &&parse_table, LexTable &&main_lex_table,
|
||||
LexTable &&keyword_lex_table, Symbol keyword_capture_token,
|
||||
SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar,
|
||||
unordered_map<Symbol, Alias> &&simple_aliases
|
||||
) : indent_level(0),
|
||||
name(name),
|
||||
parse_table(move(parse_table)),
|
||||
main_lex_table(move(main_lex_table)),
|
||||
keyword_lex_table(move(keyword_lex_table)),
|
||||
keyword_capture_token(keyword_capture_token),
|
||||
syntax_grammar(move(syntax_grammar)),
|
||||
lexical_grammar(move(lexical_grammar)),
|
||||
simple_aliases(move(simple_aliases)),
|
||||
next_parse_action_list_index(0) {}
|
||||
|
||||
string code() {
|
||||
buffer = "";
|
||||
|
|
@ -757,14 +762,28 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
// Returns the name to use for `symbol`:
//   - "END" for the end-of-input symbol,
//   - the alias text when `simple_aliases` holds an entry for the symbol,
//   - otherwise the name component returned by `entry_for_symbol`.
string symbol_name(const Symbol &symbol) {
  if (symbol == rules::END_OF_INPUT()) return "END";

  const auto alias_iter = simple_aliases.find(symbol);
  if (alias_iter == simple_aliases.end()) {
    return entry_for_symbol(symbol).first;
  }

  return alias_iter->second.value;
}
|
||||
|
||||
// Returns the variable type to report for `symbol`.
//
// The end-of-input symbol is always hidden. A symbol that has an entry in
// `simple_aliases` takes its type from that alias: a named alias yields
// `VariableTypeNamed`, an unnamed alias yields `VariableTypeAnonymous`.
// Every other symbol uses the type component returned by `entry_for_symbol`.
VariableType symbol_type(const Symbol &symbol) {
  if (symbol == rules::END_OF_INPUT()) {
    return VariableTypeHidden;
  }

  auto simple_alias_entry = simple_aliases.find(symbol);
  if (simple_alias_entry != simple_aliases.end()) {
    // An unnamed alias maps to the anonymous type, the same mapping used when
    // an alias is applied to a grammar variable. Reporting it as hidden would
    // treat the aliased symbol as if it had no visible name at all.
    return simple_alias_entry->second.is_named ? VariableTypeNamed : VariableTypeAnonymous;
  }

  return entry_for_symbol(symbol).second;
}
|
||||
|
||||
|
|
@ -874,9 +893,12 @@ class CCodeGenerator {
|
|||
}
|
||||
};
|
||||
|
||||
string c_code(string name, ParseTable &&parse_table, LexTable &&lex_table,
|
||||
LexTable &&keyword_lex_table, Symbol keyword_capture_token,
|
||||
SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar) {
|
||||
string c_code(
|
||||
string name, ParseTable &&parse_table, LexTable &&lex_table,
|
||||
LexTable &&keyword_lex_table, Symbol keyword_capture_token,
|
||||
SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar,
|
||||
unordered_map<Symbol, Alias> &&simple_aliases
|
||||
) {
|
||||
return CCodeGenerator(
|
||||
name,
|
||||
move(parse_table),
|
||||
|
|
@ -884,7 +906,8 @@ string c_code(string name, ParseTable &&parse_table, LexTable &&lex_table,
|
|||
move(keyword_lex_table),
|
||||
keyword_capture_token,
|
||||
move(syntax_grammar),
|
||||
move(lexical_grammar)
|
||||
move(lexical_grammar),
|
||||
move(simple_aliases)
|
||||
).code();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#define COMPILER_GENERATE_CODE_C_CODE_H_
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -20,7 +21,8 @@ std::string c_code(
|
|||
LexTable &&,
|
||||
rules::Symbol,
|
||||
SyntaxGrammar &&,
|
||||
LexicalGrammar &&
|
||||
LexicalGrammar &&,
|
||||
std::unordered_map<rules::Symbol, rules::Alias> &&
|
||||
);
|
||||
|
||||
} // namespace generate_code
|
||||
|
|
|
|||
111
src/compiler/prepare_grammar/extract_simple_aliases.cc
Normal file
111
src/compiler/prepare_grammar/extract_simple_aliases.cc
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
#include "compiler/prepare_grammar/extract_simple_aliases.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
using std::unordered_map;
|
||||
using rules::Alias;
|
||||
using rules::Symbol;
|
||||
|
||||
template <typename T>
|
||||
static void apply_alias(T *variable, Alias alias) {
|
||||
if (!alias.value.empty()) {
|
||||
variable->name = alias.value;
|
||||
variable->type = alias.is_named ? VariableTypeNamed : VariableTypeAnonymous;
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_map<rules::Symbol, rules::Alias>
|
||||
extract_simple_aliases(SyntaxGrammar *syntax_grammar, LexicalGrammar *lexical_grammar) {
|
||||
struct SymbolStatus {
|
||||
Alias alias;
|
||||
bool eligible = true;
|
||||
};
|
||||
|
||||
vector<SymbolStatus> terminal_status_list(lexical_grammar->variables.size());
|
||||
vector<SymbolStatus> non_terminal_status_list(syntax_grammar->variables.size());
|
||||
vector<SymbolStatus> external_status_list(syntax_grammar->external_tokens.size());
|
||||
|
||||
for (const SyntaxVariable &variable : syntax_grammar->variables) {
|
||||
for (const Production &production : variable.productions) {
|
||||
for (const ProductionStep &step : production.steps) {
|
||||
SymbolStatus *status;
|
||||
if (step.symbol.is_built_in()) {
|
||||
continue;
|
||||
} else if (step.symbol.is_external()) {
|
||||
status = &external_status_list[step.symbol.index];
|
||||
} else if (step.symbol.is_terminal()) {
|
||||
status = &terminal_status_list[step.symbol.index];
|
||||
} else {
|
||||
status = &non_terminal_status_list[step.symbol.index];
|
||||
}
|
||||
|
||||
if (step.alias.value.empty()) {
|
||||
status->alias = Alias();
|
||||
status->eligible = false;
|
||||
}
|
||||
|
||||
if (status->eligible) {
|
||||
if (status->alias.value.empty()) {
|
||||
status->alias = step.alias;
|
||||
} else if (status->alias != step.alias) {
|
||||
status->alias = Alias();
|
||||
status->eligible = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (SyntaxVariable &variable : syntax_grammar->variables) {
|
||||
for (Production &production : variable.productions) {
|
||||
for (ProductionStep &step : production.steps) {
|
||||
SymbolStatus *status;
|
||||
if (step.symbol.is_built_in()) {
|
||||
continue;
|
||||
} else if (step.symbol.is_external()) {
|
||||
status = &external_status_list[step.symbol.index];
|
||||
} else if (step.symbol.is_terminal()) {
|
||||
status = &terminal_status_list[step.symbol.index];
|
||||
} else {
|
||||
status = &non_terminal_status_list[step.symbol.index];
|
||||
}
|
||||
|
||||
if (!status->alias.value.empty()) {
|
||||
step.alias = Alias();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unordered_map<Symbol, Alias> result;
|
||||
|
||||
for (unsigned i = 0, n = terminal_status_list.size(); i < n; i++) {
|
||||
if (!terminal_status_list[i].alias.value.empty()) {
|
||||
result[Symbol::terminal(i)] = terminal_status_list[i].alias;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0, n = non_terminal_status_list.size(); i < n; i++) {
|
||||
if (!non_terminal_status_list[i].alias.value.empty()) {
|
||||
result[Symbol::non_terminal(i)] = non_terminal_status_list[i].alias;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0, n = external_status_list.size(); i < n; i++) {
|
||||
if (!external_status_list[i].alias.value.empty()) {
|
||||
result[Symbol::external(i)] = external_status_list[i].alias;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
21
src/compiler/prepare_grammar/extract_simple_aliases.h
Normal file
21
src/compiler/prepare_grammar/extract_simple_aliases.h
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_EXTRACT_SIMPLE_ALIASES_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_SIMPLE_ALIASES_H_
|
||||
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include <unordered_map>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct SyntaxGrammar;
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::unordered_map<rules::Symbol, rules::Alias>
|
||||
extract_simple_aliases(SyntaxGrammar *, LexicalGrammar *);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_EXTRACT_SIMPLE_ALIASES_H_
|
||||
|
|
@ -2,69 +2,64 @@
|
|||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include "compiler/prepare_grammar/extract_simple_aliases.h"
|
||||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
#include "compiler/prepare_grammar/flatten_grammar.h"
|
||||
#include "compiler/prepare_grammar/normalize_rules.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
using std::tuple;
|
||||
using std::get;
|
||||
using std::make_tuple;
|
||||
using std::move;
|
||||
|
||||
tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
|
||||
const InputGrammar &input_grammar) {
|
||||
/*
|
||||
* Convert all string-based `NamedSymbols` into numerical `Symbols`
|
||||
*/
|
||||
PrepareGrammarResult prepare_grammar(const InputGrammar &input_grammar) {
|
||||
PrepareGrammarResult result;
|
||||
|
||||
// Convert all string-based `NamedSymbols` into numerical `Symbols`
|
||||
auto intern_result = intern_symbols(input_grammar);
|
||||
CompileError error = intern_result.second;
|
||||
if (error.type)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
if (error.type) {
|
||||
result.error = error;
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Separate grammar into lexical and syntactic components
|
||||
*/
|
||||
// Separate grammar into lexical and syntactic components
|
||||
auto extract_result = extract_tokens(intern_result.first);
|
||||
error = get<2>(extract_result);
|
||||
if (error.type) {
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
result.error = error;
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Replace `Repeat` rules with pairs of recursive rules
|
||||
*/
|
||||
// Replace `Repeat` rules with pairs of recursive rules
|
||||
InitialSyntaxGrammar syntax_grammar1 = expand_repeats(get<0>(extract_result));
|
||||
|
||||
/*
|
||||
* Expand `String` and `Pattern` rules into full rule trees
|
||||
*/
|
||||
LexicalGrammar lex_grammar = get<1>(extract_result);
|
||||
// auto expand_tokens_result = expand_tokens(get<1>(extract_result));
|
||||
// LexicalGrammar lex_grammar = expand_tokens_result.first;
|
||||
// error = expand_tokens_result.second;
|
||||
// if (error.type)
|
||||
// return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
/*
|
||||
* Flatten syntax rules into lists of productions.
|
||||
*/
|
||||
// Flatten syntax rules into lists of productions.
|
||||
auto flatten_result = flatten_grammar(syntax_grammar1);
|
||||
SyntaxGrammar syntax_grammar = flatten_result.first;
|
||||
error = flatten_result.second;
|
||||
if (error.type)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
if (error.type) {
|
||||
result.error = error;
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure all lexical rules are in a consistent format.
|
||||
*/
|
||||
lex_grammar = normalize_rules(lex_grammar);
|
||||
// Ensure all lexical rules are in a consistent format.
|
||||
LexicalGrammar lexical_grammar = normalize_rules(get<1>(extract_result));
|
||||
|
||||
return make_tuple(syntax_grammar, lex_grammar, CompileError::none());
|
||||
// Find any symbols that always have the same alias applied to them.
|
||||
// Remove those aliases since they can be applied in a simpler way.
|
||||
auto simple_aliases = extract_simple_aliases(&syntax_grammar, &lexical_grammar);
|
||||
|
||||
return {
|
||||
move(syntax_grammar),
|
||||
move(lexical_grammar),
|
||||
move(simple_aliases),
|
||||
CompileError::none(),
|
||||
};
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
|
||||
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
|
|
@ -10,7 +10,14 @@
|
|||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(const InputGrammar &);
|
||||
// Everything produced by `prepare_grammar`. The field order is relied on by
// the brace-initialized return in `prepare_grammar`, so keep it stable.
struct PrepareGrammarResult {
|
||||
// Syntax grammar after flattening, with simple aliases removed from its steps.
SyntaxGrammar syntax_grammar;
|
||||
// Lexical grammar after `normalize_rules`.
LexicalGrammar lexical_grammar;
|
||||
// Symbols that are always aliased the same way, mapped to that alias
// (the value returned by `extract_simple_aliases`).
std::unordered_map<rules::Symbol, rules::Alias> simple_aliases;
|
||||
// Error from the first failing stage; `CompileError::none()` on success.
CompileError error;
|
||||
};
|
||||
|
||||
PrepareGrammarResult prepare_grammar(const InputGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ struct Rule {
|
|||
}
|
||||
|
||||
template <typename ...FunctionTypes>
|
||||
inline auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward<FunctionTypes>(functions)...))){
|
||||
inline auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward<FunctionTypes>(functions)...))) {
|
||||
return accept(util::make_visitor(std::forward<FunctionTypes>(functions)...));
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue