Simplify treatment of rules that are always aliased one way

This commit is contained in:
Max Brunsfeld 2018-08-29 18:12:41 -07:00
parent 57f3fd9026
commit 5372a81947
9 changed files with 225 additions and 63 deletions

View file

@ -29,6 +29,7 @@
'src/compiler/prepare_grammar/expand_repeats.cc',
'src/compiler/prepare_grammar/expand_tokens.cc',
'src/compiler/prepare_grammar/extract_choices.cc',
'src/compiler/prepare_grammar/extract_simple_aliases.cc',
'src/compiler/prepare_grammar/extract_tokens.cc',
'src/compiler/prepare_grammar/flatten_grammar.cc',
'src/compiler/prepare_grammar/intern_symbols.cc',

View file

@ -27,9 +27,10 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file)
}
auto prepare_grammar_result = prepare_grammar::prepare_grammar(parse_result.grammar);
SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
CompileError error = get<2>(prepare_grammar_result);
SyntaxGrammar &syntax_grammar = prepare_grammar_result.syntax_grammar;
LexicalGrammar &lexical_grammar = prepare_grammar_result.lexical_grammar;
auto &simple_aliases = prepare_grammar_result.simple_aliases;
CompileError error = prepare_grammar_result.error;
if (error.type) {
return {nullptr, strdup(error.message.c_str()), error.type};
}
@ -48,7 +49,8 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file)
move(build_tables_result.keyword_lex_table),
build_tables_result.keyword_capture_token,
move(syntax_grammar),
move(lexical_grammar)
move(lexical_grammar),
move(simple_aliases)
);
set_log_file(nullptr);

View file

@ -23,6 +23,7 @@ using std::pair;
using std::set;
using std::string;
using std::to_string;
using std::unordered_map;
using std::unordered_set;
using std::vector;
using util::escape_char;
@ -77,6 +78,7 @@ class CCodeGenerator {
Symbol keyword_capture_token;
const SyntaxGrammar syntax_grammar;
const LexicalGrammar lexical_grammar;
unordered_map<Symbol, Alias> simple_aliases;
map<Symbol, string> symbol_ids;
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
vector<set<Symbol::Index>> external_scanner_states;
@ -84,18 +86,21 @@ class CCodeGenerator {
set<Alias> unique_aliases;
public:
CCodeGenerator(string name, ParseTable &&parse_table, LexTable &&main_lex_table,
LexTable &&keyword_lex_table, Symbol keyword_capture_token,
SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar)
: indent_level(0),
name(name),
parse_table(move(parse_table)),
main_lex_table(move(main_lex_table)),
keyword_lex_table(move(keyword_lex_table)),
keyword_capture_token(keyword_capture_token),
syntax_grammar(move(syntax_grammar)),
lexical_grammar(move(lexical_grammar)),
next_parse_action_list_index(0) {}
CCodeGenerator(
string name, ParseTable &&parse_table, LexTable &&main_lex_table,
LexTable &&keyword_lex_table, Symbol keyword_capture_token,
SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar,
unordered_map<Symbol, Alias> &&simple_aliases
) : indent_level(0),
name(name),
parse_table(move(parse_table)),
main_lex_table(move(main_lex_table)),
keyword_lex_table(move(keyword_lex_table)),
keyword_capture_token(keyword_capture_token),
syntax_grammar(move(syntax_grammar)),
lexical_grammar(move(lexical_grammar)),
simple_aliases(move(simple_aliases)),
next_parse_action_list_index(0) {}
string code() {
buffer = "";
@ -757,14 +762,28 @@ class CCodeGenerator {
}
// Returns the name to use for `symbol`: "END" for the end-of-input symbol, the
// alias value when the symbol has an entry in `simple_aliases`, and otherwise
// the first element of `entry_for_symbol(symbol)`.
string symbol_name(const Symbol &symbol) {
if (symbol == rules::END_OF_INPUT())
if (symbol == rules::END_OF_INPUT()) {
return "END";
}
// A simple alias takes precedence over the symbol's own entry.
auto simple_alias_entry = simple_aliases.find(symbol);
if (simple_alias_entry != simple_aliases.end()) {
return simple_alias_entry->second.value;
}
return entry_for_symbol(symbol).first;
}
// Returns the variable type to use for `symbol`.
//
// - The end-of-input symbol is always hidden.
// - A symbol with an entry in `simple_aliases` takes its type from the alias:
//   named aliases are `VariableTypeNamed`, all others `VariableTypeAnonymous`.
//   A non-named alias must not be reported as hidden; this matches the mapping
//   used by `apply_alias` in extract_simple_aliases.cc.
// - Any other symbol uses the second element of `entry_for_symbol(symbol)`.
VariableType symbol_type(const Symbol &symbol) {
  if (symbol == rules::END_OF_INPUT()) {
    return VariableTypeHidden;
  }

  auto simple_alias_entry = simple_aliases.find(symbol);
  if (simple_alias_entry != simple_aliases.end()) {
    return simple_alias_entry->second.is_named ? VariableTypeNamed : VariableTypeAnonymous;
  }

  return entry_for_symbol(symbol).second;
}
@ -874,9 +893,12 @@ class CCodeGenerator {
}
};
string c_code(string name, ParseTable &&parse_table, LexTable &&lex_table,
LexTable &&keyword_lex_table, Symbol keyword_capture_token,
SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar) {
string c_code(
string name, ParseTable &&parse_table, LexTable &&lex_table,
LexTable &&keyword_lex_table, Symbol keyword_capture_token,
SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar,
unordered_map<Symbol, Alias> &&simple_aliases
) {
return CCodeGenerator(
name,
move(parse_table),
@ -884,7 +906,8 @@ string c_code(string name, ParseTable &&parse_table, LexTable &&lex_table,
move(keyword_lex_table),
keyword_capture_token,
move(syntax_grammar),
move(lexical_grammar)
move(lexical_grammar),
move(simple_aliases)
).code();
}

View file

@ -2,6 +2,7 @@
#define COMPILER_GENERATE_CODE_C_CODE_H_
#include <string>
#include <unordered_map>
#include "compiler/rule.h"
namespace tree_sitter {
@ -20,7 +21,8 @@ std::string c_code(
LexTable &&,
rules::Symbol,
SyntaxGrammar &&,
LexicalGrammar &&
LexicalGrammar &&,
std::unordered_map<rules::Symbol, rules::Alias> &&
);
} // namespace generate_code

View file

@ -0,0 +1,111 @@
#include "compiler/prepare_grammar/extract_simple_aliases.h"
#include "compiler/lexical_grammar.h"
#include "compiler/syntax_grammar.h"
#include <unordered_map>
#include <vector>
namespace tree_sitter {
namespace prepare_grammar {
using std::pair;
using std::vector;
using std::unordered_map;
using rules::Alias;
using rules::Symbol;
// Overwrites the name and type of `variable` with those described by `alias`.
// An alias whose value is empty leaves `variable` untouched.
template <typename T>
static void apply_alias(T *variable, Alias alias) {
  if (alias.value.empty()) {
    return;
  }

  variable->name = alias.value;
  if (alias.is_named) {
    variable->type = VariableTypeNamed;
  } else {
    variable->type = VariableTypeAnonymous;
  }
}
// Finds every symbol that carries one identical, non-empty alias in all of the
// production steps where it appears. The alias is cleared from those steps and
// returned in a map keyed by the symbol.
//
// `lexical_grammar` is read only for the number of variables it holds.
std::unordered_map<rules::Symbol, rules::Alias>
extract_simple_aliases(SyntaxGrammar *syntax_grammar, LexicalGrammar *lexical_grammar) {
  struct SymbolStatus {
    Alias alias;
    bool eligible = true;
  };

  vector<SymbolStatus> terminal_statuses(lexical_grammar->variables.size());
  vector<SymbolStatus> non_terminal_statuses(syntax_grammar->variables.size());
  vector<SymbolStatus> external_statuses(syntax_grammar->external_tokens.size());

  // Maps a symbol to its bookkeeping slot. Built-in symbols are not tracked,
  // so they yield a null pointer.
  auto status_for = [&](const Symbol &symbol) -> SymbolStatus * {
    if (symbol.is_built_in()) return nullptr;
    if (symbol.is_external()) return &external_statuses[symbol.index];
    if (symbol.is_terminal()) return &terminal_statuses[symbol.index];
    return &non_terminal_statuses[symbol.index];
  };

  // Marks a symbol as permanently unsuitable and forgets any candidate alias.
  auto disqualify = [](SymbolStatus *status) {
    status->alias = Alias();
    status->eligible = false;
  };

  // Pass 1: decide, for each symbol, whether a single alias covers every use.
  for (const SyntaxVariable &variable : syntax_grammar->variables) {
    for (const Production &production : variable.productions) {
      for (const ProductionStep &step : production.steps) {
        SymbolStatus *status = status_for(step.symbol);

        // A disqualified symbol already has an empty alias; nothing can
        // change for it any more.
        if (!status || !status->eligible) continue;

        if (step.alias.value.empty()) {
          // An unaliased use rules the symbol out.
          disqualify(status);
        } else if (status->alias.value.empty()) {
          // First aliased use: remember it as the candidate.
          status->alias = step.alias;
        } else if (status->alias != step.alias) {
          // Two different aliases rule the symbol out.
          disqualify(status);
        }
      }
    }
  }

  // Pass 2: strip the per-step alias wherever a simple alias was found.
  for (SyntaxVariable &variable : syntax_grammar->variables) {
    for (Production &production : variable.productions) {
      for (ProductionStep &step : production.steps) {
        const SymbolStatus *status = status_for(step.symbol);
        if (status && !status->alias.value.empty()) {
          step.alias = Alias();
        }
      }
    }
  }

  // Gather the surviving aliases, keyed by the symbol they belong to.
  unordered_map<Symbol, Alias> result;

  for (unsigned i = 0, n = terminal_statuses.size(); i < n; i++) {
    const Alias &alias = terminal_statuses[i].alias;
    if (alias.value.empty()) continue;
    result[Symbol::terminal(i)] = alias;
  }

  for (unsigned i = 0, n = non_terminal_statuses.size(); i < n; i++) {
    const Alias &alias = non_terminal_statuses[i].alias;
    if (alias.value.empty()) continue;
    result[Symbol::non_terminal(i)] = alias;
  }

  for (unsigned i = 0, n = external_statuses.size(); i < n; i++) {
    const Alias &alias = external_statuses[i].alias;
    if (alias.value.empty()) continue;
    result[Symbol::external(i)] = alias;
  }

  return result;
}
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -0,0 +1,21 @@
#ifndef COMPILER_PREPARE_GRAMMAR_EXTRACT_SIMPLE_ALIASES_H_
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_SIMPLE_ALIASES_H_
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include <unordered_map>
namespace tree_sitter {
struct SyntaxGrammar;
struct LexicalGrammar;
namespace prepare_grammar {
// Finds the symbols that carry one identical, non-empty alias in every
// production step of the syntax grammar where they appear. Those aliases are
// cleared from the syntax grammar's production steps and returned, keyed by
// symbol.
std::unordered_map<rules::Symbol, rules::Alias>
extract_simple_aliases(SyntaxGrammar *, LexicalGrammar *);
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_EXTRACT_SIMPLE_ALIASES_H_

View file

@ -2,69 +2,64 @@
#include "compiler/prepare_grammar/expand_repeats.h"
#include "compiler/prepare_grammar/expand_tokens.h"
#include "compiler/prepare_grammar/extract_tokens.h"
#include "compiler/prepare_grammar/extract_simple_aliases.h"
#include "compiler/prepare_grammar/intern_symbols.h"
#include "compiler/prepare_grammar/flatten_grammar.h"
#include "compiler/prepare_grammar/normalize_rules.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {
namespace prepare_grammar {
using std::tuple;
using std::get;
using std::make_tuple;
using std::move;
tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
const InputGrammar &input_grammar) {
/*
* Convert all string-based `NamedSymbols` into numerical `Symbols`
*/
PrepareGrammarResult prepare_grammar(const InputGrammar &input_grammar) {
PrepareGrammarResult result;
// Convert all string-based `NamedSymbols` into numerical `Symbols`
auto intern_result = intern_symbols(input_grammar);
CompileError error = intern_result.second;
if (error.type)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
if (error.type) {
result.error = error;
return result;
}
/*
* Separate grammar into lexical and syntactic components
*/
// Separate grammar into lexical and syntactic components
auto extract_result = extract_tokens(intern_result.first);
error = get<2>(extract_result);
if (error.type) {
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
result.error = error;
return result;
}
/*
* Replace `Repeat` rules with pairs of recursive rules
*/
// Replace `Repeat` rules with pairs of recursive rules
InitialSyntaxGrammar syntax_grammar1 = expand_repeats(get<0>(extract_result));
/*
* Expand `String` and `Pattern` rules into full rule trees
*/
LexicalGrammar lex_grammar = get<1>(extract_result);
// auto expand_tokens_result = expand_tokens(get<1>(extract_result));
// LexicalGrammar lex_grammar = expand_tokens_result.first;
// error = expand_tokens_result.second;
// if (error.type)
// return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
/*
* Flatten syntax rules into lists of productions.
*/
// Flatten syntax rules into lists of productions.
auto flatten_result = flatten_grammar(syntax_grammar1);
SyntaxGrammar syntax_grammar = flatten_result.first;
error = flatten_result.second;
if (error.type)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
if (error.type) {
result.error = error;
return result;
}
/*
* Ensure all lexical rules are in a consistent format.
*/
lex_grammar = normalize_rules(lex_grammar);
// Ensure all lexical rules are in a consistent format.
LexicalGrammar lexical_grammar = normalize_rules(get<1>(extract_result));
return make_tuple(syntax_grammar, lex_grammar, CompileError::none());
// Find any symbols that always have the same alias applied to them.
// Remove those aliases since they can be applied in a simpler way.
auto simple_aliases = extract_simple_aliases(&syntax_grammar, &lexical_grammar);
return {
move(syntax_grammar),
move(lexical_grammar),
move(simple_aliases),
CompileError::none(),
};
}
} // namespace prepare_grammar

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
#define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
#include <tuple>
#include <unordered_map>
#include "compiler/grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
@ -10,7 +10,14 @@
namespace tree_sitter {
namespace prepare_grammar {
std::tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(const InputGrammar &);
// Everything produced by `prepare_grammar`.
struct PrepareGrammarResult {
// The syntactic part of the grammar.
SyntaxGrammar syntax_grammar;
// The lexical part of the grammar.
LexicalGrammar lexical_grammar;
// Aliases for symbols that always have the same alias applied to them.
std::unordered_map<rules::Symbol, rules::Alias> simple_aliases;
// The failure, if any; callers test `error.type` to detect one.
CompileError error;
};
PrepareGrammarResult prepare_grammar(const InputGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -91,7 +91,7 @@ struct Rule {
}
template <typename ...FunctionTypes>
inline auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward<FunctionTypes>(functions)...))){
inline auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward<FunctionTypes>(functions)...))) {
return accept(util::make_visitor(std::forward<FunctionTypes>(functions)...));
}