diff --git a/project.gyp b/project.gyp index feeb1fac..77002a65 100644 --- a/project.gyp +++ b/project.gyp @@ -29,6 +29,7 @@ 'src/compiler/prepare_grammar/expand_repeats.cc', 'src/compiler/prepare_grammar/expand_tokens.cc', 'src/compiler/prepare_grammar/extract_choices.cc', + 'src/compiler/prepare_grammar/extract_simple_aliases.cc', 'src/compiler/prepare_grammar/extract_tokens.cc', 'src/compiler/prepare_grammar/flatten_grammar.cc', 'src/compiler/prepare_grammar/intern_symbols.cc', diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc index 49a3468b..9fe3f7ef 100644 --- a/src/compiler/compile.cc +++ b/src/compiler/compile.cc @@ -27,9 +27,10 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) } auto prepare_grammar_result = prepare_grammar::prepare_grammar(parse_result.grammar); - SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result); - LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result); - CompileError error = get<2>(prepare_grammar_result); + SyntaxGrammar &syntax_grammar = prepare_grammar_result.syntax_grammar; + LexicalGrammar &lexical_grammar = prepare_grammar_result.lexical_grammar; + auto &simple_aliases = prepare_grammar_result.simple_aliases; + CompileError error = prepare_grammar_result.error; if (error.type) { return {nullptr, strdup(error.message.c_str()), error.type}; } @@ -48,7 +49,8 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) move(build_tables_result.keyword_lex_table), build_tables_result.keyword_capture_token, move(syntax_grammar), - move(lexical_grammar) + move(lexical_grammar), + move(simple_aliases) ); set_log_file(nullptr); diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index b1f13cbb..f0af966f 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -23,6 +23,7 @@ using std::pair; using std::set; using std::string; using std::to_string; +using std::unordered_map; 
using std::unordered_set; using std::vector; using util::escape_char; @@ -77,6 +78,7 @@ class CCodeGenerator { Symbol keyword_capture_token; const SyntaxGrammar syntax_grammar; const LexicalGrammar lexical_grammar; + unordered_map simple_aliases; /* Per-symbol aliases consulted by symbol_name() and symbol_type(). */ map symbol_ids; vector> parse_table_entries; vector> external_scanner_states; @@ -84,18 +86,21 @@ class CCodeGenerator { set unique_aliases; public: - CCodeGenerator(string name, ParseTable &&parse_table, LexTable &&main_lex_table, - LexTable &&keyword_lex_table, Symbol keyword_capture_token, - SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar) - : indent_level(0), - name(name), - parse_table(move(parse_table)), - main_lex_table(move(main_lex_table)), - keyword_lex_table(move(keyword_lex_table)), - keyword_capture_token(keyword_capture_token), - syntax_grammar(move(syntax_grammar)), - lexical_grammar(move(lexical_grammar)), - next_parse_action_list_index(0) {} + CCodeGenerator( + string name, ParseTable &&parse_table, LexTable &&main_lex_table, + LexTable &&keyword_lex_table, Symbol keyword_capture_token, + SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar, + unordered_map &&simple_aliases + ) : indent_level(0), + name(name), + parse_table(move(parse_table)), + main_lex_table(move(main_lex_table)), + keyword_lex_table(move(keyword_lex_table)), + keyword_capture_token(keyword_capture_token), + syntax_grammar(move(syntax_grammar)), + lexical_grammar(move(lexical_grammar)), + simple_aliases(move(simple_aliases)), + next_parse_action_list_index(0) {} string code() { buffer = ""; @@ -757,14 +762,28 @@ class CCodeGenerator { } string symbol_name(const Symbol &symbol) { - if (symbol == rules::END_OF_INPUT()) + if (symbol == rules::END_OF_INPUT()) { return "END"; + } + + auto simple_alias_entry = simple_aliases.find(symbol); /* A symbol with a simple alias is emitted under the alias value rather than its own name. */ + if (simple_alias_entry != simple_aliases.end()) { + return simple_alias_entry->second.value; + } + return entry_for_symbol(symbol).first; } VariableType symbol_type(const Symbol 
&symbol) { - if (symbol == rules::END_OF_INPUT()) + if (symbol == rules::END_OF_INPUT()) { return VariableTypeHidden; + } + + auto simple_alias_entry = simple_aliases.find(symbol); /* An aliased symbol stays visible: a named alias yields VariableTypeNamed, any other alias VariableTypeAnonymous. */ + if (simple_alias_entry != simple_aliases.end()) { + return simple_alias_entry->second.is_named ? VariableTypeNamed : VariableTypeAnonymous; + } + return entry_for_symbol(symbol).second; } @@ -874,9 +893,12 @@ class CCodeGenerator { } }; -string c_code(string name, ParseTable &&parse_table, LexTable &&lex_table, - LexTable &&keyword_lex_table, Symbol keyword_capture_token, - SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar) { +string c_code( + string name, ParseTable &&parse_table, LexTable &&lex_table, + LexTable &&keyword_lex_table, Symbol keyword_capture_token, + SyntaxGrammar &&syntax_grammar, LexicalGrammar &&lexical_grammar, + unordered_map &&simple_aliases +) { return CCodeGenerator( name, move(parse_table), @@ -884,7 +906,8 @@ string c_code(string name, ParseTable &&parse_table, LexTable &&lex_table, move(keyword_lex_table), keyword_capture_token, move(syntax_grammar), - move(lexical_grammar) + move(lexical_grammar), + move(simple_aliases) ).code(); } diff --git a/src/compiler/generate_code/c_code.h b/src/compiler/generate_code/c_code.h index dc0a8ddf..a7ce3c7f 100644 --- a/src/compiler/generate_code/c_code.h +++ b/src/compiler/generate_code/c_code.h @@ -2,6 +2,7 @@ #define COMPILER_GENERATE_CODE_C_CODE_H_ #include +#include #include "compiler/rule.h" namespace tree_sitter { @@ -20,7 +21,8 @@ std::string c_code( LexTable &&, rules::Symbol, SyntaxGrammar &&, - LexicalGrammar && + LexicalGrammar &&, + std::unordered_map && ); } // namespace generate_code diff --git a/src/compiler/prepare_grammar/extract_simple_aliases.cc b/src/compiler/prepare_grammar/extract_simple_aliases.cc new file mode 100644 index 00000000..208fe6f4 --- /dev/null +++ b/src/compiler/prepare_grammar/extract_simple_aliases.cc @@ -0,0 +1,111 @@ +#include 
"compiler/prepare_grammar/extract_simple_aliases.h" +#include "compiler/lexical_grammar.h" +#include "compiler/syntax_grammar.h" +#include +#include + +namespace tree_sitter { +namespace prepare_grammar { + +using std::pair; +using std::vector; +using std::unordered_map; +using rules::Alias; +using rules::Symbol; + +template +static void apply_alias(T *variable, Alias alias) { /* Overwrites the variable's name and type from a non-empty alias; nothing in this file calls it. */ + if (!alias.value.empty()) { + variable->name = alias.value; + variable->type = alias.is_named ? VariableTypeNamed : VariableTypeAnonymous; + } +} + +std::unordered_map +extract_simple_aliases(SyntaxGrammar *syntax_grammar, LexicalGrammar *lexical_grammar) { /* Finds symbols whose every production step carries one identical alias, clears those per-step aliases, and returns them keyed by symbol. */ + struct SymbolStatus { + Alias alias; + bool eligible = true; /* Cleared once the symbol is seen unaliased or with two different aliases. */ + }; + + vector terminal_status_list(lexical_grammar->variables.size()); + vector non_terminal_status_list(syntax_grammar->variables.size()); + vector external_status_list(syntax_grammar->external_tokens.size()); + + for (const SyntaxVariable &variable : syntax_grammar->variables) { + for (const Production &production : variable.productions) { + for (const ProductionStep &step : production.steps) { + SymbolStatus *status; + if (step.symbol.is_built_in()) { /* Built-in symbols are skipped; no status list is kept for them. */ + continue; + } else if (step.symbol.is_external()) { + status = &external_status_list[step.symbol.index]; + } else if (step.symbol.is_terminal()) { + status = &terminal_status_list[step.symbol.index]; + } else { + status = &non_terminal_status_list[step.symbol.index]; + } + + if (step.alias.value.empty()) { /* A single unaliased use disqualifies the symbol for good. */ + status->alias = Alias(); + status->eligible = false; + } + + if (status->eligible) { + if (status->alias.value.empty()) { + status->alias = step.alias; + } else if (status->alias != step.alias) { /* Two different aliases disqualify it as well. */ + status->alias = Alias(); + status->eligible = false; + } + } + } + } + } + + for (SyntaxVariable &variable : syntax_grammar->variables) { + for (Production &production : variable.productions) { + for (ProductionStep &step : production.steps) { + SymbolStatus *status; + if (step.symbol.is_built_in()) { + continue; + } else 
if (step.symbol.is_external()) { + status = &external_status_list[step.symbol.index]; + } else if (step.symbol.is_terminal()) { + status = &terminal_status_list[step.symbol.index]; + } else { + status = &non_terminal_status_list[step.symbol.index]; + } + + if (!status->alias.value.empty()) { /* Drop the per-step alias; it is returned in the result map instead. */ + step.alias = Alias(); + } + } + } + } + + unordered_map result; + + for (unsigned i = 0, n = terminal_status_list.size(); i < n; i++) { + if (!terminal_status_list[i].alias.value.empty()) { + result[Symbol::terminal(i)] = terminal_status_list[i].alias; + } + } + + for (unsigned i = 0, n = non_terminal_status_list.size(); i < n; i++) { + if (!non_terminal_status_list[i].alias.value.empty()) { + result[Symbol::non_terminal(i)] = non_terminal_status_list[i].alias; + } + } + + for (unsigned i = 0, n = external_status_list.size(); i < n; i++) { + if (!external_status_list[i].alias.value.empty()) { + result[Symbol::external(i)] = external_status_list[i].alias; + } + } + + return result; +} + +} // namespace prepare_grammar +} // namespace tree_sitter diff --git a/src/compiler/prepare_grammar/extract_simple_aliases.h b/src/compiler/prepare_grammar/extract_simple_aliases.h new file mode 100644 index 00000000..9970ad1a --- /dev/null +++ b/src/compiler/prepare_grammar/extract_simple_aliases.h @@ -0,0 +1,21 @@ +#ifndef COMPILER_PREPARE_GRAMMAR_EXTRACT_SIMPLE_ALIASES_H_ +#define COMPILER_PREPARE_GRAMMAR_EXTRACT_SIMPLE_ALIASES_H_ + +#include "compiler/rules/symbol.h" +#include "compiler/rules/metadata.h" +#include + +namespace tree_sitter { + +struct SyntaxGrammar; +struct LexicalGrammar; + +namespace prepare_grammar { + +std::unordered_map +extract_simple_aliases(SyntaxGrammar *, LexicalGrammar *); + +} // namespace prepare_grammar +} // namespace tree_sitter + +#endif // COMPILER_PREPARE_GRAMMAR_EXTRACT_SIMPLE_ALIASES_H_ diff --git a/src/compiler/prepare_grammar/prepare_grammar.cc b/src/compiler/prepare_grammar/prepare_grammar.cc index ac573a28..aef16846 100644 --- 
a/src/compiler/prepare_grammar/prepare_grammar.cc +++ b/src/compiler/prepare_grammar/prepare_grammar.cc @@ -2,69 +2,64 @@ #include "compiler/prepare_grammar/expand_repeats.h" #include "compiler/prepare_grammar/expand_tokens.h" #include "compiler/prepare_grammar/extract_tokens.h" +#include "compiler/prepare_grammar/extract_simple_aliases.h" #include "compiler/prepare_grammar/intern_symbols.h" #include "compiler/prepare_grammar/flatten_grammar.h" #include "compiler/prepare_grammar/normalize_rules.h" -#include "compiler/lexical_grammar.h" #include "compiler/prepare_grammar/initial_syntax_grammar.h" +#include "compiler/lexical_grammar.h" #include "compiler/syntax_grammar.h" namespace tree_sitter { namespace prepare_grammar { -using std::tuple; using std::get; -using std::make_tuple; +using std::move; -tuple prepare_grammar( - const InputGrammar &input_grammar) { - /* - * Convert all string-based `NamedSymbols` into numerical `Symbols` - */ +PrepareGrammarResult prepare_grammar(const InputGrammar &input_grammar) { /* Each failing pass returns early with only result.error filled in. */ + PrepareGrammarResult result; + + // Convert all string-based `NamedSymbols` into numerical `Symbols` auto intern_result = intern_symbols(input_grammar); CompileError error = intern_result.second; - if (error.type) - return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); + if (error.type) { + result.error = error; + return result; + } - /* - * Separate grammar into lexical and syntactic components - */ + // Separate grammar into lexical and syntactic components auto extract_result = extract_tokens(intern_result.first); error = get<2>(extract_result); if (error.type) { - return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); + result.error = error; + return result; } - /* - * Replace `Repeat` rules with pairs of recursive rules - */ + // Replace `Repeat` rules with pairs of recursive rules InitialSyntaxGrammar syntax_grammar1 = expand_repeats(get<0>(extract_result)); - /* - * Expand `String` and `Pattern` rules into full rule trees - */ - 
LexicalGrammar lex_grammar = get<1>(extract_result); - // auto expand_tokens_result = expand_tokens(get<1>(extract_result)); - // LexicalGrammar lex_grammar = expand_tokens_result.first; - // error = expand_tokens_result.second; - // if (error.type) - // return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); - - /* - * Flatten syntax rules into lists of productions. - */ + // Flatten syntax rules into lists of productions. auto flatten_result = flatten_grammar(syntax_grammar1); SyntaxGrammar syntax_grammar = flatten_result.first; error = flatten_result.second; - if (error.type) - return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); + if (error.type) { + result.error = error; + return result; + } - /* - * Ensure all lexical rules are in a consistent format. - */ - lex_grammar = normalize_rules(lex_grammar); + // Ensure all lexical rules are in a consistent format. + LexicalGrammar lexical_grammar = normalize_rules(get<1>(extract_result)); - return make_tuple(syntax_grammar, lex_grammar, CompileError::none()); + // Find any symbols that always have the same alias applied to them. + // Remove those aliases since they can be applied in a simpler way. 
+ auto simple_aliases = extract_simple_aliases(&syntax_grammar, &lexical_grammar); + + return { + move(syntax_grammar), + move(lexical_grammar), + move(simple_aliases), + CompileError::none(), + }; } } // namespace prepare_grammar diff --git a/src/compiler/prepare_grammar/prepare_grammar.h b/src/compiler/prepare_grammar/prepare_grammar.h index bed59a53..d71beed1 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.h +++ b/src/compiler/prepare_grammar/prepare_grammar.h @@ -1,7 +1,7 @@ #ifndef COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_ #define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_ -#include +#include #include "compiler/grammar.h" #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" @@ -10,7 +10,14 @@ namespace tree_sitter { namespace prepare_grammar { -std::tuple prepare_grammar(const InputGrammar &); +struct PrepareGrammarResult { /* Outputs of prepare_grammar(); error is CompileError::none() on the success path. */ + SyntaxGrammar syntax_grammar; + LexicalGrammar lexical_grammar; + std::unordered_map simple_aliases; + CompileError error; +}; + +PrepareGrammarResult prepare_grammar(const InputGrammar &); } // namespace prepare_grammar } // namespace tree_sitter diff --git a/src/compiler/rule.h b/src/compiler/rule.h index 5c4064e5..b66e2c63 100644 --- a/src/compiler/rule.h +++ b/src/compiler/rule.h @@ -91,7 +91,7 @@ struct Rule { } template - inline auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward(functions)...))){ + inline auto match(FunctionTypes && ...functions) const -> decltype(accept(util::make_visitor(std::forward(functions)...))) { return accept(util::make_visitor(std::forward(functions)...)); }