diff --git a/project.gyp b/project.gyp
index 79e9aa27..5751ec8d 100644
--- a/project.gyp
+++ b/project.gyp
@@ -23,6 +23,7 @@
         'src/compiler/build_tables/parse_item.cc',
         'src/compiler/build_tables/parse_conflict_manager.cc',
         'src/compiler/build_tables/rule_can_be_blank.cc',
+        'src/compiler/build_tables/symbols_by_first_symbol.cc',
         'src/compiler/compile.cc',
         'src/compiler/generate_code/c_code.cc',
         'src/compiler/lex_table.cc',
diff --git a/spec/compiler/build_tables/symbols_by_first_symbol_spec.cc b/spec/compiler/build_tables/symbols_by_first_symbol_spec.cc
new file mode 100644
index 00000000..19508063
--- /dev/null
+++ b/spec/compiler/build_tables/symbols_by_first_symbol_spec.cc
@@ -0,0 +1,83 @@
+#include "spec_helper.h"
+#include "helpers/stream_methods.h"
+#include "compiler/build_tables/symbols_by_first_symbol.h"
+#include "compiler/syntax_grammar.h"
+
+using namespace rules;
+using build_tables::symbols_by_first_symbol;
+
+START_TEST
+
+describe("symbols_by_first_symbol", [&]() {
+  SyntaxGrammar grammar{{
+
+    // starts with token-11 and token-13
+    SyntaxVariable("rule-0", VariableTypeNamed, vector<Production>({
+      Production({
+        ProductionStep(Symbol(11, true), 0, rules::AssociativityNone),
+        ProductionStep(Symbol(12, true), 0, rules::AssociativityNone),
+      }),
+      Production({
+        ProductionStep(Symbol(13, true), 0, rules::AssociativityNone),
+        ProductionStep(Symbol(14, true), 0, rules::AssociativityNone),
+      }),
+    })),
+
+    // starts with rule-0, which implies token-11 and token-13
+    SyntaxVariable("rule-1", VariableTypeNamed, vector<Production>({
+      Production({
+        ProductionStep(Symbol(0), 0, rules::AssociativityNone),
+        ProductionStep(Symbol(12, true), 0, rules::AssociativityNone),
+      }),
+    })),
+
+    // starts with token-15 and rule-1, which implies token-11 and token-13
+    SyntaxVariable("rule-2", VariableTypeNamed, vector<Production>({
+      Production({
+        ProductionStep(Symbol(1), 0, rules::AssociativityNone),
+      }),
+      Production({
+        ProductionStep(Symbol(15, true), 0, rules::AssociativityNone),
+      }),
+    })),
+
+    // starts with token-15
+    SyntaxVariable("rule-3", VariableTypeNamed, vector<Production>({
+      Production({
+        ProductionStep(Symbol(15, true), 0, rules::AssociativityNone),
+      }),
+    }))
+  }, {}, {}};
+
+  it("gives the set of non-terminals that can start with any given terminal", [&]() {
+    auto result = symbols_by_first_symbol(grammar);
+
+    AssertThat(result, Equals(map<Symbol, set<Symbol>>({
+      {
+        Symbol(11, true), {
+          Symbol(11, true),
+          Symbol(0),
+          Symbol(1),
+          Symbol(2),
+        }
+      },
+      {
+        Symbol(13, true), {
+          Symbol(13, true),
+          Symbol(0),
+          Symbol(1),
+          Symbol(2),
+        }
+      },
+      {
+        Symbol(15, true), {
+          Symbol(15, true),
+          Symbol(2),
+          Symbol(3)
+        }
+      },
+    })));
+  });
+});
+
+END_TEST
diff --git a/src/compiler/build_tables/symbols_by_first_symbol.cc b/src/compiler/build_tables/symbols_by_first_symbol.cc
new file mode 100644
index 00000000..f1da78b4
--- /dev/null
+++ b/src/compiler/build_tables/symbols_by_first_symbol.cc
@@ -0,0 +1,66 @@
+#include "compiler/build_tables/symbols_by_first_symbol.h"
+#include "compiler/syntax_grammar.h"
+#include "compiler/rules/symbol.h"
+
+namespace tree_sitter {
+namespace build_tables {
+
+using std::map;
+using std::set;
+using rules::Symbol;
+
+// For every token that begins at least one production, collects the token
+// itself plus each variable that can start with it, either directly or by
+// way of other variables.
+map<Symbol, set<Symbol>> symbols_by_first_symbol(const SyntaxGrammar &grammar) {
+  map<Symbol, set<Symbol>> result;
+
+  // Record the direct relationships: each variable starts with itself and
+  // with the first symbol of each of its non-empty productions. The index
+  // begins at size_t(-1) so the increment at the top of the loop produces 0
+  // for the first variable.
+  size_t variable_index = -1;
+  for (const SyntaxVariable &variable : grammar.variables) {
+    variable_index++;
+    Symbol symbol(variable_index);
+    result[symbol].insert(symbol);
+    for (const Production &production : variable.productions)
+      if (!production.empty()) {
+        Symbol first_symbol = production[0].symbol;
+        result[first_symbol].insert(symbol);
+        result[first_symbol].insert(first_symbol);
+      }
+  }
+
+  // Propagate to a fixed point: whatever starts with a member of an entry
+  // also starts with that entry's key. Every set member was given its own
+  // entry above, so result[symbol] never adds keys while iterating.
+  bool done = false;
+  while (!done) {
+    done = true;
+    for (auto &entry : result) {
+      set<Symbol> new_symbols;
+      for (const Symbol &symbol : entry.second)
+        for (const Symbol &other_symbol : result[symbol])
+          new_symbols.insert(other_symbol);
+
+      for (const Symbol &new_symbol : new_symbols)
+        if (entry.second.insert(new_symbol).second)
+          done = false;
+    }
+  }
+
+  // Keep only the entries keyed by tokens.
+  for (auto iter = result.begin(), end = result.end(); iter != end;) {
+    if (!iter->first.is_token) {
+      result.erase(iter++);
+    } else {
+      iter++;
+    }
+  }
+
+  return result;
+}
+
+}  // namespace build_tables
+}  // namespace tree_sitter
diff --git a/src/compiler/build_tables/symbols_by_first_symbol.h b/src/compiler/build_tables/symbols_by_first_symbol.h
new file mode 100644
index 00000000..0cd15178
--- /dev/null
+++ b/src/compiler/build_tables/symbols_by_first_symbol.h
@@ -0,0 +1,21 @@
+#ifndef COMPILER_BUILD_TABLES_SYMBOLS_BY_FIRST_SYMBOL_H_
+#define COMPILER_BUILD_TABLES_SYMBOLS_BY_FIRST_SYMBOL_H_
+
+#include <map>
+#include <set>
+#include "compiler/rules/symbol.h"
+
+namespace tree_sitter {
+
+struct SyntaxGrammar;
+
+namespace build_tables {
+
+// Returns, for each token that begins some production of the grammar, the
+// set of symbols that can start with that token (including the token itself).
+std::map<rules::Symbol, std::set<rules::Symbol>> symbols_by_first_symbol(const SyntaxGrammar &);
+
+}  // namespace build_tables
+}  // namespace tree_sitter
+
+#endif  // COMPILER_BUILD_TABLES_SYMBOLS_BY_FIRST_SYMBOL_H_