Compute the set of variables that can begin with each terminal symbol

This commit is contained in:
Max Brunsfeld 2016-02-12 23:39:11 -08:00
parent 5a34d74702
commit dee1f697c1
4 changed files with 158 additions and 0 deletions

View file

@ -23,6 +23,7 @@
'src/compiler/build_tables/parse_item.cc',
'src/compiler/build_tables/parse_conflict_manager.cc',
'src/compiler/build_tables/rule_can_be_blank.cc',
'src/compiler/build_tables/symbols_by_first_symbol.cc',
'src/compiler/compile.cc',
'src/compiler/generate_code/c_code.cc',
'src/compiler/lex_table.cc',

View file

@ -0,0 +1,83 @@
#include "spec_helper.h"
#include "helpers/stream_methods.h"
#include "compiler/build_tables/symbols_by_first_symbol.h"
#include "compiler/syntax_grammar.h"
using namespace rules;
using build_tables::symbols_by_first_symbol;
START_TEST
// Spec for build_tables::symbols_by_first_symbol.
//
// Notation used in the comments below: Symbol(n, true) is a token ("token-n"),
// while Symbol(n) refers to the variable at index n of the grammar ("rule-n").
describe("symbols_by_first_symbol", [&]() {
// Fixture: four variables whose productions begin with a mix of tokens and
// other variables, so that both direct and transitive "starts with"
// relationships are exercised.
SyntaxGrammar grammar{{
// rule-0: starts directly with token-11 and token-13
SyntaxVariable("rule-0", VariableTypeNamed, vector<Production>({
Production({
ProductionStep(Symbol(11, true), 0, rules::AssociativityNone),
ProductionStep(Symbol(12, true), 0, rules::AssociativityNone),
}),
Production({
ProductionStep(Symbol(13, true), 0, rules::AssociativityNone),
ProductionStep(Symbol(14, true), 0, rules::AssociativityNone),
}),
})),
// rule-1: starts with rule-0, which implies token-11 and token-13
SyntaxVariable("rule-1", VariableTypeNamed, vector<Production>({
Production({
ProductionStep(Symbol(0), 0, rules::AssociativityNone),
ProductionStep(Symbol(12, true), 0, rules::AssociativityNone),
}),
})),
// rule-2: starts with token-15 and with rule-1, which (through rule-0)
// implies token-11 and token-13
SyntaxVariable("rule-2", VariableTypeNamed, vector<Production>({
Production({
ProductionStep(Symbol(1), 0, rules::AssociativityNone),
}),
Production({
ProductionStep(Symbol(15, true), 0, rules::AssociativityNone),
}),
})),
// rule-3: starts only with token-15
SyntaxVariable("rule-3", VariableTypeNamed, vector<Production>({
Production({
ProductionStep(Symbol(15, true), 0, rules::AssociativityNone),
}),
}))
}, {}, {}};
it("gives the set of non-terminals that can start with any given terminal", [&]() {
auto result = symbols_by_first_symbol(grammar);
// Only tokens that appear as the first step of some production are expected
// as keys; token-12 and token-14 only ever appear in second position, so
// they have no entry. Each set also contains its own key token.
AssertThat(result, Equals(map<Symbol, set<Symbol>>({
{
// token-11: first step of rule-0, reached by rule-1 via rule-0 and by
// rule-2 via rule-1
Symbol(11, true), {
Symbol(11, true),
Symbol(0),
Symbol(1),
Symbol(2),
}
},
{
// token-13: same chain as token-11 (rule-0's second production)
Symbol(13, true), {
Symbol(13, true),
Symbol(0),
Symbol(1),
Symbol(2),
}
},
{
// token-15: first step of a production in rule-2 and in rule-3
Symbol(15, true), {
Symbol(15, true),
Symbol(2),
Symbol(3)
}
},
})));
});
});
END_TEST

View file

@ -0,0 +1,55 @@
#include "compiler/build_tables/symbols_by_first_symbol.h"
#include "compiler/syntax_grammar.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
using std::map;
using std::set;
using rules::Symbol;
// Computes, for every terminal symbol that appears as the first step of some
// production, the set of symbols that can begin with that terminal.
//
// Each returned set contains the terminal itself plus every variable that
// starts with it, either directly (the terminal is the first step of one of
// the variable's productions) or transitively (the variable's first step is
// another variable that starts with the terminal).
//
// Only the first step of each production is examined, and empty productions
// contribute nothing. Variables are identified by their position in
// `grammar.variables`.
map<Symbol, set<Symbol>> symbols_by_first_symbol(const SyntaxGrammar &grammar) {
  map<Symbol, set<Symbol>> result;

  // Seed the relation: every variable starts with itself, and with the first
  // symbol of each of its non-empty productions. Every symbol placed in any
  // set here is also made a key of `result`.
  size_t variable_index = 0;
  for (const SyntaxVariable &variable : grammar.variables) {
    Symbol symbol(variable_index);
    variable_index++;
    result[symbol].insert(symbol);
    for (const Production &production : variable.productions) {
      if (!production.empty()) {
        Symbol first_symbol = production[0].symbol;
        set<Symbol> &starters = result[first_symbol];
        starters.insert(symbol);
        starters.insert(first_symbol);
      }
    }
  }

  // Take the transitive closure: if S starts with X, then everything that
  // starts with S also starts with X. Repeat until a full pass adds nothing.
  bool done = false;
  while (!done) {
    done = true;
    for (auto &entry : result) {
      // Collect additions separately so that `entry.second` is not modified
      // while it is being iterated.
      set<Symbol> new_symbols;
      for (const Symbol &symbol : entry.second) {
        // Use find() rather than operator[] so that this lookup can never
        // insert a key into the map that is currently being iterated.
        auto found = result.find(symbol);
        if (found != result.end())
          new_symbols.insert(found->second.begin(), found->second.end());
      }
      for (const Symbol &new_symbol : new_symbols)
        if (entry.second.insert(new_symbol).second)
          done = false;
    }
  }

  // Entries keyed by variables were only needed to compute the closure;
  // callers get the entries keyed by tokens.
  for (auto iter = result.begin(); iter != result.end();) {
    if (iter->first.is_token)
      ++iter;
    else
      iter = result.erase(iter);
  }

  return result;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -0,0 +1,19 @@
#ifndef COMPILER_BUILD_TABLES_SYMBOLS_BY_FIRST_SYMBOL_H_
#define COMPILER_BUILD_TABLES_SYMBOLS_BY_FIRST_SYMBOL_H_
#include <map>
#include <set>
#include "compiler/rules/symbol.h"
namespace tree_sitter {
struct SyntaxGrammar;
namespace build_tables {
// Returns a map whose keys are the token symbols that appear as the first
// step of at least one production in the given grammar. Each key maps to the
// set of symbols that can begin with that token: the token itself, plus every
// variable that starts with it either directly or through a chain of other
// variables.
std::map<rules::Symbol, std::set<rules::Symbol>> symbols_by_first_symbol(const SyntaxGrammar &);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_SYMBOLS_BY_FIRST_SYMBOL_H_