Give better symbol names to generated tokens

This should make debugging easier
This commit is contained in:
Max Brunsfeld 2014-03-27 12:54:54 -07:00
parent 2226234924
commit e1ac62edc5
14 changed files with 1017 additions and 842 deletions

View file

@ -16,17 +16,17 @@ enum {
};
SYMBOL_NAMES = {
"error",
"end",
"expression",
"factor",
"number",
"plus",
"term",
"times",
"variable",
"token1",
"token2",
[ts_sym_expression] = "expression",
[ts_sym_factor] = "factor",
[ts_sym_number] = "number",
[ts_sym_plus] = "plus",
[ts_sym_term] = "term",
[ts_sym_times] = "times",
[ts_sym_variable] = "variable",
[ts_aux_sym_token1] = "'('",
[ts_aux_sym_token2] = "')'",
[ts_builtin_sym_end] = "end",
[ts_builtin_sym_error] = "error",
};
HIDDEN_SYMBOLS = {

File diff suppressed because it is too large Load diff

View file

@ -23,24 +23,24 @@ enum {
};
SYMBOL_NAMES = {
"error",
"end",
"array",
"false",
"null",
"number",
"object",
"string",
"true",
"value",
"repeat_helper1",
"repeat_helper2",
"token1",
"token2",
"token3",
"token4",
"token5",
"token6",
[ts_sym_array] = "array",
[ts_sym_false] = "false",
[ts_sym_null] = "null",
[ts_sym_number] = "number",
[ts_sym_object] = "object",
[ts_sym_string] = "string",
[ts_sym_true] = "true",
[ts_sym_value] = "value",
[ts_aux_sym_repeat_helper1] = "repeat_helper1",
[ts_aux_sym_repeat_helper2] = "repeat_helper2",
[ts_aux_sym_token1] = "'{'",
[ts_aux_sym_token2] = "':'",
[ts_aux_sym_token3] = "','",
[ts_aux_sym_token4] = "'}'",
[ts_aux_sym_token5] = "'['",
[ts_aux_sym_token6] = "']'",
[ts_builtin_sym_end] = "end",
[ts_builtin_sym_error] = "error",
};
HIDDEN_SYMBOLS = {

View file

@ -0,0 +1,51 @@
#include "compiler_spec_helper.h"
#include "compiler/name_symbols/name_symbols.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using name_symbols::name_symbols;
START_TEST

// Specs for name_symbols::name_symbols: each symbol is mapped to the name
// that should be shown to users.
describe("assigning user-visible names to symbols", [&]() {
    // Fixture grammar. NOTE(review): the first list presumably holds the
    // user-named rules and the second the auxiliary (generated) rules --
    // confirm against PreparedGrammar's constructor.
    PreparedGrammar lexical_grammar({
        { "some_given_name", str("the-string") },
    }, {
        { "some_generated_string_name", str("the-string") },
        { "some_generated_pattern_name", pattern("the-pattern") },
    });

    // Names a single symbol against the fixture grammar above.
    auto names_for = [&](const Symbol &symbol) {
        return name_symbols::name_symbols({ symbol }, lexical_grammar);
    };

    describe("for symbols that are not in the lexical grammar (syntactic rules)", [&]() {
        it("uses the symbol's normal name", [&]() {
            auto symbol = Symbol("some_syntactic_symbol");
            map<Symbol, string> expected({
                { symbol, "some_syntactic_symbol" }
            });
            AssertThat(names_for(symbol), Equals(expected));
        });
    });

    describe("for symbols that are in the lexical grammar", [&]() {
        it("uses symbols' normal names when they are given by the user", [&]() {
            auto symbol = Symbol("some_given_name");
            map<Symbol, string> expected({
                { symbol, "some_given_name" }
            });
            AssertThat(names_for(symbol), Equals(expected));
        });

        it("assigns names to string rules based on their string value", [&]() {
            auto symbol = Symbol("some_generated_string_name", rules::SymbolTypeAuxiliary);
            map<Symbol, string> expected({
                { symbol, "'the-string'" }
            });
            AssertThat(names_for(symbol), Equals(expected));
        });

        it("assigns names to pattern rules based on their pattern value", [&]() {
            auto symbol = Symbol("some_generated_pattern_name", rules::SymbolTypeAuxiliary);
            map<Symbol, string> expected({
                { symbol, "/the-pattern/" }
            });
            AssertThat(names_for(symbol), Equals(expected));
        });
    });
});

END_TEST

View file

@ -3,11 +3,19 @@
#include "compiler/build_tables/build_tables.h"
#include "compiler/generate_code/c_code.h"
#include "compiler/prepared_grammar.h"
#include "compiler/name_symbols/name_symbols.h"
namespace tree_sitter {
std::string compile(const Grammar &grammar, std::string name) {
auto grammars = prepare_grammar::prepare_grammar(grammar);
auto tables = build_tables::build_tables(grammars.first, grammars.second);
return generate_code::c_code(name, tables.first, tables.second);
PreparedGrammar &syntax_grammar = grammars.first;
PreparedGrammar &lexical_grammar = grammars.second;
auto tables = build_tables::build_tables(syntax_grammar, lexical_grammar);
ParseTable &parse_table = tables.first;
LexTable &lex_table = tables.second;
auto symbol_names = name_symbols::name_symbols(parse_table.symbols, lexical_grammar);
return generate_code::c_code(name, parse_table, lex_table, symbol_names);
}
}

View file

@ -49,11 +49,16 @@ namespace tree_sitter {
const string name;
const ParseTable parse_table;
const LexTable lex_table;
const map<rules::Symbol, string> symbol_names;
public:
CCodeGenerator(string name, const ParseTable &parse_table, const LexTable &lex_table) :
CCodeGenerator(string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const map<rules::Symbol, string> &symbol_names) :
name(name),
parse_table(parse_table),
lex_table(lex_table)
lex_table(lex_table),
symbol_names(symbol_names)
{}
string code() {
@ -61,7 +66,7 @@ namespace tree_sitter {
includes(),
state_and_symbol_counts(),
symbol_enum(),
rule_names_list(),
symbol_names_list(),
hidden_symbols_list(),
lex_function(),
lex_states_list(),
@ -83,21 +88,6 @@ namespace tree_sitter {
}
}
string character_code(char character) {
switch (character) {
case '\0':
return "\\0";
case '"':
return "\\\"";
case '\n':
return "\\n";
case '\\':
return "\\\\";
default:
return string() + character;
}
}
string condition_for_character_range(const rules::CharacterRange &range) {
string lookahead("lookahead");
if (range.min == range.max) {
@ -191,14 +181,14 @@ namespace tree_sitter {
return result + "};";
}
string rule_names_list() {
string result = "SYMBOL_NAMES = {\n";
result += indent(string("\"") + "error") + "\",\n";
result += indent(string("\"") + "end") + "\",\n";
string symbol_names_list() {
set<rules::Symbol> symbols(parse_table.symbols);
symbols.insert(rules::Symbol("end", rules::SymbolTypeBuiltIn));
symbols.insert(rules::Symbol("error", rules::SymbolTypeBuiltIn));
string result = "SYMBOL_NAMES = {\n";
for (auto symbol : parse_table.symbols)
if (!symbol.is_built_in())
result += indent(string("\"") + symbol.name) + "\",\n";
result += indent("[" + symbol_id(symbol) + "] = \"" + symbol_names.find(symbol)->second) + "\",\n";
return result + "};";
}
@ -261,8 +251,11 @@ namespace tree_sitter {
}
};
string c_code(string name, const ParseTable &parse_table, const LexTable &lex_table) {
return CCodeGenerator(name, parse_table, lex_table).code();
string c_code(string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const map<rules::Symbol, string> &symbol_names) {
return CCodeGenerator(name, parse_table, lex_table, symbol_names).code();
}
}
}

View file

@ -2,12 +2,16 @@
#define COMPILER_GENERATE_CODE_C_CODE_H_
#include <string>
#include <map>
#include "compiler/parse_table.h"
#include "compiler/lex_table.h"
namespace tree_sitter {
namespace generate_code {
std::string c_code(std::string name, const ParseTable &parse_table, const LexTable &lex_table);
std::string c_code(std::string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const std::map<rules::Symbol, std::string> &symbol_names);
}
}

View file

@ -1,18 +1,11 @@
#include "compiler/generate_code/helpers.h"
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
using std::string;
using std::vector;
namespace generate_code {
static void str_replace(string *input, const string &search, const string &replace) {
size_t pos = 0;
while (1) {
pos = input->find(search, pos);
if (pos == string::npos) break;
input->erase(pos, search.length());
input->insert(pos, replace);
pos += replace.length();
}
}
string join(vector<string> lines, string separator) {
string result;
bool started = false;
@ -30,13 +23,23 @@ namespace tree_sitter {
string indent(string input) {
string tab = " ";
str_replace(&input, "\n", "\n" + tab);
util::str_replace(&input, "\n", "\n" + tab);
return tab + input;
}
string escape_string(string input) {
str_replace(&input, "\"", "\\\"");
return input;
// Returns the text used to spell `character` in generated source code.
// NUL, double quote, newline and backslash become their two-character
// backslash escapes; every other character is returned unchanged as a
// one-character string.
string character_code(char character) {
    if (character == '\0') return "\\0";
    if (character == '"') return "\\\"";
    if (character == '\n') return "\\n";
    if (character == '\\') return "\\\\";
    return string(1, character);
}
}
}

View file

@ -5,14 +5,11 @@
#include <vector>
namespace tree_sitter {
using std::string;
using std::vector;
namespace generate_code {
string indent(string input);
string join(vector<string> lines, string separator);
string join(vector<string> lines);
string escape_string(string input);
std::string indent(std::string input);
std::string join(std::vector<std::string> lines, std::string separator);
std::string join(std::vector<std::string> lines);
std::string character_code(char character);
}
}

View file

@ -0,0 +1,38 @@
#include "compiler/name_symbols/name_symbols.h"
#include "compiler/prepared_grammar.h"
#include "compiler/rules/rule.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/string.h"
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
namespace name_symbols {
using std::map;
using std::set;
using std::string;
// Rule visitor that builds a display name for a token rule and stores it in
// `value`: patterns are wrapped in slashes, strings in single quotes. The
// rule's text is passed through util::escape_string first.
class TokenName : public rules::RuleFn<string> {
protected:
    // Pattern rule -> /<escaped pattern>/
    virtual void visit(const rules::Pattern *pattern) {
        const string body = util::escape_string(pattern->value);
        value = "/" + body + "/";
    }

    // String rule -> '<escaped string>'
    virtual void visit(const rules::String *literal) {
        const string body = util::escape_string(literal->value);
        value = "'" + body + "'";
    }
};
// Builds the symbol -> display-name table for `symbols`.
//
// A symbol that is auxiliary and has a definition in `lexical_grammar` is
// named after its token rule via TokenName; every other symbol keeps its own
// `name`.
map<rules::Symbol, string> name_symbols(const set<rules::Symbol> &symbols,
                                        const PreparedGrammar &lexical_grammar) {
    map<rules::Symbol, string> names_by_symbol;
    for (const auto &symbol : symbols) {
        const bool is_defined_auxiliary_token =
            symbol.is_auxiliary() && lexical_grammar.has_definition(symbol);

        string display_name;
        if (is_defined_auxiliary_token)
            display_name = TokenName().apply(lexical_grammar.rule(symbol));
        else
            display_name = symbol.name;

        names_by_symbol.insert({ symbol, display_name });
    }
    return names_by_symbol;
}
}
}

View file

@ -0,0 +1,18 @@
#ifndef COMPILER_NAME_SYMBOLS_NAME_SYMBOLS_H_
#define COMPILER_NAME_SYMBOLS_NAME_SYMBOLS_H_
#include <map>
#include <set>
#include <string>
#include "compiler/rules/symbol.h"
namespace tree_sitter {
// Forward declaration only: this header uses PreparedGrammar by reference.
class PreparedGrammar;
namespace name_symbols {
// Returns a map from each symbol in `symbols` to its user-visible name.
// Auxiliary symbols that have a definition in `lexical_grammar` are named
// after their token rule ('...' for string rules, /.../ for pattern rules);
// all other symbols map to their own name.
std::map<rules::Symbol, std::string> name_symbols(const std::set<rules::Symbol> &symbols,
const PreparedGrammar &lexical_grammar);
}
}
#endif // COMPILER_NAME_SYMBOLS_NAME_SYMBOLS_H_

View file

@ -7,7 +7,6 @@
namespace tree_sitter {
namespace rules {
class Pattern : public Rule {
const std::string value;
public:
explicit Pattern(const std::string &string);
@ -17,6 +16,7 @@ namespace tree_sitter {
std::string to_string() const;
void accept(Visitor *visitor) const;
const std::string value;
rule_ptr to_rule_tree() const;
};
}

View file

@ -0,0 +1,24 @@
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
    using std::string;

    namespace util {
        // Replaces, in place, every non-overlapping occurrence of `search`
        // in `*input` with `replace`, scanning left to right. Text inserted
        // by a replacement is never rescanned, so `replace` may itself
        // contain `search`.
        //
        // An empty `search` is a no-op: it would match at every position
        // and the scan would never terminate.
        void str_replace(string *input, const string &search, const string &replace) {
            if (search.empty()) return;
            size_t pos = 0;
            while ((pos = input->find(search, pos)) != string::npos) {
                input->replace(pos, search.length(), replace);
                pos += replace.length();
            }
        }

        // Returns a copy of `input` that can be embedded between double
        // quotes in a C string literal: backslashes, double quotes and
        // newlines are replaced by their backslash escape sequences.
        string escape_string(string input) {
            // Backslashes must be handled first; otherwise the backslashes
            // introduced by the escapes below would be doubled again.
            str_replace(&input, "\\", "\\\\");
            str_replace(&input, "\"", "\\\"");
            str_replace(&input, "\n", "\\n");
            return input;
        }
    }
}

View file

@ -0,0 +1,13 @@
#ifndef COMPILER_UTIL_STRING_HELPERS_H_
#define COMPILER_UTIL_STRING_HELPERS_H_
#include <string>
namespace tree_sitter {
namespace util {
// Replaces each occurrence of `search` in `*input` with `replace`,
// modifying `*input` in place. Replaced text is not rescanned.
void str_replace(std::string *input, const std::string &search, const std::string &replace);
// Returns a copy of `input` in which, at minimum, double quotes and newline
// characters are replaced by their backslash escape sequences.
std::string escape_string(std::string input);
}
}
#endif // COMPILER_UTIL_STRING_HELPERS_H_