Give better symbol names to generated tokens
This should make debugging easier
This commit is contained in:
parent
2226234924
commit
e1ac62edc5
14 changed files with 1017 additions and 842 deletions
|
|
@ -16,17 +16,17 @@ enum {
|
|||
};
|
||||
|
||||
SYMBOL_NAMES = {
|
||||
"error",
|
||||
"end",
|
||||
"expression",
|
||||
"factor",
|
||||
"number",
|
||||
"plus",
|
||||
"term",
|
||||
"times",
|
||||
"variable",
|
||||
"token1",
|
||||
"token2",
|
||||
[ts_sym_expression] = "expression",
|
||||
[ts_sym_factor] = "factor",
|
||||
[ts_sym_number] = "number",
|
||||
[ts_sym_plus] = "plus",
|
||||
[ts_sym_term] = "term",
|
||||
[ts_sym_times] = "times",
|
||||
[ts_sym_variable] = "variable",
|
||||
[ts_aux_sym_token1] = "'('",
|
||||
[ts_aux_sym_token2] = "')'",
|
||||
[ts_builtin_sym_end] = "end",
|
||||
[ts_builtin_sym_error] = "error",
|
||||
};
|
||||
|
||||
HIDDEN_SYMBOLS = {
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -23,24 +23,24 @@ enum {
|
|||
};
|
||||
|
||||
SYMBOL_NAMES = {
|
||||
"error",
|
||||
"end",
|
||||
"array",
|
||||
"false",
|
||||
"null",
|
||||
"number",
|
||||
"object",
|
||||
"string",
|
||||
"true",
|
||||
"value",
|
||||
"repeat_helper1",
|
||||
"repeat_helper2",
|
||||
"token1",
|
||||
"token2",
|
||||
"token3",
|
||||
"token4",
|
||||
"token5",
|
||||
"token6",
|
||||
[ts_sym_array] = "array",
|
||||
[ts_sym_false] = "false",
|
||||
[ts_sym_null] = "null",
|
||||
[ts_sym_number] = "number",
|
||||
[ts_sym_object] = "object",
|
||||
[ts_sym_string] = "string",
|
||||
[ts_sym_true] = "true",
|
||||
[ts_sym_value] = "value",
|
||||
[ts_aux_sym_repeat_helper1] = "repeat_helper1",
|
||||
[ts_aux_sym_repeat_helper2] = "repeat_helper2",
|
||||
[ts_aux_sym_token1] = "'{'",
|
||||
[ts_aux_sym_token2] = "':'",
|
||||
[ts_aux_sym_token3] = "','",
|
||||
[ts_aux_sym_token4] = "'}'",
|
||||
[ts_aux_sym_token5] = "'['",
|
||||
[ts_aux_sym_token6] = "']'",
|
||||
[ts_builtin_sym_end] = "end",
|
||||
[ts_builtin_sym_error] = "error",
|
||||
};
|
||||
|
||||
HIDDEN_SYMBOLS = {
|
||||
|
|
|
|||
51
spec/compiler/name_symbols/name_symbols_spec.cc
Normal file
51
spec/compiler/name_symbols/name_symbols_spec.cc
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
#include "compiler_spec_helper.h"
|
||||
#include "compiler/name_symbols/name_symbols.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using name_symbols::name_symbols;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("assigning user-visible names to symbols", [&]() {
|
||||
PreparedGrammar lexical_grammar({
|
||||
{ "some_given_name", str("the-string") },
|
||||
}, {
|
||||
{ "some_generated_string_name", str("the-string") },
|
||||
{ "some_generated_pattern_name", pattern("the-pattern") },
|
||||
});
|
||||
|
||||
describe("for symbols that are not in the lexical grammar (syntactic rules)", [&]() {
|
||||
it("uses the symbol's normal name", [&]() {
|
||||
auto symbol = Symbol("some_syntactic_symbol");
|
||||
AssertThat(name_symbols::name_symbols({ symbol }, lexical_grammar), Equals(map<Symbol, string>({
|
||||
{ symbol, "some_syntactic_symbol" }
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
describe("for symbols that are in the lexical grammar", [&]() {
|
||||
it("uses symbols' normal names when they are given by the user", [&]() {
|
||||
auto symbol = Symbol("some_given_name");
|
||||
AssertThat(name_symbols::name_symbols({ symbol }, lexical_grammar), Equals(map<Symbol, string>({
|
||||
{ symbol, "some_given_name" }
|
||||
})));
|
||||
});
|
||||
|
||||
it("assigns names to string rules based on their string value", [&]() {
|
||||
auto symbol = Symbol("some_generated_string_name", rules::SymbolTypeAuxiliary);
|
||||
AssertThat(name_symbols::name_symbols({ symbol }, lexical_grammar), Equals(map<Symbol, string>({
|
||||
{ symbol, "'the-string'" }
|
||||
})));
|
||||
});
|
||||
|
||||
it("assigns names to pattern rules based on their pattern value", [&]() {
|
||||
auto symbol = Symbol("some_generated_pattern_name", rules::SymbolTypeAuxiliary);
|
||||
AssertThat(name_symbols::name_symbols({ symbol }, lexical_grammar), Equals(map<Symbol, string>({
|
||||
{ symbol, "/the-pattern/" }
|
||||
})));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -3,11 +3,19 @@
|
|||
#include "compiler/build_tables/build_tables.h"
|
||||
#include "compiler/generate_code/c_code.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/name_symbols/name_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
std::string compile(const Grammar &grammar, std::string name) {
|
||||
auto grammars = prepare_grammar::prepare_grammar(grammar);
|
||||
auto tables = build_tables::build_tables(grammars.first, grammars.second);
|
||||
return generate_code::c_code(name, tables.first, tables.second);
|
||||
PreparedGrammar &syntax_grammar = grammars.first;
|
||||
PreparedGrammar &lexical_grammar = grammars.second;
|
||||
|
||||
auto tables = build_tables::build_tables(syntax_grammar, lexical_grammar);
|
||||
ParseTable &parse_table = tables.first;
|
||||
LexTable &lex_table = tables.second;
|
||||
|
||||
auto symbol_names = name_symbols::name_symbols(parse_table.symbols, lexical_grammar);
|
||||
return generate_code::c_code(name, parse_table, lex_table, symbol_names);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,11 +49,16 @@ namespace tree_sitter {
|
|||
const string name;
|
||||
const ParseTable parse_table;
|
||||
const LexTable lex_table;
|
||||
const map<rules::Symbol, string> symbol_names;
|
||||
public:
|
||||
CCodeGenerator(string name, const ParseTable &parse_table, const LexTable &lex_table) :
|
||||
CCodeGenerator(string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const map<rules::Symbol, string> &symbol_names) :
|
||||
name(name),
|
||||
parse_table(parse_table),
|
||||
lex_table(lex_table)
|
||||
lex_table(lex_table),
|
||||
symbol_names(symbol_names)
|
||||
{}
|
||||
|
||||
string code() {
|
||||
|
|
@ -61,7 +66,7 @@ namespace tree_sitter {
|
|||
includes(),
|
||||
state_and_symbol_counts(),
|
||||
symbol_enum(),
|
||||
rule_names_list(),
|
||||
symbol_names_list(),
|
||||
hidden_symbols_list(),
|
||||
lex_function(),
|
||||
lex_states_list(),
|
||||
|
|
@ -83,21 +88,6 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
string character_code(char character) {
|
||||
switch (character) {
|
||||
case '\0':
|
||||
return "\\0";
|
||||
case '"':
|
||||
return "\\\"";
|
||||
case '\n':
|
||||
return "\\n";
|
||||
case '\\':
|
||||
return "\\\\";
|
||||
default:
|
||||
return string() + character;
|
||||
}
|
||||
}
|
||||
|
||||
string condition_for_character_range(const rules::CharacterRange &range) {
|
||||
string lookahead("lookahead");
|
||||
if (range.min == range.max) {
|
||||
|
|
@ -191,14 +181,14 @@ namespace tree_sitter {
|
|||
return result + "};";
|
||||
}
|
||||
|
||||
string rule_names_list() {
|
||||
string result = "SYMBOL_NAMES = {\n";
|
||||
result += indent(string("\"") + "error") + "\",\n";
|
||||
result += indent(string("\"") + "end") + "\",\n";
|
||||
string symbol_names_list() {
|
||||
set<rules::Symbol> symbols(parse_table.symbols);
|
||||
symbols.insert(rules::Symbol("end", rules::SymbolTypeBuiltIn));
|
||||
symbols.insert(rules::Symbol("error", rules::SymbolTypeBuiltIn));
|
||||
|
||||
string result = "SYMBOL_NAMES = {\n";
|
||||
for (auto symbol : parse_table.symbols)
|
||||
if (!symbol.is_built_in())
|
||||
result += indent(string("\"") + symbol.name) + "\",\n";
|
||||
result += indent("[" + symbol_id(symbol) + "] = \"" + symbol_names.find(symbol)->second) + "\",\n";
|
||||
return result + "};";
|
||||
}
|
||||
|
||||
|
|
@ -261,8 +251,11 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
string c_code(string name, const ParseTable &parse_table, const LexTable &lex_table) {
|
||||
return CCodeGenerator(name, parse_table, lex_table).code();
|
||||
string c_code(string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const map<rules::Symbol, string> &symbol_names) {
|
||||
return CCodeGenerator(name, parse_table, lex_table, symbol_names).code();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2,12 +2,16 @@
|
|||
#define COMPILER_GENERATE_CODE_C_CODE_H_
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace generate_code {
|
||||
std::string c_code(std::string name, const ParseTable &parse_table, const LexTable &lex_table);
|
||||
std::string c_code(std::string name,
|
||||
const ParseTable &parse_table,
|
||||
const LexTable &lex_table,
|
||||
const std::map<rules::Symbol, std::string> &symbol_names);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,18 +1,11 @@
|
|||
#include "compiler/generate_code/helpers.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
namespace generate_code {
|
||||
static void str_replace(string *input, const string &search, const string &replace) {
|
||||
size_t pos = 0;
|
||||
while (1) {
|
||||
pos = input->find(search, pos);
|
||||
if (pos == string::npos) break;
|
||||
input->erase(pos, search.length());
|
||||
input->insert(pos, replace);
|
||||
pos += replace.length();
|
||||
}
|
||||
}
|
||||
|
||||
string join(vector<string> lines, string separator) {
|
||||
string result;
|
||||
bool started = false;
|
||||
|
|
@ -30,13 +23,23 @@ namespace tree_sitter {
|
|||
|
||||
string indent(string input) {
|
||||
string tab = " ";
|
||||
str_replace(&input, "\n", "\n" + tab);
|
||||
util::str_replace(&input, "\n", "\n" + tab);
|
||||
return tab + input;
|
||||
}
|
||||
|
||||
string escape_string(string input) {
|
||||
str_replace(&input, "\"", "\\\"");
|
||||
return input;
|
||||
|
||||
string character_code(char character) {
|
||||
switch (character) {
|
||||
case '\0':
|
||||
return "\\0";
|
||||
case '"':
|
||||
return "\\\"";
|
||||
case '\n':
|
||||
return "\\n";
|
||||
case '\\':
|
||||
return "\\\\";
|
||||
default:
|
||||
return string() + character;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -5,14 +5,11 @@
|
|||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
namespace generate_code {
|
||||
string indent(string input);
|
||||
string join(vector<string> lines, string separator);
|
||||
string join(vector<string> lines);
|
||||
string escape_string(string input);
|
||||
std::string indent(std::string input);
|
||||
std::string join(std::vector<std::string> lines, std::string separator);
|
||||
std::string join(std::vector<std::string> lines);
|
||||
std::string character_code(char character);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
38
src/compiler/name_symbols/name_symbols.cc
Normal file
38
src/compiler/name_symbols/name_symbols.cc
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
#include "compiler/name_symbols/name_symbols.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/rule.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace name_symbols {
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::string;
|
||||
|
||||
// Computes a display name for a token from the rule that defines it.
// Only String and Pattern rules are handled here; for any other rule type the
// result is whatever rules::RuleFn<string> produces by default (not visible in
// this file — TODO confirm).
class TokenName : public rules::RuleFn<string> {
protected:
    // A string token is shown as its escaped text wrapped in single quotes.
    virtual void visit(const rules::String *rule) {
        const string escaped_text = util::escape_string(rule->value);
        value = "'" + escaped_text + "'";
    }

    // A pattern token is shown as its escaped source wrapped in slashes.
    virtual void visit(const rules::Pattern *rule) {
        const string escaped_pattern = util::escape_string(rule->value);
        value = "/" + escaped_pattern + "/";
    }
};
|
||||
|
||||
// Builds the table of display names for `symbols`.
//
// An auxiliary symbol that has a definition in `lexical_grammar` is named
// after its defining rule (via TokenName); every other symbol keeps its own
// `name`.
//
// Returns a map holding one entry per symbol in `symbols`.
map<rules::Symbol, string> name_symbols(const set<rules::Symbol> &symbols,
                                        const PreparedGrammar &lexical_grammar) {
    map<rules::Symbol, string> names;

    for (const rules::Symbol &symbol : symbols) {
        const bool is_generated_token =
            symbol.is_auxiliary() && lexical_grammar.has_definition(symbol);

        string display_name;
        if (is_generated_token) {
            display_name = TokenName().apply(lexical_grammar.rule(symbol));
        } else {
            display_name = symbol.name;
        }

        names.insert({ symbol, display_name });
    }

    return names;
}
|
||||
}
|
||||
}
|
||||
18
src/compiler/name_symbols/name_symbols.h
Normal file
18
src/compiler/name_symbols/name_symbols.h
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
#ifndef COMPILER_NAME_SYMBOLS_NAME_SYMBOLS_H_
|
||||
#define COMPILER_NAME_SYMBOLS_NAME_SYMBOLS_H_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace name_symbols {
|
||||
std::map<rules::Symbol, std::string> name_symbols(const std::set<rules::Symbol> &symbols,
|
||||
const PreparedGrammar &lexical_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_NAME_SYMBOLS_NAME_SYMBOLS_H_
|
||||
|
|
@ -7,7 +7,6 @@
|
|||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Pattern : public Rule {
|
||||
const std::string value;
|
||||
public:
|
||||
explicit Pattern(const std::string &string);
|
||||
|
||||
|
|
@ -17,6 +16,7 @@ namespace tree_sitter {
|
|||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
const std::string value;
|
||||
rule_ptr to_rule_tree() const;
|
||||
};
|
||||
}
|
||||
|
|
|
|||
24
src/compiler/util/string_helpers.cpp
Normal file
24
src/compiler/util/string_helpers.cpp
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
|
||||
namespace util {
|
||||
// Replaces, in place, every non-overlapping occurrence of `search` in `*input`
// with `replace`, scanning left to right.
//
// Text inserted by a replacement is never rescanned, so a `replace` that
// contains `search` (e.g. "a" -> "aa") does not recurse.
//
// An empty `search` leaves `*input` unchanged: it would otherwise match at
// every position without consuming any input, and the scan would never end.
void str_replace(std::string *input, const std::string &search, const std::string &replace) {
    if (search.empty()) return;

    std::string::size_type pos = 0;
    while ((pos = input->find(search, pos)) != std::string::npos) {
        input->replace(pos, search.length(), replace);
        // Resume after the inserted text so it is not matched again.
        pos += replace.length();
    }
}
|
||||
|
||||
// Returns `input` escaped so that it can be placed between double quotes in
// generated C source.
//
// Escapes:
//   backslash     ->  \\
//   double quote  ->  \"
//   newline       ->  \n  (the two characters backslash + 'n')
//
// Backslashes must be escaped too: left as-is, a value such as `\d+` would
// produce an invalid escape sequence, and a backslash directly before a quote
// would cancel that quote's escape and terminate the literal early. A single
// pass over the input keeps the backslashes added here from being escaped a
// second time.
std::string escape_string(std::string input) {
    std::string escaped;
    escaped.reserve(input.size());

    for (const char character : input) {
        switch (character) {
            case '\\':
                escaped += "\\\\";
                break;
            case '"':
                escaped += "\\\"";
                break;
            case '\n':
                escaped += "\\n";
                break;
            default:
                escaped += character;
                break;
        }
    }

    return escaped;
}
|
||||
}
|
||||
}
|
||||
13
src/compiler/util/string_helpers.h
Normal file
13
src/compiler/util/string_helpers.h
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
#ifndef COMPILER_UTIL_STRING_HELPERS_H_
|
||||
#define COMPILER_UTIL_STRING_HELPERS_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace util {
|
||||
void str_replace(std::string *input, const std::string &search, const std::string &replace);
|
||||
std::string escape_string(std::string input);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_UTIL_STRING_HELPERS_H_
|
||||
Loading…
Add table
Add a link
Reference in a new issue