Fix symbol names for extracted tokens
This commit is contained in:
parent
649f200831
commit
c30055ba18
10 changed files with 17018 additions and 16970 deletions
|
|
@ -7,7 +7,6 @@
|
|||
#include "compiler/util/string_helpers.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/generate_code/token_description.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
|
|
@ -56,6 +55,7 @@ namespace tree_sitter {
|
|||
const LexTable lex_table;
|
||||
const PreparedGrammar syntax_grammar;
|
||||
const PreparedGrammar lexical_grammar;
|
||||
map<string, string> sanitized_names;
|
||||
|
||||
public:
|
||||
CCodeGenerator(string name,
|
||||
|
|
@ -88,6 +88,40 @@ namespace tree_sitter {
|
|||
// Selects the grammar a symbol belongs to: token symbols come from the
// lexical grammar, all other symbols from the syntax grammar.
const PreparedGrammar & grammar_for_symbol(const rules::Symbol &symbol) {
  if (symbol.is_token())
    return lexical_grammar;
  return syntax_grammar;
}
|
||||
|
||||
// Maps `name` to a string made only of ASCII letters, digits and
// underscores that no other input has been mapped to so far. Results are
// remembered in `sanitized_names`, so the same input always yields the
// same output.
string sanitize_name(string name) {
  // Reuse the answer from an earlier call with the same input.
  auto cached = sanitized_names.find(name);
  if (cached != sanitized_names.end())
    return cached->second;

  // Drop every character that is not [A-Za-z0-9_].
  string base;
  for (char ch : name) {
    bool is_lower = ('a' <= ch && ch <= 'z');
    bool is_upper = ('A' <= ch && ch <= 'Z');
    bool is_digit = ('0' <= ch && ch <= '9');
    if (is_lower || is_upper || is_digit || ch == '_')
      base += ch;
  }

  // Try the bare name first, then base + "1", base + "2", ... until a
  // non-empty candidate is found that is not already taken.
  size_t counter = 0;
  while (true) {
    string candidate = base;
    if (counter != 0)
      candidate += to_string(counter);
    counter++;

    // An empty candidate (nothing survived filtering, no suffix yet) is
    // never accepted; move on to the first numbered candidate.
    if (candidate == "" || has_sanitized_name(candidate))
      continue;

    sanitized_names.insert({ name, candidate });
    return candidate;
  }
}
|
||||
|
||||
// Reports whether `name` already appears as a mapped value (not as a key)
// in `sanitized_names`.
bool has_sanitized_name(string name) {
  for (auto entry = sanitized_names.begin(); entry != sanitized_names.end(); ++entry) {
    if (entry->second == name)
      return true;
  }
  return false;
}
|
||||
|
||||
string symbol_id(const rules::Symbol &symbol) {
|
||||
if (symbol.is_built_in()) {
|
||||
|
|
@ -95,7 +129,7 @@ namespace tree_sitter {
|
|||
"ts_builtin_sym_error" :
|
||||
"ts_builtin_sym_end";
|
||||
} else {
|
||||
string name = grammar_for_symbol(symbol).rule_name(symbol);
|
||||
string name = sanitize_name(grammar_for_symbol(symbol).rule_name(symbol));
|
||||
if (symbol.is_auxiliary())
|
||||
return "ts_aux_sym_" + name;
|
||||
else
|
||||
|
|
@ -111,7 +145,7 @@ namespace tree_sitter {
|
|||
if (symbol.is_built_in()) {
|
||||
return (symbol == rules::ERROR()) ? "error" : "end";
|
||||
} else if (symbol.is_token() && symbol.is_auxiliary()) {
|
||||
return token_description(grammar_for_symbol(symbol).rule(symbol));
|
||||
return grammar_for_symbol(symbol).rule_name(symbol);
|
||||
} else {
|
||||
return grammar_for_symbol(symbol).rule_name(symbol);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/prepare_grammar/token_description.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::pair;
|
||||
|
|
@ -60,13 +61,13 @@ namespace tree_sitter {
|
|||
const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary);
|
||||
|
||||
class TokenExtractor : public rules::IdentityRuleFn {
|
||||
rule_ptr apply_to_token(const rules::Rule *rule) {
|
||||
auto result = rule->copy();
|
||||
rule_ptr apply_to_token(const rules::Rule *input) {
|
||||
auto rule = input->copy();
|
||||
for (size_t i = 0; i < tokens.size(); i++)
|
||||
if (tokens[i].second->operator==(*rule))
|
||||
return make_shared<Symbol>(i, SymbolOptionAuxToken);
|
||||
size_t index = tokens.size();
|
||||
tokens.push_back({ "token" + to_string(index), result });
|
||||
tokens.push_back({ token_description(rule), rule });
|
||||
return make_shared<Symbol>(index, SymbolOptionAuxToken);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,8 +5,6 @@
|
|||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
|
||||
#include "stream_methods.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::tuple;
|
||||
using std::make_tuple;
|
||||
|
|
|
|||
|
|
@ -1,14 +1,18 @@
|
|||
#include "compiler/generate_code/token_description.h"
|
||||
#include "compiler/prepare_grammar/token_description.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
|
||||
namespace generate_code {
|
||||
namespace prepare_grammar {
|
||||
class TokenDescription : public rules::RuleFn<string> {
|
||||
string apply_to(const rules::Pattern *rule) {
|
||||
return "/" + rule->value + "/";
|
||||
|
|
@ -21,6 +25,17 @@ namespace tree_sitter {
|
|||
// A metadata rule is described by the description of the rule it holds.
string apply_to(const rules::Metadata *rule) {
  const auto &wrapped = rule->rule;
  return apply(wrapped);
}
|
||||
|
||||
// Describes a sequence as "(seq <left> <right>)", where each side is the
// description of the corresponding sub-rule.
string apply_to(const rules::Seq *rule) {
  string description = "(seq ";
  description += apply(rule->left);
  description += " ";
  description += apply(rule->right);
  description += ")";
  return description;
}
|
||||
|
||||
// Describes a choice as "(choice <e1> <e2> ...)": every element's
// description is appended, each preceded by a single space.
string apply_to(const rules::Choice *rule) {
  string description("(choice");
  for (const auto &alternative : rule->elements) {
    description += " ";
    description += apply(alternative);
  }
  description += ")";
  return description;
}
|
||||
};
|
||||
|
||||
std::string token_description(const rules::rule_ptr &rule) {
|
||||
|
|
@ -5,7 +5,7 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace generate_code {
|
||||
namespace prepare_grammar {
|
||||
std::string token_description(const rules::rule_ptr &);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue