Fix symbol names for extracted tokens

This commit is contained in:
Max Brunsfeld 2014-05-19 22:43:33 -07:00
parent 649f200831
commit c30055ba18
10 changed files with 17018 additions and 16970 deletions

View file

@ -7,7 +7,6 @@
#include "compiler/util/string_helpers.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/prepared_grammar.h"
#include "compiler/generate_code/token_description.h"
namespace tree_sitter {
using std::string;
@ -56,6 +55,7 @@ namespace tree_sitter {
const LexTable lex_table;
const PreparedGrammar syntax_grammar;
const PreparedGrammar lexical_grammar;
// Memo of rule name -> sanitized name, filled in by sanitize_name() so that
// each distinct rule name is sanitized once and maps to a unique result.
map<string, string> sanitized_names;
public:
CCodeGenerator(string name,
@ -88,6 +88,40 @@ namespace tree_sitter {
// Picks the grammar that owns `symbol`: token symbols belong to the lexical
// grammar, every other symbol belongs to the syntax grammar.
const PreparedGrammar & grammar_for_symbol(const rules::Symbol &symbol) {
    if (symbol.is_token())
        return lexical_grammar;
    return syntax_grammar;
}
// Converts an arbitrary rule name into a name made only of ASCII letters,
// digits and underscores, guaranteed to be non-empty and distinct from every
// sanitized name produced so far. Results are memoized in `sanitized_names`,
// so asking for the same input again returns the same output.
string sanitize_name(string name) {
    // Fast path: this exact name has been sanitized before.
    auto cached = sanitized_names.find(name);
    if (cached != sanitized_names.end())
        return cached->second;

    // Drop every character that is not [A-Za-z0-9_].
    string base;
    for (char ch : name) {
        bool is_lower = ('a' <= ch && ch <= 'z');
        bool is_upper = ('A' <= ch && ch <= 'Z');
        bool is_digit = ('0' <= ch && ch <= '9');
        if (is_lower || is_upper || is_digit || ch == '_')
            base += ch;
    }

    // Probe `base`, then `base1`, `base2`, ... until a candidate is both
    // non-empty and not already taken by another rule name.
    size_t counter = 0;
    while (true) {
        string candidate = base;
        if (counter != 0)
            candidate += to_string(counter);
        counter++;

        if (candidate == "" || has_sanitized_name(candidate))
            continue;

        sanitized_names.insert({ name, candidate });
        return candidate;
    }
}
// Tells whether `name` is already in use as a sanitized name, i.e. whether it
// appears as a mapped value (not a key) in `sanitized_names`.
bool has_sanitized_name(string name) {
    auto entry = sanitized_names.begin();
    while (entry != sanitized_names.end()) {
        if (entry->second == name)
            return true;
        ++entry;
    }
    return false;
}
string symbol_id(const rules::Symbol &symbol) {
if (symbol.is_built_in()) {
@ -95,7 +129,7 @@ namespace tree_sitter {
"ts_builtin_sym_error" :
"ts_builtin_sym_end";
} else {
string name = grammar_for_symbol(symbol).rule_name(symbol);
string name = sanitize_name(grammar_for_symbol(symbol).rule_name(symbol));
if (symbol.is_auxiliary())
return "ts_aux_sym_" + name;
else
@ -111,7 +145,7 @@ namespace tree_sitter {
if (symbol.is_built_in()) {
return (symbol == rules::ERROR()) ? "error" : "end";
} else if (symbol.is_token() && symbol.is_auxiliary()) {
return token_description(grammar_for_symbol(symbol).rule(symbol));
return grammar_for_symbol(symbol).rule_name(symbol);
} else {
return grammar_for_symbol(symbol).rule_name(symbol);
}

View file

@ -9,6 +9,7 @@
#include "compiler/rules/string.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/pattern.h"
#include "compiler/prepare_grammar/token_description.h"
namespace tree_sitter {
using std::pair;
@ -60,13 +61,13 @@ namespace tree_sitter {
// Option flags for an auxiliary token symbol: the bitwise OR of
// SymbolOptionToken and SymbolOptionAuxiliary, cast back to SymbolOption.
const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary);
class TokenExtractor : public rules::IdentityRuleFn {
rule_ptr apply_to_token(const rules::Rule *rule) {
auto result = rule->copy();
rule_ptr apply_to_token(const rules::Rule *input) {
auto rule = input->copy();
for (size_t i = 0; i < tokens.size(); i++)
if (tokens[i].second->operator==(*rule))
return make_shared<Symbol>(i, SymbolOptionAuxToken);
size_t index = tokens.size();
tokens.push_back({ "token" + to_string(index), result });
tokens.push_back({ token_description(rule), rule });
return make_shared<Symbol>(index, SymbolOptionAuxToken);
}

View file

@ -5,8 +5,6 @@
#include "compiler/prepare_grammar/expand_tokens.h"
#include "compiler/prepare_grammar/intern_symbols.h"
#include "stream_methods.h"
namespace tree_sitter {
using std::tuple;
using std::make_tuple;

View file

@ -1,14 +1,18 @@
#include "compiler/generate_code/token_description.h"
#include "compiler/prepare_grammar/token_description.h"
#include "tree_sitter/compiler.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/string.h"
#include "compiler/rules/metadata.h"
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
using std::string;
namespace generate_code {
namespace prepare_grammar {
class TokenDescription : public rules::RuleFn<string> {
string apply_to(const rules::Pattern *rule) {
return "/" + rule->value + "/";
@ -21,6 +25,17 @@ namespace tree_sitter {
// A metadata node contributes nothing of its own to the description; the
// result is simply the description of the rule it wraps.
string apply_to(const rules::Metadata *rule) {
    const auto &wrapped = rule->rule;
    return apply(wrapped);
}
// Describes a sequence as "(seq <left> <right>)", where each side is the
// description of the corresponding sub-rule.
string apply_to(const rules::Seq *rule) {
    string description = "(seq ";
    description += apply(rule->left);
    description += " ";
    description += apply(rule->right);
    description += ")";
    return description;
}
// Describes a choice as "(choice <e1> <e2> ...)": the description of every
// alternative, each preceded by a single space, inside one pair of parens.
string apply_to(const rules::Choice *rule) {
    string description("(choice");
    for (const auto &alternative : rule->elements) {
        description += " ";
        description += apply(alternative);
    }
    description += ")";
    return description;
}
};
std::string token_description(const rules::rule_ptr &rule) {

View file

@ -5,7 +5,7 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace generate_code {
namespace prepare_grammar {
// Returns a string describing the given rule. Declaration only; the
// definition is not part of this header.
std::string token_description(const rules::rule_ptr &);
}
}