Merge pull request #7 from maxbrunsfeld/more-string-escaping

Escape backslashes and quotes in symbol name strings
This commit is contained in:
Max Brunsfeld 2015-11-09 09:36:53 -08:00
commit d6f87fbb6b
4 changed files with 191 additions and 0 deletions

View file

@ -16,6 +16,7 @@ extern const Grammar json;
extern const Grammar golang;
extern const Grammar c;
extern const Grammar cpp;
extern const Grammar anonymous_tokens;
} // namespace tree_sitter_examples
@ -39,12 +40,17 @@ describe("compiling the example grammars", []() {
});
};
// example languages
compile_grammar(tree_sitter_examples::arithmetic, "arithmetic");
compile_grammar(tree_sitter_examples::json, "json");
compile_grammar(tree_sitter_examples::javascript, "javascript");
compile_grammar(tree_sitter_examples::golang, "golang");
compile_grammar(tree_sitter_examples::c, "c");
compile_grammar(tree_sitter_examples::cpp, "cpp");
// edge cases
compile_grammar(tree_sitter_examples::anonymous_tokens, "anonymous_tokens");
});
END_TEST

View file

@ -0,0 +1,16 @@
#include "tree_sitter/compiler.h"
#include "helpers.h"
namespace tree_sitter_examples {
// Edge-case grammar in which every token is declared inline with str() or
// pattern() instead of as a separately named rule. The string tokens contain
// a newline, a carriage return and double quotes, i.e. characters that cannot
// appear unescaped inside a C string literal.
extern const Grammar anonymous_tokens = Grammar({
// The single rule: "program" is exactly one of the four tokens listed here.
{ "program", choice({
str("\n"),
str("\r"),
pattern("\\d"),
str("\"hello\"") }) },
// NOTE(review): ubiquitous_tokens presumably lists tokens that may appear
// anywhere between other tokens (here: whitespace) -- confirm in compiler.h.
}).ubiquitous_tokens({
pattern("\\s"),
});
} // namespace tree_sitter_examples

167
spec/fixtures/parsers/anonymous_tokens.c vendored Normal file
View file

@ -0,0 +1,167 @@
#include "tree_sitter/parser.h"
/* Table dimensions for this parser: three parse states (indices 0-2 in the
 * tables below) and seven symbols (the five enumerators declared below plus
 * ts_builtin_sym_error and ts_builtin_sym_end, which also have entries in
 * the symbol-name table). */
#define STATE_COUNT 3
#define SYMBOL_COUNT 7
/* Symbol ids for this grammar, numbered upward from ts_builtin_sym_start.
 * The enumerator names spell punctuation out as words: LF, CR, SLASH, BSLASH
 * and DQUOTE stand for the characters that appear in the matching entries
 * of ts_symbol_names. */
enum {
sym_program = ts_builtin_sym_start,
anon_sym_LF,
anon_sym_CR,
aux_sym_SLASH_BSLASHd_SLASH,
anon_sym_DQUOTEhello_DQUOTE,
};
/* Display name of each symbol, indexed by symbol id. The last four entries
 * are written with C escape sequences (newline, carriage return, backslash
 * and double quote) so that the literals remain valid C strings. */
static const char *ts_symbol_names[] = {
[sym_program] = "program",
[ts_builtin_sym_error] = "ERROR",
[ts_builtin_sym_end] = "END",
[anon_sym_LF] = "\n",
[anon_sym_CR] = "\r",
[aux_sym_SLASH_BSLASHd_SLASH] = "/\\d/",
[anon_sym_DQUOTEhello_DQUOTE] = "\"hello\"",
};
/* Node type of each symbol, indexed by symbol id. In this table the program
 * rule and the error symbol are Named, the three string tokens are
 * Anonymous, and the end symbol and the digit-pattern token are Hidden. */
static const TSNodeType ts_node_types[SYMBOL_COUNT] = {
[sym_program] = TSNodeTypeNamed,
[ts_builtin_sym_error] = TSNodeTypeNamed,
[ts_builtin_sym_end] = TSNodeTypeHidden,
[anon_sym_LF] = TSNodeTypeAnonymous,
[anon_sym_CR] = TSNodeTypeAnonymous,
[aux_sym_SLASH_BSLASHd_SLASH] = TSNodeTypeHidden,
[anon_sym_DQUOTEhello_DQUOTE] = TSNodeTypeAnonymous,
};
/* Lexer for this grammar, written as a switch over lex_state.
 *
 * NOTE(review): START_LEXER, START_TOKEN, ADVANCE, ACCEPT_TOKEN and
 * LEX_ERROR are macros defined outside this file (presumably in
 * tree_sitter/parser.h), and `lookahead` is presumably introduced by them.
 * The per-state comments below describe only the transitions visible here. */
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
START_LEXER();
switch (lex_state) {
/* State 1: entry state for parse state 0 (see ts_lex_states). Loops on tab
 * and space, then dispatches on the first character of a token. */
case 1:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == ' '))
ADVANCE(1);
if (lookahead == '\n')
ADVANCE(2);
if (lookahead == '\r')
ADVANCE(3);
if (lookahead == '\"')
ADVANCE(4);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(11);
LEX_ERROR();
/* States 2 and 3: a newline / carriage return has been read; accept it. */
case 2:
START_TOKEN();
ACCEPT_TOKEN(anon_sym_LF);
case 3:
START_TOKEN();
ACCEPT_TOKEN(anon_sym_CR);
/* States 4-9: after an opening double quote, require the characters
 * h, e, l, l, o and a closing double quote, one per state. */
case 4:
if (lookahead == 'h')
ADVANCE(5);
LEX_ERROR();
case 5:
if (lookahead == 'e')
ADVANCE(6);
LEX_ERROR();
case 6:
if (lookahead == 'l')
ADVANCE(7);
LEX_ERROR();
case 7:
if (lookahead == 'l')
ADVANCE(8);
LEX_ERROR();
case 8:
if (lookahead == 'o')
ADVANCE(9);
LEX_ERROR();
case 9:
if (lookahead == '\"')
ADVANCE(10);
LEX_ERROR();
/* State 10: the whole quoted hello token has been read; accept it. */
case 10:
ACCEPT_TOKEN(anon_sym_DQUOTEhello_DQUOTE);
/* State 11: a single digit has been read; accept the digit-pattern token. */
case 11:
ACCEPT_TOKEN(aux_sym_SLASH_BSLASHd_SLASH);
/* State 12: entry state for parse states 1 and 2. Loops on tab, newline,
 * carriage return and space; the only other accepted input is lookahead 0,
 * which leads to the end symbol. */
case 12:
START_TOKEN();
if (lookahead == 0)
ADVANCE(13);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(12);
LEX_ERROR();
/* State 13: accept the built-in end symbol. */
case 13:
ACCEPT_TOKEN(ts_builtin_sym_end);
/* State 14: same dispatch as state 1, plus the lookahead-0 transition to the
 * end symbol. Within this function it is entered only from itself and from
 * the error state below. */
case 14:
START_TOKEN();
if (lookahead == 0)
ADVANCE(13);
if ((lookahead == '\t') ||
(lookahead == ' '))
ADVANCE(14);
if (lookahead == '\n')
ADVANCE(15);
if (lookahead == '\r')
ADVANCE(16);
if (lookahead == '\"')
ADVANCE(4);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(11);
LEX_ERROR();
/* States 15 and 16: counterparts of states 2 and 3, reached from state 14
 * and from the error state. */
case 15:
START_TOKEN();
ACCEPT_TOKEN(anon_sym_LF);
case 16:
START_TOKEN();
ACCEPT_TOKEN(anon_sym_CR);
/* Error state: has the same transitions as state 14, so any token of the
 * grammar or the end symbol can be recognized from here. */
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)
ADVANCE(13);
if ((lookahead == '\t') ||
(lookahead == ' '))
ADVANCE(14);
if (lookahead == '\n')
ADVANCE(15);
if (lookahead == '\r')
ADVANCE(16);
if (lookahead == '\"')
ADVANCE(4);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(11);
LEX_ERROR();
/* Any lex state not listed above is an error. */
default:
LEX_ERROR();
}
}
/* Lex state in which ts_lex is started for each parse state: parse state 0
 * uses lex state 1 (expects a token), parse states 1 and 2 use lex state 12
 * (expects only whitespace followed by the end symbol). */
static TSStateId ts_lex_states[STATE_COUNT] = {
[0] = 1,
[1] = 12,
[2] = 12,
};
/* Parse table indexed by [parse state][symbol id]. The table is sparse: only
 * some symbols have an entry per state, and -Wmissing-field-initializers is
 * disabled for the duration of the table and restored afterwards.
 *   state 0: shifts any of the four tokens to state 2, sym_program to state 1
 *   state 1: accepts the input on the end symbol
 *   state 2: on the end symbol, reduces one child to sym_program */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
static const TSParseAction *ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[0] = {
[sym_program] = ACTIONS(SHIFT(1)),
[anon_sym_LF] = ACTIONS(SHIFT(2)),
[anon_sym_CR] = ACTIONS(SHIFT(2)),
[aux_sym_SLASH_BSLASHd_SLASH] = ACTIONS(SHIFT(2)),
[anon_sym_DQUOTEhello_DQUOTE] = ACTIONS(SHIFT(2)),
},
[1] = {
[ts_builtin_sym_end] = ACTIONS(ACCEPT_INPUT()),
},
[2] = {
[ts_builtin_sym_end] = ACTIONS(REDUCE(sym_program, 1)),
},
};
#pragma GCC diagnostic pop
/* NOTE(review): EXPORT_LANGUAGE is defined outside this file (presumably in
 * tree_sitter/parser.h); it appears to publish the tables above under the
 * name ts_language_anonymous_tokens -- confirm against the header. */
EXPORT_LANGUAGE(ts_language_anonymous_tokens);

View file

@ -411,8 +411,10 @@ class CCodeGenerator {
}
// Returns `name` rewritten so it can be emitted between double quotes in the
// generated C source: backslashes, newlines, carriage returns and double
// quotes are replaced by their escape sequences.
string sanitize_name_for_string(string name) {
  // Each row is { text to find, replacement }. The backslash row has to stay
  // first; otherwise the backslashes introduced by the later rows would
  // themselves be doubled.
  static const char *const escape_table[][2] = {
    { "\\", "\\\\" },
    { "\n", "\\n" },
    { "\r", "\\r" },
    { "\"", "\\\"" },
  };

  for (const auto &escape : escape_table)
    util::str_replace(&name, escape[0], escape[1]);

  return name;
}