From 9366f11dcb605c5d92b4332b2041cca6c94c25d6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 7 Aug 2014 08:12:15 -0700 Subject: [PATCH] In generated C, only format printable characters as char literals --- examples/parsers/arithmetic.c | 6 ++-- examples/parsers/golang.c | 16 ++++----- examples/parsers/javascript.c | 54 ++++++++++++++-------------- examples/parsers/json.c | 7 ++-- src/compiler/generate_code/c_code.cc | 7 ++-- src/compiler/util/string_helpers.cc | 23 +++++++----- 6 files changed, 58 insertions(+), 55 deletions(-) diff --git a/examples/parsers/arithmetic.c b/examples/parsers/arithmetic.c index 07880f0c..7e79db8f 100644 --- a/examples/parsers/arithmetic.c +++ b/examples/parsers/arithmetic.c @@ -85,7 +85,7 @@ LEX_FN() { ACCEPT_TOKEN(ts_sym_variable); case 5: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(6); if (('\t' <= lookahead && lookahead <= '\n') || (lookahead == '\r') || @@ -147,7 +147,7 @@ LEX_FN() { LEX_ERROR(); case 15: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(6); if (('\t' <= lookahead && lookahead <= '\n') || (lookahead == '\r') || @@ -175,7 +175,7 @@ LEX_FN() { LEX_ERROR(); case ts_lex_state_error: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(6); if (('\t' <= lookahead && lookahead <= '\n') || (lookahead == '\r') || diff --git a/examples/parsers/golang.c b/examples/parsers/golang.c index 15c9a5cc..cbc6a529 100644 --- a/examples/parsers/golang.c +++ b/examples/parsers/golang.c @@ -291,7 +291,7 @@ LEX_FN() { ACCEPT_TOKEN(ts_aux_sym_package); case 12: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(13); if ((lookahead == '\t') || (lookahead == '\r') || @@ -306,7 +306,7 @@ LEX_FN() { ACCEPT_TOKEN(ts_builtin_sym_end); case 14: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(13); if ((lookahead == '\t') || (lookahead == '\r') || @@ -408,7 +408,7 @@ LEX_FN() { ACCEPT_TOKEN(ts_sym__identifier); case 34: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(13); if ((lookahead == '\t') || (lookahead == '\r') || @@ -1720,7 +1720,7 @@ LEX_FN() { LEX_ERROR(); case 148: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(13); if ((lookahead == '\t') || (lookahead == '\r') || @@ -1741,7 +1741,7 @@ LEX_FN() { LEX_ERROR(); case 149: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(13); if ((lookahead == '\t') || (lookahead == '\r') || @@ -1782,7 +1782,7 @@ LEX_FN() { LEX_ERROR(); case 150: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(13); if ((lookahead == '\t') || (lookahead == '\r') || @@ -1805,7 +1805,7 @@ LEX_FN() { LEX_ERROR(); case 151: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(13); if ((lookahead == '\t') || (lookahead == '\r') || @@ -2144,7 +2144,7 @@ LEX_FN() { ACCEPT_TOKEN(ts_aux_sym_type); case ts_lex_state_error: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(13); if ((lookahead == '\t') || (lookahead == '\r') || diff --git a/examples/parsers/javascript.c b/examples/parsers/javascript.c index e45f9b8b..bc2a6a13 100644 --- a/examples/parsers/javascript.c +++ b/examples/parsers/javascript.c @@ -299,7 +299,7 @@ LEX_FN() { switch (lex_state) { case 1: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -1333,7 +1333,7 @@ LEX_FN() { ACCEPT_TOKEN(ts_aux_sym_1); case 111: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -7381,7 +7381,7 @@ LEX_FN() { LEX_ERROR(); case 318: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -7464,7 +7464,7 @@ LEX_FN() { LEX_ERROR(); case 319: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -7529,7 +7529,7 @@ LEX_FN() { LEX_ERROR(); case 320: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -7612,7 +7612,7 @@ LEX_FN() { LEX_ERROR(); case 321: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -7676,7 +7676,7 @@ LEX_FN() { LEX_ERROR(); case 322: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -7760,7 +7760,7 @@ LEX_FN() { LEX_ERROR(); case 323: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -7826,7 +7826,7 @@ LEX_FN() { LEX_ERROR(); case 324: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -7910,7 +7910,7 @@ LEX_FN() { LEX_ERROR(); case 325: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -7975,7 +7975,7 @@ LEX_FN() { LEX_ERROR(); case 326: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8060,7 +8060,7 @@ LEX_FN() { LEX_ERROR(); case 327: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8127,7 +8127,7 @@ LEX_FN() { LEX_ERROR(); case 328: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8212,7 +8212,7 @@ LEX_FN() { LEX_ERROR(); case 329: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8275,7 +8275,7 @@ LEX_FN() { LEX_ERROR(); case 330: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8358,7 +8358,7 @@ LEX_FN() { LEX_ERROR(); case 331: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8423,7 +8423,7 @@ LEX_FN() { LEX_ERROR(); case 332: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8506,7 +8506,7 @@ LEX_FN() { LEX_ERROR(); case 333: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8570,7 +8570,7 @@ LEX_FN() { LEX_ERROR(); case 334: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8654,7 +8654,7 @@ LEX_FN() { LEX_ERROR(); case 335: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8720,7 +8720,7 @@ LEX_FN() { LEX_ERROR(); case 336: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8804,7 +8804,7 @@ LEX_FN() { LEX_ERROR(); case 337: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8868,7 +8868,7 @@ LEX_FN() { LEX_ERROR(); case 338: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -8952,7 +8952,7 @@ LEX_FN() { LEX_ERROR(); case 339: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -9018,7 +9018,7 @@ LEX_FN() { LEX_ERROR(); case 340: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -9102,7 +9102,7 @@ LEX_FN() { LEX_ERROR(); case 341: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || @@ -9197,7 +9197,7 @@ LEX_FN() { LEX_ERROR(); case ts_lex_state_error: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || diff --git a/examples/parsers/json.c b/examples/parsers/json.c index bfd1fcee..aabf1c69 100644 --- a/examples/parsers/json.c +++ b/examples/parsers/json.c @@ -173,7 +173,7 @@ LEX_FN() { ACCEPT_TOKEN(ts_aux_sym_1); case 24: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(25); if ((lookahead == '\t') || (lookahead == '\n') || @@ -289,7 +289,7 @@ LEX_FN() { LEX_ERROR(); case 38: START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(25); if (('\t' <= lookahead && lookahead <= '\n') || (lookahead == '\r') || @@ -319,8 +319,7 @@ LEX_FN() { ADVANCE(27); LEX_ERROR(); case ts_lex_state_error: - START_TOKEN(); - if (lookahead == '\0') + if (lookahead == 0) ADVANCE(25); if (('\t' <= lookahead && lookahead <= '\n') || (lookahead == '\r') || diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 6e4c8afc..edfabba3 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -232,11 +232,10 @@ class CCodeGenerator { string condition_for_character_range(const rules::CharacterRange &range) { string lookahead("lookahead"); if (range.min == range.max) { - return lookahead + " == '" + escape_char(range.min) + "'"; + return lookahead + " == " + escape_char(range.min); } else { - return string("'") + escape_char(range.min) + string("' <= ") + - lookahead + " && " + lookahead + " <= '" + escape_char(range.max) + - "'"; + return escape_char(range.min) + string(" <= ") + lookahead + " && " + + lookahead + " <= " + escape_char(range.max); } } diff --git a/src/compiler/util/string_helpers.cc b/src/compiler/util/string_helpers.cc index 0d136832..26603666 100644 --- a/src/compiler/util/string_helpers.cc +++ b/src/compiler/util/string_helpers.cc @@ -1,5 +1,6 @@ #include "compiler/util/string_helpers.h" #include +#include namespace tree_sitter { namespace util { @@ -28,22 +29,26 @@ string escape_string(string input) { string escape_char(char character) { switch (character) { - case '\0': - return "\\0"; case '"': - return "\\\""; + return "'\\\"'"; case '\'': - return "\\'"; + return "'\\''"; case '\n': - return "\\n"; + return "'\\n'"; case '\r': - return "\\r"; + return "'\\r'"; case '\t': - return "\\t"; + return "'\\t'"; case '\\': - return "\\\\"; + return "'\\\\'"; default: - return string() + character; + if (character >= ' ' && character <= '~') { + return string("'") + character + "'"; + } else { + char buffer[5]; + sprintf(buffer, "%d", int(character)); + return string(buffer); + } } }