From ad4089a4bfa8a6ffc758cf80e6585a15ec37563b Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Thu, 14 Jan 2016 10:26:52 -0800
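Subject: [PATCH] Move anonymous tokens grammar into integration spec

Remove the anonymous_tokens example grammar and its checked-in generated
parser from spec/fixtures, and stop compiling it in
spec/compiler/compile_examples.cc. A similar grammar is now compiled from
JSON and exercised in spec/integration/compile_grammar_spec.cc.

The new spec grammar is not identical to the removed fixture: it matches
'hello' in single quotes rather than "hello" in double quotes, uses the
pattern \d+ instead of \d, and omits the \s pattern that the fixture
passed as its second argument.
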
---
 spec/compiler/compile_examples.cc          |   5 -
 spec/fixtures/grammars/anonymous_tokens.cc |  16 ---
 spec/fixtures/parsers/anonymous_tokens.c   | 153 ---------------------
 spec/integration/compile_grammar_spec.cc   |  46 ++++++-
 4 files changed, 40 insertions(+), 180 deletions(-)
 delete mode 100644 spec/fixtures/grammars/anonymous_tokens.cc
 delete mode 100644 spec/fixtures/parsers/anonymous_tokens.c

diff --git a/spec/compiler/compile_examples.cc b/spec/compiler/compile_examples.cc
index 67fc9f08..0852eb8d 100644
--- a/spec/compiler/compile_examples.cc
+++ b/spec/compiler/compile_examples.cc
@@ -41,17 +41,12 @@ describe("compiling the example grammars", []() {
     });
   };
 
-  // example languages
   compile_grammar(tree_sitter_examples::arithmetic, "arithmetic");
   compile_grammar(tree_sitter_examples::json, "json");
   compile_grammar(tree_sitter_examples::javascript, "javascript");
   compile_grammar(tree_sitter_examples::golang, "golang");
   compile_grammar(tree_sitter_examples::c, "c");
   compile_grammar(tree_sitter_examples::cpp, "cpp");
-
-  // edge cases
-  compile_grammar(tree_sitter_examples::anonymous_tokens, "anonymous_tokens");
-
 });
 
 END_TEST
diff --git a/spec/fixtures/grammars/anonymous_tokens.cc b/spec/fixtures/grammars/anonymous_tokens.cc
deleted file mode 100644
index d9639b51..00000000
--- a/spec/fixtures/grammars/anonymous_tokens.cc
+++ /dev/null
@@ -1,16 +0,0 @@
-#include "tree_sitter/compiler.h"
-#include "helpers.h"
-
-namespace tree_sitter_examples {
-
-extern const Grammar anonymous_tokens{{
-  { "program", choice({
-    str("\n"),
-    str("\r"),
-    pattern("\\d"),
-    str("\"hello\"") }) },
-}, {
-  pattern("\\s"),
-}, {}};
-
-}  // namespace tree_sitter_examples
diff --git a/spec/fixtures/parsers/anonymous_tokens.c b/spec/fixtures/parsers/anonymous_tokens.c
deleted file mode 100644
index bc6ede0f..00000000
--- a/spec/fixtures/parsers/anonymous_tokens.c
+++ /dev/null
@@ -1,153 +0,0 @@
-#include "tree_sitter/parser.h"
-
-#define STATE_COUNT 3
-#define SYMBOL_COUNT 7
-
-enum {
-    sym_program = ts_builtin_sym_start,
-    anon_sym_LF,
-    anon_sym_CR,
-    aux_sym_SLASH_BSLASHd_SLASH,
-    anon_sym_DQUOTEhello_DQUOTE,
-};
-
-static const char *ts_symbol_names[] = {
-    [sym_program] = "program",
-    [ts_builtin_sym_error] = "ERROR",
-    [ts_builtin_sym_end] = "END",
-    [anon_sym_LF] = "\n",
-    [anon_sym_CR] = "\r",
-    [aux_sym_SLASH_BSLASHd_SLASH] = "/\\d/",
-    [anon_sym_DQUOTEhello_DQUOTE] = "\"hello\"",
-};
-
-static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {
-    [sym_program] = {.visible = true, .named = true, .structural = true, .extra = false},
-    [ts_builtin_sym_error] = {.visible = true, .named = true, .structural = false, .extra = false},
-    [ts_builtin_sym_end] = {.visible = false, .named = false, .structural = true, .extra = false},
-    [anon_sym_LF] = {.visible = true, .named = false, .structural = true, .extra = false},
-    [anon_sym_CR] = {.visible = true, .named = false, .structural = true, .extra = false},
-    [aux_sym_SLASH_BSLASHd_SLASH] = {.visible = false, .named = false, .structural = true, .extra = false},
-    [anon_sym_DQUOTEhello_DQUOTE] = {.visible = true, .named = false, .structural = true, .extra = false},
-};
-
-static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {
-    START_LEXER();
-    switch (state) {
-        case 0:
-            START_TOKEN();
-            if (lookahead == 0)
-                ADVANCE(1);
-            if ((lookahead == '\t') ||
-                (lookahead == ' '))
-                ADVANCE(0);
-            if (lookahead == '\n')
-                ADVANCE(2);
-            if (lookahead == '\r')
-                ADVANCE(3);
-            if (lookahead == '\"')
-                ADVANCE(4);
-            if ('0' <= lookahead && lookahead <= '9')
-                ADVANCE(11);
-            LEX_ERROR();
-        case 1:
-            ACCEPT_TOKEN(ts_builtin_sym_end);
-        case 2:
-            START_TOKEN();
-            ACCEPT_TOKEN(anon_sym_LF);
-        case 3:
-            START_TOKEN();
-            ACCEPT_TOKEN(anon_sym_CR);
-        case 4:
-            if (lookahead == 'h')
-                ADVANCE(5);
-            LEX_ERROR();
-        case 5:
-            if (lookahead == 'e')
-                ADVANCE(6);
-            LEX_ERROR();
-        case 6:
-            if (lookahead == 'l')
-                ADVANCE(7);
-            LEX_ERROR();
-        case 7:
-            if (lookahead == 'l')
-                ADVANCE(8);
-            LEX_ERROR();
-        case 8:
-            if (lookahead == 'o')
-                ADVANCE(9);
-            LEX_ERROR();
-        case 9:
-            if (lookahead == '\"')
-                ADVANCE(10);
-            LEX_ERROR();
-        case 10:
-            ACCEPT_TOKEN(anon_sym_DQUOTEhello_DQUOTE);
-        case 11:
-            ACCEPT_TOKEN(aux_sym_SLASH_BSLASHd_SLASH);
-        case 12:
-            START_TOKEN();
-            if ((lookahead == '\t') ||
-                (lookahead == ' '))
-                ADVANCE(12);
-            if (lookahead == '\n')
-                ADVANCE(2);
-            if (lookahead == '\r')
-                ADVANCE(3);
-            if (lookahead == '\"')
-                ADVANCE(4);
-            if ('0' <= lookahead && lookahead <= '9')
-                ADVANCE(11);
-            LEX_ERROR();
-        case 13:
-            START_TOKEN();
-            if (lookahead == 0)
-                ADVANCE(1);
-            if ((lookahead == '\t') ||
-                (lookahead == '\n') ||
-                (lookahead == '\r') ||
-                (lookahead == ' '))
-                ADVANCE(13);
-            LEX_ERROR();
-        default:
-            LEX_ERROR();
-    }
-}
-
-static TSStateId ts_lex_states[STATE_COUNT] = {
-    [0] = 12,
-    [1] = 13,
-    [2] = 13,
-};
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
-
-static unsigned short ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {
-    [0] = {
-        [sym_program] = 2,
-        [anon_sym_LF] = 4,
-        [anon_sym_CR] = 4,
-        [aux_sym_SLASH_BSLASHd_SLASH] = 4,
-        [anon_sym_DQUOTEhello_DQUOTE] = 4,
-    },
-    [1] = {
-        [ts_builtin_sym_end] = 6,
-    },
-    [2] = {
-        [ts_builtin_sym_end] = 8,
-    },
-};
-
-static TSParseActionEntry ts_parse_actions[] = {
-    [0] = {.count = 1}, ERROR(),
-    [2] = {.count = 1}, SHIFT(1, 0),
-    [4] = {.count = 1}, SHIFT(2, 0),
-    [6] = {.count = 1}, ACCEPT_INPUT(),
-    [8] = {.count = 1}, REDUCE(sym_program, 1, 0),
-};
-
-#pragma GCC diagnostic pop
-
-EXPORT_LANGUAGE(ts_language_anonymous_tokens);
diff --git a/spec/integration/compile_grammar_spec.cc b/spec/integration/compile_grammar_spec.cc
index 80fe0763..783474f2 100644
--- a/spec/integration/compile_grammar_spec.cc
+++ b/spec/integration/compile_grammar_spec.cc
@@ -25,12 +25,10 @@ describe("compile_grammar", []() {
         }
       )JSON");
 
-      const TSLanguage *language = load_language("test_language", result);
+      ts_document_set_language(document, load_language("test_language", result));
 
-      ts_document_set_language(document, language);
       ts_document_set_input_string(document, "the-value");
       ts_document_parse(document);
-
       TSNode root_node = ts_document_root_node(document);
       AssertThat(ts_node_string(root_node, document), Equals("(first_rule)"));
     });
@@ -47,17 +45,53 @@ describe("compile_grammar", []() {
         }
       )JSON");
 
-      const TSLanguage *language = load_language("test_language", result);
+      ts_document_set_language(document, load_language("test_language", result));
 
-      ts_document_set_language(document, language);
       ts_document_set_input_string(document, "");
       ts_document_parse(document);
-
       TSNode root_node = ts_document_root_node(document);
       AssertThat(ts_node_string(root_node, document), Equals("(first_rule)"));
     });
   });
 
+  describe("when the grammar contains anonymous tokens with escaped characters", [&]() {
+    it("escapes the escaped characters properly in the generated parser", [&]() {
+      TSCompileResult result = ts_compile_grammar(R"JSON(
+        {
+          "name": "test_language",
+          "rules": {
+            "first_rule": {
+              "type": "CHOICE",
+              "members": [
+                {"type": "STRING", "value": "\n"},
+                {"type": "STRING", "value": "\r"},
+                {"type": "STRING", "value": "'hello'"},
+                {"type": "PATTERN", "value": "\\d+"}
+              ]
+            }
+          }
+        }
+      )JSON");
+
+      ts_document_set_language(document, load_language("test_language", result));
+
+      ts_document_set_input_string(document, "1234");
+      ts_document_parse(document);
+      TSNode root_node = ts_document_root_node(document);
+      AssertThat(ts_node_string(root_node, document), Equals("(first_rule)"));
+
+      ts_document_set_input_string(document, "\n");
+      ts_document_parse(document);
+      root_node = ts_document_root_node(document);
+      AssertThat(ts_node_string(root_node, document), Equals("(first_rule)"));
+
+      ts_document_set_input_string(document, "'hello'");
+      ts_document_parse(document);
+      root_node = ts_document_root_node(document);
+      AssertThat(ts_node_string(root_node, document), Equals("(first_rule)"));
+    });
+  });
+
   describe("the grammar in the README", [&]() {
     it("works", [&]() {
       TSCompileResult result = ts_compile_grammar(R"JSON(