From f04d7c5860be23dd2372deec9bf4c4f72534fbcc Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Thu, 9 Mar 2017 21:16:37 -0800
Subject: [PATCH] Handle unused tokens

---
 src/compiler/generate_code/c_code.cc          | 12 ++-
 .../test_grammars/unused_rules/corpus.txt     |  9 +++
 .../test_grammars/unused_rules/grammar.json   | 73 +++++++++++++++++++
 .../test_grammars/unused_rules/readme.md      |  1 +
 4 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100644 test/fixtures/test_grammars/unused_rules/corpus.txt
 create mode 100644 test/fixtures/test_grammars/unused_rules/grammar.json
 create mode 100644 test/fixtures/test_grammars/unused_rules/readme.md

diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc
index 0bd01c29..111340c1 100644
--- a/src/compiler/generate_code/c_code.cc
+++ b/src/compiler/generate_code/c_code.cc
@@ -126,10 +126,16 @@ class CCodeGenerator {
   }
 
   void add_stats() {
-    size_t token_count = 1 + lexical_grammar.variables.size();
-    for (const ExternalToken &external_token : syntax_grammar.external_tokens) {
-      if (external_token.corresponding_internal_token == rules::NONE()) {
+    size_t token_count = 0;
+    for (const auto &entry : parse_table.symbols) {
+      const Symbol &symbol = entry.first;
+      if (symbol.is_token()) {
         token_count++;
+      } else if (symbol.is_external()) {
+        const ExternalToken &external_token = syntax_grammar.external_tokens[symbol.index];
+        if (external_token.corresponding_internal_token == rules::NONE()) {
+          token_count++;
+        }
       }
     }
 
diff --git a/test/fixtures/test_grammars/unused_rules/corpus.txt b/test/fixtures/test_grammars/unused_rules/corpus.txt
new file mode 100644
index 00000000..11fd569d
--- /dev/null
+++ b/test/fixtures/test_grammars/unused_rules/corpus.txt
@@ -0,0 +1,9 @@
+=========================
+the language
+=========================
+
+E F I J
+
+---
+
+(a (d (e) (f)) (h (i) (j)))
\ No newline at end of file
diff --git a/test/fixtures/test_grammars/unused_rules/grammar.json b/test/fixtures/test_grammars/unused_rules/grammar.json
new file mode 100644
index 00000000..7ed2a0da
--- /dev/null
+++ b/test/fixtures/test_grammars/unused_rules/grammar.json
@@ -0,0 +1,73 @@
+{
+  "name": "unused_rules",
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"}
+  ],
+
+  "rules": {
+    "a": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "d"},
+        {"type": "SYMBOL", "name": "h"}
+      ]
+    },
+
+    "b": {
+      "type": "STRING",
+      "value": "B"
+    },
+
+    "c": {
+      "type": "STRING",
+      "value": "C"
+    },
+
+    "d": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "e"},
+        {"type": "SYMBOL", "name": "f"}
+      ]
+    },
+
+    "e": {
+      "type": "STRING",
+      "value": "E"
+    },
+
+    "f": {
+      "type": "STRING",
+      "value": "F"
+    },
+
+    "g": {
+      "type": "STRING",
+      "value": "G"
+    },
+
+    "h": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "i"},
+        {"type": "SYMBOL", "name": "j"}
+      ]
+    },
+
+    "i": {
+      "type": "STRING",
+      "value": "I"
+    },
+
+    "j": {
+      "type": "STRING",
+      "value": "J"
+    },
+
+    "k": {
+      "type": "STRING",
+      "value": "K"
+    }
+  }
+}
\ No newline at end of file
diff --git a/test/fixtures/test_grammars/unused_rules/readme.md b/test/fixtures/test_grammars/unused_rules/readme.md
new file mode 100644
index 00000000..6390bdeb
--- /dev/null
+++ b/test/fixtures/test_grammars/unused_rules/readme.md
@@ -0,0 +1 @@
+The generated parsers use the grammar's token count to distinguish between terminal and non-terminal symbols. When the grammar has unused tokens, these tokens don't appear in the parser, so they need to be omitted from the token count.
\ No newline at end of file