Handle unused tokens

2017-03-09 21:16:37 -08:00 · 2017-03-09 21:16:37 -08:00 · f04d7c5860
commit f04d7c5860
parent c79fae6d21
4 changed files with 92 additions and 3 deletions
--- a/src/compiler/generate_code/c_code.cc
+++ b/src/compiler/generate_code/c_code.cc
@@ -126,10 +126,16 @@ class CCodeGenerator {
  }

  void add_stats() {
-    size_t token_count = 1 + lexical_grammar.variables.size();
-    for (const ExternalToken &external_token : syntax_grammar.external_tokens) {
-      if (external_token.corresponding_internal_token == rules::NONE()) {
+    size_t token_count = 0;
+    for (const auto &entry : parse_table.symbols) {
+      const Symbol &symbol = entry.first;
+      if (symbol.is_token()) {
        token_count++;
+      } else if (symbol.is_external()) {
+        const ExternalToken &external_token = syntax_grammar.external_tokens[symbol.index];
+        if (external_token.corresponding_internal_token == rules::NONE()) {
+          token_count++;
+        }
      }
    }

--- a/test/fixtures/test_grammars/unused_rules/corpus.txt
+++ b/test/fixtures/test_grammars/unused_rules/corpus.txt
@@ -0,0 +1,9 @@
+=========================
+the language
+=========================
+
+E F I J
+
+---
+
+(a (d (e) (f)) (h (i) (j)))
--- a/test/fixtures/test_grammars/unused_rules/grammar.json
+++ b/test/fixtures/test_grammars/unused_rules/grammar.json
@@ -0,0 +1,73 @@
+{
+  "name": "unused_rules",
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"}
+  ],
+
+  "rules": {
+    "a": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "d"},
+        {"type": "SYMBOL", "name": "h"}
+      ]
+    },
+
+    "b": {
+      "type": "STRING",
+      "value": "B"
+    },
+
+    "c": {
+      "type": "STRING",
+      "value": "C"
+    },
+
+    "d": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "e"},
+        {"type": "SYMBOL", "name": "f"}
+      ]
+    },
+
+    "e": {
+      "type": "STRING",
+      "value": "E"
+    },
+
+    "f": {
+      "type": "STRING",
+      "value": "F"
+    },
+
+    "g": {
+      "type": "STRING",
+      "value": "G"
+    },
+
+    "h": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "i"},
+        {"type": "SYMBOL", "name": "j"}
+      ]
+    },
+
+    "i": {
+      "type": "STRING",
+      "value": "I"
+    },
+
+    "j": {
+      "type": "STRING",
+      "value": "J"
+    },
+
+    "k": {
+      "type": "STRING",
+      "value": "K"
+    }
+  }
+}
--- a/test/fixtures/test_grammars/unused_rules/readme.md
+++ b/test/fixtures/test_grammars/unused_rules/readme.md
@@ -0,0 +1 @@
+The generated parsers use the grammar's token count to distinguish between terminal and non-terminal symbols. When the grammar has unused tokens, these tokens don't appear in the parser, so they need to be omitted from the token count.
				`@@ -0,0 +1 @@`
				`The generated parsers use the grammar's token count to distinguish between terminal and non-terminal symbols. When the grammar has unused tokens, these tokens don't appear in the parser, so they need to be omitted from the token count.`