From f04d7c5860be23dd2372deec9bf4c4f72534fbcc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Mar 2017 21:16:37 -0800 Subject: [PATCH] Handle unused tokens --- src/compiler/generate_code/c_code.cc | 12 ++- .../test_grammars/unused_rules/corpus.txt | 9 +++ .../test_grammars/unused_rules/grammar.json | 73 +++++++++++++++++++ .../test_grammars/unused_rules/readme.md | 1 + 4 files changed, 92 insertions(+), 3 deletions(-) create mode 100644 test/fixtures/test_grammars/unused_rules/corpus.txt create mode 100644 test/fixtures/test_grammars/unused_rules/grammar.json create mode 100644 test/fixtures/test_grammars/unused_rules/readme.md diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 0bd01c29..111340c1 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -126,10 +126,16 @@ class CCodeGenerator { } void add_stats() { - size_t token_count = 1 + lexical_grammar.variables.size(); - for (const ExternalToken &external_token : syntax_grammar.external_tokens) { - if (external_token.corresponding_internal_token == rules::NONE()) { + size_t token_count = 0; + for (const auto &entry : parse_table.symbols) { + const Symbol &symbol = entry.first; + if (symbol.is_token()) { token_count++; + } else if (symbol.is_external()) { + const ExternalToken &external_token = syntax_grammar.external_tokens[symbol.index]; + if (external_token.corresponding_internal_token == rules::NONE()) { + token_count++; + } } } diff --git a/test/fixtures/test_grammars/unused_rules/corpus.txt b/test/fixtures/test_grammars/unused_rules/corpus.txt new file mode 100644 index 00000000..11fd569d --- /dev/null +++ b/test/fixtures/test_grammars/unused_rules/corpus.txt @@ -0,0 +1,9 @@ +========================= +the language +========================= + +E F I J + +--- + +(a (d (e) (f)) (h (i) (j))) \ No newline at end of file diff --git a/test/fixtures/test_grammars/unused_rules/grammar.json b/test/fixtures/test_grammars/unused_rules/grammar.json new file mode 100644 index 00000000..7ed2a0da --- /dev/null +++ b/test/fixtures/test_grammars/unused_rules/grammar.json @@ -0,0 +1,73 @@ +{ + "name": "unused_rules", + + "extras": [ + {"type": "PATTERN", "value": "\\s"} + ], + + "rules": { + "a": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "d"}, + {"type": "SYMBOL", "name": "h"} + ] + }, + + "b": { + "type": "STRING", + "value": "B" + }, + + "c": { + "type": "STRING", + "value": "C" + }, + + "d": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "e"}, + {"type": "SYMBOL", "name": "f"} + ] + }, + + "e": { + "type": "STRING", + "value": "E" + }, + + "f": { + "type": "STRING", + "value": "F" + }, + + "g": { + "type": "STRING", + "value": "G" + }, + + "h": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "i"}, + {"type": "SYMBOL", "name": "j"} + ] + }, + + "i": { + "type": "STRING", + "value": "I" + }, + + "j": { + "type": "STRING", + "value": "J" + }, + + "k": { + "type": "STRING", + "value": "K" + } + } +} \ No newline at end of file diff --git a/test/fixtures/test_grammars/unused_rules/readme.md b/test/fixtures/test_grammars/unused_rules/readme.md new file mode 100644 index 00000000..6390bdeb --- /dev/null +++ b/test/fixtures/test_grammars/unused_rules/readme.md @@ -0,0 +1 @@ +The generated parsers use the grammar's token count to distinguish between terminal and non-terminal symbols. When the grammar has unused tokens, these tokens don't appear in the parser, so they need to be omitted from the token count. \ No newline at end of file