From 569b9d4099d685df70377b26c56a7a597efc0901 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Thu, 14 Jan 2016 11:28:13 -0800
Subject: [PATCH] Allow comments within grammar JSON

---
 README.md                                | 22 ++++++++++++++++++++--
 spec/integration/compile_grammar_spec.cc | 22 ++++++++++++++++++++--
 src/compiler/parse_grammar.cc            |  2 +-
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index fe950c5c..b4ec7a8f 100644
--- a/README.md
+++ b/README.md
@@ -54,11 +54,16 @@ int main() {
     {
       "name": "arithmetic",
 
+      // Things that can appear anywhere in the language, like comments
+      // and whitespace, are expressed as 'extras'.
       "extras": [
         {"type": "PATTERN", "value": "\\s"},
+        {"type": "SYMBOL", "name": "comment"}
       ],
 
       "rules": {
+
+        // The first rule listed in the grammar becomes the 'start rule'.
         "expression": {
           "type": "CHOICE",
           "members": [
@@ -70,13 +75,22 @@ int main() {
               "type": "SEQ",
               "members": [
                 {"type": "STRING", "value": "("},
-                {"type": "SYMBOL", "name": "expression"},
+
+                // Error recovery is controlled by wrapping rule subtrees
+                // in an 'ERROR' rule.
+                {
+                  "type": "ERROR",
+                  "content": {"type": "SYMBOL", "name": "expression"}
+                },
+
                 {"type": "STRING", "value": ")"}
               ]
             }
           ]
         },
 
+        // Tokens like '+' and '*' are described directly within the
+        // grammar's rules, as opposed to in a separate lexer description.
         "sum": {
           "type": "PREC_LEFT",
           "value": 1,
@@ -90,6 +104,8 @@ int main() {
           }
         },
 
+        // Ambiguities can be resolved at compile time by assigning precedence
+        // values to rule subtrees.
         "product": {
           "type": "PREC_LEFT",
           "value": 2,
@@ -103,8 +119,10 @@ int main() {
           }
         },
 
+        // Tokens can be specified using ECMAScript regexps.
         "number": {"type": "PATTERN", "value": "\\d+"},
-        "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
+        "comment": {"type": "PATTERN", "value": "#.*"},
+        "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
       }
     }
   )JSON");
diff --git a/spec/integration/compile_grammar_spec.cc b/spec/integration/compile_grammar_spec.cc
index 783474f2..99215c95 100644
--- a/spec/integration/compile_grammar_spec.cc
+++ b/spec/integration/compile_grammar_spec.cc
@@ -98,11 +98,16 @@ describe("compile_grammar", []() {
         {
           "name": "arithmetic",
 
+          // Things that can appear anywhere in the language, like comments
+          // and whitespace, are expressed as 'extras'.
           "extras": [
-            {"type": "PATTERN", "value": "\\s"}
+            {"type": "PATTERN", "value": "\\s"},
+            {"type": "SYMBOL", "name": "comment"}
           ],
 
           "rules": {
+
+            // The first rule listed in the grammar becomes the 'start rule'.
             "expression": {
               "type": "CHOICE",
               "members": [
@@ -114,13 +119,22 @@ describe("compile_grammar", []() {
                   "type": "SEQ",
                   "members": [
                     {"type": "STRING", "value": "("},
-                    {"type": "SYMBOL", "name": "expression"},
+
+                    // Error recovery is controlled by wrapping rule subtrees
+                    // in an 'ERROR' rule.
+                    {
+                      "type": "ERROR",
+                      "content": {"type": "SYMBOL", "name": "expression"}
+                    },
+
                     {"type": "STRING", "value": ")"}
                   ]
                 }
               ]
             },
 
+            // Tokens like '+' and '*' are described directly within the
+            // grammar's rules, as opposed to in a separate lexer description.
             "sum": {
               "type": "PREC_LEFT",
               "value": 1,
@@ -134,6 +148,8 @@ describe("compile_grammar", []() {
               }
             },
 
+            // Ambiguities can be resolved at compile time by assigning precedence
+            // values to rule subtrees.
             "product": {
               "type": "PREC_LEFT",
               "value": 2,
@@ -147,7 +163,9 @@ describe("compile_grammar", []() {
               }
             },
 
+            // Tokens can be specified using ECMAScript regexps.
             "number": {"type": "PATTERN", "value": "\\d+"},
+            "comment": {"type": "PATTERN", "value": "#.*"},
             "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
           }
         }
diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_grammar.cc
index d71e0030..263c68ae 100644
--- a/src/compiler/parse_grammar.cc
+++ b/src/compiler/parse_grammar.cc
@@ -223,7 +223,7 @@ ParseGrammarResult parse_grammar(const string &input) {
   Grammar grammar;
   json_value name_json, rules_json, extras_json, conflicts_json;
 
-  json_settings settings = { 0, 0, 0, 0, 0, 0 };
+  json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
   char parse_error[json_error_max];
   json_value *grammar_json =
     json_parse_ex(&settings, input.c_str(), input.size(), parse_error);