Allow comments within grammar JSON

2016-01-14 11:28:13 -08:00 · 2016-01-14 11:28:13 -08:00 · 569b9d4099
commit 569b9d4099
parent ad4089a4bf
3 changed files with 41 additions and 5 deletions
--- a/README.md
+++ b/README.md
@ -54,11 +54,16 @@ int main() {
    {
      "name": "arithmetic",

+      // Things that can appear anywhere in the language, like comments
+      // and whitespace, are expressed as 'extras'.
      "extras": [
        {"type": "PATTERN", "value": "\\s"},
+        {"type": "SYMBOL", "name": "comment"}
      ],

      "rules": {
+
+        // The first rule listed in the grammar becomes the 'start rule'.
        "expression": {
          "type": "CHOICE",
          "members": [
@ -70,13 +75,22 @@ int main() {
              "type": "SEQ",
              "members": [
                {"type": "STRING", "value": "("},
-                {"type": "SYMBOL", "name": "expression"},
+
+                // Error recovery is controlled by wrapping rule subtrees
+                // in an 'ERROR' rule.
+                {
+                  "type": "ERROR",
+                  "content": {"type": "SYMBOL", "name": "expression"}
+                },
+
                {"type": "STRING", "value": ")"}
              ]
            }
          ]
        },

+        // Tokens like '+' and '*' are described directly within the
+        // grammar's rules, as opposed to in a separate lexer description.
        "sum": {
          "type": "PREC_LEFT",
          "value": 1,
@ -90,6 +104,8 @@ int main() {
          }
        },

+        // Ambiguities can be resolved at compile time by assigning precedence
+        // values to rule subtrees.
        "product": {
          "type": "PREC_LEFT",
          "value": 2,
@ -103,8 +119,10 @@ int main() {
          }
        },

+        // Tokens can be specified using ECMAScript regexps.
        "number": {"type": "PATTERN", "value": "\\d+"},
-        "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
+        "comment": {"type": "PATTERN", "value": "#.*"},
+        "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
      }
    }
  )JSON");
--- a/spec/integration/compile_grammar_spec.cc
+++ b/spec/integration/compile_grammar_spec.cc
@ -98,11 +98,16 @@ describe("compile_grammar", []() {
        {
          "name": "arithmetic",

+          // Things that can appear anywhere in the language, like comments
+          // and whitespace, are expressed as 'extras'.
          "extras": [
-            {"type": "PATTERN", "value": "\\s"}
+            {"type": "PATTERN", "value": "\\s"},
+            {"type": "SYMBOL", "name": "comment"}
          ],

          "rules": {
+
+            // The first rule listed in the grammar becomes the 'start rule'.
            "expression": {
              "type": "CHOICE",
              "members": [
@ -114,13 +119,22 @@ describe("compile_grammar", []() {
                  "type": "SEQ",
                  "members": [
                    {"type": "STRING", "value": "("},
-                    {"type": "SYMBOL", "name": "expression"},
+
+                    // Error recovery is controlled by wrapping rule subtrees
+                    // in an 'ERROR' rule.
+                    {
+                      "type": "ERROR",
+                      "content": {"type": "SYMBOL", "name": "expression"}
+                    },
+
                    {"type": "STRING", "value": ")"}
                  ]
                }
              ]
            },

+            // Tokens like '+' and '*' are described directly within the
+            // grammar's rules, as opposed to in a separate lexer description.
            "sum": {
              "type": "PREC_LEFT",
              "value": 1,
@ -134,6 +148,8 @@ describe("compile_grammar", []() {
              }
            },

+            // Ambiguities can be resolved at compile time by assigning precedence
+            // values to rule subtrees.
            "product": {
              "type": "PREC_LEFT",
              "value": 2,
@ -147,7 +163,9 @@ describe("compile_grammar", []() {
              }
            },

+            // Tokens can be specified using ECMAScript regexps.
            "number": {"type": "PATTERN", "value": "\\d+"},
+            "comment": {"type": "PATTERN", "value": "#.*"},
            "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
          }
        }
--- a/src/compiler/parse_grammar.cc
+++ b/src/compiler/parse_grammar.cc
@ -223,7 +223,7 @@ ParseGrammarResult parse_grammar(const string &input) {
  Grammar grammar;
  json_value name_json, rules_json, extras_json, conflicts_json;

-  json_settings settings = { 0, 0, 0, 0, 0, 0 };
+  json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
  char parse_error[json_error_max];
  json_value *grammar_json =
    json_parse_ex(&settings, input.c_str(), input.size(), parse_error);