From 569b9d4099d685df70377b26c56a7a597efc0901 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Jan 2016 11:28:13 -0800 Subject: [PATCH] Allow comments within grammar JSON --- README.md | 22 ++++++++++++++++++++-- spec/integration/compile_grammar_spec.cc | 22 ++++++++++++++++++++-- src/compiler/parse_grammar.cc | 2 +- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index fe950c5c..b4ec7a8f 100644 --- a/README.md +++ b/README.md @@ -54,11 +54,16 @@ int main() { { "name": "arithmetic", + // Things that can appear anywhere in the language, like comments + // and whitespace, are expressed as 'extras'. "extras": [ {"type": "PATTERN", "value": "\\s"}, + {"type": "SYMBOL", "name": "comment"} ], "rules": { + + // The first rule listed in the grammar becomes the 'start rule'. "expression": { "type": "CHOICE", "members": [ @@ -70,13 +75,22 @@ int main() { "type": "SEQ", "members": [ {"type": "STRING", "value": "("}, - {"type": "SYMBOL", "name": "expression"}, + + // Error recovery is controlled by wrapping rule subtrees + // in an 'ERROR' rule. + { + "type": "ERROR", + "content": {"type": "SYMBOL", "name": "expression"} + }, + {"type": "STRING", "value": ")"} ] } ] }, + // Tokens like '+' and '*' are described directly within the + // grammar's rules, as opposed to in a separate lexer description. "sum": { "type": "PREC_LEFT", "value": 1, @@ -90,6 +104,8 @@ int main() { } }, + // Ambiguities can be resolved at compile time by assigning precedence + // values to rule subtrees. "product": { "type": "PREC_LEFT", "value": 2, @@ -103,8 +119,10 @@ int main() { } }, + // Tokens can be specified using ECMAScript regexps. 
"number": {"type": "PATTERN", "value": "\\d+"}, - "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"} + "comment": {"type": "PATTERN", "value": "#.*"}, + "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}, } } )JSON"); diff --git a/spec/integration/compile_grammar_spec.cc b/spec/integration/compile_grammar_spec.cc index 783474f2..99215c95 100644 --- a/spec/integration/compile_grammar_spec.cc +++ b/spec/integration/compile_grammar_spec.cc @@ -98,11 +98,16 @@ describe("compile_grammar", []() { { "name": "arithmetic", + // Things that can appear anywhere in the language, like comments + // and whitespace, are expressed as 'extras'. "extras": [ - {"type": "PATTERN", "value": "\\s"} + {"type": "PATTERN", "value": "\\s"}, + {"type": "SYMBOL", "name": "comment"} ], "rules": { + + // The first rule listed in the grammar becomes the 'start rule'. "expression": { "type": "CHOICE", "members": [ @@ -114,13 +119,22 @@ describe("compile_grammar", []() { "type": "SEQ", "members": [ {"type": "STRING", "value": "("}, - {"type": "SYMBOL", "name": "expression"}, + + // Error recovery is controlled by wrapping rule subtrees + // in an 'ERROR' rule. + { + "type": "ERROR", + "content": {"type": "SYMBOL", "name": "expression"} + }, + {"type": "STRING", "value": ")"} ] } ] }, + // Tokens like '+' and '*' are described directly within the + // grammar's rules, as opposed to in a separate lexer description. "sum": { "type": "PREC_LEFT", "value": 1, @@ -134,6 +148,8 @@ describe("compile_grammar", []() { } }, + // Ambiguities can be resolved at compile time by assigning precedence + // values to rule subtrees. "product": { "type": "PREC_LEFT", "value": 2, @@ -147,7 +163,9 @@ describe("compile_grammar", []() { } }, + // Tokens can be specified using ECMAScript regexps. 
"number": {"type": "PATTERN", "value": "\\d+"}, + "comment": {"type": "PATTERN", "value": "#.*"}, "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"} } } diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_grammar.cc index d71e0030..263c68ae 100644 --- a/src/compiler/parse_grammar.cc +++ b/src/compiler/parse_grammar.cc @@ -223,7 +223,7 @@ ParseGrammarResult parse_grammar(const string &input) { Grammar grammar; json_value name_json, rules_json, extras_json, conflicts_json; - json_settings settings = { 0, 0, 0, 0, 0, 0 }; + json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 }; char parse_error[json_error_max]; json_value *grammar_json = json_parse_ex(&settings, input.c_str(), input.size(), parse_error);