From 7bf2484d8128054123a15f186f11cc0ee88967ee Mon Sep 17 00:00:00 2001
From: mliszcz <liszcz.michal@gmail.com>
Date: Wed, 2 Nov 2022 15:36:59 +0100
Subject: [PATCH] Fix test output formatting for rules starting with M/U

Previously the rule names could not begin with an uppercase M or U
because the test output formatter assumed that they represent special
tokens: MISSING or UNEXPECTED.

Fixes #1940.
---
 cli/src/test.rs                    | 21 ++++++++++++---------
 docs/section-3-creating-parsers.md |  2 +-
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/cli/src/test.rs b/cli/src/test.rs
index 3e82c02a..69c4a663 100644
--- a/cli/src/test.rs
+++ b/cli/src/test.rs
@@ -294,15 +294,10 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
             // "(node_name"
             write!(formatted, "{}", s).unwrap();
 
-            let mut c_iter = s.chars();
-            c_iter.next();
-            match c_iter.next() {
-                Some('M') | Some('U') => {
-                    // "(MISSING node_name" or "(UNEXPECTED 'x'"
-                    let s = s_iter.next().unwrap();
-                    write!(formatted, " {}", s).unwrap();
-                }
-                Some(_) | None => {}
+            // "(MISSING node_name" or "(UNEXPECTED 'x'"
+            if s.starts_with("(MISSING") || s.starts_with("(UNEXPECTED") {
+                let s = s_iter.next().unwrap();
+                write!(formatted, " {}", s).unwrap();
             }
         } else if s.ends_with(':') {
             // "field:"
@@ -597,6 +592,14 @@ abc
             .to_string()
         );
         assert_eq!(format_sexp(&"()".to_string()), "()".to_string());
+        assert_eq!(
+            format_sexp(&"(A (M (B)))".to_string()),
+            "(A\n  (M\n    (B)))"
+        );
+        assert_eq!(
+            format_sexp(&"(A (U (B)))".to_string()),
+            "(A\n  (U\n    (B)))"
+        );
     }
 
     #[test]
diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md
index e17d1ce3..07f9d865 100644
--- a/docs/section-3-creating-parsers.md
+++ b/docs/section-3-creating-parsers.md
@@ -200,7 +200,7 @@ You can run syntax highlighting on an arbitrary file using `tree-sitter highligh
 
 The following is a complete list of built-in functions you can use in your `grammar.js` to define rules. Use-cases for some of these functions will be explained in more detail in later sections.
 
-* **Symbols (the `$` object)** - Every grammar rule is written as a JavaScript function that takes a parameter conventionally called `$`. The syntax `$.identifier` is how you refer to another grammar symbol within a rule.
+* **Symbols (the `$` object)** - Every grammar rule is written as a JavaScript function that takes a parameter conventionally called `$`. The syntax `$.identifier` is how you refer to another grammar symbol within a rule. Names starting with `$.MISSING` or `$.UNEXPECTED` should be avoided as they have special meaning for the `tree-sitter test` command.
 * **String and Regex literals** - The terminal symbols in a grammar are described using JavaScript strings and regular expressions. Of course during parsing, Tree-sitter does not actually use JavaScript's regex engine to evaluate these regexes; it generates its own regex-matching logic as part of each parser. Regex literals are just used as a convenient way of writing regular expressions in your grammar.
 * **Sequences : `seq(rule1, rule2, ...)`** - This function creates a rule that matches any number of other rules, one after another. It is analogous to simply writing multiple symbols next to each other in [EBNF notation][ebnf].
 * **Alternatives : `choice(rule1, rule2, ...)`** - This function creates a rule that matches *one* of a set of possible rules. The order of the arguments does not matter. This is analogous to the `|` (pipe) operator in EBNF notation.