From 7bf2484d8128054123a15f186f11cc0ee88967ee Mon Sep 17 00:00:00 2001 From: mliszcz Date: Wed, 2 Nov 2022 15:36:59 +0100 Subject: [PATCH] Fix test output formatting for rules starting with M/U Previously the rule names could not begin with an uppercase M or U because the test output formatter assumed that they represent special tokens: MISSING or UNEXPECTED. Fixes #1940. --- cli/src/test.rs | 21 ++++++++++++--------- docs/section-3-creating-parsers.md | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/cli/src/test.rs b/cli/src/test.rs index 3e82c02a..69c4a663 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -294,15 +294,10 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String { // "(node_name" write!(formatted, "{}", s).unwrap(); - let mut c_iter = s.chars(); - c_iter.next(); - match c_iter.next() { - Some('M') | Some('U') => { - // "(MISSING node_name" or "(UNEXPECTED 'x'" - let s = s_iter.next().unwrap(); - write!(formatted, " {}", s).unwrap(); - } - Some(_) | None => {} + // "(MISSING node_name" or "(UNEXPECTED 'x'" + if s.starts_with("(MISSING") || s.starts_with("(UNEXPECTED") { + let s = s_iter.next().unwrap(); + write!(formatted, " {}", s).unwrap(); } } else if s.ends_with(':') { // "field:" @@ -597,6 +592,14 @@ abc .to_string() ); assert_eq!(format_sexp(&"()".to_string()), "()".to_string()); + assert_eq!( + format_sexp(&"(A (M (B)))".to_string()), + "(A\n (M\n (B)))" + ); + assert_eq!( + format_sexp(&"(A (U (B)))".to_string()), + "(A\n (U\n (B)))" + ); } #[test] diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index e17d1ce3..07f9d865 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -200,7 +200,7 @@ You can run syntax highlighting on an arbitrary file using `tree-sitter highligh The following is a complete list of built-in functions you can use in your `grammar.js` to define rules. 
Use-cases for some of these functions will be explained in more detail in later sections. -* **Symbols (the `$` object)** - Every grammar rule is written as a JavaScript function that takes a parameter conventionally called `$`. The syntax `$.identifier` is how you refer to another grammar symbol within a rule. +* **Symbols (the `$` object)** - Every grammar rule is written as a JavaScript function that takes a parameter conventionally called `$`. The syntax `$.identifier` is how you refer to another grammar symbol within a rule. Names starting with `$.MISSING` or `$.UNEXPECTED` should be avoided as they have special meaning for the `tree-sitter test` command. * **String and Regex literals** - The terminal symbols in a grammar are described using JavaScript strings and regular expressions. Of course during parsing, Tree-sitter does not actually use JavaScript's regex engine to evaluate these regexes; it generates its own regex-matching logic as part of each parser. Regex literals are just used as a convenient way of writing regular expressions in your grammar. * **Sequences : `seq(rule1, rule2, ...)`** - This function creates a rule that matches any number of other rules, one after another. It is analogous to simply writing multiple symbols next to each other in [EBNF notation][ebnf]. * **Alternatives : `choice(rule1, rule2, ...)`** - This function creates a rule that matches *one* of a set of possible rules. The order of the arguments does not matter. This is analogous to the `|` (pipe) operator in EBNF notation.