diff --git a/cli/src/main.rs b/cli/src/main.rs index bb21233b..b8027ea8 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -22,7 +22,7 @@ use tree_sitter_cli::{ highlight, init::{generate_grammar_files, get_root_path, migrate_package_json, JsonConfigOpts}, logger, - parse::{self, ParseFileOptions, ParseOutput}, + parse::{self, ParseFileOptions, ParseOutput, ParseTheme}, playground, query, tags, test::{self, TestOptions}, test_highlight, test_tags, util, wasm, @@ -183,6 +183,12 @@ struct Parse { help = "Output the parse data in XML format" )] pub output_xml: bool, + #[arg( + long = "cst", + short = 'c', + help = "Output the parse data in a pretty-printed CST format" + )] + pub output_cst: bool, #[arg(long, short, help = "Show parsing statistic")] pub stat: bool, #[arg(long, help = "Interrupt the parsing process by timeout (µs)")] @@ -787,12 +793,25 @@ impl Parse { ParseOutput::Dot } else if self.output_xml { ParseOutput::Xml + } else if self.output_cst { + ParseOutput::Cst } else if self.quiet { ParseOutput::Quiet } else { ParseOutput::Normal }; + let parse_theme = if color { + config + .get::() + .with_context(|| "Failed to parse CST theme")? + .parse_theme + .unwrap_or_default() + .into() + } else { + ParseTheme::empty() + }; + let encoding = self.encoding.map(|e| match e { Encoding::Utf8 => ffi::TSInputEncodingUTF8, Encoding::Utf16LE => ffi::TSInputEncodingUTF16LE, @@ -868,6 +887,7 @@ impl Parse { encoding, open_log: self.open_log, no_ranges: self.no_ranges, + parse_theme: &parse_theme, }; let parse_result = parse::parse_file_at_path(&mut parser, &opts)?; diff --git a/cli/src/parse.rs b/cli/src/parse.rs index d72c770c..6fe0f6a5 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -1,16 +1,18 @@ use std::{ fmt, fs, - io::{self, Write}, + io::{self, StdoutLock, Write}, path::Path, sync::atomic::AtomicUsize, time::{Duration, Instant}, }; +use anstyle::{AnsiColor, Color, RgbColor}; use anyhow::{anyhow, Context, Result}; -use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree}; +use serde::{Deserialize, Serialize}; +use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Range, Tree, TreeCursor}; use super::util; -use crate::fuzz::edits::Edit; +use crate::{fuzz::edits::Edit, test::paint}; #[derive(Debug, Default)] pub struct Stats { @@ -39,11 +41,136 @@ impl fmt::Display for Stats { } } +/// Sets the color used in the output of `tree-sitter parse --cst` +#[derive(Debug, Copy, Clone)] +pub struct ParseTheme { + /// The color of node kinds + pub node_kind: Option, + /// The color of text associated with a node + pub node_text: Option, + /// The color of node fields + pub field: Option, + /// The color of the range information for unnamed nodes + pub row_color: Option, + /// The color of the range information for named nodes + pub row_color_named: Option, + /// The color of extra nodes + pub extra: Option, + /// The color of ERROR nodes + pub error: Option, + /// The color of MISSING nodes and their associated text + pub missing: Option, + /// The color of newline characters + pub line_feed: Option, + /// The color of backticks + pub backtick: Option, + /// The color of literals + pub literal: Option, +} + +impl ParseTheme { + const GRAY: Color = Color::Rgb(RgbColor(118, 118, 118)); + const LIGHT_GRAY: Color = Color::Rgb(RgbColor(166, 172, 181)); + const ORANGE: Color = Color::Rgb(RgbColor(255, 153, 51)); + const YELLOW: Color = Color::Rgb(RgbColor(219, 219, 173)); + const GREEN: Color = Color::Rgb(RgbColor(101, 192, 67)); + + #[must_use] + pub const fn empty() -> Self { + Self { + node_kind: None, + node_text: None, + field: None, + row_color: None, + row_color_named: None, + extra: None, + error: None, + missing: None, + line_feed: None, + backtick: None, + literal: None, + } + } +} + +impl Default for ParseTheme { + fn default() -> Self { + Self { + node_kind: Some(AnsiColor::BrightCyan.into()), + node_text: Some(Self::GRAY), + field: Some(AnsiColor::Blue.into()), + row_color: Some(AnsiColor::White.into()), + row_color_named: Some(AnsiColor::BrightCyan.into()), + extra: Some(AnsiColor::BrightMagenta.into()), + error: Some(AnsiColor::Red.into()), + missing: Some(Self::ORANGE), + line_feed: Some(Self::LIGHT_GRAY), + backtick: Some(Self::GREEN), + literal: Some(Self::YELLOW), + } + } +} + +#[derive(Debug, Copy, Clone, Deserialize, Serialize)] +pub struct Rgb(pub u8, pub u8, pub u8); + +impl From for RgbColor { + fn from(val: Rgb) -> Self { + Self(val.0, val.1, val.2) + } +} + +#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct Config { + pub parse_theme: Option, +} + +#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct ParseThemeRaw { + pub node_kind: Option, + pub node_text: Option, + pub field: Option, + pub row_color: Option, + pub row_color_named: Option, + pub extra: Option, + pub error: Option, + pub missing: Option, + pub line_feed: Option, + pub backtick: Option, + pub literal: Option, +} + +impl From for ParseTheme { + fn from(value: ParseThemeRaw) -> Self { + let val_or_default = |val: Option, default: Option| -> Option { + val.map_or(default, |v| Some(Color::Rgb(v.into()))) + }; + let default = Self::default(); + + Self { + node_kind: val_or_default(value.node_kind, default.node_kind), + node_text: val_or_default(value.node_text, default.node_text), + field: val_or_default(value.field, default.field), + row_color: val_or_default(value.row_color, default.row_color), + row_color_named: val_or_default(value.row_color_named, default.row_color_named), + extra: val_or_default(value.extra, default.extra), + error: val_or_default(value.error, default.error), + missing: val_or_default(value.missing, default.missing), + line_feed: val_or_default(value.line_feed, default.line_feed), + backtick: val_or_default(value.backtick, default.backtick), + literal: val_or_default(value.literal, default.literal), + } + } +} + #[derive(Copy, Clone, PartialEq, Eq)] pub enum ParseOutput { Normal, Quiet, Xml, + Cst, Dot, } @@ -61,6 +188,7 @@ pub struct ParseFileOptions<'a> { pub encoding: Option, pub open_log: bool, pub no_ranges: bool, + pub parse_theme: &'a ParseTheme, } #[derive(Copy, Clone)] @@ -219,6 +347,49 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: &ParseFileOptions) -> Resul println!(); } + if opts.output == ParseOutput::Cst { + let lossy_source_code = String::from_utf8_lossy(&source_code); + let total_width = lossy_source_code + .lines() + .enumerate() + .map(|(row, col)| { + (row as f64).log10() as usize + (col.len() as f64).log10() as usize + 1 + }) + .max() + .unwrap_or(1); + let mut indent_level = 1; + let mut did_visit_children = false; + loop { + if did_visit_children { + if cursor.goto_next_sibling() { + did_visit_children = false; + } else if cursor.goto_parent() { + did_visit_children = true; + indent_level -= 1; + } else { + break; + } + } else { + cst_render_node( + opts, + &mut cursor, + &source_code, + &mut stdout, + total_width, + indent_level, + )?; + if cursor.goto_first_child() { + did_visit_children = false; + indent_level += 1; + } else { + did_visit_children = true; + } + } + } + cursor.reset(tree.root_node()); + println!(); + } + if opts.output == ParseOutput::Xml { let mut needs_newline = false; let mut indent_level = 0; @@ -294,11 +465,6 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: &ParseFileOptions) -> Resul let end = node.end_byte(); let value = std::str::from_utf8(&source_code[start..end]).expect("has a string"); - // if !is_named { - // for _ in 0..indent_level { - // stdout.write_all(b" ")?; - // } - // } if !is_named && needs_newline { stdout.write_all(b"\n")?; for _ in 0..indent_level { @@ -393,6 +559,212 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: &ParseFileOptions) -> Resul }) } +const fn escape_invisible(c: char) -> Option<&'static str> { + Some(match c { + '\n' => "\\n", + '\r' => "\\r", + '\t' => "\\t", + '\0' => "\\0", + '\\' => "\\\\", + '\x0b' => "\\v", + '\x0c' => "\\f", + _ => return None, + }) +} + +fn render_node_text(source: &str) -> String { + source + .chars() + .fold(String::with_capacity(source.len()), |mut acc, c| { + if let Some(esc) = escape_invisible(c) { + acc.push_str(esc); + } else { + acc.push(c); + } + acc + }) +} + +fn write_node_text( + opts: &ParseFileOptions, + stdout: &mut StdoutLock<'static>, + cursor: &TreeCursor, + is_named: bool, + source: &str, + color: Option + Copy>, + text_info: (usize, usize), +) -> Result<()> { + let (total_width, indent_level) = text_info; + let (quote, quote_color) = if is_named { + ('`', opts.parse_theme.backtick) + } else { + ('\"', color.map(|c| c.into())) + }; + + if !is_named { + write!( + stdout, + "{}{}{}", + paint(quote_color, &String::from(quote)), + paint(color, &render_node_text(source)), + paint(quote_color, &String::from(quote)), + )?; + } else { + for (i, line) in source.split_inclusive('\n').enumerate() { + if line.is_empty() { + break; + } + let mut node_range = cursor.node().range(); + // For each line of text, adjust the row by shifting it down `i` rows, + // and adjust the column by setting it to the length of *this* line. + node_range.start_point.row += i; + node_range.end_point.row = node_range.start_point.row; + node_range.end_point.column = line.len(); + let formatted_line = render_line_feed(line, opts); + if !opts.no_ranges { + write!( + stdout, + "\n{}{}{}{}{}", + render_node_range(opts, cursor, is_named, true, total_width, node_range), + " ".repeat(indent_level + 1), + paint(quote_color, &String::from(quote)), + &paint(color, &render_node_text(&formatted_line)), + paint(quote_color, &String::from(quote)), + )?; + } else { + write!( + stdout, + "\n{}{}{}{}", + " ".repeat(indent_level + 1), + paint(quote_color, &String::from(quote)), + &paint(color, &render_node_text(&formatted_line)), + paint(quote_color, &String::from(quote)), + )?; + } + } + } + + Ok(()) +} + +fn render_line_feed(source: &str, opts: &ParseFileOptions) -> String { + if cfg!(windows) { + source.replace("\r\n", &paint(opts.parse_theme.line_feed, "\r\n")) + } else { + source.replace('\n', &paint(opts.parse_theme.line_feed, "\n")) + } +} + +fn render_node_range( + opts: &ParseFileOptions, + cursor: &TreeCursor, + is_named: bool, + is_multiline: bool, + total_width: usize, + range: Range, +) -> String { + let has_field_name = cursor.field_name().is_some(); + let range_color = if is_named && !is_multiline && !has_field_name { + opts.parse_theme.row_color_named + } else { + opts.parse_theme.row_color + }; + + let remaining_width_start = (total_width + - (range.start_point.row as f64).log10() as usize + - (range.start_point.column as f64).log10() as usize) + .max(1); + let remaining_width_end = (total_width + - (range.end_point.row as f64).log10() as usize + - (range.end_point.column as f64).log10() as usize) + .max(1); + paint( + range_color, + &format!( + "{}:{}{:remaining_width_start$}- {}:{}{:remaining_width_end$}", + range.start_point.row, + range.start_point.column, + ' ', + range.end_point.row, + range.end_point.column, + ' ', + ), + ) +} + +fn cst_render_node( + opts: &ParseFileOptions, + cursor: &mut TreeCursor, + source_code: &[u8], + stdout: &mut StdoutLock<'static>, + total_width: usize, + indent_level: usize, +) -> Result<()> { + let node = cursor.node(); + let is_named = node.is_named(); + if !opts.no_ranges { + write!( + stdout, + "{}", + render_node_range(opts, cursor, is_named, false, total_width, node.range()) + )?; + } + write!(stdout, "{}", " ".repeat(indent_level))?; + if is_named { + if let Some(field_name) = cursor.field_name() { + write!( + stdout, + "{}", + paint(opts.parse_theme.field, &format!("{field_name}: ")) + )?; + } + + let kind_color = if node.has_error() { + write!(stdout, "{}", paint(opts.parse_theme.error, "•"))?; + opts.parse_theme.error + } else if node.is_extra() || node.parent().is_some_and(|p| p.is_extra()) { + opts.parse_theme.extra + } else { + opts.parse_theme.node_kind + }; + write!(stdout, "{} ", paint(kind_color, node.kind()))?; + + if node.child_count() == 0 { + // Node text from a pattern or external scanner + write_node_text( + opts, + stdout, + cursor, + is_named, + &String::from_utf8_lossy(&source_code[node.start_byte()..node.end_byte()]), + opts.parse_theme.node_text, + (total_width, indent_level), + )?; + } + } else if node.is_missing() { + write!(stdout, "{}: ", paint(opts.parse_theme.missing, "MISSING"))?; + write!( + stdout, + "\"{}\"", + paint(opts.parse_theme.missing, node.kind()) + )?; + } else { + // Terminal literals, like "fn" + write_node_text( + opts, + stdout, + cursor, + is_named, + node.kind(), + opts.parse_theme.literal, + (total_width, indent_level), + )?; + } + writeln!(stdout)?; + + Ok(()) +} + pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> Result { let start_byte = edit.position; let old_end_byte = edit.position + edit.deleted_length; diff --git a/cli/src/test.rs b/cli/src/test.rs index 2502c44c..aca94574 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -327,8 +327,8 @@ pub fn print_diff(actual: &str, expected: &str, use_color: bool) { println!(); } -pub fn paint(color: Option, text: &str) -> String { - let style = Style::new().fg_color(color.map(Color::Ansi)); +pub fn paint(color: Option>, text: &str) -> String { + let style = Style::new().fg_color(color.map(Into::into)); format!("{style}{text}{style:#}") } diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index e2b445b5..2c32a10f 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -390,7 +390,7 @@ You can run your parser on an arbitrary file using `tree-sitter parse`. This wil (int_literal [1, 9] - [1, 10])))))) ``` -You can pass any number of file paths and glob patterns to `tree-sitter parse`, and it will parse all of the given files. The command will exit with a non-zero status code if any parse errors occurred. You can also prevent the syntax trees from being printed using the `--quiet` flag. Additionally, the `--stat` flag prints out aggregated parse success/failure information for all processed files. This makes `tree-sitter parse` usable as a secondary testing strategy: you can check that a large number of files parse without error: +You can pass any number of file paths and glob patterns to `tree-sitter parse`, and it will parse all of the given files. The command will exit with a non-zero status code if any parse errors occurred. Passing the `--cst` flag will output a pretty-printed CST instead of the normal S-expression representation. You can also prevent the syntax trees from being printed using the `--quiet` flag. Additionally, the `--stat` flag prints out aggregated parse success/failure information for all processed files. This makes `tree-sitter parse` usable as a secondary testing strategy: you can check that a large number of files parse without error: ```sh tree-sitter parse 'examples/**/*.go' --quiet --stat diff --git a/docs/section-4-syntax-highlighting.md b/docs/section-4-syntax-highlighting.md index 4e7c8c3e..67c2bc25 100644 --- a/docs/section-4-syntax-highlighting.md +++ b/docs/section-4-syntax-highlighting.md @@ -58,6 +58,39 @@ The Tree-sitter highlighting system works by annotating ranges of source code wi In your config file, the `"theme"` value is an object whose keys are dot-separated highlight names like `function.builtin` or `keyword`, and whose values are JSON expressions that represent text styling parameters. +### Parse Theme + +The Tree-sitter `parse` command will output a pretty-printed CST when the `--cst` option is used. You can control which colors are used for various parts of the tree in your configuration file. Note that omitting a field will cause the relevant text to be rendered with its default color. + +```json5 +{ + "parse-theme": { + // The color of node kinds + "node-kind": [20, 20, 20], + // The color of text associated with a node + "node-text": [255, 255, 255], + // The color of node fields + "field": [42, 42, 42], + // The color of the range information for unnamed nodes + "row-color": [255, 255, 255], + // The color of the range information for named nodes + "row-color-named": [255, 130, 0], + // The color of extra nodes + "extra": [255, 0, 255], + // The color of ERROR nodes + "error": [255, 0, 0], + // The color of MISSING nodes and their associated text + "missing": [153, 75, 0], + // The color of newline characters + "line-feed": [150, 150, 150], + // The color of backtick characters + "backtick": [0, 200, 0], + // The color of literals + "literal": [0, 0, 200], + } +} +``` + #### Highlight Names A theme can contain multiple keys that share a common subsequence. Examples: