From bd466febb405ff78c146ffef26e715927578c23b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 16 Jul 2019 11:40:19 -0700 Subject: [PATCH] highlight: Fix panic when cancelled before parsing a nested document --- cli/src/highlight.rs | 37 ++++++- cli/src/main.rs | 1 + cli/src/parse.rs | 2 +- cli/src/test.rs | 2 +- cli/src/tests/highlight_test.rs | 56 ++++++++-- highlight/include/tree_sitter/highlight.h | 1 + highlight/src/c_lib.rs | 20 +++- highlight/src/lib.rs | 121 ++++++++++++++-------- lib/binding_rust/lib.rs | 35 ++++--- 9 files changed, 201 insertions(+), 74 deletions(-) diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index 4bd0b19a..ddcdaa77 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -6,8 +6,10 @@ use serde::ser::SerializeMap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_json::{json, Value}; use std::collections::HashMap; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; use std::time::Instant; -use std::{fmt, fs, io, path}; +use std::{fmt, fs, io, path, thread}; use tree_sitter::{Language, PropertySheet}; use tree_sitter_highlight::{highlight, highlight_html, Highlight, HighlightEvent, Properties}; @@ -252,6 +254,19 @@ fn color_to_css(color: Color) -> &'static str { } } +fn cancel_on_stdin() -> Arc { + let result = Arc::new(AtomicUsize::new(0)); + thread::spawn({ + let flag = result.clone(); + move || { + let mut line = String::new(); + io::stdin().read_line(&mut line).unwrap(); + flag.store(1, Ordering::Relaxed); + } + }); + result +} + pub fn ansi( loader: &Loader, theme: &Theme, @@ -264,11 +279,19 @@ pub fn ansi( let stdout = io::stdout(); let mut stdout = stdout.lock(); + let cancellation_flag = cancel_on_stdin(); let time = Instant::now(); let mut highlight_stack = Vec::new(); - for event in highlight(source, language, property_sheet, |s| { - language_for_injection_string(loader, s) - })? { + for event in highlight( + source, + language, + property_sheet, + Some(cancellation_flag.as_ref()), + |s| language_for_injection_string(loader, s), + ) + .map_err(|e| e.to_string())? + { + let event = event.map_err(|e| e.to_string())?; match event { HighlightEvent::Source(s) => { if let Some(style) = highlight_stack.last().and_then(|s| theme.ansi_style(*s)) { @@ -332,10 +355,13 @@ pub fn html( let stdout = io::stdout(); let mut stdout = stdout.lock(); write!(&mut stdout, "\n")?; + + let cancellation_flag = cancel_on_stdin(); let lines = highlight_html( source, language, property_sheet, + Some(cancellation_flag.as_ref()), |s| language_for_injection_string(loader, s), |highlight| { if let Some(css_style) = theme.css_style(highlight) { @@ -344,7 +370,8 @@ pub fn html( "" } }, - )?; + ) + .map_err(|e| e.to_string())?; for (i, line) in lines.into_iter().enumerate() { write!( &mut stdout, diff --git a/cli/src/main.rs b/cli/src/main.rs index 7a6eba35..e8e9734f 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -14,6 +14,7 @@ const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA"); fn main() { if let Err(e) = run() { + println!(""); eprintln!("{}", e.message()); exit(1); } diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 0320c2b4..d1ddb499 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -28,7 +28,7 @@ pub fn parse_file_at_path( ) -> Result { let mut _log_session = None; let mut parser = Parser::new(); - parser.set_language(language)?; + parser.set_language(language).map_err(|e| e.to_string())?; let mut source_code = fs::read(path).map_err(Error::wrap(|| { format!("Error reading source file {:?}", path) }))?; diff --git a/cli/src/test.rs b/cli/src/test.rs index 82166afb..bc05f296 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -58,7 +58,7 @@ pub fn run_tests_at_path( let test_entry = parse_tests(path)?; let mut _log_session = None; let mut parser = Parser::new(); - parser.set_language(language)?; + parser.set_language(language).map_err(|e| e.to_string())?; if debug_graph { _log_session = Some(util::log_graphs(&mut parser, "log.html")?); diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 6a6fd6e7..79e70546 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -1,9 +1,13 @@ use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json}; use lazy_static::lazy_static; use std::ffi::CString; + +use std::sync::atomic::{AtomicUsize, Ordering}; use std::{ptr, slice, str}; use tree_sitter::{Language, PropertySheet}; -use tree_sitter_highlight::{c, highlight, highlight_html, Highlight, HighlightEvent, Properties}; +use tree_sitter_highlight::{ + c, highlight, highlight_html, Error, Highlight, HighlightEvent, Properties, +}; lazy_static! { static ref JS_SHEET: PropertySheet = @@ -209,9 +213,7 @@ fn test_highlighting_with_local_variable_tracking() { (")", vec![Highlight::PunctuationBracket]), (";", vec![Highlight::PunctuationDelimiter]), ], - vec![ - ("}", vec![Highlight::PunctuationBracket]) - ] + vec![("}", vec![Highlight::PunctuationBracket])] ], ); } @@ -307,6 +309,44 @@ fn test_highlighting_with_content_children_included() { ); } +#[test] +fn test_highlighting_cancellation() { + // An HTML document with a large injected JavaScript document: + let mut source = "\n"; + + // Cancel the highlighting before parsing the injected document. + let cancellation_flag = AtomicUsize::new(0); + let injection_callback = |name: &str| { + cancellation_flag.store(1, Ordering::SeqCst); + test_language_for_injection_string(name) + }; + + // Constructing the highlighter, which eagerly parses the outer document, + // should not fail. + let highlighter = highlight( + source.as_bytes(), + get_language("html"), + &HTML_SHEET, + Some(&cancellation_flag), + injection_callback, + ) + .unwrap(); + + // Iterating the scopes should not panic. It should return an error + // once the cancellation is detected. + for event in highlighter { + if let Err(e) = event { + assert_eq!(e, Error::Cancelled); + return; + } + } + panic!("Expected an error while iterating highlighter"); +} + #[test] fn test_highlighting_via_c_api() { let js_lang = get_language("javascript"); @@ -410,11 +450,12 @@ fn to_html<'a>( src: &'a str, language: Language, property_sheet: &'a PropertySheet, -) -> Result, String> { +) -> Result, Error> { highlight_html( src.as_bytes(), language, property_sheet, + None, &test_language_for_injection_string, &|highlight| SCOPE_CLASS_STRINGS[highlight as usize].as_str(), ) @@ -424,7 +465,7 @@ fn to_token_vector<'a>( src: &'a str, language: Language, property_sheet: &'a PropertySheet, -) -> Result)>>, String> { +) -> Result)>>, Error> { let mut lines = Vec::new(); let mut highlights = Vec::new(); let mut line = Vec::new(); @@ -432,9 +473,10 @@ fn to_token_vector<'a>( src.as_bytes(), language, property_sheet, + None, &test_language_for_injection_string, )? { - match event { + match event? { HighlightEvent::HighlightStart(s) => highlights.push(s), HighlightEvent::HighlightEnd => { highlights.pop(); diff --git a/highlight/include/tree_sitter/highlight.h b/highlight/include/tree_sitter/highlight.h index b77fb9b7..8e879b5e 100644 --- a/highlight/include/tree_sitter/highlight.h +++ b/highlight/include/tree_sitter/highlight.h @@ -11,6 +11,7 @@ typedef enum { TSHighlightOk, TSHighlightUnknownScope, TSHighlightTimeout, + TSHighlightInvalidLanguage, } TSHighlightError; // The list of scopes which can be styled for syntax highlighting. diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs index 73ebda7d..8ddb476b 100644 --- a/highlight/src/c_lib.rs +++ b/highlight/src/c_lib.rs @@ -1,4 +1,4 @@ -use super::{escape, load_property_sheet, Highlight, HighlightEvent, Highlighter, Properties}; +use super::{escape, load_property_sheet, Error, Highlight, HighlightEvent, Highlighter, Properties}; use regex::Regex; use std::collections::HashMap; use std::ffi::CStr; @@ -30,6 +30,7 @@ pub enum ErrorCode { Ok, UnknownScope, Timeout, + InvalidLanguage, } #[no_mangle] @@ -188,18 +189,27 @@ impl TSHighlighter { let mut highlights = Vec::new(); for event in highlighter { match event { - HighlightEvent::HighlightStart(s) => { + Ok(HighlightEvent::HighlightStart(s)) => { highlights.push(s); output.start_highlight(s, &self.attribute_strings); } - HighlightEvent::HighlightEnd => { + Ok(HighlightEvent::HighlightEnd) => { highlights.pop(); output.end_highlight(); } - HighlightEvent::Source(src) => { + Ok(HighlightEvent::Source(src)) => { output.add_text(src, &highlights, &self.attribute_strings); + }, + Err(Error::Cancelled) => { + return ErrorCode::Timeout; + }, + Err(Error::InvalidLanguage) => { + return ErrorCode::InvalidLanguage; + }, + Err(Error::Unknown) => { + return ErrorCode::Timeout; } - }; + } } ErrorCode::Ok } else { diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 2e3d3233..0663bf92 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -12,6 +12,13 @@ use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, Tre const CANCELLATION_CHECK_INTERVAL: usize = 100; +#[derive(Debug, PartialEq, Eq)] +pub enum Error { + Cancelled, + InvalidLanguage, + Unknown, +} + #[derive(Debug)] enum TreeStep { Child { @@ -194,6 +201,16 @@ pub enum PropertySheetError { InvalidFormat(String), } +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::Cancelled => write!(f, "Cancelled"), + Error::InvalidLanguage => write!(f, "Invalid language"), + Error::Unknown => write!(f, "Unknown error"), + } + } +} + impl fmt::Display for PropertySheetError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -440,13 +457,13 @@ where property_sheet: &'a PropertySheet, injection_callback: F, cancellation_flag: Option<&'a AtomicUsize>, - ) -> Result { + ) -> Result { let mut parser = Parser::new(); unsafe { parser.set_cancellation_flag(cancellation_flag.clone()) }; - parser.set_language(language)?; - let tree = parser - .parse(source, None) - .ok_or_else(|| format!("Tree-sitter: failed to parse"))?; + parser + .set_language(language) + .map_err(|_| Error::InvalidLanguage)?; + let tree = parser.parse(source, None).ok_or_else(|| Error::Cancelled)?; Ok(Self { parser, source, @@ -472,24 +489,24 @@ where }) } - fn emit_source(&mut self, next_offset: usize) -> Option> { + fn emit_source(&mut self, next_offset: usize) -> Option, Error>> { let input = &self.source[self.source_offset..next_offset]; match str::from_utf8(input) { Ok(valid) => { self.source_offset = next_offset; - Some(HighlightEvent::Source(valid)) + Some(Ok(HighlightEvent::Source(valid))) } Err(error) => { if let Some(error_len) = error.error_len() { if error.valid_up_to() > 0 { let prefix = &input[0..error.valid_up_to()]; self.utf8_error_len = Some(error_len); - Some(HighlightEvent::Source(unsafe { + Some(Ok(HighlightEvent::Source(unsafe { str::from_utf8_unchecked(prefix) - })) + }))) } else { self.source_offset += error_len; - Some(HighlightEvent::Source("\u{FFFD}")) + Some(Ok(HighlightEvent::Source("\u{FFFD}"))) } } else { None @@ -665,31 +682,32 @@ where ranges: Vec, depth: usize, includes_children: bool, - ) { + ) -> Option { if let Some((language, property_sheet)) = (self.injection_callback)(language_string) { - self.parser - .set_language(language) - .expect("Failed to set language"); - self.parser.set_included_ranges(&ranges); - let tree = self - .parser - .parse(self.source, None) - .expect("Failed to parse"); - let layer = Layer::new( - self.source, - tree, - property_sheet, - ranges, - depth, - includes_children, - ); - if includes_children && depth > self.max_opaque_layer_depth { - self.max_opaque_layer_depth = depth; + if self.parser.set_language(language).is_err() { + return Some(Error::InvalidLanguage); + } + self.parser.set_included_ranges(&ranges); + if let Some(tree) = self.parser.parse(self.source, None) { + let layer = Layer::new( + self.source, + tree, + property_sheet, + ranges, + depth, + includes_children, + ); + if includes_children && depth > self.max_opaque_layer_depth { + self.max_opaque_layer_depth = depth; + } + match self.layers.binary_search_by(|l| l.cmp(&layer)) { + Ok(i) | Err(i) => self.layers.insert(i, layer), + }; + } else { + return Some(Error::Cancelled); } - match self.layers.binary_search_by(|l| l.cmp(&layer)) { - Ok(i) | Err(i) => self.layers.insert(i, layer), - }; } + None } fn remove_first_layer(&mut self) { @@ -709,7 +727,7 @@ impl<'a, T> Iterator for Highlighter<'a, T> where T: Fn(&str) -> Option<(Language, &'a PropertySheet)>, { - type Item = HighlightEvent<'a>; + type Item = Result, Error>; fn next(&mut self) -> Option { if let Some(cancellation_flag) = self.cancellation_flag { @@ -717,14 +735,14 @@ where if self.operation_count >= CANCELLATION_CHECK_INTERVAL { self.operation_count = 0; if cancellation_flag.load(Ordering::Relaxed) != 0 { - return None; + return Some(Err(Error::Cancelled)); } } } if let Some(utf8_error_len) = self.utf8_error_len.take() { self.source_offset += utf8_error_len; - return Some(HighlightEvent::Source("\u{FFFD}")); + return Some(Ok(HighlightEvent::Source("\u{FFFD}"))); } while !self.layers.is_empty() { @@ -771,7 +789,11 @@ where let depth = first_layer.depth + 1; for (language, ranges, includes_children) in injections { - self.add_layer(&language, ranges, depth, includes_children); + if let Some(error) = + self.add_layer(&language, ranges, depth, includes_children) + { + return Some(Err(error)); + } } } @@ -790,9 +812,9 @@ where } scope_event = if first_layer.at_node_end { - Some(HighlightEvent::HighlightEnd) + Some(Ok(HighlightEvent::HighlightEnd)) } else { - Some(HighlightEvent::HighlightStart(highlight)) + Some(Ok(HighlightEvent::HighlightStart(highlight))) }; } } @@ -1057,29 +1079,44 @@ pub fn highlight<'a, F>( source: &'a [u8], language: Language, property_sheet: &'a PropertySheet, + cancellation_flag: Option<&'a AtomicUsize>, injection_callback: F, -) -> Result> + 'a, String> +) -> Result, Error>> + 'a, Error> where F: Fn(&str) -> Option<(Language, &'a PropertySheet)> + 'a, { - Highlighter::new(source, language, property_sheet, injection_callback, None) + Highlighter::new( + source, + language, + property_sheet, + injection_callback, + cancellation_flag, + ) } pub fn highlight_html<'a, F1, F2>( source: &'a [u8], language: Language, property_sheet: &'a PropertySheet, + cancellation_flag: Option<&'a AtomicUsize>, injection_callback: F1, attribute_callback: F2, -) -> Result, String> +) -> Result, Error> where F1: Fn(&str) -> Option<(Language, &'a PropertySheet)>, F2: Fn(Highlight) -> &'a str, { - let highlighter = Highlighter::new(source, language, property_sheet, injection_callback, None)?; + let highlighter = Highlighter::new( + source, + language, + property_sheet, + injection_callback, + cancellation_flag, + )?; let mut renderer = HtmlRenderer::new(attribute_callback); let mut scopes = Vec::new(); for event in highlighter { + let event = event?; match event { HighlightEvent::HighlightStart(s) => { scopes.push(s); diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index ba2d01f7..f6ee0632 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -25,6 +25,11 @@ pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/par #[repr(transparent)] pub struct Language(*const ffi::TSLanguage); +#[derive(Debug, PartialEq, Eq)] +pub struct LanguageError { + version: usize, +} + #[derive(Debug, PartialEq, Eq)] pub enum LogType { Parse, @@ -162,6 +167,18 @@ impl Language { } } +impl fmt::Display for LanguageError { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!( + f, + "Incompatible language version {}. Expected minimum {}, maximum {}", + self.version, + ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION, + ffi::TREE_SITTER_LANGUAGE_VERSION + ) + } +} + unsafe impl Send for Language {} unsafe impl Sync for Language {} @@ -174,21 +191,13 @@ impl Parser { } } - pub fn set_language(&mut self, language: Language) -> Result<(), String> { + pub fn set_language(&mut self, language: Language) -> Result<(), LanguageError> { unsafe { let version = ffi::ts_language_version(language.0) as usize; - if version < ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION { - Err(format!( - "Incompatible language version {}. Expected {} or greater.", - version, - ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION - )) - } else if version > ffi::TREE_SITTER_LANGUAGE_VERSION { - Err(format!( - "Incompatible language version {}. Expected {}.", - version, - ffi::TREE_SITTER_LANGUAGE_VERSION - )) + if version < ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION + || version > ffi::TREE_SITTER_LANGUAGE_VERSION + { + Err(LanguageError { version }) } else { ffi::ts_parser_set_language(self.0, language.0); Ok(())