highlight: Fix panic when cancelled before parsing a nested document

This commit is contained in:
Max Brunsfeld 2019-07-16 11:40:19 -07:00
parent 34de25ce54
commit bd466febb4
9 changed files with 201 additions and 74 deletions

View file

@ -6,8 +6,10 @@ use serde::ser::SerializeMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::Instant;
use std::{fmt, fs, io, path};
use std::{fmt, fs, io, path, thread};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{highlight, highlight_html, Highlight, HighlightEvent, Properties};
@ -252,6 +254,19 @@ fn color_to_css(color: Color) -> &'static str {
}
}
fn cancel_on_stdin() -> Arc<AtomicUsize> {
let result = Arc::new(AtomicUsize::new(0));
thread::spawn({
let flag = result.clone();
move || {
let mut line = String::new();
io::stdin().read_line(&mut line).unwrap();
flag.store(1, Ordering::Relaxed);
}
});
result
}
pub fn ansi(
loader: &Loader,
theme: &Theme,
@ -264,11 +279,19 @@ pub fn ansi(
let stdout = io::stdout();
let mut stdout = stdout.lock();
let cancellation_flag = cancel_on_stdin();
let time = Instant::now();
let mut highlight_stack = Vec::new();
for event in highlight(source, language, property_sheet, |s| {
language_for_injection_string(loader, s)
})? {
for event in highlight(
source,
language,
property_sheet,
Some(cancellation_flag.as_ref()),
|s| language_for_injection_string(loader, s),
)
.map_err(|e| e.to_string())?
{
let event = event.map_err(|e| e.to_string())?;
match event {
HighlightEvent::Source(s) => {
if let Some(style) = highlight_stack.last().and_then(|s| theme.ansi_style(*s)) {
@ -332,10 +355,13 @@ pub fn html(
let stdout = io::stdout();
let mut stdout = stdout.lock();
write!(&mut stdout, "<table>\n")?;
let cancellation_flag = cancel_on_stdin();
let lines = highlight_html(
source,
language,
property_sheet,
Some(cancellation_flag.as_ref()),
|s| language_for_injection_string(loader, s),
|highlight| {
if let Some(css_style) = theme.css_style(highlight) {
@ -344,7 +370,8 @@ pub fn html(
""
}
},
)?;
)
.map_err(|e| e.to_string())?;
for (i, line) in lines.into_iter().enumerate() {
write!(
&mut stdout,

View file

@ -14,6 +14,7 @@ const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA");
fn main() {
if let Err(e) = run() {
println!("");
eprintln!("{}", e.message());
exit(1);
}

View file

@ -28,7 +28,7 @@ pub fn parse_file_at_path(
) -> Result<bool> {
let mut _log_session = None;
let mut parser = Parser::new();
parser.set_language(language)?;
parser.set_language(language).map_err(|e| e.to_string())?;
let mut source_code = fs::read(path).map_err(Error::wrap(|| {
format!("Error reading source file {:?}", path)
}))?;

View file

@ -58,7 +58,7 @@ pub fn run_tests_at_path(
let test_entry = parse_tests(path)?;
let mut _log_session = None;
let mut parser = Parser::new();
parser.set_language(language)?;
parser.set_language(language).map_err(|e| e.to_string())?;
if debug_graph {
_log_session = Some(util::log_graphs(&mut parser, "log.html")?);

View file

@ -1,9 +1,13 @@
use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json};
use lazy_static::lazy_static;
use std::ffi::CString;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{ptr, slice, str};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{c, highlight, highlight_html, Highlight, HighlightEvent, Properties};
use tree_sitter_highlight::{
c, highlight, highlight_html, Error, Highlight, HighlightEvent, Properties,
};
lazy_static! {
static ref JS_SHEET: PropertySheet<Properties> =
@ -209,9 +213,7 @@ fn test_highlighting_with_local_variable_tracking() {
(")", vec![Highlight::PunctuationBracket]),
(";", vec![Highlight::PunctuationDelimiter]),
],
vec![
("}", vec![Highlight::PunctuationBracket])
]
vec![("}", vec![Highlight::PunctuationBracket])]
],
);
}
@ -307,6 +309,44 @@ fn test_highlighting_with_content_children_included() {
);
}
#[test]
fn test_highlighting_cancellation() {
// An HTML document with a large injected JavaScript document:
let mut source = "<script>\n".to_string();
for _ in 0..500 {
source += "function a() { console.log('hi'); }\n";
}
source += "</script>\n";
// Cancel the highlighting before parsing the injected document.
let cancellation_flag = AtomicUsize::new(0);
let injection_callback = |name: &str| {
cancellation_flag.store(1, Ordering::SeqCst);
test_language_for_injection_string(name)
};
// Constructing the highlighter, which eagerly parses the outer document,
// should not fail.
let highlighter = highlight(
source.as_bytes(),
get_language("html"),
&HTML_SHEET,
Some(&cancellation_flag),
injection_callback,
)
.unwrap();
// Iterating the scopes should not panic. It should return an error
// once the cancellation is detected.
for event in highlighter {
if let Err(e) = event {
assert_eq!(e, Error::Cancelled);
return;
}
}
panic!("Expected an error while iterating highlighter");
}
#[test]
fn test_highlighting_via_c_api() {
let js_lang = get_language("javascript");
@ -410,11 +450,12 @@ fn to_html<'a>(
src: &'a str,
language: Language,
property_sheet: &'a PropertySheet<Properties>,
) -> Result<Vec<String>, String> {
) -> Result<Vec<String>, Error> {
highlight_html(
src.as_bytes(),
language,
property_sheet,
None,
&test_language_for_injection_string,
&|highlight| SCOPE_CLASS_STRINGS[highlight as usize].as_str(),
)
@ -424,7 +465,7 @@ fn to_token_vector<'a>(
src: &'a str,
language: Language,
property_sheet: &'a PropertySheet<Properties>,
) -> Result<Vec<Vec<(&'a str, Vec<Highlight>)>>, String> {
) -> Result<Vec<Vec<(&'a str, Vec<Highlight>)>>, Error> {
let mut lines = Vec::new();
let mut highlights = Vec::new();
let mut line = Vec::new();
@ -432,9 +473,10 @@ fn to_token_vector<'a>(
src.as_bytes(),
language,
property_sheet,
None,
&test_language_for_injection_string,
)? {
match event {
match event? {
HighlightEvent::HighlightStart(s) => highlights.push(s),
HighlightEvent::HighlightEnd => {
highlights.pop();

View file

@ -11,6 +11,7 @@ typedef enum {
TSHighlightOk,
TSHighlightUnknownScope,
TSHighlightTimeout,
TSHighlightInvalidLanguage,
} TSHighlightError;
// The list of scopes which can be styled for syntax highlighting.

View file

@ -1,4 +1,4 @@
use super::{escape, load_property_sheet, Highlight, HighlightEvent, Highlighter, Properties};
use super::{escape, load_property_sheet, Error, Highlight, HighlightEvent, Highlighter, Properties};
use regex::Regex;
use std::collections::HashMap;
use std::ffi::CStr;
@ -30,6 +30,7 @@ pub enum ErrorCode {
Ok,
UnknownScope,
Timeout,
InvalidLanguage,
}
#[no_mangle]
@ -188,18 +189,27 @@ impl TSHighlighter {
let mut highlights = Vec::new();
for event in highlighter {
match event {
HighlightEvent::HighlightStart(s) => {
Ok(HighlightEvent::HighlightStart(s)) => {
highlights.push(s);
output.start_highlight(s, &self.attribute_strings);
}
HighlightEvent::HighlightEnd => {
Ok(HighlightEvent::HighlightEnd) => {
highlights.pop();
output.end_highlight();
}
HighlightEvent::Source(src) => {
Ok(HighlightEvent::Source(src)) => {
output.add_text(src, &highlights, &self.attribute_strings);
},
Err(Error::Cancelled) => {
return ErrorCode::Timeout;
},
Err(Error::InvalidLanguage) => {
return ErrorCode::InvalidLanguage;
},
Err(Error::Unknown) => {
return ErrorCode::Timeout;
}
};
}
}
ErrorCode::Ok
} else {

View file

@ -12,6 +12,13 @@ use tree_sitter::{Language, Node, Parser, Point, PropertySheet, Range, Tree, Tre
const CANCELLATION_CHECK_INTERVAL: usize = 100;
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
Cancelled,
InvalidLanguage,
Unknown,
}
#[derive(Debug)]
enum TreeStep {
Child {
@ -194,6 +201,16 @@ pub enum PropertySheetError {
InvalidFormat(String),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Error::Cancelled => write!(f, "Cancelled"),
Error::InvalidLanguage => write!(f, "Invalid language"),
Error::Unknown => write!(f, "Unknown error"),
}
}
}
impl fmt::Display for PropertySheetError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
@ -440,13 +457,13 @@ where
property_sheet: &'a PropertySheet<Properties>,
injection_callback: F,
cancellation_flag: Option<&'a AtomicUsize>,
) -> Result<Self, String> {
) -> Result<Self, Error> {
let mut parser = Parser::new();
unsafe { parser.set_cancellation_flag(cancellation_flag.clone()) };
parser.set_language(language)?;
let tree = parser
.parse(source, None)
.ok_or_else(|| format!("Tree-sitter: failed to parse"))?;
parser
.set_language(language)
.map_err(|_| Error::InvalidLanguage)?;
let tree = parser.parse(source, None).ok_or_else(|| Error::Cancelled)?;
Ok(Self {
parser,
source,
@ -472,24 +489,24 @@ where
})
}
fn emit_source(&mut self, next_offset: usize) -> Option<HighlightEvent<'a>> {
fn emit_source(&mut self, next_offset: usize) -> Option<Result<HighlightEvent<'a>, Error>> {
let input = &self.source[self.source_offset..next_offset];
match str::from_utf8(input) {
Ok(valid) => {
self.source_offset = next_offset;
Some(HighlightEvent::Source(valid))
Some(Ok(HighlightEvent::Source(valid)))
}
Err(error) => {
if let Some(error_len) = error.error_len() {
if error.valid_up_to() > 0 {
let prefix = &input[0..error.valid_up_to()];
self.utf8_error_len = Some(error_len);
Some(HighlightEvent::Source(unsafe {
Some(Ok(HighlightEvent::Source(unsafe {
str::from_utf8_unchecked(prefix)
}))
})))
} else {
self.source_offset += error_len;
Some(HighlightEvent::Source("\u{FFFD}"))
Some(Ok(HighlightEvent::Source("\u{FFFD}")))
}
} else {
None
@ -665,31 +682,32 @@ where
ranges: Vec<Range>,
depth: usize,
includes_children: bool,
) {
) -> Option<Error> {
if let Some((language, property_sheet)) = (self.injection_callback)(language_string) {
self.parser
.set_language(language)
.expect("Failed to set language");
self.parser.set_included_ranges(&ranges);
let tree = self
.parser
.parse(self.source, None)
.expect("Failed to parse");
let layer = Layer::new(
self.source,
tree,
property_sheet,
ranges,
depth,
includes_children,
);
if includes_children && depth > self.max_opaque_layer_depth {
self.max_opaque_layer_depth = depth;
if self.parser.set_language(language).is_err() {
return Some(Error::InvalidLanguage);
}
self.parser.set_included_ranges(&ranges);
if let Some(tree) = self.parser.parse(self.source, None) {
let layer = Layer::new(
self.source,
tree,
property_sheet,
ranges,
depth,
includes_children,
);
if includes_children && depth > self.max_opaque_layer_depth {
self.max_opaque_layer_depth = depth;
}
match self.layers.binary_search_by(|l| l.cmp(&layer)) {
Ok(i) | Err(i) => self.layers.insert(i, layer),
};
} else {
return Some(Error::Cancelled);
}
match self.layers.binary_search_by(|l| l.cmp(&layer)) {
Ok(i) | Err(i) => self.layers.insert(i, layer),
};
}
None
}
fn remove_first_layer(&mut self) {
@ -709,7 +727,7 @@ impl<'a, T> Iterator for Highlighter<'a, T>
where
T: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>,
{
type Item = HighlightEvent<'a>;
type Item = Result<HighlightEvent<'a>, Error>;
fn next(&mut self) -> Option<Self::Item> {
if let Some(cancellation_flag) = self.cancellation_flag {
@ -717,14 +735,14 @@ where
if self.operation_count >= CANCELLATION_CHECK_INTERVAL {
self.operation_count = 0;
if cancellation_flag.load(Ordering::Relaxed) != 0 {
return None;
return Some(Err(Error::Cancelled));
}
}
}
if let Some(utf8_error_len) = self.utf8_error_len.take() {
self.source_offset += utf8_error_len;
return Some(HighlightEvent::Source("\u{FFFD}"));
return Some(Ok(HighlightEvent::Source("\u{FFFD}")));
}
while !self.layers.is_empty() {
@ -771,7 +789,11 @@ where
let depth = first_layer.depth + 1;
for (language, ranges, includes_children) in injections {
self.add_layer(&language, ranges, depth, includes_children);
if let Some(error) =
self.add_layer(&language, ranges, depth, includes_children)
{
return Some(Err(error));
}
}
}
@ -790,9 +812,9 @@ where
}
scope_event = if first_layer.at_node_end {
Some(HighlightEvent::HighlightEnd)
Some(Ok(HighlightEvent::HighlightEnd))
} else {
Some(HighlightEvent::HighlightStart(highlight))
Some(Ok(HighlightEvent::HighlightStart(highlight)))
};
}
}
@ -1057,29 +1079,44 @@ pub fn highlight<'a, F>(
source: &'a [u8],
language: Language,
property_sheet: &'a PropertySheet<Properties>,
cancellation_flag: Option<&'a AtomicUsize>,
injection_callback: F,
) -> Result<impl Iterator<Item = HighlightEvent<'a>> + 'a, String>
) -> Result<impl Iterator<Item = Result<HighlightEvent<'a>, Error>> + 'a, Error>
where
F: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)> + 'a,
{
Highlighter::new(source, language, property_sheet, injection_callback, None)
Highlighter::new(
source,
language,
property_sheet,
injection_callback,
cancellation_flag,
)
}
pub fn highlight_html<'a, F1, F2>(
source: &'a [u8],
language: Language,
property_sheet: &'a PropertySheet<Properties>,
cancellation_flag: Option<&'a AtomicUsize>,
injection_callback: F1,
attribute_callback: F2,
) -> Result<Vec<String>, String>
) -> Result<Vec<String>, Error>
where
F1: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>,
F2: Fn(Highlight) -> &'a str,
{
let highlighter = Highlighter::new(source, language, property_sheet, injection_callback, None)?;
let highlighter = Highlighter::new(
source,
language,
property_sheet,
injection_callback,
cancellation_flag,
)?;
let mut renderer = HtmlRenderer::new(attribute_callback);
let mut scopes = Vec::new();
for event in highlighter {
let event = event?;
match event {
HighlightEvent::HighlightStart(s) => {
scopes.push(s);

View file

@ -25,6 +25,11 @@ pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/par
#[repr(transparent)]
pub struct Language(*const ffi::TSLanguage);
#[derive(Debug, PartialEq, Eq)]
pub struct LanguageError {
version: usize,
}
#[derive(Debug, PartialEq, Eq)]
pub enum LogType {
Parse,
@ -162,6 +167,18 @@ impl Language {
}
}
impl fmt::Display for LanguageError {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(
f,
"Incompatible language version {}. Expected minimum {}, maximum {}",
self.version,
ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION,
ffi::TREE_SITTER_LANGUAGE_VERSION
)
}
}
unsafe impl Send for Language {}
unsafe impl Sync for Language {}
@ -174,21 +191,13 @@ impl Parser {
}
}
pub fn set_language(&mut self, language: Language) -> Result<(), String> {
pub fn set_language(&mut self, language: Language) -> Result<(), LanguageError> {
unsafe {
let version = ffi::ts_language_version(language.0) as usize;
if version < ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION {
Err(format!(
"Incompatible language version {}. Expected {} or greater.",
version,
ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
))
} else if version > ffi::TREE_SITTER_LANGUAGE_VERSION {
Err(format!(
"Incompatible language version {}. Expected {}.",
version,
ffi::TREE_SITTER_LANGUAGE_VERSION
))
if version < ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
|| version > ffi::TREE_SITTER_LANGUAGE_VERSION
{
Err(LanguageError { version })
} else {
ffi::ts_parser_set_language(self.0, language.0);
Ok(())