diff --git a/cli/benches/benchmark.rs b/cli/benches/benchmark.rs index 294c8a97..50ee5370 100644 --- a/cli/benches/benchmark.rs +++ b/cli/benches/benchmark.rs @@ -17,7 +17,6 @@ lazy_static! { static ref REPETITION_COUNT: usize = env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT") .map(|s| usize::from_str_radix(&s, 10).unwrap()) .unwrap_or(5); - static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); static ref EXAMPLE_PATHS_BY_LANGUAGE_DIR: BTreeMap> = { fn process_dir(result: &mut BTreeMap>, dir: &Path) { @@ -137,7 +136,7 @@ fn main() { eprintln!(""); } -fn aggregate(speeds: &Vec<(usize)>) -> Option<(usize, usize)> { +fn aggregate(speeds: &Vec) -> Option<(usize, usize)> { if speeds.is_empty() { return None; } diff --git a/cli/src/query.rs b/cli/src/query.rs index c3a03679..47242273 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -51,13 +51,25 @@ pub fn query_files_at_paths( for m in query_cursor.matches(&query, tree.root_node(), text_callback) { writeln!(&mut stdout, " pattern: {}", m.pattern_index)?; for capture in m.captures { - writeln!( - &mut stdout, - " capture: {}, row: {}, text: {:?}", - &query.capture_names()[capture.index as usize], - capture.node.start_position().row, - capture.node.utf8_text(&source_code).unwrap_or("") - )?; + let start = capture.node.start_position(); + let end = capture.node.end_position(); + if end.row == start.row { + writeln!( + &mut stdout, + " capture: {}, start: {}, text: {:?}", + &query.capture_names()[capture.index as usize], + start, + capture.node.utf8_text(&source_code).unwrap_or("") + )?; + } else { + writeln!( + &mut stdout, + " capture: {}, start: {}, end: {}", + &query.capture_names()[capture.index as usize], + start, + end, + )?; + } } } } diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 35b69f4e..9f6cd12e 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -317,8 +317,8 @@ fn test_highlighting_empty_lines() { } #[test] -fn test_highlighting_ejs() { - let source = vec!["
<% foo() %>
"].join("\n"); +fn test_highlighting_ejs_with_html_and_javascript() { + let source = vec!["
<% foo() %>
"].join("\n"); assert_eq!( &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(), @@ -335,7 +335,18 @@ fn test_highlighting_ejs() { ("%>", vec!["keyword"]), ("", vec!["punctuation.bracket"]) + (">", vec!["punctuation.bracket"]), + ("<", vec!["punctuation.bracket"]), + ("script", vec!["tag"]), + (">", vec!["punctuation.bracket"]), + (" ", vec![]), + ("bar", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + (")", vec!["punctuation.bracket"]), + (" ", vec![]), + ("", vec!["punctuation.bracket"]), ]], ); } diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index f1398b80..7caf5dcb 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1238,6 +1238,45 @@ fn test_query_comments() { }); } +#[test] +fn test_query_disable_pattern() { + allocations::record(|| { + let language = get_language("javascript"); + let mut query = Query::new( + language, + " + (function_declaration + name: (identifier) @name) + (function_declaration + body: (statement_block) @body) + (class_declaration + name: (identifier) @name) + (class_declaration + body: (class_body) @body) + ", + ) + .unwrap(); + + // disable the patterns that match names + query.disable_pattern(0); + query.disable_pattern(2); + + let source = "class A { constructor() {} } function b() { return 1; }"; + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_matches(matches, &query, source), + &[ + (3, vec![("body", "{ constructor() {} }")]), + (1, vec![("body", "{ return 1; }")]), + ], + ); + }); +} + fn collect_matches<'a>( matches: impl Iterator>, query: &'a Query, diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index ea9ddbad..7364291e 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -5,7 +5,8 @@ pub use c_lib as c; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use tree_sitter::{ - Language, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, Range, Tree, + Language, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, QueryMatch, + Range, Tree, }; const CANCELLATION_CHECK_INTERVAL: usize = 100; @@ -36,12 +37,11 @@ pub enum HighlightEvent { pub struct HighlightConfiguration { pub language: Language, pub query: Query, - injections_query: Query, + combined_injections_query: Option, locals_pattern_index: usize, highlights_pattern_index: usize, highlight_indices: Vec>, non_local_variable_patterns: Vec, - injection_site_capture_index: Option, injection_content_capture_index: Option, injection_language_capture_index: Option, local_scope_capture_index: Option, @@ -87,7 +87,6 @@ where source: &'a [u8], byte_offset: usize, highlighter: &'a mut Highlighter, - injections_cursor: QueryCursor, injection_callback: F, cancellation_flag: Option<&'a AtomicUsize>, layers: Vec>, @@ -125,13 +124,14 @@ impl Highlighter { config: &'a HighlightConfiguration, source: &'a [u8], cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, + mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, ) -> Result> + 'a, Error> { - let layer = HighlightIterLayer::new( - config, + let layers = HighlightIterLayer::new( source, self, cancellation_flag, + &mut injection_callback, + config, 0, vec![Range { start_byte: 0, @@ -140,21 +140,19 @@ impl Highlighter { end_point: Point::new(usize::MAX, usize::MAX), }], )?; - - let injections_cursor = self.cursors.pop().unwrap_or(QueryCursor::new()); - - Ok(HighlightIter { + let mut result = HighlightIter { source, byte_offset: 0, injection_callback, cancellation_flag, - injections_cursor, highlighter: self, iter_count: 0, - layers: vec![layer], + layers: layers, next_event: None, last_highlight_range: None, - }) + }; + result.sort_layers(); + Ok(result) } } @@ -187,17 +185,9 @@ impl HighlightConfiguration { let highlights_query_offset = query_source.len(); query_source.push_str(highlights_query); - // Construct a query with the concatenated string. + // Construct a single query by concatenating the three query strings, but record the + // range of pattern indices that belong to each individual string. let mut query = Query::new(language, &query_source)?; - - let injections_query = Query::new(language, injection_query)?; - for injection_capture in injections_query.capture_names() { - if injection_capture != "injection.site" { - query.disable_capture(injection_capture); - } - } - - // Determine the range of pattern indices that belong to each section of the query. let mut locals_pattern_index = 0; let mut highlights_pattern_index = 0; for i in 0..(query.pattern_count()) { @@ -212,6 +202,27 @@ impl HighlightConfiguration { } } + // Construct a separate query just for dealing with the 'combined injections'. + // Disable the combined injection patterns in the main query. + let mut combined_injections_query = Query::new(language, injection_query)?; + let mut has_combined_queries = false; + for pattern_index in 0..locals_pattern_index { + let settings = query.property_settings(pattern_index); + if settings.iter().any(|s| &*s.key == "injection.combined") { + has_combined_queries = true; + query.disable_pattern(pattern_index); + } else { + combined_injections_query.disable_pattern(pattern_index); + } + } + let combined_injections_query = if has_combined_queries { + Some(combined_injections_query) + } else { + None + }; + + // Find all of the highlighting patterns that are disabled for nodes that + // have been identified as local variables. let non_local_variable_patterns = (0..query.pattern_count()) .map(|i| { query @@ -221,9 +232,9 @@ impl HighlightConfiguration { }) .collect(); + // Store the numeric ids for all of the special captures. let mut injection_content_capture_index = None; let mut injection_language_capture_index = None; - let mut injection_site_capture_index = None; let mut local_def_capture_index = None; let mut local_def_value_capture_index = None; let mut local_ref_capture_index = None; @@ -233,7 +244,6 @@ impl HighlightConfiguration { match name.as_str() { "injection.content" => injection_content_capture_index = i, "injection.language" => injection_language_capture_index = i, - "injection.site" => injection_site_capture_index = i, "local.definition" => local_def_capture_index = i, "local.definition-value" => local_def_value_capture_index = i, "local.reference" => local_ref_capture_index = i, @@ -243,18 +253,16 @@ impl HighlightConfiguration { } let highlight_indices = vec![None; query.capture_names().len()]; - Ok(HighlightConfiguration { language, query, - injections_query, + combined_injections_query, locals_pattern_index, highlights_pattern_index, highlight_indices, non_local_variable_patterns, injection_content_capture_index, injection_language_capture_index, - injection_site_capture_index, local_def_capture_index, local_def_value_capture_index, local_ref_capture_index, @@ -308,70 +316,125 @@ impl HighlightConfiguration { } impl<'a> HighlightIterLayer<'a> { - fn new( - config: &'a HighlightConfiguration, + /// Create a new 'layer' of highlighting for this document. + /// + /// In the even that the new layer contains "combined injections" (injections where multiple + /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and + /// added to the returned vector. + fn new Option<&'a HighlightConfiguration> + 'a>( source: &'a [u8], highlighter: &mut Highlighter, cancellation_flag: Option<&'a AtomicUsize>, - depth: usize, - ranges: Vec, - ) -> Result { - highlighter - .parser - .set_language(config.language) - .map_err(|_| Error::InvalidLanguage)?; - unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) }; + injection_callback: &mut F, + mut config: &'a HighlightConfiguration, + mut depth: usize, + mut ranges: Vec, + ) -> Result, Error> { + let mut result = Vec::with_capacity(1); + let mut queue = Vec::new(); + loop { + highlighter + .parser + .set_language(config.language) + .map_err(|_| Error::InvalidLanguage)?; + highlighter.parser.set_included_ranges(&ranges); + unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) }; + let tree = highlighter + .parser + .parse(source, None) + .ok_or(Error::Cancelled)?; + unsafe { highlighter.parser.set_cancellation_flag(None) }; + let mut cursor = highlighter.cursors.pop().unwrap_or(QueryCursor::new()); - highlighter.parser.set_included_ranges(&ranges); + // Process combined injections. + if let Some(combined_injections_query) = &config.combined_injections_query { + let mut injections_by_pattern_index = + vec![(None, Vec::new(), false); combined_injections_query.pattern_count()]; + let matches = + cursor.matches(combined_injections_query, tree.root_node(), |n: Node| { + &source[n.byte_range()] + }); + for mat in matches { + let entry = &mut injections_by_pattern_index[mat.pattern_index]; + let (language_name, content_node, include_children) = + injection_for_match(config, combined_injections_query, &mat, source); + if language_name.is_some() { + entry.0 = language_name; + } + if let Some(content_node) = content_node { + entry.1.push(content_node); + } + entry.2 = include_children; + } + for (lang_name, content_nodes, includes_children) in injections_by_pattern_index { + if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { + if let Some(next_config) = (injection_callback)(lang_name) { + let ranges = + Self::intersect_ranges(&ranges, &content_nodes, includes_children); + if !ranges.is_empty() { + queue.push((next_config, depth + 1, ranges)); + } + } + } + } + } - let tree = highlighter - .parser - .parse(source, None) - .ok_or(Error::Cancelled)?; - let mut cursor = highlighter.cursors.pop().unwrap_or(QueryCursor::new()); + // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which + // prevents them from being moved. But both of these values are really just + // pointers, so it's actually ok to move them. + let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) }; + let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; + let captures = cursor_ref + .captures(&config.query, tree_ref.root_node(), move |n: Node| { + &source[n.byte_range()] + }) + .peekable(); - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. - let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) }; - let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; - let captures = cursor_ref - .captures(&config.query, tree_ref.root_node(), move |n| { - &source[n.byte_range()] - }) - .peekable(); + result.push(HighlightIterLayer { + highlight_end_stack: Vec::new(), + scope_stack: vec![LocalScope { + inherits: false, + range: 0..usize::MAX, + local_defs: Vec::new(), + }], + cursor, + depth, + _tree: tree, + captures, + config, + ranges, + }); - Ok(HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - depth, - _tree: tree, - captures, - config, - ranges, - }) + if queue.is_empty() { + break; + } else { + let (next_config, next_depth, next_ranges) = queue.remove(0); + config = next_config; + depth = next_depth; + ranges = next_ranges; + } + } + + Ok(result) } // Compute the ranges that should be included when parsing an injection. // This takes into account three things: - // * `parent_ranges` - The new injection may be nested inside of *another* injection - // (e.g. JavaScript within HTML within ERB). The parent injection's ranges must - // be taken into account. + // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. // * `nodes` - Every injection takes place within a set of nodes. The injection ranges // are the ranges of those nodes. // * `includes_children` - For some injections, the content nodes' children should be // excluded from the nested document, so that only the content nodes' *own* content // is reparsed. For other injections, the content nodes' entire ranges should be // reparsed, including the ranges of their children. - fn intersect_ranges(&self, nodes: &Vec, includes_children: bool) -> Vec { + fn intersect_ranges( + parent_ranges: &[Range], + nodes: &[Node], + includes_children: bool, + ) -> Vec { let mut cursor = nodes[0].walk(); let mut result = Vec::new(); - let mut parent_range_iter = self.ranges.iter(); + let mut parent_range_iter = parent_ranges.iter(); let mut parent_range = parent_range_iter .next() .expect("Layers should only be constructed with non-empty ranges vectors"); @@ -558,7 +621,7 @@ where } } - // If none of the layers have any more scope boundaries, terminate. + // If none of the layers have any more highlight boundaries, terminate. if self.layers.is_empty() { if self.byte_offset < self.source.len() { let result = Some(Ok(HighlightEvent::Source { @@ -572,8 +635,7 @@ where } } - // Get the next capture. If there are no more captures, then emit the rest of the - // source code. + // Get the next capture from whichever layer has the earliest highlight boundary. let match_; let mut captures; let mut capture; @@ -584,7 +646,10 @@ where captures = match_.captures; pattern_index = match_.pattern_index; capture = captures[*capture_index]; - } else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + } + // If there are no more captures, then emit any remaining highlight end events. + // And if there are none of those, then just advance to the end of the document. + else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { layer.highlight_end_stack.pop(); return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); } else { @@ -602,131 +667,51 @@ where } } - // Remove from the scope stack any local scopes that have already ended. + // Remove from the local scope stack any local scopes that have already ended. while range.start > layer.scope_stack.last().unwrap().range.end { layer.scope_stack.pop(); } // If this capture represents an injection, then process the injection. if pattern_index < layer.config.locals_pattern_index { - let site_capture_index = layer.config.injection_site_capture_index; - let content_capture_index = layer.config.injection_content_capture_index; - let language_capture_index = layer.config.injection_language_capture_index; - - // Injections must have a `injection.site` capture, which contains all of the - // information about the injection. - let site_node = match_.captures.iter().find_map(|c| { - if Some(c.index) == site_capture_index { - return Some(c.node); - } else { - return None; - } - }); + let (language_name, content_node, include_children) = + injection_for_match(&layer.config, &layer.config.query, match_, &self.source); // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. + // in the stream of captures. The `unwrap` is ok because layer.captures.next().unwrap().0.remove(); - if let Some(site_node) = site_node { - // Discard any subsequent matches for same injection site. - while let Some((next_match, _)) = layer.captures.peek() { - if next_match.pattern_index < layer.config.locals_pattern_index - && next_match - .captures - .iter() - .any(|c| Some(c.index) == site_capture_index && c.node == site_node) - { - layer.captures.next().unwrap().0.remove(); - continue; - } - break; - } - - // Find the language name and the nodes that represents the injection content. - // Use a separate Query and QueryCursor in order to avoid the injection - // captures being intermixed with other captures related to local variables - // and syntax highlighting. - let source = self.source; - let mut injections = Vec::<(usize, Option<&str>, Vec, bool)>::new(); - for mat in self.injections_cursor.matches( - &layer.config.injections_query, - site_node, - move |node| &source[node.byte_range()], - ) { - let entry = if let Some(entry) = - injections.iter_mut().find(|e| e.0 == mat.pattern_index) - { - entry - } else { - injections.push((mat.pattern_index, None, Vec::new(), false)); - injections.last_mut().unwrap() - }; - - for capture in mat.captures { - let index = Some(capture.index); - if index == site_capture_index { - if capture.node != site_node { - break; - } - } else if index == language_capture_index && entry.1.is_none() { - entry.1 = capture.node.utf8_text(self.source).ok(); - } else if index == content_capture_index { - entry.2.push(capture.node); - } - } - } - - for (pattern_index, language, _, include_children) in injections.iter_mut() { - for prop in layer.config.query.property_settings(*pattern_index) { - match prop.key.as_ref() { - // In addition to specifying the language name via the text of a - // captured node, it can also be hard-coded via a `set!` predicate - // that sets the injection.language key. - "injection.language" => { - if language.is_none() { - *language = prop.value.as_ref().map(|s| s.as_ref()) + // If a language is found with the given name, then add a new language layer + // to the highlighted document. + if let (Some(language_name), Some(content_node)) = (language_name, content_node) { + if let Some(config) = (self.injection_callback)(language_name) { + let ranges = HighlightIterLayer::intersect_ranges( + &self.layers[0].ranges, + &[content_node], + include_children, + ); + if !ranges.is_empty() { + match HighlightIterLayer::new( + self.source, + self.highlighter, + self.cancellation_flag, + &mut self.injection_callback, + config, + self.layers[0].depth + 1, + ranges, + ) { + Ok(layers) => { + for layer in layers { + self.insert_layer(layer); } } - - // By default, injections do not include the *children* of an - // `injection.content` node - only the ranges that belong to the - // node itself. This can be changed using a `set!` predicate that - // sets the `injection.include-children` key. - "injection.include-children" => *include_children = true, - _ => {} + Err(e) => return Some(Err(e)), } } } - - for (_, language, content_nodes, include_children) in injections { - // If a language is found with the given name, then add a new language layer - // to the highlighted document. - if let Some(language) = language { - if let Some(config) = (self.injection_callback)(language) { - if !content_nodes.is_empty() { - let ranges = self.layers[0] - .intersect_ranges(&content_nodes, include_children); - if !ranges.is_empty() { - match HighlightIterLayer::new( - config, - self.source, - self.highlighter, - self.cancellation_flag, - self.layers[0].depth + 1, - ranges, - ) { - Ok(layer) => self.insert_layer(layer), - Err(e) => return Some(Err(e)), - } - } - } - } - } - } - - self.sort_layers(); } + self.sort_layers(); continue; } @@ -826,7 +811,11 @@ where break; } + // Otherwise, this capture must represent a highlight. let mut has_highlight = true; + + // If this exact range has already been highlighted by an earlier pattern, or by + // a different layer, then skip over this one. if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { if range.start == last_start && range.end == last_end && layer.depth < last_depth { has_highlight = false; @@ -993,3 +982,47 @@ impl HtmlRenderer { } } } + +fn injection_for_match<'a>( + config: &HighlightConfiguration, + query: &'a Query, + query_match: &QueryMatch<'a>, + source: &'a [u8], +) -> (Option<&'a str>, Option>, bool) { + let content_capture_index = config.injection_content_capture_index; + let language_capture_index = config.injection_language_capture_index; + + let mut language_name = None; + let mut content_node = None; + for capture in query_match.captures { + let index = Some(capture.index); + if index == language_capture_index { + language_name = capture.node.utf8_text(source).ok(); + } else if index == content_capture_index { + content_node = Some(capture.node); + } + } + + let mut include_children = false; + for prop in query.property_settings(query_match.pattern_index) { + match prop.key.as_ref() { + // In addition to specifying the language name via the text of a + // captured node, it can also be hard-coded via a `set!` predicate + // that sets the injection.language key. + "injection.language" => { + if language_name.is_none() { + language_name = prop.value.as_ref().map(|s| s.as_ref()) + } + } + + // By default, injections do not include the *children* of an + // `injection.content` node - only the ranges that belong to the + // node itself. This can be changed using a `set!` predicate that + // sets the `injection.include-children` key. + "injection.include-children" => include_children = true, + _ => {} + } + } + + (language_name, content_node, include_children) +} diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index f6515608..dfc280fa 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -668,6 +668,12 @@ extern "C" { arg3: u32, ); } +extern "C" { + #[doc = " Disable a certain pattern within a query. This prevents the pattern"] + #[doc = " from matching and removes most of the overhead associated with the"] + #[doc = " pattern."] + pub fn ts_query_disable_pattern(arg1: *mut TSQuery, arg2: u32); +} extern "C" { #[doc = " Create a new cursor for executing a given query."] #[doc = ""] diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 94332667..0f308d45 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1322,6 +1322,14 @@ impl Query { } } + /// Disable a certain pattern within a query. + /// + /// This prevents the pattern from matching, and also avoids any resource usage + /// associated with the pattern. + pub fn disable_pattern(&mut self, index: usize) { + unsafe { ffi::ts_query_disable_pattern(self.ptr.as_ptr(), index as u32) } + } + fn parse_property( function_name: &str, capture_names: &[String], diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 40187e3d..17b6d0c4 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -732,12 +732,22 @@ const char *ts_query_string_value_for_id( ); /** - * Disable a certain capture within a query. This prevents the capture - * from being returned in matches, and also avoids any resource usage - * associated with recording the capture. + * Disable a certain capture within a query. + * + * This prevents the capture from being returned in matches, and also avoids + * any resource usage associated with recording the capture. Currently, there + * is no way to undo this. */ void ts_query_disable_capture(TSQuery *, const char *, uint32_t); +/** + * Disable a certain pattern within a query. + * + * This prevents the pattern from matching and removes most of the overhead + * associated with the pattern. Currently, there is no way to undo this. + */ +void ts_query_disable_pattern(TSQuery *, uint32_t); + /** * Create a new cursor for executing a given query. * diff --git a/lib/src/query.c b/lib/src/query.c index a5ce8603..21e8bd9a 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -62,10 +62,12 @@ typedef struct { } SymbolTable; /* - * PatternEntry - The set of steps needed to match a particular pattern, - * represented as a slice of a shared array. These entries are stored in a - * 'pattern map' - a sorted array that makes it possible to efficiently lookup - * patterns based on the symbol for their first step. + * PatternEntry - Information about the starting point for matching a + * particular pattern, consisting of the index of the pattern within the query, + * and the index of the patter's first step in the shared `steps` array. These + * entries are stored in a 'pattern map' - a sorted array that makes it + * possible to efficiently lookup patterns based on the symbol for their first + * step. */ typedef struct { uint16_t step_index; @@ -333,7 +335,7 @@ static uint16_t symbol_table_insert_name( // to quickly find the starting steps of all of the patterns whose root matches // that node. Each entry has two fields: a `pattern_index`, which identifies one // of the patterns in the query, and a `step_index`, which indicates the start -// offset of that pattern's steps pattern within the `steps` array. +// offset of that pattern's steps within the `steps` array. // // The entries are sorted by the patterns' root symbols, and lookups use a // binary search. This ensures that the cost of this initial lookup step @@ -891,6 +893,8 @@ void ts_query_disable_capture( const char *name, uint32_t length ) { + // Remove capture information for any pattern step that previously + // captured with the given name. int id = symbol_table_id_for_name(&self->captures, name, length); if (id != -1) { for (unsigned i = 0; i < self->steps.size; i++) { @@ -899,8 +903,23 @@ void ts_query_disable_capture( step->capture_id = NONE; } } + ts_query__finalize_steps(self); + } +} + +void ts_query_disable_pattern( + TSQuery *self, + uint32_t pattern_index +) { + // Remove the given pattern from the pattern map. Its steps will still + // be in the `steps` array, but they will never be read. + for (unsigned i = 0; i < self->pattern_map.size; i++) { + PatternEntry *pattern = &self->pattern_map.contents[i]; + if (pattern->pattern_index == pattern_index) { + array_erase(&self->pattern_map, i); + i--; + } } - ts_query__finalize_steps(self); } /*************** @@ -1010,7 +1029,7 @@ static bool ts_query_cursor__first_in_progress_capture( static bool ts_query__cursor_add_state( TSQueryCursor *self, - const PatternEntry *slice + const PatternEntry *pattern ) { uint32_t list_id = capture_list_pool_acquire(&self->capture_list_pool); @@ -1037,11 +1056,11 @@ static bool ts_query__cursor_add_state( } } - LOG(" start state. pattern:%u\n", slice->pattern_index); + LOG(" start state. pattern:%u\n", pattern->pattern_index); array_push(&self->states, ((QueryState) { .capture_list_id = list_id, - .step_index = slice->step_index, - .pattern_index = slice->pattern_index, + .step_index = pattern->step_index, + .pattern_index = pattern->pattern_index, .start_depth = self->depth, .capture_count = 0, .consumed_capture_count = 0, @@ -1157,31 +1176,31 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // Add new states for any patterns whose root node is a wildcard. for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { - PatternEntry *slice = &self->query->pattern_map.contents[i]; - QueryStep *step = &self->query->steps.contents[slice->step_index]; + PatternEntry *pattern = &self->query->pattern_map.contents[i]; + QueryStep *step = &self->query->steps.contents[pattern->step_index]; // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; - if (!ts_query__cursor_add_state(self, slice)) break; + if (!ts_query__cursor_add_state(self, pattern)) break; } // Add new states for any patterns whose root node matches this node. unsigned i; if (ts_query__pattern_map_search(self->query, symbol, &i)) { - PatternEntry *slice = &self->query->pattern_map.contents[i]; - QueryStep *step = &self->query->steps.contents[slice->step_index]; + PatternEntry *pattern = &self->query->pattern_map.contents[i]; + QueryStep *step = &self->query->steps.contents[pattern->step_index]; do { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; - if (!ts_query__cursor_add_state(self, slice)) break; + if (!ts_query__cursor_add_state(self, pattern)) break; // Advance to the next pattern whose root node matches this node. i++; if (i == self->query->pattern_map.size) break; - slice = &self->query->pattern_map.contents[i]; - step = &self->query->steps.contents[slice->step_index]; + pattern = &self->query->pattern_map.contents[i]; + step = &self->query->steps.contents[pattern->step_index]; } while (step->symbol == symbol); } diff --git a/script/fetch-fixtures b/script/fetch-fixtures index 94f9eddd..f4f84017 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -24,9 +24,9 @@ fetch_grammar() { fetch_grammar bash master fetch_grammar c master fetch_grammar cpp master -fetch_grammar embedded-template master +fetch_grammar embedded-template new-highlight-injection-api fetch_grammar go master -fetch_grammar html master +fetch_grammar html new-highlight-injection-api fetch_grammar javascript master fetch_grammar json master fetch_grammar python master diff --git a/script/fetch-fixtures.cmd b/script/fetch-fixtures.cmd index 011d73ff..b5072003 100644 --- a/script/fetch-fixtures.cmd +++ b/script/fetch-fixtures.cmd @@ -3,9 +3,9 @@ call:fetch_grammar bash master call:fetch_grammar c master call:fetch_grammar cpp master -call:fetch_grammar embedded-template master +call:fetch_grammar embedded-template new-highlight-injection-api call:fetch_grammar go master -call:fetch_grammar html master +call:fetch_grammar html new-highlight-injection-api call:fetch_grammar javascript master call:fetch_grammar json master call:fetch_grammar python master