From 4151a428ec9059b70f3a7beecba8384b649eb621 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 6 Sep 2019 09:18:30 -0700 Subject: [PATCH 001/558] docs: Add link to WIP swift grammar --- docs/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.md b/docs/index.md index d5381f59..62a24a87 100644 --- a/docs/index.md +++ b/docs/index.md @@ -57,6 +57,7 @@ Parsers for these languages are in development: * [Julia](https://github.com/tree-sitter/tree-sitter-julia) * [Nix](https://github.com/cstrahan/tree-sitter-nix) * [Scala](https://github.com/tree-sitter/tree-sitter-scala) +* [Swift](https://github.com/tree-sitter/tree-sitter-swift) ### Talks on Tree-sitter From fe7c74e7aa90e4f935c466763c84aad4449742cb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 9 Sep 2019 15:41:13 -0700 Subject: [PATCH 002/558] Start work on an API for querying trees --- cli/src/tests/mod.rs | 1 + cli/src/tests/query_test.rs | 216 +++++++++ lib/binding_rust/bindings.rs | 80 ++++ lib/binding_rust/lib.rs | 130 +++++- lib/include/tree_sitter/api.h | 115 +++++ lib/src/bits.h | 25 ++ lib/src/lib.c | 1 + lib/src/query.c | 810 ++++++++++++++++++++++++++++++++++ lib/src/tree_cursor.c | 63 ++- lib/src/tree_cursor.h | 1 + 10 files changed, 1430 insertions(+), 12 deletions(-) create mode 100644 cli/src/tests/query_test.rs create mode 100644 lib/src/bits.h create mode 100644 lib/src/query.c diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 143e8297..1a2a71ff 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -4,4 +4,5 @@ mod highlight_test; mod node_test; mod parser_test; mod properties_test; +mod query_test; mod tree_test; diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs new file mode 100644 index 00000000..32adba62 --- /dev/null +++ b/cli/src/tests/query_test.rs @@ -0,0 +1,216 @@ +use super::helpers::allocations; +use super::helpers::fixtures::get_language; +use tree_sitter::{Parser, Query, QueryError, QueryMatch}; + +#[test] +fn 
test_query_errors_on_invalid_syntax() { + allocations::start_recording(); + + let language = get_language("javascript"); + + assert!(Query::new(language, "(if_statement)").is_ok()); + assert!(Query::new(language, "(if_statement condition:(identifier))").is_ok()); + + // Mismatched parens + assert_eq!( + Query::new(language, "(if_statement"), + Err(QueryError::Syntax(13)) + ); + assert_eq!( + Query::new(language, "(if_statement))"), + Err(QueryError::Syntax(14)) + ); + + // Return an error at the *beginning* of a bare identifier not followed by a colon. + // If there's a colon but no pattern, return an error at the end of the colon. + assert_eq!( + Query::new(language, "(if_statement identifier)"), + Err(QueryError::Syntax(14)) + ); + assert_eq!( + Query::new(language, "(if_statement condition:)"), + Err(QueryError::Syntax(24)) + ); + + assert_eq!( + Query::new(language, "(if_statement condition:)"), + Err(QueryError::Syntax(24)) + ); + + allocations::stop_recording(); +} + +#[test] +fn test_query_errors_on_invalid_symbols() { + allocations::start_recording(); + + let language = get_language("javascript"); + + assert_eq!( + Query::new(language, "(non_existent1)"), + Err(QueryError::NodeType("non_existent1")) + ); + assert_eq!( + Query::new(language, "(if_statement (non_existent2))"), + Err(QueryError::NodeType("non_existent2")) + ); + assert_eq!( + Query::new(language, "(if_statement condition: (non_existent3))"), + Err(QueryError::NodeType("non_existent3")) + ); + assert_eq!( + Query::new(language, "(if_statement not_a_field: (identifier))"), + Err(QueryError::Field("not_a_field")) + ); + + allocations::stop_recording(); +} + +#[test] +fn test_query_capture_names() { + allocations::start_recording(); + + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (if_statement + condition: (binary_expression + left: * @left-operand + operator: "||" + right: * @right-operand) + consequence: (statement_block) @body) + + (while_statement + 
condition:* @loop-condition) + "#, + ) + .unwrap(); + + assert_eq!( + query.capture_names(), + &[ + "left-operand".to_string(), + "right-operand".to_string(), + "body".to_string(), + "loop-condition".to_string(), + ] + ); + + drop(query); + allocations::stop_recording(); +} + +#[test] +fn test_query_exec_with_simple_pattern() { + allocations::start_recording(); + + let language = get_language("javascript"); + let query = Query::new( + language, + "(function_declaration name: (identifier) @fn-name)", + ) + .unwrap(); + + let source = "function one() { two(); function three() {} }"; + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + let context = query.context(); + let matches = context.exec(tree.root_node()); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("fn-name", "one")]), + (0, vec![("fn-name", "three")]) + ], + ); + + drop(context); + drop(parser); + drop(query); + drop(tree); + allocations::stop_recording(); +} + +#[test] +fn test_query_exec_with_multiple_matches_same_root() { + allocations::start_recording(); + + let language = get_language("javascript"); + let query = Query::new( + language, + "(class_declaration + name: (identifier) @the-class-name + (class_body + (method_definition + name: (property_identifier) @the-method-name)))", + ) + .unwrap(); + + let source = " + class Person { + // the constructor + constructor(name) { this.name = name; } + + // the getter + getFullName() { return this.name; } + } + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + ( + 0, + vec![ + ("the-class-name", "Person"), + ("the-method-name", "constructor") + ] + ), + ( + 0, + vec![ + ("the-class-name", "Person"), + ("the-method-name", 
"getFullName") + ] + ), + ], + ); + + drop(context); + drop(parser); + drop(query); + drop(tree); + allocations::stop_recording(); +} + +fn collect_matches<'a>( + matches: impl Iterator>, + query: &'a Query, + source: &'a str, +) -> Vec<(usize, Vec<(&'a str, &'a str)>)> { + matches + .map(|m| { + ( + m.pattern_index(), + m.captures() + .map(|(capture_id, node)| { + ( + query.capture_names()[capture_id].as_str(), + node.utf8_text(source.as_bytes()).unwrap(), + ) + }) + .collect(), + ) + }) + .collect() +} diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index a71b297e..53b77405 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -19,6 +19,16 @@ pub struct TSParser { pub struct TSTree { _unused: [u8; 0], } +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSQuery { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSQueryContext { + _unused: [u8; 0], +} pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; pub type TSInputEncoding = u32; @@ -93,6 +103,17 @@ pub struct TSTreeCursor { pub id: *const ::std::os::raw::c_void, pub context: [u32; 2usize], } +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSQueryCapture { + pub node: TSNode, + pub index: u32, +} +pub const TSQueryError_TSQueryErrorNone: TSQueryError = 0; +pub const TSQueryError_TSQueryErrorSyntax: TSQueryError = 1; +pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2; +pub const TSQueryError_TSQueryErrorField: TSQueryError = 3; +pub type TSQueryError = u32; extern "C" { #[doc = " Create a new parser."] pub fn ts_parser_new() -> *mut TSParser; @@ -538,6 +559,65 @@ extern "C" { extern "C" { pub fn ts_tree_cursor_copy(arg1: *const TSTreeCursor) -> TSTreeCursor; } +extern "C" { + #[doc = " Create a new query based on a given language and string containing"] + #[doc = " one or more S-expression patterns."] + #[doc = ""] + #[doc = " 
If all of the given patterns are valid, this returns a `TSQuery`."] + #[doc = " If a pattern is invalid, this returns `NULL`, and provides two pieces"] + #[doc = " of information about the problem:"] + #[doc = " 1. The byte offset of the error is written to the `error_offset` parameter."] + #[doc = " 2. The type of error is written to the `error_type` parameter."] + pub fn ts_query_new( + arg1: *const TSLanguage, + source: *const ::std::os::raw::c_char, + source_len: u32, + error_offset: *mut u32, + error_type: *mut TSQueryError, + ) -> *mut TSQuery; +} +extern "C" { + #[doc = " Delete a query, freeing all of the memory that it used."] + pub fn ts_query_delete(arg1: *mut TSQuery); +} +extern "C" { + pub fn ts_query_capture_count(arg1: *const TSQuery) -> u32; +} +extern "C" { + pub fn ts_query_capture_name_for_id( + self_: *const TSQuery, + index: u32, + length: *mut u32, + ) -> *const ::std::os::raw::c_char; +} +extern "C" { + pub fn ts_query_capture_id_for_name( + self_: *const TSQuery, + name: *const ::std::os::raw::c_char, + length: u32, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ts_query_context_new(arg1: *const TSQuery) -> *mut TSQueryContext; +} +extern "C" { + pub fn ts_query_context_delete(arg1: *mut TSQueryContext); +} +extern "C" { + pub fn ts_query_context_exec(arg1: *mut TSQueryContext, arg2: TSNode); +} +extern "C" { + pub fn ts_query_context_next(arg1: *mut TSQueryContext) -> bool; +} +extern "C" { + pub fn ts_query_context_matched_pattern_index(arg1: *const TSQueryContext) -> u32; +} +extern "C" { + pub fn ts_query_context_matched_captures( + arg1: *const TSQueryContext, + arg2: *mut u32, + ) -> *const TSQueryCapture; +} extern "C" { #[doc = " Get the number of distinct node types in the language."] pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 4c34d202..80e56ba9 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -17,7 +17,7 @@ use 
std::ffi::CStr; use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::sync::atomic::AtomicUsize; -use std::{fmt, ptr, str, u16}; +use std::{char, fmt, ptr, slice, str, u16}; pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION; pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h"); @@ -136,6 +136,23 @@ pub struct TreePropertyCursor<'a, P> { source: &'a [u8], } +#[derive(Debug)] +pub struct Query { + ptr: *mut ffi::TSQuery, + capture_names: Vec, +} + +pub struct QueryContext<'a>(*mut ffi::TSQueryContext, PhantomData<&'a ()>); + +pub struct QueryMatch<'a>(&'a QueryContext<'a>); + +#[derive(Debug, PartialEq, Eq)] +pub enum QueryError<'a> { + Syntax(usize), + NodeType(&'a str), + Field(&'a str), +} + impl Language { pub fn version(&self) -> usize { unsafe { ffi::ts_language_version(self.0) as usize } @@ -921,6 +938,117 @@ impl<'a, P> TreePropertyCursor<'a, P> { } } +impl Query { + pub fn new(language: Language, source: &str) -> Result { + let mut error_offset = 0u32; + let mut error_type: ffi::TSQueryError = 0; + let bytes = source.as_bytes(); + let ptr = unsafe { + ffi::ts_query_new( + language.0, + bytes.as_ptr() as *const c_char, + bytes.len() as u32, + &mut error_offset as *mut u32, + &mut error_type as *mut ffi::TSQueryError, + ) + }; + if ptr.is_null() { + let offset = error_offset as usize; + Err(match error_type { + ffi::TSQueryError_TSQueryErrorNodeType | ffi::TSQueryError_TSQueryErrorField => { + let suffix = source.split_at(offset).1; + let end_offset = suffix + .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-') + .unwrap_or(suffix.len()); // `end_offset` indexes `suffix`, so a name reaching the end of the query stops at `suffix.len()` + let name = suffix.split_at(end_offset).0; + if error_type == ffi::TSQueryError_TSQueryErrorNodeType { + QueryError::NodeType(name) + } else { + QueryError::Field(name) + } + } + _ => QueryError::Syntax(offset), + }) + } else { + let capture_count = unsafe { ffi::ts_query_capture_count(ptr) }; + let capture_names = (0..capture_count) 
.map(|i| unsafe { + let mut length = 0u32; + let name = + ffi::ts_query_capture_name_for_id(ptr, i as u32, &mut length as *mut u32) + as *const u8; + let name = slice::from_raw_parts(name, length as usize); + let name = str::from_utf8_unchecked(name); + name.to_string() + }) + .collect(); + Ok(Query { ptr, capture_names }) + } + } + + pub fn capture_names(&self) -> &[String] { + &self.capture_names + } + + pub fn context(&self) -> QueryContext { + let context = unsafe { ffi::ts_query_context_new(self.ptr) }; + QueryContext(context, PhantomData) + } +} + +impl<'a> QueryContext<'a> { + pub fn exec(&'a self, node: Node<'a>) -> impl Iterator> + 'a { + unsafe { + ffi::ts_query_context_exec(self.0, node.0); + } + std::iter::from_fn(move || -> Option> { + unsafe { + if ffi::ts_query_context_next(self.0) { + Some(QueryMatch(self)) + } else { + None + } + } + }) + } +} + +impl<'a> QueryMatch<'a> { + pub fn pattern_index(&self) -> usize { + unsafe { ffi::ts_query_context_matched_pattern_index((self.0).0) as usize } + } + + pub fn captures(&self) -> impl ExactSizeIterator { + unsafe { + let mut capture_count = 0u32; + let captures = + ffi::ts_query_context_matched_captures((self.0).0, &mut capture_count as *mut u32); + let captures = slice::from_raw_parts(captures, capture_count as usize); + captures + .iter() + .map(move |capture| (capture.index as usize, Node::new(capture.node).unwrap())) + } + } +} + +impl PartialEq for Query { + fn eq(&self, other: &Self) -> bool { + self.ptr == other.ptr + } +} + +impl Drop for Query { + fn drop(&mut self) { + unsafe { ffi::ts_query_delete(self.ptr) } + } +} + +impl<'a> Drop for QueryContext<'a> { + fn drop(&mut self) { + unsafe { ffi::ts_query_context_delete(self.0) } + } +} + impl Point { pub fn new(row: usize, column: usize) -> Self { Point { row, column } diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index d39d0521..ad991818 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ 
-26,6 +26,8 @@ typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; typedef struct TSParser TSParser; typedef struct TSTree TSTree; +typedef struct TSQuery TSQuery; +typedef struct TSQueryContext TSQueryContext; typedef enum { TSInputEncodingUTF8, @@ -87,6 +89,18 @@ typedef struct { uint32_t context[2]; } TSTreeCursor; +typedef struct { + TSNode node; + uint32_t index; +} TSQueryCapture; + +typedef enum { + TSQueryErrorNone = 0, + TSQueryErrorSyntax, + TSQueryErrorNodeType, + TSQueryErrorField, +} TSQueryError; + /********************/ /* Section - Parser */ /********************/ @@ -602,6 +616,107 @@ int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t); TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *); +/*******************/ +/* Section - Query */ +/*******************/ + +/** + * Create a new query from a string containing one or more S-expression + * patterns. The query is associated with a particular language, and can + * only be run on syntax nodes parsed with that language. + * + * If all of the given patterns are valid, this returns a `TSQuery`. + * If a pattern is invalid, this returns `NULL`, and provides two pieces + * of information about the problem: + * 1. The byte offset of the error is written to the `error_offset` parameter. + * 2. The type of error is written to the `error_type` parameter. + */ +TSQuery *ts_query_new( + const TSLanguage *language, + const char *source, + uint32_t source_len, + uint32_t *error_offset, + TSQueryError *error_type +); + +/** + * Delete a query, freeing all of the memory that it used. + */ +void ts_query_delete(TSQuery *); + +/* + * Get the number of distinct capture names in the query. + */ +uint32_t ts_query_capture_count(const TSQuery *); + +/* + * Get the name and length of one of the query's capture. Each capture + * is associated with a numeric id based on the order that it appeared + * in the query's source. 
+ */ +const char *ts_query_capture_name_for_id( + const TSQuery *self, + uint32_t index, + uint32_t *length +); + +/* + * Get the numeric id of the capture with the given name. + */ +int ts_query_capture_id_for_name( + const TSQuery *self, + const char *name, + uint32_t length +); + +/* + * Create a new context for executing a given query. + * + * The context stores the state that is needed to iteratively search + * for matches. To use the query context: + * 1. First call `ts_query_context_exec` to start running the query + * on a particular syntax node. + * 2. Then repeatedly call `ts_query_context_next` to iterate over + * the matches. + * 3. For each match, you can call `ts_query_context_matched_pattern_index` + * to determine which pattern matched. You can also call + * `ts_query_context_matched_captures` to determine which nodes + * were captured by which capture names. + * + * If you don't care about finding all of the matches, you can stop calling + * `ts_query_context_next` at any point. And you can start executing the + * query against a different node by calling `ts_query_context_exec` again. + */ +TSQueryContext *ts_query_context_new(const TSQuery *); + +/* + * Delete a query context, freeing all of the memory that it used. + */ +void ts_query_context_delete(TSQueryContext *); + +/* + * Start running a query on a given node. + */ +void ts_query_context_exec(TSQueryContext *, TSNode); + +/* + * Advance to the next match of the currently running query. + */ +bool ts_query_context_next(TSQueryContext *); + +/* + * Check which pattern matched. + */ +uint32_t ts_query_context_matched_pattern_index(const TSQueryContext *); + +/* + * Check which nodes were captured, and by which capture names. 
+ */ +const TSQueryCapture *ts_query_context_matched_captures( + const TSQueryContext *, + uint32_t * +); + /**********************/ /* Section - Language */ /**********************/ diff --git a/lib/src/bits.h b/lib/src/bits.h new file mode 100644 index 00000000..0caa1d8d --- /dev/null +++ b/lib/src/bits.h @@ -0,0 +1,25 @@ +#ifndef TREE_SITTER_BITS_H_ +#define TREE_SITTER_BITS_H_ + +#include + +#ifdef _WIN32 + +#include + +static inline uint32_t count_leading_zeros(uint32_t x) { + if (x == 0) return 32; + unsigned long result; + _BitScanReverse(&result, x); + return 31 - result; // _BitScanReverse reports the index of the highest set bit +} + +#else + +static inline uint32_t count_leading_zeros(uint32_t x) { + if (x == 0) return 32; + return __builtin_clz(x); +} + +#endif +#endif // TREE_SITTER_BITS_H_ diff --git a/lib/src/lib.c b/lib/src/lib.c index fc5fbc92..900304f0 100644 --- a/lib/src/lib.c +++ b/lib/src/lib.c @@ -12,6 +12,7 @@ #include "./lexer.c" #include "./node.c" #include "./parser.c" +#include "./query.c" #include "./stack.c" #include "./subtree.c" #include "./tree_cursor.c" diff --git a/lib/src/query.c b/lib/src/query.c new file mode 100644 index 00000000..f7836a86 --- /dev/null +++ b/lib/src/query.c @@ -0,0 +1,810 @@ +#include "tree_sitter/api.h" +#include "./alloc.h" +#include "./array.h" +#include "./bits.h" +#include "utf8proc.h" +#include + +/* + * Stream - A sequence of unicode characters derived from a UTF8 string. + * This struct is used in parsing query S-expressions. + */ +typedef struct { + const char *input; + const char *end; + int32_t next; + uint8_t next_size; +} Stream; + +/* + * QueryStep - A step in the process of matching a query. Each node within + * a query S-expression maps to one of these steps. An entire pattern is + * represented as a sequence of these steps. + */ +typedef struct { + TSSymbol symbol; + TSFieldId field; + uint16_t capture_id; + uint8_t depth; + bool field_is_multiple; +} QueryStep; + +/* + * CaptureSlice - The name of a capture, represented as a slice of a + * shared string. 
+ */ +typedef struct { + uint32_t offset; + uint32_t length; +} CaptureSlice; + +/* + * PatternSlice - The set of steps needed to match a particular pattern, + * represented as a slice of a shared array. + */ +typedef struct { + uint16_t step_index; + uint16_t pattern_index; +} PatternSlice; + +/* + * QueryState - The state of an in-progress match of a particular pattern + * in a query. While executing, a QueryContext must keep track of a number + * of possible in-progress matches. Each of those possible matches is + * represented as one of these states. + */ +typedef struct { + uint16_t step_index; + uint16_t pattern_index; + uint16_t start_depth; + uint16_t capture_list_id; + uint16_t capture_count; +} QueryState; + +/* + * CaptureListPool - A collection of *lists* of captures. Each QueryState + * needs to maintain its own list of captures. They are all represented as + * slices of one shared array. The CaptureListPool keeps track of which + * parts of the shared array are currently in use by a QueryState. + */ +typedef struct { + TSQueryCapture *contents; + uint32_t list_size; + uint32_t usage_map; +} CaptureListPool; + +/* + * TSQuery - A tree query, compiled from a string of S-expressions. The query + * itself is immutable. The mutable state used in the process of executing the + * query is stored in a `TSQueryContext`. + */ +struct TSQuery { + Array(QueryStep) steps; + Array(char) capture_data; + Array(CaptureSlice) capture_names; + Array(PatternSlice) pattern_map; + const TSLanguage *language; + uint16_t max_capture_count; + uint16_t wildcard_root_pattern_count; +}; + +/* + * TSQueryContext - A stateful struct used to execute a query on a tree. 
+ */ +struct TSQueryContext { + const TSQuery *query; + TSTreeCursor cursor; + Array(QueryState) states; + Array(QueryState) finished_states; + CaptureListPool capture_list_pool; + bool ascending; + uint32_t depth; +}; + +static const TSQueryError PARENT_DONE = -1; +static const uint8_t PATTERN_DONE_MARKER = UINT8_MAX; +static const uint16_t NONE = UINT16_MAX; +static const TSSymbol WILDCARD_SYMBOL = 0; +static const uint16_t MAX_STATE_COUNT = 32; + +/********** + * Stream + **********/ + +static bool stream_advance(Stream *self) { + if (self->input >= self->end) return false; + self->input += self->next_size; + int size = utf8proc_iterate( + (const uint8_t *)self->input, + self->end - self->input, + &self->next + ); + if (size <= 0) return false; + self->next_size = size; + return true; +} + +static void stream_reset(Stream *self, const char *input) { + self->input = input; + self->next_size = 0; + stream_advance(self); +} + +static Stream stream_new(const char *string, uint32_t length) { + Stream self = { + .next = 0, + .input = string, + .end = string + length, + }; + stream_advance(&self); + return self; +} + +static void stream_skip_whitespace(Stream *stream) { + while (iswspace(stream->next)) stream_advance(stream); +} + +static bool stream_is_ident_start(Stream *stream) { + return iswalpha(stream->next) || stream->next == '_' || stream->next == '-'; +} + +static void stream_scan_identifier(Stream *stream) { + do { + stream_advance(stream); + } while ( + iswalnum(stream->next) || + stream->next == '_' || + stream->next == '-' || + stream->next == '.' 
+ ); +} + +/****************** + * CaptureListPool + ******************/ + +static CaptureListPool capture_list_pool_new(uint16_t list_size) { + return (CaptureListPool) { + .contents = ts_calloc(MAX_STATE_COUNT * list_size, sizeof(TSQueryCapture)), + .list_size = list_size, + .usage_map = UINT32_MAX, + }; +} + +static void capture_list_pool_clear(CaptureListPool *self) { + self->usage_map = UINT32_MAX; +} + +static void capture_list_pool_delete(CaptureListPool *self) { + ts_free(self->contents); +} + +static TSQueryCapture *capture_list_pool_get(CaptureListPool *self, uint16_t id) { + return &self->contents[id * self->list_size]; +} + +static uint16_t capture_list_pool_acquire(CaptureListPool *self) { + uint16_t id = count_leading_zeros(self->usage_map); + if (id == 32) return NONE; + self->usage_map &= ~(1u << (31 - id)); // id 0 is the most significant bit, matching count_leading_zeros + return id; +} + +static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { + self->usage_map |= (1u << (31 - id)); // same bit order as capture_list_pool_acquire +} + +/********* + * Query + *********/ + +static TSSymbol ts_query_intern_node_name( + const TSQuery *self, + const char *name, + uint32_t length, + TSSymbolType symbol_type +) { + uint32_t symbol_count = ts_language_symbol_count(self->language); + for (TSSymbol i = 0; i < symbol_count; i++) { + if ( + ts_language_symbol_type(self->language, i) == symbol_type && + !strncmp(ts_language_symbol_name(self->language, i), name, length) && ts_language_symbol_name(self->language, i)[length] == 0 // reject symbols that merely start with `name` + ) return i; + } + return 0; +} + +static uint16_t ts_query_intern_capture_name( + TSQuery *self, + const char *name, + uint32_t length +) { + int id = ts_query_capture_id_for_name(self, name, length); + if (id >= 0) { + return (uint16_t)id; + } + + CaptureSlice capture = { + .offset = self->capture_data.size, + .length = length, + }; + array_grow_by(&self->capture_data, length + 1); + memcpy(&self->capture_data.contents[capture.offset], name, length); + self->capture_data.contents[self->capture_data.size - 1] = 0; + array_push(&self->capture_names, capture); + return self->capture_names.size - 1; +} 
+ +static inline bool ts_query__pattern_map_search( + const TSQuery *self, + TSSymbol needle, + uint32_t *result +) { + uint32_t base_index = self->wildcard_root_pattern_count; + uint32_t size = self->pattern_map.size - base_index; + if (size == 0) { + *result = base_index; + return false; + } + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = base_index + half_size; + TSSymbol mid_symbol = self->steps.contents[ + self->pattern_map.contents[mid_index].step_index + ].symbol; + if (needle > mid_symbol) base_index = mid_index; + size -= half_size; + } + TSSymbol symbol = self->steps.contents[ + self->pattern_map.contents[base_index].step_index + ].symbol; + if (needle > symbol) { + *result = base_index; + return false; + } else if (needle == symbol) { + *result = base_index; + return true; + } else { + *result = base_index + 1; + return false; + } +} + +static inline void ts_query__pattern_map_insert( + TSQuery *self, + TSSymbol symbol, + uint32_t start_step_index +) { + uint32_t index; + ts_query__pattern_map_search(self, symbol, &index); + array_insert(&self->pattern_map, index, ((PatternSlice) { + .step_index = start_step_index, + .pattern_index = self->pattern_map.size, + })); +} + +static TSQueryError ts_query_parse_pattern( + TSQuery *self, + Stream *stream, + uint32_t depth, + uint32_t *capture_count +) { + uint16_t starting_step_index = self->steps.size; + + if (stream->next == 0) return TSQueryErrorSyntax; + + // Finish the parent S-expression + if (stream->next == ')') { + return PARENT_DONE; + } + + // Parse a parenthesized node expression + else if (stream->next == '(') { + stream_advance(stream); + stream_skip_whitespace(stream); + TSSymbol symbol; + + // Parse the wildcard symbol + if (stream->next == '*') { + symbol = WILDCARD_SYMBOL; + stream_advance(stream); + } + + // Parse a normal node name + else if (stream_is_ident_start(stream)) { + const char *node_name = stream->input; + stream_scan_identifier(stream); + uint32_t 
length = stream->input - node_name; + symbol = ts_query_intern_node_name( + self, + node_name, + length, + TSSymbolTypeRegular + ); + if (!symbol) { + stream_reset(stream, node_name); + return TSQueryErrorNodeType; + } + } else { + return TSQueryErrorSyntax; + } + + // Add a step for the node. + array_push(&self->steps, ((QueryStep) { + .depth = depth, + .symbol = symbol, + .field = 0, + .capture_id = NONE, + })); + + // Parse the child patterns + stream_skip_whitespace(stream); + for (;;) { + TSQueryError e = ts_query_parse_pattern(self, stream, depth + 1, capture_count); + if (e == PARENT_DONE) { + stream_advance(stream); + break; + } else if (e) { + return e; + } + } + } + + // Parse a double-quoted anonymous leaf node expression + else if (stream->next == '"') { + stream_advance(stream); + + // Parse the string content + const char *string_content = stream->input; + while (stream->next && stream->next != '"') stream_advance(stream); + uint32_t length = stream->input - string_content; + + // Add a step for the node + TSSymbol symbol = ts_query_intern_node_name( + self, + string_content, + length, + TSSymbolTypeAnonymous + ); + if (!symbol) { + stream_reset(stream, string_content); + return TSQueryErrorNodeType; + } + array_push(&self->steps, ((QueryStep) { + .depth = depth, + .symbol = symbol, + .field = 0, + })); + + if (stream->next != '"') return TSQueryErrorSyntax; + stream_advance(stream); + } + + // Parse a field-prefixed pattern + else if (stream_is_ident_start(stream)) { + // Parse the field name + const char *field_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = stream->input - field_name; + stream_skip_whitespace(stream); + + if (stream->next != ':') { + stream_reset(stream, field_name); + return TSQueryErrorSyntax; + } + stream_advance(stream); + stream_skip_whitespace(stream); + + // Parse the pattern + uint32_t step_index = self->steps.size; + TSQueryError e = ts_query_parse_pattern(self, stream, depth, capture_count); + 
if (e == PARENT_DONE) return TSQueryErrorSyntax; + if (e) return e; + + // Add the field name to the first step of the pattern + TSFieldId field_id = ts_language_field_id_for_name( + self->language, + field_name, + length + ); + if (!field_id) { + stream->input = field_name; + return TSQueryErrorField; + } + self->steps.contents[step_index].field = field_id; + } + + // Parse a wildcard pattern + else if (stream->next == '*') { + stream_advance(stream); + stream_skip_whitespace(stream); + + // Add a step that matches any kind of node + array_push(&self->steps, ((QueryStep) { + .depth = depth, + .symbol = WILDCARD_SYMBOL, + .field = 0, + })); + } + + // No match + else { + return TSQueryErrorSyntax; + } + + stream_skip_whitespace(stream); + + // Parse a '@'-suffixed capture pattern + if (stream->next == '@') { + stream_advance(stream); + stream_skip_whitespace(stream); + + // Parse the capture name + if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + const char *capture_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = stream->input - capture_name; + + // Add the capture id to the first step of the pattern + uint16_t capture_id = ts_query_intern_capture_name( + self, + capture_name, + length + ); + self->steps.contents[starting_step_index].capture_id = capture_id; + (*capture_count)++; + + stream_skip_whitespace(stream); + } + + return 0; +} + +TSQuery *ts_query_new( + const TSLanguage *language, + const char *source, + uint32_t source_len, + uint32_t *error_offset, + TSQueryError *error_type +) { + TSQuery *self = ts_malloc(sizeof(TSQuery)); + *self = (TSQuery) { + .steps = array_new(), + .pattern_map = array_new(), + .wildcard_root_pattern_count = 0, + .max_capture_count = 0, + .language = language, + }; + + // Parse all of the S-expressions in the given string. 
+ Stream stream = stream_new(source, source_len); + stream_skip_whitespace(&stream); + uint32_t start_step_index; + for (;;) { + start_step_index = self->steps.size; + uint32_t capture_count = 0; + *error_type = ts_query_parse_pattern(self, &stream, 0, &capture_count); + array_push(&self->steps, ((QueryStep) { .depth = PATTERN_DONE_MARKER })); + + // If any pattern could not be parsed, then report the error information + // and terminate. + if (*error_type) { + *error_offset = stream.input - source; + ts_query_delete(self); + return NULL; + } + + // Maintain a map that can look up patterns for a given root symbol. + ts_query__pattern_map_insert( + self, + self->steps.contents[start_step_index].symbol, + start_step_index + ); + if (self->steps.contents[start_step_index].symbol == WILDCARD_SYMBOL) { + self->wildcard_root_pattern_count++; + } + + if (capture_count > self->max_capture_count) { + self->max_capture_count = capture_count; + } + + if (stream.input == stream.end) break; + } + + return self; +} + +void ts_query_delete(TSQuery *self) { + if (self) { + array_delete(&self->steps); + array_delete(&self->pattern_map); + array_delete(&self->capture_data); + array_delete(&self->capture_names); + ts_free(self); + } +} + +uint32_t ts_query_capture_count(const TSQuery *self) { + return self->capture_names.size; +} + +const char *ts_query_capture_name_for_id( + const TSQuery *self, + uint32_t index, + uint32_t *length +) { + CaptureSlice name = self->capture_names.contents[index]; + *length = name.length; + return &self->capture_data.contents[name.offset]; +} + +int ts_query_capture_id_for_name( + const TSQuery *self, + const char *name, + uint32_t length +) { + for (unsigned i = 0; i < self->capture_names.size; i++) { + CaptureSlice existing = self->capture_names.contents[i]; + if ( + existing.length == length && + !strncmp(&self->capture_data.contents[existing.offset], name, length) + ) return i; + } + return -1; +} + +/*************** + * QueryContext + 
***************/ + +TSQueryContext *ts_query_context_new(const TSQuery *query) { + TSQueryContext *self = ts_malloc(sizeof(TSQueryContext)); + *self = (TSQueryContext) { + .query = query, + .ascending = false, + .states = array_new(), + .finished_states = array_new(), + .capture_list_pool = capture_list_pool_new(query->max_capture_count), + }; + return self; +} + +void ts_query_context_delete(TSQueryContext *self) { + array_delete(&self->states); + array_delete(&self->finished_states); + ts_tree_cursor_delete(&self->cursor); + capture_list_pool_delete(&self->capture_list_pool); + ts_free(self); +} + +void ts_query_context_exec(TSQueryContext *self, TSNode node) { + array_clear(&self->states); + array_clear(&self->finished_states); + ts_tree_cursor_reset(&self->cursor, node); + capture_list_pool_clear(&self->capture_list_pool); + self->depth = 0; + self->ascending = false; +} + +bool ts_query_context_next(TSQueryContext *self) { + if (self->finished_states.size > 0) { + array_pop(&self->finished_states); + } + + while (self->finished_states.size == 0) { + if (self->ascending) { + // Remove any states that were started within this node and are still + // not complete. 
+ uint32_t deleted_count = 0; + for (unsigned i = 0, n = self->states.size; i < n; i++) { + QueryState *state = &self->states.contents[i]; + if (state->start_depth == self->depth) { + + // printf("FAIL STATE pattern: %u, step: %u\n", state->pattern_index, state->step_index); + + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + deleted_count++; + } else if (deleted_count > 0) { + self->states.contents[i - deleted_count] = *state; + } + } + + // if (deleted_count) { + // printf("FAILED %u of %u states\n", deleted_count, self->states.size); + // } + + self->states.size -= deleted_count; + + if (ts_tree_cursor_goto_next_sibling(&self->cursor)) { + self->ascending = false; + } else if (ts_tree_cursor_goto_parent(&self->cursor)) { + self->depth--; + } else { + return false; + } + } else { + TSFieldId field_id = NONE; + bool field_occurs_in_later_sibling = false; + TSNode node = ts_tree_cursor_current_node(&self->cursor); + TSSymbol symbol = ts_node_symbol(node); + + // printf("DESCEND INTO NODE: %s\n", ts_node_type(node)); + + // Add new states for any patterns whose root node is a wildcard. + for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { + PatternSlice *slice = &self->query->pattern_map.contents[i]; + QueryStep *step = &self->query->steps.contents[slice->step_index]; + + // Check that the node matches the criteria for the first step + // of the pattern. + if (step->field) { + if (field_id == NONE) { + field_id = ts_tree_cursor_current_field_id_ext( + &self->cursor, + &field_occurs_in_later_sibling + ); + } + if (field_id != step->field) continue; + } + + // Add a new state at the start of this pattern. 
+ uint32_t capture_list_id = capture_list_pool_acquire( + &self->capture_list_pool + ); + if (capture_list_id == NONE) break; + array_push(&self->states, ((QueryState) { + .step_index = slice->step_index, + .pattern_index = slice->pattern_index, + .capture_list_id = capture_list_id, + })); + } + + // Add new states for any patterns whose root node matches this node. + unsigned i; + if (ts_query__pattern_map_search(self->query, symbol, &i)) { + PatternSlice *slice = &self->query->pattern_map.contents[i]; + QueryStep *step = &self->query->steps.contents[slice->step_index]; + do { + if (step->field) { + if (field_id == NONE) { + field_id = ts_tree_cursor_current_field_id_ext( + &self->cursor, + &field_occurs_in_later_sibling + ); + } + if (field_id != step->field) continue; + } + + // printf("START NEW STATE: %u\n", slice->pattern_index); + + // If the node matches the first step of the pattern, then add + // a new in-progress state. First, acquire a list to hold the + // pattern's captures. + uint32_t capture_list_id = capture_list_pool_acquire( + &self->capture_list_pool + ); + if (capture_list_id == NONE) break; + + array_push(&self->states, ((QueryState) { + .pattern_index = slice->pattern_index, + .step_index = slice->step_index + 1, + .start_depth = self->depth, + .capture_list_id = capture_list_id, + .capture_count = 0, + })); + + i++; + if (i == self->query->pattern_map.size) break; + slice = &self->query->pattern_map.contents[i]; + step = &self->query->steps.contents[slice->step_index]; + } while (step->symbol == symbol); + } + + // Update all of the in-progress states with current node. + for (unsigned i = 0, n = self->states.size; i < n; i++) { + QueryState *state = &self->states.contents[i]; + QueryStep *step = &self->query->steps.contents[state->step_index]; + + // Check that the node matches all of the criteria for the next + // step of the pattern. 
+ if (state->start_depth + step->depth != self->depth) continue; + if (step->symbol && step->symbol != symbol) continue; + if (step->field) { + // Only compute the current field if it is needed for the current + // step of some in-progress pattern. + if (field_id == NONE) { + field_id = ts_tree_cursor_current_field_id_ext( + &self->cursor, + &field_occurs_in_later_sibling + ); + } + if (field_id != step->field) continue; + } + + // Some patterns can match their root node in multiple ways, + // capturing different children. If this pattern step could match + // later children within the same parent, then this query state + // cannot simply be updated in place. It must be split into two + // states: one that captures this node, and one which skips over + // this node, to preserve the possibility of capturing later + // siblings. + QueryState *next_state = state; + if (step->depth > 0 && (!step->field || field_occurs_in_later_sibling)) { + uint32_t capture_list_id = capture_list_pool_acquire( + &self->capture_list_pool + ); + if (capture_list_id != NONE) { + array_push(&self->states, *state); + next_state = array_back(&self->states); + next_state->capture_list_id = capture_list_id; + } + } + + // Record captures + if (step->capture_id != NONE) { + // printf("CAPTURE id: %u\n", step->capture_id); + + TSQueryCapture *capture_list = capture_list_pool_get( + &self->capture_list_pool, + next_state->capture_list_id + ); + capture_list[next_state->capture_count++] = (TSQueryCapture) { + node, + step->capture_id + }; + } + + // If the pattern is now done, then populate the query context's + // finished state. 
+ next_state->step_index++; + QueryStep *next_step = step + 1; + if (next_step->depth == PATTERN_DONE_MARKER) { + // printf("FINISHED MATCH pattern: %u\n", next_state->pattern_index); + + array_push(&self->finished_states, *next_state); + if (next_state == state) { + array_erase(&self->states, i); + i--; + n--; + } else { + array_pop(&self->states); + } + } + } + + if (ts_tree_cursor_goto_first_child(&self->cursor)) { + self->depth++; + } else { + self->ascending = true; + } + } + } + + return true; +} + +uint32_t ts_query_context_matched_pattern_index(const TSQueryContext *self) { + if (self->finished_states.size > 0) { + QueryState *state = array_back(&self->finished_states); + return state->pattern_index; + } + return 0; +} + +const TSQueryCapture *ts_query_context_matched_captures( + const TSQueryContext *self, + uint32_t *count +) { + if (self->finished_states.size > 0) { + QueryState *state = array_back(&self->finished_states); + *count = state->capture_count; + return capture_list_pool_get( + (CaptureListPool *)&self->capture_list_pool, + state->capture_list_id + ); + } + return NULL; +} diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index ba77ebc0..2ba3f947 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -244,7 +244,12 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { ); } -TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { +static inline TSFieldId ts_tree_cursor__current_field_info( + const TSTreeCursor *_self, + const TSFieldMapEntry **field_map, + const TSFieldMapEntry **field_map_end, + uint32_t *child_index +) { const TreeCursor *self = (const TreeCursor *)_self; // Walk up the tree, visiting the current node and its invisible ancestors. 
@@ -264,25 +269,61 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { } } - const TSFieldMapEntry *field_map, *field_map_end; + if (ts_subtree_extra(*entry->subtree)) break; + ts_language_field_map( self->tree->language, parent_entry->subtree->ptr->production_id, - &field_map, &field_map_end + field_map, field_map_end ); - - while (field_map < field_map_end) { - if ( - !ts_subtree_extra(*entry->subtree) && - !field_map->inherited && - field_map->child_index == entry->structural_child_index - ) return field_map->field_id; - field_map++; + for (const TSFieldMapEntry *i = *field_map; i < *field_map_end; i++) { + if (!i->inherited && i->child_index == entry->structural_child_index) { + *child_index = entry->structural_child_index; + return i->field_id; + } } } return 0; } +TSFieldId ts_tree_cursor_current_field_id_ext( + const TSTreeCursor *self, + bool *field_has_additional +) { + uint32_t child_index; + const TSFieldMapEntry *field_map, *field_map_end; + TSFieldId field_id = ts_tree_cursor__current_field_info( + self, + &field_map, + &field_map_end, + &child_index + ); + + // After finding the field, check if any other later children have + // the same field name. 
+ if (field_id) { + for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { + if (i->field_id == field_id && i->child_index > child_index) { + *field_has_additional = true; + } + } + } + + return field_id; +} + + +TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *self) { + uint32_t child_index; + const TSFieldMapEntry *field_map, *field_map_end; + return ts_tree_cursor__current_field_info( + self, + &field_map, + &field_map_end, + &child_index + ); +} + const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { TSFieldId id = ts_tree_cursor_current_field_id(_self); if (id) { diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index 55bdad86..9b438843 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -16,5 +16,6 @@ typedef struct { } TreeCursor; void ts_tree_cursor_init(TreeCursor *, TSNode); +TSFieldId ts_tree_cursor_current_field_id_ext(const TSTreeCursor *, bool *); #endif // TREE_SITTER_TREE_CURSOR_H_ From 52cda5f54101d8ee009decb4e486ac7fac9a1cbb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 10 Sep 2019 20:54:21 -0700 Subject: [PATCH 003/558] Start work on wasm binding to query API --- lib/binding_web/binding.c | 41 +++++++++++ lib/binding_web/binding.js | 111 +++++++++++++++++++++++++---- lib/binding_web/exports.json | 7 ++ lib/binding_web/test/query-test.js | 51 +++++++++++++ 4 files changed, 198 insertions(+), 12 deletions(-) create mode 100644 lib/binding_web/test/query-test.js diff --git a/lib/binding_web/binding.c b/lib/binding_web/binding.c index e6018b03..e94c5aa0 100644 --- a/lib/binding_web/binding.c +++ b/lib/binding_web/binding.c @@ -566,3 +566,44 @@ int ts_node_is_missing_wasm(const TSTree *tree) { TSNode node = unmarshal_node(tree); return ts_node_is_missing(node); } + +/******************/ +/* Section - Query */ +/******************/ + +void ts_query_exec_wasm( + const TSQuery *self, + TSQueryContext *context, + const TSTree *tree +) { + TSNode node = unmarshal_node(tree); + 
+ Array(const void *) result = array_new(); + + unsigned index = 0; + unsigned match_count = 0; + ts_query_context_exec(context, node); + while (ts_query_context_next(context)) { + match_count++; + uint32_t pattern_index = ts_query_context_matched_pattern_index(context); + uint32_t capture_count; + const TSQueryCapture *captures = ts_query_context_matched_captures( + context, + &capture_count + ); + + array_grow_by(&result, 1 + 6 * capture_count); + + result.contents[index++] = (const void *)pattern_index; + result.contents[index++] = (const void *)capture_count; + for (unsigned i = 0; i < capture_count; i++) { + const TSQueryCapture *capture = &captures[i]; + result.contents[index++] = (const void *)capture->index; + marshal_node(result.contents + index, capture->node); + index += 5; + } + } + + TRANSFER_BUFFER[0] = (const void *)(match_count); + TRANSFER_BUFFER[1] = result.contents; +} diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 4ce334cc..ac48cb70 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -5,6 +5,7 @@ const SIZE_OF_NODE = 5 * SIZE_OF_INT; const SIZE_OF_POINT = 2 * SIZE_OF_INT; const SIZE_OF_RANGE = 2 * SIZE_OF_INT + 2 * SIZE_OF_POINT; const ZERO_POINT = {row: 0, column: 0}; +const QUERY_WORD_REGEX = /[\w-.]*/; var VERSION; var MIN_COMPATIBLE_VERSION; @@ -143,9 +144,7 @@ class Parser { class Tree { constructor(internal, address, language, textCallback) { - if (internal !== INTERNAL) { - throw new Error('Illegal constructor') - } + assertInternal(internal); this[0] = address; this.language = language; this.textCallback = textCallback; @@ -201,9 +200,7 @@ class Tree { class Node { constructor(internal, tree) { - if (internal !== INTERNAL) { - throw new Error('Illegal constructor') - } + assertInternal(internal); this.tree = tree; } @@ -526,9 +523,7 @@ class Node { class TreeCursor { constructor(internal, tree) { - if (internal !== INTERNAL) { - throw new Error('Illegal constructor') - } + 
assertInternal(internal); this.tree = tree; unmarshalTreeCursor(this); } @@ -630,9 +625,7 @@ class TreeCursor { class Language { constructor(internal, address) { - if (internal !== INTERNAL) { - throw new Error('Illegal constructor') - } + assertInternal(internal); this[0] = address; this.types = new Array(C._ts_language_symbol_count(this[0])); for (let i = 0, n = this.types.length; i < n; i++) { @@ -672,6 +665,51 @@ class Language { return this.fields[fieldName] || null; } + query(source) { + const sourceLength = lengthBytesUTF8(source); + const sourceAddress = C._malloc(sourceLength + 1); + stringToUTF8(source, sourceAddress, sourceLength + 1); + const address = C._ts_query_new( + this[0], + sourceAddress, + sourceLength, + TRANSFER_BUFFER, + TRANSFER_BUFFER + SIZE_OF_INT + ); + if (address) { + const contextAddress = C._ts_query_context_new(address); + const captureCount = C._ts_query_capture_count(address); + const captureNames = new Array(captureCount); + for (let i = 0; i < captureCount; i++) { + const nameAddress = C._ts_query_capture_name_for_id( + address, + i, + TRANSFER_BUFFER + ); + const nameLength = getValue(TRANSFER_BUFFER, 'i32'); + captureNames[i] = UTF8ToString(nameAddress, nameLength); + } + return new Query(INTERNAL, address, contextAddress, captureNames); + } else { + const errorId = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); + const utf8ErrorOffset = getValue(TRANSFER_BUFFER, 'i32'); + const errorOffset = UTF8ToString(sourceAddress, utf8ErrorOffset).length; + C._free(sourceAddress); + const suffix = source.slice(errorOffset, 100); + switch (errorId) { + case 2: throw new RangeError( + `Bad node name '${suffix.match(QUERY_WORD_REGEX)[0]}'` + ); + case 3: throw new RangeError( + `Bad field name '${suffix.match(QUERY_WORD_REGEX)[0]}'` + ); + default: throw new SyntaxError( + `Bad syntax at offset ${errorOffset}: '${suffix}'...` + ); + } + } + } + static load(url) { let bytes; if ( @@ -704,6 +742,55 @@ class Language { } } +class Query { + 
constructor(internal, address, contextAddress, captureNames) { + assertInternal(internal); + this[0] = address; + this[1] = contextAddress; + this.captureNames = captureNames; + } + + delete() { + C._ts_query_delete(this[0]); + C._ts_query_context_delete(this[0]); + } + + exec(queryNode) { + marshalNode(queryNode); + + C._ts_query_exec_wasm(this[0], this[1], queryNode.tree[0]); + + const matchCount = getValue(TRANSFER_BUFFER, 'i32'); + const nodesAddress = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); + const result = new Array(matchCount); + + let address = nodesAddress; + for (let i = 0; i < matchCount; i++) { + const pattern = getValue(address, 'i32'); + address += SIZE_OF_INT; + const captures = new Array(getValue(address, 'i32')); + address += SIZE_OF_INT; + for (let j = 0, n = captures.length; j < n; j++) { + const captureIndex = getValue(address, 'i32'); + address += SIZE_OF_INT; + const node = unmarshalNode(queryNode.tree, address); + address += SIZE_OF_NODE; + captures[j] = {name: this.captureNames[captureIndex], node}; + } + result[i] = {pattern, captures}; + } + + // Free the intermediate buffers + C._free(nodesAddress); + + return result; + } +} + +function assertInternal(x) { + if (x !== INTERNAL) throw new Error('Illegal constructor') +} + function isPoint(point) { return ( point && diff --git a/lib/binding_web/exports.json b/lib/binding_web/exports.json index a0cf9305..e2b187f7 100644 --- a/lib/binding_web/exports.json +++ b/lib/binding_web/exports.json @@ -68,6 +68,13 @@ "_ts_parser_new_wasm", "_ts_parser_parse_wasm", "_ts_parser_set_language", + "_ts_query_capture_count", + "_ts_query_capture_name_for_id", + "_ts_query_context_delete", + "_ts_query_context_new", + "_ts_query_delete", + "_ts_query_exec_wasm", + "_ts_query_new", "_ts_tree_cursor_current_field_id_wasm", "_ts_tree_cursor_current_node_id_wasm", "_ts_tree_cursor_current_node_is_missing_wasm", diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js new file 
mode 100644 index 00000000..f02c5d86 --- /dev/null +++ b/lib/binding_web/test/query-test.js @@ -0,0 +1,51 @@ +const {assert} = require('chai'); +let Parser, JavaScript; + +describe("Query", () => { + let parser, tree, query; + + before(async () => + ({Parser, JavaScript} = await require('./helper')) + ); + + beforeEach(() => { + parser = new Parser().setLanguage(JavaScript); + }); + + afterEach(() => { + parser.delete(); + if (tree) tree.delete(); + if (query) query.delete(); + }); + + it('throws an error on invalid syntax', () => { + assert.throws(() => { + JavaScript.query("(function_declaration wat)") + }, "Bad syntax at offset 22: \'wat)\'..."); + assert.throws(() => { + JavaScript.query("(non_existent)") + }, "Bad node name 'non_existent'"); + assert.throws(() => { + JavaScript.query("(function_declaration non_existent:(identifier))") + }, "Bad field name 'non_existent'"); + }); + + it('matches simple queries', () => { + tree = parser.parse("function one() { two(); function three() {} }"); + const query = JavaScript.query(` + (function_declaration name:(identifier) @the-name) + `); + const matches = query.exec(tree.rootNode); + assert.deepEqual( + matches.map(({pattern, captures}) => ({ + pattern, + captures: captures.map(({name, node}) => ({name, text: node.text})) + })), + [ + {pattern: 0, captures: [{name: 'the-name', text: 'one'}]}, + // {pattern: 0, captures: [{name: 'the-function', text: 'two'}]}, + {pattern: 0, captures: [{name: 'the-name', text: 'three'}]}, + ] + ); + }); +}); From 60467ae701e37db8415d8bbdd1e3c67f1c51c958 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 10 Sep 2019 22:30:27 -0700 Subject: [PATCH 004/558] Fix bugs in binary search used in tree queries --- cli/src/tests/query_test.rs | 42 ++++++++++++++++++++++++++++++ lib/binding_web/test/query-test.js | 11 ++++---- lib/src/query.c | 6 ++--- 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 
32adba62..28becda2 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -193,6 +193,48 @@ fn test_query_exec_with_multiple_matches_same_root() { allocations::stop_recording(); } +#[test] +fn test_query_exec_multiple_patterns() { + allocations::start_recording(); + + let language = get_language("javascript"); + let query = Query::new( + language, + " + (function_declaration name:(identifier) @fn-def) + (call_expression function:(identifier) @fn-ref) + ", + ) + .unwrap(); + + let source = " + function f1() { + f2(f3()); + } + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("fn-def", "f1")]), + (1, vec![("fn-ref", "f2")]), + (1, vec![("fn-ref", "f3")]), + ], + ); + + drop(context); + drop(parser); + drop(query); + drop(tree); + allocations::stop_recording(); +} + fn collect_matches<'a>( matches: impl Iterator>, query: &'a Query, diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js index f02c5d86..38c3209e 100644 --- a/lib/binding_web/test/query-test.js +++ b/lib/binding_web/test/query-test.js @@ -32,8 +32,9 @@ describe("Query", () => { it('matches simple queries', () => { tree = parser.parse("function one() { two(); function three() {} }"); - const query = JavaScript.query(` - (function_declaration name:(identifier) @the-name) + query = JavaScript.query(` + (function_declaration name:(identifier) @fn-def) + (call_expression function:(identifier) @fn-ref) `); const matches = query.exec(tree.rootNode); assert.deepEqual( @@ -42,9 +43,9 @@ describe("Query", () => { captures: captures.map(({name, node}) => ({name, text: node.text})) })), [ - {pattern: 0, captures: [{name: 'the-name', text: 'one'}]}, - // {pattern: 0, captures: [{name: 'the-function', text: 'two'}]}, - {pattern: 0, 
captures: [{name: 'the-name', text: 'three'}]}, + {pattern: 0, captures: [{name: 'fn-def', text: 'one'}]}, + {pattern: 1, captures: [{name: 'fn-ref', text: 'two'}]}, + {pattern: 0, captures: [{name: 'fn-def', text: 'three'}]}, ] ); }); diff --git a/lib/src/query.c b/lib/src/query.c index f7836a86..167de1d7 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -252,20 +252,20 @@ static inline bool ts_query__pattern_map_search( TSSymbol mid_symbol = self->steps.contents[ self->pattern_map.contents[mid_index].step_index ].symbol; - if (needle > mid_symbol) base_index = mid_index; + if (needle >= mid_symbol) base_index = mid_index; size -= half_size; } TSSymbol symbol = self->steps.contents[ self->pattern_map.contents[base_index].step_index ].symbol; if (needle > symbol) { - *result = base_index; + *result = base_index + 1; return false; } else if (needle == symbol) { *result = base_index; return true; } else { - *result = base_index + 1; + *result = base_index; return false; } } From 4fa0b02d67253c36b04f5ef81ac9f00ff2ed086c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 12:06:38 -0700 Subject: [PATCH 005/558] Fix management of capture lists in query execution --- cli/src/tests/helpers/allocations.rs | 6 + cli/src/tests/query_test.rs | 447 +++++++++++++++------------ lib/src/query.c | 79 +++-- 3 files changed, 322 insertions(+), 210 deletions(-) diff --git a/cli/src/tests/helpers/allocations.rs b/cli/src/tests/helpers/allocations.rs index c64762bd..2f89c173 100644 --- a/cli/src/tests/helpers/allocations.rs +++ b/cli/src/tests/helpers/allocations.rs @@ -51,6 +51,12 @@ pub fn stop_recording() { } } +pub fn record(f: impl FnOnce()) { + start_recording(); + f(); + stop_recording(); +} + fn record_alloc(ptr: *mut c_void) { let mut recorder = RECORDER.lock(); if recorder.enabled { diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 28becda2..cfa6a2ba 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ 
-4,235 +4,302 @@ use tree_sitter::{Parser, Query, QueryError, QueryMatch}; #[test] fn test_query_errors_on_invalid_syntax() { - allocations::start_recording(); + allocations::record(|| { + let language = get_language("javascript"); - let language = get_language("javascript"); + assert!(Query::new(language, "(if_statement)").is_ok()); + assert!(Query::new(language, "(if_statement condition:(identifier))").is_ok()); - assert!(Query::new(language, "(if_statement)").is_ok()); - assert!(Query::new(language, "(if_statement condition:(identifier))").is_ok()); + // Mismatched parens + assert_eq!( + Query::new(language, "(if_statement"), + Err(QueryError::Syntax(13)) + ); + assert_eq!( + Query::new(language, "(if_statement))"), + Err(QueryError::Syntax(14)) + ); - // Mismatched parens - assert_eq!( - Query::new(language, "(if_statement"), - Err(QueryError::Syntax(13)) - ); - assert_eq!( - Query::new(language, "(if_statement))"), - Err(QueryError::Syntax(14)) - ); + // Return an error at the *beginning* of a bare identifier not followed a colon. + // If there's a colon but no pattern, return an error at the end of the colon. + assert_eq!( + Query::new(language, "(if_statement identifier)"), + Err(QueryError::Syntax(14)) + ); + assert_eq!( + Query::new(language, "(if_statement condition:)"), + Err(QueryError::Syntax(24)) + ); - // Return an error at the *beginning* of a bare identifier not followed a colon. - // If there's a colon but no pattern, return an error at the end of the colon. 
- assert_eq!( - Query::new(language, "(if_statement identifier)"), - Err(QueryError::Syntax(14)) - ); - assert_eq!( - Query::new(language, "(if_statement condition:)"), - Err(QueryError::Syntax(24)) - ); - - assert_eq!( - Query::new(language, "(if_statement condition:)"), - Err(QueryError::Syntax(24)) - ); - - allocations::stop_recording(); + assert_eq!( + Query::new(language, "(if_statement condition:)"), + Err(QueryError::Syntax(24)) + ); + }); } #[test] fn test_query_errors_on_invalid_symbols() { - allocations::start_recording(); + allocations::record(|| { + let language = get_language("javascript"); - let language = get_language("javascript"); - - assert_eq!( - Query::new(language, "(non_existent1)"), - Err(QueryError::NodeType("non_existent1")) - ); - assert_eq!( - Query::new(language, "(if_statement (non_existent2))"), - Err(QueryError::NodeType("non_existent2")) - ); - assert_eq!( - Query::new(language, "(if_statement condition: (non_existent3))"), - Err(QueryError::NodeType("non_existent3")) - ); - assert_eq!( - Query::new(language, "(if_statement not_a_field: (identifier))"), - Err(QueryError::Field("not_a_field")) - ); - - allocations::stop_recording(); + assert_eq!( + Query::new(language, "(non_existent1)"), + Err(QueryError::NodeType("non_existent1")) + ); + assert_eq!( + Query::new(language, "(if_statement (non_existent2))"), + Err(QueryError::NodeType("non_existent2")) + ); + assert_eq!( + Query::new(language, "(if_statement condition: (non_existent3))"), + Err(QueryError::NodeType("non_existent3")) + ); + assert_eq!( + Query::new(language, "(if_statement not_a_field: (identifier))"), + Err(QueryError::Field("not_a_field")) + ); + }); } #[test] fn test_query_capture_names() { - allocations::start_recording(); + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (if_statement + condition: (binary_expression + left: * @left-operand + operator: "||" + right: * @right-operand) + consequence: 
(statement_block) @body) - let language = get_language("javascript"); - let query = Query::new( - language, - r#" - (if_statement - condition: (binary_expression - left: * @left-operand - operator: "||" - right: * @right-operand) - consequence: (statement_block) @body) + (while_statement + condition:* @loop-condition) + "#, + ) + .unwrap(); - (while_statement - condition:* @loop-condition) - "#, - ) - .unwrap(); - - assert_eq!( - query.capture_names(), - &[ - "left-operand".to_string(), - "right-operand".to_string(), - "body".to_string(), - "loop-condition".to_string(), - ] - ); - - drop(query); - allocations::stop_recording(); + assert_eq!( + query.capture_names(), + &[ + "left-operand".to_string(), + "right-operand".to_string(), + "body".to_string(), + "loop-condition".to_string(), + ] + ); + }); } #[test] fn test_query_exec_with_simple_pattern() { - allocations::start_recording(); + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + "(function_declaration name: (identifier) @fn-name)", + ) + .unwrap(); - let language = get_language("javascript"); - let query = Query::new( - language, - "(function_declaration name: (identifier) @fn-name)", - ) - .unwrap(); + let source = "function one() { two(); function three() {} }"; + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); - let source = "function one() { two(); function three() {} }"; - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); - let context = query.context(); - let matches = context.exec(tree.root_node()); - - assert_eq!( - collect_matches(matches, &query, source), - &[ - (0, vec![("fn-name", "one")]), - (0, vec![("fn-name", "three")]) - ], - ); - - drop(context); - drop(parser); - drop(query); - drop(tree); - 
allocations::stop_recording(); + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("fn-name", "one")]), + (0, vec![("fn-name", "three")]) + ], + ); + }); } #[test] fn test_query_exec_with_multiple_matches_same_root() { - allocations::start_recording(); + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + "(class_declaration + name: (identifier) @the-class-name + (class_body + (method_definition + name: (property_identifier) @the-method-name)))", + ) + .unwrap(); - let language = get_language("javascript"); - let query = Query::new( - language, - "(class_declaration - name: (identifier) @the-class-name - (class_body - (method_definition - name: (property_identifier) @the-method-name)))", - ) - .unwrap(); + let source = " + class Person { + // the constructor + constructor(name) { this.name = name; } - let source = " - class Person { - // the constructor - constructor(name) { this.name = name; } + // the getter + getFullName() { return this.name; } + } + "; - // the getter - getFullName() { return this.name; } - } - "; + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let context = query.context(); - let matches = context.exec(tree.root_node()); - - assert_eq!( - collect_matches(matches, &query, source), - &[ - ( - 0, - vec![ - ("the-class-name", "Person"), - ("the-method-name", "constructor") - ] - ), - ( - 0, - vec![ - ("the-class-name", "Person"), - ("the-method-name", "getFullName") - ] - ), - ], - ); - - drop(context); - drop(parser); - drop(query); - drop(tree); - allocations::stop_recording(); + assert_eq!( + collect_matches(matches, &query, source), + &[ + ( + 0, + vec![ + ("the-class-name", 
"Person"), + ("the-method-name", "constructor") + ] + ), + ( + 0, + vec![ + ("the-class-name", "Person"), + ("the-method-name", "getFullName") + ] + ), + ], + ); + }); } #[test] fn test_query_exec_multiple_patterns() { - allocations::start_recording(); + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (function_declaration name:(identifier) @fn-def) + (call_expression function:(identifier) @fn-ref) + ", + ) + .unwrap(); - let language = get_language("javascript"); - let query = Query::new( - language, - " - (function_declaration name:(identifier) @fn-def) - (call_expression function:(identifier) @fn-ref) + let source = " + function f1() { + f2(f3()); + } + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("fn-def", "f1")]), + (1, vec![("fn-ref", "f2")]), + (1, vec![("fn-ref", "f3")]), + ], + ); + }); +} + +#[test] +fn test_query_exec_nested_matches_without_fields() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (array + (array + (identifier) @element-1 + (identifier) @element-2)) + ", + ) + .unwrap(); + + let source = " + [[a]]; + [[c, d], [e, f, g]]; + [[h], [i]]; + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("element-1", "c"), ("element-2", "d")]), + (0, vec![("element-1", "e"), ("element-2", "f")]), + (0, vec![("element-1", "f"), ("element-2", "g")]), + (0, vec![("element-1", "e"), ("element-2", "g")]), + ], + ); + }); +} + +#[test] +fn 
test_query_exec_many_matches() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, "(array (identifier) @element)").unwrap(); + + let source = "[hello];\n".repeat(50); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); + + assert_eq!( + collect_matches(matches, &query, source.as_str()), + vec![(0, vec![("element", "hello")]); 50], + ); + }); +} + +#[test] +fn test_query_exec_too_many_match_permutations_to_track() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (array (identifier) @pre (identifier) @post) ", - ) - .unwrap(); + ) + .unwrap(); - let source = " - function f1() { - f2(f3()); - } - "; + let mut source = "hello, ".repeat(50); + source.insert(0, '['); + source.push_str("];"); - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let context = query.context(); - let matches = context.exec(tree.root_node()); + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); - assert_eq!( - collect_matches(matches, &query, source), - &[ - (0, vec![("fn-def", "f1")]), - (1, vec![("fn-ref", "f2")]), - (1, vec![("fn-ref", "f3")]), - ], - ); - - drop(context); - drop(parser); - drop(query); - drop(tree); - allocations::stop_recording(); + // For this pathological query, some match permutations will be dropped. + // Just check that a subset of the results are returned, and that no crash or + // leak occurs.
+ assert_eq!( + collect_matches(matches, &query, source.as_str())[0], + (0, vec![("pre", "hello"), ("post", "hello")]), + ); + }); } fn collect_matches<'a>( diff --git a/lib/src/query.c b/lib/src/query.c index 167de1d7..9325424b 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -107,6 +107,12 @@ static const uint16_t NONE = UINT16_MAX; static const TSSymbol WILDCARD_SYMBOL = 0; static const uint16_t MAX_STATE_COUNT = 32; +#ifdef DEBUG_QUERY +#define LOG printf +#else +#define LOG(...) +#endif + /********** * Stream **********/ @@ -183,15 +189,23 @@ static TSQueryCapture *capture_list_pool_get(CaptureListPool *self, uint16_t id) return &self->contents[id * self->list_size]; } +static inline uint32_t capture_list_bitmask_for_id(uint16_t id) { + // An id of zero corresponds to the highest-order bit in the bitmask. + return (1u << (31 - id)); +} + static uint16_t capture_list_pool_acquire(CaptureListPool *self) { + // In the usage_map bitmask, ones represent free lists, and zeros represent + // lists that are in use. A free list can quickly be found by counting + // the leading zeros in the usage map. 
uint16_t id = count_leading_zeros(self->usage_map); if (id == 32) return NONE; - self->usage_map &= ~(1 << id); + self->usage_map &= ~capture_list_bitmask_for_id(id); return id; } static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { - self->usage_map |= (1 << id); + self->usage_map |= capture_list_bitmask_for_id(id); } /********* @@ -586,9 +600,31 @@ void ts_query_context_exec(TSQueryContext *self, TSNode node) { self->ascending = false; } +static QueryState *ts_query_context_copy_state( + TSQueryContext *self, + QueryState *state +) { + uint32_t capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); + if (capture_list_id == NONE) return NULL; + array_push(&self->states, *state); + QueryState *new_state = array_back(&self->states); + new_state->capture_list_id = capture_list_id; + TSQueryCapture *old_captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + TSQueryCapture *new_captures = capture_list_pool_get( + &self->capture_list_pool, + capture_list_id + ); + memcpy(new_captures, old_captures, state->capture_count * sizeof(TSQueryCapture)); + return new_state; +} + bool ts_query_context_next(TSQueryContext *self) { if (self->finished_states.size > 0) { - array_pop(&self->finished_states); + QueryState state = array_pop(&self->finished_states); + capture_list_pool_release(&self->capture_list_pool, state.capture_list_id); } while (self->finished_states.size == 0) { @@ -598,9 +634,14 @@ bool ts_query_context_next(TSQueryContext *self) { uint32_t deleted_count = 0; for (unsigned i = 0, n = self->states.size; i < n; i++) { QueryState *state = &self->states.contents[i]; - if (state->start_depth == self->depth) { + QueryStep *step = &self->query->steps.contents[state->step_index]; - // printf("FAIL STATE pattern: %u, step: %u\n", state->pattern_index, state->step_index); + if (state->start_depth + step->depth > self->depth) { + LOG( + "fail state with pattern: %u, step: %u\n", + 
state->pattern_index, + state->step_index + ); capture_list_pool_release( &self->capture_list_pool, @@ -612,9 +653,9 @@ bool ts_query_context_next(TSQueryContext *self) { } } - // if (deleted_count) { - // printf("FAILED %u of %u states\n", deleted_count, self->states.size); - // } + if (deleted_count) { + LOG("failed %u of %u states\n", deleted_count, self->states.size); + } self->states.size -= deleted_count; @@ -631,7 +672,7 @@ bool ts_query_context_next(TSQueryContext *self) { TSNode node = ts_tree_cursor_current_node(&self->cursor); TSSymbol symbol = ts_node_symbol(node); - // printf("DESCEND INTO NODE: %s\n", ts_node_type(node)); + LOG("enter node %s\n", ts_node_type(node)); // Add new states for any patterns whose root node is a wildcard. for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { @@ -678,7 +719,7 @@ bool ts_query_context_next(TSQueryContext *self) { if (field_id != step->field) continue; } - // printf("START NEW STATE: %u\n", slice->pattern_index); + LOG("start pattern %u\n", slice->pattern_index); // If the node matches the first step of the pattern, then add // a new in-progress state. First, acquire a list to hold the @@ -733,19 +774,15 @@ bool ts_query_context_next(TSQueryContext *self) { // siblings. 
QueryState *next_state = state; if (step->depth > 0 && (!step->field || field_occurs_in_later_sibling)) { - uint32_t capture_list_id = capture_list_pool_acquire( - &self->capture_list_pool - ); - if (capture_list_id != NONE) { - array_push(&self->states, *state); - next_state = array_back(&self->states); - next_state->capture_list_id = capture_list_id; - } + QueryState *copy = ts_query_context_copy_state(self, state); + if (copy) next_state = copy; } + LOG("advance state for pattern %u\n", next_state->pattern_index); + // Record captures if (step->capture_id != NONE) { - // printf("CAPTURE id: %u\n", step->capture_id); + LOG("capture id %u\n", step->capture_id); TSQueryCapture *capture_list = capture_list_pool_get( &self->capture_list_pool, @@ -762,7 +799,7 @@ bool ts_query_context_next(TSQueryContext *self) { next_state->step_index++; QueryStep *next_step = step + 1; if (next_step->depth == PATTERN_DONE_MARKER) { - // printf("FINISHED MATCH pattern: %u\n", next_state->pattern_index); + LOG("finish pattern %u\n", next_state->pattern_index); array_push(&self->finished_states, *next_state); if (next_state == state) { @@ -808,3 +845,5 @@ const TSQueryCapture *ts_query_context_matched_captures( } return NULL; } + +#undef LOG From 33f89522f6c31d8364a39f8b42cf17ead3133491 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 12:16:05 -0700 Subject: [PATCH 006/558] Allow lisp-style comments in tree queries --- cli/src/tests/query_test.rs | 93 ++++++++++++++++++++++++------------- lib/src/query.c | 13 +++++- 2 files changed, 73 insertions(+), 33 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index cfa6a2ba..5bf781b4 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -62,38 +62,6 @@ fn test_query_errors_on_invalid_symbols() { }); } -#[test] -fn test_query_capture_names() { - allocations::record(|| { - let language = get_language("javascript"); - let query = Query::new( - language, - r#" - 
(if_statement - condition: (binary_expression - left: * @left-operand - operator: "||" - right: * @right-operand) - consequence: (statement_block) @body) - - (while_statement - condition:* @loop-condition) - "#, - ) - .unwrap(); - - assert_eq!( - query.capture_names(), - &[ - "left-operand".to_string(), - "right-operand".to_string(), - "body".to_string(), - "loop-condition".to_string(), - ] - ); - }); -} - #[test] fn test_query_exec_with_simple_pattern() { allocations::record(|| { @@ -302,6 +270,67 @@ fn test_query_exec_too_many_match_permutations_to_track() { }); } +#[test] +fn test_query_capture_names() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (if_statement + condition: (binary_expression + left: * @left-operand + operator: "||" + right: * @right-operand) + consequence: (statement_block) @body) + + (while_statement + condition:* @loop-condition) + "#, + ) + .unwrap(); + + assert_eq!( + query.capture_names(), + &[ + "left-operand".to_string(), + "right-operand".to_string(), + "body".to_string(), + "loop-condition".to_string(), + ] + ); + }); +} + +#[test] +fn test_query_comments() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + ; this is my first comment + ; i have two comments here + (function_declaration + ; there is also a comment here + ; and here + name: (identifier) @fn-name)", + ) + .unwrap(); + + let source = "function one() { }"; + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); + assert_eq!( + collect_matches(matches, &query, source), + &[(0, vec![("fn-name", "one")]),], + ); + }); +} + fn collect_matches<'a>( matches: impl Iterator>, query: &'a Query, diff --git a/lib/src/query.c b/lib/src/query.c index 9325424b..fe8d2eec 100644 --- a/lib/src/query.c 
+++ b/lib/src/query.c @@ -147,7 +147,18 @@ static Stream stream_new(const char *string, uint32_t length) { } static void stream_skip_whitespace(Stream *stream) { - while (iswspace(stream->next)) stream_advance(stream); + for (;;) { + if (iswspace(stream->next)) { + stream_advance(stream); + } else if (stream->next == ';') { + stream_advance(stream); + while (stream->next && stream->next != '\n') { + if (!stream_advance(stream)) break; + } + } else { + break; + } + } } static bool stream_is_ident_start(Stream *stream) { From d674bc139a0550422193163c253fe23d00cf7278 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 14:44:49 -0700 Subject: [PATCH 007/558] Fix more bugs in binary search used in tree queries This binary search implementation differs from Rust's `slice::binary_search_by` method in how they deal with ties. In Rust's implementation: > If there are multiple matches, then any one of the matches > could be returned. This implementation needs to return the index of the *first* match. 
--- cli/src/tests/query_test.rs | 43 ++++++++++++++++++++++++++++++++++++- lib/src/query.c | 42 ++++++++++++++++++++++++------------ 2 files changed, 70 insertions(+), 15 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 5bf781b4..6ab77d1f 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -143,7 +143,7 @@ fn test_query_exec_with_multiple_matches_same_root() { } #[test] -fn test_query_exec_multiple_patterns() { +fn test_query_exec_multiple_patterns_different_roots() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( @@ -178,6 +178,47 @@ fn test_query_exec_multiple_patterns() { }); } +#[test] +fn test_query_exec_multiple_patterns_same_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (pair + key: (property_identifier) @method-def + value: (function)) + + (pair + key: (property_identifier) @method-def + value: (arrow_function)) + ", + ) + .unwrap(); + + let source = " + a = { + b: () => { return c; }, + d: function() { return d; } + }; + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (1, vec![("method-def", "b")]), + (0, vec![("method-def", "d")]), + ], + ); + }); +} + #[test] fn test_query_exec_nested_matches_without_fields() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index fe8d2eec..d728238d 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -260,6 +260,12 @@ static uint16_t ts_query_intern_capture_name( return self->capture_names.size - 1; } +// The `pattern_map` contains a mapping from TSSymbol values to indices in the +// `steps` array. 
For a given syntax node, the `pattern_map` makes it possible +// to quickly find the starting steps of all of the patterns whose root matches +// that node. It is represented as an array of `(symbol, step index)` pairs, +// sorted by symbol. Lookups use a binary search so that their cost scales +// logarithmically with the number of patterns in the query. static inline bool ts_query__pattern_map_search( const TSQuery *self, TSSymbol needle, @@ -277,24 +283,29 @@ static inline bool ts_query__pattern_map_search( TSSymbol mid_symbol = self->steps.contents[ self->pattern_map.contents[mid_index].step_index ].symbol; - if (needle >= mid_symbol) base_index = mid_index; + if (needle > mid_symbol) base_index = mid_index; size -= half_size; } + TSSymbol symbol = self->steps.contents[ self->pattern_map.contents[base_index].step_index ].symbol; + if (needle > symbol) { - *result = base_index + 1; - return false; - } else if (needle == symbol) { - *result = base_index; - return true; - } else { - *result = base_index; - return false; + base_index++; + if (base_index < self->pattern_map.size) { + symbol = self->steps.contents[ + self->pattern_map.contents[base_index].step_index + ].symbol; + } } + + *result = base_index; + return needle == symbol; } +// Insert a new pattern's start index into the pattern map, maintaining +// the pattern map's ordering invariant. static inline void ts_query__pattern_map_insert( TSQuery *self, TSSymbol symbol, @@ -308,6 +319,9 @@ static inline void ts_query__pattern_map_insert( })); } +// Read one S-expression pattern from the stream, and incorporate it into +// the query's internal state machine representation. For nested patterns, +// this function calls itself recursively. 
static TSQueryError ts_query_parse_pattern( TSQuery *self, Stream *stream, @@ -459,7 +473,7 @@ static TSQueryError ts_query_parse_pattern( stream_skip_whitespace(stream); - // Parse a '@'-suffixed capture pattern + // Parse an '@'-suffixed capture pattern if (stream->next == '@') { stream_advance(stream); stream_skip_whitespace(stream); @@ -615,18 +629,18 @@ static QueryState *ts_query_context_copy_state( TSQueryContext *self, QueryState *state ) { - uint32_t capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); - if (capture_list_id == NONE) return NULL; + uint32_t new_list_id = capture_list_pool_acquire(&self->capture_list_pool); + if (new_list_id == NONE) return NULL; array_push(&self->states, *state); QueryState *new_state = array_back(&self->states); - new_state->capture_list_id = capture_list_id; + new_state->capture_list_id = new_list_id; TSQueryCapture *old_captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id ); TSQueryCapture *new_captures = capture_list_pool_get( &self->capture_list_pool, - capture_list_id + new_list_id ); memcpy(new_captures, old_captures, state->capture_count * sizeof(TSQueryCapture)); return new_state; From ad3f21b0e549b2f6726f46e8695bb76ceb3d066f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 14:44:49 -0700 Subject: [PATCH 008/558] Add simple CLI command for running tree queries --- cli/src/lib.rs | 1 + cli/src/main.rs | 120 +++++++++++++++++++++++++++++++---------------- cli/src/query.rs | 49 +++++++++++++++++++ 3 files changed, 130 insertions(+), 40 deletions(-) create mode 100644 cli/src/query.rs diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 33a9904f..6a7c9507 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -5,6 +5,7 @@ pub mod highlight; pub mod loader; pub mod logger; pub mod parse; +pub mod query; pub mod test; pub mod util; pub mod wasm; diff --git a/cli/src/main.rs b/cli/src/main.rs index 59d04a97..9e8f885b 100644 --- a/cli/src/main.rs +++ 
b/cli/src/main.rs @@ -1,10 +1,12 @@ use clap::{App, AppSettings, Arg, SubCommand}; use error::Error; +use std::io::Read; use std::path::Path; use std::process::exit; -use std::{env, fs, u64}; +use std::{env, fs, io, u64}; +use tree_sitter::Language; use tree_sitter_cli::{ - config, error, generate, highlight, loader, logger, parse, test, wasm, web_ui, + config, error, generate, highlight, loader, logger, parse, query, test, wasm, web_ui, }; const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION"); @@ -50,7 +52,7 @@ fn run() -> error::Result<()> { ) .subcommand( SubCommand::with_name("parse") - .about("Parse a file") + .about("Parse files") .arg( Arg::with_name("path") .index(1) @@ -73,6 +75,18 @@ fn run() -> error::Result<()> { .number_of_values(1), ), ) + .subcommand( + SubCommand::with_name("query") + .about("Search files using a syntax tree query") + .arg(Arg::with_name("query-path").index(1).required(true)) + .arg( + Arg::with_name("path") + .index(2) + .multiple(true) + .required(true), + ) + .arg(Arg::with_name("scope").long("scope").takes_value(true)), + ) .subcommand( SubCommand::with_name("test") .about("Run a parser's tests") @@ -168,7 +182,6 @@ fn run() -> error::Result<()> { let timeout = matches .value_of("timeout") .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); - loader.find_all_languages(&config.parser_directories)?; let paths = matches .values_of("path") .unwrap() @@ -176,43 +189,11 @@ fn run() -> error::Result<()> { .collect::>(); let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap(); let mut has_error = false; + loader.find_all_languages(&config.parser_directories)?; for path in paths { let path = Path::new(path); - let language = if let Some(scope) = matches.value_of("scope") { - if let Some(config) = - loader - .language_configuration_for_scope(scope) - .map_err(Error::wrap(|| { - format!("Failed to load language for scope '{}'", scope) - }))? 
- { - config.0 - } else { - return Error::err(format!("Unknown scope '{}'", scope)); - } - } else if let Some((lang, _)) = loader - .language_configuration_for_file_name(path) - .map_err(Error::wrap(|| { - format!( - "Failed to load language for file name {:?}", - path.file_name().unwrap() - ) - }))? - { - lang - } else if let Some(lang) = loader - .languages_at_path(¤t_dir) - .map_err(Error::wrap(|| { - "Failed to load language in current directory" - }))? - .first() - .cloned() - { - lang - } else { - eprintln!("No language found"); - return Ok(()); - }; + let language = + select_language(&mut loader, path, ¤t_dir, matches.value_of("scope"))?; has_error |= parse::parse_file_at_path( language, path, @@ -226,10 +207,25 @@ fn run() -> error::Result<()> { allow_cancellation, )?; } - if has_error { return Error::err(String::new()); } + } else if let Some(matches) = matches.subcommand_matches("query") { + let paths = matches + .values_of("path") + .unwrap() + .into_iter() + .map(Path::new) + .collect::>(); + loader.find_all_languages(&config.parser_directories)?; + let language = select_language( + &mut loader, + paths[0], + ¤t_dir, + matches.value_of("scope"), + )?; + let query_path = Path::new(matches.value_of("query-path").unwrap()); + query::query_files_at_paths(language, paths, query_path)?; } else if let Some(matches) = matches.subcommand_matches("highlight") { let paths = matches.values_of("path").unwrap().into_iter(); let html_mode = matches.is_present("html"); @@ -296,3 +292,47 @@ fn run() -> error::Result<()> { Ok(()) } + +fn select_language( + loader: &mut loader::Loader, + path: &Path, + current_dir: &Path, + scope: Option<&str>, +) -> Result { + if let Some(scope) = scope { + if let Some(config) = + loader + .language_configuration_for_scope(scope) + .map_err(Error::wrap(|| { + format!("Failed to load language for scope '{}'", scope) + }))? 
+ { + Ok(config.0) + } else { + return Error::err(format!("Unknown scope '{}'", scope)); + } + } else if let Some((lang, _)) = + loader + .language_configuration_for_file_name(path) + .map_err(Error::wrap(|| { + format!( + "Failed to load language for file name {:?}", + path.file_name().unwrap() + ) + }))? + { + Ok(lang) + } else if let Some(lang) = loader + .languages_at_path(¤t_dir) + .map_err(Error::wrap(|| { + "Failed to load language in current directory" + }))? + .first() + .cloned() + { + Ok(lang) + } else { + eprintln!("No language found"); + Error::err("No language found".to_string()) + } +} diff --git a/cli/src/query.rs b/cli/src/query.rs new file mode 100644 index 00000000..74f3e327 --- /dev/null +++ b/cli/src/query.rs @@ -0,0 +1,49 @@ +use super::error::{Error, Result}; +use std::fs; +use std::io::{self, Write}; +use std::path::Path; +use tree_sitter::{Language, Parser, Query}; + +pub fn query_files_at_paths( + language: Language, + paths: Vec<&Path>, + query_path: &Path, +) -> Result<()> { + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + + let query_source = fs::read_to_string(query_path).map_err(Error::wrap(|| { + format!("Error reading query file {:?}", query_path) + }))?; + let query = Query::new(language, &query_source) + .map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?; + + let query_context = query.context(); + + let mut parser = Parser::new(); + parser.set_language(language).map_err(|e| e.to_string())?; + + for path in paths { + writeln!(&mut stdout, "{}", path.to_str().unwrap())?; + + let source_code = fs::read(path).map_err(Error::wrap(|| { + format!("Error reading source file {:?}", path) + }))?; + + let tree = parser.parse(&source_code, None).unwrap(); + + for mat in query_context.exec(tree.root_node()) { + writeln!(&mut stdout, " pattern: {}", mat.pattern_index())?; + for (capture_id, node) in mat.captures() { + writeln!( + &mut stdout, + " {}: {:?}", + &query.capture_names()[capture_id], + 
node.utf8_text(&source_code).unwrap_or("") + )?; + } + } + } + + Ok(()) +} From beb5eec7d9c96c19d5902fb805fd41ca289b4a99 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 14:44:49 -0700 Subject: [PATCH 009/558] Fix handling of single-node patterns in tree queries --- cli/src/tests/query_test.rs | 31 +++++++++++++++++++++++++++++++ lib/src/query.c | 3 ++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 6ab77d1f..927df294 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -311,6 +311,37 @@ fn test_query_exec_too_many_match_permutations_to_track() { }); } +#[test] +fn test_query_exec_with_anonymous_tokens() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + ";" @ punctuation + "&&" @ operator + "#, + ) + .unwrap(); + + let source = "foo(a && b);"; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let context = query.context(); + let matches = context.exec(tree.root_node()); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (1, vec![("operator", "&&")]), + (0, vec![("punctuation", ";")]), + ] + ); + }); +} + #[test] fn test_query_capture_names() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index d728238d..042b5d9e 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -413,6 +413,7 @@ static TSQueryError ts_query_parse_pattern( .depth = depth, .symbol = symbol, .field = 0, + .capture_id = NONE, })); if (stream->next != '"') return TSQueryErrorSyntax; @@ -756,7 +757,7 @@ bool ts_query_context_next(TSQueryContext *self) { array_push(&self->states, ((QueryState) { .pattern_index = slice->pattern_index, - .step_index = slice->step_index + 1, + .step_index = slice->step_index, .start_depth = self->depth, .capture_list_id = capture_list_id, .capture_count = 0, 
From c8c75782e3b6c5ac70978ce895905f593b6b11b4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 14:44:49 -0700 Subject: [PATCH 010/558] Allow tree queries to execute within a limited range --- cli/src/tests/query_test.rs | 26 +++++++++++++++++ lib/binding_rust/bindings.rs | 45 +++++++++++++++++++++++++++-- lib/binding_rust/lib.rs | 14 +++++++++ lib/binding_web/binding.c | 7 +---- lib/include/tree_sitter/api.h | 33 +++++++++++++--------- lib/src/point.h | 1 + lib/src/query.c | 53 +++++++++++++++++++++++++++++++++++ 7 files changed, 157 insertions(+), 22 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 927df294..6131d1ea 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -342,6 +342,32 @@ fn test_query_exec_with_anonymous_tokens() { }); } +#[test] +fn test_query_exec_within_byte_range() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, "(identifier) @element").unwrap(); + + let source = "[a, b, c, d, e, f, g]"; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let mut context = query.context(); + let matches = context.set_byte_range(5, 15).exec(tree.root_node()); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("element", "c")]), + (0, vec![("element", "d")]), + (0, vec![("element", "e")]), + ] + ); + }); +} + #[test] fn test_query_capture_names() { allocations::record(|| { diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 53b77405..b44b5622 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -560,8 +560,9 @@ extern "C" { pub fn ts_tree_cursor_copy(arg1: *const TSTreeCursor) -> TSTreeCursor; } extern "C" { - #[doc = " Create a new query based on a given language and string containing"] - #[doc = " one or more S-expression patterns."] + #[doc = " Create a new 
query from a string containing one or more S-expression"] + #[doc = " patterns. The query is associated with a particular language, and can"] + #[doc = " only be run on syntax nodes parsed with that language."] #[doc = ""] #[doc = " If all of the given patterns are valid, this returns a `TSQuery`."] #[doc = " If a pattern is invalid, this returns `NULL`, and provides two pieces"] @@ -569,7 +570,7 @@ extern "C" { #[doc = " 1. The byte offset of the error is written to the `error_offset` parameter."] #[doc = " 2. The type of error is written to the `error_type` parameter."] pub fn ts_query_new( - arg1: *const TSLanguage, + language: *const TSLanguage, source: *const ::std::os::raw::c_char, source_len: u32, error_offset: *mut u32, @@ -581,9 +582,13 @@ extern "C" { pub fn ts_query_delete(arg1: *mut TSQuery); } extern "C" { + #[doc = " Get the number of distinct capture names in the query."] pub fn ts_query_capture_count(arg1: *const TSQuery) -> u32; } extern "C" { + #[doc = " Get the name and length of one of the query\'s capture. Each capture"] + #[doc = " is associated with a numeric id based on the order that it appeared"] + #[doc = " in the query\'s source."] pub fn ts_query_capture_name_for_id( self_: *const TSQuery, index: u32, @@ -591,6 +596,7 @@ extern "C" { ) -> *const ::std::os::raw::c_char; } extern "C" { + #[doc = " Get the numeric id of the capture with the given name."] pub fn ts_query_capture_id_for_name( self_: *const TSQuery, name: *const ::std::os::raw::c_char, @@ -598,21 +604,54 @@ extern "C" { ) -> ::std::os::raw::c_int; } extern "C" { + #[doc = " Create a new context for executing a given query."] + #[doc = ""] + #[doc = " The context stores the state that is needed to iteratively search"] + #[doc = " for matches. To use the query context:"] + #[doc = " 1. First call `ts_query_context_exec` to start running the query"] + #[doc = " on a particular syntax node."] + #[doc = " 2. 
Then repeatedly call `ts_query_context_next` to iterate over"] + #[doc = " the matches."] + #[doc = " 3. After each successful call to `ts_query_context_next`, you can call"] + #[doc = " `ts_query_context_matched_pattern_index` to determine which pattern"] + #[doc = " matched. You can also call `ts_query_context_matched_captures` to"] + #[doc = " determine which nodes were captured by which capture names."] + #[doc = ""] + #[doc = " If you don\'t care about finding all of the matches, you can stop calling"] + #[doc = " `ts_query_context_next` at any point. And you can start executing the"] + #[doc = " query against a different node by calling `ts_query_context_exec` again."] pub fn ts_query_context_new(arg1: *const TSQuery) -> *mut TSQueryContext; } extern "C" { + #[doc = " Delete a query context, freeing all of the memory that it used."] pub fn ts_query_context_delete(arg1: *mut TSQueryContext); } extern "C" { + #[doc = " Start running a query on a given node."] pub fn ts_query_context_exec(arg1: *mut TSQueryContext, arg2: TSNode); } extern "C" { + #[doc = " Set the range of bytes or (row, column) positions in which the query"] + #[doc = " will be executed."] + pub fn ts_query_context_set_byte_range(arg1: *mut TSQueryContext, arg2: u32, arg3: u32); +} +extern "C" { + pub fn ts_query_context_set_point_range( + arg1: *mut TSQueryContext, + arg2: TSPoint, + arg3: TSPoint, + ); +} +extern "C" { + #[doc = " Advance to the next match of the currently running query."] pub fn ts_query_context_next(arg1: *mut TSQueryContext) -> bool; } extern "C" { + #[doc = " Check which pattern matched."] pub fn ts_query_context_matched_pattern_index(arg1: *const TSQueryContext) -> u32; } extern "C" { + #[doc = " Check which pattern matched."] pub fn ts_query_context_matched_captures( arg1: *const TSQueryContext, arg2: *mut u32, diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 80e56ba9..8d29a3c3 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ 
-1011,6 +1011,20 @@ impl<'a> QueryContext<'a> { } }) } + + pub fn set_byte_range(&mut self, start: usize, end: usize) -> &mut Self { + unsafe { + ffi::ts_query_context_set_byte_range(self.0, start as u32, end as u32); + } + self + } + + pub fn set_point_range(&mut self, start: Point, end: Point) -> &mut Self { + unsafe { + ffi::ts_query_context_set_point_range(self.0, start.into(), end.into()); + } + self + } } impl<'a> QueryMatch<'a> { diff --git a/lib/binding_web/binding.c b/lib/binding_web/binding.c index e94c5aa0..f46d1def 100644 --- a/lib/binding_web/binding.c +++ b/lib/binding_web/binding.c @@ -2,6 +2,7 @@ #include #include #include "array.h" +#include "point.h" /*****************************/ /* Section - Data marshaling */ @@ -464,12 +465,6 @@ void ts_node_named_children_wasm(const TSTree *tree) { TRANSFER_BUFFER[1] = result; } -bool point_lte(TSPoint a, TSPoint b) { - if (a.row < b.row) return true; - if (a.row > b.row) return false; - return a.column <= b.column; -} - bool symbols_contain(const uint32_t *set, uint32_t length, uint32_t value) { for (unsigned i = 0; i < length; i++) { if (set[i] == value) return true; diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index ad991818..d951a35a 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -644,12 +644,12 @@ TSQuery *ts_query_new( */ void ts_query_delete(TSQuery *); -/* +/** * Get the number of distinct capture names in the query. */ uint32_t ts_query_capture_count(const TSQuery *); -/* +/** * Get the name and length of one of the query's capture. Each capture * is associated with a numeric id based on the order that it appeared * in the query's source. @@ -660,7 +660,7 @@ const char *ts_query_capture_name_for_id( uint32_t *length ); -/* +/** * Get the numeric id of the capture with the given name. 
*/ int ts_query_capture_id_for_name( @@ -669,7 +669,7 @@ int ts_query_capture_id_for_name( uint32_t length ); -/* +/** * Create a new context for executing a given query. * * The context stores the state that is needed to iteratively search @@ -678,10 +678,10 @@ int ts_query_capture_id_for_name( * on a particular syntax node. * 2. Then repeatedly call `ts_query_context_next` to iterate over * the matches. - * 3. For each match, you can call `ts_query_context_matched_pattern_index` - * to determine which pattern matched. You can also call - * `ts_query_context_matched_captures` to determine which nodes - * were captured by which capture names. + * 3. After each successful call to `ts_query_context_next`, you can call + * `ts_query_context_matched_pattern_index` to determine which pattern + * matched. You can also call `ts_query_context_matched_captures` to + * determine which nodes were captured by which capture names. * * If you don't care about finding all of the matches, you can stop calling * `ts_query_context_next` at any point. And you can start executing the @@ -689,27 +689,34 @@ int ts_query_capture_id_for_name( */ TSQueryContext *ts_query_context_new(const TSQuery *); -/* +/** * Delete a query context, freeing all of the memory that it used. */ void ts_query_context_delete(TSQueryContext *); -/* +/** * Start running a query on a given node. */ void ts_query_context_exec(TSQueryContext *, TSNode); -/* +/** + * Set the range of bytes or (row, column) positions in which the query + * will be executed. + */ +void ts_query_context_set_byte_range(TSQueryContext *, uint32_t, uint32_t); +void ts_query_context_set_point_range(TSQueryContext *, TSPoint, TSPoint); + +/** * Advance to the next match of the currently running query. */ bool ts_query_context_next(TSQueryContext *); -/* +/** * Check which pattern matched. */ uint32_t ts_query_context_matched_pattern_index(const TSQueryContext *); -/* +/** * Check which pattern matched. 
*/ const TSQueryCapture *ts_query_context_matched_captures( diff --git a/lib/src/point.h b/lib/src/point.h index 4d0aed18..a50d2021 100644 --- a/lib/src/point.h +++ b/lib/src/point.h @@ -3,6 +3,7 @@ #include "tree_sitter/api.h" +#define POINT_ZERO ((TSPoint) {0, 0}) #define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX}) static inline TSPoint point__new(unsigned row, unsigned column) { diff --git a/lib/src/query.c b/lib/src/query.c index 042b5d9e..8b4deb81 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -2,6 +2,7 @@ #include "./alloc.h" #include "./array.h" #include "./bits.h" +#include "./point.h" #include "utf8proc.h" #include @@ -99,6 +100,10 @@ struct TSQueryContext { CaptureListPool capture_list_pool; bool ascending; uint32_t depth; + uint32_t start_byte; + uint32_t end_byte; + TSPoint start_point; + TSPoint end_point; }; static const TSQueryError PARENT_DONE = -1; @@ -605,6 +610,10 @@ TSQueryContext *ts_query_context_new(const TSQuery *query) { .states = array_new(), .finished_states = array_new(), .capture_list_pool = capture_list_pool_new(query->max_capture_count), + .start_byte = 0, + .end_byte = UINT32_MAX, + .start_point = {0, 0}, + .end_point = POINT_MAX, }; return self; } @@ -626,6 +635,32 @@ void ts_query_context_exec(TSQueryContext *self, TSNode node) { self->ascending = false; } +void ts_query_context_set_byte_range( + TSQueryContext *self, + uint32_t start_byte, + uint32_t end_byte +) { + if (end_byte == 0) { + start_byte = 0; + end_byte = UINT32_MAX; + } + self->start_byte = start_byte; + self->end_byte = end_byte; +} + +void ts_query_context_set_point_range( + TSQueryContext *self, + TSPoint start_point, + TSPoint end_point +) { + if (end_point.row == 0 && end_point.column == 0) { + start_point = POINT_ZERO; + end_point = POINT_MAX; + } + self->start_point = start_point; + self->end_point = end_point; +} + static QueryState *ts_query_context_copy_state( TSQueryContext *self, QueryState *state @@ -698,6 +733,24 @@ bool 
ts_query_context_next(TSQueryContext *self) { TSNode node = ts_tree_cursor_current_node(&self->cursor); TSSymbol symbol = ts_node_symbol(node); + // If this node is before the selected range, then avoid + // descending into it. + if ( + ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point) + ) { + if (!ts_tree_cursor_goto_next_sibling(&self->cursor)) { + self->ascending = true; + } + continue; + } + + // If this node is after the selected range, then stop walking. + if ( + self->end_byte <= ts_node_start_byte(node) || + point_lte(self->end_point, ts_node_start_point(node)) + ) return false; + LOG("enter node %s\n", ts_node_type(node)); // Add new states for any patterns whose root node is a wildcard. From c71de5bd81ca33eb62840f832d56e1029ff3753b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 14:44:49 -0700 Subject: [PATCH 011/558] Tweak query interface * Rename TSQueryContext -> TSQueryCursor * Remove the permanent association between the cursor and its query. The cursor can now be used again for a different query. 
--- cli/src/main.rs | 3 +- cli/src/query.rs | 6 +-- cli/src/tests/query_test.rs | 42 ++++++++++----------- lib/binding_rust/bindings.rs | 54 ++++++++++++-------------- lib/binding_rust/lib.rs | 35 +++++++++-------- lib/binding_web/binding.c | 19 +++++----- lib/binding_web/binding.js | 9 ++--- lib/include/tree_sitter/api.h | 50 ++++++++++++------------ lib/src/query.c | 71 +++++++++++++++++++---------------- 9 files changed, 142 insertions(+), 147 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 9e8f885b..4b387050 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,9 +1,8 @@ use clap::{App, AppSettings, Arg, SubCommand}; use error::Error; -use std::io::Read; use std::path::Path; use std::process::exit; -use std::{env, fs, io, u64}; +use std::{env, fs, u64}; use tree_sitter::Language; use tree_sitter_cli::{ config, error, generate, highlight, loader, logger, parse, query, test, wasm, web_ui, diff --git a/cli/src/query.rs b/cli/src/query.rs index 74f3e327..2f50325a 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -2,7 +2,7 @@ use super::error::{Error, Result}; use std::fs; use std::io::{self, Write}; use std::path::Path; -use tree_sitter::{Language, Parser, Query}; +use tree_sitter::{Language, Parser, Query, QueryCursor}; pub fn query_files_at_paths( language: Language, @@ -18,7 +18,7 @@ pub fn query_files_at_paths( let query = Query::new(language, &query_source) .map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?; - let query_context = query.context(); + let mut query_cursor = QueryCursor::new(); let mut parser = Parser::new(); parser.set_language(language).map_err(|e| e.to_string())?; @@ -32,7 +32,7 @@ pub fn query_files_at_paths( let tree = parser.parse(&source_code, None).unwrap(); - for mat in query_context.exec(tree.root_node()) { + for mat in query_cursor.exec(&query, tree.root_node()) { writeln!(&mut stdout, " pattern: {}", mat.pattern_index())?; for (capture_id, node) in mat.captures() { writeln!( diff --git 
a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 6131d1ea..c95bdfa2 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1,6 +1,6 @@ use super::helpers::allocations; use super::helpers::fixtures::get_language; -use tree_sitter::{Parser, Query, QueryError, QueryMatch}; +use tree_sitter::{Parser, Query, QueryCursor, QueryError, QueryMatch}; #[test] fn test_query_errors_on_invalid_syntax() { @@ -77,8 +77,8 @@ fn test_query_exec_with_simple_pattern() { parser.set_language(language).unwrap(); let tree = parser.parse(source, None).unwrap(); - let context = query.context(); - let matches = context.exec(tree.root_node()); + let mut cursor = QueryCursor::new(); + let matches = cursor.exec(&query, tree.root_node()); assert_eq!( collect_matches(matches, &query, source), @@ -117,8 +117,8 @@ fn test_query_exec_with_multiple_matches_same_root() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); let tree = parser.parse(source, None).unwrap(); - let context = query.context(); - let matches = context.exec(tree.root_node()); + let mut cursor = QueryCursor::new(); + let matches = cursor.exec(&query, tree.root_node()); assert_eq!( collect_matches(matches, &query, source), @@ -164,8 +164,8 @@ fn test_query_exec_multiple_patterns_different_roots() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); let tree = parser.parse(source, None).unwrap(); - let context = query.context(); - let matches = context.exec(tree.root_node()); + let mut cursor = QueryCursor::new(); + let matches = cursor.exec(&query, tree.root_node()); assert_eq!( collect_matches(matches, &query, source), @@ -206,8 +206,8 @@ fn test_query_exec_multiple_patterns_same_root() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); let tree = parser.parse(source, None).unwrap(); - let context = query.context(); - let matches = context.exec(tree.root_node()); + let mut cursor = QueryCursor::new(); + let matches = 
cursor.exec(&query, tree.root_node()); assert_eq!( collect_matches(matches, &query, source), @@ -243,8 +243,8 @@ fn test_query_exec_nested_matches_without_fields() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); let tree = parser.parse(source, None).unwrap(); - let context = query.context(); - let matches = context.exec(tree.root_node()); + let mut cursor = QueryCursor::new(); + let matches = cursor.exec(&query, tree.root_node()); assert_eq!( collect_matches(matches, &query, source), @@ -269,8 +269,8 @@ fn test_query_exec_many_matches() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); - let context = query.context(); - let matches = context.exec(tree.root_node()); + let mut cursor = QueryCursor::new(); + let matches = cursor.exec(&query, tree.root_node()); assert_eq!( collect_matches(matches, &query, source.as_str()), @@ -298,8 +298,8 @@ fn test_query_exec_too_many_match_permutations_to_track() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); - let context = query.context(); - let matches = context.exec(tree.root_node()); + let mut cursor = QueryCursor::new(); + let matches = cursor.exec(&query, tree.root_node()); // For this pathological query, some match permutations will be dropped. 
// Just check that a subset of the results are returned, and crash or @@ -329,8 +329,8 @@ fn test_query_exec_with_anonymous_tokens() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); - let context = query.context(); - let matches = context.exec(tree.root_node()); + let mut cursor = QueryCursor::new(); + let matches = cursor.exec(&query, tree.root_node()); assert_eq!( collect_matches(matches, &query, source), @@ -354,8 +354,8 @@ fn test_query_exec_within_byte_range() { parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); - let mut context = query.context(); - let matches = context.set_byte_range(5, 15).exec(tree.root_node()); + let mut cursor = QueryCursor::new(); + let matches = cursor.set_byte_range(5, 15).exec(&query, tree.root_node()); assert_eq!( collect_matches(matches, &query, source), @@ -420,8 +420,8 @@ fn test_query_comments() { let mut parser = Parser::new(); parser.set_language(language).unwrap(); let tree = parser.parse(source, None).unwrap(); - let context = query.context(); - let matches = context.exec(tree.root_node()); + let mut cursor = QueryCursor::new(); + let matches = cursor.exec(&query, tree.root_node()); assert_eq!( collect_matches(matches, &query, source), &[(0, vec![("fn-name", "one")]),], diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index b44b5622..effd0c81 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -26,7 +26,7 @@ pub struct TSQuery { } #[repr(C)] #[derive(Debug, Copy, Clone)] -pub struct TSQueryContext { +pub struct TSQueryCursor { _unused: [u8; 0], } pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; @@ -604,56 +604,52 @@ extern "C" { ) -> ::std::os::raw::c_int; } extern "C" { - #[doc = " Create a new context for executing a given query."] + #[doc = " Create a new cursor for executing a given query."] #[doc = ""] - #[doc = " The context stores the 
state that is needed to iteratively search"] - #[doc = " for matches. To use the query context:"] - #[doc = " 1. First call `ts_query_context_exec` to start running the query"] - #[doc = " on a particular syntax node."] - #[doc = " 2. Then repeatedly call `ts_query_context_next` to iterate over"] - #[doc = " the matches."] - #[doc = " 3. After each successful call to `ts_query_context_next`, you can call"] - #[doc = " `ts_query_context_matched_pattern_index` to determine which pattern"] - #[doc = " matched. You can also call `ts_query_context_matched_captures` to"] - #[doc = " determine which nodes were captured by which capture names."] + #[doc = " The cursor stores the state that is needed to iteratively search"] + #[doc = " for matches. To use the query cursor:"] + #[doc = " 1. First call `ts_query_cursor_exec` to start running a given query on"] + #[doc = "a given syntax node."] + #[doc = " 2. Then repeatedly call `ts_query_cursor_next` to iterate over the matches."] + #[doc = " This will return `false` when there are no more matches left."] + #[doc = " 3. After each successful call to `ts_query_cursor_next`, you can call"] + #[doc = " `ts_query_cursor_matched_pattern_index` to determine which pattern"] + #[doc = " matched. You can also call `ts_query_cursor_matched_captures` to"] + #[doc = " determine which nodes were captured, and by which capture names."] #[doc = ""] #[doc = " If you don\'t care about finding all of the matches, you can stop calling"] - #[doc = " `ts_query_context_next` at any point. And you can start executing the"] - #[doc = " query against a different node by calling `ts_query_context_exec` again."] - pub fn ts_query_context_new(arg1: *const TSQuery) -> *mut TSQueryContext; + #[doc = " `ts_query_cursor_next` at any point. 
And you can start executing another"] + #[doc = " query on another node by calling `ts_query_cursor_exec` again."] + pub fn ts_query_cursor_new() -> *mut TSQueryCursor; } extern "C" { - #[doc = " Delete a query context, freeing all of the memory that it used."] - pub fn ts_query_context_delete(arg1: *mut TSQueryContext); + #[doc = " Delete a query cursor, freeing all of the memory that it used."] + pub fn ts_query_cursor_delete(arg1: *mut TSQueryCursor); } extern "C" { - #[doc = " Start running a query on a given node."] - pub fn ts_query_context_exec(arg1: *mut TSQueryContext, arg2: TSNode); + #[doc = " Start running a given query on a given node."] + pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode); } extern "C" { #[doc = " Set the range of bytes or (row, column) positions in which the query"] #[doc = " will be executed."] - pub fn ts_query_context_set_byte_range(arg1: *mut TSQueryContext, arg2: u32, arg3: u32); + pub fn ts_query_cursor_set_byte_range(arg1: *mut TSQueryCursor, arg2: u32, arg3: u32); } extern "C" { - pub fn ts_query_context_set_point_range( - arg1: *mut TSQueryContext, - arg2: TSPoint, - arg3: TSPoint, - ); + pub fn ts_query_cursor_set_point_range(arg1: *mut TSQueryCursor, arg2: TSPoint, arg3: TSPoint); } extern "C" { #[doc = " Advance to the next match of the currently running query."] - pub fn ts_query_context_next(arg1: *mut TSQueryContext) -> bool; + pub fn ts_query_cursor_next(arg1: *mut TSQueryCursor) -> bool; } extern "C" { #[doc = " Check which pattern matched."] - pub fn ts_query_context_matched_pattern_index(arg1: *const TSQueryContext) -> u32; + pub fn ts_query_cursor_matched_pattern_index(arg1: *const TSQueryCursor) -> u32; } extern "C" { #[doc = " Check which pattern matched."] - pub fn ts_query_context_matched_captures( - arg1: *const TSQueryContext, + pub fn ts_query_cursor_matched_captures( + arg1: *const TSQueryCursor, arg2: *mut u32, ) -> *const TSQueryCapture; } diff --git 
a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 8d29a3c3..2c20fd12 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -142,9 +142,9 @@ pub struct Query { capture_names: Vec, } -pub struct QueryContext<'a>(*mut ffi::TSQueryContext, PhantomData<&'a ()>); +pub struct QueryCursor(*mut ffi::TSQueryCursor); -pub struct QueryMatch<'a>(&'a QueryContext<'a>); +pub struct QueryMatch<'a>(*mut ffi::TSQueryCursor, PhantomData<&'a ()>); #[derive(Debug, PartialEq, Eq)] pub enum QueryError<'a> { @@ -989,22 +989,21 @@ impl Query { pub fn capture_names(&self) -> &[String] { &self.capture_names } - - pub fn context(&self) -> QueryContext { - let context = unsafe { ffi::ts_query_context_new(self.ptr) }; - QueryContext(context, PhantomData) - } } -impl<'a> QueryContext<'a> { - pub fn exec(&'a self, node: Node<'a>) -> impl Iterator> + 'a { +impl QueryCursor { + pub fn new() -> Self { + QueryCursor(unsafe { ffi::ts_query_cursor_new() }) + } + + pub fn exec<'a>(&'a mut self, query: &'a Query, node: Node<'a>) -> impl Iterator> + 'a { unsafe { - ffi::ts_query_context_exec(self.0, node.0); + ffi::ts_query_cursor_exec(self.0, query.ptr, node.0); } std::iter::from_fn(move || -> Option> { unsafe { - if ffi::ts_query_context_next(self.0) { - Some(QueryMatch(self)) + if ffi::ts_query_cursor_next(self.0) { + Some(QueryMatch(self.0, PhantomData)) } else { None } @@ -1014,14 +1013,14 @@ impl<'a> QueryContext<'a> { pub fn set_byte_range(&mut self, start: usize, end: usize) -> &mut Self { unsafe { - ffi::ts_query_context_set_byte_range(self.0, start as u32, end as u32); + ffi::ts_query_cursor_set_byte_range(self.0, start as u32, end as u32); } self } pub fn set_point_range(&mut self, start: Point, end: Point) -> &mut Self { unsafe { - ffi::ts_query_context_set_point_range(self.0, start.into(), end.into()); + ffi::ts_query_cursor_set_point_range(self.0, start.into(), end.into()); } self } @@ -1029,14 +1028,14 @@ impl<'a> QueryContext<'a> { impl<'a> QueryMatch<'a> { pub fn 
pattern_index(&self) -> usize { - unsafe { ffi::ts_query_context_matched_pattern_index((self.0).0) as usize } + unsafe { ffi::ts_query_cursor_matched_pattern_index(self.0) as usize } } pub fn captures(&self) -> impl ExactSizeIterator { unsafe { let mut capture_count = 0u32; let captures = - ffi::ts_query_context_matched_captures((self.0).0, &mut capture_count as *mut u32); + ffi::ts_query_cursor_matched_captures(self.0, &mut capture_count as *mut u32); let captures = slice::from_raw_parts(captures, capture_count as usize); captures .iter() @@ -1057,9 +1056,9 @@ impl Drop for Query { } } -impl<'a> Drop for QueryContext<'a> { +impl Drop for QueryCursor { fn drop(&mut self) { - unsafe { ffi::ts_query_context_delete(self.0) } + unsafe { ffi::ts_query_cursor_delete(self.0) } } } diff --git a/lib/binding_web/binding.c b/lib/binding_web/binding.c index f46d1def..501cf1cd 100644 --- a/lib/binding_web/binding.c +++ b/lib/binding_web/binding.c @@ -306,6 +306,7 @@ void ts_tree_cursor_current_node_wasm(const TSTree *tree) { /******************/ static TSTreeCursor scratch_cursor = {0}; +static TSQueryCursor *scratch_query_cursor = NULL; uint16_t ts_node_symbol_wasm(const TSTree *tree) { TSNode node = unmarshal_node(tree); @@ -566,24 +567,22 @@ int ts_node_is_missing_wasm(const TSTree *tree) { /* Section - Query */ /******************/ -void ts_query_exec_wasm( - const TSQuery *self, - TSQueryContext *context, - const TSTree *tree -) { +void ts_query_exec_wasm(const TSQuery *self, const TSTree *tree) { + if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new(); + TSNode node = unmarshal_node(tree); Array(const void *) result = array_new(); unsigned index = 0; unsigned match_count = 0; - ts_query_context_exec(context, node); - while (ts_query_context_next(context)) { + ts_query_cursor_exec(scratch_query_cursor, self, node); + while (ts_query_cursor_next(scratch_query_cursor)) { match_count++; - uint32_t pattern_index = 
ts_query_context_matched_pattern_index(context); + uint32_t pattern_index = ts_query_cursor_matched_pattern_index(scratch_query_cursor); uint32_t capture_count; - const TSQueryCapture *captures = ts_query_context_matched_captures( - context, + const TSQueryCapture *captures = ts_query_cursor_matched_captures( + scratch_query_cursor, &capture_count ); diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index ac48cb70..85bd4053 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -677,7 +677,6 @@ class Language { TRANSFER_BUFFER + SIZE_OF_INT ); if (address) { - const contextAddress = C._ts_query_context_new(address); const captureCount = C._ts_query_capture_count(address); const captureNames = new Array(captureCount); for (let i = 0; i < captureCount; i++) { @@ -689,7 +688,7 @@ class Language { const nameLength = getValue(TRANSFER_BUFFER, 'i32'); captureNames[i] = UTF8ToString(nameAddress, nameLength); } - return new Query(INTERNAL, address, contextAddress, captureNames); + return new Query(INTERNAL, address, captureNames); } else { const errorId = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); const utf8ErrorOffset = getValue(TRANSFER_BUFFER, 'i32'); @@ -743,22 +742,20 @@ class Language { } class Query { - constructor(internal, address, contextAddress, captureNames) { + constructor(internal, address, captureNames) { assertInternal(internal); this[0] = address; - this[1] = contextAddress; this.captureNames = captureNames; } delete() { C._ts_query_delete(this[0]); - C._ts_query_context_delete(this[0]); } exec(queryNode) { marshalNode(queryNode); - C._ts_query_exec_wasm(this[0], this[1], queryNode.tree[0]); + C._ts_query_exec_wasm(this[0], queryNode.tree[0]); const matchCount = getValue(TRANSFER_BUFFER, 'i32'); const nodesAddress = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index d951a35a..624658b4 100644 --- a/lib/include/tree_sitter/api.h +++ 
b/lib/include/tree_sitter/api.h @@ -27,7 +27,7 @@ typedef struct TSLanguage TSLanguage; typedef struct TSParser TSParser; typedef struct TSTree TSTree; typedef struct TSQuery TSQuery; -typedef struct TSQueryContext TSQueryContext; +typedef struct TSQueryCursor TSQueryCursor; typedef enum { TSInputEncodingUTF8, @@ -670,57 +670,57 @@ int ts_query_capture_id_for_name( ); /** - * Create a new context for executing a given query. + * Create a new cursor for executing a given query. * - * The context stores the state that is needed to iteratively search - * for matches. To use the query context: - * 1. First call `ts_query_context_exec` to start running the query - * on a particular syntax node. - * 2. Then repeatedly call `ts_query_context_next` to iterate over - * the matches. - * 3. After each successful call to `ts_query_context_next`, you can call - * `ts_query_context_matched_pattern_index` to determine which pattern - * matched. You can also call `ts_query_context_matched_captures` to - * determine which nodes were captured by which capture names. + * The cursor stores the state that is needed to iteratively search + * for matches. To use the query cursor: + * 1. First call `ts_query_cursor_exec` to start running a given query on + a given syntax node. + * 2. Then repeatedly call `ts_query_cursor_next` to iterate over the matches. + * This will return `false` when there are no more matches left. + * 3. After each successful call to `ts_query_cursor_next`, you can call + * `ts_query_cursor_matched_pattern_index` to determine which pattern + * matched. You can also call `ts_query_cursor_matched_captures` to + * determine which nodes were captured, and by which capture names. * * If you don't care about finding all of the matches, you can stop calling - * `ts_query_context_next` at any point. And you can start executing the - * query against a different node by calling `ts_query_context_exec` again. + * `ts_query_cursor_next` at any point. 
And you can start executing another + * query on another node by calling `ts_query_cursor_exec` again. */ -TSQueryContext *ts_query_context_new(const TSQuery *); +TSQueryCursor *ts_query_cursor_new(); /** - * Delete a query context, freeing all of the memory that it used. + * Delete a query cursor, freeing all of the memory that it used. */ -void ts_query_context_delete(TSQueryContext *); +void ts_query_cursor_delete(TSQueryCursor *); /** - * Start running a query on a given node. + * Start running a given query on a given node. */ -void ts_query_context_exec(TSQueryContext *, TSNode); +void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode); /** * Set the range of bytes or (row, column) positions in which the query * will be executed. */ -void ts_query_context_set_byte_range(TSQueryContext *, uint32_t, uint32_t); -void ts_query_context_set_point_range(TSQueryContext *, TSPoint, TSPoint); +void ts_query_cursor_set_byte_range(TSQueryCursor *, uint32_t, uint32_t); +void ts_query_cursor_set_point_range(TSQueryCursor *, TSPoint, TSPoint); /** * Advance to the next match of the currently running query. */ -bool ts_query_context_next(TSQueryContext *); +bool ts_query_cursor_next(TSQueryCursor *); /** * Check which pattern matched. */ -uint32_t ts_query_context_matched_pattern_index(const TSQueryContext *); +uint32_t ts_query_cursor_matched_pattern_index(const TSQueryCursor *); /** * Check which pattern matched. */ -const TSQueryCapture *ts_query_context_matched_captures( - const TSQueryContext *, +const TSQueryCapture *ts_query_cursor_matched_captures( + const TSQueryCursor *, uint32_t * ); diff --git a/lib/src/query.c b/lib/src/query.c index 8b4deb81..a082e131 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -50,7 +50,7 @@ typedef struct { /* * QueryState - The state of an in-progress match of a particular pattern - * in a query. While executing, a QueryContext must keep track of a number + * in a query. 
While executing, a `TSQueryCursor` must keep track of a number * of possible in-progress matches. Each of those possible matches is * represented as one of these states. */ @@ -69,15 +69,14 @@ typedef struct { * parts of the shared array are currently in use by a QueryState. */ typedef struct { - TSQueryCapture *contents; - uint32_t list_size; + Array(TSQueryCapture) list; uint32_t usage_map; } CaptureListPool; /* * TSQuery - A tree query, compiled from a string of S-expressions. The query * itself is immutable. The mutable state used in the process of executing the - * query is stored in a `TSQueryContext`. + * query is stored in a `TSQueryCursor`. */ struct TSQuery { Array(QueryStep) steps; @@ -90,9 +89,9 @@ struct TSQuery { }; /* - * TSQueryContext - A stateful struct used to execute a query on a tree. + * TSQueryCursor - A stateful struct used to execute a query on a tree. */ -struct TSQueryContext { +struct TSQueryCursor { const TSQuery *query; TSTreeCursor cursor; Array(QueryState) states; @@ -185,24 +184,26 @@ static void stream_scan_identifier(Stream *stream) { * CaptureListPool ******************/ -static CaptureListPool capture_list_pool_new(uint16_t list_size) { +static CaptureListPool capture_list_pool_new() { return (CaptureListPool) { - .contents = ts_calloc(MAX_STATE_COUNT * list_size, sizeof(TSQueryCapture)), - .list_size = list_size, + .list = array_new(), .usage_map = UINT32_MAX, }; } -static void capture_list_pool_clear(CaptureListPool *self) { +static void capture_list_pool_reset(CaptureListPool *self, uint16_t list_size) { self->usage_map = UINT32_MAX; + uint32_t total_size = MAX_STATE_COUNT * list_size; + array_reserve(&self->list, total_size); + self->list.size = total_size; } static void capture_list_pool_delete(CaptureListPool *self) { - ts_free(self->contents); + array_delete(&self->list); } static TSQueryCapture *capture_list_pool_get(CaptureListPool *self, uint16_t id) { - return &self->contents[id * self->list_size]; + return 
&self->list.contents[id * (self->list.size / MAX_STATE_COUNT)]; } static inline uint32_t capture_list_bitmask_for_id(uint16_t id) { @@ -599,17 +600,16 @@ int ts_query_capture_id_for_name( } /*************** - * QueryContext + * QueryCursor ***************/ -TSQueryContext *ts_query_context_new(const TSQuery *query) { - TSQueryContext *self = ts_malloc(sizeof(TSQueryContext)); - *self = (TSQueryContext) { - .query = query, +TSQueryCursor *ts_query_cursor_new() { + TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor)); + *self = (TSQueryCursor) { .ascending = false, .states = array_new(), .finished_states = array_new(), - .capture_list_pool = capture_list_pool_new(query->max_capture_count), + .capture_list_pool = capture_list_pool_new(), .start_byte = 0, .end_byte = UINT32_MAX, .start_point = {0, 0}, @@ -618,7 +618,7 @@ TSQueryContext *ts_query_context_new(const TSQuery *query) { return self; } -void ts_query_context_delete(TSQueryContext *self) { +void ts_query_cursor_delete(TSQueryCursor *self) { array_delete(&self->states); array_delete(&self->finished_states); ts_tree_cursor_delete(&self->cursor); @@ -626,17 +626,22 @@ void ts_query_context_delete(TSQueryContext *self) { ts_free(self); } -void ts_query_context_exec(TSQueryContext *self, TSNode node) { +void ts_query_cursor_exec( + TSQueryCursor *self, + const TSQuery *query, + TSNode node +) { array_clear(&self->states); array_clear(&self->finished_states); ts_tree_cursor_reset(&self->cursor, node); - capture_list_pool_clear(&self->capture_list_pool); + capture_list_pool_reset(&self->capture_list_pool, query->max_capture_count); self->depth = 0; self->ascending = false; + self->query = query; } -void ts_query_context_set_byte_range( - TSQueryContext *self, +void ts_query_cursor_set_byte_range( + TSQueryCursor *self, uint32_t start_byte, uint32_t end_byte ) { @@ -648,8 +653,8 @@ void ts_query_context_set_byte_range( self->end_byte = end_byte; } -void ts_query_context_set_point_range( - TSQueryContext *self, +void 
ts_query_cursor_set_point_range( + TSQueryCursor *self, TSPoint start_point, TSPoint end_point ) { @@ -661,8 +666,8 @@ void ts_query_context_set_point_range( self->end_point = end_point; } -static QueryState *ts_query_context_copy_state( - TSQueryContext *self, +static QueryState *ts_query_cursor_copy_state( + TSQueryCursor *self, QueryState *state ) { uint32_t new_list_id = capture_list_pool_acquire(&self->capture_list_pool); @@ -682,7 +687,7 @@ static QueryState *ts_query_context_copy_state( return new_state; } -bool ts_query_context_next(TSQueryContext *self) { +bool ts_query_cursor_next(TSQueryCursor *self) { if (self->finished_states.size > 0) { QueryState state = array_pop(&self->finished_states); capture_list_pool_release(&self->capture_list_pool, state.capture_list_id); @@ -853,7 +858,7 @@ bool ts_query_context_next(TSQueryContext *self) { // siblings. QueryState *next_state = state; if (step->depth > 0 && (!step->field || field_occurs_in_later_sibling)) { - QueryState *copy = ts_query_context_copy_state(self, state); + QueryState *copy = ts_query_cursor_copy_state(self, state); if (copy) next_state = copy; } @@ -873,7 +878,7 @@ bool ts_query_context_next(TSQueryContext *self) { }; } - // If the pattern is now done, then populate the query context's + // If the pattern is now done, then populate the query cursor's // finished state. 
next_state->step_index++; QueryStep *next_step = step + 1; @@ -902,7 +907,7 @@ bool ts_query_context_next(TSQueryContext *self) { return true; } -uint32_t ts_query_context_matched_pattern_index(const TSQueryContext *self) { +uint32_t ts_query_cursor_matched_pattern_index(const TSQueryCursor *self) { if (self->finished_states.size > 0) { QueryState *state = array_back(&self->finished_states); return state->pattern_index; @@ -910,8 +915,8 @@ uint32_t ts_query_context_matched_pattern_index(const TSQueryContext *self) { return 0; } -const TSQueryCapture *ts_query_context_matched_captures( - const TSQueryContext *self, +const TSQueryCapture *ts_query_cursor_matched_captures( + const TSQueryCursor *self, uint32_t *count ) { if (self->finished_states.size > 0) { From 67dcbc3e73483641b38964440b83b1467932882a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 14:44:49 -0700 Subject: [PATCH 012/558] Detect error when a prefix of a valid node name is used in a query --- cli/src/tests/query_test.rs | 8 ++++---- lib/binding_web/test/query-test.js | 3 +++ lib/src/query.c | 7 +++---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index c95bdfa2..b6d669c9 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -44,12 +44,12 @@ fn test_query_errors_on_invalid_symbols() { let language = get_language("javascript"); assert_eq!( - Query::new(language, "(non_existent1)"), - Err(QueryError::NodeType("non_existent1")) + Query::new(language, "(clas)"), + Err(QueryError::NodeType("clas")) ); assert_eq!( - Query::new(language, "(if_statement (non_existent2))"), - Err(QueryError::NodeType("non_existent2")) + Query::new(language, "(if_statement (arrayyyyy))"), + Err(QueryError::NodeType("arrayyyyy")) ); assert_eq!( Query::new(language, "(if_statement condition: (non_existent3))"), diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js index 
38c3209e..cfa45bd4 100644 --- a/lib/binding_web/test/query-test.js +++ b/lib/binding_web/test/query-test.js @@ -25,6 +25,9 @@ describe("Query", () => { assert.throws(() => { JavaScript.query("(non_existent)") }, "Bad node name 'non_existent'"); + assert.throws(() => { + JavaScript.query("(a)") + }, "Bad node name 'a'"); assert.throws(() => { JavaScript.query("(function_declaration non_existent:(identifier))") }, "Bad field name 'non_existent'"); diff --git a/lib/src/query.c b/lib/src/query.c index a082e131..323abebc 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -237,10 +237,9 @@ static TSSymbol ts_query_intern_node_name( ) { uint32_t symbol_count = ts_language_symbol_count(self->language); for (TSSymbol i = 0; i < symbol_count; i++) { - if ( - ts_language_symbol_type(self->language, i) == symbol_type && - !strncmp(ts_language_symbol_name(self->language, i), name, length) - ) return i; + if (ts_language_symbol_type(self->language, i) != symbol_type) continue; + const char *symbol_name = ts_language_symbol_name(self->language, i); + if (!strncmp(symbol_name, name, length) && !symbol_name[length]) return i; } return 0; } From 0528ad5f58bc2ce8a13d5d81240a0549d0c6e6ac Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 14:44:49 -0700 Subject: [PATCH 013/558] playground: Persist state across reloads --- docs/assets/js/playground.js | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/assets/js/playground.js b/docs/assets/js/playground.js index 1b151e42..572370cf 100644 --- a/docs/assets/js/playground.js +++ b/docs/assets/js/playground.js @@ -11,6 +11,8 @@ let tree; const demoContainer = document.getElementById('playground-container'); const languagesByName = {}; + loadState(); + await TreeSitter.init(); const parser = new TreeSitter(); @@ -25,6 +27,7 @@ let tree; scrollElem: outputContainerScroll }); const renderTreeOnCodeChange = debounce(renderTree, 50); + const saveStateOnChange = debounce(saveState, 2000); let 
languageName = languageSelect.value; let treeRows = null; @@ -81,6 +84,7 @@ let tree; tree = newTree; parseCount++; renderTreeOnCodeChange(); + saveStateOnChange(); } async function renderTree() { @@ -262,6 +266,20 @@ let tree; }; } + function loadState() { + const language = localStorage.getItem("language"); + const sourceCode = localStorage.getItem("sourceCode"); + if (language != null && sourceCode != null) { + codeInput.value = sourceCode; + languageSelect.value = language; + } + } + + function saveState() { + localStorage.setItem("language", languageSelect.value); + localStorage.setItem("sourceCode", codeEditor.getValue()); + } + function debounce(func, wait, immediate) { var timeout; return function() { From 49ce2fddb941a8fdc1910a7627a37b50e06a6d23 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 14:44:49 -0700 Subject: [PATCH 014/558] Add wasm binding for running tree queries in a limited range --- cli/src/tests/query_test.rs | 69 +++++++++++++++++++++++++++++- lib/binding_web/binding.c | 14 +++++- lib/binding_web/binding.js | 46 +++++++++++++------- lib/binding_web/test/query-test.js | 34 +++++++++++++-- lib/src/language.c | 3 +- lib/src/query.c | 1 + 6 files changed, 143 insertions(+), 24 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index b6d669c9..ff4dd7a1 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -56,8 +56,12 @@ fn test_query_errors_on_invalid_symbols() { Err(QueryError::NodeType("non_existent3")) ); assert_eq!( - Query::new(language, "(if_statement not_a_field: (identifier))"), - Err(QueryError::Field("not_a_field")) + Query::new(language, "(if_statement condit: (identifier))"), + Err(QueryError::Field("condit")) + ); + assert_eq!( + Query::new(language, "(if_statement conditioning: (identifier))"), + Err(QueryError::Field("conditioning")) ); }); } @@ -368,6 +372,67 @@ fn test_query_exec_within_byte_range() { }); } +#[test] +fn 
test_query_exec_different_queries() { + allocations::record(|| { + let language = get_language("javascript"); + let query1 = Query::new( + language, + " + (array (identifier) @id1) + ", + ) + .unwrap(); + let query2 = Query::new( + language, + " + (array (identifier) @id1) + (pair (identifier) @id2) + ", + ) + .unwrap(); + let query3 = Query::new( + language, + " + (array (identifier) @id1) + (pair (identifier) @id2) + (parenthesized_expression (identifier) @id3) + ", + ) + .unwrap(); + + let source = "[a, {b: b}, (c)];"; + + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let matches = cursor.exec(&query1, tree.root_node()); + assert_eq!( + collect_matches(matches, &query1, source), + &[(0, vec![("id1", "a")]),] + ); + + let matches = cursor.exec(&query3, tree.root_node()); + assert_eq!( + collect_matches(matches, &query3, source), + &[ + (0, vec![("id1", "a")]), + (1, vec![("id2", "b")]), + (2, vec![("id3", "c")]), + ] + ); + + let matches = cursor.exec(&query2, tree.root_node()); + assert_eq!( + collect_matches(matches, &query2, source), + &[(0, vec![("id1", "a")]), (1, vec![("id2", "b")]),] + ); + }); +} + #[test] fn test_query_capture_names() { allocations::record(|| { diff --git a/lib/binding_web/binding.c b/lib/binding_web/binding.c index 501cf1cd..db4449a2 100644 --- a/lib/binding_web/binding.c +++ b/lib/binding_web/binding.c @@ -567,15 +567,25 @@ int ts_node_is_missing_wasm(const TSTree *tree) { /* Section - Query */ /******************/ -void ts_query_exec_wasm(const TSQuery *self, const TSTree *tree) { +void ts_query_exec_wasm( + const TSQuery *self, + const TSTree *tree, + uint32_t start_row, + uint32_t start_column, + uint32_t end_row, + uint32_t end_column +) { if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new(); TSNode node = unmarshal_node(tree); + TSPoint start_point = {start_row, 
code_unit_to_byte(start_column)}; + TSPoint end_point = {end_row, code_unit_to_byte(end_column)}; Array(const void *) result = array_new(); unsigned index = 0; unsigned match_count = 0; + ts_query_cursor_set_point_range(scratch_query_cursor, start_point, end_point); ts_query_cursor_exec(scratch_query_cursor, self, node); while (ts_query_cursor_next(scratch_query_cursor)) { match_count++; @@ -586,7 +596,7 @@ void ts_query_exec_wasm(const TSQuery *self, const TSTree *tree) { &capture_count ); - array_grow_by(&result, 1 + 6 * capture_count); + array_grow_by(&result, 2 + 6 * capture_count); result.contents[index++] = (const void *)pattern_index; result.contents[index++] = (const void *)capture_count; diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 85bd4053..0fd1ea63 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -688,24 +688,30 @@ class Language { const nameLength = getValue(TRANSFER_BUFFER, 'i32'); captureNames[i] = UTF8ToString(nameAddress, nameLength); } + C._free(sourceAddress); return new Query(INTERNAL, address, captureNames); } else { const errorId = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - const utf8ErrorOffset = getValue(TRANSFER_BUFFER, 'i32'); - const errorOffset = UTF8ToString(sourceAddress, utf8ErrorOffset).length; - C._free(sourceAddress); - const suffix = source.slice(errorOffset, 100); + const errorByte = getValue(TRANSFER_BUFFER, 'i32'); + const errorIndex = UTF8ToString(sourceAddress, errorByte).length; + const suffix = source.slice(errorIndex, 100); + const word = suffix.match(QUERY_WORD_REGEX)[0]; + let error; switch (errorId) { - case 2: throw new RangeError( - `Bad node name '${suffix.match(QUERY_WORD_REGEX)[0]}'` - ); - case 3: throw new RangeError( - `Bad field name '${suffix.match(QUERY_WORD_REGEX)[0]}'` - ); - default: throw new SyntaxError( - `Bad syntax at offset ${errorOffset}: '${suffix}'...` - ); + case 2: + error = new RangeError(`Bad node name '${word}'`); + break; + case 
3: + error = new RangeError(`Bad field name '${word}'`); + break; + default: + error = new SyntaxError(`Bad syntax at offset ${errorIndex}: '${suffix}'...`); + break; } + error.index = errorIndex; + error.length = word.length; + C._free(sourceAddress); + throw error; } } @@ -752,10 +758,20 @@ class Query { C._ts_query_delete(this[0]); } - exec(queryNode) { + exec(queryNode, startPosition, endPosition) { + if (!startPosition) startPosition = ZERO_POINT; + if (!endPosition) endPosition = ZERO_POINT; + marshalNode(queryNode); - C._ts_query_exec_wasm(this[0], queryNode.tree[0]); + C._ts_query_exec_wasm( + this[0], + queryNode.tree[0], + startPosition.row, + startPosition.column, + endPosition.row, + endPosition.column + ); const matchCount = getValue(TRANSFER_BUFFER, 'i32'); const nodesAddress = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js index cfa45bd4..4fd44165 100644 --- a/lib/binding_web/test/query-test.js +++ b/lib/binding_web/test/query-test.js @@ -41,10 +41,7 @@ describe("Query", () => { `); const matches = query.exec(tree.rootNode); assert.deepEqual( - matches.map(({pattern, captures}) => ({ - pattern, - captures: captures.map(({name, node}) => ({name, text: node.text})) - })), + formatMatches(matches), [ {pattern: 0, captures: [{name: 'fn-def', text: 'one'}]}, {pattern: 1, captures: [{name: 'fn-ref', text: 'two'}]}, @@ -52,4 +49,33 @@ describe("Query", () => { ] ); }); + + it('matches queries in specified ranges', () => { + tree = parser.parse("[a, b,\nc, d,\ne, f,\ng, h]"); + query = JavaScript.query('(identifier) @element'); + const matches = query.exec( + tree.rootNode, + {row: 1, column: 1}, + {row: 3, column: 1} + ); + assert.deepEqual( + formatMatches(matches), + [ + {pattern: 0, captures: [{name: 'element', text: 'd'}]}, + {pattern: 0, captures: [{name: 'element', text: 'e'}]}, + {pattern: 0, captures: [{name: 'element', text: 'f'}]}, + {pattern: 0, captures: 
[{name: 'element', text: 'g'}]}, + ] + ); + }); }); + +function formatMatches(matches) { + return matches.map(({pattern, captures}) => ({ + pattern, + captures: captures.map(({name, node}) => ({ + name, + text: node.text + })) + })) +} diff --git a/lib/src/language.c b/lib/src/language.c index 1bfb1a8d..e96a3cbf 100644 --- a/lib/src/language.c +++ b/lib/src/language.c @@ -96,7 +96,8 @@ TSFieldId ts_language_field_id_for_name( for (TSSymbol i = 1; i < count + 1; i++) { switch (strncmp(name, self->field_names[i], name_length)) { case 0: - return i; + if (self->field_names[i][name_length] == 0) return i; + break; case -1: return 0; default: diff --git a/lib/src/query.c b/lib/src/query.c index 323abebc..76f0d672 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -783,6 +783,7 @@ bool ts_query_cursor_next(TSQueryCursor *self) { .step_index = slice->step_index, .pattern_index = slice->pattern_index, .capture_list_id = capture_list_id, + .capture_count = 0, })); } From f08767c4825838e7403a3215985953918faccc9d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 11 Sep 2019 14:44:49 -0700 Subject: [PATCH 015/558] Add tree query editor to web UI and playground --- cli/src/web_ui.html | 67 ++++++++++---- docs/assets/css/style.scss | 8 +- docs/assets/js/playground.js | 160 ++++++++++++++++++++++++++++++++- docs/section-6-playground.html | 3 + 4 files changed, 218 insertions(+), 20 deletions(-) diff --git a/cli/src/web_ui.html b/cli/src/web_ui.html index 2422a3d8..62c23f3d 100644 --- a/cli/src/web_ui.html +++ b/cli/src/web_ui.html @@ -18,18 +18,31 @@ +
+ + +
+
- -
+ - +
+
+
+ +
+ + +

@@ -51,15 +64,13 @@
 
   
 
diff --git a/docs/assets/css/style.scss b/docs/assets/css/style.scss
index 7e4b4bb2..77acb21e 100644
--- a/docs/assets/css/style.scss
+++ b/docs/assets/css/style.scss
@@ -118,7 +118,7 @@ body {
 }
 
 #playground-container {
-  > .CodeMirror {
+  .CodeMirror {
     height: auto;
     max-height: 350px;
     border: 1px solid #aaa;
@@ -129,7 +129,7 @@ body {
     max-height: 350px;
   }
 
-  h4, select, .field {
+  h4, select, .field, label {
     display: inline-block;
     margin-right: 20px;
   }
@@ -161,3 +161,7 @@ a.highlighted {
   background-color: #ddd;
   text-decoration: underline;
 }
+
+.query-error {
+  text-decoration: underline red dashed;
+}
diff --git a/docs/assets/js/playground.js b/docs/assets/js/playground.js
index 572370cf..2366ed2f 100644
--- a/docs/assets/js/playground.js
+++ b/docs/assets/js/playground.js
@@ -1,14 +1,27 @@
 let tree;
 
 (async () => {
+  const CAPTURE_REGEX = /@\s*([\w\._-]+)/g;
+  const COLORS_BY_INDEX = [
+    'red',
+    'green',
+    'blue',
+    'orange',
+    'violet',
+  ];
+
   const scriptURL = document.currentScript.getAttribute('src');
+
   const codeInput = document.getElementById('code-input');
   const languageSelect = document.getElementById('language-select');
   const loggingCheckbox = document.getElementById('logging-checkbox');
   const outputContainer = document.getElementById('output-container');
   const outputContainerScroll = document.getElementById('output-container-scroll');
+  const playgroundContainer = document.getElementById('playground-container');
+  const queryCheckbox = document.getElementById('query-checkbox');
+  const queryContainer = document.getElementById('query-container');
+  const queryInput = document.getElementById('query-input');
   const updateTimeSpan = document.getElementById('update-time');
-  const demoContainer = document.getElementById('playground-container');
   const languagesByName = {};
 
   loadState();
@@ -20,6 +33,12 @@ let tree;
     lineNumbers: true,
     showCursorWhenSelecting: true
   });
+
+  const queryEditor = CodeMirror.fromTextArea(queryInput, {
+    lineNumbers: true,
+    showCursorWhenSelecting: true
+  });
+
   const cluster = new Clusterize({
     rows: [],
     noDataText: null,
@@ -28,22 +47,29 @@ let tree;
   });
   const renderTreeOnCodeChange = debounce(renderTree, 50);
   const saveStateOnChange = debounce(saveState, 2000);
+  const runTreeQueryOnChange = debounce(runTreeQuery, 150);
 
   let languageName = languageSelect.value;
   let treeRows = null;
   let treeRowHighlightedIndex = -1;
   let parseCount = 0;
   let isRendering = 0;
+  let query;
 
   codeEditor.on('changes', handleCodeChange);
+  codeEditor.on('viewportChange', runTreeQueryOnChange);
   codeEditor.on('cursorActivity', debounce(handleCursorMovement, 150));
+  queryEditor.on('changes', debounce(handleQueryChange, 150));
+
   loggingCheckbox.addEventListener('change', handleLoggingChange);
+  queryCheckbox.addEventListener('change', handleQueryEnableChange);
   languageSelect.addEventListener('change', handleLanguageChange);
   outputContainer.addEventListener('click', handleTreeClick);
 
+  handleQueryEnableChange();
   await handleLanguageChange()
 
-  demoContainer.style.visibility = 'visible';
+  playgroundContainer.style.visibility = 'visible';
 
   async function handleLanguageChange() {
     const newLanguageName = languageSelect.value;
@@ -65,6 +91,7 @@ let tree;
     languageName = newLanguageName;
     parser.setLanguage(languagesByName[newLanguageName]);
     handleCodeChange();
+    handleQueryChange();
   }
 
   async function handleCodeChange(editor, changes) {
@@ -84,6 +111,7 @@ let tree;
     tree = newTree;
     parseCount++;
     renderTreeOnCodeChange();
+    runTreeQueryOnChange();
     saveStateOnChange();
   }
 
@@ -168,6 +196,106 @@ let tree;
     handleCursorMovement();
   }
 
+  function runTreeQuery(_, startRow, endRow) {
+    if (endRow == null) {
+      const viewport = codeEditor.getViewport();
+      startRow = viewport.from;
+      endRow = viewport.to;
+    }
+
+    codeEditor.operation(() => {
+      const marks = codeEditor.getAllMarks();
+      marks.forEach(m => m.clear());
+
+      if (tree && query) {
+        const matches = query.exec(
+          tree.rootNode,
+          {row: startRow, column: 0},
+          {row: endRow, column: 0},
+        );
+        for (const {captures} of matches) {
+          for (const {name, node} of captures) {
+            const {startPosition, endPosition} = node;
+            codeEditor.markText(
+              {line: startPosition.row, ch: startPosition.column},
+              {line: endPosition.row, ch: endPosition.column},
+              {
+                inclusiveLeft: true,
+                inclusiveRight: true,
+                css: `color: ${colorForCaptureName(name)}`
+              }
+            );
+          }
+        }
+      }
+    });
+  }
+
+  function handleQueryChange() {
+    if (query) {
+      query.delete();
+      query.deleted = true;
+      query = null;
+    }
+
+    queryEditor.operation(() => {
+      queryEditor.getAllMarks().forEach(m => m.clear());
+      if (!queryCheckbox.checked) return;
+
+      const queryText = queryEditor.getValue();
+
+      try {
+        query = parser.getLanguage().query(queryText);
+        let match;
+
+        let row = 0;
+        queryEditor.eachLine((line) => {
+          while (match = CAPTURE_REGEX.exec(line.text)) {
+            queryEditor.markText(
+              {line: row, ch: match.index},
+              {line: row, ch: match.index + match[0].length},
+              {
+                inclusiveLeft: true,
+                inclusiveRight: true,
+                css: `color: ${colorForCaptureName(match[1])}`
+              }
+            );
+          }
+          row++;
+        });
+      } catch (error) {
+        const startPosition = queryEditor.posFromIndex(error.index);
+        const endPosition = {
+          line: startPosition.line,
+          ch: startPosition.ch + (error.length || 1)
+        };
+
+        if (error.index === queryText.length) {
+          if (startPosition.ch > 0) {
+            startPosition.ch--;
+          } else if (startPosition.row > 0) {
+            startPosition.row--;
+            startPosition.column = Infinity;
+          }
+        }
+
+        queryEditor.markText(
+          startPosition,
+          endPosition,
+          {
+            className: 'query-error',
+            inclusiveLeft: true,
+            inclusiveRight: true,
+            attributes: {title: error.message}
+          }
+        );
+      }
+    });
+
+    runTreeQuery();
+    saveQueryState();
+  }
+
   function handleCursorMovement() {
     if (isRendering) return;
 
@@ -240,6 +368,17 @@ let tree;
     }
   }
 
+  function handleQueryEnableChange() {
+    if (queryCheckbox.checked) {
+      queryContainer.style.visibility = '';
+      queryContainer.style.position = '';
+    } else {
+      queryContainer.style.visibility = 'hidden';
+      queryContainer.style.position = 'absolute';
+    }
+    handleQueryChange();
+  }
+
   function treeEditForEditorChange(change) {
     const oldLineCount = change.removed.length;
     const newLineCount = change.text.length;
@@ -266,18 +405,33 @@ let tree;
     };
   }
 
+  function colorForCaptureName(capture) {
+    const id = query.captureNames.indexOf(capture);
+    return COLORS_BY_INDEX[id % COLORS_BY_INDEX.length];
+  }
+
   function loadState() {
     const language = localStorage.getItem("language");
     const sourceCode = localStorage.getItem("sourceCode");
-    if (language != null && sourceCode != null) {
+    const query = localStorage.getItem("query");
+    const queryEnabled = localStorage.getItem("queryEnabled");
+    if (language != null && sourceCode != null && query != null) {
+      queryInput.value = query;
       codeInput.value = sourceCode;
       languageSelect.value = language;
+      queryCheckbox.checked = (queryEnabled === 'true');
     }
   }
 
   function saveState() {
     localStorage.setItem("language", languageSelect.value);
     localStorage.setItem("sourceCode", codeEditor.getValue());
+    saveQueryState();
+  }
+
+  function saveQueryState() {
+    localStorage.setItem("queryEnabled", queryCheckbox.checked);
+    localStorage.setItem("query", queryEditor.getValue());
   }
 
   function debounce(func, wait, immediate) {
diff --git a/docs/section-6-playground.html b/docs/section-6-playground.html
index 93d68867..60ce566b 100644
--- a/docs/section-6-playground.html
+++ b/docs/section-6-playground.html
@@ -31,6 +31,9 @@ permalink: playground
 
 
 
+
+
+
 
 

From 36427614d3fae9280136825864c1fb6399090357 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld 
Date: Wed, 11 Sep 2019 14:44:49 -0700
Subject: [PATCH 016/558] Fix typo in windows impl of count_leading_zeros

---
 lib/src/bits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/src/bits.h b/lib/src/bits.h
index 0caa1d8d..867a2a1e 100644
--- a/lib/src/bits.h
+++ b/lib/src/bits.h
@@ -10,7 +10,7 @@
 static inline uint32_t count_leading_zeros(uint32_t x) {
   if (x == 0) return 32;
   uint32_t result;
-  _BitScanReverse(&reuslt, x);
+  _BitScanReverse(&result, x);
   return result;
 }
 

From e52f1a26b36cf680377c5e5170b3408fba28d96b Mon Sep 17 00:00:00 2001
From: Max Brunsfeld 
Date: Wed, 11 Sep 2019 14:44:49 -0700
Subject: [PATCH 017/558] Fix windows bitwise stuff for tree queries

---
 lib/src/bits.h  |  6 +++++-
 lib/src/query.c | 12 ++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/src/bits.h b/lib/src/bits.h
index 867a2a1e..3bec455d 100644
--- a/lib/src/bits.h
+++ b/lib/src/bits.h
@@ -3,6 +3,10 @@
 
 #include <stdint.h>
 
+static inline uint32_t bitmask_for_index(uint16_t id) {
+  return (1u << (31 - id));
+}
+
 #ifdef _WIN32
 
 #include <intrin.h>
@@ -11,7 +15,7 @@ static inline uint32_t count_leading_zeros(uint32_t x) {
   if (x == 0) return 32;
   uint32_t result;
   _BitScanReverse(&result, x);
-  return result;
+  return 31 - result;
 }
 
 #else
diff --git a/lib/src/query.c b/lib/src/query.c
index 76f0d672..14b0d67b 100644
--- a/lib/src/query.c
+++ b/lib/src/query.c
@@ -206,23 +206,19 @@ static TSQueryCapture *capture_list_pool_get(CaptureListPool *self, uint16_t id)
   return &self->list.contents[id * (self->list.size / MAX_STATE_COUNT)];
 }
 
-static inline uint32_t capture_list_bitmask_for_id(uint16_t id) {
-  // An id of zero corresponds to the highest-order bit in the bitmask.
-  return (1u << (31 - id));
-}
-
 static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
   // In the usage_map bitmask, ones represent free lists, and zeros represent
   // lists that are in use. A free list can quickly be found by counting
-  // the leading zeros in the usage map.
+  // the leading zeros in the usage map. An id of zero corresponds to the
+  // highest-order bit in the bitmask.
   uint16_t id = count_leading_zeros(self->usage_map);
   if (id == 32) return NONE;
-  self->usage_map &= ~capture_list_bitmask_for_id(id);
+  self->usage_map &= ~bitmask_for_index(id);
   return id;
 }
 
 static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
-  self->usage_map |= capture_list_bitmask_for_id(id);
+  self->usage_map |= bitmask_for_index(id);
 }
 
 /*********

From 33587c924ae0a7ba9e55fd1bc0aa101205e6ca32 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld 
Date: Wed, 11 Sep 2019 14:44:49 -0700
Subject: [PATCH 018/558] Remove an unused field, clean up some comments

---
 lib/src/query.c | 47 +++++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/lib/src/query.c b/lib/src/query.c
index 14b0d67b..5c20f0f3 100644
--- a/lib/src/query.c
+++ b/lib/src/query.c
@@ -26,8 +26,7 @@ typedef struct {
   TSSymbol symbol;
   TSFieldId field;
   uint16_t capture_id;
-  uint8_t depth;
-  bool field_is_multiple;
+  uint16_t depth;
 } QueryStep;
 
 /*
@@ -155,6 +154,7 @@ static void stream_skip_whitespace(Stream *stream) {
     if (iswspace(stream->next)) {
       stream_advance(stream);
     } else if (stream->next == ';') {
+      // skip over comments
       stream_advance(stream);
       while (stream->next && stream->next != '\n') {
         if (!stream_advance(stream)) break;
@@ -246,10 +246,7 @@ static uint16_t ts_query_intern_capture_name(
   uint32_t length
 ) {
   int id = ts_query_capture_id_for_name(self, name, length);
-  if (id >= 0) {
-    return (uint16_t)id;
-  }
-
+  if (id >= 0) return (uint16_t)id;
   CaptureSlice capture = {
     .offset = self->capture_data.size,
     .length = length,
@@ -267,6 +264,10 @@ static uint16_t ts_query_intern_capture_name(
 // that node. It is represented as an array of `(symbol, step index)` pairs,
 // sorted by symbol. Lookups use a binary search so that their cost scales
 // logarithmically with the number of patterns in the query.
+//
+// This returns `true` if the symbol is present and `false` otherwise.
+// If the symbol is not present `*result` is set to the index where the
+// symbol should be inserted.
 static inline bool ts_query__pattern_map_search(
   const TSQuery *self,
   TSSymbol needle,
@@ -545,6 +546,9 @@ TSQuery *ts_query_new(
       self->wildcard_root_pattern_count++;
     }
 
+    // Keep track of the maximum number of captures in a pattern, because
+    // that number determines how much space is needed to store each capture
+    // list.
     if (capture_count > self->max_capture_count) {
       self->max_capture_count = capture_count;
     }
@@ -690,8 +694,8 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
 
   while (self->finished_states.size == 0) {
     if (self->ascending) {
-      // Remove any states that were started within this node and are still
-      // not complete.
+      // When leaving a node, remove any unfinished states whose next step
+      // needed to match something within that node.
       uint32_t deleted_count = 0;
       for (unsigned i = 0, n = self->states.size; i < n; i++) {
         QueryState *state = &self->states.contents[i];
@@ -716,10 +720,9 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
 
       if (deleted_count) {
         LOG("failed %u of %u states\n", deleted_count, self->states.size);
+        self->states.size -= deleted_count;
       }
 
-      self->states.size -= deleted_count;
-
       if (ts_tree_cursor_goto_next_sibling(&self->cursor)) {
         self->ascending = false;
       } else if (ts_tree_cursor_goto_parent(&self->cursor)) {
@@ -758,9 +761,9 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
         PatternSlice *slice = &self->query->pattern_map.contents[i];
         QueryStep *step = &self->query->steps.contents[slice->step_index];
 
-        // Check that the node matches the criteria for the first step
-        // of the pattern.
         if (step->field) {
+          // Compute the current field id if it is needed and has not yet
+          // been computed.
           if (field_id == NONE) {
             field_id = ts_tree_cursor_current_field_id_ext(
               &self->cursor,
@@ -770,7 +773,8 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
           if (field_id != step->field) continue;
         }
 
-        // Add a new state at the start of this pattern.
+        // If this node matches the first step of the pattern, then add a new
+        // state at the start of this pattern.
         uint32_t capture_list_id = capture_list_pool_acquire(
           &self->capture_list_pool
         );
@@ -790,6 +794,8 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
         QueryStep *step = &self->query->steps.contents[slice->step_index];
         do {
           if (step->field) {
+            // Compute the current field id if it is needed and has not yet
+            // been computed.
             if (field_id == NONE) {
               field_id = ts_tree_cursor_current_field_id_ext(
                 &self->cursor,
@@ -801,9 +807,9 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
 
           LOG("start pattern %u\n", slice->pattern_index);
 
-          // If the node matches the first step of the pattern, then add
-          // a new in-progress state. First, acquire a list to hold the
-          // pattern's captures.
+          // If this node matches the first step of the pattern, then add a
+          // new in-progress state. First, acquire a list to hold the pattern's
+          // captures.
           uint32_t capture_list_id = capture_list_pool_acquire(
             &self->capture_list_pool
           );
@@ -817,6 +823,7 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
             .capture_count = 0,
           }));
 
+          // Advance to the next pattern whose root node matches this node.
           i++;
           if (i == self->query->pattern_map.size) break;
           slice = &self->query->pattern_map.contents[i];
@@ -834,8 +841,8 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
         if (state->start_depth + step->depth != self->depth) continue;
         if (step->symbol && step->symbol != symbol) continue;
         if (step->field) {
-          // Only compute the current field if it is needed for the current
-          // step of some in-progress pattern.
+          // Compute the current field id if it is needed and has not yet
+          // been computed.
           if (field_id == NONE) {
             field_id = ts_tree_cursor_current_field_id_ext(
               &self->cursor,
@@ -874,8 +881,8 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
           };
         }
 
-        // If the pattern is now done, then populate the query cursor's
-        // finished state.
+        // If the pattern is now done, then remove it from the list of
+        // in-progress states, and add it to the list of finished states.
         next_state->step_index++;
         QueryStep *next_step = step + 1;
         if (next_step->depth == PATTERN_DONE_MARKER) {

From a1fec71b19ba4d01ddfc4cb4616cdc0072e7400d Mon Sep 17 00:00:00 2001
From: Max Brunsfeld 
Date: Wed, 11 Sep 2019 14:44:49 -0700
Subject: [PATCH 019/558] Tweak QueryCursor to allow iterating either matches
 or captures

For syntax highlighting, we want to iterate over all of the captures in
order, and don't care about grouping the captures by pattern.
---
 cli/src/query.rs                   |   2 +-
 cli/src/tests/query_test.rs        | 149 ++++++++++++++++++++++------
 docs/assets/js/playground.js       |  28 +++---
 lib/binding_rust/bindings.rs       |  56 ++++++-----
 lib/binding_rust/lib.rs            |  67 ++++++++++---
 lib/binding_web/binding.c          |  63 +++++++++---
 lib/binding_web/binding.js         |  73 +++++++++-----
 lib/binding_web/exports.json       |   3 +-
 lib/binding_web/test/query-test.js | 151 +++++++++++++++++++----------
 lib/include/tree_sitter/api.h      |  58 ++++++-----
 lib/src/query.c                    | 136 ++++++++++++++++++++------
 11 files changed, 559 insertions(+), 227 deletions(-)

diff --git a/cli/src/query.rs b/cli/src/query.rs
index 2f50325a..9e58c263 100644
--- a/cli/src/query.rs
+++ b/cli/src/query.rs
@@ -32,7 +32,7 @@ pub fn query_files_at_paths(
 
         let tree = parser.parse(&source_code, None).unwrap();
 
-        for mat in query_cursor.exec(&query, tree.root_node()) {
+        for mat in query_cursor.matches(&query, tree.root_node()) {
             writeln!(&mut stdout, "  pattern: {}", mat.pattern_index())?;
             for (capture_id, node) in mat.captures() {
                 writeln!(
diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs
index ff4dd7a1..5c37a9ab 100644
--- a/cli/src/tests/query_test.rs
+++ b/cli/src/tests/query_test.rs
@@ -1,6 +1,6 @@
 use super::helpers::allocations;
 use super::helpers::fixtures::get_language;
-use tree_sitter::{Parser, Query, QueryCursor, QueryError, QueryMatch};
+use tree_sitter::{Node, Parser, Query, QueryCursor, QueryError, QueryMatch};
 
 #[test]
 fn test_query_errors_on_invalid_syntax() {
@@ -67,7 +67,7 @@ fn test_query_errors_on_invalid_symbols() {
 }
 
 #[test]
-fn test_query_exec_with_simple_pattern() {
+fn test_query_matches_with_simple_pattern() {
     allocations::record(|| {
         let language = get_language("javascript");
         let query = Query::new(
@@ -82,7 +82,7 @@ fn test_query_exec_with_simple_pattern() {
         let tree = parser.parse(source, None).unwrap();
 
         let mut cursor = QueryCursor::new();
-        let matches = cursor.exec(&query, tree.root_node());
+        let matches = cursor.matches(&query, tree.root_node());
 
         assert_eq!(
             collect_matches(matches, &query, source),
@@ -95,7 +95,7 @@ fn test_query_exec_with_simple_pattern() {
 }
 
 #[test]
-fn test_query_exec_with_multiple_matches_same_root() {
+fn test_query_matches_with_multiple_on_same_root() {
     allocations::record(|| {
         let language = get_language("javascript");
         let query = Query::new(
@@ -122,7 +122,7 @@ fn test_query_exec_with_multiple_matches_same_root() {
         parser.set_language(language).unwrap();
         let tree = parser.parse(source, None).unwrap();
         let mut cursor = QueryCursor::new();
-        let matches = cursor.exec(&query, tree.root_node());
+        let matches = cursor.matches(&query, tree.root_node());
 
         assert_eq!(
             collect_matches(matches, &query, source),
@@ -147,7 +147,7 @@ fn test_query_exec_with_multiple_matches_same_root() {
 }
 
 #[test]
-fn test_query_exec_multiple_patterns_different_roots() {
+fn test_query_matches_with_multiple_patterns_different_roots() {
     allocations::record(|| {
         let language = get_language("javascript");
         let query = Query::new(
@@ -169,7 +169,7 @@ fn test_query_exec_multiple_patterns_different_roots() {
         parser.set_language(language).unwrap();
         let tree = parser.parse(source, None).unwrap();
         let mut cursor = QueryCursor::new();
-        let matches = cursor.exec(&query, tree.root_node());
+        let matches = cursor.matches(&query, tree.root_node());
 
         assert_eq!(
             collect_matches(matches, &query, source),
@@ -183,7 +183,7 @@ fn test_query_exec_multiple_patterns_different_roots() {
 }
 
 #[test]
-fn test_query_exec_multiple_patterns_same_root() {
+fn test_query_matches_with_multiple_patterns_same_root() {
     allocations::record(|| {
         let language = get_language("javascript");
         let query = Query::new(
@@ -211,7 +211,7 @@ fn test_query_exec_multiple_patterns_same_root() {
         parser.set_language(language).unwrap();
         let tree = parser.parse(source, None).unwrap();
         let mut cursor = QueryCursor::new();
-        let matches = cursor.exec(&query, tree.root_node());
+        let matches = cursor.matches(&query, tree.root_node());
 
         assert_eq!(
             collect_matches(matches, &query, source),
@@ -224,7 +224,7 @@ fn test_query_exec_multiple_patterns_same_root() {
 }
 
 #[test]
-fn test_query_exec_nested_matches_without_fields() {
+fn test_query_matches_with_nesting_and_no_fields() {
     allocations::record(|| {
         let language = get_language("javascript");
         let query = Query::new(
@@ -248,7 +248,7 @@ fn test_query_exec_nested_matches_without_fields() {
         parser.set_language(language).unwrap();
         let tree = parser.parse(source, None).unwrap();
         let mut cursor = QueryCursor::new();
-        let matches = cursor.exec(&query, tree.root_node());
+        let matches = cursor.matches(&query, tree.root_node());
 
         assert_eq!(
             collect_matches(matches, &query, source),
@@ -263,7 +263,7 @@ fn test_query_exec_nested_matches_without_fields() {
 }
 
 #[test]
-fn test_query_exec_many_matches() {
+fn test_query_matches_with_many() {
     allocations::record(|| {
         let language = get_language("javascript");
         let query = Query::new(language, "(array (identifier) @element)").unwrap();
@@ -274,7 +274,7 @@ fn test_query_exec_many_matches() {
         parser.set_language(language).unwrap();
         let tree = parser.parse(&source, None).unwrap();
         let mut cursor = QueryCursor::new();
-        let matches = cursor.exec(&query, tree.root_node());
+        let matches = cursor.matches(&query, tree.root_node());
 
         assert_eq!(
             collect_matches(matches, &query, source.as_str()),
@@ -284,7 +284,7 @@ fn test_query_exec_many_matches() {
 }
 
 #[test]
-fn test_query_exec_too_many_match_permutations_to_track() {
+fn test_query_matches_with_too_many_permutations_to_track() {
     allocations::record(|| {
         let language = get_language("javascript");
         let query = Query::new(
@@ -303,7 +303,7 @@ fn test_query_exec_too_many_match_permutations_to_track() {
         parser.set_language(language).unwrap();
         let tree = parser.parse(&source, None).unwrap();
         let mut cursor = QueryCursor::new();
-        let matches = cursor.exec(&query, tree.root_node());
+        let matches = cursor.matches(&query, tree.root_node());
 
         // For this pathological query, some match permutations will be dropped.
         // Just check that a subset of the results are returned, and crash or
@@ -316,7 +316,7 @@ fn test_query_exec_too_many_match_permutations_to_track() {
 }
 
 #[test]
-fn test_query_exec_with_anonymous_tokens() {
+fn test_query_matches_with_anonymous_tokens() {
     allocations::record(|| {
         let language = get_language("javascript");
         let query = Query::new(
@@ -334,7 +334,7 @@ fn test_query_exec_with_anonymous_tokens() {
         parser.set_language(language).unwrap();
         let tree = parser.parse(&source, None).unwrap();
         let mut cursor = QueryCursor::new();
-        let matches = cursor.exec(&query, tree.root_node());
+        let matches = cursor.matches(&query, tree.root_node());
 
         assert_eq!(
             collect_matches(matches, &query, source),
@@ -347,7 +347,7 @@ fn test_query_exec_with_anonymous_tokens() {
 }
 
 #[test]
-fn test_query_exec_within_byte_range() {
+fn test_query_matches_within_byte_range() {
     allocations::record(|| {
         let language = get_language("javascript");
         let query = Query::new(language, "(identifier) @element").unwrap();
@@ -359,7 +359,9 @@ fn test_query_exec_within_byte_range() {
         let tree = parser.parse(&source, None).unwrap();
 
         let mut cursor = QueryCursor::new();
-        let matches = cursor.set_byte_range(5, 15).exec(&query, tree.root_node());
+        let matches = cursor
+            .set_byte_range(5, 15)
+            .matches(&query, tree.root_node());
 
         assert_eq!(
             collect_matches(matches, &query, source),
@@ -373,7 +375,7 @@ fn test_query_exec_within_byte_range() {
 }
 
 #[test]
-fn test_query_exec_different_queries() {
+fn test_query_matches_different_queries_same_cursor() {
     allocations::record(|| {
         let language = get_language("javascript");
         let query1 = Query::new(
@@ -409,13 +411,13 @@ fn test_query_exec_different_queries() {
         parser.set_language(language).unwrap();
         let tree = parser.parse(&source, None).unwrap();
 
-        let matches = cursor.exec(&query1, tree.root_node());
+        let matches = cursor.matches(&query1, tree.root_node());
         assert_eq!(
             collect_matches(matches, &query1, source),
             &[(0, vec![("id1", "a")]),]
         );
 
-        let matches = cursor.exec(&query3, tree.root_node());
+        let matches = cursor.matches(&query3, tree.root_node());
         assert_eq!(
             collect_matches(matches, &query3, source),
             &[
@@ -425,7 +427,7 @@ fn test_query_exec_different_queries() {
             ]
         );
 
-        let matches = cursor.exec(&query2, tree.root_node());
+        let matches = cursor.matches(&query2, tree.root_node());
         assert_eq!(
             collect_matches(matches, &query2, source),
             &[(0, vec![("id1", "a")]), (1, vec![("id2", "b")]),]
@@ -433,6 +435,81 @@ fn test_query_exec_different_queries() {
     });
 }
 
+#[test]
+fn test_query_captures() {
+    allocations::record(|| {
+        let language = get_language("javascript");
+        let query = Query::new(
+            language,
+            r#"
+            (pair
+              key: * @method.def
+              (function
+                name: (identifier) @method.alias))
+
+            (variable_declarator
+              name: * @function.def
+              value: (function
+                name: (identifier) @function.alias))
+
+            ":" @delimiter
+            "=" @operator
+            "#,
+        )
+        .unwrap();
+
+        let source = "
+          a({
+            bc: function de() {
+              const fg = function hi() {}
+            },
+            jk: function lm() {
+              const no = function pq() {}
+            },
+          });
+        ";
+
+        let mut parser = Parser::new();
+        parser.set_language(language).unwrap();
+        let tree = parser.parse(&source, None).unwrap();
+        let mut cursor = QueryCursor::new();
+        let matches = cursor.matches(&query, tree.root_node());
+
+        assert_eq!(
+            collect_matches(matches, &query, source),
+            &[
+                (2, vec![("delimiter", ":")]),
+                (0, vec![("method.def", "bc"), ("method.alias", "de")]),
+                (3, vec![("operator", "=")]),
+                (1, vec![("function.def", "fg"), ("function.alias", "hi")]),
+                (2, vec![("delimiter", ":")]),
+                (0, vec![("method.def", "jk"), ("method.alias", "lm")]),
+                (3, vec![("operator", "=")]),
+                (1, vec![("function.def", "no"), ("function.alias", "pq")]),
+            ],
+        );
+
+        let captures = cursor.captures(&query, tree.root_node());
+        assert_eq!(
+            collect_captures(captures, &query, source),
+            &[
+                ("method.def", "bc"),
+                ("delimiter", ":"),
+                ("method.alias", "de"),
+                ("function.def", "fg"),
+                ("operator", "="),
+                ("function.alias", "hi"),
+                ("method.def", "jk"),
+                ("delimiter", ":"),
+                ("method.alias", "lm"),
+                ("function.def", "no"),
+                ("operator", "="),
+                ("function.alias", "pq"),
+            ]
+        );
+    });
+}
+
 #[test]
 fn test_query_capture_names() {
     allocations::record(|| {
@@ -486,7 +563,7 @@ fn test_query_comments() {
         parser.set_language(language).unwrap();
         let tree = parser.parse(source, None).unwrap();
         let mut cursor = QueryCursor::new();
-        let matches = cursor.exec(&query, tree.root_node());
+        let matches = cursor.matches(&query, tree.root_node());
         assert_eq!(
             collect_matches(matches, &query, source),
             &[(0, vec![("fn-name", "one")]),],
@@ -503,14 +580,22 @@ fn collect_matches<'a>(
         .map(|m| {
             (
                 m.pattern_index(),
-                m.captures()
-                    .map(|(capture_id, node)| {
-                        (
-                            query.capture_names()[capture_id].as_str(),
-                            node.utf8_text(source.as_bytes()).unwrap(),
-                        )
-                    })
-                    .collect(),
+                collect_captures(m.captures(), query, source),
+            )
+        })
+        .collect()
+}
+
+fn collect_captures<'a, 'b>(
+    captures: impl Iterator<Item = (usize, Node<'a>)>,
+    query: &'b Query,
+    source: &'b str,
+) -> Vec<(&'b str, &'b str)> {
+    captures
+        .map(|(capture_id, node)| {
+            (
+                query.capture_names()[capture_id].as_str(),
+                node.utf8_text(source.as_bytes()).unwrap(),
             )
         })
         .collect()
diff --git a/docs/assets/js/playground.js b/docs/assets/js/playground.js
index 2366ed2f..d7d4ff33 100644
--- a/docs/assets/js/playground.js
+++ b/docs/assets/js/playground.js
@@ -47,7 +47,7 @@ let tree;
   });
   const renderTreeOnCodeChange = debounce(renderTree, 50);
   const saveStateOnChange = debounce(saveState, 2000);
-  const runTreeQueryOnChange = debounce(runTreeQuery, 150);
+  const runTreeQueryOnChange = debounce(runTreeQuery, 50);
 
   let languageName = languageSelect.value;
   let treeRows = null;
@@ -208,24 +208,22 @@ let tree;
       marks.forEach(m => m.clear());
 
       if (tree && query) {
-        const matches = query.exec(
+        const captures = query.captures(
           tree.rootNode,
           {row: startRow, column: 0},
           {row: endRow, column: 0},
         );
-        for (const {captures} of matches) {
-          for (const {name, node} of captures) {
-            const {startPosition, endPosition} = node;
-            codeEditor.markText(
-              {line: startPosition.row, ch: startPosition.column},
-              {line: endPosition.row, ch: endPosition.column},
-              {
-                inclusiveLeft: true,
-                inclusiveRight: true,
-                css: `color: ${colorForCaptureName(name)}`
-              }
-            );
-          }
+        for (const {name, node} of captures) {
+          const {startPosition, endPosition} = node;
+          codeEditor.markText(
+            {line: startPosition.row, ch: startPosition.column},
+            {line: endPosition.row, ch: endPosition.column},
+            {
+              inclusiveLeft: true,
+              inclusiveRight: true,
+              css: `color: ${colorForCaptureName(name)}`
+            }
+          );
         }
       }
     });
diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs
index effd0c81..2c8ac77d 100644
--- a/lib/binding_rust/bindings.rs
+++ b/lib/binding_rust/bindings.rs
@@ -607,19 +607,23 @@ extern "C" {
     #[doc = " Create a new cursor for executing a given query."]
     #[doc = ""]
     #[doc = " The cursor stores the state that is needed to iteratively search"]
-    #[doc = " for matches. To use the query cursor:"]
-    #[doc = " 1. First call `ts_query_cursor_exec` to start running a given query on"]
-    #[doc = "a given syntax node."]
-    #[doc = " 2. Then repeatedly call `ts_query_cursor_next` to iterate over the matches."]
-    #[doc = "    This will return `false` when there are no more matches left."]
-    #[doc = " 3. After each successful call to `ts_query_cursor_next`, you can call"]
-    #[doc = "    `ts_query_cursor_matched_pattern_index` to determine which pattern"]
-    #[doc = "     matched. You can also call `ts_query_cursor_matched_captures` to"]
-    #[doc = "     determine which nodes were captured, and by which capture names."]
+    #[doc = " for matches. To use the query cursor, first call `ts_query_cursor_exec`"]
+    #[doc = " to start running a given query on a given syntax node. Then, there are"]
+    #[doc = " two options for consuming the results of the query:"]
+    #[doc = " 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the"]
+    #[doc = "    *matches* in the order that they were found. Each match contains the"]
+    #[doc = "    index of the pattern that matched, and an array of captures. Because"]
+    #[doc = "    multiple patterns can match the same set of nodes, one match may contain"]
+    #[doc = "    captures that appear *before* some of the captures from a previous match."]
+    #[doc = " 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the"]
+    #[doc = "    individual *captures* in the order that they appear. This is useful if"]
+    #[doc = "    you don\'t care about which pattern matched, and just want a single ordered"]
+    #[doc = "    sequence of captures."]
     #[doc = ""]
-    #[doc = " If you don\'t care about finding all of the matches, you can stop calling"]
-    #[doc = " `ts_query_cursor_next` at any point. And you can start executing another"]
-    #[doc = "  query on another node by calling `ts_query_cursor_exec` again."]
+    #[doc = " If you don\'t care about consuming all of the results, you can stop calling"]
+    #[doc = " `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point."]
+    #[doc = "  You can then start executing another query on another node by calling"]
+    #[doc = "  `ts_query_cursor_exec` again."]
     pub fn ts_query_cursor_new() -> *mut TSQueryCursor;
 }
 extern "C" {
@@ -640,18 +644,26 @@ extern "C" {
 }
 extern "C" {
     #[doc = " Advance to the next match of the currently running query."]
-    pub fn ts_query_cursor_next(arg1: *mut TSQueryCursor) -> bool;
+    #[doc = ""]
+    #[doc = " If there is another match, write its pattern index to `pattern_index`,"]
+    #[doc = " the number of captures to `capture_count`, and the captures themselves"]
+    #[doc = " to `*captures`, and return `true`. Otherwise, return `false`."]
+    pub fn ts_query_cursor_next_match(
+        self_: *mut TSQueryCursor,
+        pattern_index: *mut u32,
+        capture_count: *mut u32,
+        captures: *mut *const TSQueryCapture,
+    ) -> bool;
 }
 extern "C" {
-    #[doc = " Check which pattern matched."]
-    pub fn ts_query_cursor_matched_pattern_index(arg1: *const TSQueryCursor) -> u32;
-}
-extern "C" {
-    #[doc = " Check which pattern matched."]
-    pub fn ts_query_cursor_matched_captures(
-        arg1: *const TSQueryCursor,
-        arg2: *mut u32,
-    ) -> *const TSQueryCapture;
+    #[doc = " Advance to the next capture of the currently running query."]
+    #[doc = ""]
+    #[doc = " If there is another capture, write it to `capture` and return `true`."]
+    #[doc = " Otherwise, return `false`."]
+    pub fn ts_query_cursor_next_capture(
+        arg1: *mut TSQueryCursor,
+        capture: *mut TSQueryCapture,
+    ) -> bool;
 }
 extern "C" {
     #[doc = " Get the number of distinct node types in the language."]
diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs
index 2c20fd12..3eea8c2f 100644
--- a/lib/binding_rust/lib.rs
+++ b/lib/binding_rust/lib.rs
@@ -18,6 +18,7 @@ use std::marker::PhantomData;
 use std::os::raw::{c_char, c_void};
 use std::sync::atomic::AtomicUsize;
 use std::{char, fmt, ptr, slice, str, u16};
+use std::mem::MaybeUninit;
 
 pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION;
 pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h");
@@ -144,7 +145,12 @@ pub struct Query {
 
 pub struct QueryCursor(*mut ffi::TSQueryCursor);
 
-pub struct QueryMatch<'a>(*mut ffi::TSQueryCursor, PhantomData<&'a ()>);
+pub struct QueryMatch<'a> {
+    pattern_index: usize,
+    capture_count: usize,
+    captures_ptr: *const ffi::TSQueryCapture,
+    cursor: PhantomData<&'a ()>,
+}
 
 #[derive(Debug, PartialEq, Eq)]
 pub enum QueryError<'a> {
@@ -996,14 +1002,52 @@ impl QueryCursor {
         QueryCursor(unsafe { ffi::ts_query_cursor_new() })
     }
 
-    pub fn exec<'a>(&'a mut self, query: &'a Query, node: Node<'a>) -> impl Iterator<Item = QueryMatch<'a>> + 'a {
+    pub fn matches<'a>(
+        &'a mut self,
+        query: &'a Query,
+        node: Node<'a>,
+    ) -> impl Iterator<Item = QueryMatch<'a>> + 'a {
         unsafe {
             ffi::ts_query_cursor_exec(self.0, query.ptr, node.0);
         }
         std::iter::from_fn(move || -> Option<QueryMatch<'a>> {
             unsafe {
-                if ffi::ts_query_cursor_next(self.0) {
-                    Some(QueryMatch(self.0, PhantomData))
+                let mut pattern_index = 0u32;
+                let mut capture_count = 0u32;
+                let mut captures = ptr::null();
+                if ffi::ts_query_cursor_next_match(
+                    self.0,
+                    &mut pattern_index as *mut u32,
+                    &mut capture_count as *mut u32,
+                    &mut captures as *mut *const ffi::TSQueryCapture,
+                ) {
+                    Some(QueryMatch {
+                        pattern_index: pattern_index as usize,
+                        capture_count: capture_count as usize,
+                        captures_ptr: captures,
+                        cursor: PhantomData
+                    })
+                } else {
+                    None
+                }
+            }
+        })
+    }
+
+    pub fn captures<'a>(
+        &'a mut self,
+        query: &'a Query,
+        node: Node<'a>,
+    ) -> impl Iterator<Item = (usize, Node<'a>)> + 'a {
+        unsafe {
+            ffi::ts_query_cursor_exec(self.0, query.ptr, node.0);
+        }
+        std::iter::from_fn(move || -> Option<(usize, Node<'a>)> {
+            unsafe {
+                let mut capture = MaybeUninit::<ffi::TSQueryCapture>::uninit();
+                if ffi::ts_query_cursor_next_capture(self.0, capture.as_mut_ptr()) {
+                    let capture = capture.assume_init();
+                    Some((capture.index as usize, Node::new(capture.node).unwrap()))
                 } else {
                     None
                 }
@@ -1028,19 +1072,14 @@ impl QueryCursor {
 
 impl<'a> QueryMatch<'a> {
     pub fn pattern_index(&self) -> usize {
-        unsafe { ffi::ts_query_cursor_matched_pattern_index(self.0) as usize }
+        self.pattern_index
     }
 
     pub fn captures(&self) -> impl ExactSizeIterator<Item = (usize, Node)> {
-        unsafe {
-            let mut capture_count = 0u32;
-            let captures =
-                ffi::ts_query_cursor_matched_captures(self.0, &mut capture_count as *mut u32);
-            let captures = slice::from_raw_parts(captures, capture_count as usize);
-            captures
-                .iter()
-                .map(move |capture| (capture.index as usize, Node::new(capture.node).unwrap()))
-        }
+        let captures = unsafe { slice::from_raw_parts(self.captures_ptr, self.capture_count as usize) };
+        captures
+            .iter()
+            .map(|capture| (capture.index as usize, Node::new(capture.node).unwrap()))
     }
 }
 
diff --git a/lib/binding_web/binding.c b/lib/binding_web/binding.c
index db4449a2..9a4dec0b 100644
--- a/lib/binding_web/binding.c
+++ b/lib/binding_web/binding.c
@@ -567,7 +567,7 @@ int ts_node_is_missing_wasm(const TSTree *tree) {
 /* Section - Query */
 /******************/
 
-void ts_query_exec_wasm(
+void ts_query_matches_wasm(
   const TSQuery *self,
   const TSTree *tree,
   uint32_t start_row,
@@ -580,24 +580,23 @@ void ts_query_exec_wasm(
   TSNode node = unmarshal_node(tree);
   TSPoint start_point = {start_row, code_unit_to_byte(start_column)};
   TSPoint end_point = {end_row, code_unit_to_byte(end_column)};
-
-  Array(const void *) result = array_new();
-
-  unsigned index = 0;
-  unsigned match_count = 0;
   ts_query_cursor_set_point_range(scratch_query_cursor, start_point, end_point);
   ts_query_cursor_exec(scratch_query_cursor, self, node);
-  while (ts_query_cursor_next(scratch_query_cursor)) {
+
+  uint32_t index = 0;
+  uint32_t match_count = 0;
+  Array(const void *) result = array_new();
+
+  uint32_t pattern_index, capture_count;
+  const TSQueryCapture *captures;
+  while (ts_query_cursor_next_match(
+    scratch_query_cursor,
+    &pattern_index,
+    &capture_count,
+    &captures
+  )) {
     match_count++;
-    uint32_t pattern_index = ts_query_cursor_matched_pattern_index(scratch_query_cursor);
-    uint32_t capture_count;
-    const TSQueryCapture *captures = ts_query_cursor_matched_captures(
-      scratch_query_cursor,
-      &capture_count
-    );
-
     array_grow_by(&result, 2 + 6 * capture_count);
-
     result.contents[index++] = (const void *)pattern_index;
     result.contents[index++] = (const void *)capture_count;
     for (unsigned i = 0; i < capture_count; i++) {
@@ -611,3 +610,37 @@ void ts_query_exec_wasm(
   TRANSFER_BUFFER[0] = (const void *)(match_count);
   TRANSFER_BUFFER[1] = result.contents;
 }
+
+void ts_query_captures_wasm(
+  const TSQuery *self,
+  const TSTree *tree,
+  uint32_t start_row,
+  uint32_t start_column,
+  uint32_t end_row,
+  uint32_t end_column
+) {
+  if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new();
+
+  TSNode node = unmarshal_node(tree);
+  TSPoint start_point = {start_row, code_unit_to_byte(start_column)};
+  TSPoint end_point = {end_row, code_unit_to_byte(end_column)};
+  ts_query_cursor_set_point_range(scratch_query_cursor, start_point, end_point);
+  ts_query_cursor_exec(scratch_query_cursor, self, node);
+
+  unsigned index = 0;
+  unsigned capture_count = 0;
+  Array(const void *) result = array_new();
+
+  TSQueryCapture capture;
+  while (ts_query_cursor_next_capture(scratch_query_cursor, &capture)) {
+    capture_count++;
+
+    array_grow_by(&result, 6);
+    result.contents[index++] = (const void *)capture.index;
+    marshal_node(result.contents + index, capture.node);
+    index += 5;
+  }
+
+  TRANSFER_BUFFER[0] = (const void *)(capture_count);
+  TRANSFER_BUFFER[1] = result.contents;
+}
diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js
index 0fd1ea63..8ed7fca7 100644
--- a/lib/binding_web/binding.js
+++ b/lib/binding_web/binding.js
@@ -5,7 +5,7 @@ const SIZE_OF_NODE = 5 * SIZE_OF_INT;
 const SIZE_OF_POINT = 2 * SIZE_OF_INT;
 const SIZE_OF_RANGE = 2 * SIZE_OF_INT + 2 * SIZE_OF_POINT;
 const ZERO_POINT = {row: 0, column: 0};
-const QUERY_WORD_REGEX = /[\w-.]*/;
+const QUERY_WORD_REGEX = /[\w-.]*/g;
 
 var VERSION;
 var MIN_COMPATIBLE_VERSION;
@@ -694,7 +694,7 @@ class Language {
       const errorId = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
       const errorByte = getValue(TRANSFER_BUFFER, 'i32');
       const errorIndex = UTF8ToString(sourceAddress, errorByte).length;
-      const suffix = source.slice(errorIndex, 100);
+      const suffix = source.substr(errorIndex, 100);
       const word = suffix.match(QUERY_WORD_REGEX)[0];
       let error;
       switch (errorId) {
@@ -758,46 +758,75 @@ class Query {
     C._ts_query_delete(this[0]);
   }
 
-  exec(queryNode, startPosition, endPosition) {
+  matches(node, startPosition, endPosition) {
     if (!startPosition) startPosition = ZERO_POINT;
     if (!endPosition) endPosition = ZERO_POINT;
 
-    marshalNode(queryNode);
+    marshalNode(node);
 
-    C._ts_query_exec_wasm(
+    C._ts_query_matches_wasm(
       this[0],
-      queryNode.tree[0],
+      node.tree[0],
       startPosition.row,
       startPosition.column,
       endPosition.row,
       endPosition.column
     );
 
-    const matchCount = getValue(TRANSFER_BUFFER, 'i32');
-    const nodesAddress = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
-    const result = new Array(matchCount);
+    const count = getValue(TRANSFER_BUFFER, 'i32');
+    const startAddress = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
+    const result = new Array(count);
 
-    let address = nodesAddress;
-    for (let i = 0; i < matchCount; i++) {
+    let address = startAddress;
+    for (let i = 0; i < count; i++) {
       const pattern = getValue(address, 'i32');
       address += SIZE_OF_INT;
-      const captures = new Array(getValue(address, 'i32'));
+      const captureCount = getValue(address, 'i32');
       address += SIZE_OF_INT;
-      for (let j = 0, n = captures.length; j < n; j++) {
-        const captureIndex = getValue(address, 'i32');
-        address += SIZE_OF_INT;
-        const node = unmarshalNode(queryNode.tree, address);
-        address += SIZE_OF_NODE;
-        captures[j] = {name: this.captureNames[captureIndex], node};
-      }
+
+      const captures = new Array(captureCount);
+      address = unmarshalCaptures(this, node.tree, address, captures);
       result[i] = {pattern, captures};
     }
 
-    // Free the intermediate buffers
-    C._free(nodesAddress);
-
+    C._free(startAddress);
     return result;
   }
+
+  captures(node, startPosition, endPosition) {
+    if (!startPosition) startPosition = ZERO_POINT;
+    if (!endPosition) endPosition = ZERO_POINT;
+
+    marshalNode(node);
+
+    C._ts_query_captures_wasm(
+      this[0],
+      node.tree[0],
+      startPosition.row,
+      startPosition.column,
+      endPosition.row,
+      endPosition.column
+    );
+
+    const count = getValue(TRANSFER_BUFFER, 'i32');
+    const startAddress = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
+    const result = new Array(count);
+    unmarshalCaptures(this, node.tree, startAddress, result);
+
+    C._free(startAddress);
+    return result;
+  }
+}
+
+function unmarshalCaptures(query, tree, address, result) {
+  for (let i = 0, n = result.length; i < n; i++) {
+    const captureIndex = getValue(address, 'i32');
+    address += SIZE_OF_INT;
+    const node = unmarshalNode(tree, address);
+    address += SIZE_OF_NODE;
+    result[i] = {name: query.captureNames[captureIndex], node};
+  }
+  return address;
 }
 
 function assertInternal(x) {
diff --git a/lib/binding_web/exports.json b/lib/binding_web/exports.json
index e2b187f7..6b0eab30 100644
--- a/lib/binding_web/exports.json
+++ b/lib/binding_web/exports.json
@@ -73,7 +73,8 @@
   "_ts_query_context_delete",
   "_ts_query_context_new",
   "_ts_query_delete",
-  "_ts_query_exec_wasm",
+  "_ts_query_matches_wasm",
+  "_ts_query_captures_wasm",
   "_ts_query_new",
   "_ts_tree_cursor_current_field_id_wasm",
   "_ts_tree_cursor_current_node_id_wasm",
diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js
index 4fd44165..5d7ce620 100644
--- a/lib/binding_web/test/query-test.js
+++ b/lib/binding_web/test/query-test.js
@@ -18,64 +18,117 @@ describe("Query", () => {
     if (query) query.delete();
   });
 
-  it('throws an error on invalid syntax', () => {
-    assert.throws(() => {
-      JavaScript.query("(function_declaration wat)")
-    }, "Bad syntax at offset 22: \'wat)\'...");
-    assert.throws(() => {
-      JavaScript.query("(non_existent)")
-    }, "Bad node name 'non_existent'");
-    assert.throws(() => {
-      JavaScript.query("(a)")
-    }, "Bad node name 'a'");
-    assert.throws(() => {
-      JavaScript.query("(function_declaration non_existent:(identifier))")
-    }, "Bad field name 'non_existent'");
+  describe('construction', () => {
+    it('throws an error on invalid syntax', () => {
+      assert.throws(() => {
+        JavaScript.query("(function_declaration wat)")
+      }, "Bad syntax at offset 22: \'wat)\'...");
+      assert.throws(() => {
+        JavaScript.query("(non_existent)")
+      }, "Bad node name 'non_existent'");
+      assert.throws(() => {
+        JavaScript.query("(a)")
+      }, "Bad node name 'a'");
+      assert.throws(() => {
+        JavaScript.query("(function_declaration non_existent:(identifier))")
+      }, "Bad field name 'non_existent'");
+    });
   });
 
-  it('matches simple queries',  () => {
-    tree = parser.parse("function one() { two(); function three() {} }");
-    query = JavaScript.query(`
-      (function_declaration name:(identifier) @fn-def)
-      (call_expression function:(identifier) @fn-ref)
-    `);
-    const matches = query.exec(tree.rootNode);
-    assert.deepEqual(
-      formatMatches(matches),
-      [
-        {pattern: 0, captures: [{name: 'fn-def', text: 'one'}]},
-        {pattern: 1, captures: [{name: 'fn-ref', text: 'two'}]},
-        {pattern: 0, captures: [{name: 'fn-def', text: 'three'}]},
-      ]
-    );
+  describe('.matches', () => {
+    it('returns all of the matches for the given query',  () => {
+      tree = parser.parse("function one() { two(); function three() {} }");
+      query = JavaScript.query(`
+        (function_declaration name:(identifier) @fn-def)
+        (call_expression function:(identifier) @fn-ref)
+      `);
+      const matches = query.matches(tree.rootNode);
+      assert.deepEqual(
+        formatMatches(matches),
+        [
+          {pattern: 0, captures: [{name: 'fn-def', text: 'one'}]},
+          {pattern: 1, captures: [{name: 'fn-ref', text: 'two'}]},
+          {pattern: 0, captures: [{name: 'fn-def', text: 'three'}]},
+        ]
+      );
+    });
+
+    it('can search in a specified ranges',  () => {
+      tree = parser.parse("[a, b,\nc, d,\ne, f,\ng, h]");
+      query = JavaScript.query('(identifier) @element');
+      const matches = query.matches(
+        tree.rootNode,
+        {row: 1, column: 1},
+        {row: 3, column: 1}
+      );
+      assert.deepEqual(
+        formatMatches(matches),
+        [
+          {pattern: 0, captures: [{name: 'element', text: 'd'}]},
+          {pattern: 0, captures: [{name: 'element', text: 'e'}]},
+          {pattern: 0, captures: [{name: 'element', text: 'f'}]},
+          {pattern: 0, captures: [{name: 'element', text: 'g'}]},
+        ]
+      );
+    });
   });
 
-  it('matches queries in specified ranges',  () => {
-    tree = parser.parse("[a, b,\nc, d,\ne, f,\ng, h]");
-    query = JavaScript.query('(identifier) @element');
-    const matches = query.exec(
-      tree.rootNode,
-      {row: 1, column: 1},
-      {row: 3, column: 1}
-    );
-    assert.deepEqual(
-      formatMatches(matches),
-      [
-        {pattern: 0, captures: [{name: 'element', text: 'd'}]},
-        {pattern: 0, captures: [{name: 'element', text: 'e'}]},
-        {pattern: 0, captures: [{name: 'element', text: 'f'}]},
-        {pattern: 0, captures: [{name: 'element', text: 'g'}]},
-      ]
-    );
+  describe('.captures', () => {
+    it('returns all of the captures for the given query, in order', () => {
+      tree = parser.parse(`
+        a({
+          bc: function de() {
+            const fg = function hi() {}
+          },
+          jk: function lm() {
+            const no = function pq() {}
+          },
+        });
+      `);
+      query = JavaScript.query(`
+        (pair
+          key: * @method.def
+          (function
+            name: (identifier) @method.alias))
+
+        (variable_declarator
+          name: * @function.def
+          value: (function
+            name: (identifier) @function.alias))
+
+        ":" @delimiter
+        "=" @operator
+      `);
+
+      const captures = query.captures(tree.rootNode);
+      assert.deepEqual(
+        formatCaptures(captures),
+        [
+          {name: "method.def", text: "bc"},
+          {name: "delimiter", text: ":"},
+          {name: "method.alias", text: "de"},
+          {name: "function.def", text: "fg"},
+          {name: "operator", text: "="},
+          {name: "function.alias", text: "hi"},
+          {name: "method.def", text: "jk"},
+          {name: "delimiter", text: ":"},
+          {name: "method.alias", text: "lm"},
+          {name: "function.def", text: "no"},
+          {name: "operator", text: "="},
+          {name: "function.alias", text: "pq"},
+        ]
+      );
+    });
   });
 });
 
 function formatMatches(matches) {
   return matches.map(({pattern, captures}) => ({
     pattern,
-    captures: captures.map(({name, node}) => ({
-      name,
-      text: node.text
-    }))
+    captures: formatCaptures(captures)
   }))
 }
+
+function formatCaptures(captures) {
+  return captures.map(({name, node}) => ({ name, text: node.text }))
+}
diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h
index 624658b4..a5c22eb9 100644
--- a/lib/include/tree_sitter/api.h
+++ b/lib/include/tree_sitter/api.h
@@ -673,19 +673,23 @@ int ts_query_capture_id_for_name(
  * Create a new cursor for executing a given query.
  *
  * The cursor stores the state that is needed to iteratively search
- * for matches. To use the query cursor:
- * 1. First call `ts_query_cursor_exec` to start running a given query on
-      a given syntax node.
- * 2. Then repeatedly call `ts_query_cursor_next` to iterate over the matches.
- *    This will return `false` when there are no more matches left.
- * 3. After each successful call to `ts_query_cursor_next`, you can call
- *    `ts_query_cursor_matched_pattern_index` to determine which pattern
- *     matched. You can also call `ts_query_cursor_matched_captures` to
- *     determine which nodes were captured, and by which capture names.
+ * for matches. To use the query cursor, first call `ts_query_cursor_exec`
+ * to start running a given query on a given syntax node. Then, there are
+ * two options for consuming the results of the query:
+ * 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the
+ *    *matches* in the order that they were found. Each match contains the
+ *    index of the pattern that matched, and an array of captures. Because
+ *    multiple patterns can match the same set of nodes, one match may contain
+ *    captures that appear *before* some of the captures from a previous match.
+ * 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the
+ *    individual *captures* in the order that they appear. This is useful if
+ *    you don't care about which pattern matched, and just want a single ordered
+ *    sequence of captures.
  *
- * If you don't care about finding all of the matches, you can stop calling
- * `ts_query_cursor_next` at any point. And you can start executing another
- *  query on another node by calling `ts_query_cursor_exec` again.
+ * If you don't care about consuming all of the results, you can stop calling
+ * `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point.
+ *  You can then start executing another query on another node by calling
+ *  `ts_query_cursor_exec` again.
  */
 TSQueryCursor *ts_query_cursor_new();
 
@@ -708,22 +712,26 @@ void ts_query_cursor_set_point_range(TSQueryCursor *, TSPoint, TSPoint);
 
 /**
  * Advance to the next match of the currently running query.
+ *
+ * If there is another match, write its pattern index to `pattern_index`,
+ * the number of captures to `capture_count`, and the captures themselves
+ * to `*captures`, and return `true`. Otherwise, return `false`.
  */
-bool ts_query_cursor_next(TSQueryCursor *);
-
-/**
- * Check which pattern matched.
- */
-uint32_t ts_query_cursor_matched_pattern_index(const TSQueryCursor *);
-
-/**
- * Check which pattern matched.
- */
-const TSQueryCapture *ts_query_cursor_matched_captures(
-  const TSQueryCursor *,
-  uint32_t *
+bool ts_query_cursor_next_match(
+  TSQueryCursor *self,
+  uint32_t *pattern_index,
+  uint32_t *capture_count,
+  const TSQueryCapture **captures
 );
 
+/**
+ * Advance to the next capture of the currently running query.
+ *
+ * If there is another capture, write it to `capture` and return `true`.
+ * Otherwise, return `false`.
+ */
+bool ts_query_cursor_next_capture(TSQueryCursor *, TSQueryCapture *capture);
+
 /**********************/
 /* Section - Language */
 /**********************/
diff --git a/lib/src/query.c b/lib/src/query.c
index 5c20f0f3..ea01cf24 100644
--- a/lib/src/query.c
+++ b/lib/src/query.c
@@ -54,11 +54,12 @@ typedef struct {
  * represented as one of these states.
  */
 typedef struct {
-  uint16_t step_index;
-  uint16_t pattern_index;
   uint16_t start_depth;
-  uint16_t capture_list_id;
-  uint16_t capture_count;
+  uint16_t pattern_index;
+  uint8_t step_index;
+  uint8_t capture_count;
+  uint8_t capture_list_id;
+  uint8_t consumed_capture_count;
 } QueryState;
 
 /*
@@ -96,12 +97,12 @@ struct TSQueryCursor {
   Array(QueryState) states;
   Array(QueryState) finished_states;
   CaptureListPool capture_list_pool;
-  bool ascending;
   uint32_t depth;
   uint32_t start_byte;
   uint32_t end_byte;
   TSPoint start_point;
   TSPoint end_point;
+  bool ascending;
 };
 
 static const TSQueryError PARENT_DONE = -1;
@@ -686,13 +687,8 @@ static QueryState *ts_query_cursor_copy_state(
   return new_state;
 }
 
-bool ts_query_cursor_next(TSQueryCursor *self) {
-  if (self->finished_states.size > 0) {
-    QueryState state = array_pop(&self->finished_states);
-    capture_list_pool_release(&self->capture_list_pool, state.capture_list_id);
-  }
-
-  while (self->finished_states.size == 0) {
+static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
+  do {
     if (self->ascending) {
       // When leaving a node, remove any unfinished states whose next step
       // needed to match something within that node.
@@ -784,6 +780,7 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
           .pattern_index = slice->pattern_index,
           .capture_list_id = capture_list_id,
           .capture_count = 0,
+          .consumed_capture_count = 0,
         }));
       }
 
@@ -821,6 +818,7 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
             .start_depth = self->depth,
             .capture_list_id = capture_list_id,
             .capture_count = 0,
+            .consumed_capture_count = 0,
           }));
 
           // Advance to the next pattern whose root node matches this node.
@@ -905,32 +903,108 @@ bool ts_query_cursor_next(TSQueryCursor *self) {
         self->ascending = true;
       }
     }
-  }
+  } while (self->finished_states.size == 0);
 
   return true;
 }
 
-uint32_t ts_query_cursor_matched_pattern_index(const TSQueryCursor *self) {
-  if (self->finished_states.size > 0) {
-    QueryState *state = array_back(&self->finished_states);
-    return state->pattern_index;
-  }
-  return 0;
-}
-
-const TSQueryCapture *ts_query_cursor_matched_captures(
-  const TSQueryCursor *self,
-  uint32_t *count
+bool ts_query_cursor_next_match(
+  TSQueryCursor *self,
+  uint32_t *pattern_index,
+  uint32_t *capture_count,
+  const TSQueryCapture **captures
 ) {
   if (self->finished_states.size > 0) {
-    QueryState *state = array_back(&self->finished_states);
-    *count = state->capture_count;
-    return capture_list_pool_get(
-      (CaptureListPool *)&self->capture_list_pool,
-      state->capture_list_id
-    );
+    QueryState state = array_pop(&self->finished_states);
+    capture_list_pool_release(&self->capture_list_pool, state.capture_list_id);
+  }
+
+  if (!ts_query_cursor__advance(self)) return false;
+
+  const QueryState *state = array_back(&self->finished_states);
+  *pattern_index = state->pattern_index;
+  *capture_count = state->capture_count;
+  *captures = capture_list_pool_get(
+    &self->capture_list_pool,
+    state->capture_list_id
+  );
+
+  return true;
+}
+
+bool ts_query_cursor_next_capture(
+  TSQueryCursor *self,
+  TSQueryCapture *capture
+) {
+  for (;;) {
+    if (self->finished_states.size > 0) {
+      // Find the position of the earliest capture in an unfinished match.
+      uint32_t first_unfinished_capture_byte = UINT32_MAX;
+      for (unsigned i = 0; i < self->states.size; i++) {
+        const QueryState *state = &self->states.contents[i];
+        if (state->capture_count > 0) {
+          const TSQueryCapture *captures = capture_list_pool_get(
+            &self->capture_list_pool,
+            state->capture_list_id
+          );
+          uint32_t capture_byte = ts_node_start_byte(captures[0].node);
+          if (capture_byte < first_unfinished_capture_byte) {
+            first_unfinished_capture_byte = capture_byte;
+          }
+        }
+      }
+
+      // Find the earliest capture in a finished match. It must not start
+      // after the first unfinished capture.
+      int first_finished_state_index = -1;
+      uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
+      for (unsigned i = 0; i < self->finished_states.size; i++) {
+        const QueryState *state = &self->finished_states.contents[i];
+        if (state->capture_count > state->consumed_capture_count) {
+          const TSQueryCapture *captures = capture_list_pool_get(
+            &self->capture_list_pool,
+            state->capture_list_id
+          );
+          uint32_t capture_byte = ts_node_start_byte(
+            captures[state->consumed_capture_count].node
+          );
+          if (capture_byte <= first_finished_capture_byte) {
+            first_finished_state_index = i;
+            first_finished_capture_byte = capture_byte;
+          }
+        } else {
+          capture_list_pool_release(
+            &self->capture_list_pool,
+            state->capture_list_id
+          );
+          array_erase(&self->finished_states, i);
+          i--;
+        }
+      }
+
+      if (first_finished_state_index != -1) {
+        QueryState *state = &self->finished_states.contents[
+          first_finished_state_index
+        ];
+        const TSQueryCapture *captures = capture_list_pool_get(
+          &self->capture_list_pool,
+          state->capture_list_id
+        );
+        *capture = captures[state->consumed_capture_count];
+        state->consumed_capture_count++;
+        if (state->consumed_capture_count == state->capture_count) {
+          capture_list_pool_release(
+            &self->capture_list_pool,
+            state->capture_list_id
+          );
+          array_erase(&self->finished_states, first_finished_state_index);
+        }
+        return true;
+      }
+    }
+
+    if (!ts_query_cursor__advance(self)) return false;
   }
-  return NULL;
 }
 
 #undef LOG

From 86205b9e6d2ceb2759f1e67e4e474cf73bb4a395 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld 
Date: Wed, 11 Sep 2019 14:44:49 -0700
Subject: [PATCH 020/558] Fix infinite loop on unterminated string in query

---
 cli/src/tests/query_test.rs | 5 +++--
 lib/src/query.c             | 7 ++++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs
index 5c37a9ab..2ac178fc 100644
--- a/cli/src/tests/query_test.rs
+++ b/cli/src/tests/query_test.rs
@@ -31,9 +31,10 @@ fn test_query_errors_on_invalid_syntax() {
             Err(QueryError::Syntax(24))
         );
 
+        // Return an error at the beginning of an unterminated string.
         assert_eq!(
-            Query::new(language, "(if_statement condition:)"),
-            Err(QueryError::Syntax(24))
+            Query::new(language, r#"(identifier) "h "#),
+            Err(QueryError::Syntax(13))
         );
     });
 }
diff --git a/lib/src/query.c b/lib/src/query.c
index ea01cf24..10d409ed 100644
--- a/lib/src/query.c
+++ b/lib/src/query.c
@@ -398,7 +398,12 @@ static TSQueryError ts_query_parse_pattern(
 
     // Parse the string content
     const char *string_content = stream->input;
-    while (stream->next && stream->next != '"') stream_advance(stream);
+    while (stream->next != '"') {
+      if (!stream_advance(stream)) {
+        stream_reset(stream, string_content - 1);
+        return TSQueryErrorSyntax;
+      }
+    }
     uint32_t length = stream->input - string_content;
 
     // Add a step for the node

From 0d913dec652dbd030d2e90c9478226aead5f86d9 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld 
Date: Wed, 11 Sep 2019 14:44:49 -0700
Subject: [PATCH 021/558] Fix layout issues in web-ui

---
 cli/src/web_ui.html | 9 +++++++--
 script/serve-docs   | 4 ++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/cli/src/web_ui.html b/cli/src/web_ui.html
index 62c23f3d..093b09ba 100644
--- a/cli/src/web_ui.html
+++ b/cli/src/web_ui.html
@@ -7,7 +7,7 @@
 
 
 
-  
+