Merge branch 'master' into query-cursor-api

This commit is contained in:
Max Brunsfeld 2021-06-02 11:40:48 -07:00
commit f3ea60e23f
11 changed files with 150 additions and 119 deletions

View file

@ -1645,6 +1645,7 @@ fn test_query_matches_with_too_many_permutations_to_track() {
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
cursor.set_match_limit(32);
let matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
// For this pathological query, some match permutations will be dropped.
@ -1686,6 +1687,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() {
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
cursor.set_match_limit(32);
let matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
assert_eq!(
@ -2905,6 +2907,7 @@ fn test_query_captures_with_too_many_nested_results() {
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
cursor.set_match_limit(32);
let captures = cursor.captures(&query, tree.root_node(), source.as_bytes());
let captures = collect_captures(captures, &query, &source);

View file

@ -3,7 +3,6 @@ use super::generate::parse_grammar::GrammarJSON;
use std::ffi::{OsStr, OsString};
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use which::which;
@ -23,15 +22,15 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu
let grammar_name = get_grammar_name(&src_dir)?;
let output_filename = format!("tree-sitter-{}.wasm", grammar_name);
let emcc_bin = if cfg!(windows) { "emcc.bat" } else { "emcc" };
let emcc_path = which(emcc_bin)
.ok()
.and_then(|p| Command::new(&p).output().and(Ok(p)).ok());
let mut command;
if !force_docker {
let emcc_path = get_emcc_path();
if emcc_path.is_ok() {
command = Command::new(emcc_path.unwrap());
command.current_dir(&language_dir);
} else {
return Err(emcc_path.unwrap_err());
}
if !force_docker && emcc_path.is_some() {
command = Command::new(emcc_path.unwrap());
command.current_dir(&language_dir);
} else if Command::new("docker").output().is_ok() {
command = Command::new("docker");
command.args(&["run", "--rm"]);
@ -123,23 +122,3 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu
Ok(())
}
fn get_emcc_path() -> Result<PathBuf> {
let emcc_bin;
if cfg!(windows) {
emcc_bin = "emcc.bat";
} else {
emcc_bin = "emcc";
};
let emcc_which = which(emcc_bin);
let emcc_path;
if emcc_which.is_ok() {
emcc_path = emcc_which.unwrap();
} else {
return Error::err("emcc was not found on PATH".to_string());
}
if Command::new(&emcc_path).output().is_ok() {
return Ok(emcc_path);
}
return Error::err("emcc binary doesn't work properly".to_string());
}

View file

@ -106,7 +106,7 @@ The main [`tree-sitter/tree-sitter`](https://github.com/tree-sitter/tree-sitter)
There are also several other dependent repositories that contain other published packages:
- [`tree-sitter/node-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Node.js bindings to the core library, published as [`tree-sitter`](https://www.npmjs.com/package/tree-sitter) on npmjs.com
- [`tree-sitter/node-tree-sitter`](https://github.com/tree-sitter/node-tree-sitter) - Node.js bindings to the core library, published as [`tree-sitter`](https://www.npmjs.com/package/tree-sitter) on npmjs.com
- [`tree-sitter/py-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Python bindings to the core library, published as [`tree-sitter`](https://pypi.org/project/tree-sitter) on [PyPI.org](https://pypi.org).
## Publishing New Releases

View file

@ -730,15 +730,27 @@ extern "C" {
pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode);
}
extern "C" {
#[doc = " Check if this cursor has exceeded its maximum number of in-progress"]
#[doc = " matches."]
#[doc = " Manage the maximum number of in-progress matches allowed by this query"]
#[doc = " cursor."]
#[doc = ""]
#[doc = " Currently, query cursors have a fixed capacity for storing lists"]
#[doc = " of in-progress captures. If this capacity is exceeded, then the"]
#[doc = " earliest-starting match will silently be dropped to make room for"]
#[doc = " further matches."]
#[doc = " Query cursors have a maximum capacity for storing lists of in-progress"]
#[doc = " captures. If this capacity is exceeded, then the earliest-starting match will"]
#[doc = " silently be dropped to make room for further matches."]
#[doc = ""]
#[doc = " By default, this limit is 65,536 pending matches, which is effectively"]
#[doc = " unlimited for most queries and syntax trees. You can optionally set this to a"]
#[doc = " lower number if you want to have (and check) a tighter bound on query"]
#[doc = " complexity."]
#[doc = ""]
#[doc = " If you update the match limit, it must be > 0 and <= 65536."]
pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool;
}
extern "C" {
/// Foreign declaration: reads the match limit of the given query cursor and
/// returns it as a `u32`. Takes the cursor by const pointer.
pub fn ts_query_cursor_match_limit(arg1: *const TSQueryCursor) -> u32;
}
extern "C" {
/// Foreign declaration: stores `arg2` as the match limit of the query cursor
/// pointed to by `arg1`. Takes the cursor by mutable pointer and returns
/// nothing.
pub fn ts_query_cursor_set_match_limit(arg1: *mut TSQueryCursor, arg2: u32);
}
extern "C" {
#[doc = " Set the range of bytes or (row, column) positions in which the query"]
#[doc = " will be executed."]

View file

@ -1649,6 +1649,19 @@ impl QueryCursor {
}
}
/// Report the cap on simultaneously in-progress matches that is currently
/// configured for this cursor.
pub fn match_limit(&self) -> u32 {
    let raw_cursor = self.ptr.as_ptr();
    // NOTE(review): soundness relies on `self.ptr` referring to a live
    // cursor for the lifetime of `self` — confirm against the constructor.
    unsafe { ffi::ts_query_cursor_match_limit(raw_cursor) }
}
/// Change the cap on simultaneously in-progress matches for this cursor.
///
/// The limit must be > 0 and <= 65536. The value is handed to the C library
/// as-is; no range check is performed here.
pub fn set_match_limit(&mut self, limit: u32) {
    let raw_cursor = self.ptr.as_ptr();
    // NOTE(review): soundness relies on `self.ptr` referring to a live
    // cursor for the lifetime of `self` — confirm against the constructor.
    unsafe { ffi::ts_query_cursor_set_match_limit(raw_cursor, limit) }
}
/// Check if, on its last execution, this cursor exceeded its maximum number of
/// in-progress matches.
pub fn did_exceed_match_limit(&self) -> bool {

View file

@ -594,9 +594,15 @@ void ts_query_matches_wasm(
uint32_t start_row,
uint32_t start_column,
uint32_t end_row,
uint32_t end_column
uint32_t end_column,
uint32_t match_limit
) {
if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new();
if (match_limit == 0) {
ts_query_cursor_set_match_limit(scratch_query_cursor, UINT32_MAX);
} else {
ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit);
}
TSNode node = unmarshal_node(tree);
TSPoint start_point = {start_row, code_unit_to_byte(start_column)};
@ -635,9 +641,15 @@ void ts_query_captures_wasm(
uint32_t start_row,
uint32_t start_column,
uint32_t end_row,
uint32_t end_column
uint32_t end_column,
uint32_t match_limit
) {
if (!scratch_query_cursor) scratch_query_cursor = ts_query_cursor_new();
if (match_limit == 0) {
ts_query_cursor_set_match_limit(scratch_query_cursor, UINT32_MAX);
} else {
ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit);
}
TSNode node = unmarshal_node(tree);
TSPoint start_point = {start_row, code_unit_to_byte(start_column)};

View file

@ -953,9 +953,17 @@ class Query {
this[0] = 0;
}
matches(node, startPosition, endPosition) {
matches(node, startPosition, endPosition, options) {
if (!startPosition) startPosition = ZERO_POINT;
if (!endPosition) endPosition = ZERO_POINT;
if (!options) options = {};
let matchLimit = options.matchLimit;
if (typeof matchLimit === 'undefined') {
matchLimit = 0;
} else if (typeof matchLimit !== 'number') {
throw new Error('Arguments must be numbers');
}
marshalNode(node);
@ -965,7 +973,8 @@ class Query {
startPosition.row,
startPosition.column,
endPosition.row,
endPosition.column
endPosition.column,
matchLimit
);
const rawCount = getValue(TRANSFER_BUFFER, 'i32');
@ -1000,9 +1009,17 @@ class Query {
return result;
}
captures(node, startPosition, endPosition) {
captures(node, startPosition, endPosition, options) {
if (!startPosition) startPosition = ZERO_POINT;
if (!endPosition) endPosition = ZERO_POINT;
if (!options) options = {};
let matchLimit = options.matchLimit;
if (typeof matchLimit === 'undefined') {
matchLimit = 0;
} else if (typeof matchLimit !== 'number') {
throw new Error('Arguments must be numbers');
}
marshalNode(node);
@ -1012,7 +1029,8 @@ class Query {
startPosition.row,
startPosition.column,
endPosition.row,
endPosition.column
endPosition.column,
matchLimit
);
const count = getValue(TRANSFER_BUFFER, 'i32');

View file

@ -256,7 +256,7 @@ describe("Query", () => {
(array (identifier) @pre (identifier) @post)
`);
const captures = query.captures(tree.rootNode);
const captures = query.captures(tree.rootNode, null, null, {matchLimit: 32});
assert.ok(query.didExceedMatchLimit());
});
});

View file

@ -799,15 +799,19 @@ void ts_query_cursor_delete(TSQueryCursor *);
void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode);
/**
* Check if this cursor has exceeded its maximum number of in-progress
* matches.
* Manage the maximum number of in-progress matches allowed by this query
* cursor.
*
* Currently, query cursors have a fixed capacity for storing lists
* of in-progress captures. If this capacity is exceeded, then the
* earliest-starting match will silently be dropped to make room for
* further matches.
* Query cursors have an optional maximum capacity for storing lists of
* in-progress captures. If this capacity is exceeded, then the
* earliest-starting match will silently be dropped to make room for further
* matches. This maximum capacity is optional: by default, query cursors allow
* any number of pending matches, dynamically allocating new space for them as
* needed as the query is executed.
*/
bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *);
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *);
void ts_query_cursor_set_match_limit(TSQueryCursor *, uint32_t);
/**
* Set the range of bytes or (row, column) positions in which the query

View file

@ -1,42 +0,0 @@
#ifndef TREE_SITTER_BITS_H_
#define TREE_SITTER_BITS_H_
#include <stdint.h>
// Build a 32-bit mask with exactly one bit set for the given index.
// Index 0 maps to the highest-order bit and index 31 to the lowest-order bit.
static inline uint32_t bitmask_for_index(uint16_t id) {
  const int shift = 31 - id;
  return 1u << shift;
}
#ifdef __TINYC__
// Portable fallback for compilers without a count-leading-zeros intrinsic.
//
// Returns the number of zero bits above the highest set bit of `x`, or 32
// when `x` is zero.
//
// Bit tricks taken from the Hacker's Delight book
// See also https://graphics.stanford.edu/~seander/bithacks.html
static inline uint32_t count_leading_zeros(uint32_t x) {
  if (x == 0) return 32;

  // Smear the highest set bit into every lower position, so that the number
  // of set bits equals the position of the highest set bit plus one. Without
  // this step the code below would merely count the set bits of the input.
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;

  // Parallel population count of the smeared value.
  x = x - ((x >> 1) & 0x55555555);
  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
  uint32_t set_bits = (((x + (x >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;

  // Every bit that is not set after smearing is a leading zero.
  return 32 - set_bits;
}
#elif defined _WIN32 && !defined __GNUC__
#include <intrin.h>
// MSVC implementation built on the `_BitScanReverse` intrinsic.
//
// Returns the number of zero bits above the highest set bit of `x`, or 32
// when `x` is zero (the intrinsic's output is not used in that case).
static inline uint32_t count_leading_zeros(uint32_t x) {
  if (x == 0) return 32;
  // The intrinsic writes the bit index through an `unsigned long *`, so the
  // local must have exactly that type rather than `uint32_t`.
  unsigned long highest_set_bit;
  _BitScanReverse(&highest_set_bit, x);
  return 31 - (uint32_t)highest_set_bit;
}
#else
// Default implementation built on the compiler's `__builtin_clz`.
//
// Zero is answered directly with 32 and never passed to the builtin.
static inline uint32_t count_leading_zeros(uint32_t x) {
  return x != 0 ? (uint32_t)__builtin_clz(x) : 32;
}
#endif
#endif // TREE_SITTER_BITS_H_

View file

@ -1,7 +1,6 @@
#include "tree_sitter/api.h"
#include "./alloc.h"
#include "./array.h"
#include "./bits.h"
#include "./language.h"
#include "./point.h"
#include "./tree_cursor.h"
@ -12,7 +11,6 @@
// #define LOG(...) fprintf(stderr, __VA_ARGS__)
#define LOG(...)
#define MAX_CAPTURE_LIST_COUNT 32
#define MAX_STEP_CAPTURE_COUNT 3
#define MAX_STATE_PREDECESSOR_COUNT 100
#define MAX_ANALYSIS_STATE_DEPTH 12
@ -157,10 +155,10 @@ typedef struct {
*/
typedef struct {
uint32_t id;
uint32_t capture_list_id;
uint16_t start_depth;
uint16_t step_index;
uint16_t pattern_index;
uint16_t capture_list_id;
uint16_t consumed_capture_count: 12;
bool seeking_immediate_match: 1;
bool has_in_progress_alternatives: 1;
@ -177,9 +175,17 @@ typedef Array(TSQueryCapture) CaptureList;
* currently in use by a query state.
*/
typedef struct {
CaptureList list[MAX_CAPTURE_LIST_COUNT];
Array(CaptureList) list;
CaptureList empty_list;
uint32_t usage_map;
// The maximum number of capture lists that we are allowed to allocate. We
// never allow `list` to allocate more entries than this, dropping pending
// matches if needed to stay under the limit.
uint32_t max_capture_list_count;
// The number of capture lists allocated in `list` that are not currently in
// use. We reuse those existing-but-unused capture lists before trying to
// allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
// list's length to indicate that it's not in use.
uint32_t free_capture_list_count;
} CaptureListPool;
/*
@ -361,54 +367,72 @@ static uint32_t stream_offset(Stream *self) {
static CaptureListPool capture_list_pool_new(void) {
return (CaptureListPool) {
.list = array_new(),
.empty_list = array_new(),
.usage_map = UINT32_MAX,
.max_capture_list_count = UINT32_MAX,
.free_capture_list_count = 0,
};
}
static void capture_list_pool_reset(CaptureListPool *self) {
self->usage_map = UINT32_MAX;
for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) {
array_clear(&self->list[i]);
for (uint16_t i = 0; i < self->list.size; i++) {
// This invalid size means that the list is not in use.
self->list.contents[i].size = UINT32_MAX;
}
self->free_capture_list_count = self->list.size;
}
static void capture_list_pool_delete(CaptureListPool *self) {
for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) {
array_delete(&self->list[i]);
for (uint16_t i = 0; i < self->list.size; i++) {
array_delete(&self->list.contents[i]);
}
array_delete(&self->list);
}
static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) {
if (id >= MAX_CAPTURE_LIST_COUNT) return &self->empty_list;
return &self->list[id];
if (id >= self->list.size) return &self->empty_list;
return &self->list.contents[id];
}
static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) {
assert(id < MAX_CAPTURE_LIST_COUNT);
return &self->list[id];
assert(id < self->list.size);
return &self->list.contents[id];
}
static bool capture_list_pool_is_empty(const CaptureListPool *self) {
return self->usage_map == 0;
// The capture list pool is empty if all allocated lists are in use, and we
// have reached the maximum allowed number of allocated lists.
return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
}
static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
// In the usage_map bitmask, ones represent free lists, and zeros represent
// lists that are in use. A free list id can quickly be found by counting
// the leading zeros in the usage map. An id of zero corresponds to the
// highest-order bit in the bitmask.
uint16_t id = count_leading_zeros(self->usage_map);
if (id >= MAX_CAPTURE_LIST_COUNT) return NONE;
self->usage_map &= ~bitmask_for_index(id);
array_clear(&self->list[id]);
return id;
// First see if any already allocated capture list is currently unused.
if (self->free_capture_list_count > 0) {
for (uint16_t i = 0; i < self->list.size; i++) {
if (self->list.contents[i].size == UINT32_MAX) {
array_clear(&self->list.contents[i]);
self->free_capture_list_count--;
return i;
}
}
}
// Otherwise allocate and initialize a new capture list, as long as that
// doesn't put us over the requested maximum.
uint32_t i = self->list.size;
if (i >= self->max_capture_list_count) {
return NONE;
}
CaptureList list;
array_init(&list);
array_push(&self->list, list);
return i;
}
static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
if (id >= MAX_CAPTURE_LIST_COUNT) return;
array_clear(&self->list[id]);
self->usage_map |= bitmask_for_index(id);
if (id >= self->list.size) return;
self->list.contents[id].size = UINT32_MAX;
self->free_capture_list_count++;
}
/**************
@ -2302,6 +2326,14 @@ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
return self->did_exceed_match_limit;
}
// Report the cursor's match limit, which is stored as the maximum capture
// list count of its capture list pool.
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) {
  uint32_t limit = self->capture_list_pool.max_capture_list_count;
  return limit;
}
// Update the cursor's match limit by overwriting the maximum capture list
// count of its capture list pool. The value is stored without validation.
void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
  uint32_t *limit_slot = &self->capture_list_pool.max_capture_list_count;
  *limit_slot = limit;
}
void ts_query_cursor_exec(
TSQueryCursor *self,
const TSQuery *query,