tree-sitter/lib/binding_rust/lib.rs

1685 lines
60 KiB
Rust
Raw Normal View History

2016-07-10 14:03:00 -07:00
mod ffi;
mod util;
2016-07-10 14:03:00 -07:00
extern crate regex;
extern crate serde;
extern crate serde_derive;
2018-12-13 16:32:22 -08:00
extern crate serde_json;
2018-10-13 14:09:36 -07:00
#[cfg(unix)]
use std::os::unix::io::AsRawFd;
2019-02-07 12:29:20 -08:00
use std::ffi::CStr;
2016-07-10 14:03:00 -07:00
use std::marker::PhantomData;
use std::mem::MaybeUninit;
2018-06-19 16:19:37 -07:00
use std::os::raw::{c_char, c_void};
2019-09-23 16:55:28 -07:00
use std::ptr::NonNull;
use std::sync::atomic::AtomicUsize;
use std::{char, fmt, iter, ptr, slice, str, u16};
2016-07-10 14:03:00 -07:00
2019-11-08 12:29:26 -08:00
/// The latest ABI version that is supported by the current version of the
/// library.
///
/// When Languages are generated by the Tree-sitter CLI, they are
/// assigned an ABI version number that corresponds to the current CLI version.
/// The Tree-sitter library is generally backwards-compatible with languages
/// generated using older CLI versions, but is not forwards-compatible.
2019-02-07 12:29:20 -08:00
pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION;
2019-11-08 12:29:26 -08:00
/// The earliest ABI version that is supported by the current version of the
/// library.
///
/// `Parser::set_language` rejects any `Language` whose version is lower than this value.
pub const MIN_COMPATIBLE_LANGUAGE_VERSION: usize = ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION;
/// The text of the `tree_sitter/parser.h` C header, embedded into the library
/// at compile time via `include_str!`.
pub const PARSER_HEADER: &str = include_str!("../include/tree_sitter/parser.h");
2019-11-08 12:29:26 -08:00
/// An opaque object that defines how to parse a particular language. The code for each
/// `Language` is generated by the Tree-sitter CLI.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(transparent)]
2018-10-09 08:23:02 -07:00
pub struct Language(*const ffi::TSLanguage);
2016-07-10 14:03:00 -07:00
2019-11-08 12:29:26 -08:00
/// A tree that represents the syntactic structure of a source code file.
///
/// Wraps a non-null pointer to the underlying C `TSTree`.
pub struct Tree(NonNull<ffi::TSTree>);
2018-05-18 14:06:49 -07:00
2019-11-08 12:29:26 -08:00
/// A position in a multi-line text document, in terms of rows and columns.
///
/// Rows and columns are zero-based. The derived ordering compares `row`
/// first and `column` second.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct Point {
    /// Zero-based row of the position.
    pub row: usize,
    /// Zero-based column of the position within its row.
    pub column: usize,
}
2019-11-08 12:29:26 -08:00
/// A range of positions in a multi-line text document, both in terms of bytes and of
/// rows and columns.
///
/// The derived ordering compares `start_byte`, then `end_byte`, then
/// `start_point`, then `end_point`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Range {
/// Byte offset at which the range starts.
pub start_byte: usize,
/// Byte offset at which the range ends.
pub end_byte: usize,
/// Row/column position at which the range starts.
pub start_point: Point,
/// Row/column position at which the range ends.
pub end_point: Point,
}
2019-11-08 12:29:26 -08:00
/// A summary of a change to a text document.
2018-05-18 11:15:37 -07:00
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
2016-07-10 14:03:00 -07:00
pub struct InputEdit {
pub start_byte: usize,
pub old_end_byte: usize,
pub new_end_byte: usize,
2016-07-10 14:03:00 -07:00
pub start_position: Point,
pub old_end_position: Point,
pub new_end_position: Point,
}
2019-11-08 12:29:26 -08:00
/// A single node within a syntax `Tree`.
2018-12-13 10:08:50 -08:00
#[derive(Clone, Copy)]
#[repr(transparent)]
2016-07-10 14:03:00 -07:00
pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>);
2019-11-08 12:29:26 -08:00
/// A stateful object that this is used to produce a `Tree` based on some source code.
2019-09-23 16:55:28 -07:00
pub struct Parser(NonNull<ffi::TSParser>);
2016-07-10 14:03:00 -07:00
2019-11-08 12:29:26 -08:00
/// A type of log message.
///
/// Passed as the first argument to the logging callback installed with
/// `Parser::set_logger`.
#[derive(Debug, PartialEq, Eq)]
pub enum LogType {
    /// The message was reported with the C log type `TSLogTypeParse`.
    Parse,
    /// The message was reported with any other C log type.
    Lex,
}
2019-11-08 12:29:26 -08:00
/// A callback that receives log messages during parsing.
///
/// The callback is boxed and is called with the type of the message and its text.
type Logger<'a> = Box<dyn FnMut(LogType, &str) + 'a>;
/// A stateful object for walking a syntax `Tree` efficiently.
///
/// Wraps the C `TSTreeCursor` value; the `'a` lifetime is carried only by a
/// zero-sized `PhantomData` marker.
pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>);
/// A set of patterns that match nodes in a syntax tree.
#[derive(Debug)]
pub struct Query {
2019-09-23 16:55:28 -07:00
ptr: NonNull<ffi::TSQuery>,
capture_names: Vec<String>,
text_predicates: Vec<Box<[TextPredicate]>>,
property_settings: Vec<Box<[QueryProperty]>>,
property_predicates: Vec<Box<[(QueryProperty, bool)]>>,
}
2019-11-08 12:29:26 -08:00
/// A stateful object for executing a `Query` on a syntax `Tree`.
2019-09-23 16:55:28 -07:00
pub struct QueryCursor(NonNull<ffi::TSQueryCursor>);
2019-11-08 12:29:26 -08:00
/// A key-value pair associated with a particular pattern in a `Query`.
#[derive(Debug, PartialEq, Eq)]
pub struct QueryProperty {
/// The property's key.
pub key: Box<str>,
/// The property's value, if one was given.
pub value: Option<Box<str>>,
/// The id of a capture associated with the property, if any.
/// NOTE(review): presumably an index into the query's capture names -- confirm.
pub capture_id: Option<usize>,
}
/// A match of a `Query` to a particular set of `Node`s.
pub struct QueryMatch<'a> {
/// Index of the pattern that produced this match.
/// NOTE(review): presumably the pattern's position within the `Query` -- confirm.
pub pattern_index: usize,
/// The nodes captured by this match.
pub captures: &'a [QueryCapture<'a>],
// Numeric id of this match.
id: u32,
// Raw pointer to a C `TSQueryCursor`.
// NOTE(review): presumably the cursor that produced this match -- confirm.
cursor: *mut ffi::TSQueryCursor,
}
2019-11-08 12:29:26 -08:00
/// A sequence of `QueryCapture`s within a `QueryMatch`.
///
/// `T` is the byte-like text type produced by the text callback.
pub struct QueryCaptures<'a, T: AsRef<[u8]>> {
// Raw pointer to a C `TSQueryCursor`.
ptr: *mut ffi::TSQueryCursor,
// The query being executed.
query: &'a Query,
// Callback that returns a value of type `T` for a given node.
// NOTE(review): presumably the node's source text -- confirm.
text_callback: Box<dyn FnMut(Node<'a>) -> T + 'a>,
}
2019-11-08 12:29:26 -08:00
/// A particular `Node` that has been captured with a particular name within a `Query`.
///
/// NOTE(review): `#[repr(C)]` presumably keeps the layout identical to the C
/// capture struct -- confirm before reordering fields.
#[derive(Clone, Copy)]
#[repr(C)]
pub struct QueryCapture<'a> {
/// The captured node.
pub node: Node<'a>,
/// Numeric index identifying the capture.
/// NOTE(review): presumably an index into the query's capture names -- confirm.
pub index: u32,
}
2019-11-08 12:29:26 -08:00
/// An error that occurred when trying to assign an incompatible `Language` to a `Parser`.
#[derive(Debug, PartialEq, Eq)]
pub struct LanguageError {
// The ABI version reported by the rejected language.
version: usize,
}
/// An error that occurred in `Parser::set_included_ranges`.
///
/// The wrapped value is the index of the first range found to be out of order
/// or inverted, or `0` when no such range could be identified.
#[derive(Debug, PartialEq, Eq)]
pub struct IncludedRangesError(pub usize);
2019-11-08 12:29:26 -08:00
/// An error that occurred when trying to create a `Query`.
///
/// NOTE(review): the `usize` carried by most variants is presumably a
/// location within the query source, and the `String` a message or the
/// offending name -- confirm against `Query::new`.
#[derive(Debug, PartialEq, Eq)]
pub enum QueryError {
    Syntax(usize, String),
    NodeType(usize, String),
    Field(usize, String),
    Capture(usize, String),
    Predicate(String),
}
2019-11-08 12:29:26 -08:00
// A text-based predicate attached to a query pattern.
// NOTE(review): the `u32` values are presumably capture indices, and the
// variants presumably test a capture's text against a string, another
// capture, or a regular expression -- confirm against the query evaluator.
#[derive(Debug)]
enum TextPredicate {
CaptureEqString(u32, String),
CaptureEqCapture(u32, u32),
CaptureMatchString(u32, regex::bytes::Regex),
}
2018-10-09 08:23:02 -07:00
impl Language {
2019-11-08 12:29:26 -08:00
/// Get the ABI version number that indicates which version of the Tree-sitter CLI
/// that was used to generate this `Language`.
pub fn version(&self) -> usize {
unsafe { ffi::ts_language_version(self.0) as usize }
}
2019-11-08 12:29:26 -08:00
/// Get the number of distinct node types in this language.
2018-10-09 08:23:02 -07:00
pub fn node_kind_count(&self) -> usize {
unsafe { ffi::ts_language_symbol_count(self.0) as usize }
}
2019-11-08 12:29:26 -08:00
/// Get the name of the node kind for the given numerical id.
2018-10-09 08:23:02 -07:00
pub fn node_kind_for_id(&self, id: u16) -> &'static str {
unsafe { CStr::from_ptr(ffi::ts_language_symbol_name(self.0, id)) }
.to_str()
.unwrap()
}
/// Get the numeric id for the given node kind.
pub fn id_for_node_kind(&self, kind: &str, named: bool) -> u16 {
unsafe {
ffi::ts_language_symbol_for_name(
self.0,
kind.as_bytes().as_ptr() as *const c_char,
kind.len() as u32,
named,
)
}
}
2019-11-08 12:29:26 -08:00
/// Check if the node type for the given numerical id is named (as opposed
/// to an anonymous node type).
2018-10-09 08:23:02 -07:00
pub fn node_kind_is_named(&self, id: u16) -> bool {
unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular }
}
pub fn node_kind_is_visible(&self, id: u16) -> bool {
unsafe {
ffi::ts_language_symbol_type(self.0, id) <= ffi::TSSymbolType_TSSymbolTypeAnonymous
}
}
2019-11-08 12:29:26 -08:00
/// Get the number of distinct field names in this language.
pub fn field_count(&self) -> usize {
unsafe { ffi::ts_language_field_count(self.0) as usize }
}
2019-11-08 12:29:26 -08:00
/// Get the field names for the given numerical id.
pub fn field_name_for_id(&self, field_id: u16) -> &'static str {
unsafe { CStr::from_ptr(ffi::ts_language_field_name_for_id(self.0, field_id)) }
.to_str()
.unwrap()
}
2019-11-08 12:29:26 -08:00
/// Get the numerical id for the given field name.
pub fn field_id_for_name(&self, field_name: impl AsRef<[u8]>) -> Option<u16> {
let field_name = field_name.as_ref();
let id = unsafe {
ffi::ts_language_field_id_for_name(
self.0,
field_name.as_ptr() as *const c_char,
field_name.len() as u32,
)
};
if id == 0 {
None
} else {
Some(id)
}
}
2018-10-09 08:23:02 -07:00
}
impl fmt::Display for LanguageError {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(
f,
"Incompatible language version {}. Expected minimum {}, maximum {}",
2019-11-08 12:29:26 -08:00
self.version, MIN_COMPATIBLE_LANGUAGE_VERSION, LANGUAGE_VERSION,
)
}
}
2016-07-10 14:03:00 -07:00
impl Parser {
/// Create a new parser.
2016-07-10 14:03:00 -07:00
pub fn new() -> Parser {
unsafe {
let parser = ffi::ts_parser_new();
2019-09-23 16:55:28 -07:00
Parser(NonNull::new_unchecked(parser))
2016-07-10 14:03:00 -07:00
}
}
/// Set the language that the parser should use for parsing.
///
/// Returns a Result indicating whether or not the language was successfully
/// assigned. True means assignment succeeded. False means there was a version
/// mismatch: the language was generated with an incompatible version of the
2019-11-08 12:43:26 -08:00
/// Tree-sitter CLI. Check the language's version using [Language::version]
/// and compare it to this library's [LANGUAGE_VERSION](LANGUAGE_VERSION) and
/// [MIN_COMPATIBLE_LANGUAGE_VERSION](MIN_COMPATIBLE_LANGUAGE_VERSION) constants.
pub fn set_language(&mut self, language: Language) -> Result<(), LanguageError> {
let version = language.version();
2019-11-08 12:29:26 -08:00
if version < MIN_COMPATIBLE_LANGUAGE_VERSION || version > LANGUAGE_VERSION {
Err(LanguageError { version })
} else {
unsafe {
2019-09-23 16:55:28 -07:00
ffi::ts_parser_set_language(self.0.as_ptr(), language.0);
}
Ok(())
}
}
2019-11-08 12:29:26 -08:00
/// Get the parser's current language.
pub fn language(&self) -> Option<Language> {
2019-09-23 16:55:28 -07:00
let ptr = unsafe { ffi::ts_parser_language(self.0.as_ptr()) };
if ptr.is_null() {
None
} else {
Some(Language(ptr))
2016-07-10 14:03:00 -07:00
}
}
2019-11-08 12:29:26 -08:00
/// Get the parser's current logger.
2018-05-18 14:06:49 -07:00
pub fn logger(&self) -> Option<&Logger> {
2019-09-23 16:55:28 -07:00
let logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) };
2018-05-18 14:06:49 -07:00
unsafe { (logger.payload as *mut Logger).as_ref() }
}
2019-11-08 12:29:26 -08:00
/// Set the logging callback that a parser should use during parsing.
2018-05-18 14:06:49 -07:00
pub fn set_logger(&mut self, logger: Option<Logger>) {
2019-09-23 16:55:28 -07:00
let prev_logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) };
2018-05-18 14:06:49 -07:00
if !prev_logger.payload.is_null() {
drop(unsafe { Box::from_raw(prev_logger.payload as *mut Logger) });
2018-05-18 14:06:49 -07:00
}
2016-07-10 14:03:00 -07:00
let c_logger;
if let Some(logger) = logger {
2018-05-18 14:06:49 -07:00
let container = Box::new(logger);
unsafe extern "C" fn log(
payload: *mut c_void,
c_log_type: ffi::TSLogType,
c_message: *const c_char,
) {
let callback = (payload as *mut Logger).as_mut().unwrap();
if let Ok(message) = CStr::from_ptr(c_message).to_str() {
let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse {
LogType::Parse
} else {
LogType::Lex
};
callback(log_type, message);
}
};
let raw_container = Box::into_raw(container);
c_logger = ffi::TSLogger {
2018-05-18 14:06:49 -07:00
payload: raw_container as *mut c_void,
log: Some(log),
};
} else {
2018-10-09 08:23:02 -07:00
c_logger = ffi::TSLogger {
payload: ptr::null_mut(),
log: None,
};
}
2016-07-10 14:03:00 -07:00
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_parser_set_logger(self.0.as_ptr(), c_logger) };
2016-07-10 14:03:00 -07:00
}
2019-11-08 12:29:26 -08:00
/// Set the destination to which the parser should write debugging graphs
/// during parsing. The graphs are formatted in the DOT language. You may want
/// to pipe these graphs directly to a `dot(1)` process in order to generate
/// SVG output.
#[cfg(unix)]
pub fn print_dot_graphs(&mut self, file: &impl AsRawFd) {
let fd = file.as_raw_fd();
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::dup(fd)) }
}
2019-11-08 12:29:26 -08:00
/// Stop the parser from printing debugging graphs while parsing.
pub fn stop_printing_dot_graphs(&mut self) {
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), -1) }
}
/// Parse a slice of UTF8 text.
///
/// # Arguments:
/// * `text` The UTF8-encoded text to parse.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
///
/// Returns a [Tree] if parsing succeeded, or `None` if:
/// * The parser has not yet had a language assigned with [Parser::set_language]
/// * The timeout set with [Parser::set_timeout_micros] expired
/// * The cancellation flag set with [Parser::set_cancellation_flag] was flipped
pub fn parse(&mut self, text: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option<Tree> {
let bytes = text.as_ref();
let len = bytes.len();
self.parse_with(
&mut |i, _| if i < len { &bytes[i..] } else { &[] },
old_tree,
)
}
/// Parse a slice of UTF16 text.
///
/// # Arguments:
/// * `text` The UTF16-encoded text to parse.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
pub fn parse_utf16(
2016-07-10 14:03:00 -07:00
&mut self,
input: impl AsRef<[u16]>,
2018-05-18 14:27:08 -07:00
old_tree: Option<&Tree>,
2016-07-10 14:03:00 -07:00
) -> Option<Tree> {
let code_points = input.as_ref();
let len = code_points.len();
self.parse_utf16_with(
&mut |i, _| if i < len { &code_points[i..] } else { &[] },
old_tree,
)
2018-10-08 22:33:43 -07:00
}
/// Parse UTF8 text provided in chunks by a callback.
///
/// # Arguments:
/// * `callback` A function that takes a byte offset and position and
/// returns a slice of UTF8-encoded text starting at that byte offset
/// and position. The slices can be of any length. If the given position
/// is at the end of the text, the callback should return an empty slice.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
pub fn parse_with<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>(
2018-10-08 22:33:43 -07:00
&mut self,
callback: &mut F,
2018-10-08 22:33:43 -07:00
old_tree: Option<&Tree>,
) -> Option<Tree> {
// A pointer to this payload is passed on every call to the `read` C function.
// The payload contains two things:
// 1. A reference to the rust `callback`.
// 2. The text that was returned from the previous call to `callback`.
// This allows the callback to return owned values like vectors.
let mut payload: (&mut F, Option<T>) = (callback, None);
// This C function is passed to Tree-sitter as the input callback.
unsafe extern "C" fn read<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>(
2016-07-10 14:03:00 -07:00
payload: *mut c_void,
2018-06-19 16:19:37 -07:00
byte_offset: u32,
position: ffi::TSPoint,
2016-07-10 14:03:00 -07:00
bytes_read: *mut u32,
) -> *const c_char {
let (callback, text) = (payload as *mut (&mut F, Option<T>)).as_mut().unwrap();
*text = Some(callback(byte_offset as usize, position.into()));
let slice = text.as_ref().unwrap().as_ref();
*bytes_read = slice.len() as u32;
return slice.as_ptr() as *const c_char;
2016-07-10 14:03:00 -07:00
};
let c_input = ffi::TSInput {
payload: &mut payload as *mut (&mut F, Option<T>) as *mut c_void,
read: Some(read::<T, F>),
2016-07-10 14:03:00 -07:00
encoding: ffi::TSInputEncoding_TSInputEncodingUTF8,
};
2019-09-23 16:55:28 -07:00
let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr());
unsafe {
let c_new_tree = ffi::ts_parser_parse(self.0.as_ptr(), c_old_tree, c_input);
NonNull::new(c_new_tree).map(Tree)
2016-07-10 14:03:00 -07:00
}
}
/// Parse UTF16 text provided in chunks by a callback.
///
/// # Arguments:
/// * `callback` A function that takes a code point offset and position and
/// returns a slice of UTF16-encoded text starting at that byte offset
/// and position. The slices can be of any length. If the given position
/// is at the end of the text, the callback should return an empty slice.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [Tree::edit].
pub fn parse_utf16_with<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>(
2016-07-10 14:03:00 -07:00
&mut self,
callback: &mut F,
2018-05-18 14:27:08 -07:00
old_tree: Option<&Tree>,
2016-07-10 14:03:00 -07:00
) -> Option<Tree> {
// A pointer to this payload is passed on every call to the `read` C function.
// The payload contains two things:
// 1. A reference to the rust `callback`.
// 2. The text that was returned from the previous call to `callback`.
// This allows the callback to return owned values like vectors.
let mut payload: (&mut F, Option<T>) = (callback, None);
// This C function is passed to Tree-sitter as the input callback.
unsafe extern "C" fn read<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>(
2016-07-10 14:03:00 -07:00
payload: *mut c_void,
2018-06-19 16:19:37 -07:00
byte_offset: u32,
position: ffi::TSPoint,
2016-07-10 14:03:00 -07:00
bytes_read: *mut u32,
) -> *const c_char {
let (callback, text) = (payload as *mut (&mut F, Option<T>)).as_mut().unwrap();
*text = Some(callback(
(byte_offset / 2) as usize,
2018-10-09 08:23:02 -07:00
Point {
row: position.row as usize,
column: position.column as usize / 2,
2018-10-09 08:23:02 -07:00
},
));
let slice = text.as_ref().unwrap().as_ref();
*bytes_read = slice.len() as u32 * 2;
slice.as_ptr() as *const c_char
2016-07-10 14:03:00 -07:00
};
let c_input = ffi::TSInput {
payload: &mut payload as *mut (&mut F, Option<T>) as *mut c_void,
read: Some(read::<T, F>),
2018-06-19 16:19:37 -07:00
encoding: ffi::TSInputEncoding_TSInputEncodingUTF16,
2016-07-10 14:03:00 -07:00
};
2019-09-23 16:55:28 -07:00
let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr());
unsafe {
let c_new_tree = ffi::ts_parser_parse(self.0.as_ptr(), c_old_tree, c_input);
NonNull::new(c_new_tree).map(Tree)
2016-07-10 14:03:00 -07:00
}
}
2019-11-08 12:29:26 -08:00
/// Instruct the parser to start the next parse from the beginning.
///
/// If the parser previously failed because of a timeout or a cancellation, then
/// by default, it will resume where it left off on the next call to `parse` or
/// other parsing functions. If you don't want to resume, and instead intend to
/// use this parser to parse some other document, you must call `reset` first.
pub fn reset(&mut self) {
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_parser_reset(self.0.as_ptr()) }
}
2019-11-08 12:29:26 -08:00
/// Get the duration in microseconds that parsing is allowed to take.
///
2019-11-08 12:43:26 -08:00
/// This is set via [set_timeout_micros](Parser::set_timeout_micros).
pub fn timeout_micros(&self) -> u64 {
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_parser_timeout_micros(self.0.as_ptr()) }
}
2019-11-08 12:29:26 -08:00
/// Set the maximum duration in microseconds that parsing should be allowed to
/// take before halting.
///
/// If parsing takes longer than this, it will halt early, returning `None`.
/// See `parse` for more information.
pub fn set_timeout_micros(&mut self, timeout_micros: u64) {
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_parser_set_timeout_micros(self.0.as_ptr(), timeout_micros) }
}
2019-11-08 12:29:26 -08:00
/// Set the ranges of text that the parser should include when parsing.
///
/// By default, the parser will always include entire documents. This function
/// allows you to parse only a *portion* of a document but still return a syntax
/// tree whose ranges match up with the document as a whole. You can also pass
/// multiple disjoint ranges.
///
/// If `ranges` is empty, then the entire document will be parsed. Otherwise,
/// the given ranges must be ordered from earliest to latest in the document,
/// and they must not overlap. That is, the following must hold for all
/// `i` < `length - 1`:
///
/// ```text
/// ranges[i].end_byte <= ranges[i + 1].start_byte
/// ```
///
/// # Errors
///
/// If this requirement is not satisfied, an [IncludedRangesError] is returned.
/// Its payload is the index of the first range that starts before the previous
/// range ends or that ends before it starts, or `0` if no such range is found.
pub fn set_included_ranges<'a>(
    &mut self,
    ranges: &'a [Range],
) -> Result<(), IncludedRangesError> {
    let ts_ranges: Vec<ffi::TSRange> =
        ranges.iter().cloned().map(|range| range.into()).collect();
    let accepted = unsafe {
        ffi::ts_parser_set_included_ranges(
            self.0.as_ptr(),
            ts_ranges.as_ptr(),
            ts_ranges.len() as u32,
        )
    };
    if accepted {
        return Ok(());
    }

    // The C library only reports failure, so locate the offending range here
    // in order to report its index.
    let mut prev_end_byte = 0;
    for (i, range) in ranges.iter().enumerate() {
        if range.start_byte < prev_end_byte || range.end_byte < range.start_byte {
            return Err(IncludedRangesError(i));
        }
        prev_end_byte = range.end_byte;
    }
    Err(IncludedRangesError(0))
}
2019-11-08 12:29:26 -08:00
/// Get the parser's current cancellation flag pointer.
pub unsafe fn cancellation_flag(&self) -> Option<&AtomicUsize> {
2019-09-23 16:55:28 -07:00
(ffi::ts_parser_cancellation_flag(self.0.as_ptr()) as *const AtomicUsize).as_ref()
}
2019-11-08 12:29:26 -08:00
/// Set the parser's current cancellation flag pointer.
///
/// If a pointer is assigned, then the parser will periodically read from
2019-11-08 12:43:26 -08:00
/// this pointer during parsing. If it reads a non-zero value, it will halt early,
/// returning `None`. See [parse](Parser::parse) for more information.
pub unsafe fn set_cancellation_flag(&self, flag: Option<&AtomicUsize>) {
if let Some(flag) = flag {
ffi::ts_parser_set_cancellation_flag(
2019-09-23 16:55:28 -07:00
self.0.as_ptr(),
flag as *const AtomicUsize as *const usize,
);
} else {
2019-09-23 16:55:28 -07:00
ffi::ts_parser_set_cancellation_flag(self.0.as_ptr(), ptr::null());
}
}
2016-07-10 14:03:00 -07:00
}
impl Drop for Parser {
fn drop(&mut self) {
self.stop_printing_dot_graphs();
2018-05-18 14:06:49 -07:00
self.set_logger(None);
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_parser_delete(self.0.as_ptr()) }
2016-07-10 14:03:00 -07:00
}
}
impl Tree {
2019-11-08 12:29:26 -08:00
/// Get the root node of the syntax tree.
2016-07-10 14:03:00 -07:00
pub fn root_node(&self) -> Node {
2019-09-23 16:55:28 -07:00
Node::new(unsafe { ffi::ts_tree_root_node(self.0.as_ptr()) }).unwrap()
2016-07-10 14:03:00 -07:00
}
2019-11-08 12:29:26 -08:00
/// Get the language that was used to parse the syntax tree.
pub fn language(&self) -> Language {
2019-09-23 16:55:28 -07:00
Language(unsafe { ffi::ts_tree_language(self.0.as_ptr()) })
}
2019-11-08 12:29:26 -08:00
/// Edit the syntax tree to keep it in sync with source code that has been
/// edited.
///
/// You must describe the edit both in terms of byte offsets and in terms of
/// row/column coordinates.
2016-07-10 14:03:00 -07:00
pub fn edit(&mut self, edit: &InputEdit) {
2019-02-04 20:42:56 -08:00
let edit = edit.into();
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_tree_edit(self.0.as_ptr(), &edit) };
2016-07-10 14:03:00 -07:00
}
2019-11-08 12:29:26 -08:00
/// Create a new [TreeCursor] starting from the root of the tree.
2016-07-10 14:03:00 -07:00
pub fn walk(&self) -> TreeCursor {
2018-07-20 13:32:22 -07:00
self.root_node().walk()
2016-07-10 14:03:00 -07:00
}
2018-10-13 14:09:36 -07:00
2019-11-08 12:29:26 -08:00
/// Compare this old edited syntax tree to a new syntax tree representing the same
/// document, returning a sequence of ranges whose syntactic structure has changed.
///
/// For this to work correctly, this syntax tree must have been edited such that its
/// ranges match up to the new tree. Generally, you'll want to call this method right
/// after calling one of the [Parser::parse] functions. Call it on the old tree that
/// was passed to parse, and pass the new tree that was returned from `parse`.
pub fn changed_ranges(&self, other: &Tree) -> impl ExactSizeIterator<Item = Range> {
let mut count = 0;
2019-01-25 12:05:21 -08:00
unsafe {
2019-09-23 16:55:28 -07:00
let ptr = ffi::ts_tree_get_changed_ranges(
self.0.as_ptr(),
other.0.as_ptr(),
&mut count as *mut _ as *mut u32,
);
util::CBufferIter::new(ptr, count).map(|r| r.into())
2019-01-25 12:05:21 -08:00
}
}
2016-07-10 14:03:00 -07:00
}
2018-05-18 14:27:08 -07:00
/// Debug output has the form `{Tree <root node>}`, using the root node's
/// own `Debug` representation.
impl fmt::Debug for Tree {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let root = self.root_node();
        write!(f, "{{Tree {:?}}}", root)
    }
}
2016-07-10 14:03:00 -07:00
impl Drop for Tree {
fn drop(&mut self) {
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_tree_delete(self.0.as_ptr()) }
2016-07-10 14:03:00 -07:00
}
}
impl Clone for Tree {
fn clone(&self) -> Tree {
2019-09-23 16:55:28 -07:00
unsafe { Tree(NonNull::new_unchecked(ffi::ts_tree_copy(self.0.as_ptr()))) }
2016-07-10 14:03:00 -07:00
}
}
2018-05-18 14:27:08 -07:00
impl<'tree> Node<'tree> {
2016-07-10 14:03:00 -07:00
/// Wrap a raw `TSNode`, returning `None` when its `id` pointer is null.
fn new(node: ffi::TSNode) -> Option<Self> {
    let is_present = !node.id.is_null();
    if is_present {
        Some(Node(node, PhantomData))
    } else {
        None
    }
}
2019-11-08 12:29:26 -08:00
/// Get this node's type as a numerical id.
2018-05-18 11:15:37 -07:00
pub fn kind_id(&self) -> u16 {
unsafe { ffi::ts_node_symbol(self.0) }
}
2019-11-08 12:29:26 -08:00
/// Get this node's type as a string.
2018-05-18 10:44:14 -07:00
pub fn kind(&self) -> &'static str {
2018-10-09 08:23:02 -07:00
unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }
.to_str()
.unwrap()
2016-07-10 14:03:00 -07:00
}
/// Get the [Language] that was used to parse this node's syntax tree.
///
/// The language is looked up through the node's `tree` pointer.
pub fn language(&self) -> Language {
    let raw_language = unsafe { ffi::ts_tree_language(self.0.tree) };
    Language(raw_language)
}
2019-11-08 12:29:26 -08:00
/// Check if this node is *named*.
///
/// Named nodes correspond to named rules in the grammar, whereas *anonymous* nodes
/// correspond to string literals in the grammar.
2018-05-18 10:44:14 -07:00
pub fn is_named(&self) -> bool {
unsafe { ffi::ts_node_is_named(self.0) }
}
2019-11-08 12:29:26 -08:00
/// Check if this node is *extra*.
///
/// Extra nodes represent things like comments, which are not required the grammar,
/// but can appear anywhere.
2019-06-19 15:58:29 -07:00
pub fn is_extra(&self) -> bool {
unsafe { ffi::ts_node_is_extra(self.0) }
}
2019-11-08 12:29:26 -08:00
/// Check if this node has been edited.
2018-05-18 10:44:14 -07:00
pub fn has_changes(&self) -> bool {
unsafe { ffi::ts_node_has_changes(self.0) }
}
2019-11-08 12:29:26 -08:00
/// Check if this node represents a syntax error or contains any syntax errors anywhere
/// within it.
2018-05-18 10:44:14 -07:00
pub fn has_error(&self) -> bool {
unsafe { ffi::ts_node_has_error(self.0) }
}
2019-11-08 12:29:26 -08:00
/// Check if this node represents a syntax error.
///
/// Syntax errors represent parts of the code that could not be incorporated into a
/// valid syntax tree. Such nodes are recognised by their kind id being `u16::MAX`.
pub fn is_error(&self) -> bool {
    let symbol = self.kind_id();
    symbol == u16::MAX
}
2019-11-08 12:29:26 -08:00
/// Check if this node is *missing*.
///
/// Missing nodes are inserted by the parser in order to recover from certain kinds of
/// syntax errors.
pub fn is_missing(&self) -> bool {
    let missing = unsafe { ffi::ts_node_is_missing(self.0) };
    missing
}
2019-11-08 12:29:26 -08:00
/// Get the byte offset where this node starts.
pub fn start_byte(&self) -> usize {
    unsafe { ffi::ts_node_start_byte(self.0) as usize }
}
2019-11-08 12:29:26 -08:00
/// Get the byte offset where this node ends.
pub fn end_byte(&self) -> usize {
    unsafe { ffi::ts_node_end_byte(self.0) as usize }
}
2019-11-08 12:29:26 -08:00
/// Get the byte range of source code that this node represents.
///
/// The range runs from `start_byte()` to `end_byte()`.
pub fn byte_range(&self) -> std::ops::Range<usize> {
    let start = self.start_byte();
    let end = self.end_byte();
    start..end
}
2019-11-08 12:29:26 -08:00
/// Get the range of source code that this node represents, both in terms of raw bytes
/// and of row/column coordinates.
pub fn range(&self) -> Range {
    let start_byte = self.start_byte();
    let end_byte = self.end_byte();
    let start_point = self.start_position();
    let end_point = self.end_position();
    Range {
        start_byte,
        end_byte,
        start_point,
        end_point,
    }
}
2019-11-08 12:29:26 -08:00
/// Get this node's start position in terms of rows and columns.
2016-07-10 14:03:00 -07:00
pub fn start_position(&self) -> Point {
let result = unsafe { ffi::ts_node_start_point(self.0) };
result.into()
2016-07-10 14:03:00 -07:00
}
2019-11-08 12:29:26 -08:00
/// Get this node's end position in terms of rows and columns.
2016-07-10 14:03:00 -07:00
pub fn end_position(&self) -> Point {
let result = unsafe { ffi::ts_node_end_point(self.0) };
result.into()
2016-07-10 14:03:00 -07:00
}
2019-11-08 12:29:26 -08:00
/// Get the node's child at the given index, where zero represents the first
/// child.
///
/// This method is fairly fast, but its cost is technically log(i), so you
/// if you might be iterating over a long list of children, you should use
/// [Node::children] instead.
pub fn child(&self, i: usize) -> Option<Self> {
Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) })
2016-07-10 14:03:00 -07:00
}
/// Get this node's number of children.
pub fn child_count(&self) -> usize {
    let count = unsafe { ffi::ts_node_child_count(self.0) };
    count as usize
}
/// Get this node's *named* child at the given index.
///
/// See also [Node::is_named].
///
/// This method is fairly fast, but its cost is technically log(i), so if
/// you might be iterating over a long list of children, you should use
/// [Node::named_children] instead.
pub fn named_child(&self, i: usize) -> Option<Self> {
    Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) })
}
/// Get this node's number of *named* children.
///
/// See also [Node::is_named].
pub fn named_child_count(&self) -> usize {
    let count = unsafe { ffi::ts_node_named_child_count(self.0) };
    count as usize
}
2019-11-08 12:29:26 -08:00
/// Get the first child with the given field name.
///
/// If multiple children may have the same field name, access them using
/// [children_by_field_name](Node::children_by_field_name)
2019-02-07 12:29:20 -08:00
pub fn child_by_field_name(&self, field_name: impl AsRef<[u8]>) -> Option<Self> {
let field_name = field_name.as_ref();
Self::new(unsafe {
ffi::ts_node_child_by_field_name(
self.0,
field_name.as_ptr() as *const c_char,
field_name.len() as u32,
)
})
}
2019-11-08 12:29:26 -08:00
/// Get this node's child with the given numerical field id.
///
2019-11-08 12:43:26 -08:00
/// See also [child_by_field_name](Node::child_by_field_name). You can convert a field name to
/// an id using [Language::field_id_for_name].
pub fn child_by_field_id(&self, field_id: u16) -> Option<Self> {
Self::new(unsafe { ffi::ts_node_child_by_field_id(self.0, field_id) })
}
/// Iterate over this node's children.
///
/// A [TreeCursor] is used to retrieve the children efficiently. Obtain
/// a [TreeCursor] by calling [Tree::walk] or [Node::walk]. To avoid unnecessary
/// allocations, you should reuse the same cursor for subsequent calls to
/// this method.
///
/// If you're walking the tree recursively, you may want to use the `TreeCursor`
/// APIs directly instead.
pub fn children<'a>(
    &self,
    cursor: &'a mut TreeCursor<'tree>,
) -> impl ExactSizeIterator<Item = Node<'tree>> + 'a {
    // Re-root the cursor at this node and step down to its first child.
    cursor.reset(*self);
    cursor.goto_first_child();
    let total = self.child_count();
    (0..total).map(move |_| {
        let current = cursor.node();
        cursor.goto_next_sibling();
        current
    })
}
/// Iterate over this node's named children.
///
/// See also [Node::children].
pub fn named_children<'a>(
    &self,
    cursor: &'a mut TreeCursor<'tree>,
) -> impl ExactSizeIterator<Item = Node<'tree>> + 'a {
    cursor.reset(*self);
    cursor.goto_first_child();
    (0..self.named_child_count()).into_iter().map(move |_| {
        // Skip over anonymous siblings until a named node is reached.
        while !cursor.node().is_named() {
            if !cursor.goto_next_sibling() {
                break;
            }
        }
        let result = cursor.node();
        cursor.goto_next_sibling();
        result
    })
}
/// Iterate over this node's children with a given field name.
2019-11-08 12:29:26 -08:00
///
/// See also [Node::children].
pub fn children_by_field_name<'a>(
&self,
field_name: &str,
cursor: &'a mut TreeCursor<'tree>,
) -> impl Iterator<Item = Node<'tree>> + 'a {
let field_id = self.language().field_id_for_name(field_name);
self.children_by_field_id(field_id.unwrap_or(0), cursor)
2018-05-18 10:44:14 -07:00
}
/// Iterate over this node's children with a given field id.
2019-11-08 12:29:26 -08:00
///
/// See also [Node::children_by_field_name].
pub fn children_by_field_id<'a>(
&self,
field_id: u16,
cursor: &'a mut TreeCursor<'tree>,
) -> impl Iterator<Item = Node<'tree>> + 'a {
cursor.reset(*self);
cursor.goto_first_child();
let mut done = false;
iter::from_fn(move || {
while !done {
while cursor.field_id() != Some(field_id) {
if !cursor.goto_next_sibling() {
return None;
}
}
let result = cursor.node();
if !cursor.goto_next_sibling() {
done = true;
}
return Some(result);
}
None
})
2018-05-18 10:44:14 -07:00
}
2019-11-08 12:29:26 -08:00
/// Get this node's immediate parent.
2018-05-18 14:27:08 -07:00
pub fn parent(&self) -> Option<Self> {
2016-07-10 14:03:00 -07:00
Self::new(unsafe { ffi::ts_node_parent(self.0) })
}
2019-11-08 12:29:26 -08:00
/// Get this node's next sibling.
2018-05-18 14:27:08 -07:00
pub fn next_sibling(&self) -> Option<Self> {
2018-05-18 10:44:14 -07:00
Self::new(unsafe { ffi::ts_node_next_sibling(self.0) })
}
2019-11-08 12:29:26 -08:00
/// Get this node's previous sibling.
2018-05-18 14:27:08 -07:00
pub fn prev_sibling(&self) -> Option<Self> {
2018-05-18 10:44:14 -07:00
Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) })
}
2019-11-08 12:29:26 -08:00
/// Get this node's next named sibling.
2018-05-18 14:27:08 -07:00
pub fn next_named_sibling(&self) -> Option<Self> {
2018-05-18 10:44:14 -07:00
Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) })
}
2019-11-08 12:29:26 -08:00
/// Get this node's previous named sibling.
2018-05-18 14:27:08 -07:00
pub fn prev_named_sibling(&self) -> Option<Self> {
2018-05-18 10:44:14 -07:00
Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) })
}
2019-11-08 12:29:26 -08:00
/// Get the smallest node within this node that spans the given byte range.
///
/// Both offsets are narrowed to `u32` with `as` before being passed to the C
/// library.
pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
    let (start_byte, end_byte) = (start as u32, end as u32);
    let raw = unsafe { ffi::ts_node_descendant_for_byte_range(self.0, start_byte, end_byte) };
    Self::new(raw)
}
2019-11-08 12:29:26 -08:00
/// Get the smallest named node within this node that spans the given byte
/// range.
///
/// Both offsets are narrowed to `u32` with `as` before being passed to the C
/// library.
pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
    let (start_byte, end_byte) = (start as u32, end as u32);
    let raw =
        unsafe { ffi::ts_node_named_descendant_for_byte_range(self.0, start_byte, end_byte) };
    Self::new(raw)
}
2019-11-08 12:29:26 -08:00
/// Get the smallest node within this node that spans the given range,
/// expressed as a pair of [Point]s.
pub fn descendant_for_point_range(&self, start: Point, end: Point) -> Option<Self> {
    let raw =
        unsafe { ffi::ts_node_descendant_for_point_range(self.0, start.into(), end.into()) };
    Self::new(raw)
}
2019-11-08 12:29:26 -08:00
/// Get the smallest named node within this node that spans the given range,
/// expressed as a pair of [Point]s.
pub fn named_descendant_for_point_range(&self, start: Point, end: Point) -> Option<Self> {
    let raw = unsafe {
        ffi::ts_node_named_descendant_for_point_range(self.0, start.into(), end.into())
    };
    Self::new(raw)
}
pub fn to_sexp(&self) -> String {
let c_string = unsafe { ffi::ts_node_string(self.0) };
2018-10-09 08:23:02 -07:00
let result = unsafe { CStr::from_ptr(c_string) }
.to_str()
.unwrap()
.to_string();
unsafe { util::free_ptr(c_string as *mut c_void) };
result
}
2018-07-20 13:32:22 -07:00
2019-02-06 19:42:47 -08:00
pub fn utf8_text<'a>(&self, source: &'a [u8]) -> Result<&'a str, str::Utf8Error> {
str::from_utf8(&source[self.start_byte()..self.end_byte()])
2018-10-13 14:09:36 -07:00
}
pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] {
2019-02-06 19:42:47 -08:00
&source.as_ref()[self.start_byte()..self.end_byte()]
2018-10-13 14:09:36 -07:00
}
2019-11-08 12:29:26 -08:00
/// Create a new [TreeCursor] starting from this node.
2018-07-20 13:32:22 -07:00
pub fn walk(&self) -> TreeCursor<'tree> {
TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData)
}
2019-02-04 20:42:56 -08:00
2019-11-08 12:29:26 -08:00
/// Edit this node to keep it in-sync with source code that has been edited.
///
/// This function is only rarely needed. When you edit a syntax tree with the
/// [Tree::edit] method, all of the nodes that you retrieve from the tree
/// afterward will already reflect the edit. You only need to use [Node::edit]
/// when you have a specific [Node] instance that you want to keep and continue
/// to use after an edit.
2019-02-04 20:42:56 -08:00
pub fn edit(&mut self, edit: &InputEdit) {
let edit = edit.into();
unsafe { ffi::ts_node_edit(&mut self.0 as *mut ffi::TSNode, &edit) }
}
2016-07-10 14:03:00 -07:00
}
2018-05-18 11:15:37 -07:00
/// Two nodes compare equal when the `id` fields of their raw nodes are equal.
impl<'a> PartialEq for Node<'a> {
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.0.id, other.0.id);
        lhs == rhs
    }
}

impl<'a> Eq for Node<'a> {}
2018-05-18 11:15:37 -07:00
impl<'a> fmt::Debug for Node<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
2018-10-09 08:23:02 -07:00
write!(
f,
"{{Node {} {} - {}}}",
self.kind(),
self.start_position(),
self.end_position()
)
2018-05-18 11:15:37 -07:00
}
}
2016-07-10 14:03:00 -07:00
impl<'a> TreeCursor<'a> {
2019-11-08 12:29:26 -08:00
/// Get the tree cursor's current [Node].
2018-10-13 14:09:36 -07:00
pub fn node(&self) -> Node<'a> {
2016-07-10 14:03:00 -07:00
Node(
unsafe { ffi::ts_tree_cursor_current_node(&self.0) },
PhantomData,
)
}
2019-11-08 12:29:26 -08:00
/// Get the numerical field id of this tree cursor's current node.
///
/// See also [field_name](TreeCursor::field_name).
2019-02-07 12:29:20 -08:00
pub fn field_id(&self) -> Option<u16> {
unsafe {
let id = ffi::ts_tree_cursor_current_field_id(&self.0);
if id == 0 {
None
} else {
Some(id)
}
}
}
2019-11-08 12:29:26 -08:00
/// Get the field name of this tree cursor's current node.
2019-02-07 12:29:20 -08:00
pub fn field_name(&self) -> Option<&str> {
unsafe {
let ptr = ffi::ts_tree_cursor_current_field_name(&self.0);
if ptr.is_null() {
None
} else {
Some(CStr::from_ptr(ptr).to_str().unwrap())
}
}
}
2019-11-08 12:29:26 -08:00
/// Move this cursor to the first child of its current node.
///
/// This returns `true` if the cursor successfully moved, and returns `false`
/// if there were no children.
2018-05-18 10:44:14 -07:00
pub fn goto_first_child(&mut self) -> bool {
2016-07-10 14:03:00 -07:00
return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) };
}
2019-11-08 12:29:26 -08:00
/// Move this cursor to the parent of its current node.
///
/// This returns `true` if the cursor successfully moved, and returns `false`
/// if there was no parent node (the cursor was already on the root node).
2018-05-18 10:44:14 -07:00
pub fn goto_parent(&mut self) -> bool {
2016-07-10 14:03:00 -07:00
return unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) };
}
2019-11-08 12:29:26 -08:00
/// Move this cursor to the next sibling of its current node.
///
/// This returns `true` if the cursor successfully moved, and returns `false`
/// if there was no next sibling node.
2018-05-18 10:44:14 -07:00
pub fn goto_next_sibling(&mut self) -> bool {
2016-07-10 14:03:00 -07:00
return unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) };
}
2019-11-08 12:29:26 -08:00
/// Move this cursor to the first child of its current node that extends beyond
/// the given byte offset.
///
/// This returns the index of the child node if one was found, and returns `None`
/// if no such child was found.
pub fn goto_first_child_for_byte(&mut self, index: usize) -> Option<usize> {
2018-12-13 16:32:22 -08:00
let result =
unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) };
2016-07-10 14:03:00 -07:00
if result < 0 {
None
} else {
Some(result as usize)
2016-07-10 14:03:00 -07:00
}
}
2019-11-08 12:29:26 -08:00
/// Re-initialize this tree cursor to start at a different node.
pub fn reset(&mut self, node: Node<'a>) {
unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) };
}
2016-07-10 14:03:00 -07:00
}
/// Hands the raw cursor to `ts_tree_cursor_delete` when the wrapper is dropped.
impl<'a> Drop for TreeCursor<'a> {
    fn drop(&mut self) {
        let raw_cursor = &mut self.0;
        unsafe { ffi::ts_tree_cursor_delete(raw_cursor) }
    }
}
impl Query {
2019-11-08 12:29:26 -08:00
/// Create a new query from a string containing one or more S-expression
/// patterns.
///
/// The query is associated with a particular language, and can only be run
/// on syntax nodes parsed with that language. References to Queries can be
/// shared between multiple threads.
2019-09-23 16:55:28 -07:00
pub fn new(language: Language, source: &str) -> Result<Self, QueryError> {
let mut error_offset = 0u32;
let mut error_type: ffi::TSQueryError = 0;
let bytes = source.as_bytes();
// Compile the query.
let ptr = unsafe {
ffi::ts_query_new(
language.0,
bytes.as_ptr() as *const c_char,
bytes.len() as u32,
&mut error_offset as *mut u32,
&mut error_type as *mut ffi::TSQueryError,
)
};
// On failure, build an error based on the error code and offset.
if ptr.is_null() {
let offset = error_offset as usize;
let mut line_start = 0;
let mut row = 0;
let line_containing_error = source.split("\n").find_map(|line| {
row += 1;
let line_end = line_start + line.len() + 1;
if line_end > offset {
Some(line)
} else {
line_start = line_end;
None
}
});
let message = if let Some(line) = line_containing_error {
line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^"
} else {
"Unexpected EOF".to_string()
};
// if line_containing_error
return if error_type != ffi::TSQueryError_TSQueryErrorSyntax {
let suffix = source.split_at(offset).1;
let end_offset = suffix
.find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-')
.unwrap_or(source.len());
2019-09-23 16:55:28 -07:00
let name = suffix.split_at(end_offset).0.to_string();
match error_type {
ffi::TSQueryError_TSQueryErrorNodeType => Err(QueryError::NodeType(row, name)),
ffi::TSQueryError_TSQueryErrorField => Err(QueryError::Field(row, name)),
ffi::TSQueryError_TSQueryErrorCapture => Err(QueryError::Capture(row, name)),
_ => Err(QueryError::Syntax(row, message)),
}
} else {
Err(QueryError::Syntax(row, message))
};
}
let string_count = unsafe { ffi::ts_query_string_count(ptr) };
let capture_count = unsafe { ffi::ts_query_capture_count(ptr) };
let pattern_count = unsafe { ffi::ts_query_pattern_count(ptr) as usize };
let mut result = Query {
2019-09-23 16:55:28 -07:00
ptr: unsafe { NonNull::new_unchecked(ptr) },
capture_names: Vec::with_capacity(capture_count as usize),
text_predicates: Vec::with_capacity(pattern_count),
property_predicates: Vec::with_capacity(pattern_count),
property_settings: Vec::with_capacity(pattern_count),
};
// Build a vector of strings to store the capture names.
for i in 0..capture_count {
unsafe {
let mut length = 0u32;
let name =
ffi::ts_query_capture_name_for_id(ptr, i, &mut length as *mut u32) as *const u8;
let name = slice::from_raw_parts(name, length as usize);
let name = str::from_utf8_unchecked(name);
result.capture_names.push(name.to_string());
}
}
// Build a vector of strings to represent literal values used in predicates.
let string_values = (0..string_count)
.map(|i| unsafe {
let mut length = 0u32;
let value =
ffi::ts_query_string_value_for_id(ptr, i as u32, &mut length as *mut u32)
as *const u8;
let value = slice::from_raw_parts(value, length as usize);
let value = str::from_utf8_unchecked(value);
value.to_string()
})
.collect::<Vec<_>>();
// Build a vector of predicates for each pattern.
for i in 0..pattern_count {
let predicate_steps = unsafe {
let mut length = 0u32;
let raw_predicates =
ffi::ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32);
slice::from_raw_parts(raw_predicates, length as usize)
};
let type_done = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeDone;
let type_capture = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture;
let type_string = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeString;
let mut text_predicates = Vec::new();
let mut property_predicates = Vec::new();
let mut property_settings = Vec::new();
for p in predicate_steps.split(|s| s.type_ == type_done) {
if p.is_empty() {
continue;
}
if p[0].type_ != type_string {
return Err(QueryError::Predicate(format!(
"Expected predicate to start with a function name. Got @{}.",
result.capture_names[p[0].value_id as usize],
)));
}
// Build a predicate for each of the known predicate function names.
let operator_name = &string_values[p[0].value_id as usize];
match operator_name.as_str() {
"eq?" => {
if p.len() != 3 {
return Err(QueryError::Predicate(format!(
"Wrong number of arguments to eq? predicate. Expected 2, got {}.",
p.len() - 1
)));
}
if p[1].type_ != type_capture {
return Err(QueryError::Predicate(format!(
"First argument to eq? predicate must be a capture name. Got literal \"{}\".",
string_values[p[1].value_id as usize],
)));
}
text_predicates.push(if p[2].type_ == type_capture {
TextPredicate::CaptureEqCapture(p[1].value_id, p[2].value_id)
} else {
TextPredicate::CaptureEqString(
p[1].value_id,
string_values[p[2].value_id as usize].clone(),
)
});
}
"match?" => {
if p.len() != 3 {
return Err(QueryError::Predicate(format!(
"Wrong number of arguments to match? predicate. Expected 2, got {}.",
p.len() - 1
)));
}
if p[1].type_ != type_capture {
return Err(QueryError::Predicate(format!(
"First argument to match? predicate must be a capture name. Got literal \"{}\".",
string_values[p[1].value_id as usize],
)));
}
if p[2].type_ == type_capture {
return Err(QueryError::Predicate(format!(
"Second argument to match? predicate must be a literal. Got capture @{}.",
result.capture_names[p[2].value_id as usize],
)));
}
let regex = &string_values[p[2].value_id as usize];
text_predicates.push(TextPredicate::CaptureMatchString(
p[1].value_id,
regex::bytes::Regex::new(regex).map_err(|_| {
QueryError::Predicate(format!("Invalid regex '{}'", regex))
})?,
));
}
"set!" => property_settings.push(Self::parse_property(
"set!",
&result.capture_names,
&string_values,
&p[1..],
)?),
"is?" | "is-not?" => property_predicates.push((
Self::parse_property(
&operator_name,
&result.capture_names,
&string_values,
&p[1..],
)?,
operator_name == "is?",
)),
_ => {
return Err(QueryError::Predicate(format!(
"Unknown query predicate function {}",
operator_name,
)))
}
}
}
result
.text_predicates
.push(text_predicates.into_boxed_slice());
result
.property_predicates
.push(property_predicates.into_boxed_slice());
result
.property_settings
.push(property_settings.into_boxed_slice());
}
Ok(result)
}
2019-11-08 12:29:26 -08:00
/// Get the byte offset where the given pattern starts in the query's source.
pub fn start_byte_for_pattern(&self, pattern_index: usize) -> usize {
if pattern_index >= self.text_predicates.len() {
panic!(
"Pattern index is {} but the pattern count is {}",
pattern_index,
self.text_predicates.len(),
);
}
2019-09-23 16:55:28 -07:00
unsafe {
ffi::ts_query_start_byte_for_pattern(self.ptr.as_ptr(), pattern_index as u32) as usize
}
}
2019-11-08 12:29:26 -08:00
/// Get the number of patterns in the query.
pub fn pattern_count(&self) -> usize {
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_query_pattern_count(self.ptr.as_ptr()) as usize }
}
2019-11-08 12:29:26 -08:00
/// Get the names of the captures used in the query.
pub fn capture_names(&self) -> &[String] {
&self.capture_names
}
2019-11-08 12:29:26 -08:00
/// Get the properties that are checked for the given pattern index.
pub fn property_predicates(&self, index: usize) -> &[(QueryProperty, bool)] {
&self.property_predicates[index]
}
2019-11-08 12:29:26 -08:00
/// Get the properties that are set for the given pattern index.
pub fn property_settings(&self, index: usize) -> &[QueryProperty] {
&self.property_settings[index]
}
2019-11-08 12:29:26 -08:00
/// Disable a certain capture within a query.
///
/// This prevents the capture from being returned in matches, and also avoids any
/// resource usage associated with recording the capture.
2019-09-26 15:58:41 -07:00
pub fn disable_capture(&mut self, name: &str) {
unsafe {
ffi::ts_query_disable_capture(
self.ptr.as_ptr(),
name.as_bytes().as_ptr() as *const c_char,
name.len() as u32,
);
}
}
2020-01-15 17:08:55 -08:00
/// Disable a certain pattern within a query.
///
/// This prevents the pattern from matching, and also avoids any resource usage
/// associated with the pattern.
pub fn disable_pattern(&mut self, index: usize) {
unsafe { ffi::ts_query_disable_pattern(self.ptr.as_ptr(), index as u32) }
}
fn parse_property(
function_name: &str,
capture_names: &[String],
string_values: &[String],
args: &[ffi::TSQueryPredicateStep],
) -> Result<QueryProperty, QueryError> {
if args.len() == 0 || args.len() > 3 {
return Err(QueryError::Predicate(format!(
"Wrong number of arguments to {} predicate. Expected 1 to 3, got {}.",
function_name,
args.len(),
)));
}
let mut i = 0;
let mut capture_id = None;
if args[i].type_ == ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture {
capture_id = Some(args[i].value_id as usize);
i += 1;
if i == args.len() {
return Err(QueryError::Predicate(format!(
"No key specified for {} predicate.",
function_name,
)));
}
if args[i].type_ == ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture {
return Err(QueryError::Predicate(format!(
"Invalid arguments to {} predicate. Expected string, got @{}",
function_name, capture_names[args[i].value_id as usize]
)));
}
}
let key = &string_values[args[i].value_id as usize];
i += 1;
let mut value = None;
if i < args.len() {
if args[i].type_ == ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture {
return Err(QueryError::Predicate(format!(
"Invalid arguments to {} predicate. Expected string, got @{}",
function_name, capture_names[args[i].value_id as usize]
)));
}
value = Some(string_values[args[i].value_id as usize].as_str());
}
Ok(QueryProperty::new(key, value, capture_id))
}
}
impl QueryCursor {
2019-11-08 12:29:26 -08:00
/// Create a new cursor for executing a given query.
///
/// The cursor stores the state that is needed to iteratively search for matches.
pub fn new() -> Self {
2019-09-23 16:55:28 -07:00
QueryCursor(unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) })
}
/// Iterate over all of the matches in the order that they were found.
///
/// Each match contains the index of the pattern that matched, and a list of captures.
/// Because multiple patterns can match the same set of nodes, one match may contain
/// captures that appear *before* some of the captures from a previous match.
pub fn matches<'a, T: AsRef<[u8]>>(
&'a mut self,
query: &'a Query,
node: Node<'a>,
mut text_callback: impl FnMut(Node<'a>) -> T + 'a,
) -> impl Iterator<Item = QueryMatch<'a>> + 'a {
2019-09-23 16:55:28 -07:00
let ptr = self.0.as_ptr();
unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) };
std::iter::from_fn(move || loop {
unsafe {
let mut m = MaybeUninit::<ffi::TSQueryMatch>::uninit();
if ffi::ts_query_cursor_next_match(ptr, m.as_mut_ptr()) {
let result = QueryMatch::new(m.assume_init(), ptr);
if result.satisfies_text_predicates(query, &mut text_callback) {
return Some(result);
2019-09-16 10:25:44 -07:00
}
} else {
return None;
}
}
})
}
/// Iterate over all of the individual captures in the order that they appear.
///
/// This is useful if don't care about which pattern matched, and just want a single,
/// ordered sequence of captures.
pub fn captures<'a, T: AsRef<[u8]>>(
&'a mut self,
query: &'a Query,
node: Node<'a>,
text_callback: impl FnMut(Node<'a>) -> T + 'a,
) -> QueryCaptures<'a, T> {
2019-09-23 16:55:28 -07:00
let ptr = self.0.as_ptr();
unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) };
QueryCaptures {
ptr,
query,
text_callback: Box::new(text_callback),
}
}
2019-11-08 12:29:26 -08:00
/// Set the range in which the query will be executed, in terms of byte offsets.
pub fn set_byte_range(&mut self, start: usize, end: usize) -> &mut Self {
unsafe {
ffi::ts_query_cursor_set_byte_range(self.0.as_ptr(), start as u32, end as u32);
}
self
}
2019-11-08 12:29:26 -08:00
/// Set the range in which the query will be executed, in terms of rows and columns.
pub fn set_point_range(&mut self, start: Point, end: Point) -> &mut Self {
unsafe {
ffi::ts_query_cursor_set_point_range(self.0.as_ptr(), start.into(), end.into());
}
self
}
}
impl<'a> QueryMatch<'a> {
    /// Ask the originating cursor to remove this match, by calling
    /// `ts_query_cursor_remove_match` with the match's id.
    pub fn remove(self) {
        let (cursor, match_id) = (self.cursor, self.id);
        unsafe { ffi::ts_query_cursor_remove_match(cursor, match_id) }
    }

    /// Wrap a raw match produced by the C library.
    ///
    /// The raw capture array is viewed as a slice of `QueryCapture`.
    // NOTE(review): this relies on `QueryCapture` having the same layout as
    // the C capture struct — confirm against its definition.
    fn new(m: ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self {
        let first_capture = m.captures as *const QueryCapture<'a>;
        let capture_total = m.capture_count as usize;
        let captures = unsafe { slice::from_raw_parts(first_capture, capture_total) };
        QueryMatch {
            cursor,
            id: m.id,
            pattern_index: m.pattern_index as usize,
            captures,
        }
    }

    /// Check every text predicate attached to this match's pattern.
    ///
    /// `text_callback` is asked for the text of each captured node involved.
    /// Evaluation stops at the first predicate that fails.
    ///
    /// # Panics
    ///
    /// Panics if a predicate refers to a capture that this match lacks.
    fn satisfies_text_predicates<T: AsRef<[u8]>>(
        &self,
        query: &Query,
        text_callback: &mut impl FnMut(Node<'a>) -> T,
    ) -> bool {
        for predicate in query.text_predicates[self.pattern_index].iter() {
            let satisfied = match predicate {
                TextPredicate::CaptureEqCapture(i, j) => {
                    let first = self.capture_for_index(*i).unwrap();
                    let second = self.capture_for_index(*j).unwrap();
                    let first_text = text_callback(first);
                    let second_text = text_callback(second);
                    first_text.as_ref() == second_text.as_ref()
                }
                TextPredicate::CaptureEqString(i, s) => {
                    let captured = self.capture_for_index(*i).unwrap();
                    let text = text_callback(captured);
                    text.as_ref() == s.as_bytes()
                }
                TextPredicate::CaptureMatchString(i, r) => {
                    let captured = self.capture_for_index(*i).unwrap();
                    let text = text_callback(captured);
                    r.is_match(text.as_ref())
                }
            };
            if !satisfied {
                return false;
            }
        }
        true
    }

    /// Find the node of the first capture whose index equals `capture_index`.
    fn capture_for_index(&self, capture_index: u32) -> Option<Node<'a>> {
        self.captures
            .iter()
            .find(|capture| capture.index == capture_index)
            .map(|capture| capture.node)
    }
}
impl QueryProperty {
    /// Build a property from a key, an optional value and an optional capture
    /// id. The key and value are copied into boxed strings.
    pub fn new(key: &str, value: Option<&str>, capture_id: Option<usize>) -> Self {
        let key = Box::<str>::from(key);
        let value = value.map(|text| Box::<str>::from(text));
        QueryProperty {
            capture_id,
            key,
            value,
        }
    }
}
/// Yields `(match, capture index)` pairs, where the index is the one reported
/// by `ts_query_cursor_next_capture`. Matches that fail the query's text
/// predicates are removed from the cursor and skipped.
impl<'a, T: AsRef<[u8]>> Iterator for QueryCaptures<'a, T> {
    type Item = (QueryMatch<'a>, usize);

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut capture_index = 0u32;
            let mut raw_match = MaybeUninit::<ffi::TSQueryMatch>::uninit();
            let found = unsafe {
                ffi::ts_query_cursor_next_capture(
                    self.ptr,
                    raw_match.as_mut_ptr(),
                    &mut capture_index as *mut u32,
                )
            };
            if !found {
                return None;
            }

            // The raw match is only read after the C library reported one.
            let candidate = QueryMatch::new(unsafe { raw_match.assume_init() }, self.ptr);
            if candidate.satisfies_text_predicates(self.query, &mut self.text_callback) {
                return Some((candidate, capture_index as usize));
            }
            candidate.remove();
        }
    }
}
/// Two queries are equal only when they wrap the same underlying pointer.
impl PartialEq for Query {
    fn eq(&self, other: &Self) -> bool {
        self.ptr.as_ptr() == other.ptr.as_ptr()
    }
}
impl Drop for Query {
fn drop(&mut self) {
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_query_delete(self.ptr.as_ptr()) }
}
}
impl Drop for QueryCursor {
fn drop(&mut self) {
2019-09-23 16:55:28 -07:00
unsafe { ffi::ts_query_cursor_delete(self.0.as_ptr()) }
}
}
2018-05-18 14:27:08 -07:00
impl Point {
pub fn new(row: usize, column: usize) -> Self {
2018-05-18 14:27:08 -07:00
Point { row, column }
}
}
2018-05-18 11:15:37 -07:00
impl fmt::Display for Point {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "({}, {})", self.row, self.column)
}
}
2016-07-10 14:03:00 -07:00
/// Converts a `Point` into its C representation.
///
/// Both fields are narrowed with `as u32`, so values above `u32::MAX` are
/// truncated.
impl Into<ffi::TSPoint> for Point {
    fn into(self) -> ffi::TSPoint {
        ffi::TSPoint {
            row: self.row as u32,
            column: self.column as u32,
        }
    }
}
2018-06-19 16:19:37 -07:00
/// Converts the C representation of a point into a `Point`, widening both
/// fields to `usize`.
impl From<ffi::TSPoint> for Point {
    fn from(point: ffi::TSPoint) -> Self {
        Self {
            row: point.row as usize,
            column: point.column as usize,
        }
    }
}
impl Into<ffi::TSRange> for Range {
fn into(self) -> ffi::TSRange {
ffi::TSRange {
start_byte: self.start_byte as u32,
end_byte: self.end_byte as u32,
start_point: self.start_point.into(),
end_point: self.end_point.into(),
}
}
}
2019-01-25 12:05:21 -08:00
/// Converts the C representation of a range into a `Range`, widening the byte
/// offsets to `usize` and converting both points.
impl From<ffi::TSRange> for Range {
    fn from(range: ffi::TSRange) -> Self {
        let start_point = range.start_point.into();
        let end_point = range.end_point.into();
        Self {
            start_byte: range.start_byte as usize,
            end_byte: range.end_byte as usize,
            start_point,
            end_point,
        }
    }
}
2019-02-04 20:42:56 -08:00
/// Converts a borrowed `InputEdit` into its C representation.
///
/// The byte offsets are narrowed with `as u32`; the three positions go through
/// the point conversion.
impl<'a> Into<ffi::TSInputEdit> for &'a InputEdit {
    fn into(self) -> ffi::TSInputEdit {
        let start_point = self.start_position.into();
        let old_end_point = self.old_end_position.into();
        let new_end_point = self.new_end_position.into();
        ffi::TSInputEdit {
            start_byte: self.start_byte as u32,
            old_end_byte: self.old_end_byte as u32,
            new_end_byte: self.new_end_byte as u32,
            start_point,
            old_end_point,
            new_end_point,
        }
    }
}
2019-09-23 16:55:28 -07:00
// Hand-written thread-safety markers for the wrapper types.
// `Parser` and `Tree` are declared `Send` only; `Language` and `Query` are
// declared both `Send` and `Sync`.
// NOTE(review): nothing in this part of the file shows why the wrapped raw
// pointers are safe to move or share across threads — confirm against the
// guarantees of the underlying C library.
unsafe impl Send for Language {}
unsafe impl Send for Parser {}
unsafe impl Send for Query {}
unsafe impl Send for Tree {}
unsafe impl Sync for Language {}
unsafe impl Sync for Query {}