Start work on an API for querying trees

This commit is contained in:
Max Brunsfeld 2019-09-09 15:41:13 -07:00
parent 4151a428ec
commit fe7c74e7aa
10 changed files with 1430 additions and 12 deletions

View file

@ -19,6 +19,16 @@ pub struct TSParser {
pub struct TSTree {
_unused: [u8; 0],
}
/// Opaque FFI type standing in for the C library's `TSQuery`.
///
/// The bindings in this file only hand it around behind raw pointers
/// (`ts_query_new` returns `*mut TSQuery`, `ts_query_delete` consumes one).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSQuery {
// Zero-length placeholder field: gives the struct a body without exposing
// any of the C-side contents.
_unused: [u8; 0],
}
/// Opaque FFI type standing in for the C library's `TSQueryContext`.
///
/// Instances are obtained from `ts_query_context_new` and released with
/// `ts_query_context_delete`; the bindings only ever use it behind raw pointers.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSQueryContext {
// Zero-length placeholder field: gives the struct a body without exposing
// any of the C-side contents.
_unused: [u8; 0],
}
/// `TSInputEncoding` value for UTF-8 (numeric value 0).
pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0;
/// `TSInputEncoding` value for UTF-16 (numeric value 1).
pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1;
/// Integer type carrying one of the `TSInputEncoding_*` constants.
pub type TSInputEncoding = u32;
@ -93,6 +103,17 @@ pub struct TSTreeCursor {
pub id: *const ::std::os::raw::c_void,
pub context: [u32; 2usize],
}
/// One element of the array returned by `ts_query_context_matched_captures`.
///
/// `#[repr(C)]` keeps the field order and layout identical to the C struct,
/// so the fields must not be reordered.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSQueryCapture {
/// The syntax node associated with this capture.
pub node: TSNode,
/// Numeric index of the capture.
// NOTE(review): presumably the same index space as the `index` argument of
// `ts_query_capture_name_for_id` — confirm against the C header.
pub index: u32,
}
// Error kinds that `ts_query_new` writes through its `error_type` out-parameter.
/// No error (numeric value 0).
pub const TSQueryError_TSQueryErrorNone: TSQueryError = 0;
/// Syntax error kind (numeric value 1).
pub const TSQueryError_TSQueryErrorSyntax: TSQueryError = 1;
/// Node-type error kind (numeric value 2).
pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2;
/// Field error kind (numeric value 3).
pub const TSQueryError_TSQueryErrorField: TSQueryError = 3;
/// Integer type carrying one of the `TSQueryError_*` constants.
pub type TSQueryError = u32;
extern "C" {
#[doc = " Create a new parser."]
pub fn ts_parser_new() -> *mut TSParser;
@ -538,6 +559,65 @@ extern "C" {
extern "C" {
/// Takes a pointer to an existing `TSTreeCursor` and returns a
/// `TSTreeCursor` by value.
// NOTE(review): presumably an independent copy of the cursor, going by the
// name — confirm against the C header.
pub fn ts_tree_cursor_copy(arg1: *const TSTreeCursor) -> TSTreeCursor;
}
extern "C" {
// Parameter notes:
// * `arg1` is the language the patterns are checked against.
// * `source` / `source_len` are a pointer + length pair.
//   NOTE(review): presumably `source_len` is in bytes and `source` need not
//   be NUL-terminated — confirm against the C header.
// * `error_offset` / `error_type` are out-parameters, described below.
#[doc = " Create a new query based on a given language and string containing"]
#[doc = " one or more S-expression patterns."]
#[doc = ""]
#[doc = " If all of the given patterns are valid, this returns a `TSQuery`."]
#[doc = " If a pattern is invalid, this returns `NULL`, and provides two pieces"]
#[doc = " of information about the problem:"]
#[doc = " 1. The byte offset of the error is written to the `error_offset` parameter."]
#[doc = " 2. The type of error is written to the `error_type` parameter."]
pub fn ts_query_new(
arg1: *const TSLanguage,
source: *const ::std::os::raw::c_char,
source_len: u32,
error_offset: *mut u32,
error_type: *mut TSQueryError,
) -> *mut TSQuery;
}
extern "C" {
// The pointer must not be used again after this call, since the query's
// memory has been freed.
#[doc = " Delete a query, freeing all of the memory that it used."]
pub fn ts_query_delete(arg1: *mut TSQuery);
}
extern "C" {
/// Returns a count of captures for the given query.
// NOTE(review): presumably the number of distinct capture names, so that
// `0..count` is the valid `index` range for `ts_query_capture_name_for_id`
// — confirm against the C header.
pub fn ts_query_capture_count(arg1: *const TSQuery) -> u32;
}
extern "C" {
/// Looks up the name of the capture with the given `index`.
///
/// Returns a pointer to the name's characters; `length` is an
/// out-parameter.
// NOTE(review): presumably `length` receives the name's byte length and the
// returned string is not guaranteed to be NUL-terminated — confirm against
// the C header.
pub fn ts_query_capture_name_for_id(
self_: *const TSQuery,
index: u32,
length: *mut u32,
) -> *const ::std::os::raw::c_char;
}
extern "C" {
/// Looks up a capture by name, given as a pointer + `length` pair, and
/// returns an integer id.
// NOTE(review): the return type is a signed `c_int`, which suggests a
// negative sentinel for "no such capture" — confirm against the C header.
pub fn ts_query_capture_id_for_name(
self_: *const TSQuery,
name: *const ::std::os::raw::c_char,
length: u32,
) -> ::std::os::raw::c_int;
}
extern "C" {
/// Creates a `TSQueryContext` for the given query and returns a pointer
/// to it.
// NOTE(review): presumably the caller owns the result and must release it
// with `ts_query_context_delete` — confirm against the C header.
pub fn ts_query_context_new(arg1: *const TSQuery) -> *mut TSQueryContext;
}
extern "C" {
/// Deletes a query context.
// NOTE(review): presumably frees the context's memory, mirroring
// `ts_query_delete` — confirm against the C header.
pub fn ts_query_context_delete(arg1: *mut TSQueryContext);
}
extern "C" {
/// Starts executing the context's query on the given node (`arg2`).
// NOTE(review): this call returns nothing; results are presumably retrieved
// afterwards via `ts_query_context_next` — confirm against the C header.
pub fn ts_query_context_exec(arg1: *mut TSQueryContext, arg2: TSNode);
}
extern "C" {
/// Advances the context and returns a boolean.
// NOTE(review): presumably `true` means a match is now available and
// `false` means there are no more matches — confirm against the C header.
pub fn ts_query_context_next(arg1: *mut TSQueryContext) -> bool;
}
extern "C" {
/// Returns the pattern index of the context's matched pattern.
// NOTE(review): presumably refers to the match most recently reported by
// `ts_query_context_next` — confirm against the C header.
pub fn ts_query_context_matched_pattern_index(arg1: *const TSQueryContext) -> u32;
}
extern "C" {
/// Returns a pointer to the context's matched `TSQueryCapture` values;
/// `arg2` is an out-parameter.
// NOTE(review): presumably `arg2` receives the number of elements behind
// the returned pointer, and the array is owned by the context — confirm
// against the C header.
pub fn ts_query_context_matched_captures(
arg1: *const TSQueryContext,
arg2: *mut u32,
) -> *const TSQueryCapture;
}
extern "C" {
#[doc = " Get the number of distinct node types in the language."]
pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32;

View file

@ -17,7 +17,7 @@ use std::ffi::CStr;
use std::marker::PhantomData;
use std::os::raw::{c_char, c_void};
use std::sync::atomic::AtomicUsize;
use std::{fmt, ptr, str, u16};
use std::{char, fmt, ptr, slice, str, u16};
/// Re-export of the bindings' `TREE_SITTER_LANGUAGE_VERSION` constant as a `usize`.
pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION;
/// Full text of `include/tree_sitter/parser.h`, embedded into the binary at
/// compile time via `include_str!`.
pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h");
@ -136,6 +136,23 @@ pub struct TreePropertyCursor<'a, P> {
source: &'a [u8],
}
/// A compiled query, created with `Query::new`.
///
/// Wraps the raw `TSQuery` pointer returned by `ffi::ts_query_new`; the
/// pointer is passed to `ffi::ts_query_delete` when the value is dropped.
#[derive(Debug)]
pub struct Query {
// Raw handle to the C query object.
ptr: *mut ffi::TSQuery,
// Capture names copied out of the C query at construction time, in
// capture-index order.
capture_names: Vec<String>,
}
/// Wrapper around a raw `TSQueryContext` pointer, created by `Query::context`.
///
/// The `PhantomData<&'a ()>` carries the lifetime `'a`; `Query::context`
/// ties it to the borrow of the `Query` the context was created from. The
/// raw context is deleted when this value is dropped.
pub struct QueryContext<'a>(*mut ffi::TSQueryContext, PhantomData<&'a ()>);
/// A match yielded by the iterator from `QueryContext::exec`.
///
/// It holds only a reference to the context; its accessors read the match
/// data from the context itself.
// NOTE(review): because no data is copied, the accessors presumably reflect
// whichever match the context advanced to most recently — verify before
// holding on to an earlier `QueryMatch` while iterating.
pub struct QueryMatch<'a>(&'a QueryContext<'a>);
/// Reasons why `Query::new` can fail.
///
/// The lifetime `'a` is that of the query source string: the name-carrying
/// variants borrow the offending name directly from it.
#[derive(Debug, PartialEq, Eq)]
pub enum QueryError<'a> {
/// The error offset reported by the C library, as a `usize`.
Syntax(usize),
/// A node type error; holds the name found at the error offset.
NodeType(&'a str),
/// A field error; holds the name found at the error offset.
Field(&'a str),
}
impl Language {
pub fn version(&self) -> usize {
unsafe { ffi::ts_language_version(self.0) as usize }
@ -921,6 +938,117 @@ impl<'a, P> TreePropertyCursor<'a, P> {
}
}
impl Query {
    /// Compiles `source` into a query for `language`.
    ///
    /// `source` is handed to `ffi::ts_query_new` as a pointer/length pair.
    /// On success the capture names are copied out of the C query so that
    /// `capture_names` can return them without further FFI calls.
    ///
    /// # Errors
    ///
    /// When the C library returns a null pointer, the reported error type is
    /// translated into a [`QueryError`]:
    ///
    /// * node-type and field errors carry the name that starts at the
    ///   reported offset, borrowed from `source`;
    /// * every other error type becomes `QueryError::Syntax` with the
    ///   reported offset.
    pub fn new(language: Language, source: &str) -> Result<Self, QueryError> {
        let mut error_offset = 0u32;
        let mut error_type: ffi::TSQueryError = 0;
        let bytes = source.as_bytes();
        // NOTE(review): the length is narrowed to `u32` because that is what
        // the C signature takes; sources of 4 GiB or more would be truncated.
        let ptr = unsafe {
            ffi::ts_query_new(
                language.0,
                bytes.as_ptr() as *const c_char,
                bytes.len() as u32,
                &mut error_offset as *mut u32,
                &mut error_type as *mut ffi::TSQueryError,
            )
        };

        if ptr.is_null() {
            let offset = error_offset as usize;
            return Err(match error_type {
                ffi::TSQueryError_TSQueryErrorNodeType => {
                    QueryError::NodeType(Self::name_at(source, offset))
                }
                ffi::TSQueryError_TSQueryErrorField => {
                    QueryError::Field(Self::name_at(source, offset))
                }
                _ => QueryError::Syntax(offset),
            });
        }

        let capture_count = unsafe { ffi::ts_query_capture_count(ptr) };
        let capture_names = (0..capture_count)
            .map(|index| {
                let mut length = 0u32;
                let name = unsafe {
                    let data = ffi::ts_query_capture_name_for_id(ptr, index, &mut length as *mut u32)
                        as *const u8;
                    slice::from_raw_parts(data, length as usize)
                };
                // Lossy conversion: valid UTF-8 is copied unchanged, and the
                // code no longer relies on an unchecked assumption about what
                // the C library hands back.
                String::from_utf8_lossy(name).into_owned()
            })
            .collect();
        Ok(Query { ptr, capture_names })
    }

    /// Returns the query's capture names, in capture-index order.
    pub fn capture_names(&self) -> &[String] {
        &self.capture_names
    }

    /// Creates a new [`QueryContext`] for this query.
    ///
    /// The returned context borrows `self`, so it cannot outlive the query.
    pub fn context(&self) -> QueryContext {
        let context = unsafe { ffi::ts_query_context_new(self.ptr) };
        QueryContext(context, PhantomData)
    }

    /// Extracts the identifier-like name (alphanumerics, `_` and `-`) that
    /// starts at byte `offset` of `source`.
    ///
    /// Returns an empty string when `offset` is out of range or not on a
    /// character boundary, instead of panicking on a bad offset.
    fn name_at(source: &str, offset: usize) -> &str {
        let suffix = source.get(offset..).unwrap_or("");
        // `find` yields an index relative to `suffix`, so the "runs to the
        // end" fallback must be `suffix.len()`. Using `source.len()` here
        // made the slice below panic whenever `offset > 0` and the name was
        // the last thing in the source.
        let end_offset = suffix
            .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-')
            .unwrap_or(suffix.len());
        &suffix[..end_offset]
    }
}
impl<'a> QueryContext<'a> {
    /// Starts executing the query on `node` and returns an iterator over the
    /// resulting matches.
    ///
    /// `ffi::ts_query_context_exec` is called immediately. Each call to the
    /// iterator's `next` then calls `ffi::ts_query_context_next`: a `true`
    /// result yields a [`QueryMatch`] referring back to this context, and a
    /// `false` result ends the iteration.
    pub fn exec(&'a self, node: Node<'a>) -> impl Iterator<Item = QueryMatch<'a>> + 'a {
        unsafe { ffi::ts_query_context_exec(self.0, node.0) };

        std::iter::from_fn(move || {
            let advanced = unsafe { ffi::ts_query_context_next(self.0) };
            match advanced {
                true => Some(QueryMatch(self)),
                false => None,
            }
        })
    }
}
impl<'a> QueryMatch<'a> {
    /// Returns the pattern index reported by the underlying context for its
    /// matched pattern.
    pub fn pattern_index(&self) -> usize {
        unsafe { ffi::ts_query_context_matched_pattern_index((self.0).0) as usize }
    }

    /// Returns an iterator over the `(capture index, node)` pairs that the
    /// underlying context reports for its matched captures.
    ///
    /// # Panics
    ///
    /// Panics if `Node::new` rejects one of the nodes handed back by the C
    /// library.
    pub fn captures(&self) -> impl ExactSizeIterator<Item = (usize, Node)> {
        let mut capture_count = 0u32;
        let raw = unsafe {
            ffi::ts_query_context_matched_captures((self.0).0, &mut capture_count as *mut u32)
        };
        // `slice::from_raw_parts` requires a non-null, aligned pointer even
        // for a zero-length slice, so a null result from the C side is mapped
        // to an empty slice instead of being passed through.
        let captures: &[ffi::TSQueryCapture] = if raw.is_null() {
            &[]
        } else {
            unsafe { slice::from_raw_parts(raw, capture_count as usize) }
        };
        captures.iter().map(move |capture| {
            let node = Node::new(capture.node).expect("query capture refers to an invalid node");
            (capture.index as usize, node)
        })
    }
}
impl PartialEq for Query {
fn eq(&self, other: &Self) -> bool {
self.ptr == other.ptr
}
}
impl Drop for Query {
    /// Hands the wrapped pointer to `ffi::ts_query_delete`.
    fn drop(&mut self) {
        let raw = self.ptr;
        unsafe {
            ffi::ts_query_delete(raw);
        }
    }
}
impl<'a> Drop for QueryContext<'a> {
    /// Hands the wrapped pointer to `ffi::ts_query_context_delete`.
    fn drop(&mut self) {
        let raw = self.0;
        unsafe {
            ffi::ts_query_context_delete(raw);
        }
    }
}
impl Point {
pub fn new(row: usize, column: usize) -> Self {
Point { row, column }