From e3e1bdba759f3ac4a4a03e891a930d4629518c0e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 17 Mar 2020 13:15:34 -0700 Subject: [PATCH] tags: Start work on C API --- Cargo.lock | 6 - tags/Cargo.toml | 8 +- tags/include/tree_sitter/tags.h | 92 ++++++++++++++++ tags/src/c_lib.rs | 188 ++++++++++++++++++++++++++++++++ tags/src/lib.rs | 2 + 5 files changed, 283 insertions(+), 13 deletions(-) create mode 100644 tags/include/tree_sitter/tags.h create mode 100644 tags/src/c_lib.rs diff --git a/Cargo.lock b/Cargo.lock index 3c8da16a..2c298eed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -603,9 +603,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" name = "serde" version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", -] [[package]] name = "serde_derive" @@ -791,9 +788,6 @@ version = "0.1.6" dependencies = [ "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.6.3", ] diff --git a/tags/Cargo.toml b/tags/Cargo.toml index e6fc2425..43557bb2 100644 --- a/tags/Cargo.toml +++ b/tags/Cargo.toml @@ -9,7 +9,7 @@ authors = [ license = "MIT" readme = "README.md" edition = "2018" -keywords = ["incremental", "parsing", "syntax", "highlighting"] +keywords = ["incremental", "parsing", "syntax", "tagging"] categories = ["parsing", "text-editors"] repository = "https://github.com/tree-sitter/tree-sitter" @@ -18,14 +18,8 @@ crate-type = ["lib", "staticlib"] [dependencies] regex = "1" -serde_json = "1.0" -serde_derive = "1.0" memchr = "2.3" -[dependencies.serde] -version = "1.0" -features = ["derive"] - 
[dependencies.tree-sitter] version = ">= 0.3.7" path = "../lib" diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h new file mode 100644 index 00000000..d492ad31 --- /dev/null +++ b/tags/include/tree_sitter/tags.h @@ -0,0 +1,92 @@ +#ifndef TREE_SITTER_TAGS_H_ +#define TREE_SITTER_TAGS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> +#include "tree_sitter/api.h" + +typedef enum { + TSTagsOk, + TSTagsUnknownScope, + TSTagsTimeout, + TSTagsInvalidLanguage, + TSTagsInvalidUtf8, + TSTagsInvalidRegex, + TSTagsInvalidQuery, +} TSTagsError; + +typedef enum { + TSTagKindFunction, + TSTagKindMethod, + TSTagKindClass, + TSTagKindModule, + TSTagKindCall, +} TSTagKind; + +typedef struct { + TSTagKind kind; + uint32_t start_byte; + uint32_t end_byte; + uint32_t name_start_byte; + uint32_t name_end_byte; + uint32_t line_start_byte; + uint32_t line_end_byte; + TSPoint start_point; + TSPoint end_point; + const char *docs; + uint32_t docs_length; +} TSTag; + +typedef struct TSTagger TSTagger; +typedef struct TSTagsBuffer TSTagsBuffer; + +// Construct a tagger. +TSTagger *ts_tagger_new(); + +// Delete a tagger. +void ts_tagger_delete(TSTagger *); + +// Add a `TSLanguage` to a tagger. The language is associated with a scope name, +// which can be used later to select a language for tagging. Along with the language, +// you must provide two tree query strings, one for matching tags themselves, and one +// specifying local variable definitions. +TSTagsError ts_tagger_add_language( + TSTagger *self, + const char *scope_name, + const TSLanguage *language, + const char *tags_query, + const char *locals_query, + uint32_t tags_query_len, + uint32_t locals_query_len +); + +// Compute the tags for a given document. You must first +// create a `TSTagsBuffer` to hold the output. 
+TSTagsError ts_tagger_tag(
+  const TSTagger *self,
+  const char *scope_name,
+  const char *source_code,
+  uint32_t source_code_len,
+  TSTagsBuffer *output,
+  const size_t *cancellation_flag
+);
+
+// A tags buffer stores the results produced by a tagging call. It can be reused
+// for multiple calls.
+TSTagsBuffer *ts_tags_buffer_new();
+
+// Delete a tags buffer.
+void ts_tags_buffer_delete(TSTagsBuffer *);
+
+// Access the tags within a tag buffer.
+const TSTag *ts_tags_buffer_line_offsets(const TSTagsBuffer *);
+uint32_t ts_tags_buffer_len(const TSTagsBuffer *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_TAGS_H_
diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs
new file mode 100644
index 00000000..714d956e
--- /dev/null
+++ b/tags/src/c_lib.rs
@@ -0,0 +1,283 @@
+//! C-callable bindings for the tagging library.
+//!
+//! The items below mirror the declarations in `tags/include/tree_sitter/tags.h`.
+
+use super::{Error, TagKind, TagsConfiguration, TagsContext};
+use std::collections::HashMap;
+use std::ffi::CStr;
+use std::os::raw::c_char;
+use std::process::abort;
+use std::{fmt, slice, str};
+use tree_sitter::Language;
+
+/// Status code returned by the fallible entry points (`TSTagsError` in C).
+#[repr(C)]
+enum TSTagsError {
+    Ok,
+    UnknownScope,
+    Timeout,
+    InvalidLanguage,
+    InvalidUtf8,
+    InvalidRegex,
+    InvalidQuery,
+}
+
+/// The kind of syntax element a tag refers to (`TSTagKind` in C).
+#[repr(C)]
+enum TSTagKind {
+    Function,
+    Method,
+    Class,
+    Module,
+    Call,
+}
+
+/// A row/column position, laid out like the C `TSPoint`.
+#[repr(C)]
+struct TSPoint {
+    row: u32,
+    column: u32,
+}
+
+/// A single tag, laid out like the C `TSTag`.
+///
+/// `docs` points into the `docs` storage of the `TSTagsBuffer` that holds the
+/// tag and covers `docs_length` bytes.
+#[repr(C)]
+struct TSTag {
+    kind: TSTagKind,
+    start_byte: u32,
+    end_byte: u32,
+    name_start_byte: u32,
+    name_end_byte: u32,
+    line_start_byte: u32,
+    line_end_byte: u32,
+    start_point: TSPoint,
+    end_point: TSPoint,
+    docs: *const u8,
+    docs_length: u32,
+}
+
+/// Maps scope names to the tagging configuration registered for them.
+struct TSTagger {
+    languages: HashMap<String, TagsConfiguration>,
+}
+
+/// Output storage for one tagging call: the tags themselves plus the bytes
+/// that their `docs` pointers refer to.
+struct TSTagsBuffer {
+    context: TagsContext,
+    tags: Vec<TSTag>,
+    docs: Vec<u8>,
+}
+
+/// Creates an empty tagger. Release it with `ts_tagger_delete`.
+#[no_mangle]
+extern "C" fn ts_tagger_new() -> *mut TSTagger {
+    Box::into_raw(Box::new(TSTagger {
+        languages: HashMap::new(),
+    }))
+}
+
+/// Destroys a tagger created by `ts_tagger_new`. A null pointer is ignored.
+#[no_mangle]
+extern "C" fn ts_tagger_delete(this: *mut TSTagger) {
+    if !this.is_null() {
+        // SAFETY: a non-null `this` must come from `ts_tagger_new`, which
+        // produced it with `Box::into_raw`.
+        drop(unsafe { Box::from_raw(this) })
+    }
+}
+
+/// Registers `language` under `scope_name`, compiling both queries.
+///
+/// Returns `InvalidUtf8` when a query is not valid UTF-8, and `InvalidQuery`
+/// or `InvalidRegex` when `TagsConfiguration::new` reports the matching
+/// error. Aborts the process if `this` is null or `scope_name` is not UTF-8.
+///
+/// # Safety
+///
+/// `scope_name` must be a NUL-terminated string, and each query pointer must
+/// be readable for the number of bytes given by its length argument.
+#[no_mangle]
+unsafe extern "C" fn ts_tagger_add_language(
+    this: *mut TSTagger,
+    scope_name: *const c_char,
+    language: Language,
+    tags_query: *const u8,
+    locals_query: *const u8,
+    tags_query_len: u32,
+    locals_query_len: u32,
+) -> TSTagsError {
+    let tagger = unwrap_mut_ptr(this);
+    let scope_name = unwrap(CStr::from_ptr(scope_name).to_str());
+    let tags_query = match str::from_utf8(bytes_from_raw(tags_query, tags_query_len)) {
+        Ok(query) => query,
+        Err(_) => return TSTagsError::InvalidUtf8,
+    };
+    let locals_query = match str::from_utf8(bytes_from_raw(locals_query, locals_query_len)) {
+        Ok(query) => query,
+        Err(_) => return TSTagsError::InvalidUtf8,
+    };
+    match TagsConfiguration::new(language, tags_query, locals_query) {
+        Ok(config) => {
+            tagger.languages.insert(scope_name.to_string(), config);
+            TSTagsError::Ok
+        }
+        Err(Error::Query(_)) => TSTagsError::InvalidQuery,
+        Err(Error::Regex(_)) => TSTagsError::InvalidRegex,
+    }
+}
+
+/// Tags `source_code` with the configuration registered for `scope_name`
+/// and stores the result in `output`, replacing whatever it held before.
+///
+/// Returns `UnknownScope` (leaving `output` untouched) when no language was
+/// registered under `scope_name`. Aborts the process if `this` or `output`
+/// is null or `scope_name` is not UTF-8.
+///
+/// # Safety
+///
+/// `scope_name` must be a NUL-terminated string and `source_code` must be
+/// readable for `source_code_len` bytes.
+#[no_mangle]
+unsafe extern "C" fn ts_tagger_tag(
+    this: *const TSTagger,
+    scope_name: *const c_char,
+    source_code: *const u8,
+    source_code_len: u32,
+    output: *mut TSTagsBuffer,
+    cancellation_flag: *const usize,
+) -> TSTagsError {
+    // TODO: cancellation is not wired up yet; the flag is accepted and ignored.
+    let _ = cancellation_flag;
+
+    let tagger = unwrap_ptr(this);
+    let buffer = unwrap_mut_ptr(output);
+    let scope_name = unwrap(CStr::from_ptr(scope_name).to_str());
+    let config = match tagger.languages.get(scope_name) {
+        Some(config) => config,
+        None => return TSTagsError::UnknownScope,
+    };
+
+    // A reused buffer must not keep tags (and `docs` pointers) from an
+    // earlier call.
+    buffer.tags.clear();
+    buffer.docs.clear();
+
+    let source_code = bytes_from_raw(source_code, source_code_len);
+    for tag in buffer.context.generate_tags(config, source_code) {
+        let prev_docs_len = buffer.docs.len();
+        if let Some(docs) = tag.docs {
+            buffer.docs.extend_from_slice(docs.as_bytes());
+        }
+        buffer.tags.push(TSTag {
+            kind: match tag.kind {
+                TagKind::Function => TSTagKind::Function,
+                TagKind::Method => TSTagKind::Method,
+                TagKind::Class => TSTagKind::Class,
+                TagKind::Module => TSTagKind::Module,
+                TagKind::Call => TSTagKind::Call,
+            },
+            start_byte: tag.range.start as u32,
+            end_byte: tag.range.end as u32,
+            name_start_byte: tag.name_range.start as u32,
+            name_end_byte: tag.name_range.end as u32,
+            line_start_byte: tag.line_range.start as u32,
+            line_end_byte: tag.line_range.end as u32,
+            start_point: TSPoint {
+                row: tag.span.start.row as u32,
+                column: tag.span.start.column as u32,
+            },
+            end_point: TSPoint {
+                row: tag.span.end.row as u32,
+                column: tag.span.end.column as u32,
+            },
+            // Filled in below, once `buffer.docs` has stopped growing.
+            docs: std::ptr::null(),
+            docs_length: (buffer.docs.len() - prev_docs_len) as u32,
+        });
+    }
+
+    // `buffer.docs` may have reallocated while it grew, so pointers taken
+    // inside the loop could dangle. Each tag's docs were appended in order,
+    // so its offset is the sum of the preceding lengths.
+    let docs_base = buffer.docs.as_ptr();
+    let mut docs_offset = 0;
+    for tag in buffer.tags.iter_mut() {
+        // SAFETY: `docs_offset` never exceeds `buffer.docs.len()`.
+        tag.docs = docs_base.add(docs_offset);
+        docs_offset += tag.docs_length as usize;
+    }
+    TSTagsError::Ok
+}
+
+/// Creates an empty tags buffer. Release it with `ts_tags_buffer_delete`.
+#[no_mangle]
+extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer {
+    Box::into_raw(Box::new(TSTagsBuffer {
+        context: TagsContext::new(),
+        tags: Vec::new(),
+        docs: Vec::new(),
+    }))
+}
+
+/// Destroys a buffer created by `ts_tags_buffer_new`. A null pointer is
+/// ignored.
+#[no_mangle]
+extern "C" fn ts_tags_buffer_delete(this: *mut TSTagsBuffer) {
+    if !this.is_null() {
+        // SAFETY: a non-null `this` must come from `ts_tags_buffer_new`,
+        // which produced it with `Box::into_raw`.
+        drop(unsafe { Box::from_raw(this) })
+    }
+}
+
+/// Returns a pointer to the buffer's array of tags. Despite its
+/// name, this yields tags, not line offsets. Aborts if `this` is null.
+#[no_mangle]
+extern "C" fn ts_tags_buffer_line_offsets(this: *const TSTagsBuffer) -> *const TSTag {
+    let buffer = unwrap_ptr(this);
+    buffer.tags.as_ptr()
+}
+
+/// Returns the number of tags stored in the buffer. Aborts if `this` is null.
+#[no_mangle]
+extern "C" fn ts_tags_buffer_len(this: *const TSTagsBuffer) -> u32 {
+    let buffer = unwrap_ptr(this);
+    buffer.tags.len() as u32
+}
+
+/// Views `len` bytes at `ptr` as a slice; a null `ptr` yields an empty slice,
+/// because `slice::from_raw_parts` does not accept null even for length 0.
+unsafe fn bytes_from_raw<'a>(ptr: *const u8, len: u32) -> &'a [u8] {
+    if ptr.is_null() {
+        &[]
+    } else {
+        slice::from_raw_parts(ptr, len as usize)
+    }
+}
+
+/// Dereferences `result`, aborting the process if it is null.
+fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
+    unsafe { result.as_ref() }.unwrap_or_else(|| {
+        eprintln!("{}:{} - pointer must not be null", file!(), line!());
+        abort();
+    })
+}
+
+/// Mutable counterpart of `unwrap_ptr`.
+fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
+    unsafe { result.as_mut() }.unwrap_or_else(|| {
+        eprintln!("{}:{} - pointer must not be null", file!(), line!());
+        abort();
+    })
+}
+
+/// Returns the `Ok` value of `result`, or prints the error and aborts.
+fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
+    result.unwrap_or_else(|error| {
+        eprintln!("tree-sitter tag error: {}", error);
+        abort();
+    })
+}
diff --git a/tags/src/lib.rs b/tags/src/lib.rs
index 8e3625e5..5f579d1d 100644
--- a/tags/src/lib.rs
+++ b/tags/src/lib.rs
@@ -1,3 +1,5 @@
+mod c_lib;
+
 use memchr::{memchr, memrchr};
 use regex::Regex;
 use std::ops::Range;