2023-08-28 21:16:02 +03:00
#![ doc = include_str!( " ../README.md " ) ]
2019-01-07 17:57:27 -08:00
use std ::collections ::HashMap ;
2022-09-06 22:41:52 -07:00
use std ::ffi ::{ OsStr , OsString } ;
2024-02-13 02:59:11 -05:00
use std ::io ::{ BufRead , BufReader } ;
2020-09-24 13:47:27 -07:00
use std ::ops ::Range ;
2019-01-07 17:57:27 -08:00
use std ::path ::{ Path , PathBuf } ;
use std ::process ::Command ;
2019-10-24 12:01:27 -07:00
use std ::sync ::Mutex ;
2019-01-11 13:30:45 -08:00
use std ::time ::SystemTime ;
2023-01-06 06:37:22 +02:00
use std ::{ env , fs , mem } ;
2024-02-12 16:13:02 -05:00
use anyhow ::{ anyhow , Context , Error , Result } ;
use fs4 ::FileExt ;
use indoc ::indoc ;
use libloading ::{ Library , Symbol } ;
use once_cell ::unsync ::OnceCell ;
use regex ::{ Regex , RegexBuilder } ;
use serde ::{ Deserialize , Deserializer , Serialize } ;
2021-09-03 12:49:42 +03:00
use tree_sitter ::{ Language , QueryError , QueryErrorKind } ;
2019-10-24 12:01:27 -07:00
use tree_sitter_highlight ::HighlightConfiguration ;
2020-09-24 15:03:51 -07:00
use tree_sitter_tags ::{ Error as TagsError , TagsConfiguration } ;
2022-09-06 22:41:52 -07:00
use which ::which ;
2023-12-08 19:42:53 -05:00
pub const EMSCRIPTEN_TAG : & str = concat! ( " docker.io/emscripten/emsdk: " , env! ( " EMSCRIPTEN_VERSION " ) ) ;
2019-01-07 17:57:27 -08:00
2021-06-09 15:03:27 -04:00
#[ derive(Default, Deserialize, Serialize) ]
pub struct Config {
#[ serde(default) ]
2021-06-30 22:55:46 +09:00
#[ serde(
rename = " parser-directories " ,
deserialize_with = " deserialize_parser_directories "
) ]
2021-06-09 15:03:27 -04:00
pub parser_directories : Vec < PathBuf > ,
}
2021-06-30 22:55:46 +09:00
// Replace `~` or `$HOME` with home path string.
// (While paths like "~/.tree-sitter/config.json" can be deserialized,
// they're not valid path for I/O modules.)
fn deserialize_parser_directories < ' de , D > ( deserializer : D ) -> Result < Vec < PathBuf > , D ::Error >
where
D : Deserializer < ' de > ,
{
let paths = Vec ::< PathBuf > ::deserialize ( deserializer ) ? ;
2024-02-03 19:38:37 -05:00
let Some ( home ) = dirs ::home_dir ( ) else {
return Ok ( paths ) ;
2021-06-30 22:55:46 +09:00
} ;
let standardized = paths
. into_iter ( )
. map ( | path | standardize_path ( path , & home ) )
. collect ( ) ;
Ok ( standardized )
}
/// Expands a leading `~` or `$HOME` path component to `home`.
///
/// Only a whole first component is replaced: `~/x` becomes `<home>/x`, while `~user/x`
/// and paths without either prefix are returned unchanged.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    for prefix in ["~", "$HOME"] {
        if let Ok(rest) = path.strip_prefix(prefix) {
            return home.join(rest);
        }
    }
    path
}
2021-06-09 15:03:27 -04:00
impl Config {
2024-02-03 19:38:37 -05:00
#[ must_use ]
pub fn initial ( ) -> Self {
2021-06-09 15:03:27 -04:00
let home_dir = dirs ::home_dir ( ) . expect ( " Cannot determine home directory " ) ;
2024-02-03 19:38:37 -05:00
Self {
2021-06-09 15:03:27 -04:00
parser_directories : vec ! [
home_dir . join ( " github " ) ,
home_dir . join ( " src " ) ,
home_dir . join ( " source " ) ,
2024-02-21 02:09:00 -05:00
home_dir . join ( " projects " ) ,
home_dir . join ( " dev " ) ,
home_dir . join ( " git " ) ,
2021-06-09 15:03:27 -04:00
] ,
}
}
}
2019-01-07 17:57:27 -08:00
/// File extension given to compiled parser libraries on Unix-like targets.
#[cfg(unix)]
const DYLIB_EXTENSION: &str = "so";

/// File extension given to compiled parser libraries on Windows.
#[cfg(windows)]
const DYLIB_EXTENSION: &str = "dll";
2019-01-07 17:57:27 -08:00
2023-09-21 00:52:50 -04:00
const BUILD_TARGET : & str = env! ( " BUILD_TARGET " ) ;
2019-01-16 14:09:19 -08:00
2019-10-24 12:01:27 -07:00
pub struct LanguageConfiguration < ' a > {
2019-08-07 17:41:45 -07:00
pub scope : Option < String > ,
pub content_regex : Option < Regex > ,
2024-02-13 07:51:41 +01:00
pub first_line_regex : Option < Regex > ,
2019-08-07 17:41:45 -07:00
pub injection_regex : Option < Regex > ,
pub file_types : Vec < String > ,
2019-09-18 17:35:47 -07:00
pub root_path : PathBuf ,
2019-10-17 10:14:05 -07:00
pub highlights_filenames : Option < Vec < String > > ,
pub injections_filenames : Option < Vec < String > > ,
pub locals_filenames : Option < Vec < String > > ,
2020-03-13 16:13:19 -07:00
pub tags_filenames : Option < Vec < String > > ,
2023-08-18 16:14:01 -04:00
pub language_name : String ,
2019-08-07 17:41:45 -07:00
language_id : usize ,
2019-09-18 17:35:47 -07:00
highlight_config : OnceCell < Option < HighlightConfiguration > > ,
2020-03-04 14:27:31 -08:00
tags_config : OnceCell < Option < TagsConfiguration > > ,
2019-10-24 12:01:27 -07:00
highlight_names : & ' a Mutex < Vec < String > > ,
use_all_highlight_names : bool ,
2019-01-07 17:57:27 -08:00
}
pub struct Loader {
parser_lib_path : PathBuf ,
2024-02-16 14:42:19 -05:00
languages_by_id : Vec < ( PathBuf , OnceCell < Language > , Option < Vec < PathBuf > > ) > ,
2019-10-24 12:01:27 -07:00
language_configurations : Vec < LanguageConfiguration < 'static > > ,
2019-08-07 17:41:45 -07:00
language_configuration_ids_by_file_type : HashMap < String , Vec < usize > > ,
2023-09-21 00:52:50 -04:00
language_configuration_in_current_path : Option < usize > ,
2024-02-13 07:51:41 +01:00
language_configuration_ids_by_first_line_regex : HashMap < String , Vec < usize > > ,
2019-10-24 12:01:27 -07:00
highlight_names : Box < Mutex < Vec < String > > > ,
use_all_highlight_names : bool ,
2021-09-08 00:08:13 +03:00
debug_build : bool ,
2022-09-06 22:41:52 -07:00
#[ cfg(feature = " wasm " ) ]
wasm_store : Mutex < Option < tree_sitter ::WasmStore > > ,
2019-01-07 17:57:27 -08:00
}
// NOTE(review): no safety argument is visible in this file for these impls. `Loader`
// stores `once_cell::unsync::OnceCell` values, so declaring it `Sync` presumably relies on
// callers never initializing the same cell from two threads at once — TODO confirm and
// record the invariant as a `// SAFETY:` comment.
unsafe impl Send for Loader { }
unsafe impl Sync for Loader { }
impl Loader {
2021-06-09 15:03:27 -04:00
pub fn new ( ) -> Result < Self > {
2023-01-06 06:37:22 +02:00
let parser_lib_path = match env ::var ( " TREE_SITTER_LIBDIR " ) {
Ok ( path ) = > PathBuf ::from ( path ) ,
_ = > dirs ::cache_dir ( )
2024-02-03 19:38:37 -05:00
. ok_or_else ( | | anyhow! ( " Cannot determine cache directory " ) ) ?
2023-01-11 04:11:04 +02:00
. join ( " tree-sitter " )
. join ( " lib " ) ,
2023-01-06 06:37:22 +02:00
} ;
2021-06-09 15:03:27 -04:00
Ok ( Self ::with_parser_lib_path ( parser_lib_path ) )
}
2024-02-03 19:38:37 -05:00
#[ must_use ]
2021-06-09 15:03:27 -04:00
pub fn with_parser_lib_path ( parser_lib_path : PathBuf ) -> Self {
2024-02-03 19:38:37 -05:00
Self {
2019-01-07 17:57:27 -08:00
parser_lib_path ,
2019-08-07 17:41:45 -07:00
languages_by_id : Vec ::new ( ) ,
language_configurations : Vec ::new ( ) ,
2019-01-11 13:30:45 -08:00
language_configuration_ids_by_file_type : HashMap ::new ( ) ,
2023-09-21 00:52:50 -04:00
language_configuration_in_current_path : None ,
2024-02-13 07:51:41 +01:00
language_configuration_ids_by_first_line_regex : HashMap ::new ( ) ,
2019-10-24 12:01:27 -07:00
highlight_names : Box ::new ( Mutex ::new ( Vec ::new ( ) ) ) ,
use_all_highlight_names : true ,
2021-09-08 00:08:13 +03:00
debug_build : false ,
2022-09-06 22:41:52 -07:00
#[ cfg(feature = " wasm " ) ]
2024-02-06 23:18:27 +01:00
wasm_store : Mutex ::default ( ) ,
2019-01-07 17:57:27 -08:00
}
}
2023-09-21 00:52:50 -04:00
pub fn configure_highlights ( & mut self , names : & [ String ] ) {
2019-10-24 12:01:27 -07:00
self . use_all_highlight_names = false ;
let mut highlights = self . highlight_names . lock ( ) . unwrap ( ) ;
highlights . clear ( ) ;
highlights . extend ( names . iter ( ) . cloned ( ) ) ;
}
2024-02-03 19:38:37 -05:00
#[ must_use ]
2019-10-24 12:01:27 -07:00
pub fn highlight_names ( & self ) -> Vec < String > {
self . highlight_names . lock ( ) . unwrap ( ) . clone ( )
}
2021-06-09 15:03:27 -04:00
pub fn find_all_languages ( & mut self , config : & Config ) -> Result < ( ) > {
2021-06-10 09:43:07 -04:00
if config . parser_directories . is_empty ( ) {
eprintln! ( " Warning: You have not configured any parser directories! " ) ;
eprintln! ( " Please run `tree-sitter init-config` and edit the resulting " ) ;
eprintln! ( " configuration file to indicate where we should look for " ) ;
2023-09-21 00:52:50 -04:00
eprintln! ( " language grammars. \n " ) ;
2021-06-10 09:43:07 -04:00
}
2021-06-09 15:03:27 -04:00
for parser_container_dir in & config . parser_directories {
2019-02-06 12:59:19 -08:00
if let Ok ( entries ) = fs ::read_dir ( parser_container_dir ) {
for entry in entries {
let entry = entry ? ;
if let Some ( parser_dir_name ) = entry . file_name ( ) . to_str ( ) {
if parser_dir_name . starts_with ( " tree-sitter- " ) {
2019-08-07 17:41:45 -07:00
self . find_language_configurations_at_path (
& parser_container_dir . join ( parser_dir_name ) ,
2023-09-21 00:52:50 -04:00
false ,
2019-08-07 17:41:45 -07:00
)
. ok ( ) ;
2019-02-06 12:59:19 -08:00
}
2019-01-07 17:57:27 -08:00
}
}
}
}
Ok ( ( ) )
}
2019-08-07 17:41:45 -07:00
/// Loads every distinct language that has a configuration at `path`.
///
/// Returns an empty list when no configurations can be found there.
///
/// # Errors
///
/// Fails if any of the discovered languages cannot be loaded.
pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<Language>> {
    let Ok(configurations) = self.find_language_configurations_at_path(path, true) else {
        return Ok(Vec::new());
    };

    // Several configurations may share one language; load each language only once.
    let mut unique_ids: Vec<usize> = configurations.iter().map(|c| c.language_id).collect();
    unique_ids.sort_unstable();
    unique_ids.dedup();

    let mut languages = Vec::with_capacity(unique_ids.len());
    for id in unique_ids {
        languages.push(self.language_for_id(id)?);
    }
    Ok(languages)
}
2024-02-03 19:38:37 -05:00
#[ must_use ]
2019-08-07 17:41:45 -07:00
pub fn get_all_language_configurations ( & self ) -> Vec < ( & LanguageConfiguration , & Path ) > {
self . language_configurations
. iter ( )
. map ( | c | ( c , self . languages_by_id [ c . language_id ] . 0. as_ref ( ) ) )
. collect ( )
}
2019-02-20 14:38:19 -08:00
/// Finds the first language configuration whose scope equals `scope` and loads its
/// language.
///
/// Returns `Ok(None)` when no configuration declares that scope.
///
/// # Errors
///
/// Fails if the matching language cannot be loaded.
pub fn language_configuration_for_scope(
    &self,
    scope: &str,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
    let matching = self
        .language_configurations
        .iter()
        .find(|candidate| candidate.scope.as_deref() == Some(scope));

    match matching {
        Some(configuration) => {
            let language = self.language_for_id(configuration.language_id)?;
            Ok(Some((language, configuration)))
        }
        None => Ok(None),
    }
}
2024-02-13 02:59:11 -05:00
/// Finds a language configuration whose first-line regex matches the first line of the
/// file at `path`, and loads its language.
///
/// Patterns that fail to compile and patterns without any associated configuration are
/// skipped. The first matching pattern wins (map iteration order is unspecified), and
/// the first configuration registered for it is used. Returns `Ok(None)` when nothing
/// matches or the file is empty.
///
/// # Errors
///
/// Fails if the file cannot be opened or read, or if the matching language cannot be
/// loaded.
pub fn language_configuration_for_first_line_regex(
    &self,
    path: &Path,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
    // Read lazily, and only once: the file is not touched unless at least one pattern
    // is usable.
    let mut first_line: Option<Option<String>> = None;

    for (pattern, ids) in &self.language_configuration_ids_by_first_line_regex {
        let Some(regex) = Self::regex(Some(pattern)) else {
            continue;
        };
        let Some(&configuration_id) = ids.first() else {
            continue;
        };

        if first_line.is_none() {
            let file = fs::File::open(path)?;
            first_line = Some(BufReader::new(file).lines().next().transpose()?);
        }
        let Some(line) = first_line.as_ref().and_then(Option::as_ref) else {
            // The file has no first line, so no pattern can match.
            return Ok(None);
        };

        // Return as soon as a pattern matches. Folding over all patterns let a later,
        // non-matching pattern overwrite an earlier match with `None`.
        if regex.is_match(line) {
            let configuration = &self.language_configurations[configuration_id];
            let language = self.language_for_id(configuration.language_id)?;
            return Ok(Some((language, configuration)));
        }
    }

    Ok(None)
}
2019-01-11 13:30:45 -08:00
pub fn language_configuration_for_file_name (
2019-02-19 11:24:50 -08:00
& self ,
2019-01-07 17:57:27 -08:00
path : & Path ,
2019-02-06 16:03:50 -08:00
) -> Result < Option < ( Language , & LanguageConfiguration ) > > {
2019-08-07 17:41:45 -07:00
// Find all the language configurations that match this file name
// or a suffix of the file name.
let configuration_ids = path
2019-01-07 17:57:27 -08:00
. file_name ( )
. and_then ( | n | n . to_str ( ) )
2019-01-11 13:30:45 -08:00
. and_then ( | file_name | self . language_configuration_ids_by_file_type . get ( file_name ) )
2019-01-07 17:57:27 -08:00
. or_else ( | | {
path . extension ( )
. and_then ( | extension | extension . to_str ( ) )
. and_then ( | extension | {
2019-01-11 13:30:45 -08:00
self . language_configuration_ids_by_file_type . get ( extension )
2019-01-07 17:57:27 -08:00
} )
} ) ;
2019-08-07 17:41:45 -07:00
if let Some ( configuration_ids ) = configuration_ids {
if ! configuration_ids . is_empty ( ) {
2024-02-03 19:38:37 -05:00
let configuration = if configuration_ids . len ( ) = = 1 {
& self . language_configurations [ configuration_ids [ 0 ] ]
2019-08-07 17:41:45 -07:00
}
// If multiple language configurations match, then determine which
// one to use by applying the configurations' content regexes.
else {
2024-02-03 19:38:37 -05:00
let file_contents =
fs ::read ( path ) . with_context ( | | format! ( " Failed to read path {path:?} " ) ) ? ;
2020-07-23 16:05:50 -07:00
let file_contents = String ::from_utf8_lossy ( & file_contents ) ;
2019-08-07 17:41:45 -07:00
let mut best_score = - 2 isize ;
let mut best_configuration_id = None ;
for configuration_id in configuration_ids {
let config = & self . language_configurations [ * configuration_id ] ;
// If the language configuration has a content regex, assign
// a score based on the length of the first match.
let score ;
if let Some ( content_regex ) = & config . content_regex {
if let Some ( mat ) = content_regex . find ( & file_contents ) {
score = ( mat . end ( ) - mat . start ( ) ) as isize ;
}
// If the content regex does not match, then *penalize* this
// language configuration, so that language configurations
// without content regexes are preferred over those with
// non-matching content regexes.
else {
score = - 1 ;
}
} else {
score = 0 ;
}
if score > best_score {
best_configuration_id = Some ( * configuration_id ) ;
best_score = score ;
}
}
2024-02-03 19:38:37 -05:00
& self . language_configurations [ best_configuration_id . unwrap ( ) ]
} ;
2019-08-07 17:41:45 -07:00
let language = self . language_for_id ( configuration . language_id ) ? ;
return Ok ( Some ( ( language , configuration ) ) ) ;
2019-01-07 17:57:27 -08:00
}
}
2019-08-07 17:41:45 -07:00
2019-01-07 17:57:27 -08:00
Ok ( None )
}
2019-02-19 11:24:50 -08:00
/// Finds the language configuration whose injection regex produces the longest match
/// in `string`, and loads its language.
///
/// Empty matches never qualify, and on equal lengths the earliest configuration is
/// kept. Returns `Ok(None)` when no injection regex matches.
///
/// # Errors
///
/// Fails if the selected language cannot be loaded.
pub fn language_configuration_for_injection_string(
    &self,
    string: &str,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
    let match_lengths = self.language_configurations.iter().filter_map(|candidate| {
        let mat = candidate.injection_regex.as_ref()?.find(string)?;
        Some((candidate, mat.end() - mat.start()))
    });

    let mut longest = 0;
    let mut winner = None;
    for (candidate, length) in match_lengths {
        if length > longest {
            longest = length;
            winner = Some(candidate);
        }
    }

    match winner {
        Some(configuration) => {
            let language = self.language_for_id(configuration.language_id)?;
            Ok(Some((language, configuration)))
        }
        None => Ok(None),
    }
}
2019-08-07 17:41:45 -07:00
/// Returns the language registered under `id`, loading it from `<language root>/src`
/// on first use and reusing the cached value afterwards.
///
/// # Panics
///
/// Panics if `id` is not a valid index into `languages_by_id`.
fn language_for_id(&self, id: usize) -> Result<Language> {
    let (path, language, externals) = &self.languages_by_id[id];
    language
        .get_or_try_init(|| {
            let src_path = path.join("src");
            self.load_language_at_path(&src_path, &[&src_path], externals.as_deref())
        })
        .cloned()
}
2024-02-02 10:26:18 -05:00
pub fn load_language_at_path (
& self ,
src_path : & Path ,
header_paths : & [ & Path ] ,
2024-02-16 14:42:19 -05:00
external_files : Option < & [ PathBuf ] > ,
2024-02-02 10:26:18 -05:00
) -> Result < Language > {
2019-02-13 19:30:59 -08:00
let grammar_path = src_path . join ( " grammar.json " ) ;
2019-01-11 13:30:45 -08:00
2019-02-13 19:30:59 -08:00
#[ derive(Deserialize) ]
struct GrammarJSON {
name : String ,
}
2019-05-30 16:52:30 -07:00
let mut grammar_file =
2021-06-09 12:32:22 -04:00
fs ::File ::open ( grammar_path ) . with_context ( | | " Failed to read grammar.json " ) ? ;
2019-05-30 16:52:30 -07:00
let grammar_json : GrammarJSON = serde_json ::from_reader ( BufReader ::new ( & mut grammar_file ) )
2021-06-09 12:32:22 -04:00
. with_context ( | | " Failed to parse grammar.json " ) ? ;
2019-02-13 19:30:59 -08:00
2024-02-16 14:42:19 -05:00
self . load_language_at_path_with_name (
src_path ,
header_paths ,
& grammar_json . name ,
external_files ,
)
2019-01-07 17:57:27 -08:00
}
2022-09-06 22:41:52 -07:00
pub fn load_language_at_path_with_name (
2019-01-11 13:30:45 -08:00
& self ,
2022-09-06 22:41:52 -07:00
src_path : & Path ,
2024-02-02 10:26:18 -05:00
header_paths : & [ & Path ] ,
2022-09-06 22:41:52 -07:00
name : & str ,
2024-02-16 14:42:19 -05:00
external_files : Option < & [ PathBuf ] > ,
2019-02-06 16:03:50 -08:00
) -> Result < Language > {
2021-09-08 00:08:13 +03:00
let mut lib_name = name . to_string ( ) ;
2022-09-06 22:41:52 -07:00
let language_fn_name = format! ( " tree_sitter_ {} " , replace_dashes_with_underscores ( name ) ) ;
2021-09-08 00:08:13 +03:00
if self . debug_build {
lib_name . push_str ( " .debug._ " ) ;
}
2022-09-06 22:41:52 -07:00
fs ::create_dir_all ( & self . parser_lib_path ) ? ;
2023-10-31 16:56:11 -07:00
let mut library_path = self . parser_lib_path . join ( lib_name ) ;
library_path . set_extension ( DYLIB_EXTENSION ) ;
2022-09-06 22:41:52 -07:00
let parser_path = src_path . join ( " parser.c " ) ;
2024-02-03 19:38:37 -05:00
let scanner_path = self . get_scanner_path ( src_path ) ;
2019-01-07 17:57:27 -08:00
2024-02-16 22:42:06 -05:00
let mut paths_to_check = vec! [ parser_path . clone ( ) ] ;
if let Some ( scanner_path ) = scanner_path . as_ref ( ) {
2024-02-17 19:34:55 -05:00
paths_to_check . push ( scanner_path . clone ( ) ) ;
2024-02-16 22:42:06 -05:00
}
paths_to_check . extend (
external_files
. unwrap_or_default ( )
. iter ( )
. map ( | p | src_path . join ( p ) ) ,
) ;
2024-02-16 14:42:19 -05:00
2023-10-31 16:56:11 -07:00
#[ cfg(feature = " wasm " ) ]
2022-09-07 14:40:11 -07:00
if self . wasm_store . lock ( ) . unwrap ( ) . is_some ( ) {
library_path . set_extension ( " wasm " ) ;
}
2024-02-16 14:42:19 -05:00
let mut recompile = needs_recompile ( & library_path , & paths_to_check )
2021-06-09 12:32:22 -04:00
. with_context ( | | " Failed to compare source and binary timestamps " ) ? ;
2019-05-30 16:52:30 -07:00
2023-10-31 16:56:11 -07:00
#[ cfg(feature = " wasm " ) ]
2022-09-06 22:41:52 -07:00
if let Some ( wasm_store ) = self . wasm_store . lock ( ) . unwrap ( ) . as_mut ( ) {
if recompile {
self . compile_parser_to_wasm (
name ,
src_path ,
scanner_path
. as_ref ( )
2024-02-03 19:38:37 -05:00
. and_then ( | p | p . strip_prefix ( src_path ) . ok ( ) ) ,
2022-09-06 22:41:52 -07:00
& library_path ,
false ,
) ? ;
2019-01-11 14:44:32 -08:00
}
2019-01-11 13:30:45 -08:00
2022-09-06 22:41:52 -07:00
let wasm_bytes = fs ::read ( & library_path ) ? ;
2023-11-26 10:41:33 -08:00
return Ok ( wasm_store . load_language ( name , & wasm_bytes ) ? ) ;
2023-10-31 16:56:11 -07:00
}
2024-02-07 09:30:24 -05:00
let lock_path = if env ::var ( " CROSS_RUNNER " ) . is_ok ( ) {
PathBuf ::from ( " /tmp " )
. join ( " tree-sitter " )
. join ( " lock " )
. join ( format! ( " {name} .lock " ) )
} else {
dirs ::cache_dir ( )
2024-02-16 16:25:24 -05:00
. ok_or_else ( | | anyhow! ( " Cannot determine cache directory " ) ) ?
2024-02-07 09:30:24 -05:00
. join ( " tree-sitter " )
. join ( " lock " )
. join ( format! ( " {name} .lock " ) )
} ;
if let Ok ( lock_file ) = fs ::OpenOptions ::new ( ) . write ( true ) . open ( & lock_path ) {
recompile = false ;
if lock_file . try_lock_exclusive ( ) . is_err ( ) {
// if we can't acquire the lock, another process is compiling the parser, wait for it and don't recompile
lock_file . lock_exclusive ( ) ? ;
recompile = false ;
} else {
// if we can acquire the lock, check if the lock file is older than 30 seconds, a
// run that was interrupted and left the lock file behind should not block
// subsequent runs
let time = lock_file . metadata ( ) ? . modified ( ) ? . elapsed ( ) ? . as_secs ( ) ;
if time > 30 {
fs ::remove_file ( & lock_path ) ? ;
recompile = true ;
}
2022-09-06 22:41:52 -07:00
}
2024-02-07 09:30:24 -05:00
}
2022-09-06 22:41:52 -07:00
2024-02-07 09:30:24 -05:00
if recompile {
fs ::create_dir_all ( lock_path . parent ( ) . unwrap ( ) ) . with_context ( | | {
format! (
" Failed to create directory {:?} " ,
lock_path . parent ( ) . unwrap ( )
)
} ) ? ;
let lock_file = fs ::OpenOptions ::new ( )
. create ( true )
. truncate ( true )
. write ( true )
. open ( & lock_path ) ? ;
lock_file . lock_exclusive ( ) ? ;
self . compile_parser_to_dylib (
header_paths ,
& parser_path ,
2024-02-17 00:00:52 -05:00
scanner_path . as_deref ( ) ,
2024-02-07 09:30:24 -05:00
& library_path ,
& lock_file ,
& lock_path ,
) ? ;
2024-02-17 00:00:52 -05:00
if scanner_path . is_some ( ) {
self . check_external_scanner ( name , & library_path ) ? ;
}
2022-09-06 22:41:52 -07:00
}
2024-02-07 09:30:24 -05:00
let library = unsafe { Library ::new ( & library_path ) }
. with_context ( | | format! ( " Error opening dynamic library {library_path:?} " ) ) ? ;
let language = unsafe {
2024-02-19 14:50:29 -05:00
let language_fn = library
. get ::< Symbol < unsafe extern " C " fn ( ) -> Language > > ( language_fn_name . as_bytes ( ) )
2024-02-07 09:30:24 -05:00
. with_context ( | | format! ( " Failed to load symbol {language_fn_name} " ) ) ? ;
language_fn ( )
} ;
mem ::forget ( library ) ;
Ok ( language )
2022-09-06 22:41:52 -07:00
}
fn compile_parser_to_dylib (
& self ,
2024-02-02 10:26:18 -05:00
header_paths : & [ & Path ] ,
2022-09-06 22:41:52 -07:00
parser_path : & Path ,
2024-02-17 00:00:52 -05:00
scanner_path : Option < & Path > ,
library_path : & Path ,
2024-02-07 09:30:24 -05:00
lock_file : & fs ::File ,
lock_path : & Path ,
2022-09-06 22:41:52 -07:00
) -> Result < ( ) , Error > {
let mut config = cc ::Build ::new ( ) ;
config
. cpp ( true )
. opt_level ( 2 )
. cargo_metadata ( false )
2024-02-22 09:13:59 -05:00
. cargo_warnings ( false )
2022-09-06 22:41:52 -07:00
. target ( BUILD_TARGET )
2023-10-27 11:57:04 +01:00
. host ( BUILD_TARGET )
. flag_if_supported ( " -Werror=implicit-function-declaration " ) ;
2022-09-06 22:41:52 -07:00
let compiler = config . get_compiler ( ) ;
let mut command = Command ::new ( compiler . path ( ) ) ;
for ( key , value ) in compiler . env ( ) {
command . env ( key , value ) ;
}
2023-10-27 11:57:04 +01:00
if compiler . is_like_msvc ( ) {
2024-02-03 19:38:37 -05:00
command . args ( [ " /nologo " , " /LD " ] ) ;
for path in header_paths {
2024-02-02 10:26:18 -05:00
command . arg ( format! ( " /I {} " , path . to_string_lossy ( ) ) ) ;
2024-02-03 19:38:37 -05:00
}
2022-09-06 22:41:52 -07:00
if self . debug_build {
command . arg ( " /Od " ) ;
2019-01-11 13:30:45 -08:00
} else {
2022-09-06 22:41:52 -07:00
command . arg ( " /O2 " ) ;
}
command . arg ( parser_path ) ;
2024-02-14 15:07:13 -05:00
2022-09-06 22:41:52 -07:00
if let Some ( scanner_path ) = scanner_path . as_ref ( ) {
2024-02-14 15:07:13 -05:00
if scanner_path . extension ( ) ! = Some ( " c " . as_ref ( ) ) {
eprintln! ( " Warning: Using a C++ scanner is now deprecated. Please migrate your scanner code to C, as C++ support will be removed in the near future. " ) ;
}
2022-09-06 22:41:52 -07:00
command . arg ( scanner_path ) ;
}
command
. arg ( " /link " )
2023-10-27 11:57:04 +01:00
. arg ( format! ( " /out: {} " , library_path . to_str ( ) . unwrap ( ) ) ) ;
2022-09-06 22:41:52 -07:00
} else {
command
. arg ( " -shared " )
. arg ( " -fno-exceptions " )
. arg ( " -g " )
. arg ( " -o " )
2024-02-03 19:38:37 -05:00
. arg ( library_path ) ;
2023-10-27 11:57:04 +01:00
2024-02-03 19:38:37 -05:00
for path in header_paths {
2024-02-02 10:26:18 -05:00
command . arg ( format! ( " -I {} " , path . to_string_lossy ( ) ) ) ;
2024-02-03 19:38:37 -05:00
}
2024-02-02 10:26:18 -05:00
2023-10-27 11:57:04 +01:00
if ! cfg! ( windows ) {
command . arg ( " -fPIC " ) ;
}
2022-09-06 22:41:52 -07:00
if self . debug_build {
command . arg ( " -O0 " ) ;
} else {
command . arg ( " -O2 " ) ;
}
2021-06-20 13:23:59 +03:00
2022-09-06 22:41:52 -07:00
if let Some ( scanner_path ) = scanner_path . as_ref ( ) {
if scanner_path . extension ( ) = = Some ( " c " . as_ref ( ) ) {
command . arg ( " -xc " ) . arg ( " -std=c99 " ) . arg ( scanner_path ) ;
} else {
2024-02-14 15:07:13 -05:00
eprintln! ( " Warning: Using a C++ scanner is now deprecated. Please migrate your scanner code to C, as C++ support will be removed in the near future. " ) ;
2022-09-06 22:41:52 -07:00
command . arg ( scanner_path ) ;
2019-01-11 13:30:45 -08:00
}
2019-01-07 17:57:27 -08:00
}
2022-09-06 22:41:52 -07:00
command . arg ( " -xc " ) . arg ( parser_path ) ;
}
2019-01-11 13:30:45 -08:00
2024-02-22 09:13:59 -05:00
// For conditional compilation of external scanner code when
// used internally by `tree-sitter parse` and other sub commands.
command . arg ( " -DTREE_SITTER_INTERNAL_BUILD " ) ;
// Always use the same allocator in the CLI as any scanner, useful for debugging and
// tracking memory leaks in tests.
2024-02-26 19:22:38 -05:00
#[ cfg(not(any(target_os = " macos " , target_os = " ios " ))) ]
2024-02-22 09:13:59 -05:00
command . arg ( " -DTS_REUSE_ALLOCATOR " ) ;
2024-02-14 13:09:40 -05:00
let output = command . output ( ) . with_context ( | | {
format! ( " Failed to execute the C compiler with the following command: \n {command:?} " )
} ) ? ;
2024-02-07 09:30:24 -05:00
lock_file . unlock ( ) ? ;
fs ::remove_file ( lock_path ) ? ;
2022-09-06 22:41:52 -07:00
if ! output . status . success ( ) {
return Err ( anyhow! (
" Parser compilation failed. \n Stdout: {} \n Stderr: {} " ,
String ::from_utf8_lossy ( & output . stdout ) ,
String ::from_utf8_lossy ( & output . stderr )
) ) ;
}
2019-01-07 17:57:27 -08:00
2024-02-17 00:00:52 -05:00
Ok ( ( ) )
}
#[ cfg(unix) ]
fn check_external_scanner ( & self , name : & str , library_path : & Path ) -> Result < ( ) > {
let prefix = if cfg! ( target_os = " macos " ) { " _ " } else { " " } ;
let mut must_have = vec! [
format! ( " {prefix} tree_sitter_ {name} _external_scanner_create " ) ,
format! ( " {prefix} tree_sitter_ {name} _external_scanner_destroy " ) ,
format! ( " {prefix} tree_sitter_ {name} _external_scanner_serialize " ) ,
format! ( " {prefix} tree_sitter_ {name} _external_scanner_deserialize " ) ,
format! ( " {prefix} tree_sitter_ {name} _external_scanner_scan " ) ,
] ;
let command = Command ::new ( " nm " )
. arg ( " -W " )
. arg ( " -U " )
. arg ( library_path )
. output ( ) ;
if let Ok ( output ) = command {
if output . status . success ( ) {
let mut found_non_static = false ;
for line in String ::from_utf8_lossy ( & output . stdout ) . lines ( ) {
if line . contains ( " T " ) {
if let Some ( function_name ) =
line . split_whitespace ( ) . collect ::< Vec < _ > > ( ) . get ( 2 )
{
if ! line . contains ( " tree_sitter_ " ) {
2023-08-16 01:24:45 -04:00
if ! found_non_static {
found_non_static = true ;
2024-02-17 00:00:52 -05:00
eprintln! ( " Warning: Found non-static non-tree-sitter functions in the external scannner " ) ;
2023-08-16 01:24:45 -04:00
}
eprintln! ( " ` {function_name} ` " ) ;
2024-02-17 00:00:52 -05:00
} else {
must_have . retain ( | f | f ! = function_name ) ;
2023-08-16 01:24:45 -04:00
}
}
}
2024-02-17 00:00:52 -05:00
}
if found_non_static {
eprintln! ( " Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name " ) ;
}
if ! must_have . is_empty ( ) {
let missing = must_have
. iter ( )
. map ( | f | format! ( " ` {f} ` " ) )
. collect ::< Vec < _ > > ( )
. join ( " \n " ) ;
return Err ( anyhow! ( format! (
indoc! { "
Missing required functions in the external scanner , parsing won ' t work without these !
{ }
You can read more about this at https ://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
" },
missing ,
) ) ) ;
2023-08-16 01:24:45 -04:00
}
}
}
2022-09-06 22:41:52 -07:00
Ok ( ( ) )
}
2019-01-11 13:30:45 -08:00
2024-02-17 00:00:52 -05:00
/// Windows counterpart of the external scanner check.
///
/// No symbol inspection is performed on this platform: the function ignores its
/// arguments and always returns `Ok(())`.
#[ cfg(windows) ]
fn check_external_scanner ( & self , _name : & str , _library_path : & Path ) -> Result < ( ) > {
// TODO: there's no nm command on windows, whoever wants to implement this can and should :)
// An implementation would need to verify that the library exports these symbols:
// let mut must_have = vec![
// format!("tree_sitter_{name}_external_scanner_create"),
// format!("tree_sitter_{name}_external_scanner_destroy"),
// format!("tree_sitter_{name}_external_scanner_serialize"),
// format!("tree_sitter_{name}_external_scanner_deserialize"),
// format!("tree_sitter_{name}_external_scanner_scan"),
// ];
Ok ( ( ) )
}
2023-10-31 16:56:11 -07:00
pub fn compile_parser_to_wasm (
2022-09-06 22:41:52 -07:00
& self ,
language_name : & str ,
src_path : & Path ,
scanner_filename : Option < & Path > ,
2024-02-17 00:00:52 -05:00
output_path : & Path ,
2022-09-06 22:41:52 -07:00
force_docker : bool ,
) -> Result < ( ) , Error > {
2023-12-08 19:42:53 -05:00
#[ derive(PartialEq, Eq) ]
enum EmccSource {
Native ( PathBuf ) ,
Docker ,
Podman ,
}
fn path_of_bin (
name : & str ,
test : impl Fn ( & Path ) -> std ::io ::Result < std ::process ::Output > ,
) -> Option < PathBuf > {
let bin_path = which ( name ) . ok ( ) ? ;
if test ( & bin_path ) . is_ok ( ) {
Some ( bin_path )
} else {
None
2019-01-11 14:44:32 -08:00
}
2023-12-08 19:42:53 -05:00
}
2022-09-06 22:41:52 -07:00
2023-12-08 19:42:53 -05:00
// Order of preference: emscripten > docker > podman > error
let source = if force_docker {
None
2022-09-06 22:41:52 -07:00
} else {
2023-12-08 19:42:53 -05:00
path_of_bin ( if cfg! ( windows ) { " emcc.bat " } else { " emcc " } , | p | {
Command ::new ( p ) . output ( )
} )
. map ( EmccSource ::Native )
}
. or_else ( | | {
path_of_bin ( " docker " , | docker | {
// `docker info` should succeed iff the daemon is running
// see https://docs.docker.com/config/daemon/troubleshoot/#check-whether-docker-is-running
Command ::new ( docker ) . args ( [ " info " ] ) . output ( )
} )
. map ( | _ | EmccSource ::Docker )
} )
. or_else ( | | {
path_of_bin ( " podman " , | podman | {
Command ::new ( podman ) . arg ( " --version " ) . output ( )
} )
. map ( | _ | EmccSource ::Podman )
} ) ;
let Some ( cmd ) = source else {
2022-09-06 22:41:52 -07:00
return Err ( anyhow! (
" You must have either emcc or docker on your PATH to run this command "
) ) ;
2023-12-08 19:42:53 -05:00
} ;
let mut command = match cmd {
EmccSource ::Native ( emcc_path ) = > {
let mut command = Command ::new ( emcc_path ) ;
command . current_dir ( src_path ) ;
command
}
EmccSource ::Docker | EmccSource ::Podman = > {
let mut command = match cmd {
EmccSource ::Docker = > Command ::new ( " docker " ) ,
EmccSource ::Podman = > Command ::new ( " podman " ) ,
_ = > unreachable! ( ) ,
} ;
command . args ( [ " run " , " --rm " ] ) ;
// Mount the parser directory as a volume
command . args ( [ " --workdir " , " /src " ] ) ;
let mut volume_string = OsString ::from ( & src_path ) ;
volume_string . push ( " :/src:Z " ) ;
command . args ( [ OsStr ::new ( " --volume " ) , & volume_string ] ) ;
// In case `docker` is an alias to `podman`, ensure that podman
// mounts the current directory as writable by the container
// user which has the same uid as the host user. Setting the
// podman-specific variable is more reliable than attempting to
// detect whether `docker` is an alias for `podman`.
// see https://docs.podman.io/en/latest/markdown/podman-run.1.html#userns-mode
command . env ( " PODMAN_USERNS " , " keep-id " ) ;
// Get the current user id so that files created in the docker container will have
// the same owner.
#[ cfg(unix) ]
{
#[ link(name = " c " ) ]
extern " C " {
fn getuid ( ) -> u32 ;
}
// don't need to set user for podman since PODMAN_USERNS=keep-id is already set
if cmd = = EmccSource ::Docker {
let user_id = unsafe { getuid ( ) } ;
command . args ( [ " --user " , & user_id . to_string ( ) ] ) ;
}
} ;
// Run `emcc` in a container using the `emscripten-slim` image
command . args ( [ EMSCRIPTEN_TAG , " emcc " ] ) ;
command
}
} ;
2019-01-07 17:57:27 -08:00
2022-09-06 22:41:52 -07:00
let output_name = " output.wasm " ;
2023-12-08 19:42:53 -05:00
command . args ( [
2022-09-06 22:41:52 -07:00
" -o " ,
output_name ,
" -Os " ,
" -s " ,
" WASM=1 " ,
" -s " ,
2024-02-11 17:22:08 -05:00
" SIDE_MODULE=2 " ,
2022-09-06 22:41:52 -07:00
" -s " ,
" TOTAL_MEMORY=33554432 " ,
" -s " ,
" NODEJS_CATCH_EXIT=0 " ,
" -s " ,
2023-12-08 19:42:53 -05:00
& format! ( " EXPORTED_FUNCTIONS=[ \" _tree_sitter_ {language_name} \" ] " ) ,
2022-09-06 22:41:52 -07:00
" -fno-exceptions " ,
2022-09-07 14:40:11 -07:00
" -fvisibility=hidden " ,
2022-09-06 22:41:52 -07:00
" -I " ,
" . " ,
] ) ;
if let Some ( scanner_filename ) = scanner_filename {
if scanner_filename
. extension ( )
. and_then ( | ext | ext . to_str ( ) )
. map_or ( false , | ext | [ " cc " , " cpp " ] . contains ( & ext ) )
{
2024-02-14 15:07:13 -05:00
eprintln! ( " Warning: Using a C++ scanner is now deprecated. Please migrate your scanner code to C, as C++ support will be removed in the near future. " ) ;
2022-09-06 22:41:52 -07:00
command . arg ( " -xc++ " ) ;
}
2023-12-08 19:42:53 -05:00
command . arg ( scanner_filename ) ;
2022-09-06 22:41:52 -07:00
}
command . arg ( " parser.c " ) ;
2024-02-07 02:18:02 -05:00
let status = command
. spawn ( )
. with_context ( | | " Failed to run emcc command " ) ?
. wait ( ) ? ;
if ! status . success ( ) {
return Err ( anyhow! ( " emcc command failed " ) ) ;
2022-09-06 22:41:52 -07:00
}
2023-12-08 19:42:53 -05:00
fs ::rename ( src_path . join ( output_name ) , output_path )
2022-09-06 22:41:52 -07:00
. context ( " failed to rename wasm output file " ) ? ;
2024-02-07 09:30:24 -05:00
2022-09-06 22:41:52 -07:00
Ok ( ( ) )
2019-01-07 17:57:27 -08:00
}
2024-02-03 19:38:37 -05:00
#[ must_use ]
2019-10-24 12:01:27 -07:00
pub fn highlight_config_for_injection_string < ' a > (
& ' a self ,
string : & str ,
) -> Option < & ' a HighlightConfiguration > {
match self . language_configuration_for_injection_string ( string ) {
Err ( e ) = > {
2024-02-03 19:38:37 -05:00
eprintln! ( " Failed to load language for injection string ' {string} ': {e} " , ) ;
2019-10-24 12:01:27 -07:00
None
}
Ok ( None ) = > None ,
2023-07-20 03:42:52 -04:00
Ok ( Some ( ( language , configuration ) ) ) = > {
2024-02-07 02:02:32 -05:00
match configuration . highlight_config ( language , None ) {
2023-07-20 03:42:52 -04:00
Err ( e ) = > {
eprintln! (
2024-02-03 19:38:37 -05:00
" Failed to load property sheet for injection string '{string}': {e} " ,
2023-07-20 03:42:52 -04:00
) ;
None
}
Ok ( None ) = > None ,
Ok ( Some ( config ) ) = > Some ( config ) ,
2019-10-24 12:01:27 -07:00
}
2023-07-20 03:42:52 -04:00
}
2019-10-24 12:01:27 -07:00
}
}
2023-09-21 00:52:50 -04:00
pub fn find_language_configurations_at_path (
& mut self ,
2019-08-07 17:41:45 -07:00
parser_path : & Path ,
2023-09-21 00:52:50 -04:00
set_current_path_config : bool ,
2019-08-07 17:41:45 -07:00
) -> Result < & [ LanguageConfiguration ] > {
2024-02-16 14:42:19 -05:00
#[ derive(Deserialize, Clone, Default) ]
2019-10-17 10:14:05 -07:00
#[ serde(untagged) ]
enum PathsJSON {
2023-09-21 00:52:50 -04:00
#[ default ]
2019-10-17 10:14:05 -07:00
Empty ,
Single ( String ) ,
Multiple ( Vec < String > ) ,
}
impl PathsJSON {
fn into_vec ( self ) -> Option < Vec < String > > {
match self {
2024-02-03 19:38:37 -05:00
Self ::Empty = > None ,
Self ::Single ( s ) = > Some ( vec! [ s ] ) ,
Self ::Multiple ( s ) = > Some ( s ) ,
2019-10-17 10:14:05 -07:00
}
}
}
2019-01-07 17:57:27 -08:00
#[ derive(Deserialize) ]
struct LanguageConfigurationJSON {
2019-08-07 17:41:45 -07:00
#[ serde(default) ]
path : PathBuf ,
2019-02-20 14:38:19 -08:00
scope : Option < String > ,
2019-01-07 17:57:27 -08:00
#[ serde(rename = " file-types " ) ]
file_types : Option < Vec < String > > ,
#[ serde(rename = " content-regex " ) ]
content_regex : Option < String > ,
#[ serde(rename = " first-line-regex " ) ]
first_line_regex : Option < String > ,
2019-02-19 11:24:50 -08:00
#[ serde(rename = " injection-regex " ) ]
injection_regex : Option < String > ,
2019-10-17 10:14:05 -07:00
#[ serde(default) ]
highlights : PathsJSON ,
#[ serde(default) ]
injections : PathsJSON ,
#[ serde(default) ]
locals : PathsJSON ,
2020-03-13 16:13:19 -07:00
#[ serde(default) ]
tags : PathsJSON ,
2024-02-16 14:42:19 -05:00
#[ serde(default, rename = " external-files " ) ]
external_files : PathsJSON ,
2019-01-07 17:57:27 -08:00
}
#[ derive(Deserialize) ]
struct PackageJSON {
2019-08-07 17:41:45 -07:00
#[ serde(default) ]
2019-01-07 17:57:27 -08:00
#[ serde(rename = " tree-sitter " ) ]
2019-08-07 17:41:45 -07:00
tree_sitter : Vec < LanguageConfigurationJSON > ,
2019-01-07 17:57:27 -08:00
}
2023-08-18 16:14:01 -04:00
#[ derive(Deserialize) ]
struct GrammarJSON {
name : String ,
}
2019-08-07 17:41:45 -07:00
let initial_language_configuration_count = self . language_configurations . len ( ) ;
2024-02-03 19:38:37 -05:00
if let Ok ( package_json_contents ) = fs ::read_to_string ( parser_path . join ( " package.json " ) ) {
2019-05-30 12:05:53 -07:00
let package_json = serde_json ::from_str ::< PackageJSON > ( & package_json_contents ) ;
if let Ok ( package_json ) = package_json {
2019-08-07 17:41:45 -07:00
let language_count = self . languages_by_id . len ( ) ;
for config_json in package_json . tree_sitter {
// Determine the path to the parser directory. This can be specified in
// the package.json, but defaults to the directory containing the package.json.
let language_path = parser_path . join ( config_json . path ) ;
2023-08-18 16:14:01 -04:00
let grammar_path = language_path . join ( " src " ) . join ( " grammar.json " ) ;
let mut grammar_file = fs ::File ::open ( grammar_path )
. with_context ( | | " Failed to read grammar.json " ) ? ;
let grammar_json : GrammarJSON =
serde_json ::from_reader ( BufReader ::new ( & mut grammar_file ) )
. with_context ( | | " Failed to parse grammar.json " ) ? ;
2019-08-07 17:41:45 -07:00
// Determine if a previous language configuration in this package.json file
// already uses the same language.
let mut language_id = None ;
2024-02-16 14:42:19 -05:00
for ( id , ( path , _ , _ ) ) in
2019-08-07 17:41:45 -07:00
self . languages_by_id . iter ( ) . enumerate ( ) . skip ( language_count )
{
if language_path = = * path {
language_id = Some ( id ) ;
}
}
// If not, add a new language path to the list.
2024-02-16 14:42:19 -05:00
let language_id = if let Some ( language_id ) = language_id {
language_id
} else {
self . languages_by_id . push ( (
language_path ,
OnceCell ::new ( ) ,
config_json . external_files . clone ( ) . into_vec ( ) . map ( | files | {
files . into_iter ( )
. map ( | path | {
let path = parser_path . join ( path ) ;
// prevent p being above/outside of parser_path
if path . starts_with ( parser_path ) {
Ok ( path )
} else {
Err ( anyhow! ( " External file path {path:?} is outside of parser directory {parser_path:?} " ) )
}
} )
. collect ::< Result < Vec < _ > > > ( )
} ) . transpose ( ) ? ,
) ) ;
2019-08-07 17:41:45 -07:00
self . languages_by_id . len ( ) - 1
2024-02-16 14:42:19 -05:00
} ;
2019-01-07 17:57:27 -08:00
2019-08-07 17:41:45 -07:00
let configuration = LanguageConfiguration {
2019-09-18 17:35:47 -07:00
root_path : parser_path . to_path_buf ( ) ,
2023-09-21 00:52:50 -04:00
language_name : grammar_json . name . clone ( ) ,
2019-08-07 17:41:45 -07:00
scope : config_json . scope ,
language_id ,
file_types : config_json . file_types . unwrap_or ( Vec ::new ( ) ) ,
2024-02-13 07:51:41 +01:00
content_regex : Self ::regex ( config_json . content_regex . as_deref ( ) ) ,
first_line_regex : Self ::regex ( config_json . first_line_regex . as_deref ( ) ) ,
injection_regex : Self ::regex ( config_json . injection_regex . as_deref ( ) ) ,
2019-10-17 10:14:05 -07:00
injections_filenames : config_json . injections . into_vec ( ) ,
locals_filenames : config_json . locals . into_vec ( ) ,
2020-03-13 16:13:19 -07:00
tags_filenames : config_json . tags . into_vec ( ) ,
2019-10-17 10:14:05 -07:00
highlights_filenames : config_json . highlights . into_vec ( ) ,
2019-10-24 12:01:27 -07:00
highlight_config : OnceCell ::new ( ) ,
2020-03-04 14:27:31 -08:00
tags_config : OnceCell ::new ( ) ,
2023-09-21 00:52:50 -04:00
highlight_names : & self . highlight_names ,
2019-10-24 12:01:27 -07:00
use_all_highlight_names : self . use_all_highlight_names ,
2019-08-07 17:41:45 -07:00
} ;
2019-05-30 12:05:53 -07:00
for file_type in & configuration . file_types {
self . language_configuration_ids_by_file_type
. entry ( file_type . to_string ( ) )
2023-09-21 00:52:50 -04:00
. or_default ( )
2019-08-07 17:41:45 -07:00
. push ( self . language_configurations . len ( ) ) ;
2019-05-30 12:05:53 -07:00
}
2024-02-13 07:51:41 +01:00
if let Some ( first_line_regex ) = & configuration . first_line_regex {
self . language_configuration_ids_by_first_line_regex
. entry ( first_line_regex . to_string ( ) )
. or_default ( )
. push ( self . language_configurations . len ( ) ) ;
}
2019-08-07 17:41:45 -07:00
2019-10-24 12:01:27 -07:00
self . language_configurations
. push ( unsafe { mem ::transmute ( configuration ) } ) ;
2023-09-21 00:52:50 -04:00
if set_current_path_config
& & self . language_configuration_in_current_path . is_none ( )
{
self . language_configuration_in_current_path =
Some ( self . language_configurations . len ( ) - 1 ) ;
}
2019-05-30 12:05:53 -07:00
}
2019-01-07 17:57:27 -08:00
}
}
2019-08-11 09:21:49 -07:00
if self . language_configurations . len ( ) = = initial_language_configuration_count
& & parser_path . join ( " src " ) . join ( " grammar.json " ) . exists ( )
{
2023-08-18 16:14:01 -04:00
let grammar_path = parser_path . join ( " src " ) . join ( " grammar.json " ) ;
let mut grammar_file =
fs ::File ::open ( grammar_path ) . with_context ( | | " Failed to read grammar.json " ) ? ;
let grammar_json : GrammarJSON =
serde_json ::from_reader ( BufReader ::new ( & mut grammar_file ) )
. with_context ( | | " Failed to parse grammar.json " ) ? ;
2019-10-24 12:01:27 -07:00
let configuration = LanguageConfiguration {
root_path : parser_path . to_owned ( ) ,
2023-08-18 16:14:01 -04:00
language_name : grammar_json . name ,
2019-10-24 12:01:27 -07:00
language_id : self . languages_by_id . len ( ) ,
file_types : Vec ::new ( ) ,
scope : None ,
content_regex : None ,
2024-02-13 07:51:41 +01:00
first_line_regex : None ,
2019-10-24 12:01:27 -07:00
injection_regex : None ,
injections_filenames : None ,
locals_filenames : None ,
highlights_filenames : None ,
2020-03-13 16:13:19 -07:00
tags_filenames : None ,
2019-10-24 12:01:27 -07:00
highlight_config : OnceCell ::new ( ) ,
2020-03-04 14:27:31 -08:00
tags_config : OnceCell ::new ( ) ,
2023-09-21 00:52:50 -04:00
highlight_names : & self . highlight_names ,
2019-10-24 12:01:27 -07:00
use_all_highlight_names : self . use_all_highlight_names ,
} ;
self . language_configurations
. push ( unsafe { mem ::transmute ( configuration ) } ) ;
2019-08-11 09:21:49 -07:00
self . languages_by_id
2024-02-16 14:42:19 -05:00
. push ( ( parser_path . to_owned ( ) , OnceCell ::new ( ) , None ) ) ;
2019-08-11 09:21:49 -07:00
}
2019-08-07 17:41:45 -07:00
Ok ( & self . language_configurations [ initial_language_configuration_count .. ] )
2019-01-07 17:57:27 -08:00
}
2019-10-24 12:01:27 -07:00
2024-02-13 07:51:41 +01:00
fn regex ( pattern : Option < & str > ) -> Option < Regex > {
pattern . and_then ( | r | RegexBuilder ::new ( r ) . multi_line ( true ) . build ( ) . ok ( ) )
2019-10-24 12:01:27 -07:00
}
2021-06-09 12:51:28 -04:00
pub fn select_language (
& mut self ,
path : & Path ,
current_dir : & Path ,
scope : Option < & str > ,
) -> Result < Language > {
if let Some ( scope ) = scope {
if let Some ( config ) = self
. language_configuration_for_scope ( scope )
2023-09-21 00:52:50 -04:00
. with_context ( | | format! ( " Failed to load language for scope ' {scope} ' " ) ) ?
2021-06-09 12:51:28 -04:00
{
Ok ( config . 0 )
} else {
2023-09-21 00:52:50 -04:00
Err ( anyhow! ( " Unknown scope '{scope}' " ) )
2021-06-09 12:51:28 -04:00
}
} else if let Some ( ( lang , _ ) ) = self
. language_configuration_for_file_name ( path )
. with_context ( | | {
format! (
" Failed to load language for file name {} " ,
& path . file_name ( ) . unwrap ( ) . to_string_lossy ( )
)
} ) ?
{
Ok ( lang )
2023-09-21 00:52:50 -04:00
} else if let Some ( id ) = self . language_configuration_in_current_path {
Ok ( self . language_for_id ( self . language_configurations [ id ] . language_id ) ? )
2021-06-09 12:51:28 -04:00
} else if let Some ( lang ) = self
2023-09-21 00:52:50 -04:00
. languages_at_path ( current_dir )
2021-06-09 12:51:28 -04:00
. with_context ( | | " Failed to load language in current directory " ) ?
. first ( )
. cloned ( )
{
Ok ( lang )
2024-02-13 02:59:11 -05:00
} else if let Some ( lang ) = self . language_configuration_for_first_line_regex ( path ) ? {
Ok ( lang . 0 )
2021-06-09 12:51:28 -04:00
} else {
Err ( anyhow! ( " No language found " ) )
}
}
2021-09-08 00:08:13 +03:00
/// Stores `flag` in the loader's `debug_build` setting.
pub fn use_debug_build(&mut self, flag: bool) {
    self.debug_build = flag;
}
2022-09-06 22:41:52 -07:00
/// Creates a `WasmStore` from `engine` and installs it as the loader's wasm store,
/// replacing any store that was set before.
///
/// # Panics
///
/// Panics if the store cannot be created or if locking `wasm_store` fails.
#[cfg(feature = "wasm")]
pub fn use_wasm(&mut self, engine: tree_sitter::wasmtime::Engine) {
    // Build the store before taking the lock, as the original single statement did.
    let store = tree_sitter::WasmStore::new(engine).unwrap();
    let mut slot = self.wasm_store.lock().unwrap();
    *slot = Some(store);
}
2023-10-31 16:56:11 -07:00
2024-02-03 19:38:37 -05:00
#[ must_use ]
2023-10-31 16:56:11 -07:00
pub fn get_scanner_path ( & self , src_path : & Path ) -> Option < PathBuf > {
let mut path = src_path . join ( " scanner.c " ) ;
for extension in [ " c " , " cc " , " cpp " ] {
path . set_extension ( extension ) ;
if path . exists ( ) {
return Some ( path ) ;
}
}
None
}
2019-01-07 17:57:27 -08:00
}
2019-10-24 12:01:27 -07:00
impl < ' a > LanguageConfiguration < ' a > {
2023-07-20 03:42:52 -04:00
pub fn highlight_config (
& self ,
language : Language ,
2023-07-20 08:04:59 -04:00
paths : Option < & [ String ] > ,
2023-07-20 03:42:52 -04:00
) -> Result < Option < & HighlightConfiguration > > {
2023-07-20 08:04:59 -04:00
let ( highlights_filenames , injections_filenames , locals_filenames ) = match paths {
Some ( paths ) = > (
Some (
paths
. iter ( )
. filter ( | p | p . ends_with ( " highlights.scm " ) )
. cloned ( )
2023-09-01 17:01:39 -04:00
. collect ::< Vec < _ > > ( ) ,
2023-07-20 08:04:59 -04:00
) ,
Some (
paths
. iter ( )
. filter ( | p | p . ends_with ( " tags.scm " ) )
. cloned ( )
2023-09-01 17:01:39 -04:00
. collect ::< Vec < _ > > ( ) ,
2023-07-20 08:04:59 -04:00
) ,
Some (
paths
. iter ( )
. filter ( | p | p . ends_with ( " locals.scm " ) )
. cloned ( )
2023-09-01 17:01:39 -04:00
. collect ::< Vec < _ > > ( ) ,
2023-07-20 08:04:59 -04:00
) ,
) ,
None = > ( None , None , None ) ,
} ;
2024-02-13 02:59:11 -05:00
self . highlight_config
2019-02-19 11:24:50 -08:00
. get_or_try_init ( | | {
2023-07-20 08:04:59 -04:00
let ( highlights_query , highlight_ranges ) = self . read_queries (
if highlights_filenames . is_some ( ) {
2023-09-01 17:01:39 -04:00
highlights_filenames . as_deref ( )
2023-07-20 08:04:59 -04:00
} else {
2023-09-01 17:01:39 -04:00
self . highlights_filenames . as_deref ( )
2023-07-20 08:04:59 -04:00
} ,
" highlights.scm " ,
) ? ;
let ( injections_query , injection_ranges ) = self . read_queries (
if injections_filenames . is_some ( ) {
2023-09-01 17:01:39 -04:00
injections_filenames . as_deref ( )
2023-07-20 08:04:59 -04:00
} else {
2023-09-01 17:01:39 -04:00
self . injections_filenames . as_deref ( )
2023-07-20 08:04:59 -04:00
} ,
" injections.scm " ,
) ? ;
let ( locals_query , locals_ranges ) = self . read_queries (
if locals_filenames . is_some ( ) {
2023-09-01 17:01:39 -04:00
locals_filenames . as_deref ( )
2023-07-20 08:04:59 -04:00
} else {
2023-09-01 17:01:39 -04:00
self . locals_filenames . as_deref ( )
2023-07-20 08:04:59 -04:00
} ,
" locals.scm " ,
) ? ;
2019-09-18 17:35:47 -07:00
2019-10-17 10:14:05 -07:00
if highlights_query . is_empty ( ) {
Ok ( None )
2019-09-18 17:35:47 -07:00
} else {
2019-10-24 12:01:27 -07:00
let mut result = HighlightConfiguration ::new (
language ,
2023-08-18 16:14:01 -04:00
& self . language_name ,
2019-10-24 12:01:27 -07:00
& highlights_query ,
& injections_query ,
& locals_query ,
)
2021-09-03 12:49:42 +03:00
. map_err ( | error | match error . kind {
QueryErrorKind ::Language = > Error ::from ( error ) ,
_ = > {
if error . offset < injections_query . len ( ) {
Self ::include_path_in_query_error (
error ,
& injection_ranges ,
& injections_query ,
0 ,
)
} else if error . offset < injections_query . len ( ) + locals_query . len ( ) {
Self ::include_path_in_query_error (
error ,
& locals_ranges ,
& locals_query ,
injections_query . len ( ) ,
)
} else {
Self ::include_path_in_query_error (
error ,
& highlight_ranges ,
& highlights_query ,
injections_query . len ( ) + locals_query . len ( ) ,
)
}
2020-09-24 13:47:27 -07:00
}
} ) ? ;
2019-10-24 12:01:27 -07:00
let mut all_highlight_names = self . highlight_names . lock ( ) . unwrap ( ) ;
if self . use_all_highlight_names {
for capture_name in result . query . capture_names ( ) {
2023-09-01 05:33:52 +03:00
if ! all_highlight_names . iter ( ) . any ( | x | x = = capture_name ) {
2024-02-03 19:38:37 -05:00
all_highlight_names . push ( ( * capture_name ) . to_string ( ) ) ;
2019-10-24 12:01:27 -07:00
}
}
}
2023-09-21 00:52:50 -04:00
result . configure ( all_highlight_names . as_slice ( ) ) ;
2024-02-03 19:38:37 -05:00
drop ( all_highlight_names ) ;
2019-10-24 12:01:27 -07:00
Ok ( Some ( result ) )
2019-10-17 10:14:05 -07:00
}
2019-02-19 11:24:50 -08:00
} )
2024-02-13 02:59:11 -05:00
. map ( Option ::as_ref )
2019-02-19 11:24:50 -08:00
}
2020-03-04 14:27:31 -08:00
pub fn tags_config ( & self , language : Language ) -> Result < Option < & TagsConfiguration > > {
self . tags_config
. get_or_try_init ( | | {
2020-09-24 15:03:51 -07:00
let ( tags_query , tags_ranges ) =
2023-09-01 17:01:39 -04:00
self . read_queries ( self . tags_filenames . as_deref ( ) , " tags.scm " ) ? ;
2020-09-24 15:03:51 -07:00
let ( locals_query , locals_ranges ) =
2023-09-01 17:01:39 -04:00
self . read_queries ( self . locals_filenames . as_deref ( ) , " locals.scm " ) ? ;
2020-03-04 14:27:31 -08:00
if tags_query . is_empty ( ) {
Ok ( None )
} else {
TagsConfiguration ::new ( language , & tags_query , & locals_query )
2020-09-24 15:03:51 -07:00
. map ( Some )
. map_err ( | error | {
if let TagsError ::Query ( error ) = error {
if error . offset < locals_query . len ( ) {
Self ::include_path_in_query_error (
error ,
& locals_ranges ,
& locals_query ,
0 ,
)
} else {
Self ::include_path_in_query_error (
error ,
& tags_ranges ,
& tags_query ,
locals_query . len ( ) ,
)
}
} else {
error . into ( )
}
} )
2020-03-04 14:27:31 -08:00
}
} )
. map ( Option ::as_ref )
}
2023-09-21 00:52:50 -04:00
fn include_path_in_query_error (
2020-09-24 15:03:51 -07:00
mut error : QueryError ,
2023-09-21 00:52:50 -04:00
ranges : & [ ( String , Range < usize > ) ] ,
2020-09-24 15:03:51 -07:00
source : & str ,
start_offset : usize ,
2021-06-09 12:32:22 -04:00
) -> Error {
2020-09-24 15:03:51 -07:00
let offset_within_section = error . offset - start_offset ;
let ( path , range ) = ranges
. iter ( )
. find ( | ( _ , range ) | range . contains ( & offset_within_section ) )
2024-02-03 19:38:37 -05:00
. unwrap_or_else ( | | ranges . last ( ) . unwrap ( ) ) ;
2020-09-24 15:03:51 -07:00
error . offset = offset_within_section - range . start ;
error . row = source [ range . start .. offset_within_section ]
. chars ( )
. filter ( | c | * c = = '\n' )
. count ( ) ;
2024-02-03 19:38:37 -05:00
Error ::from ( error ) . context ( format! ( " Error in query file {path:?} " ) )
2020-09-24 15:03:51 -07:00
}
2024-02-04 01:30:33 -05:00
#[ allow(clippy::type_complexity) ]
2020-09-24 13:47:27 -07:00
fn read_queries (
& self ,
2023-09-01 17:01:39 -04:00
paths : Option < & [ String ] > ,
2020-09-24 13:47:27 -07:00
default_path : & str ,
) -> Result < ( String , Vec < ( String , Range < usize > ) > ) > {
let mut query = String ::new ( ) ;
let mut path_ranges = Vec ::new ( ) ;
2023-09-01 17:01:39 -04:00
if let Some ( paths ) = paths {
2020-03-04 14:27:31 -08:00
for path in paths {
2020-09-24 13:47:27 -07:00
let abs_path = self . root_path . join ( path ) ;
let prev_query_len = query . len ( ) ;
2021-06-09 12:32:22 -04:00
query + = & fs ::read_to_string ( & abs_path )
2024-02-03 19:38:37 -05:00
. with_context ( | | format! ( " Failed to read query file {path:?} " ) ) ? ;
2020-09-24 13:47:27 -07:00
path_ranges . push ( ( path . clone ( ) , prev_query_len .. query . len ( ) ) ) ;
2020-03-04 14:27:31 -08:00
}
} else {
2024-02-12 16:13:02 -05:00
// highlights.scm is needed to test highlights, and tags.scm to test tags
if default_path = = " highlights.scm " | | default_path = = " tags.scm " {
eprintln! (
indoc! { "
Warning : you should add a ` { } ` entry pointing to the highlights path in ` tree - sitter ` language list in the grammar ' s package . json
See more here : https ://tree-sitter.github.io/tree-sitter/syntax-highlighting#query-paths
" },
default_path . replace ( " .scm " , " " )
) ;
}
2020-03-04 14:27:31 -08:00
let queries_path = self . root_path . join ( " queries " ) ;
let path = queries_path . join ( default_path ) ;
if path . exists ( ) {
2021-06-09 12:32:22 -04:00
query = fs ::read_to_string ( & path )
2024-02-03 19:38:37 -05:00
. with_context ( | | format! ( " Failed to read query file {path:?} " ) ) ? ;
2020-09-24 13:47:27 -07:00
path_ranges . push ( ( default_path . to_string ( ) , 0 .. query . len ( ) ) ) ;
2020-03-04 14:27:31 -08:00
}
}
2020-09-24 13:47:27 -07:00
Ok ( ( query , path_ranges ) )
2020-03-04 14:27:31 -08:00
}
2019-02-19 11:24:50 -08:00
}
2024-02-16 14:42:19 -05:00
fn needs_recompile ( lib_path : & Path , paths_to_check : & [ PathBuf ] ) -> Result < bool > {
2019-01-11 13:30:45 -08:00
if ! lib_path . exists ( ) {
return Ok ( true ) ;
}
2024-02-16 14:42:19 -05:00
let lib_mtime =
mtime ( lib_path ) . with_context ( | | format! ( " Failed to read mtime of {lib_path:?} " ) ) ? ;
for path in paths_to_check {
if mtime ( path ) ? > lib_mtime {
2019-01-11 13:30:45 -08:00
return Ok ( true ) ;
}
}
Ok ( false )
}
2019-02-06 16:03:50 -08:00
fn mtime ( path : & Path ) -> Result < SystemTime > {
2019-01-11 13:30:45 -08:00
Ok ( fs ::metadata ( path ) ? . modified ( ) ? )
2019-01-07 17:57:27 -08:00
}
2019-01-15 10:27:39 -08:00
/// Returns a copy of `name` in which every `-` is replaced by `_`; all other characters
/// are kept as they are.
fn replace_dashes_with_underscores(name: &str) -> String {
    name.chars()
        .map(|c| if c == '-' { '_' } else { c })
        .collect()
}